PyPI - pixeltable - Versions diffs - 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl - Mend

pixeltable 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (147) hide show

pixeltable/__init__.py +64 -11
pixeltable/__version__.py +2 -2
pixeltable/catalog/__init__.py +1 -1
pixeltable/catalog/catalog.py +50 -27
pixeltable/catalog/column.py +27 -11
pixeltable/catalog/dir.py +6 -4
pixeltable/catalog/globals.py +8 -1
pixeltable/catalog/insertable_table.py +22 -12
pixeltable/catalog/named_function.py +10 -6
pixeltable/catalog/path.py +3 -2
pixeltable/catalog/path_dict.py +8 -6
pixeltable/catalog/schema_object.py +2 -1
pixeltable/catalog/table.py +121 -101
pixeltable/catalog/table_version.py +291 -142
pixeltable/catalog/table_version_path.py +8 -5
pixeltable/catalog/view.py +67 -26
pixeltable/dataframe.py +102 -72
pixeltable/env.py +20 -21
pixeltable/exec/__init__.py +2 -2
pixeltable/exec/aggregation_node.py +10 -4
pixeltable/exec/cache_prefetch_node.py +5 -3
pixeltable/exec/component_iteration_node.py +9 -8
pixeltable/exec/data_row_batch.py +21 -10
pixeltable/exec/exec_context.py +10 -3
pixeltable/exec/exec_node.py +23 -12
pixeltable/exec/expr_eval/evaluators.py +13 -7
pixeltable/exec/expr_eval/expr_eval_node.py +24 -15
pixeltable/exec/expr_eval/globals.py +30 -7
pixeltable/exec/expr_eval/row_buffer.py +5 -6
pixeltable/exec/expr_eval/schedulers.py +151 -31
pixeltable/exec/in_memory_data_node.py +8 -7
pixeltable/exec/row_update_node.py +15 -5
pixeltable/exec/sql_node.py +56 -27
pixeltable/exprs/__init__.py +2 -2
pixeltable/exprs/arithmetic_expr.py +57 -26
pixeltable/exprs/array_slice.py +1 -1
pixeltable/exprs/column_property_ref.py +2 -1
pixeltable/exprs/column_ref.py +20 -15
pixeltable/exprs/comparison.py +6 -2
pixeltable/exprs/compound_predicate.py +1 -3
pixeltable/exprs/data_row.py +2 -2
pixeltable/exprs/expr.py +101 -72
pixeltable/exprs/expr_dict.py +2 -1
pixeltable/exprs/expr_set.py +3 -1
pixeltable/exprs/function_call.py +39 -41
pixeltable/exprs/globals.py +1 -0
pixeltable/exprs/in_predicate.py +2 -2
pixeltable/exprs/inline_expr.py +20 -17
pixeltable/exprs/json_mapper.py +4 -2
pixeltable/exprs/json_path.py +12 -18
pixeltable/exprs/literal.py +5 -9
pixeltable/exprs/method_ref.py +1 -0
pixeltable/exprs/object_ref.py +1 -1
pixeltable/exprs/row_builder.py +32 -17
pixeltable/exprs/rowid_ref.py +14 -5
pixeltable/exprs/similarity_expr.py +11 -6
pixeltable/exprs/sql_element_cache.py +1 -1
pixeltable/exprs/type_cast.py +24 -9
pixeltable/ext/__init__.py +1 -0
pixeltable/ext/functions/__init__.py +1 -0
pixeltable/ext/functions/whisperx.py +2 -2
pixeltable/ext/functions/yolox.py +11 -11
pixeltable/func/aggregate_function.py +17 -13
pixeltable/func/callable_function.py +6 -6
pixeltable/func/expr_template_function.py +15 -14
pixeltable/func/function.py +16 -16
pixeltable/func/function_registry.py +11 -8
pixeltable/func/globals.py +4 -2
pixeltable/func/query_template_function.py +12 -13
pixeltable/func/signature.py +18 -9
pixeltable/func/tools.py +10 -17
pixeltable/func/udf.py +106 -11
pixeltable/functions/__init__.py +21 -2
pixeltable/functions/anthropic.py +16 -12
pixeltable/functions/fireworks.py +63 -5
pixeltable/functions/gemini.py +13 -3
pixeltable/functions/globals.py +18 -6
pixeltable/functions/huggingface.py +20 -38
pixeltable/functions/image.py +7 -3
pixeltable/functions/json.py +1 -0
pixeltable/functions/llama_cpp.py +1 -4
pixeltable/functions/mistralai.py +31 -20
pixeltable/functions/ollama.py +4 -18
pixeltable/functions/openai.py +201 -108
pixeltable/functions/replicate.py +11 -10
pixeltable/functions/string.py +70 -7
pixeltable/functions/timestamp.py +21 -8
pixeltable/functions/together.py +66 -52
pixeltable/functions/video.py +1 -0
pixeltable/functions/vision.py +14 -11
pixeltable/functions/whisper.py +2 -1
pixeltable/globals.py +60 -26
pixeltable/index/__init__.py +1 -1
pixeltable/index/btree.py +5 -3
pixeltable/index/embedding_index.py +15 -14
pixeltable/io/__init__.py +1 -1
pixeltable/io/external_store.py +30 -25
pixeltable/io/fiftyone.py +6 -14
pixeltable/io/globals.py +33 -27
pixeltable/io/hf_datasets.py +2 -1
pixeltable/io/label_studio.py +77 -68
pixeltable/io/pandas.py +33 -9
pixeltable/io/parquet.py +9 -12
pixeltable/iterators/__init__.py +1 -0
pixeltable/iterators/audio.py +205 -0
pixeltable/iterators/document.py +19 -8
pixeltable/iterators/image.py +6 -24
pixeltable/iterators/string.py +3 -6
pixeltable/iterators/video.py +1 -7
pixeltable/metadata/__init__.py +7 -1
pixeltable/metadata/converters/convert_10.py +2 -2
pixeltable/metadata/converters/convert_15.py +1 -5
pixeltable/metadata/converters/convert_16.py +2 -4
pixeltable/metadata/converters/convert_17.py +2 -4
pixeltable/metadata/converters/convert_18.py +2 -4
pixeltable/metadata/converters/convert_19.py +2 -5
pixeltable/metadata/converters/convert_20.py +1 -4
pixeltable/metadata/converters/convert_21.py +4 -6
pixeltable/metadata/converters/convert_22.py +1 -0
pixeltable/metadata/converters/convert_23.py +5 -5
pixeltable/metadata/converters/convert_24.py +12 -13
pixeltable/metadata/converters/convert_26.py +23 -0
pixeltable/metadata/converters/util.py +3 -4
pixeltable/metadata/notes.py +1 -0
pixeltable/metadata/schema.py +13 -2
pixeltable/plan.py +173 -98
pixeltable/store.py +42 -26
pixeltable/type_system.py +62 -54
pixeltable/utils/arrow.py +1 -2
pixeltable/utils/coco.py +16 -17
pixeltable/utils/code.py +1 -1
pixeltable/utils/console_output.py +6 -3
pixeltable/utils/description_helper.py +7 -7
pixeltable/utils/documents.py +3 -1
pixeltable/utils/filecache.py +12 -7
pixeltable/utils/http_server.py +9 -8
pixeltable/utils/media_store.py +2 -1
pixeltable/utils/pytorch.py +11 -14
pixeltable/utils/s3.py +1 -0
pixeltable/utils/sql.py +1 -0
pixeltable/utils/transactional_directory.py +2 -2
{pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/METADATA +6 -8
pixeltable-0.3.3.dist-info/RECORD +163 -0
pixeltable-0.3.2.dist-info/RECORD +0 -161
{pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/LICENSE +0 -0
{pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/WHEEL +0 -0
{pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/entry_points.txt +0 -0

pixeltable/functions/together.py CHANGED Viewed

@@ -25,12 +25,13 @@ if TYPE_CHECKING:
 @env.register_client('together')
-def _(api_key: str) -> 'together.Together':
+def _(api_key: str) -> 'together.AsyncTogether':
     import together
-    return together.Together(api_key=api_key)
+    return together.AsyncTogether(api_key=api_key)
-def _together_client() -> 'together.Together':
+def _together_client() -> 'together.AsyncTogether':
     return env.Env.get().get_client('together')
@@ -39,6 +40,7 @@ T = TypeVar('T')
 def _retry(fn: Callable[..., T]) -> Callable[..., T]:
     import together
     return tenacity.retry(
         retry=tenacity.retry_if_exception_type(together.error.RateLimitError),
         wait=tenacity.wait_random_exponential(multiplier=1, max=60),
@@ -46,8 +48,8 @@ def _retry(fn: Callable[..., T]) -> Callable[..., T]:
     )(fn)
-@pxt.udf
-def completions(
+@pxt.udf(resource_pool='request-rate:together:chat')
+async def completions(
     prompt: str,
     *,
     model: str,
@@ -68,6 +70,10 @@ def completions(
     Equivalent to the Together AI `completions` API endpoint.
     For additional details, see: [https://docs.together.ai/reference/completions-1](https://docs.together.ai/reference/completions-1)
+    Request throttling:
+    Applies the rate limit set in the config (section `together.rate_limits`, key `chat`). If no rate
+    limit is configured, uses a default of 600 RPM.
     __Requirements:__
     - `pip install together`
@@ -85,29 +91,27 @@ def completions(
         Add a computed column that applies the model `mistralai/Mixtral-8x7B-v0.1` to an existing Pixeltable column `tbl.prompt`
         of the table `tbl`:
-        >>> tbl['response'] = completions(tbl.prompt, model='mistralai/Mixtral-8x7B-v0.1')
+        >>> tbl.add_computed_column(response=completions(tbl.prompt, model='mistralai/Mixtral-8x7B-v0.1'))
     """
-    return (
-        _retry(_together_client().completions.create)(
-            prompt=prompt,
-            model=model,
-            max_tokens=max_tokens,
-            stop=stop,
-            temperature=temperature,
-            top_p=top_p,
-            top_k=top_k,
-            repetition_penalty=repetition_penalty,
-            logprobs=logprobs,
-            echo=echo,
-            n=n,
-            safety_model=safety_model,
-        )
-        .dict()
+    result = await _together_client().completions.create(
+        prompt=prompt,
+        model=model,
+        max_tokens=max_tokens,
+        stop=stop,
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        repetition_penalty=repetition_penalty,
+        logprobs=logprobs,
+        echo=echo,
+        n=n,
+        safety_model=safety_model,
     )
+    return result.dict()
-@pxt.udf
-def chat_completions(
+@pxt.udf(resource_pool='request-rate:together:chat')
+async def chat_completions(
     messages: list[dict[str, str]],
     *,
     model: str,
@@ -131,6 +135,10 @@ def chat_completions(
     Equivalent to the Together AI `chat/completions` API endpoint.
     For additional details, see: [https://docs.together.ai/reference/chat-completions-1](https://docs.together.ai/reference/chat-completions-1)
+    Request throttling:
+    Applies the rate limit set in the config (section `together.rate_limits`, key `chat`). If no rate
+    limit is configured, uses a default of 600 RPM.
     __Requirements:__
     - `pip install together`
@@ -149,28 +157,26 @@ def chat_completions(
         of the table `tbl`:
         >>> messages = [{'role': 'user', 'content': tbl.prompt}]
-        ... tbl['response'] = chat_completions(messages, model='mistralai/Mixtral-8x7B-v0.1')
+        ... tbl.add_computed_column(response=chat_completions(messages, model='mistralai/Mixtral-8x7B-v0.1'))
     """
-    return (
-        _retry(_together_client().chat.completions.create)(
-            messages=messages,
-            model=model,
-            max_tokens=max_tokens,
-            stop=stop,
-            temperature=temperature,
-            top_p=top_p,
-            top_k=top_k,
-            repetition_penalty=repetition_penalty,
-            logprobs=logprobs,
-            echo=echo,
-            n=n,
-            safety_model=safety_model,
-            response_format=response_format,
-            tools=tools,
-            tool_choice=tool_choice,
-        )
-        .dict()
+    result = await _together_client().chat.completions.create(
+        messages=messages,
+        model=model,
+        max_tokens=max_tokens,
+        stop=stop,
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        repetition_penalty=repetition_penalty,
+        logprobs=logprobs,
+        echo=echo,
+        n=n,
+        safety_model=safety_model,
+        response_format=response_format,
+        tools=tools,
+        tool_choice=tool_choice,
     )
+    return result.dict()
 _embedding_dimensions_cache = {
@@ -185,14 +191,18 @@ _embedding_dimensions_cache = {
 }
-@pxt.udf(batch_size=32)
-def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
+@pxt.udf(batch_size=32, resource_pool='request-rate:together:embeddings')
+async def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
     """
     Query an embedding model for a given string of text.
     Equivalent to the Together AI `embeddings` API endpoint.
     For additional details, see: [https://docs.together.ai/reference/embeddings-2](https://docs.together.ai/reference/embeddings-2)
+    Request throttling:
+    Applies the rate limit set in the config (section `together.rate_limits`, key `embeddings`). If no rate
+    limit is configured, uses a default of 600 RPM.
     __Requirements:__
     - `pip install together`
@@ -208,9 +218,9 @@ def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), pxt
         Add a computed column that applies the model `togethercomputer/m2-bert-80M-8k-retrieval`
         to an existing Pixeltable column `tbl.text` of the table `tbl`:
-        >>> tbl['response'] = embeddings(tbl.text, model='togethercomputer/m2-bert-80M-8k-retrieval')
+        >>> tbl.add_computed_column(response=embeddings(tbl.text, model='togethercomputer/m2-bert-80M-8k-retrieval'))
     """
-    result = _retry(_together_client().embeddings.create)(input=input, model=model)
+    result = await _together_client().embeddings.create(input=input, model=model)
     return [np.array(data.embedding, dtype=np.float64) for data in result.data]
@@ -223,8 +233,8 @@ def _(model: str) -> pxt.ArrayType:
     return pxt.ArrayType((dimensions,), dtype=pxt.FloatType())
-@pxt.udf
-def image_generations(
+@pxt.udf(resource_pool='request-rate:together:images')
+async def image_generations(
     prompt: str,
     *,
     model: str,
@@ -240,6 +250,10 @@ def image_generations(
     Equivalent to the Together AI `images/generations` API endpoint.
     For additional details, see: [https://docs.together.ai/reference/post_images-generations](https://docs.together.ai/reference/post_images-generations)
+    Request throttling:
+    Applies the rate limit set in the config (section `together.rate_limits`, key `images`). If no rate
+    limit is configured, uses a default of 600 RPM.
     __Requirements:__
     - `pip install together`
@@ -257,9 +271,9 @@ def image_generations(
         Add a computed column that applies the model `stabilityai/stable-diffusion-xl-base-1.0`
         to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
-        >>> tbl['response'] = image_generations(tbl.prompt, model='stabilityai/stable-diffusion-xl-base-1.0')
+        >>> tbl.add_computed_column(response=image_generations(tbl.prompt, model='stabilityai/stable-diffusion-xl-base-1.0'))
     """
-    result = _retry(_together_client().images.generate)(
+    result = await _together_client().images.generate(
         prompt=prompt, model=model, steps=steps, seed=seed, height=height, width=width, negative_prompt=negative_prompt
     )
     if result.data[0].b64_json is not None:

pixeltable/functions/video.py CHANGED Viewed

@@ -52,6 +52,7 @@ class make_video(pxt.Aggregator):
     """
     Aggregator that creates a video from a sequence of images.
     """
     def __init__(self, fps: int = 25):
         """follows https://pyav.org/docs/develop/cookbook/numpy.html#generating-video"""
         self.container: Optional[av.container.OutputContainer] = None

pixeltable/functions/vision.py CHANGED Viewed

@@ -205,7 +205,9 @@ def eval_detections(
         pred_filter = pred_classes_arr == class_idx
         gt_filter = gt_classes_arr == class_idx
         class_pred_scores = pred_scores_arr[pred_filter]
-        tp, fp = __calculate_image_tpfp(pred_bboxes_arr[pred_filter], class_pred_scores, gt_bboxes_arr[gt_filter], min_iou)
+        tp, fp = __calculate_image_tpfp(
+            pred_bboxes_arr[pred_filter], class_pred_scores, gt_bboxes_arr[gt_filter], min_iou
+        )
         ordered_class_pred_scores = -np.sort(-class_pred_scores)
         result.append(
             {
@@ -235,6 +237,7 @@ class mean_ap(pxt.Aggregator):
     - A `dict[int, float]` mapping each label class to an average precision (AP) value for that class.
     """
     def __init__(self):
         self.class_tpfp: dict[int, list[dict]] = defaultdict(list)
@@ -282,22 +285,22 @@ def __create_label_colors(labels: list[Any]) -> dict[Any, str]:
         label_hash = int(hashlib.md5(str(label).encode()).hexdigest(), 16)
         hue = (label_hash % 360) / 360.0
         rgb = colorsys.hsv_to_rgb(hue, 0.7, 0.95)
-        hex_color = '#{:02x}{:02x}{:02x}'.format(int(rgb[0]*255), int(rgb[1]*255), int(rgb[2]*255))
+        hex_color = '#{:02x}{:02x}{:02x}'.format(int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255))
         result[label] = hex_color
     return result
 @pxt.udf
 def draw_bounding_boxes(
-        img: PIL.Image.Image,
-        boxes: list[list[int]],
-        labels: Optional[list[Any]] = None,
-        color: Optional[str] = None,
-        box_colors: Optional[list[str]] = None,
-        fill: bool  = False,
-        width: int = 1,
-        font: Optional[str] = None,
-        font_size: Optional[int] = None,
+    img: PIL.Image.Image,
+    boxes: list[list[int]],
+    labels: Optional[list[Any]] = None,
+    color: Optional[str] = None,
+    box_colors: Optional[list[str]] = None,
+    fill: bool = False,
+    width: int = 1,
+    font: Optional[str] = None,
+    font_size: Optional[int] = None,
 ) -> PIL.Image.Image:
     """
     Draws bounding boxes on the given image.

pixeltable/functions/whisper.py CHANGED Viewed

@@ -14,6 +14,7 @@ from pixeltable.env import Env
 if TYPE_CHECKING:
     from whisper import Whisper  # type: ignore[import-untyped]
 @pxt.udf
 def transcribe(
     audio: pxt.Audio,
@@ -52,7 +53,7 @@ def transcribe(
         Add a computed column that applies the model `base.en` to an existing Pixeltable column `tbl.audio`
         of the table `tbl`:
-        >>> tbl['result'] = transcribe(tbl.audio, model='base.en')
+        >>> tbl.add_computed_column(result=transcribe(tbl.audio, model='base.en'))
     """
     Env.get().require_package('whisper')
     Env.get().require_package('torch')

pixeltable/globals.py CHANGED Viewed

@@ -20,15 +20,17 @@ from pixeltable.utils.filecache import FileCache
 _logger = logging.getLogger('pixeltable')
 def init() -> None:
     """Initializes the Pixeltable environment."""
     _ = Catalog.get()
 def _get_or_drop_existing_path(
     path_str: str,
     expected_obj_type: type[catalog.SchemaObject],
     expected_snapshot: bool,
-    if_exists: catalog.IfExistsParam
+    if_exists: catalog.IfExistsParam,
 ) -> Optional[catalog.SchemaObject]:
     """Handle schema object path collision during creation according to the if_exists parameter.
@@ -53,12 +55,15 @@ def _get_or_drop_existing_path(
         raise excs.Error(f'Path `{path_str}` already exists.')
     existing_path = cat.paths[path]
-    existing_path_is_snapshot = 'is_snapshot' in existing_path.get_metadata() and existing_path.get_metadata()['is_snapshot']
+    existing_path_is_snapshot = (
+        'is_snapshot' in existing_path.get_metadata() and existing_path.get_metadata()['is_snapshot']
+    )
     obj_type_str = 'Snapshot' if expected_snapshot else expected_obj_type._display_name().capitalize()
     # Check if the existing path is of expected type.
-    if (not isinstance(existing_path, expected_obj_type)
-        or (expected_snapshot and not existing_path_is_snapshot)):
-            raise excs.Error(f'Path `{path_str}` already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.')
+    if not isinstance(existing_path, expected_obj_type) or (expected_snapshot and not existing_path_is_snapshot):
+        raise excs.Error(
+            f'Path `{path_str}` already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.'
+        )
     # if_exists='ignore' return the handle to the existing object.
     assert isinstance(existing_path, expected_obj_type)
@@ -69,12 +74,14 @@ def _get_or_drop_existing_path(
     # unless if_exists='replace_force'.
     has_dependents = existing_path._has_dependents
     if if_exists == catalog.IfExistsParam.REPLACE and has_dependents:
-        raise excs.Error(f"{obj_type_str} `{path_str}` already exists and has dependents. Use `if_exists='replace_force'` to replace it.")
+        raise excs.Error(
+            f"{obj_type_str} `{path_str}` already exists and has dependents. Use `if_exists='replace_force'` to replace it."
+        )
     else:
         assert if_exists == catalog.IfExistsParam.REPLACE_FORCE or not has_dependents
         # Drop the existing path so it can be replaced.
         # Any errors during drop will be raised.
-        _logger.info(f"Dropping {obj_type_str} `{path_str}` to replace it.")
+        _logger.info(f'Dropping {obj_type_str} `{path_str}` to replace it.')
         if isinstance(existing_path, catalog.Dir):
             drop_dir(path_str, force=True)
         else:
@@ -83,6 +90,7 @@ def _get_or_drop_existing_path(
     return None
 def create_table(
     path_str: str,
     schema_or_df: Union[dict[str, Any], DataFrame],
@@ -91,7 +99,7 @@ def create_table(
     num_retained_versions: int = 10,
     comment: str = '',
     media_validation: Literal['on_read', 'on_write'] = 'on_write',
-    if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
+    if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
 ) -> catalog.Table:
     """Create a new base table.
@@ -166,7 +174,9 @@ def create_table(
         df = schema_or_df
         schema = df.schema
     elif isinstance(schema_or_df, DataFrameResultSet):
-        raise excs.Error('`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame. (Is there an extraneous call to `collect()`?)')
+        raise excs.Error(
+            '`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame. (Is there an extraneous call to `collect()`?)'
+        )
     else:
         raise excs.Error('`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame.')
@@ -182,8 +192,15 @@ def create_table(
             raise excs.Error('primary_key must be a single column name or a list of column names')
     tbl = catalog.InsertableTable._create(
-        dir._id, path.name, schema, df, primary_key=primary_key, num_retained_versions=num_retained_versions,
-        comment=comment, media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'))
+        dir._id,
+        path.name,
+        schema,
+        df,
+        primary_key=primary_key,
+        num_retained_versions=num_retained_versions,
+        comment=comment,
+        media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'),
+    )
     cat.paths[path] = tbl
     _logger.info(f'Created table `{path_str}`.')
@@ -293,17 +310,27 @@ def create_view(
         # additional columns should not be in the base table
         for col_name in additional_columns.keys():
             if col_name in [c.name for c in tbl_version_path.columns()]:
-                raise excs.Error(f"Column {col_name!r} already exists in the base table {tbl_version_path.get_column(col_name).tbl.name}.")
+                raise excs.Error(
+                    f'Column {col_name!r} already exists in the base table {tbl_version_path.get_column(col_name).tbl.name}.'
+                )
     if iterator is None:
         iterator_class, iterator_args = None, None
     else:
         iterator_class, iterator_args = iterator
     view = catalog.View._create(
-        dir._id, path.name, base=tbl_version_path, additional_columns=additional_columns, predicate=where,
-        is_snapshot=is_snapshot, iterator_cls=iterator_class, iterator_args=iterator_args,
-        num_retained_versions=num_retained_versions, comment=comment,
-        media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'))
+        dir._id,
+        path.name,
+        base=tbl_version_path,
+        additional_columns=additional_columns,
+        predicate=where,
+        is_snapshot=is_snapshot,
+        iterator_cls=iterator_class,
+        iterator_args=iterator_args,
+        num_retained_versions=num_retained_versions,
+        comment=comment,
+        media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'),
+    )
     cat.paths[path] = view
     _logger.info(f'Created view `{path_str}`.')
     FileCache.get().emit_eviction_warnings()
@@ -450,8 +477,9 @@ def move(path: str, new_path: str) -> None:
     obj._move(new_p.name, new_dir._id)
-def drop_table(table: Union[str, catalog.Table], force: bool = False,
-    if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
+def drop_table(
+    table: Union[str, catalog.Table], force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error'
+) -> None:
     """Drop a table, view, or snapshot.
     Args:
@@ -497,7 +525,9 @@ def drop_table(table: Union[str, catalog.Table], force: bool = False,
             else:
                 raise excs.Error(f'Table `{table}` does not exist.')
         if not isinstance(tbl, catalog.Table):
-            raise excs.Error(f'{tbl} needs to be a {catalog.Table._display_name()} but is a {type(tbl)._display_name()}')
+            raise excs.Error(
+                f'{tbl} needs to be a {catalog.Table._display_name()} but is a {type(tbl)._display_name()}'
+            )
     else:
         tbl = table
         tbl_path_obj = catalog.Path(tbl._path)
@@ -543,7 +573,10 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
     Catalog.get().paths.check_is_valid(path, expected=catalog.Dir)
     return [str(p) for p in Catalog.get().paths.get_children(path, child_type=catalog.Table, recursive=recursive)]
-def create_dir(path_str: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error') -> Optional[catalog.Dir]:
+def create_dir(
+    path_str: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
+) -> Optional[catalog.Dir]:
     """Create a directory.
     Args:
@@ -609,6 +642,7 @@ def create_dir(path_str: str, if_exists: Literal['error', 'ignore', 'replace', '
         Env.get().console_logger.info(f'Created directory `{path_str}`.')
         return dir
 def drop_dir(path_str: str, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
     """Remove a directory.
@@ -659,7 +693,8 @@ def drop_dir(path_str: str, force: bool = False, if_not_exists: Literal['error',
     if not isinstance(obj, catalog.Dir):
         raise excs.Error(
-            f'{str(path)} needs to be a {catalog.Dir._display_name()} but is a {type(obj)._display_name()}')
+            f'{str(path)} needs to be a {catalog.Dir._display_name()} but is a {type(obj)._display_name()}'
+        )
     children = cat.paths.get_children(path, child_type=None, recursive=True)
@@ -720,7 +755,9 @@ def list_functions() -> Styler:
     paths = ['.'.join(f.self_path.split('.')[:-1]) for f in functions]
     names = [f.name for f in functions]
     params = [
-        ', '.join([param_name + ': ' + str(param_type) for param_name, param_type in f.signatures[0].parameters.items()])
+        ', '.join(
+            [param_name + ': ' + str(param_type) for param_name, param_type in f.signatures[0].parameters.items()]
+        )
         for f in functions
     ]
     pd_df = pd.DataFrame(
@@ -771,10 +808,7 @@ def tools(*args: Union[func.Function, func.tools.Tool]) -> func.tools.Tools:
         ...     pxt.tool(traffic_quote, name='traffic_conditions'),
         ... )
     """
-    return func.tools.Tools(tools=[
-        arg if isinstance(arg, func.tools.Tool) else tool(arg)
-        for arg in args
-    ])
+    return func.tools.Tools(tools=[arg if isinstance(arg, func.tools.Tool) else tool(arg) for arg in args])
 def tool(fn: func.Function, name: Optional[str] = None, description: Optional[str] = None) -> func.tools.Tool:

pixeltable/index/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 from .base import IndexBase
-from .embedding_index import EmbeddingIndex
 from .btree import BtreeIndex
+from .embedding_index import EmbeddingIndex

pixeltable/index/btree.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Optional, TYPE_CHECKING
+from typing import TYPE_CHECKING, Optional
 import sqlalchemy as sql
@@ -7,15 +7,18 @@ import sqlalchemy as sql
 import pixeltable.exceptions as excs
 from pixeltable import catalog, exprs
 from pixeltable.func.udf import udf
 from .base import IndexBase
 if TYPE_CHECKING:
     import pixeltable.exprs
 class BtreeIndex(IndexBase):
     """
     Interface to B-tree indices in Postgres.
     """
     MAX_STRING_LEN = 256
     value_expr: 'pixeltable.exprs.Expr'
@@ -25,7 +28,7 @@ class BtreeIndex(IndexBase):
     def str_filter(s: Optional[str]) -> Optional[str]:
         if s is None:
             return None
-        return s[:BtreeIndex.MAX_STRING_LEN]
+        return s[: BtreeIndex.MAX_STRING_LEN]
     def __init__(self, c: 'catalog.Column'):
         if not c.col_type.is_scalar_type() and not c.col_type.is_media_type():
@@ -64,4 +67,3 @@ class BtreeIndex(IndexBase):
     @classmethod
     def from_dict(cls, c: 'catalog.Column', d: dict) -> 'BtreeIndex':
         return cls(c)

pixeltable/index/embedding_index.py CHANGED Viewed

@@ -31,11 +31,7 @@ class EmbeddingIndex(IndexBase):
         IP = 2
         L2 = 3
-    PGVECTOR_OPS = {
-        Metric.COSINE: 'vector_cosine_ops',
-        Metric.IP: 'vector_ip_ops',
-        Metric.L2: 'vector_l2_ops'
-    }
+    PGVECTOR_OPS = {Metric.COSINE: 'vector_cosine_ops', Metric.IP: 'vector_ip_ops', Metric.L2: 'vector_l2_ops'}
     metric: Metric
     value_expr: exprs.FunctionCall
@@ -97,8 +93,7 @@ class EmbeddingIndex(IndexBase):
             # contains no matching signatures.
             assert embed is not None
             raise excs.Error(
-                f'The function `{embed.name}` is not a valid embedding: '
-                'it must take a single string or image parameter'
+                f'The function `{embed.name}` is not a valid embedding: it must take a single string or image parameter'
             )
         # Now validate the return types of the embedding functions.
@@ -116,7 +111,8 @@ class EmbeddingIndex(IndexBase):
         self.metric = self.Metric[metric.upper()]
         self.value_expr = (
-            self.string_embed(exprs.ColumnRef(c)) if c.col_type.is_string_type()
+            self.string_embed(exprs.ColumnRef(c))
+            if c.col_type.is_string_type()
             else self.image_embed(exprs.ColumnRef(c))
         )
         assert isinstance(self.value_expr.col_type, ts.ArrayType)
@@ -138,10 +134,11 @@ class EmbeddingIndex(IndexBase):
     def create_index(self, index_name: str, index_value_col: catalog.Column, conn: sql.engine.Connection) -> None:
         """Create the index on the index value column"""
         idx = sql.Index(
-            index_name, index_value_col.sa_col,
+            index_name,
+            index_value_col.sa_col,
             postgresql_using='hnsw',
             postgresql_with={'m': 16, 'ef_construction': 64},
-            postgresql_ops={index_value_col.sa_col.name: self.PGVECTOR_OPS[self.metric]}
+            postgresql_ops={index_value_col.sa_col.name: self.PGVECTOR_OPS[self.metric]},
         )
         idx.create(bind=conn)
@@ -191,16 +188,20 @@ class EmbeddingIndex(IndexBase):
         return 'embedding'
     @classmethod
-    def _resolve_embedding_fn(cls, embed_fn: func.Function, expected_type: ts.ColumnType.Type) -> Optional[func.Function]:
+    def _resolve_embedding_fn(
+        cls, embed_fn: func.Function, expected_type: ts.ColumnType.Type
+    ) -> Optional[func.Function]:
         """Find an overload resolution for `embed_fn` that matches the given type."""
         assert isinstance(embed_fn, func.Function)
         for resolved_fn in embed_fn._resolved_fns:
             # The embedding function must be a 1-ary function of the correct type. But it's ok if the function signature
             # has more than one parameter, as long as it has at most one *required* parameter.
             sig = resolved_fn.signature
-            if (len(sig.parameters) >= 1
+            if (
+                len(sig.parameters) >= 1
                 and len(sig.required_parameters) <= 1
-                and sig.parameters_by_pos[0].col_type.type_enum == expected_type):
+                and sig.parameters_by_pos[0].col_type.type_enum == expected_type
+            ):
                 return resolved_fn
         return None
@@ -237,7 +238,7 @@ class EmbeddingIndex(IndexBase):
         return {
             'metric': self.metric.name.lower(),
             'string_embed': None if self.string_embed is None else self.string_embed.as_dict(),
-            'image_embed': None if self.image_embed is None else self.image_embed.as_dict()
+            'image_embed': None if self.image_embed is None else self.image_embed.as_dict(),
         }
     @classmethod

pixeltable/io/__init__.py CHANGED Viewed

@@ -2,7 +2,7 @@ from .external_store import ExternalStore, SyncStatus
 from .globals import create_label_studio_project, export_images_as_fo_dataset, import_json, import_rows
 from .hf_datasets import import_huggingface_dataset
 from .pandas import import_csv, import_excel, import_pandas
-from .parquet import import_parquet, export_parquet
+from .parquet import export_parquet, import_parquet
 __default_dir = set(symbol for symbol in dir() if not symbol.startswith('_'))
 __removed_symbols = {'globals', 'hf_datasets', 'pandas', 'parquet'}

pixeltable 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

Potentially problematic release.

pixeltable 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl