PyPI - pixeltable - Versions diffs - 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl - Mend

pixeltable 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (42) hide show

pixeltable/__init__.py +1 -1
pixeltable/__version__.py +2 -2
pixeltable/catalog/insertable_table.py +9 -7
pixeltable/catalog/table.py +18 -5
pixeltable/catalog/table_version.py +1 -1
pixeltable/catalog/view.py +1 -1
pixeltable/dataframe.py +1 -1
pixeltable/env.py +140 -40
pixeltable/exceptions.py +12 -5
pixeltable/exec/component_iteration_node.py +63 -42
pixeltable/exprs/__init__.py +1 -2
pixeltable/exprs/expr.py +5 -6
pixeltable/exprs/function_call.py +8 -10
pixeltable/exprs/inline_expr.py +200 -0
pixeltable/exprs/json_path.py +3 -6
pixeltable/ext/functions/whisperx.py +2 -0
pixeltable/ext/functions/yolox.py +5 -3
pixeltable/functions/huggingface.py +89 -12
pixeltable/functions/image.py +3 -3
pixeltable/functions/together.py +37 -16
pixeltable/functions/vision.py +43 -21
pixeltable/functions/whisper.py +3 -0
pixeltable/globals.py +7 -1
pixeltable/io/globals.py +1 -1
pixeltable/io/hf_datasets.py +3 -3
pixeltable/iterators/document.py +1 -1
pixeltable/metadata/__init__.py +1 -1
pixeltable/metadata/converters/convert_18.py +1 -1
pixeltable/metadata/converters/convert_20.py +56 -0
pixeltable/metadata/converters/util.py +29 -4
pixeltable/metadata/notes.py +1 -0
pixeltable/tool/create_test_db_dump.py +15 -4
pixeltable/type_system.py +3 -1
pixeltable/utils/filecache.py +126 -79
pixeltable-0.2.20.dist-info/LICENSE +201 -0
{pixeltable-0.2.18.dist-info → pixeltable-0.2.20.dist-info}/METADATA +16 -6
{pixeltable-0.2.18.dist-info → pixeltable-0.2.20.dist-info}/RECORD +39 -39
pixeltable/exprs/inline_array.py +0 -117
pixeltable/exprs/inline_dict.py +0 -104
pixeltable-0.2.18.dist-info/LICENSE +0 -18
{pixeltable-0.2.18.dist-info → pixeltable-0.2.20.dist-info}/WHEEL +0 -0
{pixeltable-0.2.18.dist-info → pixeltable-0.2.20.dist-info}/entry_points.txt +0 -0

pixeltable/functions/vision.py CHANGED Viewed

@@ -19,12 +19,9 @@ from typing import Any, Optional, Union
 import numpy as np
 import PIL.Image
-import pixeltable.func as func
-import pixeltable.type_system as ts
+import pixeltable as pxt
 from pixeltable.utils.code import local_public_names
-# TODO: figure out a better submodule structure
 # the following function has been adapted from MMEval
 # (sources at https://github.com/open-mmlab/mmeval)
@@ -161,25 +158,41 @@ def __calculate_image_tpfp(
     return tp, fp
-@func.udf(
-    return_type=ts.JsonType(nullable=False),
-    param_types=[
-        ts.JsonType(nullable=False),
-        ts.JsonType(nullable=False),
-        ts.JsonType(nullable=False),
-        ts.JsonType(nullable=False),
-        ts.JsonType(nullable=False),
-    ],
-)
+@pxt.udf
 def eval_detections(
     pred_bboxes: list[list[int]],
     pred_labels: list[int],
     pred_scores: list[float],
     gt_bboxes: list[list[int]],
     gt_labels: list[int],
+    min_iou: float = 0.5,
 ) -> list[dict]:
     """
     Evaluates the performance of a set of predicted bounding boxes against a set of ground truth bounding boxes.
+    Args:
+        pred_bboxes: List of predicted bounding boxes, each represented as [xmin, ymin, xmax, ymax].
+        pred_labels: List of predicted labels.
+        pred_scores: List of predicted scores.
+        gt_bboxes: List of ground truth bounding boxes, each represented as [xmin, ymin, xmax, ymax].
+        gt_labels: List of ground truth labels.
+        min_iou: Minimum intersection-over-union (IoU) threshold for a predicted bounding box to be
+            considered a true positive.
+    Returns:
+        A list of dictionaries, one per label class, with the following structure:
+        ```python
+        {
+            'min_iou': float,  # The value of `min_iou` used for the detections
+            'class': int,  # The label class
+            'tp': list[int],  # List of 1's and 0's indicating true positives for each
+                              # predicted bounding box of this class
+            'fp': list[int],  # List of 1's and 0's indicating false positives for each
+                              # predicted bounding box of this class; `fp[n] == 1 - tp[n]`
+            'scores': list[float],  # List of predicted scores for each bounding box of this class
+            'num_gts': int,  # Number of ground truth bounding boxes of this class
+        }
+        ```
     """
     class_idxs = list(set(pred_labels + gt_labels))
     result: list[dict] = []
@@ -192,11 +205,11 @@ def eval_detections(
         pred_filter = pred_classes_arr == class_idx
         gt_filter = gt_classes_arr == class_idx
         class_pred_scores = pred_scores_arr[pred_filter]
-        tp, fp = __calculate_image_tpfp(pred_bboxes_arr[pred_filter], class_pred_scores, gt_bboxes_arr[gt_filter], 0.5)
+        tp, fp = __calculate_image_tpfp(pred_bboxes_arr[pred_filter], class_pred_scores, gt_bboxes_arr[gt_filter], min_iou)
         ordered_class_pred_scores = -np.sort(-class_pred_scores)
         result.append(
             {
-                'min_iou': 0.5,
+                'min_iou': min_iou,
                 'class': class_idx,
                 'tp': tp.tolist(),
                 'fp': fp.tolist(),
@@ -207,11 +220,20 @@ def eval_detections(
     return result
-@func.uda(update_types=[ts.JsonType()], value_type=ts.JsonType(), allows_std_agg=True, allows_window=False)
-class mean_ap(func.Aggregator):
+@pxt.uda(update_types=[pxt.JsonType()], value_type=pxt.JsonType(), allows_std_agg=True, allows_window=False)
+class mean_ap(pxt.Aggregator):
     """
     Calculates the mean average precision (mAP) over
     [`eval_detections()`][pixeltable.functions.vision.eval_detections] results.
+    __Parameters:__
+    - `eval_dicts` (list[dict]): List of dictionaries as returned by
+        [`eval_detections()`][pixeltable.functions.vision.eval_detections].
+    __Returns:__
+    - A `dict[int, float]` mapping each label class to an average precision (AP) value for that class.
     """
     def __init__(self):
         self.class_tpfp: dict[int, list[dict]] = defaultdict(list)
@@ -246,7 +268,7 @@ class mean_ap(func.Aggregator):
         return result
-def _create_label_colors(labels: list[Any]) -> dict[Any, str]:
+def __create_label_colors(labels: list[Any]) -> dict[Any, str]:
     """
     Create random colors for labels such that a particular label always gets the same color.
@@ -265,7 +287,7 @@ def _create_label_colors(labels: list[Any]) -> dict[Any, str]:
     return result
-@func.udf
+@pxt.udf
 def draw_bounding_boxes(
         img: PIL.Image.Image,
         boxes: list[list[int]],
@@ -324,7 +346,7 @@ def draw_bounding_boxes(
         if color is not None:
             box_colors = [color] * num_boxes
         else:
-            label_colors = _create_label_colors(labels)
+            label_colors = __create_label_colors(labels)
             box_colors = [label_colors[label] for label in labels]
     from PIL import ImageColor, ImageDraw, ImageFont

pixeltable/functions/whisper.py CHANGED Viewed

@@ -9,6 +9,7 @@ first `pip install openai-whisper`.
 from typing import TYPE_CHECKING, Optional
 import pixeltable as pxt
+from pixeltable.env import Env
 if TYPE_CHECKING:
     from whisper import Whisper  # type: ignore[import-untyped]
@@ -71,6 +72,8 @@ def transcribe(
         >>> tbl['result'] = transcribe(tbl.audio, model='base.en')
     """
+    Env.get().require_package('whisper')
+    Env.get().require_package('torch')
     import torch
     if decode_options is None:

pixeltable/globals.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import dataclasses
 import logging
-from typing import Any, Optional, Union
+from typing import Any, Iterable, Optional, Union
 from uuid import UUID
 import pandas as pd
@@ -16,6 +16,7 @@ from pixeltable.dataframe import DataFrameResultSet
 from pixeltable.env import Env
 from pixeltable.iterators import ComponentIterator
 from pixeltable.metadata import schema
+from pixeltable.utils.filecache import FileCache
 _logger = logging.getLogger('pixeltable')
@@ -193,6 +194,7 @@ def create_view(
     )
     Catalog.get().paths[path] = view
     _logger.info(f'Created view `{path_str}`.')
+    FileCache.get().emit_eviction_warnings()
     return view
@@ -487,3 +489,7 @@ def configure_logging(
         remove: comma-separated list of module names
     """
     return Env.get().configure_logging(to_stdout=to_stdout, level=level, add=add, remove=remove)
+def array(elements: Iterable) -> exprs.Expr:
+    return exprs.InlineArray(elements)

pixeltable/io/globals.py CHANGED Viewed

@@ -43,7 +43,7 @@ def create_label_studio_project(
     The API key and URL for a valid Label Studio server must be specified in Pixeltable config. Either:
     * Set the `LABEL_STUDIO_API_KEY` and `LABEL_STUDIO_URL` environment variables; or
-    * Specify `api_key` and `url` fields in the `label-studio` section of `$PIXELTABLE_HOME/config.yaml`.
+    * Specify `api_key` and `url` fields in the `label-studio` section of `$PIXELTABLE_HOME/config.toml`.
     __Requirements:__

pixeltable/io/hf_datasets.py CHANGED Viewed

@@ -34,9 +34,7 @@ _hf_to_pxt: dict[str, ts.ColumnType] = {
 }
-def _to_pixeltable_type(
-    feature_type: Union[datasets.ClassLabel, datasets.Value, datasets.Sequence],
-) -> Optional[ts.ColumnType]:
+def _to_pixeltable_type(feature_type: Any) -> Optional[ts.ColumnType]:
     """Convert a huggingface feature type to a pixeltable ColumnType if one is defined."""
     import datasets
@@ -51,6 +49,8 @@ def _to_pixeltable_type(
         dtype = _to_pixeltable_type(feature_type.feature)
         length = feature_type.length if feature_type.length != -1 else None
         return ts.ArrayType(shape=(length,), dtype=dtype)
+    elif isinstance(feature_type, datasets.Image):
+        return ts.ImageType(nullable=True)
     else:
         return None

pixeltable/iterators/document.py CHANGED Viewed

@@ -166,7 +166,7 @@ class DocumentSplitter(ComponentIterator):
         return {
             'document': DocumentType(nullable=False),
             'separators': StringType(nullable=False),
-            'metadata': StringType(nullable=True),
+            'metadata': StringType(nullable=False),
             'limit': IntType(nullable=True),
             'overlap': IntType(nullable=True),
             'skip_tags': StringType(nullable=True),

pixeltable/metadata/__init__.py CHANGED Viewed

@@ -10,7 +10,7 @@ import sqlalchemy.orm as orm
 from .schema import SystemInfo, SystemInfoMd
 # current version of the metadata; this is incremented whenever the metadata schema changes
-VERSION = 20
+VERSION = 21
 def create_system_info(engine: sql.engine.Engine) -> None:

pixeltable/metadata/converters/convert_18.py CHANGED Viewed

@@ -13,7 +13,7 @@ def _(engine: sql.engine.Engine) -> None:
     )
-def __substitute_md(k: Any, v: Any) -> Optional[tuple[Any, Any]]:
+def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
     # Migrate a few changed function names
     if k == 'path' and v == 'pixeltable.functions.string.str_format':
         return 'path', 'pixeltable.functions.string.format'

pixeltable/metadata/converters/convert_20.py ADDED Viewed

@@ -0,0 +1,56 @@
+from typing import Any, Optional
+import sqlalchemy as sql
+from pixeltable.metadata import register_converter
+from pixeltable.metadata.converters.util import convert_table_md
+@register_converter(version=20)
+def _(engine: sql.engine.Engine) -> None:
+    convert_table_md(
+        engine,
+        substitution_fn=__substitute_md
+    )
+def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+    if isinstance(v, dict) and '_classname' in v:
+        # The way InlineArray is represented changed in v20. Previously, literal values were stored
+        # directly in the Inline expr; now we store them in Literal sub-exprs. This converter
+        # constructs new Literal exprs for the literal values in InlineArray, interleaving them
+        # with non-literal exprs into the correct sequence.
+        if v['_classname'] == 'InlineArray':
+            components = v.get('components')  # Might be None, but that's ok
+            updated_components = []
+            for idx, val in v['elements']:
+                # idx >= 0, then this is a non-literal sub-expr. Otherwise, idx could be either
+                # None or -1, for legacy reasons (which are now obviated).
+                if idx is not None and idx >= 0:
+                    updated_components.append(components[idx])
+                else:
+                    updated_components.append({'val': val, '_classname': 'Literal'})
+            # InlineList was split out from InlineArray in v20. If is_json=True, then this is
+            # actually an InlineList. If is_json=False, then we assume it's an InlineArray for now,
+            # but it might actually be transformed into an InlineList when it is instantiated
+            # (unfortunately, there is no way to disambiguate at this stage; see comments in
+            # InlineArray._from_dict() for more details).
+            updated_v = {'_classname': 'InlineList' if v.get('is_json') else 'InlineArray'}
+            if len(updated_components) > 0:
+                updated_v['components'] = updated_components
+            return k, updated_v
+        if v['_classname'] == 'InlineDict':
+            components = v.get('components')
+            keys = []
+            updated_components = []
+            for key, idx, val in v['dict_items']:
+                keys.append(key)
+                if idx is not None and idx >= 0:
+                    updated_components.append(components[idx])
+                else:
+                    updated_components.append({'val': val, '_classname': 'Literal'})
+            updated_v = {'keys': keys, '_classname': 'InlineDict'}
+            if len(updated_components) > 0:
+                updated_v['components'] = updated_components
+            return k, updated_v
+    return None

pixeltable/metadata/converters/util.py CHANGED Viewed

@@ -14,8 +14,22 @@ def convert_table_md(
     table_md_updater: Optional[Callable[[dict], None]] = None,
     column_md_updater: Optional[Callable[[dict], None]] = None,
     external_store_md_updater: Optional[Callable[[dict], None]] = None,
-    substitution_fn: Optional[Callable[[Any, Any], Optional[tuple[Any, Any]]]] = None
+    substitution_fn: Optional[Callable[[Optional[str], Any], Optional[tuple[Optional[str], Any]]]] = None
 ) -> None:
+    """
+    Converts table metadata based on the specified conversion functions.
+    Args:
+        engine: The SQLAlchemy engine.
+        table_md_updater: A function that updates the table metadata in place.
+        column_md_updater: A function that updates the column metadata in place.
+        external_store_md_updater: A function that updates the external store metadata in place.
+        substitution_fn: A function that substitutes metadata values. If specified, all metadata will be traversed
+            recursively, and `substitution_fn` will be called once for each metadata entry. If the entry appears in
+            a dict as a `(k, v)` pair, then `substitution_fn(k, v)` will be called. If the entry appears in a list,
+            then `substitution_fn(None, v)` will be called. If `substitution_fn` returns a tuple `(k', v')`, then
+            the original entry will be replaced, and the traversal will continue with `v'`.
+    """
     with engine.begin() as conn:
         for row in conn.execute(sql.select(Table)):
             id = row[0]
@@ -49,18 +63,29 @@ def __update_external_store_md(table_md: dict, external_store_md_updater: Callab
         external_store_md_updater(store_md)
-def __substitute_md_rec(md: Any, substitution_fn: Callable[[Any, Any], Optional[tuple[Any, Any]]]) -> Any:
+def __substitute_md_rec(
+    md: Any,
+    substitution_fn: Callable[[Optional[str], Any], Optional[tuple[Optional[str], Any]]]
+) -> Any:
     if isinstance(md, dict):
         updated_md = {}
         for k, v in md.items():
             substitute = substitution_fn(k, v)
             if substitute is not None:
                 updated_k, updated_v = substitute
-                updated_md[updated_k] = updated_v
+                updated_md[updated_k] = __substitute_md_rec(updated_v, substitution_fn)
             else:
                 updated_md[k] = __substitute_md_rec(v, substitution_fn)
         return updated_md
     elif isinstance(md, list):
-        return [__substitute_md_rec(v, substitution_fn) for v in md]
+        updated_md = []
+        for v in md:
+            substitute = substitution_fn(None, v)
+            if substitute is not None:
+                _, updated_v = substitute
+                updated_md.append(__substitute_md_rec(updated_v, substitution_fn))
+            else:
+                updated_md.append(__substitute_md_rec(v, substitution_fn))
+        return updated_md
     else:
         return md

pixeltable/metadata/notes.py CHANGED Viewed

@@ -2,6 +2,7 @@
 # rather than as a comment, so that the existence of a description can be enforced by
 # the unit tests when new versions are added.
 VERSION_NOTES = {
+    21: 'Separate InlineArray and InlineList',
     20: 'Store DB timestamps in UTC',
     19: 'UDF renames; ImageMemberAccess removal',
     18: 'Restructured index metadata',

pixeltable/tool/create_test_db_dump.py CHANGED Viewed

@@ -4,6 +4,7 @@ import logging
 import os
 import pathlib
 import subprocess
+import sys
 from typing import Any
 from zoneinfo import ZoneInfo
@@ -24,12 +25,18 @@ _logger = logging.getLogger('pixeltable')
 class Dumper:
     def __init__(self, output_dir='target', db_name='pxtdump') -> None:
+        if sys.version_info >= (3, 10):
+            raise RuntimeError(
+                'This script must be run on Python 3.9. '
+                'DB dumps are incompatible across versions due to issues with pickling anonymous UDFs.'
+            )
         self.output_dir = pathlib.Path(output_dir)
         shared_home = pathlib.Path(os.environ.get('PIXELTABLE_HOME', '~/.pixeltable')).expanduser()
         mock_home_dir = self.output_dir / '.pixeltable'
         mock_home_dir.mkdir(parents=True, exist_ok=True)
         os.environ['PIXELTABLE_HOME'] = str(mock_home_dir)
-        os.environ['PIXELTABLE_CONFIG'] = str(shared_home / 'config.yaml')
+        os.environ['PIXELTABLE_CONFIG'] = str(shared_home / 'config.toml')
         os.environ['PIXELTABLE_DB'] = db_name
         os.environ['PIXELTABLE_PGDATA'] = str(shared_home / 'pgdata')
@@ -226,9 +233,13 @@ class Dumper:
         add_column('isin_2', t.c2.isin([1, 2, 3, 4, 5]))
         add_column('isin_3', t.c2.isin(t.c6.f5))
-        # inline_array and inline_dict
-        add_column('inline_array_1', [[1, 2, 3], [4, 5, 6]])
-        add_column('inline_array_2', [['a', 'b', 'c'], ['d', 'e', 'f']])
+        # inline_array, inline_list, inline_dict
+        add_column('inline_array_1', pxt.array([[1, 2, 3], [4, 5, 6]]))
+        add_column('inline_array_2', pxt.array([['a', 'b', 'c'], ['d', 'e', 'f']]))
+        add_column('inline_array_exprs', pxt.array([[t.c2, t.c2 + 1], [t.c2 + 2, t.c2]]))
+        add_column('inline_array_mixed', pxt.array([[1, t.c2], [3, t.c2]]))
+        add_column('inline_list_1', [[1, 2, 3], [4, 5, 6]])
+        add_column('inline_list_2', [['a', 'b', 'c'], ['d', 'e', 'f']])
         add_column('inline_list_exprs', [t.c1, [t.c1n, t.c2]])
         add_column('inline_list_mixed', [1, 'a', t.c1, [1, 'a', t.c1n], 1, 'a'])
         add_column('inline_dict', {'int': 22, 'dict': {'key': 'val'}, 'expr': t.c1})

pixeltable/type_system.py CHANGED Viewed

@@ -204,6 +204,8 @@ class ColumnType:
     @classmethod
     def infer_literal_type(cls, val: Any, nullable: bool = False) -> Optional[ColumnType]:
+        if val is None:
+            return InvalidType(nullable=True)
         if isinstance(val, str):
             return StringType(nullable=nullable)
         if isinstance(val, bool):
@@ -395,7 +397,7 @@ class InvalidType(ColumnType):
         assert False
     def print_value(self, val: Any) -> str:
-        assert False
+        return str(val)
     def _validate_literal(self, val: Any) -> None:
         assert False

pixeltable 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl

Potentially problematic release.

pixeltable 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl