pixeltable 0.4.17__py3-none-any.whl → 0.4.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic.

@@ -280,7 +280,7 @@ class Catalog:
  - this needs to be done in a retry loop, because Postgres can decide to abort the transaction
  (SerializationFailure, LockNotAvailable)
  - for that reason, we do all lock acquisition prior to doing any real work (eg, compute column values),
- to minimize the probability of loosing that work due to a forced abort
+ to minimize the probability of losing that work due to a forced abort

  If convert_db_excs == True, converts DBAPIErrors into excs.Errors.
  """
@@ -433,7 +433,7 @@ class Catalog:

  The function should not raise exceptions; if it does, they are logged and ignored.
  """
- assert Env.get().in_xact
+ assert self.in_write_xact
  self._undo_actions.append(func)
  return func

@@ -792,19 +792,25 @@ class Catalog:
  return result

  @retry_loop(for_write=True)
- def move(self, path: Path, new_path: Path) -> None:
- self._move(path, new_path)
+ def move(self, path: Path, new_path: Path, if_exists: IfExistsParam, if_not_exists: IfNotExistsParam) -> None:
+ self._move(path, new_path, if_exists, if_not_exists)

- def _move(self, path: Path, new_path: Path) -> None:
- _, dest_dir, src_obj = self._prepare_dir_op(
+ def _move(self, path: Path, new_path: Path, if_exists: IfExistsParam, if_not_exists: IfNotExistsParam) -> None:
+ dest_obj, dest_dir, src_obj = self._prepare_dir_op(
  add_dir_path=new_path.parent,
  add_name=new_path.name,
  drop_dir_path=path.parent,
  drop_name=path.name,
- raise_if_exists=True,
- raise_if_not_exists=True,
+ raise_if_exists=(if_exists == IfExistsParam.ERROR),
+ raise_if_not_exists=(if_not_exists == IfNotExistsParam.ERROR),
  )
- src_obj._move(new_path.name, dest_dir._id)
+ assert dest_obj is None or if_exists == IfExistsParam.IGNORE
+ assert src_obj is not None or if_not_exists == IfNotExistsParam.IGNORE
+ if dest_obj is None and src_obj is not None:
+ # If dest_obj is not None, it means `if_exists='ignore'` and the destination already exists.
+ # If src_obj is None, it means `if_not_exists='ignore'` and the source doesn't exist.
+ # If dest_obj is None and src_obj is not None, then we can proceed with the move.
+ src_obj._move(new_path.name, dest_dir._id)

  def _prepare_dir_op(
  self,
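Note: the hunk above threads `if_exists` / `if_not_exists` through `Catalog.move` instead of always raising. As a rough illustration of the resulting decision logic, here is a self-contained sketch; the enum stand-ins below include only the members exercised in this hunk, and their string values plus the helper function are assumptions for illustration, not Pixeltable's actual API.

```
from enum import Enum

# Hypothetical stand-ins for the enums referenced in the diff.
class IfExistsParam(Enum):
    ERROR = 'error'
    IGNORE = 'ignore'

class IfNotExistsParam(Enum):
    ERROR = 'error'
    IGNORE = 'ignore'

def resolve_move(
    dest_exists: bool, src_exists: bool, if_exists: IfExistsParam, if_not_exists: IfNotExistsParam
) -> str:
    # Mirrors the branches added to Catalog._move (sketch only).
    if dest_exists and if_exists == IfExistsParam.ERROR:
        return 'error: destination already exists'
    if not src_exists and if_not_exists == IfNotExistsParam.ERROR:
        return 'error: source does not exist'
    if not dest_exists and src_exists:
        return 'move performed'
    return 'no-op (ignored)'
```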
@@ -815,7 +821,7 @@ class Catalog:
  drop_expected: Optional[type[SchemaObject]] = None,
  raise_if_exists: bool = False,
  raise_if_not_exists: bool = False,
- ) -> tuple[Optional[SchemaObject], Optional[SchemaObject], Optional[SchemaObject]]:
+ ) -> tuple[Optional[SchemaObject], Optional[Dir], Optional[SchemaObject]]:
  """
  Validates paths and acquires locks needed for a directory operation, ie, add/drop/rename (add + drop) of a
  directory entry.
@@ -902,9 +908,10 @@ class Catalog:
  schema.Table.md['name'].astext == name,
  schema.Table.md['user'].astext == user,
  )
- tbl_id = conn.execute(q).scalar_one_or_none()
- if tbl_id is not None:
- return self.get_table_by_id(tbl_id, version)
+ tbl_id = conn.execute(q).scalars().all()
+ assert len(tbl_id) <= 1, name
+ if len(tbl_id) == 1:
+ return self.get_table_by_id(tbl_id[0], version)

  return None

@@ -1084,7 +1091,7 @@ class Catalog:
  The metadata should be presented in standard "ancestor order", with the table being replicated at
  list position 0 and the (root) base table at list position -1.
  """
- assert Env.get().in_xact
+ assert self.in_write_xact

  tbl_id = UUID(md[0].tbl_md.tbl_id)

@@ -1150,11 +1157,11 @@ class Catalog:
  # We need to do this at the end, since `existing_path` needs to first have a non-fragment table version in
  # order to be instantiated as a schema object.
  existing = self.get_table_by_id(tbl_id)
- if existing is not None:
- existing_path = Path.parse(existing._path(), allow_system_path=True)
- if existing_path != path:
- assert existing_path.is_system_path
- self._move(existing_path, path)
+ assert existing is not None
+ existing_path = Path.parse(existing._path(), allow_system_path=True)
+ if existing_path != path:
+ assert existing_path.is_system_path
+ self._move(existing_path, path, IfExistsParam.ERROR, IfNotExistsParam.ERROR)

  def __ensure_system_dir_exists(self) -> Dir:
  system_path = Path.parse('_system', allow_system_path=True)
@@ -77,6 +77,17 @@ class Table(SchemaObject):
  self._tbl_version = None

  def _move(self, new_name: str, new_dir_id: UUID) -> None:
+ old_name = self._name
+ old_dir_id = self._dir_id
+
+ cat = catalog.Catalog.get()
+
+ @cat.register_undo_action
+ def _() -> None:
+ # TODO: We should really be invalidating the Table instance and forcing a reload.
+ self._name = old_name
+ self._dir_id = old_dir_id
+
  super()._move(new_name, new_dir_id)
  conn = env.Env.get().conn
  stmt = sql.text(
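The hunk above registers an undo action before mutating the in-memory Table object, so a forced transaction abort can roll the rename back. Below is a minimal, self-contained illustration of that decorator pattern; the `UndoLog` class is an assumption for illustration only, not Pixeltable's Catalog.

```
from typing import Callable

class UndoLog:
    def __init__(self) -> None:
        self._undo_actions: list[Callable[[], None]] = []

    def register_undo_action(self, func: Callable[[], None]) -> Callable[[], None]:
        # Used as a decorator: record func and return it unchanged.
        self._undo_actions.append(func)
        return func

    def rollback(self) -> None:
        # Run registered actions in reverse registration order.
        for func in reversed(self._undo_actions):
            func()
        self._undo_actions.clear()

log = UndoLog()
state = {'name': 'old'}
old_name = state['name']

@log.register_undo_action
def _() -> None:
    state['name'] = old_name  # restore the captured pre-move value

state['name'] = 'new'
log.rollback()
assert state['name'] == 'old'
```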
@@ -625,7 +636,7 @@ class Table(SchemaObject):
  - `'abort'`: an exception will be raised and the column will not be added.
  - `'ignore'`: execution will continue and the column will be added. Any rows
  with errors will have a `None` value for the column, with information about the error stored in the
- corresponding `tbl.col_name.errormsg` tbl.col_name.errortype` fields.
+ corresponding `tbl.col_name.errormsg` and `tbl.col_name.errortype` fields.
  if_exists: Determines the behavior if the column already exists. Must be one of the following:

  - `'error'`: an exception will be raised.
@@ -986,22 +997,28 @@ class Table(SchemaObject):
  Only `String` and `Image` columns are currently supported. Here's an example that uses a
  [CLIP embedding][pixeltable.functions.huggingface.clip] to index an image column:

+ ```
  >>> from pixeltable.functions.huggingface import clip
- ... embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
- ... tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
+ >>> embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
+ >>> tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
+ ```

- Once the index is created, similiarity lookups can be performed using the `similarity` pseudo-function.
+ Once the index is created, similarity lookups can be performed using the `similarity` pseudo-function:

+ ```
  >>> reference_img = PIL.Image.open('my_image.jpg')
- ... sim = tbl.img.similarity(reference_img)
- ... tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
+ >>> sim = tbl.img.similarity(reference_img)
+ >>> tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
+ ```

  If the embedding UDF is a multimodal embedding (supporting more than one data type), then lookups may be
  performed using any of its supported types. In our example, CLIP supports both text and images, so we can
  also search for images using a text description:

+ ```
  >>> sim = tbl.img.similarity('a picture of a train')
- ... tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
+ >>> tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
+ ```

  Args:
  column: The name of, or reference to, the column to be indexed; must be a `String` or `Image` column.
@@ -1032,9 +1049,9 @@ class Table(SchemaObject):
  Add an index to the `img` column of the table `my_table`:

  >>> from pixeltable.functions.huggingface import clip
- ... tbl = pxt.get_table('my_table')
- ... embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
- ... tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
+ >>> tbl = pxt.get_table('my_table')
+ >>> embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
+ >>> tbl.add_embedding_index(tbl.img, embedding=embedding_fn)

  Alternatively, the `img` column may be specified by name:

@@ -1328,7 +1345,8 @@ class Table(SchemaObject):
  on_error: Literal['abort', 'ignore'] = 'abort',
  print_stats: bool = False,
  **kwargs: Any,
- )```
+ )
+ ```

  To insert just a single row, you can use the more concise syntax:

@@ -1338,7 +1356,8 @@ class Table(SchemaObject):
  on_error: Literal['abort', 'ignore'] = 'abort',
  print_stats: bool = False,
  **kwargs: Any
- )```
+ )
+ ```

  Args:
  source: A data source from which data can be imported.
@@ -1459,8 +1478,8 @@ class Table(SchemaObject):
  the row with new `id` 3 (assuming this key does not exist):

  >>> tbl.update(
- [{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 3, 'name': 'Bob', 'age': 40}],
- if_not_exists='insert')
+ ... [{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 3, 'name': 'Bob', 'age': 40}],
+ ... if_not_exists='insert')
  """
  from pixeltable.catalog import Catalog

@@ -96,6 +96,8 @@ class TableVersion:
  cols_by_name: dict[str, Column]
  # contains only columns visible in this version, both system and user
  cols_by_id: dict[int, Column]
+ # all indices defined on this table
+ all_idxs: dict[str, TableVersion.IndexInfo]
  # contains only actively maintained indices
  idxs_by_name: dict[str, TableVersion.IndexInfo]

@@ -129,6 +131,12 @@ class TableVersion:
  base_path: Optional[pxt.catalog.TableVersionPath] = None,
  base: Optional[TableVersionHandle] = None,
  ):
+ from pixeltable import exprs
+ from pixeltable.plan import SampleClause
+
+ from .table_version_handle import TableVersionHandle
+ from .table_version_path import TableVersionPath
+
  self.is_validated = True # a freshly constructed instance is always valid
  self.is_initialized = False
  self.id = id
@@ -141,9 +149,6 @@ class TableVersion:
  self.store_tbl = None

  # mutable tables need their TableVersionPath for expr eval during updates
- from .table_version_handle import TableVersionHandle
- from .table_version_path import TableVersionPath
-
  if self.is_snapshot:
  self.path = None
  else:
@@ -153,9 +158,6 @@ class TableVersion:
  self.path = TableVersionPath(self_handle, base=base_path)

  # view-specific initialization
- from pixeltable import exprs
- from pixeltable.plan import SampleClause
-
  predicate_dict = None if self.view_md is None or self.view_md.predicate is None else self.view_md.predicate
  self.predicate = exprs.Expr.from_dict(predicate_dict) if predicate_dict is not None else None
  sample_dict = None if self.view_md is None or self.view_md.sample_clause is None else self.view_md.sample_clause
@@ -180,6 +182,7 @@ class TableVersion:
  self.cols = []
  self.cols_by_name = {}
  self.cols_by_id = {}
+ self.all_idxs = {}
  self.idxs_by_name = {}
  self.external_stores = {}

@@ -373,7 +376,7 @@ class TableVersion:
  cat._tbl_versions[tbl_version.id, tbl_version.effective_version] = tbl_version
  tbl_version.init()
  tbl_version.store_tbl.create()
- tbl_version.store_tbl.ensure_columns_exist(col for col in tbl_version.cols if col.is_stored)
+ tbl_version.store_tbl.ensure_updated_schema()
  return tbl_version

  def delete_media(self, tbl_version: Optional[int] = None) -> None:
@@ -463,13 +466,17 @@ class TableVersion:
  idx_col = self._lookup_column(QColumnId(UUID(md.indexed_col_tbl_id), md.indexed_col_id))
  assert idx_col is not None
  idx = cls.from_dict(idx_col, md.init_args)
+ assert isinstance(idx, index.IndexBase)
+
+ val_col = next(col for col in self.cols if col.id == md.index_val_col_id)
+ undo_col = next(col for col in self.cols if col.id == md.index_val_undo_col_id)
+ idx_info = self.IndexInfo(id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col)
+ self.all_idxs[md.name] = idx_info

  # fix up the sa column type of the index value and undo columns
  # we need to do this for all indices, not just those that are active in this TableVersion, to ensure we get
  # the correct SA schema in the StoreTable.
- val_col = next(col for col in self.cols if col.id == md.index_val_col_id)
  val_col.sa_col_type = idx.index_sa_type()
- undo_col = next(col for col in self.cols if col.id == md.index_val_undo_col_id)
  undo_col.sa_col_type = idx.index_sa_type()
  if not isinstance(idx, index.EmbeddingIndex):
  # Historically, the intent has been not to store cellmd data, even for embedding indices. However,
@@ -501,9 +508,6 @@ class TableVersion:
  assert md.indexed_col_id in self.cols_by_id
  assert md.index_val_col_id in self.cols_by_id
  assert md.index_val_undo_col_id in self.cols_by_id
- idx_info = self.IndexInfo(
- id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col
- )
  self.idxs_by_name[md.name] = idx_info

  def _lookup_column(self, id: QColumnId) -> Column | None:
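Net effect of the TableVersion hunks above: index metadata is now recorded in two maps, `all_idxs` (every index defined on the table) and `idxs_by_name` (only the actively maintained ones), so the store schema can be derived from the full set. A toy sketch of the intended relationship; names and types below are simplified assumptions, not the actual `IndexInfo` definition.

```
from dataclasses import dataclass

@dataclass
class IndexInfo:
    id: int
    name: str
    active: bool  # simplification; the real code tracks activity via the two maps

all_idxs: dict[str, IndexInfo] = {
    'idx0': IndexInfo(0, 'idx0', active=True),
    'idx1': IndexInfo(1, 'idx1', active=False),  # e.g. no longer maintained in this version
}
idxs_by_name = {name: info for name, info in all_idxs.items() if info.active}

# Invariant: actively maintained indices are a subset of all indices.
assert set(idxs_by_name) <= set(all_idxs)
```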
pixeltable/dataframe.py CHANGED
@@ -1039,7 +1039,7 @@ class DataFrame:
  >>> df = book.order_by(t.price, asc=False).order_by(t.pages)
  """
  if self.sample_clause is not None:
- raise excs.Error('group_by() cannot be used with sample()')
+ raise excs.Error('order_by() cannot be used with sample()')
  for e in expr_list:
  if not isinstance(e, exprs.Expr):
  raise excs.Error(f'Invalid expression in order_by(): {e}')
pixeltable/env.py CHANGED
@@ -760,10 +760,12 @@ class Env:

  def __register_packages(self) -> None:
  """Declare optional packages that are utilized by some parts of the code."""
+ self.__register_package('accelerate')
  self.__register_package('anthropic')
  self.__register_package('azure.storage.blob', library_name='azure-storage-blob')
  self.__register_package('boto3')
  self.__register_package('datasets')
+ self.__register_package('diffusers')
  self.__register_package('fiftyone')
  self.__register_package('fireworks', library_name='fireworks-ai')
  self.__register_package('google.cloud.storage', library_name='google-cloud-storage')
@@ -771,6 +773,7 @@ class Env:
  self.__register_package('groq')
  self.__register_package('huggingface_hub', library_name='huggingface-hub')
  self.__register_package('label_studio_sdk', library_name='label-studio-sdk')
+ self.__register_package('librosa')
  self.__register_package('llama_cpp', library_name='llama-cpp-python')
  self.__register_package('mcp')
  self.__register_package('mistralai')
@@ -783,6 +786,7 @@ class Env:
  self.__register_package('replicate')
  self.__register_package('sentencepiece')
  self.__register_package('sentence_transformers', library_name='sentence-transformers')
+ self.__register_package('soundfile')
  self.__register_package('spacy')
  self.__register_package('tiktoken')
  self.__register_package('together')
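These entries extend Env's registry of optional dependencies (audio and diffusion-model support, among others), mapping an importable module name to the pip package that provides it. A rough sketch of that kind of registry is shown below; the helper functions are illustrative assumptions, not Env's actual implementation.

```
import importlib.util
from typing import Optional

_registered: dict[str, str] = {}

def register_package(module_name: str, library_name: Optional[str] = None) -> None:
    # e.g. module 'huggingface_hub' is installed via the 'huggingface-hub' distribution
    _registered[module_name] = library_name or module_name

def is_installed(module_name: str) -> bool:
    # True if the module can be imported in the current environment.
    return importlib.util.find_spec(module_name) is not None

register_package('librosa')
register_package('soundfile')
register_package('huggingface_hub', library_name='huggingface-hub')
```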
@@ -1,3 +1,4 @@
+ import random
  from typing import Optional

  import sqlalchemy as sql
@@ -8,6 +9,17 @@ from pixeltable import exprs
  class ExecContext:
  """Class for execution runtime constants"""

+ row_builder: exprs.RowBuilder
+ profile: exprs.ExecProfile
+ show_pbar: bool
+ batch_size: int
+ num_rows: Optional[int]
+ conn: Optional[sql.engine.Connection]
+ pk_clause: Optional[list[sql.ClauseElement]]
+ num_computed_exprs: int
+ ignore_errors: bool
+ random_seed: int # general-purpose source of randomness with execution scope
+
  def __init__(
  self,
  row_builder: exprs.RowBuilder,
@@ -23,8 +35,9 @@ class ExecContext:
  self.row_builder = row_builder
  self.profile = exprs.ExecProfile(row_builder)
  # num_rows is used to compute the total number of computed cells used for the progress bar
- self.num_rows: Optional[int] = None
- self.conn: Optional[sql.engine.Connection] = None # if present, use this to execute SQL queries
+ self.num_rows = None
+ self.conn = None # if present, use this to execute SQL queries
  self.pk_clause = pk_clause
  self.num_computed_exprs = num_computed_exprs
  self.ignore_errors = ignore_errors
+ self.random_seed = random.randint(0, 1 << 63)
@@ -648,7 +648,6 @@ class SqlSampleNode(SqlNode):
  )
  self.stratify_exprs = stratify_exprs
  self.sample_clause = sample_clause
- assert isinstance(self.sample_clause.seed, int)

  @classmethod
  def key_sql_expr(cls, seed: sql.ColumnElement, sql_cols: Iterable[sql.ColumnElement]) -> sql.ColumnElement:
@@ -667,7 +666,9 @@ class SqlSampleNode(SqlNode):
  """Create an expression for randomly ordering rows with a given seed"""
  rowid_cols = [*cte.c[-self.pk_count : -1]] # exclude the version column
  assert len(rowid_cols) > 0
- return self.key_sql_expr(sql.literal_column(str(self.sample_clause.seed)), rowid_cols)
+ # If seed is not set in the sample clause, use the random seed given by the execution context
+ seed = self.sample_clause.seed if self.sample_clause.seed is not None else self.ctx.random_seed
+ return self.key_sql_expr(sql.literal_column(str(seed)), rowid_cols)

  def _create_stmt(self) -> sql.Select:
  from pixeltable.plan import SampleClause
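Taken together, the ExecContext and SqlSampleNode changes above make unseeded sampling well defined within a single query execution: one seed is drawn per execution and reused wherever the plan needs randomness, while an explicit seed in the sample clause still takes precedence. A condensed sketch of that fallback; the class and function names below (other than `random_seed` and `seed`) are placeholders, not the actual Pixeltable classes.

```
import random
from typing import Optional

class ExecCtx:
    def __init__(self) -> None:
        # One general-purpose seed per query execution.
        self.random_seed = random.randint(0, 1 << 63)

def effective_seed(clause_seed: Optional[int], ctx: ExecCtx) -> int:
    # Mirrors the fallback added in SqlSampleNode: an explicit seed wins,
    # otherwise the execution-scoped seed keeps row ordering stable for this run.
    return clause_seed if clause_seed is not None else ctx.random_seed

ctx = ExecCtx()
assert effective_seed(42, ctx) == 42
assert effective_seed(None, ctx) == ctx.random_seed
```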