pixeltable 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry; it is provided for informational purposes only.
Potentially problematic release: this version of pixeltable might be problematic.
- pixeltable/__init__.py +18 -9
- pixeltable/__version__.py +3 -0
- pixeltable/catalog/column.py +31 -50
- pixeltable/catalog/insertable_table.py +7 -6
- pixeltable/catalog/table.py +171 -57
- pixeltable/catalog/table_version.py +417 -140
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/dataframe.py +239 -121
- pixeltable/env.py +82 -16
- pixeltable/exec/__init__.py +2 -1
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/data_row_batch.py +6 -7
- pixeltable/exec/expr_eval_node.py +28 -28
- pixeltable/exec/in_memory_data_node.py +11 -7
- pixeltable/exec/sql_scan_node.py +7 -6
- pixeltable/exprs/__init__.py +4 -3
- pixeltable/exprs/column_ref.py +9 -0
- pixeltable/exprs/comparison.py +3 -3
- pixeltable/exprs/data_row.py +5 -1
- pixeltable/exprs/expr.py +15 -7
- pixeltable/exprs/function_call.py +17 -15
- pixeltable/exprs/image_member_access.py +9 -28
- pixeltable/exprs/in_predicate.py +96 -0
- pixeltable/exprs/inline_array.py +13 -11
- pixeltable/exprs/inline_dict.py +15 -13
- pixeltable/exprs/literal.py +16 -4
- pixeltable/exprs/row_builder.py +15 -41
- pixeltable/exprs/similarity_expr.py +65 -0
- pixeltable/ext/__init__.py +5 -0
- pixeltable/ext/functions/yolox.py +92 -0
- pixeltable/func/__init__.py +0 -2
- pixeltable/func/aggregate_function.py +18 -15
- pixeltable/func/callable_function.py +57 -13
- pixeltable/func/expr_template_function.py +20 -3
- pixeltable/func/function.py +35 -4
- pixeltable/func/globals.py +24 -14
- pixeltable/func/signature.py +23 -27
- pixeltable/func/udf.py +13 -12
- pixeltable/functions/__init__.py +8 -8
- pixeltable/functions/eval.py +7 -8
- pixeltable/functions/huggingface.py +64 -17
- pixeltable/functions/openai.py +36 -3
- pixeltable/functions/pil/image.py +61 -64
- pixeltable/functions/together.py +21 -0
- pixeltable/functions/util.py +11 -0
- pixeltable/globals.py +425 -0
- pixeltable/index/__init__.py +2 -0
- pixeltable/index/base.py +51 -0
- pixeltable/index/embedding_index.py +168 -0
- pixeltable/io/__init__.py +3 -0
- pixeltable/{utils → io}/hf_datasets.py +48 -17
- pixeltable/io/pandas.py +148 -0
- pixeltable/{utils → io}/parquet.py +58 -33
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/iterators/base.py +4 -0
- pixeltable/iterators/document.py +218 -97
- pixeltable/iterators/video.py +8 -9
- pixeltable/metadata/__init__.py +7 -3
- pixeltable/metadata/converters/convert_12.py +3 -0
- pixeltable/metadata/converters/convert_13.py +41 -0
- pixeltable/metadata/schema.py +45 -22
- pixeltable/plan.py +15 -51
- pixeltable/store.py +38 -41
- pixeltable/tool/create_test_db_dump.py +39 -4
- pixeltable/type_system.py +47 -96
- pixeltable/utils/documents.py +42 -12
- pixeltable/utils/http_server.py +70 -0
- {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/METADATA +14 -10
- pixeltable-0.2.6.dist-info/RECORD +119 -0
- {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/WHEEL +1 -1
- pixeltable/client.py +0 -604
- pixeltable/exprs/image_similarity_predicate.py +0 -58
- pixeltable/func/batched_function.py +0 -53
- pixeltable/tests/conftest.py +0 -177
- pixeltable/tests/functions/test_fireworks.py +0 -42
- pixeltable/tests/functions/test_functions.py +0 -60
- pixeltable/tests/functions/test_huggingface.py +0 -158
- pixeltable/tests/functions/test_openai.py +0 -152
- pixeltable/tests/functions/test_together.py +0 -111
- pixeltable/tests/test_audio.py +0 -65
- pixeltable/tests/test_catalog.py +0 -27
- pixeltable/tests/test_client.py +0 -21
- pixeltable/tests/test_component_view.py +0 -370
- pixeltable/tests/test_dataframe.py +0 -439
- pixeltable/tests/test_dirs.py +0 -107
- pixeltable/tests/test_document.py +0 -120
- pixeltable/tests/test_exprs.py +0 -805
- pixeltable/tests/test_function.py +0 -324
- pixeltable/tests/test_migration.py +0 -43
- pixeltable/tests/test_nos.py +0 -54
- pixeltable/tests/test_snapshot.py +0 -208
- pixeltable/tests/test_table.py +0 -1267
- pixeltable/tests/test_transactional_directory.py +0 -42
- pixeltable/tests/test_types.py +0 -22
- pixeltable/tests/test_video.py +0 -159
- pixeltable/tests/test_view.py +0 -530
- pixeltable/tests/utils.py +0 -408
- pixeltable-0.2.4.dist-info/RECORD +0 -132
- {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/LICENSE +0 -0
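The headline change in this file list is the removal of pixeltable/client.py (-604 lines) in favor of the new pixeltable/globals.py (+425 lines): catalog operations that previously went through a Client instance appear to be exposed as module-level functions, which the create_test_db_dump.py hunks at the bottom of this diff confirm (pxt.Client() disappears; direct pxt.create_table()/pxt.create_view() calls take its place). A minimal before/after sketch, assuming only the function names visible in those hunks; the schema here is invented for illustration:

import pixeltable as pxt

# 0.2.4 style: catalog operations went through a Client instance
#   cl = pxt.Client()
#   t = cl.create_table('demo', {'c2': pxt.IntType()})

# 0.2.6 style: the same operations are module-level functions (pixeltable/globals.py)
t = pxt.create_table('demo', {'c2': pxt.IntType()}, primary_key='c2')
v = pxt.create_view('demo_view', t, filter=(t.c2 < 50))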
pixeltable/metadata/schema.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Optional, List,
+from typing import Optional, List, get_type_hints, Type, Any, TypeVar, Tuple, Union
 import platform
 import uuid
 import dataclasses
@@ -71,16 +71,43 @@ class Dir(Base):


 @dataclasses.dataclass
-class
+class ColumnMd:
     """
-    Records
-
-
-
+    Records the non-versioned metadata of a column.
+    - immutable attributes: type, primary key, etc.
+    - when a column was added/dropped, which is needed to GC unreachable storage columns
+      (a column that was added after table snapshot n and dropped before table snapshot n+1 can be removed
+      from the stored table).
     """
-
+    id: int
     schema_version_add: int
     schema_version_drop: Optional[int]
+    col_type: dict
+
+    # if True, is part of the primary key
+    is_pk: bool
+
+    # if set, this is a computed column
+    value_expr: Optional[dict]
+
+    # if True, the column is present in the stored table
+    stored: Optional[bool]
+
+
+@dataclasses.dataclass
+class IndexMd:
+    """
+    Metadata needed to instantiate an EmbeddingIndex
+    """
+    id: int
+    name: str
+    indexed_col_id: int  # column being indexed
+    index_val_col_id: int  # column holding the values to be indexed
+    index_val_undo_col_id: int  # column holding index values for deleted rows
+    schema_version_add: int
+    schema_version_drop: Optional[int]
+    class_fqn: str
+    init_args: dict[str, Any]


 @dataclasses.dataclass
@@ -91,13 +118,13 @@ class ViewMd:
     base_versions: List[Tuple[str, Optional[int]]]

     # filter predicate applied to the base table; view-only
-    predicate: Optional[
+    predicate: Optional[dict[str, Any]]

     # ComponentIterator subclass; only for component views
     iterator_class_fqn: Optional[str]

     # args to pass to the iterator class constructor; only for component views
-    iterator_args: Optional[
+    iterator_args: Optional[dict[str, Any]]


 @dataclasses.dataclass
@@ -109,15 +136,15 @@ class TableMd:
     # each version has a corresponding schema version (current_version >= current_schema_version)
     current_schema_version: int

-    # used to assign Column.id
-
+    next_col_id: int  # used to assign Column.id
+    next_idx_id: int  # used to assign IndexMd.id

     # - used to assign the rowid column in the storage table
     # - every row is assigned a unique and immutable rowid on insertion
     next_row_id: int

-
-
+    column_md: dict[int, ColumnMd]  # col_id -> ColumnMd
+    index_md: dict[int, IndexMd]  # index_id -> IndexMd
     view_md: Optional[ViewMd]


@@ -155,24 +182,20 @@ class TableVersion(Base):
 @dataclasses.dataclass
 class SchemaColumn:
     """
-    Records the
-    Contains the full set of columns for each new schema version: one record per (column x schema version).
+    Records the versioned metadata of a column.
     """
     pos: int
     name: str
-    col_type: dict
-    is_pk: bool
-    value_expr: Optional[dict]
-    stored: Optional[bool]
-    # if True, creates vector index for this column
-    is_indexed: bool


 @dataclasses.dataclass
 class TableSchemaVersionMd:
+    """
+    Records all versioned table metadata.
+    """
     schema_version: int
     preceding_schema_version: Optional[int]
-    columns:
+    columns: dict[int, SchemaColumn]  # col_id -> SchemaColumn
     num_retained_versions: int
     comment: str
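Taken together, the schema.py changes split column metadata into a non-versioned part (the new ColumnMd, keyed by col_id in TableMd.column_md) and a versioned part (SchemaColumn, keyed by col_id in TableSchemaVersionMd.columns), and they replace the old per-column is_indexed flag with standalone IndexMd records that name an index implementation (class_fqn) plus its constructor args (init_args). A short sketch of what one such record might look like; the field layout is copied from the diff above, while the example values (including the shape of the serialized col_type dict) are invented:

import dataclasses
from typing import Optional

@dataclasses.dataclass
class ColumnMd:
    id: int
    schema_version_add: int
    schema_version_drop: Optional[int]
    col_type: dict
    is_pk: bool
    value_expr: Optional[dict]
    stored: Optional[bool]

# a column added in schema version 0 and never dropped
col_md = ColumnMd(
    id=0, schema_version_add=0, schema_version_drop=None,
    col_type={'nullable': True},  # stand-in for the serialized type dict
    is_pk=False, value_expr=None, stored=True)

# dataclasses round-trip cleanly to the dict form persisted in the metadata store
assert dataclasses.asdict(col_md)['schema_version_drop'] is None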
pixeltable/plan.py
CHANGED
@@ -60,24 +60,10 @@ class Analyzer:
         # filter predicate applied to output rows of the SQL scan
         self.filter: Optional[exprs.Predicate] = None
         # not executable
-        self.similarity_clause: Optional[exprs.ImageSimilarityPredicate] = None
+        #self.similarity_clause: Optional[exprs.ImageSimilarityPredicate] = None
         if where_clause is not None:
             where_clause_conjuncts, self.filter = where_clause.split_conjuncts(lambda e: e.sql_expr() is not None)
             self.sql_where_clause = exprs.CompoundPredicate.make_conjunction(where_clause_conjuncts)
-            if self.filter is not None:
-                similarity_clauses, self.filter = self.filter.split_conjuncts(
-                    lambda e: isinstance(e, exprs.ImageSimilarityPredicate))
-                if len(similarity_clauses) > 1:
-                    raise excs.Error(f'More than one nearest() not supported')
-                if len(similarity_clauses) == 1:
-                    if len(self.order_by_clause) > 0:
-                        raise excs.Error((
-                            f'nearest() returns results in order of proximity and cannot be used in conjunction with '
-                            f'order_by()'))
-                    self.similarity_clause = similarity_clauses[0]
-                    img_col = self.similarity_clause.img_col_ref.col
-                    if not img_col.is_indexed:
-                        raise excs.Error(f'nearest() not available for unindexed column {img_col.name}')

         # all exprs that are evaluated in Python; not executable
         self.all_exprs = self.select_list.copy()
@@ -203,8 +189,6 @@ class Planner:
         refd_tbl_ids: Set[UUID] = set()
         if where_clause is not None:
             analyzer = cls.analyze(tbl, where_clause)
-            if analyzer.similarity_clause is not None:
-                raise excs.Error('nearest() cannot be used with count()')
             if analyzer.filter is not None:
                 raise excs.Error(f'Filter {analyzer.filter} not expressible in SQL')
             clause_element = analyzer.sql_where_clause.sql_expr()
@@ -220,18 +204,11 @@
     ) -> exec.ExecNode:
         """Creates a plan for TableVersion.insert()"""
         assert not tbl.is_view()
-        #
-        # 1. stored_cols: all cols we need to store, incl computed cols (and indices)
+        # stored_cols: all cols we need to store, incl computed cols (and indices)
         stored_cols = [c for c in tbl.cols if c.is_stored]
         assert len(stored_cols) > 0
-        # 2. values to insert into indices
-        indexed_cols = [c for c in tbl.cols if c.is_indexed]
-        index_info: List[Tuple[catalog.Column, func.Function]] = []
-        if len(indexed_cols) > 0:
-            from pixeltable.functions.nos.image_embedding import openai_clip
-            index_info = [(c, openai_clip) for c in tbl.cols if c.is_indexed]

-        row_builder = exprs.RowBuilder([], stored_cols,
+        row_builder = exprs.RowBuilder([], stored_cols, [])

         # create InMemoryDataNode for 'rows'
         stored_col_info = row_builder.output_slot_idxs()
@@ -260,7 +237,7 @@
     @classmethod
     def create_update_plan(
             cls, tbl: catalog.TableVersionPath,
-            update_targets:
+            update_targets: dict[catalog.Column, exprs.Expr],
             recompute_targets: List[catalog.Column],
             where_clause: Optional[exprs.Predicate], cascade: bool
     ) -> Tuple[exec.ExecNode, List[str], List[catalog.Column]]:
@@ -279,7 +256,7 @@
         # retrieve all stored cols and all target exprs
         assert isinstance(tbl, catalog.TableVersionPath)
         target = tbl.tbl_version  # the one we need to update
-        updated_cols =
+        updated_cols = list(update_targets.keys())
         if len(recompute_targets) > 0:
             recomputed_cols = recompute_targets.copy()
         else:
@@ -291,12 +268,12 @@
             col for col in target.cols if col.is_stored and not col in updated_cols and not col in recomputed_base_cols
         ]
         select_list = [exprs.ColumnRef(col) for col in copied_cols]
-        select_list.extend(
+        select_list.extend(update_targets.values())

         recomputed_exprs = \
             [c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols]
         # recomputed cols reference the new values of the updated cols
-        for col, e in update_targets:
+        for col, e in update_targets.items():
             exprs.Expr.list_substitute(recomputed_exprs, exprs.ColumnRef(col), e)
         select_list.extend(recomputed_exprs)
@@ -375,16 +352,10 @@
         # the store
         target = view.tbl_version  # the one we need to populate
         stored_cols = [c for c in target.cols if c.is_stored and (c.is_computed or target.is_iterator_column(c))]
-        # 2.
-        indexed_cols = [c for c in target.cols if c.is_indexed]
-        index_info: List[Tuple[catalog.Column, func.Function]] = []
-        if len(indexed_cols) > 0:
-            from pixeltable.functions.nos.image_embedding import openai_clip
-            index_info = [(c, openai_clip) for c in target.cols if c.is_indexed]
-        # 3. for component views: iterator args
+        # 2. for component views: iterator args
         iterator_args = [target.iterator_args] if target.iterator_args is not None else []

-        row_builder = exprs.RowBuilder(iterator_args, stored_cols,
+        row_builder = exprs.RowBuilder(iterator_args, stored_cols, [])

         # execution plan:
         # 1. materialize exprs computed from the base that are needed for stored view columns
@@ -548,7 +519,7 @@
         analyzer = Analyzer(
             tbl, select_list, where_clause=where_clause, group_by_clause=group_by_clause,
             order_by_clause=order_by_clause)
-        row_builder = exprs.RowBuilder(analyzer.all_exprs, [],
+        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], analyzer.sql_exprs)

         analyzer.finalize(row_builder)
         # select_list: we need to materialize everything that's been collected
@@ -582,7 +553,7 @@
         sql_select_list = analyzer.sql_exprs.copy()
         plan = exec.SqlScanNode(
             tbl, row_builder, select_list=sql_select_list, where_clause=analyzer.sql_where_clause,
-            filter=analyzer.filter,
+            filter=analyzer.filter, order_by_items=order_by_items,
             limit=sql_limit, set_pk=with_pk, exact_version_only=exact_version_only)
         plan = cls._insert_prefetch_node(tbl.tbl_version.id, analyzer.select_list, row_builder, plan)

@@ -627,21 +598,15 @@
     @classmethod
     def create_add_column_plan(
             cls, tbl: catalog.TableVersionPath, col: catalog.Column
-    ) -> Tuple[exec.ExecNode, Optional[int]
+    ) -> Tuple[exec.ExecNode, Optional[int]]:
         """Creates a plan for InsertableTable.add_column()
         Returns:
             plan: the plan to execute
-            ctx: the context to use for the plan
             value_expr slot idx for the plan output (for computed cols)
-            embedding slot idx for the plan output (for indexed image cols)
         """
         assert isinstance(tbl, catalog.TableVersionPath)
         index_info: List[Tuple[catalog.Column, func.Function]] = []
-
-        from pixeltable.functions.nos.image_embedding import openai_clip
-        index_info = [(col, openai_clip)]
-        row_builder = exprs.RowBuilder(
-            output_exprs=[], columns=[col], indices=index_info, input_exprs=[])
+        row_builder = exprs.RowBuilder(output_exprs=[], columns=[col], input_exprs=[])
         analyzer = Analyzer(tbl, row_builder.default_eval_ctx.target_exprs)
         plan = cls._create_query_plan(tbl, row_builder=row_builder, analyzer=analyzer, with_pk=True)
         plan.ctx.batch_size = 16
@@ -651,6 +616,5 @@
         # we want to flush images
         if col.is_computed and col.is_stored and col.col_type.is_image_type():
             plan.set_stored_img_cols(row_builder.output_slot_idxs())
-        value_expr_slot_idx
-
-        return plan, value_expr_slot_idx, embedding_slot_idx
+        value_expr_slot_idx = row_builder.output_slot_idxs()[0].slot_idx if col.is_computed else None
+        return plan, value_expr_slot_idx
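Two themes run through the plan.py hunks: the hard-wired CLIP image index is gone (is_indexed, openai_clip, and all nearest() handling), matching the new pixeltable/index/ package and exprs/similarity_expr.py in the file list, and create_update_plan() now receives update_targets as dict[catalog.Column, exprs.Expr] instead of a sequence of pairs (hence the .keys(), .values(), and .items() calls above). A toy sketch of that shape change with stand-in types; only the dict-vs-pairs handling mirrors the diff:

from typing import Any

Column = str  # stand-in for catalog.Column
Expr = Any    # stand-in for exprs.Expr

def updated_cols_old(update_targets: list[tuple[Column, Expr]]) -> list[Column]:
    # 0.2.4 shape: iterate (column, expr) pairs directly
    return [col for col, _ in update_targets]

def updated_cols_new(update_targets: dict[Column, Expr]) -> list[Column]:
    # 0.2.6 shape: a dict keyed by column; iteration needs .items()
    return list(update_targets.keys())

assert updated_cols_new({'c1': 42}) == updated_cols_old([('c1', 42)])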
pixeltable/store.py
CHANGED
@@ -38,7 +38,7 @@ class StoreBase:
         self.tbl_version = tbl_version
         self.sa_md = sql.MetaData()
         self.sa_tbl: Optional[sql.Table] = None
-        self.
+        self.create_sa_tbl()

     def pk_columns(self) -> List[sql.Column]:
         return self._pk_columns
@@ -62,7 +62,7 @@ class StoreBase:
         return [*rowid_cols, self.v_min_col, self.v_max_col]


-    def
+    def create_sa_tbl(self) -> None:
         """Create self.sa_tbl from self.tbl_version."""
         system_cols = self._create_system_columns()
         all_cols = system_cols.copy()
@@ -76,9 +76,6 @@ class StoreBase:
             all_cols.append(col.sa_errormsg_col)
             all_cols.append(col.sa_errortype_col)

-            if col.is_indexed:
-                all_cols.append(col.sa_idx_col)
-
         # we create an index for:
         # - scalar columns (except for strings, because long strings can't be used for B-tree indices)
         # - non-computed video and image columns (they will contain external paths/urls that users might want to
@@ -145,8 +142,8 @@ class StoreBase:
         """Move tmp media files that we generated to a permanent location"""
         for c in media_cols:
             for table_row in table_rows:
-                file_url = table_row[c.
-                table_row[c.
+                file_url = table_row[c.store_name()]
+                table_row[c.store_name()] = self._move_tmp_media_file(file_url, c, v_min)

     def _create_table_row(
             self, input_row: exprs.DataRow, row_builder: exprs.RowBuilder, media_cols: List[catalog.Column],
@@ -168,16 +165,19 @@ class StoreBase:

         return table_row, num_excs

-    def count(self) ->
+    def count(self, conn: Optional[sql.engine.Connection] = None) -> int:
         """Return the number of rows visible in self.tbl_version"""
         stmt = sql.select(sql.func.count('*'))\
             .select_from(self.sa_tbl)\
             .where(self.v_min_col <= self.tbl_version.version)\
             .where(self.v_max_col > self.tbl_version.version)
-
+        if conn is None:
+            with env.Env.get().engine.connect() as conn:
+                result = conn.execute(stmt).scalar_one()
+        else:
             result = conn.execute(stmt).scalar_one()
-
-
+        assert isinstance(result, int)
+        return result

     def create(self, conn: sql.engine.Connection) -> None:
         self.sa_md.create_all(bind=conn)
@@ -193,38 +193,35 @@ class StoreBase:
         message).
         """
         assert col.is_stored
-
+        col_type_str = col.get_sa_col_type().compile(dialect=conn.dialect)
+        stmt = sql.text(f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.store_name()} {col_type_str} NULL')
         log_stmt(_logger, stmt)
         conn.execute(stmt)
-        added_storage_cols = [col.
+        added_storage_cols = [col.store_name()]
         if col.records_errors:
             # we also need to create the errormsg and errortype storage cols
             stmt = (f'ALTER TABLE {self._storage_name()} '
-                f'ADD COLUMN {col.
+                f'ADD COLUMN {col.errormsg_store_name()} VARCHAR DEFAULT NULL')
             conn.execute(sql.text(stmt))
             stmt = (f'ALTER TABLE {self._storage_name()} '
-                f'ADD COLUMN {col.
+                f'ADD COLUMN {col.errortype_store_name()} VARCHAR DEFAULT NULL')
             conn.execute(sql.text(stmt))
-
-            self.
+            added_storage_cols.extend([col.errormsg_store_name(), col.errortype_store_name()])
+        self.create_sa_tbl()
         _logger.info(f'Added columns {added_storage_cols} to storage table {self._storage_name()}')

-    def drop_column(self, col:
-        """
-
-
-
+    def drop_column(self, col: catalog.Column, conn: sql.engine.Connection) -> None:
+        """Execute Alter Table Drop Column statement"""
+        stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.store_name()}'
+        conn.execute(sql.text(stmt))
+        if col.records_errors:
+            stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.errormsg_store_name()}'
+            conn.execute(sql.text(stmt))
+            stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.errortype_store_name()}'
             conn.execute(sql.text(stmt))
-        if col.records_errors:
-            stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.errormsg_storage_name()}'
-            conn.execute(sql.text(stmt))
-            stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.errortype_storage_name()}'
-            conn.execute(sql.text(stmt))
-        self._create_sa_tbl()

     def load_column(
-            self, col: catalog.Column, exec_plan: ExecNode, value_expr_slot_idx: int,
-            conn: sql.engine.Connection
+            self, col: catalog.Column, exec_plan: ExecNode, value_expr_slot_idx: int, conn: sql.engine.Connection
     ) -> int:
         """Update store column of a computed column with values produced by an execution plan

@@ -253,18 +250,11 @@ class StoreBase:
                     col.sa_errormsg_col: error_msg
                 }
             else:
-                val = result_row.get_stored_val(value_expr_slot_idx)
+                val = result_row.get_stored_val(value_expr_slot_idx, col.sa_col.type)
                 if col.col_type.is_media_type():
                     val = self._move_tmp_media_file(val, col, result_row.pk[-1])
                 values_dict = {col.sa_col: val}

-                if col.is_indexed:
-                    # TODO: deal with exceptions
-                    assert not result_row.has_exc(embedding_slot_idx)
-                    # don't use get_stored_val() here, we need to pass the ndarray
-                    embedding = result_row[embedding_slot_idx]
-                    values_dict[col.sa_index_col] = embedding
-
             update_stmt = sql.update(self.sa_tbl).values(values_dict)
             for pk_col, pk_val in zip(self.pk_columns(), result_row.pk):
                 update_stmt = update_stmt.where(pk_col == pk_val)
@@ -337,6 +327,7 @@
             self, current_version: int, base_versions: List[Optional[int]], match_on_vmin: bool,
             where_clause: Optional[sql.ClauseElement], conn: sql.engine.Connection) -> int:
         """Mark rows as deleted that are live and were created prior to current_version.
+        Also: populate the undo columns
         Args:
             base_versions: if non-None, join only to base rows that were created at that version,
                 otherwise join to rows that are live in the base's current version (which is distinct from the
@@ -354,8 +345,14 @@
         rowid_join_clause = self._rowid_join_predicate()
         base_versions_clause = sql.true() if len(base_versions) == 0 \
             else self.base._versions_clause(base_versions, match_on_vmin)
+        set_clause = {self.v_max_col: current_version}
+        for index_info in self.tbl_version.idxs_by_name.values():
+            # copy value column to undo column
+            set_clause[index_info.undo_col.sa_col] = index_info.val_col.sa_col
+            # set value column to NULL
+            set_clause[index_info.val_col.sa_col] = None
         stmt = sql.update(self.sa_tbl) \
-            .values(
+            .values(set_clause) \
            .where(where_clause) \
            .where(rowid_join_clause) \
            .where(base_versions_clause)
@@ -416,8 +413,8 @@ class StoreComponentView(StoreView):
             self.rowid_cols.append(self.pos_col)
         return self.rowid_cols

-    def
-        super().
+    def create_sa_tbl(self) -> None:
+        super().create_sa_tbl()
         # we need to fix up the 'pos' column in TableVersion
         self.tbl_version.cols_by_name['pos'].sa_col = self.pos_col
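The most interesting store.py addition is the set_clause built for versioned deletes: one UPDATE statement both closes out the row (v_max = current_version) and preserves each index's value by assigning the value column to its undo column before NULLing it, relying on SQL's rule that SET expressions are evaluated against the pre-update row. A self-contained SQLAlchemy sketch of the column-to-column assignment; the table and column names are invented, only the pattern mirrors the diff:

import sqlalchemy as sql

md = sql.MetaData()
tbl = sql.Table(
    'rows', md,
    sql.Column('v_max', sql.BigInteger),
    sql.Column('idx_val', sql.Float),
    sql.Column('idx_val_undo', sql.Float))

current_version = 7
set_clause = {tbl.c.v_max: current_version}
set_clause[tbl.c.idx_val_undo] = tbl.c.idx_val  # copy value column to undo column
set_clause[tbl.c.idx_val] = None                # set value column to NULL
stmt = sql.update(tbl).values(set_clause)

# renders roughly as:
#   UPDATE rows SET v_max=:v_max, idx_val=:idx_val, idx_val_undo=idx_val
print(stmt)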
pixeltable/tool/create_test_db_dump.py
CHANGED
@@ -11,6 +11,7 @@ import toml
 import pixeltable as pxt
 import pixeltable.metadata as metadata
 from pixeltable.env import Env
+from pixeltable.func import Batch
 from pixeltable.type_system import \
     StringType, IntType, FloatType, BoolType, TimestampType, JsonType

@@ -29,9 +30,7 @@ class Dumper:
         os.environ['PIXELTABLE_DB'] = db_name
         os.environ['PIXELTABLE_PGDATA'] = str(shared_home / 'pgdata')

-        Env.get().
-        self.cl = pxt.Client()
-        self.cl.logging(level=logging.DEBUG, to_stdout=True)
+        Env.get().configure_logging(level=logging.DEBUG, to_stdout=True)

     def dump_db(self) -> None:
         md_version = metadata.VERSION
@@ -76,8 +75,18 @@ class Dumper:
             'c6': JsonType(nullable=False),
             'c7': JsonType(nullable=False),
         }
-        t =
+        t = pxt.create_table('sample_table', schema, primary_key='c2')
+
+        # Add columns for InlineArray and InlineDict
         t.add_column(c8=[[1, 2, 3], [4, 5, 6]])
+        t.add_column(c9=[['a', 'b', 'c'], ['d', 'e', 'f']])
+        t.add_column(c10=[t.c1, [t.c1n, t.c2]])
+        t.add_column(c11={'int': 22, 'dict': {'key': 'val'}, 'expr': t.c1})
+
+        # InPredicate
+        t.add_column(isin_1=t.c1.isin(['test string 1', 'test string 2', 'test string 3']))
+        t.add_column(isin_2=t.c2.isin([1, 2, 3, 4, 5]))
+        t.add_column(isin_3=t.c2.isin(t.c6.f5))

         # Add columns for .astype converters to ensure they're persisted properly
         t.add_column(c2_as_float=t.c2.astype(FloatType()))
@@ -136,6 +145,32 @@ class Dumper:
             for i in range(num_rows)
         ]
         t.insert(rows)
+        pxt.create_dir('views')
+        v = pxt.create_view('views.sample_view', t, filter=(t.c2 < 50))
+        _ = pxt.create_view('views.sample_snapshot', t, filter=(t.c2 >= 75), is_snapshot=True)
+        e = pxt.create_view('views.empty_view', t, filter=t.c2 == 4171780)
+        assert e.count() == 0
+        # Computed column using a library function
+        v['str_format'] = pxt.functions.string.str_format('{0} {key}', t.c1, key=t.c1)
+        # Computed column using a bespoke stored udf
+        v['test_udf'] = test_udf_stored(t.c2)
+        # Computed column using a batched function
+        # (apply this to the empty view, since it's a "heavyweight" function)
+        e['batched'] = pxt.functions.huggingface.clip_text(t.c1, model_id='openai/clip-vit-base-patch32')
+        # computed column using a stored batched function
+        v['test_udf_batched'] = test_udf_stored_batched(t.c1, upper=False)
+        # astype
+        v['astype'] = t.c1.astype(pxt.FloatType())
+
+
+@pxt.udf(_force_stored=True)
+def test_udf_stored(n: int) -> int:
+    return n + 1
+
+
+@pxt.udf(batch_size=4, _force_stored=True)
+def test_udf_stored_batched(strings: Batch[str], *, upper: bool = True) -> Batch[str]:
+    return [string.upper() if upper else string.lower() for string in strings]


 def main() -> None:
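The rewritten dump also exercises the 0.2.6 UDF machinery end to end: stored scalar UDFs (_force_stored=True) and stored batched UDFs. A usage sketch of the batched form, inferring from the decorator and signature above that batch_size caps how many row values are gathered into each Batch argument while keyword-only parameters such as upper stay scalar; the table and column here are hypothetical:

import pixeltable as pxt
from pixeltable.func import Batch

@pxt.udf(batch_size=4)
def shout(strings: Batch[str]) -> Batch[str]:
    # invoked once per batch of up to 4 row values
    return [s.upper() + '!' for s in strings]

t = pxt.create_table('udf_demo', {'c1': pxt.StringType()})
t['shouted'] = shout(t.c1)  # computed column, evaluated batch-wise
t.insert([{'c1': 'hello'}, {'c1': 'world'}])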