pixeltable 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.

Potentially problematic release: this version of pixeltable might be problematic.
Files changed (127)
  1. pixeltable/__init__.py +5 -3
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -0
  4. pixeltable/catalog/catalog.py +335 -128
  5. pixeltable/catalog/column.py +22 -5
  6. pixeltable/catalog/dir.py +19 -6
  7. pixeltable/catalog/insertable_table.py +34 -37
  8. pixeltable/catalog/named_function.py +0 -4
  9. pixeltable/catalog/schema_object.py +28 -42
  10. pixeltable/catalog/table.py +193 -158
  11. pixeltable/catalog/table_version.py +191 -232
  12. pixeltable/catalog/table_version_handle.py +50 -0
  13. pixeltable/catalog/table_version_path.py +49 -33
  14. pixeltable/catalog/view.py +56 -96
  15. pixeltable/config.py +103 -0
  16. pixeltable/dataframe.py +89 -89
  17. pixeltable/env.py +98 -168
  18. pixeltable/exec/aggregation_node.py +5 -4
  19. pixeltable/exec/cache_prefetch_node.py +1 -1
  20. pixeltable/exec/component_iteration_node.py +13 -9
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +0 -4
  23. pixeltable/exec/exec_node.py +3 -2
  24. pixeltable/exec/expr_eval/schedulers.py +2 -1
  25. pixeltable/exec/in_memory_data_node.py +9 -4
  26. pixeltable/exec/row_update_node.py +1 -2
  27. pixeltable/exec/sql_node.py +20 -16
  28. pixeltable/exprs/__init__.py +2 -0
  29. pixeltable/exprs/arithmetic_expr.py +7 -11
  30. pixeltable/exprs/array_slice.py +1 -1
  31. pixeltable/exprs/column_property_ref.py +3 -3
  32. pixeltable/exprs/column_ref.py +12 -13
  33. pixeltable/exprs/comparison.py +3 -6
  34. pixeltable/exprs/compound_predicate.py +4 -4
  35. pixeltable/exprs/expr.py +31 -22
  36. pixeltable/exprs/expr_dict.py +3 -3
  37. pixeltable/exprs/expr_set.py +1 -1
  38. pixeltable/exprs/function_call.py +110 -80
  39. pixeltable/exprs/globals.py +3 -3
  40. pixeltable/exprs/in_predicate.py +1 -1
  41. pixeltable/exprs/inline_expr.py +3 -3
  42. pixeltable/exprs/is_null.py +1 -1
  43. pixeltable/exprs/json_mapper.py +2 -2
  44. pixeltable/exprs/json_path.py +17 -10
  45. pixeltable/exprs/literal.py +1 -1
  46. pixeltable/exprs/method_ref.py +2 -2
  47. pixeltable/exprs/row_builder.py +8 -17
  48. pixeltable/exprs/rowid_ref.py +21 -10
  49. pixeltable/exprs/similarity_expr.py +5 -5
  50. pixeltable/exprs/sql_element_cache.py +1 -1
  51. pixeltable/exprs/type_cast.py +2 -3
  52. pixeltable/exprs/variable.py +2 -2
  53. pixeltable/ext/__init__.py +2 -0
  54. pixeltable/ext/functions/__init__.py +2 -0
  55. pixeltable/ext/functions/yolox.py +3 -3
  56. pixeltable/func/__init__.py +3 -1
  57. pixeltable/func/aggregate_function.py +9 -9
  58. pixeltable/func/callable_function.py +3 -4
  59. pixeltable/func/expr_template_function.py +6 -16
  60. pixeltable/func/function.py +48 -14
  61. pixeltable/func/function_registry.py +1 -3
  62. pixeltable/func/query_template_function.py +5 -12
  63. pixeltable/func/signature.py +23 -22
  64. pixeltable/func/tools.py +3 -3
  65. pixeltable/func/udf.py +6 -4
  66. pixeltable/functions/__init__.py +2 -0
  67. pixeltable/functions/fireworks.py +7 -4
  68. pixeltable/functions/globals.py +4 -5
  69. pixeltable/functions/huggingface.py +1 -5
  70. pixeltable/functions/image.py +17 -7
  71. pixeltable/functions/llama_cpp.py +1 -1
  72. pixeltable/functions/mistralai.py +1 -1
  73. pixeltable/functions/ollama.py +4 -4
  74. pixeltable/functions/openai.py +19 -19
  75. pixeltable/functions/string.py +23 -30
  76. pixeltable/functions/timestamp.py +11 -6
  77. pixeltable/functions/together.py +14 -12
  78. pixeltable/functions/util.py +1 -1
  79. pixeltable/functions/video.py +5 -4
  80. pixeltable/functions/vision.py +6 -9
  81. pixeltable/functions/whisper.py +3 -3
  82. pixeltable/globals.py +246 -260
  83. pixeltable/index/__init__.py +2 -0
  84. pixeltable/index/base.py +1 -1
  85. pixeltable/index/btree.py +3 -1
  86. pixeltable/index/embedding_index.py +11 -5
  87. pixeltable/io/external_store.py +11 -12
  88. pixeltable/io/label_studio.py +4 -3
  89. pixeltable/io/parquet.py +57 -56
  90. pixeltable/iterators/__init__.py +4 -2
  91. pixeltable/iterators/audio.py +11 -11
  92. pixeltable/iterators/document.py +10 -10
  93. pixeltable/iterators/string.py +1 -2
  94. pixeltable/iterators/video.py +14 -15
  95. pixeltable/metadata/__init__.py +9 -5
  96. pixeltable/metadata/converters/convert_10.py +0 -1
  97. pixeltable/metadata/converters/convert_15.py +0 -2
  98. pixeltable/metadata/converters/convert_23.py +0 -2
  99. pixeltable/metadata/converters/convert_24.py +3 -3
  100. pixeltable/metadata/converters/convert_25.py +1 -1
  101. pixeltable/metadata/converters/convert_27.py +0 -2
  102. pixeltable/metadata/converters/convert_28.py +0 -2
  103. pixeltable/metadata/converters/convert_29.py +7 -8
  104. pixeltable/metadata/converters/util.py +7 -7
  105. pixeltable/metadata/schema.py +27 -19
  106. pixeltable/plan.py +68 -40
  107. pixeltable/share/__init__.py +2 -0
  108. pixeltable/share/packager.py +15 -12
  109. pixeltable/share/publish.py +3 -5
  110. pixeltable/store.py +37 -38
  111. pixeltable/type_system.py +41 -28
  112. pixeltable/utils/coco.py +4 -4
  113. pixeltable/utils/console_output.py +1 -3
  114. pixeltable/utils/description_helper.py +1 -1
  115. pixeltable/utils/documents.py +3 -3
  116. pixeltable/utils/filecache.py +20 -9
  117. pixeltable/utils/formatter.py +2 -3
  118. pixeltable/utils/media_store.py +1 -1
  119. pixeltable/utils/pytorch.py +1 -1
  120. pixeltable/utils/sql.py +4 -4
  121. pixeltable/utils/transactional_directory.py +2 -1
  122. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/METADATA +1 -1
  123. pixeltable-0.3.8.dist-info/RECORD +174 -0
  124. pixeltable-0.3.6.dist-info/RECORD +0 -172
  125. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/LICENSE +0 -0
  126. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/WHEEL +0 -0
  127. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/entry_points.txt +0 -0
@@ -13,7 +13,7 @@ def _(engine: sql.engine.Engine) -> None:
 
 def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
     if k == 'path' and (
-        v in ['pixeltable.functions.huggingface.clip_text', 'pixeltable.functions.huggingface.clip_image']
+        v in {'pixeltable.functions.huggingface.clip_text', 'pixeltable.functions.huggingface.clip_image'}
     ):
         return 'path', 'pixeltable.functions.huggingface.clip'
     return None
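The substitution above rewrites stored references to the retired clip_text/clip_image UDFs; the list-to-set change only tightens the membership test. As a rough, self-contained sketch of how such a hook gets applied over stored metadata (the recursive walker below is a hypothetical stand-in, not Pixeltable's actual __substitute_md_rec):

from typing import Any, Callable, Optional

def substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
    # rewrite stored UDF paths: both retired CLIP entry points now resolve to
    # the merged `clip` function; the set literal makes the fixed set of
    # alternatives explicit and gives O(1) membership tests
    if k == 'path' and v in {
        'pixeltable.functions.huggingface.clip_text',
        'pixeltable.functions.huggingface.clip_image',
    }:
        return 'path', 'pixeltable.functions.huggingface.clip'
    return None  # no substitution for this key/value pair

def substitute_rec(md: Any, fn: Callable) -> Any:
    # apply fn to every key/value pair, recursing into dicts and lists
    if isinstance(md, dict):
        out = {}
        for k, v in md.items():
            subst = fn(k, v)
            if subst is not None:
                k, v = subst
            out[k] = substitute_rec(v, fn)
        return out
    if isinstance(md, list):
        return [substitute_rec(v, fn) for v in md]
    return md

md = {'fn': {'path': 'pixeltable.functions.huggingface.clip_text'}}
assert substitute_rec(md, substitute_md) == {'fn': {'path': 'pixeltable.functions.huggingface.clip'}}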
@@ -1,12 +1,10 @@
 import logging
-from typing import Any, Optional
 from uuid import UUID
 
 import sqlalchemy as sql
 
 from pixeltable.metadata import register_converter
 from pixeltable.metadata.converters.util import convert_table_md
-from pixeltable.metadata.schema import Table
 
 _logger = logging.getLogger('pixeltable')
 
@@ -1,5 +1,3 @@
-import logging
-
 import sqlalchemy as sql
 
 from pixeltable.metadata import register_converter
@@ -63,13 +63,12 @@ def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
     # is an edge case that won't migrate properly.
     parameters: list[dict] = v['fn']['signature']['parameters']
     for i, param in enumerate(parameters):
-        if param['kind'] == 'VAR_POSITIONAL':
-            if new_args_len > i:
-                # For peculiar historical reasons, variable kwargs might show up in args. Thus variable
-                # positional args is not necessarily the last element of args; it might be the second-to-last.
-                assert new_args_len <= i + 2, new_args
-                rolled_args = new_args[i]
-                new_args = new_args[:i] + new_args[i + 1 :]
+        if param['kind'] == 'VAR_POSITIONAL' and new_args_len > i:
+            # For peculiar historical reasons, variable kwargs might show up in args. Thus variable
+            # positional args is not necessarily the last element of args; it might be the second-to-last.
+            assert new_args_len <= i + 2, new_args
+            rolled_args = new_args[i]
+            new_args = new_args[:i] + new_args[i + 1 :]
         if param['kind'] == 'VAR_KEYWORD':
             # As noted above, variable kwargs might show up either in args or in kwargs. If it's in args, it
             # is necessarily the last element.
@@ -81,7 +80,7 @@ def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
                 rolled_kwargs = kwargs.pop(param['name'])
 
     if rolled_args is not None:
-        assert rolled_args['_classname'] in ('InlineArray', 'InlineList')
+        assert rolled_args['_classname'] in {'InlineArray', 'InlineList'}
        	new_args.extend(rolled_args['components'])
     if rolled_kwargs is not None:
         assert rolled_kwargs['_classname'] == 'InlineDict'
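To make the unrolling above concrete, here is a minimal runnable model of it (the data shapes are hypothetical simplifications of the stored FunctionCall metadata): a VAR_POSITIONAL parameter was historically stored "rolled up" as a single InlineList element inside args, and the converter splices its components back in.

parameters = [
    {'name': 'x', 'kind': 'POSITIONAL_OR_KEYWORD'},
    {'name': 'args', 'kind': 'VAR_POSITIONAL'},
]
new_args = [
    {'_classname': 'Literal', 'val': 1},                     # the argument bound to x
    {'_classname': 'InlineList', 'components': ['a', 'b']},  # *args, rolled into one element
]

rolled_args = None
new_args_len = len(new_args)
for i, param in enumerate(parameters):
    if param['kind'] == 'VAR_POSITIONAL' and new_args_len > i:
        assert new_args_len <= i + 2, new_args  # rolled element is last or second-to-last
        rolled_args = new_args[i]
        new_args = new_args[:i] + new_args[i + 1 :]

if rolled_args is not None:
    assert rolled_args['_classname'] in {'InlineArray', 'InlineList'}
    new_args.extend(rolled_args['components'])  # splice the components back in

assert [a if isinstance(a, str) else a['_classname'] for a in new_args] == ['Literal', 'a', 'b']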
pixeltable/metadata/converters/util.py CHANGED
@@ -34,12 +34,12 @@ def convert_table_md(
     """
     with engine.begin() as conn:
         for row in conn.execute(sql.select(Table)):
-            id = row[0]
+            tbl_id = row[0]
             table_md = row[2]
             assert isinstance(table_md, dict)
             updated_table_md = copy.deepcopy(table_md)
             if table_md_updater is not None:
-                table_md_updater(updated_table_md, id)
+                table_md_updater(updated_table_md, tbl_id)
             if column_md_updater is not None:
                 __update_column_md(updated_table_md, column_md_updater)
             if external_store_md_updater is not None:
@@ -47,19 +47,19 @@ def convert_table_md(
             if substitution_fn is not None:
                 updated_table_md = __substitute_md_rec(updated_table_md, substitution_fn)
             if updated_table_md != table_md:
-                __logger.info(f'Updating schema for table: {id}')
-                conn.execute(sql.update(Table).where(Table.id == id).values(md=updated_table_md))
+                __logger.info(f'Updating schema for table: {tbl_id}')
+                conn.execute(sql.update(Table).where(Table.id == tbl_id).values(md=updated_table_md))
 
         for row in conn.execute(sql.select(Function)):
-            id = row[0]
+            fn_id = row[0]
             function_md = row[2]
             assert isinstance(function_md, dict)
             updated_function_md = copy.deepcopy(function_md)
             if substitution_fn is not None:
                 updated_function_md = __substitute_md_rec(updated_function_md, substitution_fn)
             if updated_function_md != function_md:
-                __logger.info(f'Updating function: {id}')
-                conn.execute(sql.update(Function).where(Function.id == id).values(md=updated_function_md))
+                __logger.info(f'Updating function: {fn_id}')
+                conn.execute(sql.update(Function).where(Function.id == fn_id).values(md=updated_function_md))
 
 
 def __update_column_md(table_md: dict, column_md_updater: Callable[[dict], None]) -> None:
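The renames in convert_table_md (id to tbl_id/fn_id) are not only cosmetic: binding id shadows the builtin for the rest of the scope. A short demonstration:

row = ('11111111-2222-3333-4444-555555555555', None, {})
id = row[0]       # shadows builtins.id from here on in this scope
try:
    id(object())  # TypeError: 'str' object is not callable
except TypeError as exc:
    print(exc)
tbl_id = row[0]   # same value, no shadowing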
pixeltable/metadata/schema.py CHANGED
@@ -4,16 +4,14 @@ import uuid
 from typing import Any, Optional, TypeVar, Union, get_type_hints
 
 import sqlalchemy as sql
-import sqlalchemy.orm as orm
-from sqlalchemy import BigInteger, ForeignKey, Integer, LargeBinary
+from sqlalchemy import BigInteger, ForeignKey, Integer, LargeBinary, orm
 from sqlalchemy.dialects.postgresql import JSONB, UUID
-from sqlalchemy.orm import declarative_base
 from sqlalchemy.orm.decl_api import DeclarativeMeta
 
 # Base has to be marked explicitly as a type, in order to be used elsewhere as a type hint. But in addition to being
 # a type, it's also a `DeclarativeMeta`. The following pattern enables us to expose both `Base` and `Base.metadata`
 # outside of the module in a typesafe way.
-Base: type = declarative_base()
+Base: type = orm.declarative_base()
 assert isinstance(Base, DeclarativeMeta)
 base_metadata = Base.metadata
 
@@ -23,7 +21,7 @@ T = TypeVar('T')
 def md_from_dict(data_class_type: type[T], data: Any) -> T:
     """Re-instantiate a dataclass instance that contains nested dataclasses from a dict."""
     if dataclasses.is_dataclass(data_class_type):
-        fieldtypes = {f: t for f, t in get_type_hints(data_class_type).items()}
+        fieldtypes = get_type_hints(data_class_type)
         return data_class_type(**{f: md_from_dict(fieldtypes[f], data[f]) for f in data})  # type: ignore[return-value]
 
     origin = typing.get_origin(data_class_type)
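For context on the fieldtypes simplification: get_type_hints() already returns a dict, so the comprehension around it was a no-op. A self-contained model of the rehydration pattern (a stripped-down sketch; the real md_from_dict also handles Optional, Union, and tuple types):

import dataclasses
import typing
from typing import get_type_hints

@dataclasses.dataclass
class ColumnMd:
    name: str
    col_type: str

@dataclasses.dataclass
class TableMd:
    name: str
    columns: list[ColumnMd]

def md_from_dict(data_class_type, data):
    # simplified rehydrator: recurse into nested dataclasses and lists
    if dataclasses.is_dataclass(data_class_type):
        fieldtypes = get_type_hints(data_class_type)  # already a dict
        return data_class_type(**{f: md_from_dict(fieldtypes[f], data[f]) for f in data})
    if typing.get_origin(data_class_type) is list:
        (elem_type,) = typing.get_args(data_class_type)
        return [md_from_dict(elem_type, v) for v in data]
    return data

md = {'name': 't', 'columns': [{'name': 'c1', 'col_type': 'int'}]}
tbl = md_from_dict(TableMd, md)
assert tbl.columns[0] == ColumnMd(name='c1', col_type='int')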
@@ -43,7 +41,7 @@ def md_from_dict(data_class_type: type[T], data: Any) -> T:
         elif origin is tuple:
             return tuple(md_from_dict(arg_type, elem) for arg_type, elem in zip(type_args, data))  # type: ignore[return-value]
         else:
-            assert False
+            raise AssertionError(origin)
     else:
         return data
 
@@ -85,7 +83,7 @@ class Dir(Base):
         UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, nullable=False
     )
     parent_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
-    md = sql.Column(JSONB, nullable=False)
+    md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)  # DirMd
 
 
 @dataclasses.dataclass
@@ -131,13 +129,17 @@ class IndexMd:
     init_args: dict[str, Any]
 
 
+# a stored table version path is a list of (table id as str, effective table version)
+TableVersionPath = list[tuple[str, Optional[int]]]
+
+
 @dataclasses.dataclass
 class ViewMd:
     is_snapshot: bool
     include_base_columns: bool
 
     # (table id, version); for mutable views, all versions are None
-    base_versions: list[tuple[str, Optional[int]]]
+    base_versions: TableVersionPath
 
     # filter predicate applied to the base table; view-only
     predicate: Optional[dict[str, Any]]
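Illustrative values for the new alias (the base-table id is hypothetical): a snapshot pins each base to a concrete version, while a mutable view stores None for every version.

import uuid
from typing import Optional

# the new alias from the diff: (table id as str, effective table version)
TableVersionPath = list[tuple[str, Optional[int]]]

base_id = str(uuid.uuid4())  # hypothetical base-table id
snapshot_bases: TableVersionPath = [(base_id, 3)]         # snapshot pinned to version 3
mutable_view_bases: TableVersionPath = [(base_id, None)]  # mutable view: all versions None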
@@ -192,7 +194,7 @@ class Table(Base):
 
     id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), primary_key=True, nullable=False)
     dir_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=False)
-    md = sql.Column(JSONB, nullable=False)  # TableMd
+    md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)  # TableMd
 
 
 @dataclasses.dataclass
@@ -205,9 +207,11 @@ class TableVersionMd:
 
 class TableVersion(Base):
     __tablename__ = 'tableversions'
-    tbl_id = sql.Column(UUID(as_uuid=True), ForeignKey('tables.id'), primary_key=True, nullable=False)
-    version = sql.Column(BigInteger, primary_key=True, nullable=False)
-    md = sql.Column(JSONB, nullable=False)  # TableVersionMd
+    tbl_id: orm.Mapped[uuid.UUID] = orm.mapped_column(
+        UUID(as_uuid=True), ForeignKey('tables.id'), primary_key=True, nullable=False
+    )
+    version: orm.Mapped[int] = orm.mapped_column(BigInteger, primary_key=True, nullable=False)
+    md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)
 
 
 @dataclasses.dataclass
@@ -246,9 +250,11 @@ class TableSchemaVersionMd:
 
 class TableSchemaVersion(Base):
     __tablename__ = 'tableschemaversions'
 
-    tbl_id = sql.Column(UUID(as_uuid=True), ForeignKey('tables.id'), primary_key=True, nullable=False)
-    schema_version = sql.Column(BigInteger, primary_key=True, nullable=False)
-    md = sql.Column(JSONB, nullable=False)  # TableSchemaVersionMd
+    tbl_id: orm.Mapped[uuid.UUID] = orm.mapped_column(
+        UUID(as_uuid=True), ForeignKey('tables.id'), primary_key=True, nullable=False
+    )
+    schema_version: orm.Mapped[int] = orm.mapped_column(BigInteger, primary_key=True, nullable=False)
+    md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)  # TableSchemaVersionMd
 
 
 @dataclasses.dataclass
@@ -271,7 +277,9 @@ class Function(Base):
 
     __tablename__ = 'functions'
 
-    id = sql.Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, nullable=False)
-    dir_id = sql.Column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
-    md = sql.Column(JSONB, nullable=False)  # FunctionMd
-    binary_obj = sql.Column(LargeBinary, nullable=True)
+    id: orm.Mapped[uuid.UUID] = orm.mapped_column(
+        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, nullable=False
+    )
+    dir_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
+    md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)  # FunctionMd
+    binary_obj: orm.Mapped[Optional[bytes]] = orm.mapped_column(LargeBinary, nullable=True)
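All of the schema.py changes above follow the same migration: legacy untyped sql.Column attributes become SQLAlchemy 2.0-style orm.Mapped[...] annotations backed by orm.mapped_column(...), so static type checkers see accurate attribute types. A minimal runnable sketch of the pattern (using SQLite and the generic JSON type instead of Postgres JSONB, purely to keep it self-contained):

import uuid

from sqlalchemy import JSON, BigInteger, create_engine, orm

Base = orm.declarative_base()

class TableVersionDemo(Base):
    __tablename__ = 'tableversions_demo'
    # the Mapped[...] annotation types the attribute for static checkers;
    # mapped_column() carries the SQL-level details
    tbl_id: orm.Mapped[str] = orm.mapped_column(primary_key=True)
    version: orm.Mapped[int] = orm.mapped_column(BigInteger, primary_key=True)
    md: orm.Mapped[dict] = orm.mapped_column(JSON, nullable=False)

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
with orm.Session(engine) as session:
    session.add(TableVersionDemo(tbl_id=str(uuid.uuid4()), version=0, md={'created_at': 0}))
    session.commit()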
pixeltable/plan.py CHANGED
@@ -2,14 +2,15 @@ from __future__ import annotations
 
 import dataclasses
 import enum
+from textwrap import dedent
 from typing import Any, Iterable, Literal, Optional, Sequence
 from uuid import UUID
 
 import sqlalchemy as sql
 
 import pixeltable as pxt
-import pixeltable.exec as exec
-from pixeltable import catalog, exceptions as excs, exprs
+from pixeltable import catalog, exceptions as excs, exec, exprs
+from pixeltable.catalog import Column, TableVersionHandle
 from pixeltable.exec.sql_node import OrderByClause, OrderByItem, combine_order_by_clauses, print_order_by_clause
 
 
@@ -54,9 +55,9 @@ class JoinType(enum.Enum):
     def validated(cls, name: str, error_prefix: str) -> JoinType:
         try:
             return cls[name.upper()]
-        except KeyError:
-            val_strs = ', '.join(f'{s.lower()!r}' for s in cls.__members__.keys())
-            raise excs.Error(f'{error_prefix} must be one of: [{val_strs}]')
+        except KeyError as exc:
+            val_strs = ', '.join(f'{s.lower()!r}' for s in cls.__members__)
+            raise excs.Error(f'{error_prefix} must be one of: [{val_strs}]') from exc
 
 
 @dataclasses.dataclass
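The from exc added to JoinType.validated chains the original KeyError as __cause__, so tracebacks show the root cause rather than "During handling of the above exception, another exception occurred". A minimal model of the pattern:

class Error(Exception):
    pass

def validated(name: str, members: dict) -> str:
    try:
        return members[name.upper()]
    except KeyError as exc:
        # `from exc` records the KeyError as the new error's __cause__
        raise Error(f'join type must be one of: {sorted(members)}') from exc

try:
    validated('sideways', {'INNER': 'inner', 'LEFT': 'left'})
except Error as e:
    assert isinstance(e.__cause__, KeyError)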
@@ -177,19 +178,21 @@ class Analyzer:
         )
 
         # check that Where clause and filter doesn't contain aggregates
-        if self.sql_where_clause is not None:
-            if any(_is_agg_fn_call(e) for e in self.sql_where_clause.subexprs(expr_class=exprs.FunctionCall)):
-                raise excs.Error(f'where() cannot contain aggregate functions: {self.sql_where_clause}')
-        if self.filter is not None:
-            if any(_is_agg_fn_call(e) for e in self.filter.subexprs(expr_class=exprs.FunctionCall)):
-                raise excs.Error(f'where() cannot contain aggregate functions: {self.filter}')
+        if self.sql_where_clause is not None and any(
+            _is_agg_fn_call(e) for e in self.sql_where_clause.subexprs(expr_class=exprs.FunctionCall)
+        ):
+            raise excs.Error(f'where() cannot contain aggregate functions: {self.sql_where_clause}')
+        if self.filter is not None and any(
+            _is_agg_fn_call(e) for e in self.filter.subexprs(expr_class=exprs.FunctionCall)
+        ):
+            raise excs.Error(f'where() cannot contain aggregate functions: {self.filter}')
 
         # check that grouping exprs don't contain aggregates and can be expressed as SQL (we perform sort-based
         # aggregation and rely on the SqlScanNode returning data in the correct order)
         for e in self.group_by_clause:
             if not self.sql_elements.contains(e):
                 raise excs.Error(f'Invalid grouping expression, needs to be expressible in SQL: {e}')
-            if e._contains(filter=lambda e: _is_agg_fn_call(e)):
+            if e._contains(filter=_is_agg_fn_call):
                 raise excs.Error(f'Grouping expression contains aggregate function: {e}')
 
     def _determine_agg_status(self, e: exprs.Expr, grouping_expr_ids: set[int]) -> tuple[bool, bool]:
@@ -207,7 +210,7 @@
             return True, False
         elif isinstance(e, exprs.Literal):
             return True, True
-        elif isinstance(e, exprs.ColumnRef) or isinstance(e, exprs.RowidRef):
+        elif isinstance(e, (exprs.ColumnRef, exprs.RowidRef)):
             # we already know that this isn't a grouping expr
             return False, True
         else:
@@ -275,14 +278,19 @@
         cls, tbl: catalog.TableVersion, rows: list[dict[str, Any]], ignore_errors: bool
     ) -> exec.ExecNode:
         """Creates a plan for TableVersion.insert()"""
-        assert not tbl.is_view()
+        assert not tbl.is_view
         # stored_cols: all cols we need to store, incl computed cols (and indices)
         stored_cols = [c for c in tbl.cols_by_id.values() if c.is_stored]
         assert len(stored_cols) > 0  # there needs to be something to store
+
+        cls.__check_valid_columns(tbl, stored_cols, 'inserted into')
+
         row_builder = exprs.RowBuilder([], stored_cols, [])
 
         # create InMemoryDataNode for 'rows'
-        plan: exec.ExecNode = exec.InMemoryDataNode(tbl, rows, row_builder, tbl.next_rowid)
+        plan: exec.ExecNode = exec.InMemoryDataNode(
+            TableVersionHandle(tbl.id, tbl.effective_version), rows, row_builder, tbl.next_rowid
+        )
 
         media_input_col_info = [
             exprs.ColumnSlotIdx(col_ref.col, col_ref.slot_idx)
@@ -318,7 +326,7 @@
     def create_df_insert_plan(
         cls, tbl: catalog.TableVersion, df: 'pxt.DataFrame', ignore_errors: bool
     ) -> exec.ExecNode:
-        assert not tbl.is_view()
+        assert not tbl.is_view
         plan = df._create_query_plan()  # ExecNode constructed by the DataFrame
 
         # Modify the plan RowBuilder to register the output columns
@@ -363,7 +371,7 @@
         """
         # retrieve all stored cols and all target exprs
         assert isinstance(tbl, catalog.TableVersionPath)
-        target = tbl.tbl_version  # the one we need to update
+        target = tbl.tbl_version.get()  # the one we need to update
         updated_cols = list(update_targets.keys())
         if len(recompute_targets) > 0:
             recomputed_cols = set(recompute_targets)
@@ -374,11 +382,14 @@
             recomputed_cols.update(idx_val_cols)
         # we only need to recompute stored columns (unstored ones are substituted away)
         recomputed_cols = {c for c in recomputed_cols if c.is_stored}
-        recomputed_base_cols = {col for col in recomputed_cols if col.tbl == target}
+
+        cls.__check_valid_columns(tbl.tbl_version.get(), recomputed_cols, 'updated in')
+
+        recomputed_base_cols = {col for col in recomputed_cols if col.tbl == tbl.tbl_version}
         copied_cols = [
             col
             for col in target.cols_by_id.values()
-            if col.is_stored and not col in updated_cols and not col in recomputed_base_cols
+            if col.is_stored and col not in updated_cols and col not in recomputed_base_cols
         ]
         select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in copied_cols]
         select_list.extend(update_targets.values())
@@ -398,7 +409,25 @@
         for i, col in enumerate(all_base_cols):
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
         recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
-        return plan, [f'{c.tbl.name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols
+        return plan, [f'{c.tbl.get().name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols
+
+    @classmethod
+    def __check_valid_columns(
+        cls, tbl: catalog.TableVersion, cols: Iterable[Column], op_name: Literal['inserted into', 'updated in']
+    ) -> None:
+        for col in cols:
+            if col.value_expr is not None and not col.value_expr.is_valid:
+                raise excs.Error(
+                    dedent(
+                        f"""
+                        Data cannot be {op_name} the table {tbl.name!r},
+                        because the column {col.name!r} is currently invalid:
+                        {{validation_error}}
+                        """
+                    )
+                    .strip()
+                    .format(validation_error=col.value_expr.validation_error)
+                )
 
     @classmethod
     def create_batch_update_plan(
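The error-message construction in the new __check_valid_columns is worth unpacking: the doubled braces escape the placeholder inside the f-string, dedent() strips the template's indentation, and .format() injects the (possibly multi-line) validation error afterwards, so it is not subject to the dedent. A standalone rendering of the same pattern, with hypothetical values:

from textwrap import dedent

op_name, table_name, col_name = 'inserted into', 'films', 'summary'
validation_error = 'function not found:\nmy_udfs.summarize'  # hypothetical, multi-line

msg = (
    dedent(
        f"""
        Data cannot be {op_name} the table {table_name!r},
        because the column {col_name!r} is currently invalid:
        {{validation_error}}
        """
    )
    .strip()
    .format(validation_error=validation_error)
)
print(msg)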
@@ -417,7 +446,7 @@
         - list of user-visible columns that are being recomputed
         """
         assert isinstance(tbl, catalog.TableVersionPath)
-        target = tbl.tbl_version  # the one we need to update
+        target = tbl.tbl_version.get()  # the one we need to update
         sa_key_cols: list[sql.Column] = []
         key_vals: list[tuple] = []
         if len(rowids) > 0:
@@ -440,7 +469,7 @@
         copied_cols = [
             col
             for col in target.cols_by_id.values()
-            if col.is_stored and not col in updated_cols and not col in recomputed_base_cols
+            if col.is_stored and col not in updated_cols and col not in recomputed_base_cols
         ]
         select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in copied_cols]
         select_list.extend(exprs.ColumnRef(col) for col in updated_cols)
@@ -507,11 +536,11 @@
         - list of columns that are being recomputed
         """
         assert isinstance(view, catalog.TableVersionPath)
-        assert view.is_view()
-        target = view.tbl_version  # the one we need to update
+        assert view.is_view
+        target = view.tbl_version.get()  # the one we need to update
         # retrieve all stored cols and all target exprs
         recomputed_cols = set(recompute_targets.copy())
-        copied_cols = [col for col in target.cols_by_id.values() if col.is_stored and not col in recomputed_cols]
+        copied_cols = [col for col in target.cols_by_id.values() if col.is_stored and col not in recomputed_cols]
         select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in copied_cols]
         # resolve recomputed exprs to stored columns in the base
         recomputed_exprs = [
@@ -551,13 +580,13 @@
         - number of materialized values per row
         """
         assert isinstance(view, catalog.TableVersionPath)
-        assert view.is_view()
+        assert view.is_view
         # things we need to materialize as DataRows:
         # 1. stored computed cols
         #    - iterator columns are effectively computed, just not with a value_expr
         #    - we can ignore stored non-computed columns because they have a default value that is supplied directly by
         #      the store
-        target = view.tbl_version  # the one we need to populate
+        target = view.tbl_version.get()  # the one we need to populate
         stored_cols = [c for c in target.cols_by_id.values() if c.is_stored]
         # 2. for component views: iterator args
         iterator_args = [target.iterator_args] if target.iterator_args is not None else []
@@ -585,8 +614,8 @@
             exact_version_only=view.get_bases() if propagates_insert else [],
         )
         exec_ctx = plan.ctx
-        if target.is_component_view():
-            plan = exec.ComponentIterationNode(target, plan)
+        if target.is_component_view:
+            plan = exec.ComponentIterationNode(view.tbl_version, plan)
         if len(view_output_exprs) > 0:
             plan = exec.ExprEvalNode(
                 row_builder, output_exprs=view_output_exprs, input_exprs=base_output_exprs, input=plan
@@ -639,11 +668,12 @@
     @classmethod
     def _is_contained_in(cls, l1: Iterable[exprs.Expr], l2: Iterable[exprs.Expr]) -> bool:
         """Returns True if l1 is contained in l2"""
-        s1, s2 = set(e.id for e in l1), set(e.id for e in l2)
-        return s1 <= s2
+        return {e.id for e in l1} <= {e.id for e in l2}
 
     @classmethod
-    def _insert_prefetch_node(cls, tbl_id: UUID, row_builder: exprs.RowBuilder, input: exec.ExecNode) -> exec.ExecNode:
+    def _insert_prefetch_node(
+        cls, tbl_id: UUID, row_builder: exprs.RowBuilder, input_node: exec.ExecNode
+    ) -> exec.ExecNode:
         """Returns a CachePrefetchNode into the plan if needed, otherwise returns input"""
         # we prefetch external files for all media ColumnRefs, even those that aren't part of the dependencies
         # of output_exprs: if unstored iterator columns are present, we might need to materialize ColumnRefs that
@@ -652,10 +682,10 @@
             e for e in list(row_builder.unique_exprs) if isinstance(e, exprs.ColumnRef) and e.col_type.is_media_type()
         ]
         if len(media_col_refs) == 0:
-            return input
+            return input_node
         # we need to prefetch external files for media column types
         file_col_info = [exprs.ColumnSlotIdx(e.col, e.slot_idx) for e in media_col_refs]
-        prefetch_node = exec.CachePrefetchNode(tbl_id, file_col_info, input)
+        prefetch_node = exec.CachePrefetchNode(tbl_id, file_col_info, input_node)
         return prefetch_node
 
     @classmethod
@@ -668,7 +698,7 @@
         order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None,
         limit: Optional[exprs.Expr] = None,
         ignore_errors: bool = False,
-        exact_version_only: Optional[list[catalog.TableVersion]] = None,
+        exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
     ) -> exec.ExecNode:
         """Return plan for executing a query.
         Updates 'select_list' in place to make it executable.
@@ -714,7 +744,7 @@
         eval_ctx: exprs.RowBuilder.EvalCtx,
         limit: Optional[exprs.Expr] = None,
         with_pk: bool = False,
-        exact_version_only: Optional[list[catalog.TableVersion]] = None,
+        exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
     ) -> exec.ExecNode:
         """
         Create plan to materialize eval_ctx.
@@ -752,13 +782,11 @@
         )
         if analyzer.filter is not None:
             candidates.extend(
-                exprs.Expr.subexprs(analyzer.filter, filter=lambda e: sql_elements.contains(e), traverse_matches=False)
+                exprs.Expr.subexprs(analyzer.filter, filter=sql_elements.contains, traverse_matches=False)
             )
         if is_python_agg and analyzer.group_by_clause is not None:
             candidates.extend(
-                exprs.Expr.list_subexprs(
-                    analyzer.group_by_clause, filter=lambda e: sql_elements.contains(e), traverse_matches=False
-                )
+                exprs.Expr.list_subexprs(analyzer.group_by_clause, filter=sql_elements.contains, traverse_matches=False)
             )
         # not isinstance(...): we don't want to materialize Literals via a Select
         sql_exprs = exprs.ExprSet(e for e in candidates if not isinstance(e, exprs.Literal))
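A recurring cleanup in this file replaces lambda e: f(e) wrappers with the callable itself (filter=sql_elements.contains, filter=_is_agg_fn_call); the two are equivalent, but the direct reference avoids an extra call frame per element and reads more clearly. A small self-contained check, with a stand-in cache class:

class Cache:
    def __init__(self, items):
        self._items = set(items)

    def contains(self, x) -> bool:
        return x in self._items

cache = Cache({1, 2, 3})
candidates = [0, 1, 2, 5]
# the bound method and the lambda wrapper select the same elements
assert list(filter(cache.contains, candidates)) == [1, 2]
assert list(filter(lambda e: cache.contains(e), candidates)) == [1, 2]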
pixeltable/share/__init__.py CHANGED
@@ -1 +1,3 @@
+# ruff: noqa: F401
+
 from .publish import publish_snapshot
pixeltable/share/packager.py CHANGED
@@ -66,13 +66,15 @@ class TablePackager:
             'tables': [
                 {
                     'table_id': str(t._tbl_version.id),
-                    # These are temporary; will replace with a better solution once the concurrency changes to catalog have
-                    # been merged
-                    'table_md': dataclasses.asdict(t._tbl_version._create_tbl_md()),
+                    # These are temporary; will replace with a better solution once the concurrency
+                    # changes to catalog have been merged
+                    'table_md': dataclasses.asdict(t._tbl_version.get()._create_tbl_md()),
                     'table_version_md': dataclasses.asdict(
-                        t._tbl_version._create_version_md(datetime.now().timestamp())
+                        t._tbl_version.get()._create_version_md(datetime.now().timestamp())
+                    ),
+                    'table_schema_version_md': dataclasses.asdict(
+                        t._tbl_version.get()._create_schema_version_md(0)
                     ),
-                    'table_schema_version_md': dataclasses.asdict(t._tbl_version._create_schema_version_md(0)),
                 }
                 for t in (table, *table._bases)
             ]
@@ -91,11 +93,12 @@ class TablePackager:
         with open(self.tmp_dir / 'metadata.json', 'w', encoding='utf8') as fp:
             json.dump(self.md, fp)
         self.iceberg_catalog = sqlite_catalog(self.tmp_dir / 'warehouse')
-        ancestors = (self.table, *self.table._bases)
-        for t in ancestors:
-            _logger.info(f"Exporting table '{t._path}'.")
-            self.__export_table(t)
-        _logger.info(f'Building archive.')
+        with Env.get().begin_xact():
+            ancestors = (self.table, *self.table._bases)
+            for t in ancestors:
+                _logger.info(f"Exporting table '{t._path}'.")
+                self.__export_table(t)
+        _logger.info('Building archive.')
         bundle_path = self.__build_tarball()
         _logger.info(f'Packaging complete: {bundle_path}')
         return bundle_path
@@ -117,7 +120,7 @@
         # to get the column types, since we'll be substituting `fileurl`s for media columns.
         actual_col_types: list[ts.ColumnType] = []
 
-        for col_name, col in t._tbl_version.cols_by_name.items():
+        for col_name, col in t._tbl_version.get().cols_by_name.items():
             if not col.is_stored:
                 continue
             if col.col_type.is_media_type():
@@ -150,7 +153,7 @@
         """
         Iceberg tables must have a namespace, which cannot be the empty string, so we prepend `pxt` to the table path.
         """
-        parent_path = table._parent._path
+        parent_path = table._parent()._path()
         if len(parent_path) == 0:
             return 'pxt'
         else:
pixeltable/share/publish.py CHANGED
@@ -1,16 +1,14 @@
-import dataclasses
 import os
 import sys
 import urllib.parse
 import urllib.request
-from datetime import datetime
 from pathlib import Path
 
 import requests
 from tqdm import tqdm
 
 import pixeltable as pxt
-from pixeltable import exceptions as excs, metadata
+from pixeltable import exceptions as excs
 from pixeltable.env import Env
 from pixeltable.utils import sha256sum
 
@@ -46,7 +44,7 @@ def publish_snapshot(dest_tbl_uri: str, src_tbl: pxt.Table) -> str:
     else:
         raise excs.Error(f'Unsupported destination: {destination_uri}')
 
-    Env.get().console_logger.info(f'Finalizing snapshot ...')
+    Env.get().console_logger.info('Finalizing snapshot ...')
 
     finalize_request_json = {
         'upload_id': upload_id,
@@ -83,7 +81,7 @@ def _upload_bundle_to_s3(bundle: Path, parsed_location: urllib.parse.ParseResult
     upload_args = {'ChecksumAlgorithm': 'SHA256'}
 
     progress_bar = tqdm(
-        desc=f'Uploading',
+        desc='Uploading',
         total=bundle.stat().st_size,
        	unit='B',
        	unit_scale=True,
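The last two hunks fix Ruff F541 findings: f-strings with no placeholders become plain strings. For reference, a minimal runnable version of the same tqdm progress-bar setup, with a hypothetical byte count standing in for the bundle size:

from tqdm import tqdm

total = 1_000_000  # hypothetical bundle size in bytes
progress_bar = tqdm(
    desc='Uploading',  # plain string: no placeholders, so no f-prefix needed
    total=total,
    unit='B',
    unit_scale=True,
)
for offset in range(0, total, 64_000):
    progress_bar.update(min(64_000, total - offset))
progress_bar.close()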