PyPI - pixeltable - Versions diffs - 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl - Mend

pixeltable 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (94)

pixeltable/__init__.py +5 -3
pixeltable/__version__.py +2 -2
pixeltable/catalog/__init__.py +1 -0
pixeltable/catalog/catalog.py +335 -128
pixeltable/catalog/column.py +21 -5
pixeltable/catalog/dir.py +19 -6
pixeltable/catalog/insertable_table.py +34 -37
pixeltable/catalog/named_function.py +0 -4
pixeltable/catalog/schema_object.py +28 -42
pixeltable/catalog/table.py +195 -158
pixeltable/catalog/table_version.py +187 -232
pixeltable/catalog/table_version_handle.py +50 -0
pixeltable/catalog/table_version_path.py +49 -33
pixeltable/catalog/view.py +56 -96
pixeltable/config.py +103 -0
pixeltable/dataframe.py +90 -90
pixeltable/env.py +98 -168
pixeltable/exec/aggregation_node.py +5 -4
pixeltable/exec/cache_prefetch_node.py +1 -1
pixeltable/exec/component_iteration_node.py +13 -9
pixeltable/exec/data_row_batch.py +3 -3
pixeltable/exec/exec_context.py +0 -4
pixeltable/exec/exec_node.py +3 -2
pixeltable/exec/expr_eval/schedulers.py +2 -1
pixeltable/exec/in_memory_data_node.py +9 -4
pixeltable/exec/row_update_node.py +1 -2
pixeltable/exec/sql_node.py +20 -16
pixeltable/exprs/column_ref.py +9 -9
pixeltable/exprs/comparison.py +1 -1
pixeltable/exprs/data_row.py +4 -4
pixeltable/exprs/expr.py +20 -5
pixeltable/exprs/function_call.py +98 -58
pixeltable/exprs/json_mapper.py +25 -8
pixeltable/exprs/json_path.py +6 -5
pixeltable/exprs/object_ref.py +16 -5
pixeltable/exprs/row_builder.py +15 -15
pixeltable/exprs/rowid_ref.py +21 -7
pixeltable/func/__init__.py +1 -1
pixeltable/func/function.py +38 -6
pixeltable/func/query_template_function.py +3 -6
pixeltable/func/tools.py +26 -26
pixeltable/func/udf.py +1 -1
pixeltable/functions/__init__.py +2 -0
pixeltable/functions/anthropic.py +9 -3
pixeltable/functions/fireworks.py +7 -4
pixeltable/functions/globals.py +4 -5
pixeltable/functions/huggingface.py +1 -5
pixeltable/functions/image.py +17 -7
pixeltable/functions/llama_cpp.py +1 -1
pixeltable/functions/mistralai.py +1 -1
pixeltable/functions/ollama.py +4 -4
pixeltable/functions/openai.py +26 -23
pixeltable/functions/string.py +23 -30
pixeltable/functions/timestamp.py +11 -6
pixeltable/functions/together.py +14 -12
pixeltable/functions/util.py +1 -1
pixeltable/functions/video.py +5 -4
pixeltable/functions/vision.py +6 -9
pixeltable/functions/whisper.py +3 -3
pixeltable/globals.py +246 -260
pixeltable/index/__init__.py +2 -0
pixeltable/index/base.py +1 -1
pixeltable/index/btree.py +3 -1
pixeltable/index/embedding_index.py +11 -5
pixeltable/io/external_store.py +11 -12
pixeltable/io/label_studio.py +4 -3
pixeltable/io/parquet.py +57 -56
pixeltable/iterators/__init__.py +4 -2
pixeltable/iterators/audio.py +11 -11
pixeltable/iterators/document.py +10 -10
pixeltable/iterators/string.py +1 -2
pixeltable/iterators/video.py +14 -15
pixeltable/metadata/__init__.py +9 -5
pixeltable/metadata/converters/convert_10.py +0 -1
pixeltable/metadata/converters/convert_15.py +0 -2
pixeltable/metadata/converters/convert_23.py +0 -2
pixeltable/metadata/converters/convert_24.py +3 -3
pixeltable/metadata/converters/convert_25.py +1 -1
pixeltable/metadata/converters/convert_27.py +0 -2
pixeltable/metadata/converters/convert_28.py +0 -2
pixeltable/metadata/converters/convert_29.py +7 -8
pixeltable/metadata/converters/util.py +7 -7
pixeltable/metadata/schema.py +27 -19
pixeltable/plan.py +68 -40
pixeltable/share/packager.py +12 -9
pixeltable/store.py +37 -38
pixeltable/type_system.py +41 -28
pixeltable/utils/filecache.py +2 -1
{pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/METADATA +1 -1
pixeltable-0.3.7.dist-info/RECORD +174 -0
pixeltable-0.3.5.dist-info/RECORD +0 -172
{pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/LICENSE +0 -0
{pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/WHEEL +0 -0
{pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/entry_points.txt +0 -0

pixeltable/exec/in_memory_data_node.py CHANGED Viewed

@@ -20,7 +20,8 @@ class InMemoryDataNode(ExecNode):
     - if an input row doesn't provide a value, sets the slot to the column default
     """
-    tbl: catalog.TableVersion
+    tbl: catalog.TableVersionHandle
     input_rows: list[dict[str, Any]]
     start_row_id: int
     output_rows: Optional[DataRowBatch]
@@ -29,12 +30,16 @@ class InMemoryDataNode(ExecNode):
     output_exprs: list[exprs.ColumnRef]
     def __init__(
-        self, tbl: catalog.TableVersion, rows: list[dict[str, Any]], row_builder: exprs.RowBuilder, start_row_id: int
+        self,
+        tbl: catalog.TableVersionHandle,
+        rows: list[dict[str, Any]],
+        row_builder: exprs.RowBuilder,
+        start_row_id: int,
     ):
         # we materialize the input slots
         output_exprs = list(row_builder.input_exprs)
         super().__init__(row_builder, output_exprs, [], None)
-        assert tbl.is_insertable()
+        assert tbl.get().is_insertable()
         self.tbl = tbl
         self.input_rows = rows
         self.start_row_id = start_row_id
@@ -62,7 +67,7 @@ class InMemoryDataNode(ExecNode):
                 if col_info.col.col_type.is_image_type() and isinstance(val, bytes):
                     # this is a literal image, ie, a sequence of bytes; we save this as a media file and store the path
-                    path = str(MediaStore.prepare_media_path(self.tbl.id, col_info.col.id, self.tbl.version))
+                    path = str(MediaStore.prepare_media_path(self.tbl.id, col_info.col.id, self.tbl.get().version))
                     open(path, 'wb').write(val)
                     val = path
                 self.output_rows[row_idx][col_info.slot_idx] = val

pixeltable/exec/row_update_node.py CHANGED Viewed

@@ -3,7 +3,6 @@ from typing import Any, AsyncIterator
 import pixeltable.catalog as catalog
 import pixeltable.exprs as exprs
-from pixeltable.utils.media_store import MediaStore
 from .data_row_batch import DataRowBatch
 from .exec_node import ExecNode
@@ -40,7 +39,7 @@ class RowUpdateNode(ExecNode):
             if isinstance(col_ref, exprs.ColumnRef)
         }
         self.col_slot_idxs = {col: all_col_slot_idxs[col] for col in col_vals_batch[0].keys()}
-        self.key_slot_idxs = {col: all_col_slot_idxs[col] for col in tbl.tbl_version.primary_key_columns()}
+        self.key_slot_idxs = {col: all_col_slot_idxs[col] for col in tbl.tbl_version.get().primary_key_columns()}
         self.matched_key_vals: set[tuple] = set()
     async def __aiter__(self) -> AsyncIterator[DataRowBatch]:

pixeltable/exec/sql_node.py CHANGED Viewed

@@ -1,13 +1,14 @@
 import logging
 import warnings
 from decimal import Decimal
-from typing import TYPE_CHECKING, AsyncIterator, Iterable, Iterator, NamedTuple, Optional, Sequence
+from typing import TYPE_CHECKING, AsyncIterator, Iterable, NamedTuple, Optional, Sequence
 from uuid import UUID
 import sqlalchemy as sql
 import pixeltable.catalog as catalog
 import pixeltable.exprs as exprs
+from pixeltable.env import Env
 from .data_row_batch import DataRowBatch
 from .exec_node import ExecNode
@@ -122,7 +123,7 @@ class SqlNode(ExecNode):
         if set_pk:
             # we also need to retrieve the pk columns
             assert tbl is not None
-            self.num_pk_cols = len(tbl.tbl_version.store_tbl.pk_columns())
+            self.num_pk_cols = len(tbl.tbl_version.get().store_tbl.pk_columns())
         # additional state
         self.result_cursor = None
@@ -142,7 +143,7 @@ class SqlNode(ExecNode):
         sql_select_list = [self.sql_elements.get(e) for e in self.select_list]
         if self.set_pk:
             assert self.tbl is not None
-            sql_select_list += self.tbl.tbl_version.store_tbl.pk_columns()
+            sql_select_list += self.tbl.tbl_version.get().store_tbl.pk_columns()
         stmt = sql.select(*sql_select_list)
         where_clause_element = (
@@ -215,29 +216,31 @@ class SqlNode(ExecNode):
             exact_version_only = set()
         candidates = tbl.get_tbl_versions()
         assert len(candidates) > 0
-        joined_tbls: list[catalog.TableVersion] = [candidates[0]]
+        joined_tbls: list[catalog.TableVersionHandle] = [candidates[0]]
         for tbl in candidates[1:]:
             if tbl.id in refd_tbl_ids:
                 joined_tbls.append(tbl)
         first = True
-        prev_tbl: catalog.TableVersion
+        prev_tbl: catalog.TableVersionHandle
         for tbl in joined_tbls[::-1]:
             if first:
-                stmt = stmt.select_from(tbl.store_tbl.sa_tbl)
+                stmt = stmt.select_from(tbl.get().store_tbl.sa_tbl)
                 first = False
             else:
                 # join tbl to prev_tbl on prev_tbl's rowid cols
-                prev_tbl_rowid_cols = prev_tbl.store_tbl.rowid_columns()
-                tbl_rowid_cols = tbl.store_tbl.rowid_columns()
+                prev_tbl_rowid_cols = prev_tbl.get().store_tbl.rowid_columns()
+                tbl_rowid_cols = tbl.get().store_tbl.rowid_columns()
                 rowid_clauses = [
                     c1 == c2 for c1, c2 in zip(prev_tbl_rowid_cols, tbl_rowid_cols[: len(prev_tbl_rowid_cols)])
                 ]
-                stmt = stmt.join(tbl.store_tbl.sa_tbl, sql.and_(*rowid_clauses))
+                stmt = stmt.join(tbl.get().store_tbl.sa_tbl, sql.and_(*rowid_clauses))
             if tbl.id in exact_version_only:
-                stmt = stmt.where(tbl.store_tbl.v_min_col == tbl.version)
+                stmt = stmt.where(tbl.get().store_tbl.v_min_col == tbl.get().version)
             else:
-                stmt = stmt.where(tbl.store_tbl.v_min_col <= tbl.version).where(tbl.store_tbl.v_max_col > tbl.version)
+                stmt = stmt.where(tbl.get().store_tbl.v_min_col <= tbl.get().version).where(
+                    tbl.get().store_tbl.v_max_col > tbl.get().version
+                )
             prev_tbl = tbl
         return stmt
@@ -264,10 +267,11 @@ class SqlNode(ExecNode):
         self.limit = limit
     def _log_explain(self, stmt: sql.Select) -> None:
+        conn = Env.get().conn
         try:
             # don't set dialect=Env.get().engine.dialect: x % y turns into x %% y, which results in a syntax error
             stmt_str = str(stmt.compile(compile_kwargs={'literal_binds': True}))
-            explain_result = self.ctx.conn.execute(sql.text(f'EXPLAIN {stmt_str}'))
+            explain_result = conn.execute(sql.text(f'EXPLAIN {stmt_str}'))
             explain_str = '\n'.join([str(row) for row in explain_result])
             _logger.debug(f'SqlScanNode explain:\n{explain_str}')
         except Exception as e:
@@ -275,7 +279,6 @@ class SqlNode(ExecNode):
     async def __aiter__(self) -> AsyncIterator[DataRowBatch]:
         # run the query; do this here rather than in _open(), exceptions are only expected during iteration
-        assert self.ctx.conn is not None
         with warnings.catch_warnings(record=True) as w:
             stmt = self._create_stmt()
             try:
@@ -286,7 +289,8 @@ class SqlNode(ExecNode):
                 pass
             self._log_explain(stmt)
-            result_cursor = self.ctx.conn.execute(stmt)
+            conn = Env.get().conn
+            result_cursor = conn.execute(stmt)
             for warning in w:
                 pass
@@ -351,7 +355,7 @@ class SqlScanNode(SqlNode):
     Supports filtering and ordering.
     """
-    exact_version_only: list[catalog.TableVersion]
+    exact_version_only: list[catalog.TableVersionHandle]
     def __init__(
         self,
@@ -359,7 +363,7 @@ class SqlScanNode(SqlNode):
         row_builder: exprs.RowBuilder,
         select_list: Iterable[exprs.Expr],
         set_pk: bool = False,
-        exact_version_only: Optional[list[catalog.TableVersion]] = None,
+        exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
     ):
         """
         Args:

pixeltable/exprs/column_ref.py CHANGED Viewed

@@ -52,15 +52,15 @@ class ColumnRef(Expr):
         assert col.tbl is not None
         self.col = col
         self.is_unstored_iter_col = (
-            col.tbl.is_component_view() and col.tbl.is_iterator_column(col) and not col.is_stored
+            col.tbl.get().is_component_view and col.tbl.get().is_iterator_column(col) and not col.is_stored
         )
         self.iter_arg_ctx = None
         # number of rowid columns in the base table
-        self.base_rowid_len = col.tbl.base.num_rowid_columns() if self.is_unstored_iter_col else 0
+        self.base_rowid_len = col.tbl.get().base.get().num_rowid_columns() if self.is_unstored_iter_col else 0
         self.base_rowid = [None] * self.base_rowid_len
         self.iterator = None
         # index of the position column in the view's primary key; don't try to reference tbl.store_tbl here
-        self.pos_idx = col.tbl.num_rowid_columns() - 1 if self.is_unstored_iter_col else None
+        self.pos_idx = col.tbl.get().num_rowid_columns() - 1 if self.is_unstored_iter_col else None
         self.perform_validation = False
         if col.col_type.is_media_type():
@@ -138,7 +138,7 @@ class ColumnRef(Expr):
         return self.col == other.col and self.perform_validation == other.perform_validation
     def _df(self) -> 'pxt.dataframe.DataFrame':
-        tbl = catalog.Catalog.get().tbls[self.col.tbl.id]
+        tbl = catalog.Catalog.get().get_tbl(self.col.tbl.id)
         return tbl.select(self)
     def show(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
@@ -166,9 +166,9 @@ class ColumnRef(Expr):
         return self._descriptors().to_html()
     def _descriptors(self) -> DescriptionHelper:
-        tbl = catalog.Catalog.get().tbls[self.col.tbl.id]
+        tbl = catalog.Catalog.get().get_tbl(self.col.tbl.id)
         helper = DescriptionHelper()
-        helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path!r})')
+        helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path()!r})')
         helper.append(tbl._col_descriptor([self.col.name]))
         idxs = tbl._index_descriptor([self.col.name])
         if len(idxs) > 0:
@@ -217,7 +217,7 @@ class ColumnRef(Expr):
         if self.base_rowid != data_row.pk[: self.base_rowid_len]:
             row_builder.eval(data_row, self.iter_arg_ctx)
             iterator_args = data_row[self.iter_arg_ctx.target_slot_idxs[0]]
-            self.iterator = self.col.tbl.iterator_cls(**iterator_args)
+            self.iterator = self.col.tbl.get().iterator_cls(**iterator_args)
             self.base_rowid = data_row.pk[: self.base_rowid_len]
         self.iterator.set_pos(data_row.pk[self.pos_idx])
         res = next(self.iterator)
@@ -225,7 +225,7 @@ class ColumnRef(Expr):
     def _as_dict(self) -> dict:
         tbl = self.col.tbl
-        version = tbl.version if tbl.is_snapshot else None
+        version = tbl.get().version if tbl.get().is_snapshot else None
         # we omit self.components, even if this is a validating ColumnRef, because init() will recreate the
         # non-validating component ColumnRef
         return {
@@ -238,7 +238,7 @@ class ColumnRef(Expr):
     @classmethod
     def get_column(cls, d: dict) -> catalog.Column:
         tbl_id, version, col_id = UUID(d['tbl_id']), d['tbl_version'], d['col_id']
-        tbl_version = catalog.Catalog.get().tbl_versions[(tbl_id, version)]
+        tbl_version = catalog.Catalog.get().get_tbl_version(tbl_id, version)
         # don't use tbl_version.cols_by_id here, this might be a snapshot reference to a column that was then dropped
         col = next(col for col in tbl_version.cols if col.id == col_id)
         return col

pixeltable/exprs/comparison.py CHANGED Viewed

@@ -84,7 +84,7 @@ class Comparison(Expr):
         if self.is_search_arg_comparison:
             # reference the index value column if there is an index and this is not a snapshot
             # (indices don't apply to snapshots)
-            tbl = self._op1.col.tbl
+            tbl = self._op1.col.tbl.get()
             idx_info = [
                 info for info in self._op1.col.get_idx_info().values() if isinstance(info.idx, index.BtreeIndex)
             ]

pixeltable/exprs/data_row.py CHANGED Viewed

@@ -142,13 +142,13 @@ class DataRow:
         self.file_paths[slot_idx] = None
         self.file_urls[slot_idx] = None
-    def __getitem__(self, index: object) -> Any:
+    def __getitem__(self, index: int) -> Any:
         """Returns in-memory value, ie, what is needed for expr evaluation"""
         assert isinstance(index, int)
         if not self.has_val[index]:
-            # for debugging purposes
-            pass
-        assert self.has_val[index], index
+            # This is a sufficiently cheap and sensitive validation that it makes sense to keep the assertion around
+            # even if python is running with -O.
+            raise AssertionError(index)
         if self.file_urls[index] is not None and index in self.img_slot_idxs:
             # if we need to load this from a file, it should have been materialized locally

pixeltable/exprs/expr.py CHANGED Viewed

@@ -14,10 +14,7 @@ import numpy as np
 import sqlalchemy as sql
 from typing_extensions import Self, _AnnotatedAlias
-import pixeltable.catalog as catalog
-import pixeltable.exceptions as excs
-import pixeltable.func as func
-import pixeltable.type_system as ts
+from pixeltable import catalog, exceptions as excs, func, type_system as ts
 from .data_row import DataRow
 from .globals import ArithmeticOperator, ComparisonOperator, LiteralPythonTypes, LogicalOperator
@@ -110,6 +107,24 @@ class Expr(abc.ABC):
         """
         return None
+    @property
+    def validation_error(self) -> Optional[str]:
+        """
+        Subclasses can override this to indicate that validation has failed after a catalog load.
+        If an Expr (or any of its transitive components) is invalid, then it cannot be evaluated, but its metadata
+        will still be preserved in the catalog (so that the user can take appropriate corrective action).
+        """
+        for c in self.components:
+            error = c.validation_error
+            if error is not None:
+                return error
+        return None
+    @property
+    def is_valid(self) -> bool:
+        return self.validation_error is None
     def equals(self, other: Expr) -> bool:
         """
         Subclass-specific comparison. Implemented as a function because __eq__() is needed to construct Comparisons.
@@ -245,7 +260,7 @@ class Expr(abc.ABC):
     def retarget(self, tbl: catalog.TableVersionPath) -> Self:
         """Retarget ColumnRefs in this expr to the specific TableVersions in tbl."""
-        tbl_versions = {tbl_version.id: tbl_version for tbl_version in tbl.get_tbl_versions()}
+        tbl_versions = {tbl_version.id: tbl_version.get() for tbl_version in tbl.get_tbl_versions()}
         return self._retarget(tbl_versions)
     def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> Self:

pixeltable/exprs/function_call.py CHANGED Viewed

@@ -1,7 +1,10 @@
 from __future__ import annotations
 import inspect
+import logging
 import sys
+import warnings
+from textwrap import dedent
 from typing import Any, Optional, Sequence, Union
 import sqlalchemy as sql
@@ -18,6 +21,8 @@ from .row_builder import RowBuilder
 from .rowid_ref import RowidRef
 from .sql_element_cache import SqlElementCache
+_logger = logging.getLogger('pixeltable')
 class FunctionCall(Expr):
     fn: func.Function
@@ -45,6 +50,8 @@ class FunctionCall(Expr):
     aggregator: Optional[Any]
     current_partition_vals: Optional[list[Any]]
+    _validation_error: Optional[str]
     def __init__(
         self,
         fn: func.Function,
@@ -54,6 +61,7 @@ class FunctionCall(Expr):
         order_by_clause: Optional[list[Any]] = None,
         group_by_clause: Optional[list[Any]] = None,
         is_method_call: bool = False,
+        validation_error: Optional[str] = None,
     ):
         assert not fn.is_polymorphic
         assert all(isinstance(arg, Expr) for arg in args)
@@ -76,26 +84,6 @@ class FunctionCall(Expr):
         self.components.extend(arg.copy() for arg in kwargs.values())
         self.kwarg_idxs = {name: i + len(args) for i, name in enumerate(kwargs.keys())}
-        # Now generate bound_idxs for the args and kwargs indices.
-        # This is guaranteed to work, because at this point the call has already been validated.
-        # These will be used later to dereference specific parameter values.
-        bindings = fn.signature.py_signature.bind(*self.arg_idxs, **self.kwarg_idxs)
-        self.bound_idxs = bindings.arguments
-        # Separately generate bound_args for purposes of determining the resource pool.
-        bindings = fn.signature.py_signature.bind(*args, **kwargs)
-        bound_args = bindings.arguments
-        self.resource_pool = fn.call_resource_pool(bound_args)
-        self.agg_init_args = {}
-        if self.is_agg_fn_call:
-            # We separate out the init args for the aggregator. Unpack Literals in init args.
-            assert isinstance(fn, func.AggregateFunction)
-            for arg_name, arg in bound_args.items():
-                if arg_name in fn.init_param_names[0]:
-                    assert isinstance(arg, Literal)  # This was checked during validate_call
-                    self.agg_init_args[arg_name] = arg.val
         # window function state:
         # self.components[self.group_by_start_idx:self.group_by_stop_idx] contains group_by exprs
         self.group_by_start_idx, self.group_by_stop_idx = 0, 0
@@ -125,10 +113,35 @@ class FunctionCall(Expr):
             raise excs.Error(
                 f'order_by argument needs to be a Pixeltable expression, but instead is a {type(order_by_clause[0])}'
             )
-        # don't add components after this, everthing after order_by_start_idx is part of the order_by clause
         self.order_by_start_idx = len(self.components)
         self.components.extend(order_by_clause)
+        self._validation_error = validation_error
+        if validation_error is not None:
+            self.resource_pool = None
+            return
+        # Now generate bound_idxs for the args and kwargs indices.
+        # This is guaranteed to work, because at this point the call has already been validated.
+        # These will be used later to dereference specific parameter values.
+        bindings = fn.signature.py_signature.bind(*self.arg_idxs, **self.kwarg_idxs)
+        self.bound_idxs = bindings.arguments
+        # Separately generate bound_args for purposes of determining the resource pool.
+        bindings = fn.signature.py_signature.bind(*args, **kwargs)
+        bound_args = bindings.arguments
+        self.resource_pool = fn.call_resource_pool(bound_args)
+        self.agg_init_args = {}
+        if self.is_agg_fn_call:
+            # We separate out the init args for the aggregator. Unpack Literals in init args.
+            assert isinstance(fn, func.AggregateFunction)
+            for arg_name, arg in bound_args.items():
+                if arg_name in fn.init_param_names[0]:
+                    assert isinstance(arg, Literal)  # This was checked during validate_call
+                    self.agg_init_args[arg_name] = arg.val
         # execution state for aggregate functions
         self.aggregator = None
         self.current_partition_vals = None
@@ -137,7 +150,7 @@ class FunctionCall(Expr):
     def _create_rowid_refs(self, tbl: catalog.Table) -> list[Expr]:
         target = tbl._tbl_version_path.tbl_version
-        return [RowidRef(target, i) for i in range(target.num_rowid_columns())]
+        return [RowidRef(target, i) for i in range(target.get().num_rowid_columns())]
     def default_column_name(self) -> Optional[str]:
         return self.fn.name
@@ -165,12 +178,16 @@ class FunctionCall(Expr):
             ('group_by_start_idx', self.group_by_start_idx),
             ('group_by_stop_idx', self.group_by_stop_idx),
             ('fn_expr_idx', self.fn_expr_idx),
-            ('order_by_idx', self.order_by_start_idx),
+            ('order_by_start_idx', self.order_by_start_idx),
         ]
     def __repr__(self) -> str:
         return self.display_str()
+    @property
+    def validation_error(self) -> Optional[str]:
+        return self._validation_error or super().validation_error
     def display_str(self, inline: bool = True) -> str:
         if self.is_method_call:
             return f'{self.components[0]}.{self.fn.name}({self._print_args(1, inline)})'
@@ -232,6 +249,8 @@ class FunctionCall(Expr):
         return self.order_by
     def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
+        assert self.is_valid
         # we currently can't translate aggregate functions with grouping and/or ordering to SQL
         if self.has_group_by() or len(self.order_by) > 0:
             return None
@@ -304,6 +323,7 @@ class FunctionCall(Expr):
         Returns a list of dicts mapping each param name to its value when this FunctionCall is evaluated against
         data_rows
         """
+        assert self.is_valid
         assert all(name in self.fn.signature.parameters for name in param_names), f'{param_names}, {self.fn.signature}'
         result: list[dict[str, Any]] = []
         for row in data_rows:
@@ -327,6 +347,8 @@ class FunctionCall(Expr):
         return result
     def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
+        assert self.is_valid
         if isinstance(self.fn, func.ExprTemplateFunction):
             # we need to evaluate the template
             # TODO: can we get rid of this extra copy?
@@ -396,51 +418,68 @@ class FunctionCall(Expr):
         group_by_exprs = components[group_by_start_idx:group_by_stop_idx]
         order_by_exprs = components[order_by_start_idx:]
+        validation_error: Optional[str] = None
+        if isinstance(fn, func.InvalidFunction):
+            validation_error = (
+                dedent(
+                    f"""
+                    The UDF '{fn.self_path}' cannot be located, because
+                    {{errormsg}}
+                    """
+                )
+                .strip()
+                .format(errormsg=fn.errormsg)
+            )
+            return cls(fn, args, kwargs, return_type, is_method_call=is_method_call, validation_error=validation_error)
         # Now re-bind args and kwargs using the version of `fn` that is currently represented in code. This ensures
         # that we get a valid binding even if the signatures of `fn` have changed since the FunctionCall was
         # serialized.
-        resolved_fn: func.Function
-        bound_args: dict[str, Expr]
+        resolved_fn: func.Function = fn
         try:
+            # Bind args and kwargs to the function signature in the current codebase.
             resolved_fn, bound_args = fn._bind_to_matching_signature(args, kwargs)
         except (TypeError, excs.Error):
-            # TODO: Handle this more gracefully (instead of failing the DB load, allow the DB load to succeed, but
-            #       mark any enclosing FunctionCall as unusable). It's the same issue as dealing with a renamed UDF or
-            #       FunctionCall return type mismatch.
             signature_note_str = 'any of its signatures' if fn.is_polymorphic else 'its signature'
-            instance_signature_str = f'{len(fn.signatures)} signatures' if fn.is_polymorphic else str(fn.signature)
-            raise excs.Error(
-                f'The signature stored in the database for the UDF `{fn.self_path}` no longer matches '
-                f'{signature_note_str} as currently defined in the code.\nThis probably means that the code for '
-                f'`{fn.self_path}` has changed in a backward-incompatible way.\n'
-                f'Signature in database: {fn}\n'
-                f'Signature as currently defined in code: {instance_signature_str}'
-            )
-        # Evaluate the call_return_type as defined in the current codebase.
-        call_return_type = resolved_fn.call_return_type(bound_args)
-        if return_type is None:
-            # Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious way to
-            # infer it during DB migration, so we might encounter a stored return_type of None. In that case, we use
-            # the call_return_type that we just inferred (which matches the deserialization behavior prior to
-            # version 25).
-            return_type = call_return_type
+            args_str = [str(arg.col_type) for arg in args]
+            args_str.extend(f'{name}: {arg.col_type}' for name, arg in kwargs.items())
+            call_signature_str = f'({", ".join(args_str)}) -> {return_type}'
+            fn_signature_str = f'{len(fn.signatures)} signatures' if fn.is_polymorphic else str(fn.signature)
+            validation_error = dedent(
+                f"""
+                The signature stored in the database for a UDF call to {fn.self_path!r} no longer
+                matches {signature_note_str} as currently defined in the code. This probably means that the
+                code for {fn.self_path!r} has changed in a backward-incompatible way.
+                Signature of UDF call in the database: {call_signature_str}
+                Signature of UDF as currently defined in code: {fn_signature_str}
+                """
+            ).strip()
         else:
-            # There is a return_type stored in metadata (schema version >= 25).
-            # Check that the stored return_type of the UDF call matches the column type of the FunctionCall, and
-            # fail-fast if it doesn't (otherwise we risk getting downstream database errors).
-            # TODO: Handle this more gracefully (as noted above).
-            if not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
-                raise excs.Error(
-                    f'The return type stored in the database for a UDF call to `{fn.self_path}` no longer matches the '
-                    f'return type of the UDF as currently defined in the code.\nThis probably means that the code for '
-                    f'`{fn.self_path}` has changed in a backward-incompatible way.\n'
-                    f'Return type in database: `{return_type}`\n'
-                    f'Return type as currently defined in code: `{call_return_type}`'
-                )
+            # Evaluate the call_return_type as defined in the current codebase.
+            call_return_type = resolved_fn.call_return_type(bound_args)
+            if return_type is None:
+                # Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious way to
+                # infer it during DB migration, so we might encounter a stored return_type of None. In that case, we use
+                # the call_return_type that we just inferred (which matches the deserialization behavior prior to
+                # version 25).
+                return_type = call_return_type
+            else:
+                # There is a return_type stored in metadata (schema version >= 25).
+                # Check that the stored return_type of the UDF call matches the column type of the FunctionCall, and
+                # fail-fast if it doesn't (otherwise we risk getting downstream database errors).
+                if not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
+                    validation_error = dedent(
+                        f"""
+                        The return type stored in the database for a UDF call to {fn.self_path!r} no longer
+                        matches its return type as currently defined in the code. This probably means that the
+                        code for {fn.self_path!r} has changed in a backward-incompatible way.
+                        Return type of UDF call in the database: {return_type}
+                        Return type of UDF as currently defined in code: {call_return_type}
+                        """
+                    ).strip()
         fn_call = cls(
             resolved_fn,
@@ -450,6 +489,7 @@ class FunctionCall(Expr):
             group_by_clause=group_by_exprs,
             order_by_clause=order_by_exprs,
             is_method_call=is_method_call,
+            validation_error=validation_error,
         )
         return fn_call

pixeltable/exprs/json_mapper.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from __future__ import annotations
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
 import sqlalchemy as sql
@@ -11,6 +11,9 @@ from .expr import _GLOBAL_SCOPE, Expr, ExprScope
 from .row_builder import RowBuilder
 from .sql_element_cache import SqlElementCache
+if TYPE_CHECKING:
+    from .object_ref import ObjectRef
 class JsonMapper(Expr):
     """
@@ -19,6 +22,10 @@ class JsonMapper(Expr):
     is populated by JsonMapper.eval(). The JsonMapper effectively creates a new scope for its target expr.
     """
+    target_expr_scope: ExprScope
+    parent_mapper: Optional[JsonMapper]
+    target_expr_eval_ctx: Optional[RowBuilder.EvalCtx]
     def __init__(self, src_expr: Expr, target_expr: Expr):
         # TODO: type spec should be list[target_expr.col_type]
         super().__init__(ts.JsonType())
@@ -29,12 +36,18 @@ class JsonMapper(Expr):
         from .object_ref import ObjectRef
-        scope_anchor = ObjectRef(self.target_expr_scope, self)
-        self.components = [src_expr, target_expr, scope_anchor]
-        self.parent_mapper: Optional[JsonMapper] = None
-        self.target_expr_eval_ctx: Optional[RowBuilder.EvalCtx] = None
+        self.components = [src_expr, target_expr]
+        self.parent_mapper = None
+        self.target_expr_eval_ctx = None
+        # Intentionally create the id now, before adding the scope anchor; this ensures that JsonMappers will
+        # be recognized as equal so long as they have the same src_expr and target_expr.
+        # TODO: Might this cause problems after certain substitutions?
         self.id = self._create_id()
+        scope_anchor = ObjectRef(self.target_expr_scope, self)
+        self.components.append(scope_anchor)
     def bind_rel_paths(self, mapper: Optional[JsonMapper] = None) -> None:
         self._src_expr.bind_rel_paths(mapper)
         self._target_expr.bind_rel_paths(self)
@@ -84,8 +97,12 @@ class JsonMapper(Expr):
         return self.components[1]
     @property
-    def scope_anchor(self) -> Expr:
-        return self.components[2]
+    def scope_anchor(self) -> 'ObjectRef':
+        from .object_ref import ObjectRef
+        result = self.components[2]
+        assert isinstance(result, ObjectRef)
+        return result
     def _equals(self, _: JsonMapper) -> bool:
         return True
@@ -107,7 +124,7 @@ class JsonMapper(Expr):
         for i, val in enumerate(src):
             data_row[self.scope_anchor.slot_idx] = val
             # stored target_expr
-            row_builder.eval(data_row, self.target_expr_eval_ctx)
+            row_builder.eval(data_row, self.target_expr_eval_ctx, force_eval=self._target_expr.scope())
             result[i] = data_row[self._target_expr.slot_idx]
         data_row[self.slot_idx] = result

pixeltable 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

Potentially problematic release.

pixeltable 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl