pixeltable 0.2.28__py3-none-any.whl → 0.2.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pixeltable might be problematic.

Files changed (62)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/dir.py +6 -0
  5. pixeltable/catalog/globals.py +25 -0
  6. pixeltable/catalog/named_function.py +4 -0
  7. pixeltable/catalog/path_dict.py +37 -11
  8. pixeltable/catalog/schema_object.py +6 -0
  9. pixeltable/catalog/table.py +96 -19
  10. pixeltable/catalog/table_version.py +22 -8
  11. pixeltable/dataframe.py +201 -3
  12. pixeltable/env.py +9 -3
  13. pixeltable/exec/expr_eval_node.py +1 -1
  14. pixeltable/exec/sql_node.py +2 -2
  15. pixeltable/exprs/function_call.py +134 -29
  16. pixeltable/exprs/inline_expr.py +22 -2
  17. pixeltable/exprs/row_builder.py +1 -1
  18. pixeltable/exprs/similarity_expr.py +9 -2
  19. pixeltable/func/__init__.py +1 -0
  20. pixeltable/func/aggregate_function.py +151 -68
  21. pixeltable/func/callable_function.py +50 -16
  22. pixeltable/func/expr_template_function.py +62 -24
  23. pixeltable/func/function.py +191 -23
  24. pixeltable/func/function_registry.py +2 -1
  25. pixeltable/func/query_template_function.py +11 -6
  26. pixeltable/func/signature.py +64 -7
  27. pixeltable/func/tools.py +116 -0
  28. pixeltable/func/udf.py +57 -35
  29. pixeltable/functions/__init__.py +2 -2
  30. pixeltable/functions/anthropic.py +36 -2
  31. pixeltable/functions/globals.py +54 -34
  32. pixeltable/functions/json.py +3 -8
  33. pixeltable/functions/math.py +67 -0
  34. pixeltable/functions/ollama.py +4 -4
  35. pixeltable/functions/openai.py +31 -2
  36. pixeltable/functions/timestamp.py +1 -1
  37. pixeltable/functions/video.py +2 -8
  38. pixeltable/functions/vision.py +1 -1
  39. pixeltable/globals.py +347 -79
  40. pixeltable/index/embedding_index.py +44 -24
  41. pixeltable/metadata/__init__.py +1 -1
  42. pixeltable/metadata/converters/convert_16.py +2 -1
  43. pixeltable/metadata/converters/convert_17.py +2 -1
  44. pixeltable/metadata/converters/convert_23.py +35 -0
  45. pixeltable/metadata/converters/convert_24.py +47 -0
  46. pixeltable/metadata/converters/util.py +4 -2
  47. pixeltable/metadata/notes.py +2 -0
  48. pixeltable/metadata/schema.py +1 -0
  49. pixeltable/type_system.py +192 -48
  50. {pixeltable-0.2.28.dist-info → pixeltable-0.2.30.dist-info}/METADATA +4 -2
  51. {pixeltable-0.2.28.dist-info → pixeltable-0.2.30.dist-info}/RECORD +54 -57
  52. pixeltable-0.2.30.dist-info/entry_points.txt +3 -0
  53. pixeltable/tool/create_test_db_dump.py +0 -311
  54. pixeltable/tool/create_test_video.py +0 -81
  55. pixeltable/tool/doc_plugins/griffe.py +0 -50
  56. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  57. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  58. pixeltable/tool/embed_udf.py +0 -9
  59. pixeltable/tool/mypy_plugin.py +0 -55
  60. pixeltable-0.2.28.dist-info/entry_points.txt +0 -3
  61. {pixeltable-0.2.28.dist-info → pixeltable-0.2.30.dist-info}/LICENSE +0 -0
  62. {pixeltable-0.2.28.dist-info → pixeltable-0.2.30.dist-info}/WHEEL +0 -0
pixeltable/dataframe.py CHANGED
@@ -300,6 +300,20 @@ class DataFrame:
  return self.limit(n).collect()

  def head(self, n: int = 10) -> DataFrameResultSet:
+ """Return the first n rows of the DataFrame, in insertion order of the underlying Table.
+
+ head() is not supported for joins.
+
+ Args:
+ n: Number of rows to select. Default is 10.
+
+ Returns:
+ A DataFrameResultSet with the first n rows of the DataFrame.
+
+ Raises:
+ Error: If the DataFrame is the result of a join or
+ if the DataFrame has an order_by clause.
+ """
  if self.order_by_clause is not None:
  raise excs.Error(f'head() cannot be used with order_by()')
  if self._has_joins():
@@ -309,6 +323,20 @@ class DataFrame:
  return self.order_by(*order_by_clause, asc=True).limit(n).collect()

  def tail(self, n: int = 10) -> DataFrameResultSet:
+ """Return the last n rows of the DataFrame, in insertion order of the underlying Table.
+
+ tail() is not supported for joins.
+
+ Args:
+ n: Number of rows to select. Default is 10.
+
+ Returns:
+ A DataFrameResultSet with the last n rows of the DataFrame.
+
+ Raises:
+ Error: If the DataFrame is the result of a join or
+ if the DataFrame has an order_by clause.
+ """
  if self.order_by_clause is not None:
  raise excs.Error(f'tail() cannot be used with order_by()')
  if self._has_joins():
@@ -394,6 +422,11 @@ class DataFrame:
  return DataFrameResultSet(list(self._output_row_iterator(conn)), self.schema)

  def count(self) -> int:
+ """Return the number of rows in the DataFrame.
+
+ Returns:
+ The number of rows in the DataFrame.
+ """
  from pixeltable.plan import Planner

  stmt = Planner.create_count_stmt(self._first_tbl, self.where_clause)
@@ -463,6 +496,36 @@ class DataFrame:
  return self._descriptors().to_html()

  def select(self, *items: Any, **named_items: Any) -> DataFrame:
+ """ Select columns or expressions from the DataFrame.
+
+ Args:
+ items: expressions to be selected
+ named_items: named expressions to be selected
+
+ Returns:
+ A new DataFrame with the specified select list.
+
+ Raises:
+ Error: If the select list is already specified,
+ or if any of the specified expressions are invalid,
+ or refer to tables not in the DataFrame.
+
+ Examples:
+ Given the DataFrame person from a table t with all its columns and rows:
+
+ >>> person = t.select()
+
+ Select the columns 'name' and 'age' (referenced in table t) from the DataFrame person:
+
+ >>> df = person.select(t.name, t.age)
+
+ Select the column 'name' (referenced in table t) from the DataFrame person,
+ and a named column 'is_adult' from the expression `age >= 18` where 'age' is
+ another column in table t:
+
+ >>> df = person.select(t.name, is_adult=(t.age >= 18))
+
+ """
  if self.select_list is not None:
  raise excs.Error(f'Select list already specified')
  for name, _ in named_items.items():
@@ -512,6 +575,29 @@ class DataFrame:
  )

  def where(self, pred: exprs.Expr) -> DataFrame:
+ """Filter rows based on a predicate.
+
+ Args:
+ pred: the predicate to filter rows
+
+ Returns:
+ A new DataFrame with the specified predicates replacing the where-clause.
+
+ Raises:
+ Error: If the predicate is not a Pixeltable expression,
+ or if it does not return a boolean value,
+ or refers to tables not in the DataFrame.
+
+ Examples:
+ Given the DataFrame person from a table t with all its columns and rows:
+
+ >>> person = t.select()
+
+ Filter the above DataFrame person to only include rows where the column 'age'
+ (referenced in table t) is greater than 30:
+
+ >>> df = person.where(t.age > 30)
+ """
  if not isinstance(pred, exprs.Expr):
  raise excs.Error(f'Where() requires a Pixeltable expression, but instead got {type(pred)}')
  if not pred.col_type.is_bool_type():
@@ -662,11 +748,45 @@ class DataFrame:
  )

  def group_by(self, *grouping_items: Any) -> DataFrame:
- """
- Add a group-by clause to this DataFrame.
+ """ Add a group-by clause to this DataFrame.
+
  Variants:
  - group_by(<base table>): group a component view by their respective base table rows
  - group_by(<expr>, ...): group by the given expressions
+
+ Note that grouping is applied to the rows and takes effect when
+ used with an aggregation function such as sum() or count().
+
+ Args:
+ grouping_items: expressions to group by
+
+ Returns:
+ A new DataFrame with the specified group-by clause.
+
+ Raises:
+ Error: If the group-by clause is already specified,
+ or if the specified expression is invalid,
+ or refers to tables not in the DataFrame,
+ or if the DataFrame is a result of a join.
+
+ Examples:
+ Given the DataFrame book from a table t with all its columns and rows:
+
+ >>> book = t.select()
+
+ Group the above DataFrame book by the 'genre' column (referenced in table t):
+
+ >>> df = book.group_by(t.genre)
+
+ Use the above DataFrame df grouped by genre to count the number of
+ books for each 'genre':
+
+ >>> df = book.group_by(t.genre).select(t.genre, count=count(t.genre)).show()
+
+ Use the above DataFrame df grouped by genre to compute the total price of
+ books for each 'genre':
+
+ >>> df = book.group_by(t.genre).select(t.genre, total=sum(t.price)).show()
  """
  if self.group_by_clause is not None:
  raise excs.Error(f'Group-by already specified')
@@ -699,6 +819,35 @@ class DataFrame:
  )

  def order_by(self, *expr_list: exprs.Expr, asc: bool = True) -> DataFrame:
+ """ Add an order-by clause to this DataFrame.
+
+ Args:
+ expr_list: expressions to order by
+ asc: whether to order in ascending order (True) or descending order (False).
+ Default is True.
+
+ Returns:
+ A new DataFrame with the specified order-by clause.
+
+ Raises:
+ Error: If the order-by clause is already specified,
+ or if the specified expression is invalid,
+ or refers to tables not in the DataFrame.
+
+ Examples:
+ Given the DataFrame book from a table t with all its columns and rows:
+
+ >>> book = t.select()
+
+ Order the above DataFrame book by two columns (price, pages) in descending order:
+
+ >>> df = book.order_by(t.price, t.pages, asc=False)
+
+ Order the above DataFrame book by price in descending order, but order the pages
+ in ascending order:
+
+ >>> df = book.order_by(t.price, asc=False).order_by(t.pages)
+ """
  for e in expr_list:
  if not isinstance(e, exprs.Expr):
  raise excs.Error(f'Invalid expression in order_by(): {e}')
@@ -715,6 +864,14 @@ class DataFrame:
  )

  def limit(self, n: int) -> DataFrame:
+ """ Limit the number of rows in the DataFrame.
+
+ Args:
+ n: Number of rows to select.
+
+ Returns:
+ A new DataFrame with the specified limited rows.
+ """
  # TODO: allow n to be a Variable that can be substituted in bind()
  assert n is not None and isinstance(n, int)
  return DataFrame(
@@ -728,17 +885,58 @@ class DataFrame:
  )

  def update(self, value_spec: dict[str, Any], cascade: bool = True) -> UpdateStatus:
+ """ Update rows in the underlying table of the DataFrame.
+
+ Update rows in the table with the specified value_spec.
+
+ Args:
+ value_spec: a dict mapping the column names to update to their new values.
+ cascade: if True, also update all computed columns that transitively depend
+ on the updated columns, including within views. Default is True.
+
+ Returns:
+ UpdateStatus: the status of the update operation.
+
+ Example:
+ Given the DataFrame person from a table t with all its columns and rows:
+
+ >>> person = t.select()
+
+ Via the above DataFrame person, update the column 'city' to 'Oakland' and 'state' to 'CA' in the table t:
+
+ >>> df = person.update({'city': 'Oakland', 'state': 'CA'})
+
+ Via the above DataFrame person, update the column 'age' to 30 for any rows where 'year' is 2014 in the table t:
+
+ >>> df = person.where(t.year == 2014).update({'age': 30})
+ """
  self._validate_mutable('update')
  return self._first_tbl.tbl_version.update(value_spec, where=self.where_clause, cascade=cascade)

  def delete(self) -> UpdateStatus:
+ """ Delete rows from the underlying table of the DataFrame.
+
+ The delete operation is only allowed for DataFrames on base tables.
+
+ Returns:
+ UpdateStatus: the status of the delete operation.
+
+ Example:
+ Given the DataFrame person from a table t with all its columns and rows:
+
+ >>> person = t.select()
+
+ Via the above DataFrame person, delete all rows from the table t where the column 'age' is less than 18:
+
+ >>> df = person.where(t.age < 18).delete()
+ """
  self._validate_mutable('delete')
  if not self._first_tbl.is_insertable():
  raise excs.Error(f'Cannot delete from view')
  return self._first_tbl.tbl_version.delete(where=self.where_clause)

  def _validate_mutable(self, op_name: str) -> None:
- """Tests whether this `DataFrame` can be mutated (such as by an update operation)."""
+ """Tests whether this DataFrame can be mutated (such as by an update operation)."""
  if self.group_by_clause is not None or self.grouping_tbl is not None:
  raise excs.Error(f'Cannot use `{op_name}` after `group_by`')
  if self.order_by_clause is not None:
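
Taken together, the new docstrings document a fluent query-builder API. A minimal usage sketch assembled from the docstring examples above (the table name 'books' and the count/sum imports are assumptions for illustration, not part of this diff):

>>> import pixeltable as pxt
>>> from pixeltable.functions import count, sum
>>> t = pxt.get_table('books')   # assumed table with 'genre' and 'price' columns
>>> book = t.select()
>>> book.where(t.price > 10).order_by(t.price, asc=False).limit(5).collect()
>>> book.group_by(t.genre).select(t.genre, n=count(t.genre), total=sum(t.price)).show()
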
pixeltable/env.py CHANGED
@@ -8,6 +8,7 @@ import importlib.util
  import inspect
  import logging
  import os
+ import platform
  import shutil
  import subprocess
  import sys
@@ -311,8 +312,12 @@ class Env:
  self._db_name = os.environ.get('PIXELTABLE_DB', 'pixeltable')
  self._pgdata_dir = Path(os.environ.get('PIXELTABLE_PGDATA', str(self._home / 'pgdata')))

- # in pixeltable_pgserver.get_server(): cleanup_mode=None will leave db on for debugging purposes
- self._db_server = pixeltable_pgserver.get_server(self._pgdata_dir, cleanup_mode=None)
+ # cleanup_mode=None will leave the postgres process running after Python exits
+ # cleanup_mode='stop' will terminate the postgres process when Python exits
+ # On Windows, we need cleanup_mode='stop' because child processes are killed automatically when the parent
+ # process (such as Terminal or VSCode) exits, potentially leaving it in an unusable state.
+ cleanup_mode = 'stop' if platform.system() == 'Windows' else None
+ self._db_server = pixeltable_pgserver.get_server(self._pgdata_dir, cleanup_mode=cleanup_mode)
  self._db_url = self._db_server.get_uri(database=self._db_name, driver='psycopg')

  tz_name = self.config.get_string_value('time_zone')
@@ -357,7 +362,7 @@ class Env:
  self.db_url,
  echo=echo,
  future=True,
- isolation_level='AUTOCOMMIT',
+ isolation_level='REPEATABLE READ',
  connect_args=connect_args,
  )
  self._logger.info(f'Created SQLAlchemy engine at: {self.db_url}')
@@ -506,6 +511,7 @@ class Env:
  self.__register_package('openai')
  self.__register_package('openpyxl')
  self.__register_package('pyarrow')
+ self.__register_package('pydantic')
  self.__register_package('replicate')
  self.__register_package('sentencepiece')
  self.__register_package('sentence_transformers', library_name='sentence-transformers')
pixeltable/exec/expr_eval_node.py CHANGED
@@ -208,7 +208,7 @@ class ExprEvalNode(ExecNode):
  }
  start_ts = time.perf_counter()
  assert isinstance(fn_call.fn, CallableFunction)
- result_batch = fn_call.fn.exec_batch(*call_args, **call_kwargs)
+ result_batch = fn_call.fn.exec_batch(call_args, call_kwargs)
  self.ctx.profile.eval_time[fn_call.slot_idx] += time.perf_counter() - start_ts
  self.ctx.profile.eval_count[fn_call.slot_idx] += num_ext_batch_rows

pixeltable/exec/sql_node.py CHANGED
@@ -262,7 +262,7 @@ class SqlNode(ExecNode):
  explain_str = '\n'.join([str(row) for row in explain_result])
  _logger.debug(f'SqlScanNode explain:\n{explain_str}')
  except Exception as e:
- _logger.warning(f'EXPLAIN failed')
+ _logger.warning(f'EXPLAIN failed with error: {e}')

  def __iter__(self) -> Iterator[DataRowBatch]:
  # run the query; do this here rather than in _open(), exceptions are only expected during iteration
@@ -468,4 +468,4 @@ class SqlJoinNode(SqlNode):
  stmt = stmt.join(
  self.input_ctes[i + 1], onclause=on_clause, isouter=is_outer,
  full=join_clause == plan.JoinType.FULL_OUTER)
- return stmt
+ return stmt
pixeltable/exprs/function_call.py CHANGED
@@ -15,6 +15,7 @@ import pixeltable.type_system as ts
  from .data_row import DataRow
  from .expr import Expr
  from .inline_expr import InlineDict, InlineList
+ from .literal import Literal
  from .row_builder import RowBuilder
  from .rowid_ref import RowidRef
  from .sql_element_cache import SqlElementCache
@@ -34,6 +35,7 @@ class FunctionCall(Expr):

  arg_types: list[ts.ColumnType]
  kwarg_types: dict[str, ts.ColumnType]
+ return_type: ts.ColumnType
  group_by_start_idx: int
  group_by_stop_idx: int
  fn_expr_idx: int
@@ -43,17 +45,25 @@ class FunctionCall(Expr):
  current_partition_vals: Optional[list[Any]]

  def __init__(
- self, fn: func.Function, bound_args: dict[str, Any], order_by_clause: Optional[list[Any]] = None,
- group_by_clause: Optional[list[Any]] = None, is_method_call: bool = False):
+ self,
+ fn: func.Function,
+ bound_args: dict[str, Any],
+ return_type: ts.ColumnType,
+ order_by_clause: Optional[list[Any]] = None,
+ group_by_clause: Optional[list[Any]] = None,
+ is_method_call: bool = False
+ ):
  if order_by_clause is None:
  order_by_clause = []
  if group_by_clause is None:
  group_by_clause = []
- signature = fn.signature
- return_type = fn.call_return_type(bound_args)
+
+ assert not fn.is_polymorphic
+
  self.fn = fn
  self.is_method_call = is_method_call
- self.normalize_args(fn.name, signature, bound_args)
+
+ signature = fn.signature

  # If `return_type` is non-nullable, but the function call has a nullable input to any of its non-nullable
  # parameters, then we need to make it nullable. This is because Pixeltable defaults a function output to
@@ -67,6 +77,8 @@ class FunctionCall(Expr):
  return_type = return_type.copy(nullable=True)
  break

+ self.return_type = return_type
+
  super().__init__(return_type)

  self.agg_init_args = {}
@@ -74,9 +86,9 @@ class FunctionCall(Expr):
  # we separate out the init args for the aggregator
  assert isinstance(fn, func.AggregateFunction)
  self.agg_init_args = {
- arg_name: arg for arg_name, arg in bound_args.items() if arg_name in fn.init_param_names
+ arg_name: arg for arg_name, arg in bound_args.items() if arg_name in fn.init_param_names[0]
  }
- bound_args = {arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names}
+ bound_args = {arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names[0]}

  # construct components, args, kwargs
  self.args = []
@@ -88,7 +100,7 @@ class FunctionCall(Expr):

  # the prefix of parameters that are bound can be passed by position
  processed_args: set[str] = set()
- for py_param in fn.signature.py_signature.parameters.values():
+ for py_param in signature.py_signature.parameters.values():
  if py_param.name not in bound_args or py_param.kind == inspect.Parameter.KEYWORD_ONLY:
  break
  arg = bound_args[py_param.name]
@@ -110,7 +122,7 @@ class FunctionCall(Expr):
  self.components.append(arg.copy())
  else:
  self.kwargs[param_name] = (None, arg)
- if fn.signature.py_signature.parameters[param_name].kind != inspect.Parameter.VAR_KEYWORD:
+ if signature.py_signature.parameters[param_name].kind != inspect.Parameter.VAR_KEYWORD:
  self.kwarg_types[param_name] = signature.parameters[param_name].col_type

  # window function state:
@@ -129,7 +141,7 @@ class FunctionCall(Expr):

  if isinstance(self.fn, func.ExprTemplateFunction):
  # we instantiate the template to create an Expr that can be evaluated and record that as a component
- fn_expr = self.fn.instantiate(**bound_args)
+ fn_expr = self.fn.instantiate([], bound_args)
  self.components.append(fn_expr)
  self.fn_expr_idx = len(self.components) - 1
  else:
@@ -187,11 +199,6 @@ class FunctionCall(Expr):
  pass

  if not isinstance(arg, Expr):
- # make sure that non-Expr args are json-serializable and are literals of the correct type
- try:
- _ = json.dumps(arg)
- except TypeError:
- raise excs.Error(f'Argument for parameter {param_name!r} is not json-serializable: {arg} (of type {type(arg)})')
  if arg is not None:
  try:
  param_type = param.col_type
@@ -360,7 +367,7 @@ class FunctionCall(Expr):
  """
  assert self.is_agg_fn_call
  assert isinstance(self.fn, func.AggregateFunction)
- self.aggregator = self.fn.agg_cls(**self.agg_init_args)
+ self.aggregator = self.fn.agg_class(**self.agg_init_args)

  def update(self, data_row: DataRow) -> None:
  """
@@ -432,27 +439,32 @@ class FunctionCall(Expr):
  data_row[self.slot_idx] = self.fn.py_fn(*args, **kwargs)
  elif self.is_window_fn_call:
  assert isinstance(self.fn, func.AggregateFunction)
+ agg_cls = self.fn.agg_class
  if self.has_group_by():
  if self.current_partition_vals is None:
  self.current_partition_vals = [None] * len(self.group_by)
  partition_vals = [data_row[e.slot_idx] for e in self.group_by]
  if partition_vals != self.current_partition_vals:
  # new partition
- self.aggregator = self.fn.agg_cls(**self.agg_init_args)
+ self.aggregator = agg_cls(**self.agg_init_args)
  self.current_partition_vals = partition_vals
  elif self.aggregator is None:
- self.aggregator = self.fn.agg_cls(**self.agg_init_args)
+ self.aggregator = agg_cls(**self.agg_init_args)
  self.aggregator.update(*args)
  data_row[self.slot_idx] = self.aggregator.value()
  else:
- data_row[self.slot_idx] = self.fn.exec(*args, **kwargs)
+ data_row[self.slot_idx] = self.fn.exec(args, kwargs)

  def _as_dict(self) -> dict:
  result = {
- 'fn': self.fn.as_dict(), 'args': self.args, 'kwargs': self.kwargs,
- 'group_by_start_idx': self.group_by_start_idx, 'group_by_stop_idx': self.group_by_stop_idx,
+ 'fn': self.fn.as_dict(),
+ 'args': self.args,
+ 'kwargs': self.kwargs,
+ 'return_type': self.return_type.as_dict(),
+ 'group_by_start_idx': self.group_by_start_idx,
+ 'group_by_stop_idx': self.group_by_stop_idx,
  'order_by_start_idx': self.order_by_start_idx,
- **super()._as_dict()
+ **super()._as_dict(),
  }
  return result

@@ -461,15 +473,108 @@ class FunctionCall(Expr):
  assert 'fn' in d
  assert 'args' in d
  assert 'kwargs' in d
- # reassemble bound args
+
  fn = func.Function.from_dict(d['fn'])
- param_names = list(fn.signature.parameters.keys())
- bound_args = {param_names[i]: arg if idx is None else components[idx] for i, (idx, arg) in enumerate(d['args'])}
- bound_args.update(
- {param_name: val if idx is None else components[idx] for param_name, (idx, val) in d['kwargs'].items()})
+ assert not fn.is_polymorphic
+ return_type = ts.ColumnType.from_dict(d['return_type']) if 'return_type' in d else None
  group_by_exprs = components[d['group_by_start_idx']:d['group_by_stop_idx']]
  order_by_exprs = components[d['order_by_start_idx']:]
+
+ args = [
+ expr if idx is None else components[idx]
+ for idx, expr in d['args']
+ ]
+ kwargs = {
+ param_name: (expr if idx is None else components[idx])
+ for param_name, (idx, expr) in d['kwargs'].items()
+ }
+
+ # `Function.from_dict()` does signature matching, so it is safe to assume that `args` and `kwargs` are
+ # consistent with its signature.
+
+ # Reassemble bound_args. Note that args and kwargs represent "already bound arguments": they are not bindable
+ # in the Python sense, because variable args (such as *args and **kwargs) have already been condensed.
+ param_names = list(fn.signature.parameters.keys())
+ bound_args = {param_names[i]: arg for i, arg in enumerate(args)}
+ bound_args.update(kwargs.items())
+
+ # TODO: In order to properly invoke call_return_type, we need to ensure that any InlineLists or InlineDicts
+ # in bound_args are unpacked into Python lists/dicts. There is an open task to ensure this is true in general;
+ # for now, as a hack, we do the unpacking here for the specific case of an InlineList of Literals (the only
+ # case where this is necessary to support existing conditional_return_type implementations). Once the general
+ # pattern is implemented, we can remove this hack.
+ unpacked_bound_args = {
+ param_name: cls.__unpack_bound_arg(arg) for param_name, arg in bound_args.items()
+ }
+
+ # Evaluate the call_return_type as defined in the current codebase.
+ call_return_type = fn.call_return_type([], unpacked_bound_args)
+
+ if return_type is None:
+ # Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious way to
+ # infer it during DB migration, so we might encounter a stored return_type of None. In that case, we use
+ # the call_return_type that we just inferred (which matches the deserialization behavior prior to
+ # version 25).
+ return_type = call_return_type
+ else:
+ # There is a return_type stored in metadata (schema version >= 25).
+ # Check that the stored return_type of the UDF call matches the column type of the FunctionCall, and
+ # fail-fast if it doesn't (otherwise we risk getting downstream database errors).
+ # TODO: Handle this more gracefully (instead of failing the DB load, allow the DB load to succeed, but
+ # mark this FunctionCall as unusable). It's the same issue as dealing with a renamed UDF or Function
+ # signature mismatch.
+ if not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
+ raise excs.Error(
+ f'The return type stored in the database for a UDF call to `{fn.self_path}` no longer matches the '
+ f'return type of the UDF as currently defined in the code.\nThis probably means that the code for '
+ f'`{fn.self_path}` has changed in a backward-incompatible way.\n'
+ f'Return type in database: `{return_type}`\n'
+ f'Return type as currently defined: `{call_return_type}`'
+ )
+
  fn_call = cls(
- func.Function.from_dict(d['fn']), bound_args, group_by_clause=group_by_exprs,
- order_by_clause=order_by_exprs)
+ fn,
+ bound_args,
+ return_type,
+ group_by_clause=group_by_exprs,
+ order_by_clause=order_by_exprs
+ )
  return fn_call
+
+ @classmethod
+ def __find_matching_signature(cls, fn: func.Function, args: list[Any], kwargs: dict[str, Any]) -> Optional[int]:
+ for idx, sig in enumerate(fn.signatures):
+ if cls.__signature_matches(sig, args, kwargs):
+ return idx
+ return None
+
+ @classmethod
+ def __signature_matches(cls, sig: func.Signature, args: list[Any], kwargs: dict[str, Any]) -> bool:
+ unbound_parameters = set(sig.parameters.keys())
+ for i, arg in enumerate(args):
+ if i >= len(sig.parameters_by_pos):
+ return False
+ param = sig.parameters_by_pos[i]
+ arg_type = arg.col_type if isinstance(arg, Expr) else ts.ColumnType.infer_literal_type(arg)
+ if param.col_type is not None and not param.col_type.is_supertype_of(arg_type, ignore_nullable=True):
+ return False
+ unbound_parameters.remove(param.name)
+ for param_name, arg in kwargs.items():
+ if param_name not in unbound_parameters:
+ return False
+ param = sig.parameters[param_name]
+ arg_type = arg.col_type if isinstance(arg, Expr) else ts.ColumnType.infer_literal_type(arg)
+ if param.col_type is not None and not param.col_type.is_supertype_of(arg_type, ignore_nullable=True):
+ return False
+ unbound_parameters.remove(param_name)
+ for param_name in unbound_parameters:
+ param = sig.parameters[param_name]
+ if not param.has_default:
+ return False
+ return True
+
+ @classmethod
+ def __unpack_bound_arg(cls, arg: Any) -> Any:
+ if isinstance(arg, InlineList) and all(isinstance(el, Literal) for el in arg.components):
+ return [el.val for el in arg.components]
+ return arg
pixeltable/exprs/inline_expr.py CHANGED
@@ -101,7 +101,13 @@ class InlineList(Expr):
  else:
  exprs.append(Literal(el))

- super().__init__(ts.JsonType())
+ json_schema = {
+ 'type': 'array',
+ 'prefixItems': [expr.col_type.to_json_schema() for expr in exprs],
+ 'items': False  # No additional items (fixed length)
+ }
+
+ super().__init__(ts.JsonType(json_schema))
  self.components.extend(exprs)
  self.id = self._create_id()

@@ -149,7 +155,21 @@ class InlineDict(Expr):
  else:
  exprs.append(Literal(val))

- super().__init__(ts.JsonType())
+ json_schema: Optional[dict[str, Any]]
+ try:
+ json_schema = {
+ 'type': 'object',
+ 'properties': {
+ key: expr.col_type.to_json_schema()
+ for key, expr in zip(self.keys, exprs)
+ },
+ }
+ except excs.Error:
+ # InlineDicts are used to store iterator arguments, which are not required to be valid JSON types,
+ # so we can't always construct a valid schema.
+ json_schema = None
+
+ super().__init__(ts.JsonType(json_schema))
  self.components.extend(exprs)
  self.id = self._create_id()

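As a rough illustration (not part of the diff): for an inline list whose two elements resolve to an int expression and a string literal, the json_schema built in InlineList.__init__ above would come out along these lines, assuming ColumnType.to_json_schema() emits standard JSON Schema type names:

# Hypothetical value of json_schema for a two-element inline list (int, string):
json_schema = {
    'type': 'array',
    'prefixItems': [{'type': 'integer'}, {'type': 'string'}],
    'items': False,  # fixed length: no elements beyond the two prefixItems
}
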
pixeltable/exprs/row_builder.py CHANGED
@@ -368,7 +368,7 @@ class RowBuilder:
  if not ignore_errors:
  input_vals = [data_row[d.slot_idx] for d in expr.dependencies()]
  raise excs.ExprEvalError(
- expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0)
+ expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0) from exc

  def create_table_row(self, data_row: DataRow, exc_col_ids: set[int]) -> tuple[dict[str, Any], int]:
  """Create a table row from the slots that have an output column assigned