pixeltable 0.3.14__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (79)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +9 -1
  4. pixeltable/catalog/catalog.py +559 -134
  5. pixeltable/catalog/column.py +36 -32
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +12 -0
  8. pixeltable/catalog/insertable_table.py +30 -25
  9. pixeltable/catalog/schema_object.py +9 -6
  10. pixeltable/catalog/table.py +334 -267
  11. pixeltable/catalog/table_version.py +360 -241
  12. pixeltable/catalog/table_version_handle.py +18 -2
  13. pixeltable/catalog/table_version_path.py +86 -23
  14. pixeltable/catalog/view.py +47 -23
  15. pixeltable/dataframe.py +198 -19
  16. pixeltable/env.py +6 -4
  17. pixeltable/exceptions.py +6 -0
  18. pixeltable/exec/__init__.py +1 -1
  19. pixeltable/exec/exec_node.py +2 -0
  20. pixeltable/exec/expr_eval/evaluators.py +4 -1
  21. pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
  22. pixeltable/exec/in_memory_data_node.py +1 -1
  23. pixeltable/exec/sql_node.py +188 -22
  24. pixeltable/exprs/column_property_ref.py +16 -6
  25. pixeltable/exprs/column_ref.py +33 -11
  26. pixeltable/exprs/comparison.py +1 -1
  27. pixeltable/exprs/data_row.py +5 -3
  28. pixeltable/exprs/expr.py +11 -4
  29. pixeltable/exprs/literal.py +2 -0
  30. pixeltable/exprs/row_builder.py +4 -6
  31. pixeltable/exprs/rowid_ref.py +8 -0
  32. pixeltable/exprs/similarity_expr.py +1 -0
  33. pixeltable/func/__init__.py +1 -0
  34. pixeltable/func/mcp.py +74 -0
  35. pixeltable/func/query_template_function.py +5 -3
  36. pixeltable/func/tools.py +12 -2
  37. pixeltable/func/udf.py +2 -2
  38. pixeltable/functions/__init__.py +1 -0
  39. pixeltable/functions/anthropic.py +19 -45
  40. pixeltable/functions/deepseek.py +19 -38
  41. pixeltable/functions/fireworks.py +9 -18
  42. pixeltable/functions/gemini.py +165 -33
  43. pixeltable/functions/groq.py +108 -0
  44. pixeltable/functions/llama_cpp.py +6 -6
  45. pixeltable/functions/math.py +63 -0
  46. pixeltable/functions/mistralai.py +16 -53
  47. pixeltable/functions/ollama.py +1 -1
  48. pixeltable/functions/openai.py +82 -165
  49. pixeltable/functions/string.py +212 -58
  50. pixeltable/functions/together.py +22 -80
  51. pixeltable/globals.py +10 -4
  52. pixeltable/index/base.py +5 -0
  53. pixeltable/index/btree.py +5 -0
  54. pixeltable/index/embedding_index.py +5 -0
  55. pixeltable/io/external_store.py +10 -31
  56. pixeltable/io/label_studio.py +5 -5
  57. pixeltable/io/parquet.py +4 -4
  58. pixeltable/io/table_data_conduit.py +1 -32
  59. pixeltable/metadata/__init__.py +11 -2
  60. pixeltable/metadata/converters/convert_13.py +2 -2
  61. pixeltable/metadata/converters/convert_30.py +6 -11
  62. pixeltable/metadata/converters/convert_35.py +9 -0
  63. pixeltable/metadata/converters/convert_36.py +38 -0
  64. pixeltable/metadata/converters/convert_37.py +15 -0
  65. pixeltable/metadata/converters/util.py +3 -9
  66. pixeltable/metadata/notes.py +3 -0
  67. pixeltable/metadata/schema.py +13 -1
  68. pixeltable/plan.py +135 -12
  69. pixeltable/share/packager.py +321 -20
  70. pixeltable/share/publish.py +2 -2
  71. pixeltable/store.py +31 -13
  72. pixeltable/type_system.py +30 -0
  73. pixeltable/utils/dbms.py +1 -1
  74. pixeltable/utils/formatter.py +64 -42
  75. {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/METADATA +2 -1
  76. {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/RECORD +79 -74
  77. {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/LICENSE +0 -0
  78. {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/WHEEL +0 -0
  79. {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/entry_points.txt +0 -0
pixeltable/dataframe.py CHANGED
@@ -14,9 +14,10 @@ import pandas as pd
14
14
  import sqlalchemy as sql
15
15
 
16
16
  from pixeltable import catalog, exceptions as excs, exec, exprs, plan, type_system as ts
17
- from pixeltable.catalog import is_valid_identifier
17
+ from pixeltable.catalog import Catalog, is_valid_identifier
18
18
  from pixeltable.catalog.globals import UpdateStatus
19
19
  from pixeltable.env import Env
20
+ from pixeltable.plan import Planner, SampleClause
20
21
  from pixeltable.type_system import ColumnType
21
22
  from pixeltable.utils.description_helper import DescriptionHelper
22
23
  from pixeltable.utils.formatter import Formatter
@@ -139,6 +140,7 @@ class DataFrame:
139
140
  grouping_tbl: Optional[catalog.TableVersion]
140
141
  order_by_clause: Optional[list[tuple[exprs.Expr, bool]]]
141
142
  limit_val: Optional[exprs.Expr]
143
+ sample_clause: Optional[SampleClause]
142
144
 
143
145
  def __init__(
144
146
  self,
@@ -149,6 +151,7 @@ class DataFrame:
149
151
  grouping_tbl: Optional[catalog.TableVersion] = None,
150
152
  order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None, # list[(expr, asc)]
151
153
  limit: Optional[exprs.Expr] = None,
154
+ sample_clause: Optional[SampleClause] = None,
152
155
  ):
153
156
  self._from_clause = from_clause
154
157
 
@@ -168,6 +171,7 @@ class DataFrame:
168
171
  self.grouping_tbl = grouping_tbl
169
172
  self.order_by_clause = copy.deepcopy(order_by_clause)
170
173
  self.limit_val = limit
174
+ self.sample_clause = sample_clause
171
175
 
172
176
  @classmethod
173
177
  def _normalize_select_list(
@@ -210,8 +214,7 @@ class DataFrame:
210
214
 
211
215
  @property
212
216
  def _first_tbl(self) -> catalog.TableVersionPath:
213
- assert len(self._from_clause.tbls) == 1
214
- return self._from_clause.tbls[0]
217
+ return self._from_clause._first_tbl
215
218
 
216
219
  def _vars(self) -> dict[str, exprs.Variable]:
217
220
  """
@@ -236,6 +239,36 @@ class DataFrame:
236
239
  raise excs.Error(f'Multiple definitions of parameter {var.name}')
237
240
  return unique_vars
238
241
 
242
+ @classmethod
243
+ def _convert_param_to_typed_expr(
244
+ cls, v: Any, required_type: ts.ColumnType, required: bool, name: str, range: Optional[tuple[Any, Any]] = None
245
+ ) -> Optional[exprs.Expr]:
246
+ if v is None:
247
+ if required:
248
+ raise excs.Error(f'{name!r} parameter must be present')
249
+ return v
250
+ v_expr = exprs.Expr.from_object(v)
251
+ if not v_expr.col_type.matches(required_type):
252
+ raise excs.Error(f'{name!r} parameter must be of type {required_type!r}, instead of {v_expr.col_type}')
253
+ if range is not None:
254
+ if not isinstance(v_expr, exprs.Literal):
255
+ raise excs.Error(f'{name!r} parameter must be a constant, not {v_expr}')
256
+ if range[0] is not None and not (v_expr.val >= range[0]):
257
+ raise excs.Error(f'{name!r} parameter must be >= {range[0]}')
258
+ if range[1] is not None and not (v_expr.val <= range[1]):
259
+ raise excs.Error(f'{name!r} parameter must be <= {range[1]}')
260
+ return v_expr
261
+
262
+ @classmethod
263
+ def validate_constant_type_range(
264
+ cls, v: Any, required_type: ts.ColumnType, required: bool, name: str, range: Optional[tuple[Any, Any]] = None
265
+ ) -> Any:
266
+ """Validate that the given named parameter is a constant of the required type and within the specified range."""
267
+ v_expr = cls._convert_param_to_typed_expr(v, required_type, required, name, range)
268
+ if v_expr is None:
269
+ return None
270
+ return v_expr.val
271
+
239
272
  def parameters(self) -> dict[str, ColumnType]:
240
273
  """Return a dict mapping parameter name to parameter type.
241
274
 
@@ -280,7 +313,7 @@ class DataFrame:
280
313
  num_rowid_cols = len(self.grouping_tbl.store_tbl.rowid_columns())
281
314
  # the grouping table must be a base of self.tbl
282
315
  assert num_rowid_cols <= len(self._first_tbl.tbl_version.get().store_tbl.rowid_columns())
283
- group_by_clause = [exprs.RowidRef(self._first_tbl.tbl_version, idx) for idx in range(num_rowid_cols)]
316
+ group_by_clause = self.__rowid_columns(num_rowid_cols)
284
317
  elif self.group_by_clause is not None:
285
318
  group_by_clause = self.group_by_clause
286
319
 
@@ -292,14 +325,21 @@ class DataFrame:
292
325
  self._select_list_exprs,
293
326
  where_clause=self.where_clause,
294
327
  group_by_clause=group_by_clause,
295
- order_by_clause=self.order_by_clause if self.order_by_clause is not None else [],
328
+ order_by_clause=self.order_by_clause,
296
329
  limit=self.limit_val,
330
+ sample_clause=self.sample_clause,
297
331
  )
298
332
 
333
+ def __rowid_columns(self, num_rowid_cols: Optional[int] = None) -> list[exprs.Expr]:
334
+ """Return list of RowidRef for the given number of associated rowids"""
335
+ return Planner.rowid_columns(self._first_tbl.tbl_version, num_rowid_cols)
336
+
299
337
  def _has_joins(self) -> bool:
300
338
  return len(self._from_clause.join_clauses) > 0
301
339
 
302
340
  def show(self, n: int = 20) -> DataFrameResultSet:
341
+ if self.sample_clause is not None:
342
+ raise excs.Error('show() cannot be used with sample()')
303
343
  assert n is not None
304
344
  return self.limit(n).collect()
305
345
 
@@ -322,6 +362,8 @@ class DataFrame:
322
362
  raise excs.Error('head() cannot be used with order_by()')
323
363
  if self._has_joins():
324
364
  raise excs.Error('head() not supported for joins')
365
+ if self.sample_clause is not None:
366
+ raise excs.Error('head() cannot be used with sample()')
325
367
  if self.group_by_clause is not None:
326
368
  raise excs.Error('head() cannot be used with group_by()')
327
369
  num_rowid_cols = len(self._first_tbl.tbl_version.get().store_tbl.rowid_columns())
@@ -347,6 +389,8 @@ class DataFrame:
347
389
  raise excs.Error('tail() cannot be used with order_by()')
348
390
  if self._has_joins():
349
391
  raise excs.Error('tail() not supported for joins')
392
+ if self.sample_clause is not None:
393
+ raise excs.Error('tail() cannot be used with sample()')
350
394
  if self.group_by_clause is not None:
351
395
  raise excs.Error('tail() cannot be used with group_by()')
352
396
  num_rowid_cols = len(self._first_tbl.tbl_version.get().store_tbl.rowid_columns())
@@ -431,7 +475,9 @@ class DataFrame:
431
475
  raise excs.Error(msg) from e
432
476
 
433
477
  def _output_row_iterator(self) -> Iterator[list]:
434
- with Env.get().begin_xact():
478
+ # TODO: extend begin_xact() to accept multiple TVPs for joins
479
+ single_tbl = self._first_tbl if len(self._from_clause.tbls) == 1 else None
480
+ with Catalog.get().begin_xact(tbl=single_tbl, for_write=False):
435
481
  try:
436
482
  for data_row in self._exec():
437
483
  yield [data_row[e.slot_idx] for e in self._select_list_exprs]
@@ -463,8 +509,8 @@ class DataFrame:
463
509
 
464
510
  from pixeltable.plan import Planner
465
511
 
466
- stmt = Planner.create_count_stmt(self._first_tbl, self.where_clause)
467
- with Env.get().begin_xact() as conn:
512
+ with Catalog.get().begin_xact(tbl=self._first_tbl, for_write=False) as conn:
513
+ stmt = Planner.create_count_stmt(self._first_tbl, self.where_clause)
468
514
  result: int = conn.execute(stmt).scalar_one()
469
515
  assert isinstance(result, int)
470
516
  return result
@@ -510,6 +556,9 @@ class DataFrame:
510
556
  if self.limit_val is not None:
511
557
  heading_vals.append('Limit')
512
558
  info_vals.append(self.limit_val.display_str(inline=False))
559
+ if self.sample_clause is not None:
560
+ heading_vals.append('Sample')
561
+ info_vals.append(self.sample_clause.display_str(inline=False))
513
562
  assert len(heading_vals) == len(info_vals)
514
563
  return pd.DataFrame(info_vals, index=heading_vals)
515
564
 
@@ -644,6 +693,8 @@ class DataFrame:
644
693
  """
645
694
  if self.where_clause is not None:
646
695
  raise excs.Error('Where clause already specified')
696
+ if self.sample_clause is not None:
697
+ raise excs.Error('where cannot be used after sample()')
647
698
  if not isinstance(pred, exprs.Expr):
648
699
  raise excs.Error(f'Where() requires a Pixeltable expression, but instead got {type(pred)}')
649
700
  if not pred.col_type.is_bool_type():
@@ -771,6 +822,8 @@ class DataFrame:
771
822
 
772
823
  >>> df = t.join(d, on=(t.d1 == d.pk1) & (t.d2 == d.pk2), how='left')
773
824
  """
825
+ if self.sample_clause is not None:
826
+ raise excs.Error('join() cannot be used with sample()')
774
827
  join_pred: Optional[exprs.Expr]
775
828
  if how == 'cross':
776
829
  if on is not None:
@@ -838,6 +891,9 @@ class DataFrame:
838
891
  """
839
892
  if self.group_by_clause is not None:
840
893
  raise excs.Error('Group-by already specified')
894
+ if self.sample_clause is not None:
895
+ raise excs.Error('group_by() cannot be used with sample()')
896
+
841
897
  grouping_tbl: Optional[catalog.TableVersion] = None
842
898
  group_by_clause: Optional[list[exprs.Expr]] = None
843
899
  for item in grouping_items:
@@ -849,7 +905,7 @@ class DataFrame:
849
905
  grouping_tbl = item if isinstance(item, catalog.TableVersion) else item._tbl_version.get()
850
906
  # we need to make sure that the grouping table is a base of self.tbl
851
907
  base = self._first_tbl.find_tbl_version(grouping_tbl.id)
852
- if base is None or base.id == self._first_tbl.tbl_id():
908
+ if base is None or base.id == self._first_tbl.tbl_id:
853
909
  raise excs.Error(
854
910
  f'group_by(): {grouping_tbl.name} is not a base table of {self._first_tbl.tbl_name()}'
855
911
  )
@@ -921,6 +977,8 @@ class DataFrame:
921
977
 
922
978
  >>> df = book.order_by(t.price, asc=False).order_by(t.pages)
923
979
  """
980
+ if self.sample_clause is not None:
981
+ raise excs.Error('group_by() cannot be used with sample()')
924
982
  for e in expr_list:
925
983
  if not isinstance(e, exprs.Expr):
926
984
  raise excs.Error(f'Invalid expression in order_by(): {e}')
@@ -945,10 +1003,10 @@ class DataFrame:
945
1003
  Returns:
946
1004
  A new DataFrame with the specified limited rows.
947
1005
  """
948
- assert n is not None
949
- n = exprs.Expr.from_object(n)
950
- if not n.col_type.is_int_type():
951
- raise excs.Error(f'limit(): parameter must be of type int, instead of {n.col_type}')
1006
+ if self.sample_clause is not None:
1007
+ raise excs.Error('limit() cannot be used with sample()')
1008
+
1009
+ limit_expr = self._convert_param_to_typed_expr(n, ts.IntType(nullable=False), True, 'limit()')
952
1010
  return DataFrame(
953
1011
  from_clause=self._from_clause,
954
1012
  select_list=self.select_list,
@@ -956,7 +1014,124 @@ class DataFrame:
956
1014
  group_by_clause=self.group_by_clause,
957
1015
  grouping_tbl=self.grouping_tbl,
958
1016
  order_by_clause=self.order_by_clause,
959
- limit=n,
1017
+ limit=limit_expr,
1018
+ )
1019
+
1020
+ def sample(
1021
+ self,
1022
+ n: Optional[int] = None,
1023
+ n_per_stratum: Optional[int] = None,
1024
+ fraction: Optional[float] = None,
1025
+ seed: Optional[int] = None,
1026
+ stratify_by: Any = None,
1027
+ ) -> DataFrame:
1028
+ """
1029
+ Return a new DataFrame specifying a sample of rows from the DataFrame, considered in a shuffled order.
1030
+
1031
+ The size of the sample can be specified in three ways:
1032
+
1033
+ - `n`: the total number of rows to produce as a sample
1034
+ - `n_per_stratum`: the number of rows to produce per stratum as a sample
1035
+ - `fraction`: the fraction of available rows to produce as a sample
1036
+
1037
+ The sample can be stratified by one or more columns, which means that the sample will
1038
+ be selected from each stratum separately.
1039
+
1040
+ The data is shuffled before creating the sample.
1041
+
1042
+ Args:
1043
+ n: Total number of rows to produce as a sample.
1044
+ n_per_stratum: Number of rows to produce per stratum as a sample. This parameter is only valid if
1045
+ `stratify_by` is specified. Only one of `n` or `n_per_stratum` can be specified.
1046
+ fraction: Fraction of available rows to produce as a sample. This parameter is not usable with `n` or
1047
+ `n_per_stratum`. The fraction must be between 0.0 and 1.0.
1048
+ seed: Random seed for reproducible shuffling
1049
+ stratify_by: If specified, the sample will be stratified by these values.
1050
+
1051
+ Returns:
1052
+ A new DataFrame which specifies the sampled rows
1053
+
1054
+ Examples:
1055
+ Given the Table `person` containing the field 'age', we can create samples of the table in various ways:
1056
+
1057
+ Sample 100 rows from the above Table:
1058
+
1059
+ >>> df = person.sample(n=100)
1060
+
1061
+ Sample 10% of the rows from the above Table:
1062
+
1063
+ >>> df = person.sample(fraction=0.1)
1064
+
1065
+ Sample 10% of the rows from the above Table, stratified by the column 'age':
1066
+
1067
+ >>> df = person.sample(fraction=0.1, stratify_by=t.age)
1068
+
1069
+ Equal allocation sampling: Sample 2 rows from each age present in the above Table:
1070
+
1071
+ >>> df = person.sample(n_per_stratum=2, stratify_by=t.age)
1072
+
1073
+ Sampling is compatible with the where clause, so we can also sample from a filtered DataFrame:
1074
+
1075
+ >>> df = person.where(t.age > 30).sample(n=100)
1076
+ """
1077
+ # Check context of usage
1078
+ if self.sample_clause is not None:
1079
+ raise excs.Error('sample() cannot be used with sample()')
1080
+ if self.group_by_clause is not None:
1081
+ raise excs.Error('sample() cannot be used with group_by()')
1082
+ if self.order_by_clause is not None:
1083
+ raise excs.Error('sample() cannot be used with order_by()')
1084
+ if self.limit_val is not None:
1085
+ raise excs.Error('sample() cannot be used with limit()')
1086
+ if self._has_joins():
1087
+ raise excs.Error('sample() cannot be used with join()')
1088
+
1089
+ # Check parameter combinations
1090
+ if (n is not None) + (n_per_stratum is not None) + (fraction is not None) != 1:
1091
+ raise excs.Error('Exactly one of `n`, `n_per_stratum`, or `fraction` must be specified.')
1092
+ if n_per_stratum is not None and stratify_by is None:
1093
+ raise excs.Error('Must specify `stratify_by` to use `n_per_stratum`')
1094
+
1095
+ # Check parameter types and values
1096
+ n = self.validate_constant_type_range(n, ts.IntType(nullable=False), False, 'n', (1, None))
1097
+ n_per_stratum = self.validate_constant_type_range(
1098
+ n_per_stratum, ts.IntType(nullable=False), False, 'n_per_stratum', (1, None)
1099
+ )
1100
+ fraction = self.validate_constant_type_range(
1101
+ fraction, ts.FloatType(nullable=False), False, 'fraction', (0.0, 1.0)
1102
+ )
1103
+ seed = self.validate_constant_type_range(seed, ts.IntType(nullable=False), False, 'seed')
1104
+
1105
+ # analyze stratify list
1106
+ stratify_exprs: list[exprs.Expr] = []
1107
+ if stratify_by is not None:
1108
+ if isinstance(stratify_by, exprs.Expr):
1109
+ stratify_by = [stratify_by]
1110
+ if not isinstance(stratify_by, (list, tuple)):
1111
+ raise excs.Error('`stratify_by` must be a list of scalar expressions')
1112
+ for expr in stratify_by:
1113
+ if expr is None or not isinstance(expr, exprs.Expr):
1114
+ raise excs.Error(f'Invalid expression: {expr}')
1115
+ if not expr.col_type.is_scalar_type():
1116
+ raise excs.Error(f'Invalid type: expression must be a scalar type (not {expr.col_type})')
1117
+ if not expr.is_bound_by(self._from_clause.tbls):
1118
+ raise excs.Error(
1119
+ f"Expression '{expr}' cannot be evaluated in the context of this query's tables "
1120
+ f'({",".join(tbl.tbl_name() for tbl in self._from_clause.tbls)})'
1121
+ )
1122
+ stratify_exprs.append(expr)
1123
+
1124
+ sample_clause = SampleClause(None, n, n_per_stratum, fraction, seed, stratify_exprs)
1125
+
1126
+ return DataFrame(
1127
+ from_clause=self._from_clause,
1128
+ select_list=self.select_list,
1129
+ where_clause=self.where_clause,
1130
+ group_by_clause=self.group_by_clause,
1131
+ grouping_tbl=self.grouping_tbl,
1132
+ order_by_clause=self.order_by_clause,
1133
+ limit=self.limit_val,
1134
+ sample_clause=sample_clause,
960
1135
  )
961
1136
 
962
1137
  def update(self, value_spec: dict[str, Any], cascade: bool = True) -> UpdateStatus:
@@ -988,7 +1163,7 @@ class DataFrame:
988
1163
  >>> df = person.where(t.year == 2014).update({'age': 30})
989
1164
  """
990
1165
  self._validate_mutable('update', False)
991
- with Env.get().begin_xact():
1166
+ with Catalog.get().begin_xact(tbl=self._first_tbl, for_write=True, lock_mutable_tree=True):
992
1167
  return self._first_tbl.tbl_version.get().update(value_spec, where=self.where_clause, cascade=cascade)
993
1168
 
994
1169
  def delete(self) -> UpdateStatus:
@@ -1011,7 +1186,7 @@ class DataFrame:
1011
1186
  self._validate_mutable('delete', False)
1012
1187
  if not self._first_tbl.is_insertable():
1013
1188
  raise excs.Error('Cannot delete from view')
1014
- with Env.get().begin_xact():
1189
+ with Catalog.get().begin_xact(tbl=self._first_tbl, for_write=True, lock_mutable_tree=True):
1015
1190
  return self._first_tbl.tbl_version.get().delete(where=self.where_clause)
1016
1191
 
1017
1192
  def _validate_mutable(self, op_name: str, allow_select: bool) -> None:
@@ -1053,13 +1228,14 @@ class DataFrame:
1053
1228
  if self.order_by_clause is not None
1054
1229
  else None,
1055
1230
  'limit_val': self.limit_val.as_dict() if self.limit_val is not None else None,
1231
+ 'sample_clause': self.sample_clause.as_dict() if self.sample_clause is not None else None,
1056
1232
  }
1057
1233
  return d
1058
1234
 
1059
1235
  @classmethod
1060
1236
  def from_dict(cls, d: dict[str, Any]) -> 'DataFrame':
1061
1237
  # we need to wrap the construction with a transaction, because it might need to load metadata
1062
- with Env.get().begin_xact():
1238
+ with Catalog.get().begin_xact(for_write=False):
1063
1239
  tbls = [catalog.TableVersionPath.from_dict(tbl_dict) for tbl_dict in d['from_clause']['tbls']]
1064
1240
  join_clauses = [plan.JoinClause(**clause_dict) for clause_dict in d['from_clause']['join_clauses']]
1065
1241
  from_clause = plan.FromClause(tbls=tbls, join_clauses=join_clauses)
@@ -1079,6 +1255,7 @@ class DataFrame:
1079
1255
  else None
1080
1256
  )
1081
1257
  limit_val = exprs.Expr.from_dict(d['limit_val']) if d['limit_val'] is not None else None
1258
+ sample_clause = SampleClause.from_dict(d['sample_clause']) if d['sample_clause'] is not None else None
1082
1259
 
1083
1260
  return DataFrame(
1084
1261
  from_clause=from_clause,
@@ -1088,6 +1265,7 @@ class DataFrame:
1088
1265
  grouping_tbl=grouping_tbl,
1089
1266
  order_by_clause=order_by_clause,
1090
1267
  limit=limit_val,
1268
+ sample_clause=sample_clause,
1091
1269
  )
1092
1270
 
1093
1271
  def _hash_result_set(self) -> str:
@@ -1129,7 +1307,8 @@ class DataFrame:
1129
1307
  assert data_file_path.is_file()
1130
1308
  return data_file_path
1131
1309
  else:
1132
- with Env.get().begin_xact():
1310
+ # TODO: extend begin_xact() to accept multiple TVPs for joins
1311
+ with Catalog.get().begin_xact(tbl=self._first_tbl, for_write=False):
1133
1312
  return write_coco_dataset(self, dest_path)
1134
1313
 
1135
1314
  def to_pytorch_dataset(self, image_format: str = 'pt') -> 'torch.utils.data.IterableDataset':
@@ -1174,7 +1353,7 @@ class DataFrame:
1174
1353
  if dest_path.exists(): # fast path: use cache
1175
1354
  assert dest_path.is_dir()
1176
1355
  else:
1177
- with Env.get().begin_xact():
1356
+ with Catalog.get().begin_xact(tbl=self._first_tbl, for_write=False):
1178
1357
  export_parquet(self, dest_path, inline_images=True)
1179
1358
 
1180
1359
  return PixeltablePytorchDataset(path=dest_path, image_format=image_format)
pixeltable/env.py CHANGED
@@ -25,6 +25,7 @@ from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
25
25
 
26
26
  import pixeltable_pgserver
27
27
  import sqlalchemy as sql
28
+ from pillow_heif import register_heif_opener # type: ignore[import-untyped]
28
29
  from tqdm import TqdmWarning
29
30
 
30
31
  from pixeltable import exceptions as excs
@@ -191,6 +192,7 @@ class Env:
191
192
  assert self._dbms is not None
192
193
  return self._dbms
193
194
 
195
+ @property
194
196
  def in_xact(self) -> bool:
195
197
  return self._current_conn is not None
196
198
 
@@ -201,20 +203,17 @@ class Env:
201
203
 
202
204
  @contextmanager
203
205
  def begin_xact(self) -> Iterator[sql.Connection]:
204
- """Return a context manager that yields a connection to the database. Idempotent."""
206
+ """Call Catalog.begin_xact() instead, unless there is a specific reason to call this directly."""
205
207
  if self._current_conn is None:
206
208
  assert self._current_session is None
207
209
  try:
208
210
  with self.engine.begin() as conn, sql.orm.Session(conn) as session:
209
- # TODO: remove print() once we're done with debugging the concurrent update behavior
210
- # print(f'{datetime.datetime.now()}: start xact')
211
211
  self._current_conn = conn
212
212
  self._current_session = session
213
213
  yield conn
214
214
  finally:
215
215
  self._current_session = None
216
216
  self._current_conn = None
217
- # print(f'{datetime.datetime.now()}: end xact')
218
217
  else:
219
218
  assert self._current_session is not None
220
219
  yield self._current_conn
@@ -600,6 +599,7 @@ class Env:
600
599
 
601
600
  def _set_up_runtime(self) -> None:
602
601
  """Check for and start runtime services"""
602
+ register_heif_opener()
603
603
  self._start_web_server()
604
604
  self.__register_packages()
605
605
 
@@ -611,9 +611,11 @@ class Env:
611
611
  self.__register_package('fiftyone')
612
612
  self.__register_package('fireworks', library_name='fireworks-ai')
613
613
  self.__register_package('google.genai', library_name='google-genai')
614
+ self.__register_package('groq')
614
615
  self.__register_package('huggingface_hub', library_name='huggingface-hub')
615
616
  self.__register_package('label_studio_sdk', library_name='label-studio-sdk')
616
617
  self.__register_package('llama_cpp', library_name='llama-cpp-python')
618
+ self.__register_package('mcp')
617
619
  self.__register_package('mistralai')
618
620
  self.__register_package('mistune')
619
621
  self.__register_package('ollama')
pixeltable/exceptions.py CHANGED
@@ -10,6 +10,12 @@ class Error(Exception):
10
10
 
11
11
 
12
12
  class ExprEvalError(Exception):
13
+ """
14
+ Used during query execution to signal expr evaluation failures.
15
+
16
+ NOT A USER-FACING EXCEPTION. All ExprEvalError instances need to be converted into Error instances.
17
+ """
18
+
13
19
  expr: 'exprs.Expr'
14
20
  expr_msg: str
15
21
  exc: Exception
@@ -9,4 +9,4 @@ from .exec_node import ExecNode
9
9
  from .expr_eval import ExprEvalNode
10
10
  from .in_memory_data_node import InMemoryDataNode
11
11
  from .row_update_node import RowUpdateNode
12
- from .sql_node import SqlAggregationNode, SqlJoinNode, SqlLookupNode, SqlNode, SqlScanNode
12
+ from .sql_node import SqlAggregationNode, SqlJoinNode, SqlLookupNode, SqlNode, SqlSampleNode, SqlScanNode
@@ -73,6 +73,8 @@ class ExecNode(abc.ABC):
73
73
  except RuntimeError:
74
74
  loop = asyncio.new_event_loop()
75
75
  asyncio.set_event_loop(loop)
76
+ # we set a deliberately long duration to avoid warnings getting printed to the console in debug mode
77
+ loop.slow_callback_duration = 3600
76
78
 
77
79
  if _logger.isEnabledFor(logging.DEBUG):
78
80
  loop.set_debug(True)
@@ -317,7 +317,10 @@ class JsonMapperDispatcher(Evaluator):
317
317
  for _ in src
318
318
  ]
319
319
  for nested_row, anchor_val in zip(nested_rows, src):
320
- nested_row[self.scope_anchor.slot_idx] = anchor_val
320
+ # It's possible that self.scope_anchor.slot_idx is None; this corresponds to the case where the
321
+ # mapper expression doesn't actually contain references to RELATIVE_PATH_ROOT.
322
+ if self.scope_anchor.slot_idx is not None:
323
+ nested_row[self.scope_anchor.slot_idx] = anchor_val
321
324
  for slot_idx_, nested_slot_idx in self.external_slot_map.items():
322
325
  nested_row[nested_slot_idx] = row[slot_idx_]
323
326
  self.nested_exec_ctx.init_rows(nested_rows)
@@ -49,7 +49,7 @@ class ExprEvalNode(ExecNode):
49
49
  # execution state
50
50
  tasks: set[asyncio.Task] # collects all running tasks to prevent them from getting gc'd
51
51
  exc_event: asyncio.Event # set if an exception needs to be propagated
52
- error: Optional[Union[excs.Error, excs.ExprEvalError]] # exception that needs to be propagated
52
+ error: Optional[Union[Exception]] # exception that needs to be propagated
53
53
  completed_rows: asyncio.Queue[exprs.DataRow] # rows that have completed evaluation
54
54
  completed_event: asyncio.Event # set when completed_rows is non-empty
55
55
  input_iter: AsyncIterator[DataRowBatch]
@@ -133,10 +133,10 @@ class ExprEvalNode(ExecNode):
133
133
  except StopAsyncIteration:
134
134
  self.input_complete = True
135
135
  _logger.debug(f'finished input: #input_rows={self.num_input_rows}, #avail={self.avail_input_rows}')
136
- except excs.Error as err:
137
- self.error = err
136
+ # make sure to pass DBAPIError through, so the transaction handling logic sees it
137
+ except Exception as exc:
138
+ self.error = exc
138
139
  self.exc_event.set()
139
- # TODO: should we also handle Exception here and create an excs.Error from it?
140
140
 
141
141
  @property
142
142
  def total_buffered(self) -> int:
@@ -38,7 +38,7 @@ class InMemoryDataNode(ExecNode):
38
38
  # we materialize the input slots
39
39
  output_exprs = list(row_builder.input_exprs)
40
40
  super().__init__(row_builder, output_exprs, [], None)
41
- assert tbl.get().is_insertable()
41
+ assert tbl.get().is_insertable
42
42
  self.tbl = tbl
43
43
  self.input_rows = rows
44
44
  self.start_row_id = start_row_id