PyPI - pixeltable - Versions diffs - 0.3.12__py3-none-any.whl → 0.3.14__py3-none-any.whl - Mend

pixeltable 0.3.12__py3-none-any.whl → 0.3.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

pixeltable/__init__.py +2 -27
pixeltable/__version__.py +2 -2
pixeltable/catalog/catalog.py +9 -7
pixeltable/catalog/column.py +6 -2
pixeltable/catalog/dir.py +2 -1
pixeltable/catalog/insertable_table.py +11 -0
pixeltable/catalog/schema_object.py +2 -1
pixeltable/catalog/table.py +27 -38
pixeltable/catalog/table_version.py +19 -0
pixeltable/catalog/table_version_path.py +7 -0
pixeltable/catalog/view.py +31 -0
pixeltable/dataframe.py +50 -7
pixeltable/env.py +1 -1
pixeltable/exceptions.py +20 -2
pixeltable/exec/aggregation_node.py +14 -0
pixeltable/exec/cache_prefetch_node.py +1 -1
pixeltable/exec/expr_eval/evaluators.py +0 -4
pixeltable/exec/expr_eval/expr_eval_node.py +1 -2
pixeltable/exec/sql_node.py +3 -2
pixeltable/exprs/column_ref.py +42 -17
pixeltable/exprs/data_row.py +3 -0
pixeltable/exprs/globals.py +1 -1
pixeltable/exprs/literal.py +11 -1
pixeltable/exprs/rowid_ref.py +4 -1
pixeltable/exprs/similarity_expr.py +1 -1
pixeltable/func/function.py +1 -1
pixeltable/func/udf.py +1 -1
pixeltable/functions/__init__.py +2 -0
pixeltable/functions/anthropic.py +1 -1
pixeltable/functions/bedrock.py +130 -0
pixeltable/functions/date.py +185 -0
pixeltable/functions/gemini.py +22 -20
pixeltable/functions/globals.py +1 -16
pixeltable/functions/huggingface.py +7 -6
pixeltable/functions/image.py +15 -16
pixeltable/functions/json.py +2 -1
pixeltable/functions/math.py +40 -0
pixeltable/functions/mistralai.py +3 -2
pixeltable/functions/openai.py +9 -8
pixeltable/functions/string.py +1 -2
pixeltable/functions/together.py +4 -3
pixeltable/functions/video.py +2 -2
pixeltable/globals.py +26 -9
pixeltable/io/datarows.py +4 -3
pixeltable/io/hf_datasets.py +2 -2
pixeltable/io/label_studio.py +17 -17
pixeltable/io/pandas.py +29 -16
pixeltable/io/parquet.py +2 -0
pixeltable/io/table_data_conduit.py +8 -2
pixeltable/metadata/__init__.py +1 -1
pixeltable/metadata/converters/convert_19.py +2 -2
pixeltable/metadata/converters/convert_34.py +21 -0
pixeltable/metadata/notes.py +1 -0
pixeltable/plan.py +12 -5
pixeltable/share/__init__.py +1 -1
pixeltable/share/packager.py +219 -119
pixeltable/share/publish.py +61 -16
pixeltable/store.py +45 -20
pixeltable/type_system.py +46 -2
pixeltable/utils/arrow.py +8 -2
pixeltable/utils/pytorch.py +4 -0
{pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/METADATA +2 -4
{pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/RECORD +66 -63
{pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/WHEEL +1 -1
{pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/LICENSE +0 -0
{pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/entry_points.txt +0 -0

pixeltable/__init__.py CHANGED Viewed

@@ -9,6 +9,7 @@ from .globals import (
     array,
     configure_logging,
     create_dir,
+    create_replica,
     create_snapshot,
     create_table,
     create_view,
@@ -20,36 +21,10 @@ from .globals import (
     list_functions,
     list_tables,
     move,
-    publish_snapshot,
     tool,
     tools,
 )
-from .type_system import (
-    Array,
-    ArrayType,
-    Audio,
-    AudioType,
-    Bool,
-    BoolType,
-    ColumnType,
-    Document,
-    DocumentType,
-    Float,
-    FloatType,
-    Image,
-    ImageType,
-    Int,
-    IntType,
-    Json,
-    JsonType,
-    Required,
-    String,
-    StringType,
-    Timestamp,
-    TimestampType,
-    Video,
-    VideoType,
-)
+from .type_system import Array, Audio, Bool, Date, Document, Float, Image, Int, Json, Required, String, Timestamp, Video
 # This import must go last to avoid circular imports.
 from . import ext, functions, io, iterators  # isort: skip

pixeltable/__version__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 # These version placeholders will be replaced during build.
-__version__ = '0.3.12'
-__version_tuple__ = (0, 3, 12)
+__version__ = '0.3.14'
+__version_tuple__ = (0, 3, 14)

pixeltable/catalog/catalog.py CHANGED Viewed

@@ -432,7 +432,9 @@ class Catalog:
         return view
     @_retry_loop
-    def create_replica(self, path: Path, md: list[schema.FullTableMd], if_exists: IfExistsParam) -> Table:
+    def create_replica(
+        self, path: Path, md: list[schema.FullTableMd], if_exists: IfExistsParam = IfExistsParam.ERROR
+    ) -> Table:
         """
         Creates table, table_version, and table_schema_version records for a replica with the given metadata.
         The metadata should be presented in standard "ancestor order", with the table being replicated at
@@ -458,11 +460,11 @@ class Catalog:
         # TODO: Handle concurrency in create_replica()
         existing = Catalog.get().get_table_by_id(tbl_id)
         if existing is not None:
-            existing_path = Path(existing._path(), allow_system_paths=True)
+            existing_path = Path(existing._path, allow_system_paths=True)
             # It does exist. If it's a non-system table, that's an error: it's already been replicated.
             if not existing_path.is_system_path:
                 raise excs.Error(
-                    f'That table has already been replicated as {existing._path()!r}. \n'
+                    f'That table has already been replicated as {existing._path!r}. \n'
                     f'Drop the existing replica if you wish to re-create it.'
                 )
             # If it's a system table, then this means it was created at some point as the ancestor of some other
@@ -487,7 +489,7 @@ class Catalog:
                 # The table already exists in the catalog. The existing path might be a system path (if the table
                 # was created as an anonymous base table of some other table), or it might not (if it's a snapshot
                 # that was directly replicated by the user at some point). In either case, use the existing path.
-                replica_path = Path(replica._path(), allow_system_paths=True)
+                replica_path = Path(replica._path, allow_system_paths=True)
             # Store the metadata; it could be a new version (in which case a new record will be created) or a
             # known version (in which case the newly received metadata will be validated as identical).
@@ -619,11 +621,11 @@ class Catalog:
                 msg: str
                 if is_replace:
                     msg = (
-                        f'{obj_type_str} {tbl._path()} already exists and has dependents. '
+                        f'{obj_type_str} {tbl._path} already exists and has dependents. '
                         "Use `if_exists='replace_force'` to replace it."
                     )
                 else:
-                    msg = f'{obj_type_str} {tbl._path()} has dependents.'
+                    msg = f'{obj_type_str} {tbl._path} has dependents.'
                 raise excs.Error(msg)
             for view_id in view_ids:
@@ -634,7 +636,7 @@ class Catalog:
         tbl._drop()
         assert tbl._id in self._tbls
         del self._tbls[tbl._id]
-        _logger.info(f'Dropped table `{tbl._path()}`.')
+        _logger.info(f'Dropped table `{tbl._path}`.')
     @_retry_loop
     def create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:

pixeltable/catalog/column.py CHANGED Viewed

@@ -16,6 +16,7 @@ from .globals import MediaValidation, is_valid_identifier
 if TYPE_CHECKING:
     from .table_version import TableVersion
     from .table_version_handle import TableVersionHandle
+    from .table_version_path import TableVersionPath
 _logger = logging.getLogger('pixeltable')
@@ -170,9 +171,12 @@ class Column:
         )
         return len(window_fn_calls) > 0
-    def get_idx_info(self) -> dict[str, 'TableVersion.IndexInfo']:
+    # TODO: This should be moved out of `Column` (its presence in `Column` doesn't anticipate indices being defined on
+    #     multiple dependents)
+    def get_idx_info(self, reference_tbl: Optional['TableVersionPath'] = None) -> dict[str, 'TableVersion.IndexInfo']:
         assert self.tbl is not None
-        return {name: info for name, info in self.tbl.get().idxs_by_name.items() if info.col == self}
+        tbl = reference_tbl.tbl_version if reference_tbl is not None else self.tbl
+        return {name: info for name, info in tbl.get().idxs_by_name.items() if info.col == self}
     @property
     def is_computed(self) -> bool:

pixeltable/catalog/dir.py CHANGED Viewed

@@ -38,12 +38,13 @@ class Dir(SchemaObject):
     def _display_name(cls) -> str:
         return 'directory'
+    @property
     def _path(self) -> str:
         """Returns the path to this schema object."""
         if self._dir_id is None:
             # we're the root dir
             return ''
-        return super()._path()
+        return super()._path
     def _move(self, new_name: str, new_dir_id: UUID) -> None:
         # print(

pixeltable/catalog/insertable_table.py CHANGED Viewed

@@ -228,3 +228,14 @@ class InsertableTable(Table):
         """
         with Env.get().begin_xact():
             return self._tbl_version.get().delete(where=where)
+    @property
+    def _base_table(self) -> Optional['Table']:
+        return None
+    @property
+    def _effective_base_versions(self) -> list[Optional[int]]:
+        return []
+    def _table_descriptor(self) -> str:
+        return f'Table {self._path!r}'

pixeltable/catalog/schema_object.py CHANGED Viewed

@@ -33,6 +33,7 @@ class SchemaObject:
                 return None
             return Catalog.get().get_dir(self._dir_id)
+    @property
     def _path(self) -> str:
         """Returns the path to this schema object."""
         from .catalog import Catalog
@@ -44,7 +45,7 @@ class SchemaObject:
     def get_metadata(self) -> dict[str, Any]:
         """Returns metadata associated with this schema object."""
-        return {'name': self._name, 'path': self._path()}
+        return {'name': self._name, 'path': self._path}
     @classmethod
     @abstractmethod

pixeltable/catalog/table.py CHANGED Viewed

@@ -109,7 +109,7 @@ class Table(SchemaObject):
         self._check_is_dropped()
         with env.Env.get().begin_xact():
             md = super().get_metadata()
-            md['base'] = self._base._path() if self._base is not None else None
+            md['base'] = self._base_table._path if self._base_table is not None else None
             md['schema'] = self._schema
             md['is_replica'] = self._tbl_version.get().is_replica
             md['version'] = self._version
@@ -146,7 +146,7 @@ class Table(SchemaObject):
         col = self._tbl_version_path.get_column(name)
         if col is None:
             raise AttributeError(f'Column {name!r} unknown')
-        return ColumnRef(col)
+        return ColumnRef(col, reference_tbl=self._tbl_version_path)
     def __getitem__(self, name: str) -> 'exprs.ColumnRef':
         """Return a ColumnRef for the given name."""
@@ -165,7 +165,7 @@ class Table(SchemaObject):
         """
         self._check_is_dropped()
         with env.Env.get().begin_xact():
-            return [t._path() for t in self._get_views(recursive=recursive)]
+            return [t._path for t in self._get_views(recursive=recursive)]
     def _get_views(self, *, recursive: bool = True) -> list['Table']:
         cat = catalog.Catalog.get()
@@ -220,6 +220,10 @@ class Table(SchemaObject):
         """
         return self._df().group_by(*items)
+    def distinct(self) -> 'pxt.DataFrame':
+        """Remove duplicate rows from table."""
+        return self._df().distinct()
     def limit(self, n: int) -> 'pxt.DataFrame':
         return self._df().limit(n)
@@ -255,28 +259,30 @@ class Table(SchemaObject):
         return {c.name: c.col_type for c in self._tbl_version_path.columns()}
     @property
-    def _base(self) -> Optional['Table']:
-        """
-        The base table of this `Table`. If this table is a view, returns the `Table`
-        from which it was derived. Otherwise, returns `None`.
-        """
-        if self._tbl_version_path.base is None:
-            return None
-        base_id = self._tbl_version_path.base.tbl_version.id
-        return catalog.Catalog.get().get_table_by_id(base_id)
+    def base_table(self) -> Optional['Table']:
+        with env.Env.get().begin_xact():
+            return self._base_table
     @property
-    def _bases(self) -> list['Table']:
-        """
-        The ancestor list of bases of this table, starting with its immediate base.
-        """
+    @abc.abstractmethod
+    def _base_table(self) -> Optional['Table']:
+        """The base's Table instance"""
+    @property
+    def _base_tables(self) -> list['Table']:
+        """The ancestor list of bases of this table, starting with its immediate base."""
         bases = []
-        base = self._base
+        base = self._base_table
         while base is not None:
             bases.append(base)
-            base = base._base
+            base = base._base_table
         return bases
+    @property
+    @abc.abstractmethod
+    def _effective_base_versions(self) -> list[Optional[int]]:
+        """The effective versions of the ancestor bases, starting with its immediate base."""
     @property
     def _comment(self) -> str:
         return self._tbl_version.get().comment
@@ -300,7 +306,7 @@ class Table(SchemaObject):
         Constructs a list of descriptors for this table that can be pretty-printed.
         """
         helper = DescriptionHelper()
-        helper.append(self._title_descriptor())
+        helper.append(self._table_descriptor())
         helper.append(self._col_descriptor())
         idxs = self._index_descriptor()
         if not idxs.empty:
@@ -312,15 +318,6 @@ class Table(SchemaObject):
             helper.append(f'COMMENT: {self._comment}')
         return helper
-    def _title_descriptor(self) -> str:
-        title: str
-        if self._base is None:
-            title = f'Table\n{self._path()!r}'
-        else:
-            title = f'View\n{self._path()!r}'
-            title += f'\n(of {self.__bases_to_desc()})'
-        return title
     def _col_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
         return pd.DataFrame(
             {
@@ -332,14 +329,6 @@ class Table(SchemaObject):
             if columns is None or col.name in columns
         )
-    def __bases_to_desc(self) -> str:
-        bases = self._bases
-        assert len(bases) >= 1
-        if len(bases) <= 2:
-            return ', '.join(repr(b._path()) for b in bases)
-        else:
-            return f'{bases[0]._path()!r}, ..., {bases[-1]._path()!r}'
     def _index_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
         from pixeltable import index
@@ -373,9 +362,9 @@ class Table(SchemaObject):
         """
         self._check_is_dropped()
         if getattr(builtins, '__IPYTHON__', False):
-            from IPython.display import display
+            from IPython.display import Markdown, display
-            display(self._repr_html_())
+            display(Markdown(self._repr_html_()))
         else:
             print(repr(self))

pixeltable/catalog/table_version.py CHANGED Viewed

@@ -202,6 +202,13 @@ class TableVersion:
         return TableVersionHandle(self.id, self.effective_version, tbl_version=self)
+    @property
+    def versioned_name(self) -> str:
+        if self.effective_version is None:
+            return self.name
+        else:
+            return f'{self.name}:{self.effective_version}'
     @classmethod
     def create(
         cls,
@@ -314,6 +321,18 @@ class TableVersion:
         session.add(schema_version_record)
         return tbl_record.id, tbl_version
+    @classmethod
+    def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
+        tbl_id = UUID(md.tbl_md.tbl_id)
+        view_md = md.tbl_md.view_md
+        base_path = pxt.catalog.TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
+        base = base_path.tbl_version if base_path is not None else None
+        tbl_version = cls(
+            tbl_id, md.tbl_md, md.version_md.version, md.schema_version_md, [], base_path=base_path, base=base
+        )
+        tbl_version.store_tbl.create()
+        return tbl_version
     def drop(self) -> None:
         from .catalog import Catalog

pixeltable/catalog/table_version_path.py CHANGED Viewed

@@ -98,6 +98,13 @@ class TableVersionPath:
             return None
         return self.base.find_tbl_version(id)
+    @property
+    def ancestor_paths(self) -> list[TableVersionPath]:
+        if self.base is None:
+            return [self]
+        else:
+            return [self, *self.base.ancestor_paths]
     def columns(self) -> list[Column]:
         """Return all user columns visible in this tbl version path, including columns from bases"""
         result = list(self.tbl_version.get().cols_by_name.values())

pixeltable/catalog/view.py CHANGED Viewed

@@ -267,3 +267,34 @@ class View(Table):
     def delete(self, where: Optional[exprs.Expr] = None) -> UpdateStatus:
         raise excs.Error(f'{self._display_name()} {self._name!r}: cannot delete from view')
+    @property
+    def _base_table(self) -> Optional['Table']:
+        # if this is a pure snapshot, our tbl_version_path only reflects the base (there is no TableVersion instance
+        # for the snapshot itself)
+        base_id = self._tbl_version.id if self._snapshot_only else self._tbl_version_path.base.tbl_version.id
+        return catalog.Catalog.get().get_table_by_id(base_id)
+    @property
+    def _effective_base_versions(self) -> list[Optional[int]]:
+        effective_versions = [tv.effective_version for tv in self._tbl_version_path.get_tbl_versions()]
+        if self._snapshot_only:
+            return effective_versions
+        else:
+            return effective_versions[1:]
+    def _table_descriptor(self) -> str:
+        display_name = 'Snapshot' if self._snapshot_only else 'View'
+        result = [f'{display_name} {self._path!r}']
+        bases_descrs: list[str] = []
+        for base, effective_version in zip(self._base_tables, self._effective_base_versions):
+            if effective_version is None:
+                bases_descrs.append(f'{base._path!r}')
+            else:
+                base_descr = f'{base._path}:{effective_version}'
+                bases_descrs.append(f'{base_descr!r}')
+        result.append(f' (of {", ".join(bases_descrs)})')
+        if self._tbl_version.get().predicate is not None:
+            result.append(f'\nWhere: {self._tbl_version.get().predicate!s}')
+        return ''.join(result)

pixeltable/dataframe.py CHANGED Viewed

@@ -322,6 +322,8 @@ class DataFrame:
             raise excs.Error('head() cannot be used with order_by()')
         if self._has_joins():
             raise excs.Error('head() not supported for joins')
+        if self.group_by_clause is not None:
+            raise excs.Error('head() cannot be used with group_by()')
         num_rowid_cols = len(self._first_tbl.tbl_version.get().store_tbl.rowid_columns())
         order_by_clause = [exprs.RowidRef(self._first_tbl.tbl_version, idx) for idx in range(num_rowid_cols)]
         return self.order_by(*order_by_clause, asc=True).limit(n).collect()
@@ -345,6 +347,8 @@ class DataFrame:
             raise excs.Error('tail() cannot be used with order_by()')
         if self._has_joins():
             raise excs.Error('tail() not supported for joins')
+        if self.group_by_clause is not None:
+            raise excs.Error('tail() cannot be used with group_by()')
         num_rowid_cols = len(self._first_tbl.tbl_version.get().store_tbl.rowid_columns())
         order_by_clause = [exprs.RowidRef(self._first_tbl.tbl_version, idx) for idx in range(num_rowid_cols)]
         result = self.order_by(*order_by_clause, asc=False).limit(n).collect()
@@ -454,6 +458,9 @@ class DataFrame:
         Returns:
             The number of rows in the DataFrame.
         """
+        if self.group_by_clause is not None:
+            raise excs.Error('count() cannot be used with group_by()')
         from pixeltable.plan import Planner
         stmt = Planner.create_count_stmt(self._first_tbl, self.where_clause)
@@ -513,9 +520,9 @@ class DataFrame:
                 (select list, where clause, ...) vertically.
         """
         if getattr(builtins, '__IPYTHON__', False):
-            from IPython.display import display
+            from IPython.display import Markdown, display
-            display(self._repr_html_())
+            display(Markdown(self._repr_html_()))
         else:
             print(repr(self))
@@ -573,10 +580,21 @@ class DataFrame:
                 raise excs.Error(f'Invalid expression: {raw_expr}')
             if expr.col_type.is_invalid_type() and not (isinstance(expr, exprs.Literal) and expr.val is None):
                 raise excs.Error(f'Invalid type: {raw_expr}')
+            if len(self._from_clause.tbls) == 1:
+                # Select expressions need to be retargeted in order to handle snapshots correctly, as in expressions
+                # such as `snapshot.select(base_tbl.col)`
+                # TODO: For joins involving snapshots, we need a more sophisticated retarget() that can handle
+                #     multiple TableVersionPaths.
+                expr = expr.copy()
+                try:
+                    expr.retarget(self._from_clause.tbls[0])
+                except Exception:
+                    # If retarget() fails, then the succeeding is_bound_by() will raise an error.
+                    pass
             if not expr.is_bound_by(self._from_clause.tbls):
                 raise excs.Error(
                     f"Expression '{expr}' cannot be evaluated in the context of this query's tables "
-                    f'({",".join(tbl.tbl_name() for tbl in self._from_clause.tbls)})'
+                    f'({",".join(tbl.tbl_version.get().versioned_name for tbl in self._from_clause.tbls)})'
                 )
             select_list.append((expr, name))
@@ -823,16 +841,18 @@ class DataFrame:
         grouping_tbl: Optional[catalog.TableVersion] = None
         group_by_clause: Optional[list[exprs.Expr]] = None
         for item in grouping_items:
-            if isinstance(item, catalog.Table):
+            if isinstance(item, (catalog.Table, catalog.TableVersion)):
                 if len(grouping_items) > 1:
                     raise excs.Error('group_by(): only one table can be specified')
                 if len(self._from_clause.tbls) > 1:
                     raise excs.Error('group_by() with Table not supported for joins')
+                grouping_tbl = item if isinstance(item, catalog.TableVersion) else item._tbl_version.get()
                 # we need to make sure that the grouping table is a base of self.tbl
-                base = self._first_tbl.find_tbl_version(item._tbl_version_path.tbl_id())
+                base = self._first_tbl.find_tbl_version(grouping_tbl.id)
                 if base is None or base.id == self._first_tbl.tbl_id():
-                    raise excs.Error(f'group_by(): {item._name} is not a base table of {self._first_tbl.tbl_name()}')
-                grouping_tbl = item._tbl_version_path.tbl_version.get()
+                    raise excs.Error(
+                        f'group_by(): {grouping_tbl.name} is not a base table of {self._first_tbl.tbl_name()}'
+                    )
                 break
             if not isinstance(item, exprs.Expr):
                 raise excs.Error(f'Invalid expression in group_by(): {item}')
@@ -848,6 +868,29 @@ class DataFrame:
             limit=self.limit_val,
         )
+    def distinct(self) -> DataFrame:
+        """
+        Remove duplicate rows from this DataFrame.
+        Note that grouping will be applied to the rows based on the select clause of this Dataframe.
+        In the absence of a select clause, by default, all columns are selected in the grouping.
+        Examples:
+            Select unique addresses from table `addresses`.
+            >>> results = addresses.distinct()
+            Select unique cities in table `addresses`
+            >>> results = addresses.city.distinct()
+            Select unique locations (street, city) in the state of `CA`
+            >>> results = addresses.select(addresses.street, addresses.city).where(addresses.state == 'CA').distinct()
+        """
+        exps, _ = self._normalize_select_list(self._from_clause.tbls, self.select_list)
+        return self.group_by(*exps)
     def order_by(self, *expr_list: exprs.Expr, asc: bool = True) -> DataFrame:
         """Add an order-by clause to this DataFrame.

pixeltable/env.py CHANGED Viewed

@@ -610,7 +610,7 @@ class Env:
         self.__register_package('datasets')
         self.__register_package('fiftyone')
         self.__register_package('fireworks', library_name='fireworks-ai')
-        self.__register_package('google.generativeai', library_name='google-generativeai')
+        self.__register_package('google.genai', library_name='google-genai')
         self.__register_package('huggingface_hub', library_name='huggingface-hub')
         self.__register_package('label_studio_sdk', library_name='label-studio-sdk')
         self.__register_package('llama_cpp', library_name='llama-cpp-python')

pixeltable/exceptions.py CHANGED Viewed

@@ -1,4 +1,3 @@
-from dataclasses import dataclass
 from types import TracebackType
 from typing import TYPE_CHECKING, Any
@@ -10,7 +9,6 @@ class Error(Exception):
     pass
-@dataclass
 class ExprEvalError(Exception):
     expr: 'exprs.Expr'
     expr_msg: str
@@ -19,6 +17,26 @@ class ExprEvalError(Exception):
     input_vals: list[Any]
     row_num: int
+    def __init__(
+        self,
+        expr: 'exprs.Expr',
+        expr_msg: str,
+        exc: Exception,
+        exc_tb: TracebackType,
+        input_vals: list[Any],
+        row_num: int,
+    ) -> None:
+        exct = type(exc)
+        super().__init__(
+            f'Expression evaluation failed with an error of type `{exct.__module__}.{exct.__qualname__}`:\n{expr}'
+        )
+        self.expr = expr
+        self.expr_msg = expr_msg
+        self.exc = exc
+        self.exc_tb = exc_tb
+        self.input_vals = input_vals
+        self.row_num = row_num
 class PixeltableWarning(Warning):
     pass

pixeltable/exec/aggregation_node.py CHANGED Viewed

@@ -24,6 +24,7 @@ class AggregationNode(ExecNode):
     agg_fn_eval_ctx: exprs.RowBuilder.EvalCtx
     agg_fn_calls: list[exprs.FunctionCall]
     output_batch: DataRowBatch
+    limit: Optional[int]
     def __init__(
         self,
@@ -45,6 +46,11 @@ class AggregationNode(ExecNode):
         self.agg_fn_calls = [cast(exprs.FunctionCall, e) for e in self.agg_fn_eval_ctx.target_exprs]
         # create output_batch here, rather than in __iter__(), so we don't need to remember tbl and row_builder
         self.output_batch = DataRowBatch(tbl, row_builder, 0)
+        self.limit = None
+    def set_limit(self, limit: int) -> None:
+        # we can't propagate the limit to our input
+        self.limit = limit
     def _reset_agg_state(self, row_num: int) -> None:
         for fn_call in self.agg_fn_calls:
@@ -69,21 +75,29 @@ class AggregationNode(ExecNode):
         prev_row: Optional[exprs.DataRow] = None
         current_group: Optional[list[Any]] = None  # the values of the group-by exprs
         num_input_rows = 0
+        num_output_rows = 0
         async for row_batch in self.input:
             num_input_rows += len(row_batch)
             for row in row_batch:
                 group = [row[e.slot_idx] for e in self.group_by] if self.group_by is not None else None
                 if current_group is None:
                     current_group = group
                     self._reset_agg_state(0)
                 if group != current_group:
                     # we're entering a new group, emit a row for the previous one
                     self.row_builder.eval(prev_row, self.agg_fn_eval_ctx, profile=self.ctx.profile)
                     self.output_batch.add_row(prev_row)
+                    num_output_rows += 1
+                    if self.limit is not None and num_output_rows == self.limit:
+                        yield self.output_batch
+                        return
                     current_group = group
                     self._reset_agg_state(0)
                 self._update_agg_state(row, 0)
                 prev_row = row
         if prev_row is not None:
             # emit the last group
             self.row_builder.eval(prev_row, self.agg_fn_eval_ctx, profile=self.ctx.profile)

pixeltable/exec/cache_prefetch_node.py CHANGED Viewed

@@ -167,7 +167,7 @@ class CachePrefetchNode(ExecNode):
         assert not self.input_finished
         input_batch: Optional[DataRowBatch]
         try:
-            input_batch = await input.__anext__()
+            input_batch = await anext(input)
         except StopAsyncIteration:
             input_batch = None
         if input_batch is None:

pixeltable/exec/expr_eval/evaluators.py CHANGED Viewed

@@ -208,10 +208,6 @@ class FnCallEvaluator(Evaluator):
             _logger.debug(f'Evaluated slot {self.fn_call.slot_idx} in {end_ts - start_ts}')
             self.dispatcher.dispatch([call_args.row], self.exec_ctx)
         except Exception as exc:
-            import anthropic
-            if isinstance(exc, anthropic.RateLimitError):
-                _logger.debug(f'RateLimitError: {exc}')
             _, _, exc_tb = sys.exc_info()
             call_args.row.set_exc(self.fn_call.slot_idx, exc)
             self.dispatcher.dispatch_exc(call_args.rows, self.fn_call.slot_idx, exc_tb, self.exec_ctx)

pixeltable/exec/expr_eval/expr_eval_node.py CHANGED Viewed

@@ -115,7 +115,7 @@ class ExprEvalNode(ExecNode):
         """
         assert not self.input_complete
         try:
-            batch = await self.input_iter.__anext__()
+            batch = await anext(self.input_iter)
             assert self.next_input_batch is None
             if self.current_input_batch is None:
                 self.current_input_batch = batch
@@ -282,7 +282,6 @@ class ExprEvalNode(ExecNode):
                 if self.exc_event.is_set():
                     # we got an exception that we need to propagate through __iter__()
-                    _logger.debug(f'Propagating exception {self.error}')
                     if isinstance(self.error, excs.ExprEvalError):
                         raise self.error from self.error.exc
                     else:

pixeltable 0.3.12__py3-none-any.whl → 0.3.14__py3-none-any.whl

pixeltable 0.3.12__py3-none-any.whl → 0.3.14__py3-none-any.whl