PyPI - pixeltable - Versions diffs - 0.2.20__py3-none-any.whl → 0.2.21__py3-none-any.whl - Mend

pixeltable 0.2.20__py3-none-any.whl → 0.2.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (84)

pixeltable/__init__.py +7 -19
pixeltable/__version__.py +2 -2
pixeltable/catalog/__init__.py +7 -7
pixeltable/catalog/globals.py +3 -0
pixeltable/catalog/table.py +208 -145
pixeltable/catalog/table_version.py +36 -18
pixeltable/catalog/table_version_path.py +0 -8
pixeltable/catalog/view.py +3 -3
pixeltable/dataframe.py +9 -24
pixeltable/env.py +1 -1
pixeltable/exec/__init__.py +1 -1
pixeltable/exec/aggregation_node.py +22 -15
pixeltable/exec/data_row_batch.py +7 -7
pixeltable/exec/exec_node.py +35 -7
pixeltable/exec/expr_eval_node.py +2 -1
pixeltable/exec/in_memory_data_node.py +9 -9
pixeltable/exec/sql_node.py +265 -136
pixeltable/exprs/__init__.py +1 -0
pixeltable/exprs/data_row.py +30 -19
pixeltable/exprs/expr.py +15 -14
pixeltable/exprs/expr_dict.py +55 -0
pixeltable/exprs/expr_set.py +21 -15
pixeltable/exprs/function_call.py +21 -8
pixeltable/exprs/rowid_ref.py +2 -2
pixeltable/exprs/sql_element_cache.py +5 -1
pixeltable/ext/functions/whisperx.py +7 -2
pixeltable/func/callable_function.py +2 -2
pixeltable/func/function_registry.py +6 -7
pixeltable/func/query_template_function.py +11 -12
pixeltable/func/signature.py +17 -15
pixeltable/func/udf.py +0 -4
pixeltable/functions/__init__.py +1 -1
pixeltable/functions/audio.py +4 -6
pixeltable/functions/globals.py +86 -42
pixeltable/functions/huggingface.py +12 -14
pixeltable/functions/image.py +59 -45
pixeltable/functions/json.py +0 -1
pixeltable/functions/mistralai.py +2 -2
pixeltable/functions/openai.py +22 -25
pixeltable/functions/string.py +50 -50
pixeltable/functions/timestamp.py +20 -20
pixeltable/functions/together.py +2 -2
pixeltable/functions/video.py +11 -20
pixeltable/functions/whisper.py +2 -20
pixeltable/globals.py +55 -56
pixeltable/index/base.py +2 -2
pixeltable/index/btree.py +7 -7
pixeltable/index/embedding_index.py +8 -10
pixeltable/io/external_store.py +11 -5
pixeltable/io/globals.py +2 -0
pixeltable/io/hf_datasets.py +1 -1
pixeltable/io/label_studio.py +6 -6
pixeltable/io/parquet.py +14 -13
pixeltable/iterators/document.py +9 -7
pixeltable/iterators/video.py +10 -1
pixeltable/metadata/__init__.py +3 -2
pixeltable/metadata/converters/convert_14.py +4 -2
pixeltable/metadata/converters/convert_15.py +1 -1
pixeltable/metadata/converters/convert_19.py +1 -0
pixeltable/metadata/converters/convert_20.py +1 -1
pixeltable/metadata/converters/util.py +9 -8
pixeltable/metadata/schema.py +32 -21
pixeltable/plan.py +136 -154
pixeltable/store.py +51 -36
pixeltable/tool/create_test_db_dump.py +6 -6
pixeltable/tool/doc_plugins/griffe.py +3 -34
pixeltable/tool/mypy_plugin.py +32 -0
pixeltable/type_system.py +243 -60
pixeltable/utils/arrow.py +10 -9
pixeltable/utils/coco.py +4 -4
pixeltable/utils/documents.py +1 -1
pixeltable/utils/filecache.py +9 -9
pixeltable/utils/formatter.py +1 -1
pixeltable/utils/http_server.py +2 -5
pixeltable/utils/media_store.py +6 -6
pixeltable/utils/pytorch.py +10 -11
pixeltable/utils/sql.py +2 -1
{pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/METADATA +6 -5
pixeltable-0.2.21.dist-info/RECORD +148 -0
pixeltable/utils/help.py +0 -11
pixeltable-0.2.20.dist-info/RECORD +0 -147
{pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/LICENSE +0 -0
{pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/WHEEL +0 -0
{pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/entry_points.txt +0 -0

pixeltable/catalog/table_version.py CHANGED Viewed

@@ -6,7 +6,7 @@ import inspect
 import logging
 import time
 import uuid
-from typing import TYPE_CHECKING, Any, Iterable, Optional
+from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional
 from uuid import UUID
 import sqlalchemy as sql
@@ -453,7 +453,9 @@ class TableVersion:
         self.idxs_by_name[idx_name] = idx_info
         # add the columns and update the metadata
-        status = self._add_columns([val_col, undo_col], conn)
+        # TODO support on_error='abort' for indices; it's tricky because of the way metadata changes are entangled
+        # with the database operations
+        status = self._add_columns([val_col, undo_col], conn, print_stats=False, on_error='ignore')
         # now create the index structure
         idx.create_index(self._store_idx_name(idx_id), val_col, conn)
@@ -478,7 +480,7 @@ class TableVersion:
             self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
             _logger.info(f'Dropped index {idx_md.name} on table {self.name}')
-    def add_column(self, col: Column, print_stats: bool = False) -> UpdateStatus:
+    def add_column(self, col: Column, print_stats: bool, on_error: Literal['abort', 'ignore']) -> UpdateStatus:
         """Adds a column to the table.
         """
         assert not self.is_snapshot
@@ -498,9 +500,8 @@ class TableVersion:
         preceding_schema_version = self.schema_version
         self.schema_version = self.version
         with Env.get().engine.begin() as conn:
-            status = self._add_columns([col], conn, print_stats=print_stats)
+            status = self._add_columns([col], conn, print_stats=print_stats, on_error=on_error)
             _ = self._add_default_index(col, conn)
-            # TODO: what to do about errors?
             self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
         _logger.info(f'Added column {col.name} to table {self.name}, new version: {self.version}')
@@ -512,7 +513,13 @@ class TableVersion:
         _logger.info(f'Column {col.name}: {msg}')
         return status
-    def _add_columns(self, cols: Iterable[Column], conn: sql.engine.Connection, print_stats: bool = False) -> UpdateStatus:
+    def _add_columns(
+        self,
+        cols: Iterable[Column],
+        conn: sql.engine.Connection,
+        print_stats: bool,
+        on_error: Literal['abort', 'ignore']
+    ) -> UpdateStatus:
         """Add and populate columns within the current transaction"""
         cols = list(cols)
         row_count = self.store_tbl.count(conn=conn)
@@ -550,10 +557,14 @@ class TableVersion:
             try:
                 plan.ctx.set_conn(conn)
                 plan.open()
-                num_excs = self.store_tbl.load_column(col, plan, value_expr_slot_idx, conn)
+                try:
+                    num_excs = self.store_tbl.load_column(col, plan, value_expr_slot_idx, conn, on_error)
+                except sql.exc.DBAPIError as exc:
+                    # Wrap the DBAPIError in an excs.Error to unify processing in the subsequent except block
+                    raise excs.Error(f'SQL error during execution of computed column `{col.name}`:\n{exc}') from exc
                 if num_excs > 0:
                     cols_with_excs.append(col)
-            except sql.exc.DBAPIError as e:
+            except excs.Error as exc:
                 self.cols.pop()
                 for col in cols:
                     # remove columns that we already added
@@ -564,7 +575,7 @@ class TableVersion:
                     del self.cols_by_id[col.id]
                 # we need to re-initialize the sqlalchemy schema
                 self.store_tbl.create_sa_tbl()
-                raise excs.Error(f'Error during SQL execution:\n{e}')
+                raise exc
             finally:
                 plan.close()
@@ -689,21 +700,30 @@ class TableVersion:
             plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
         else:
             plan = Planner.create_df_insert_plan(self, df, ignore_errors=not fail_on_exception)
+        # this is a base table; we generate rowids during the insert
+        def rowids() -> Iterator[int]:
+            while True:
+                rowid = self.next_rowid
+                self.next_rowid += 1
+                yield rowid
         if conn is None:
             with Env.get().engine.begin() as conn:
-                return self._insert(plan, conn, time.time(), print_stats)
+                return self._insert(plan, conn, time.time(), print_stats=print_stats, rowids=rowids())
         else:
-            return self._insert(plan, conn, time.time(), print_stats)
+            return self._insert(plan, conn, time.time(), print_stats=print_stats, rowids=rowids())
     def _insert(
-        self, exec_plan: 'exec.ExecNode', conn: sql.engine.Connection, timestamp: float, print_stats: bool = False,
+        self, exec_plan: 'exec.ExecNode', conn: sql.engine.Connection, timestamp: float, *,
+        rowids: Optional[Iterator[int]] = None, print_stats: bool = False,
     ) -> UpdateStatus:
         """Insert rows produced by exec_plan and propagate to views"""
         # we're creating a new version
         self.version += 1
         result = UpdateStatus()
-        num_rows, num_excs, cols_with_excs = self.store_tbl.insert_rows(exec_plan, conn, v_min=self.version)
-        self.next_rowid = num_rows
+        num_rows, num_excs, cols_with_excs = self.store_tbl.insert_rows(
+            exec_plan, conn, v_min=self.version, rowids=rowids)
         result.num_rows = num_rows
         result.num_excs = num_excs
         result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
@@ -714,7 +734,7 @@ class TableVersion:
         for view in self.mutable_views:
             from pixeltable.plan import Planner
             plan, _ = Planner.create_view_load_plan(view.path, propagates_insert=True)
-            status = view._insert(plan, conn, timestamp, print_stats)
+            status = view._insert(plan, conn, timestamp, print_stats=print_stats)
             result.num_rows += status.num_rows
             result.num_excs += status.num_excs
             result.num_computed_values += status.num_computed_values
@@ -751,9 +771,7 @@ class TableVersion:
                 raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
         with Env.get().engine.begin() as conn:
-            plan, updated_cols, recomputed_cols = (
-                Planner.create_update_plan(self.path, update_spec, [], where, cascade)
-            )
+            plan, updated_cols, recomputed_cols = Planner.create_update_plan(self.path, update_spec, [], where, cascade)
             from pixeltable.exprs import SqlElementCache
             result = self.propagate_update(
                 plan, where.sql_expr(SqlElementCache()) if where is not None else None, recomputed_cols,

pixeltable/catalog/table_version_path.py CHANGED Viewed

@@ -91,14 +91,6 @@ class TableVersionPath:
         col = self.tbl_version.cols_by_name[col_name]
         return ColumnRef(col)
-    def __getitem__(self, index: object) -> Union[exprs.ColumnRef, pxt.DataFrame]:
-        """Return a ColumnRef for the given column name, or a DataFrame for the given slice.
-        """
-        if isinstance(index, str):
-            # basically <tbl>.<colname>
-            return self.__getattr__(index)
-        return pxt.DataFrame(self).__getitem__(index)
     def columns(self) -> list[Column]:
         """Return all user columns visible in this tbl version path, including columns from bases"""
         result = list(self.tbl_version.cols_by_name.values())

pixeltable/catalog/view.py CHANGED Viewed

@@ -52,11 +52,11 @@ class View(Table):
     @classmethod
     def _create(
-            cls, dir_id: UUID, name: str, base: TableVersionPath, schema: Dict[str, Any],
-            predicate: 'pxt.exprs.Expr', is_snapshot: bool, num_retained_versions: int, comment: str,
+            cls, dir_id: UUID, name: str, base: TableVersionPath, additional_columns: Dict[str, Any],
+            predicate: Optional['pxt.exprs.Expr'], is_snapshot: bool, num_retained_versions: int, comment: str,
             iterator_cls: Optional[Type[ComponentIterator]], iterator_args: Optional[Dict]
     ) -> View:
-        columns = cls._create_columns(schema)
+        columns = cls._create_columns(additional_columns)
         cls._verify_schema(columns)
         # verify that filter can be evaluated in the context of the base

pixeltable/dataframe.py CHANGED Viewed

@@ -8,7 +8,7 @@ import logging
 import mimetypes
 import traceback
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, Dict, Hashable, Iterator, List, Optional, Set, Tuple
+from typing import TYPE_CHECKING, Any, Callable, Dict, Hashable, Iterator, List, Optional, Sequence, Set, Tuple, Union
 import pandas as pd
 import pandas.io.formats.style
@@ -97,8 +97,8 @@ class DataFrameResultSet:
             return self._rows[index[0]][col_idx]
         raise excs.Error(f'Bad index: {index}')
-    def __iter__(self) -> DataFrameResultSetIterator:
-        return DataFrameResultSetIterator(self)
+    def __iter__(self) -> Iterator[dict[str, Any]]:
+        return (self._row_to_dict(i) for i in range(len(self)))
     def __eq__(self, other):
         if not isinstance(other, DataFrameResultSet):
@@ -106,19 +106,6 @@ class DataFrameResultSet:
         return self.to_pandas().equals(other.to_pandas())
-class DataFrameResultSetIterator:
-    def __init__(self, result_set: DataFrameResultSet):
-        self._result_set = result_set
-        self._idx = 0
-    def __next__(self) -> Dict[str, Any]:
-        if self._idx >= len(self._result_set):
-            raise StopIteration
-        row = self._result_set._row_to_dict(self._idx)
-        self._idx += 1
-        return row
 # # TODO: remove this; it's only here as a reminder that we still need to call release() in the current implementation
 # class AnalysisInfo:
 #     def __init__(self, tbl: catalog.TableVersion):
@@ -296,7 +283,7 @@ class DataFrame:
     def _create_query_plan(self) -> exec.ExecNode:
         # construct a group-by clause if we're grouping by a table
-        group_by_clause: List[exprs.Expr] = []
+        group_by_clause: Optional[list[exprs.Expr]] = None
         if self.grouping_tbl is not None:
             assert self.group_by_clause is None
             num_rowid_cols = len(self.grouping_tbl.store_tbl.rowid_columns())
@@ -315,8 +302,8 @@ class DataFrame:
             where_clause=self.where_clause,
             group_by_clause=group_by_clause,
             order_by_clause=self.order_by_clause if self.order_by_clause is not None else [],
-            limit=self.limit_val if self.limit_val is not None else 0,
-        )  # limit_val == 0: no limit_val
+            limit=self.limit_val
+        )
     def show(self, n: int = 20) -> DataFrameResultSet:
@@ -629,17 +616,15 @@ class DataFrame:
         if self.limit_val is not None:
             raise excs.Error(f'Cannot use `{op_name}` after `limit`')
-    def __getitem__(self, index: object) -> DataFrame:
+    def __getitem__(self, index: Union[exprs.Expr, Sequence[exprs.Expr]]) -> DataFrame:
         """
         Allowed:
         - [List[Expr]]/[Tuple[Expr]]: setting the select list
         - [Expr]: setting a single-col select list
         """
-        if isinstance(index, tuple):
-            index = list(index)
         if isinstance(index, exprs.Expr):
-            index = [index]
-        if isinstance(index, list):
+            return self.select(index)
+        if isinstance(index, Sequence):
             return self.select(*index)
         raise TypeError(f'Invalid index type: {type(index)}')

pixeltable/env.py CHANGED Viewed

@@ -342,7 +342,7 @@ class Env:
         if create_db:
             from pixeltable.metadata import schema
-            schema.Base.metadata.create_all(self._sa_engine)
+            schema.base_metadata.create_all(self._sa_engine)
             metadata.create_system_info(self._sa_engine)
         print(f'Connected to Pixeltable database at: {self.db_url}')

pixeltable/exec/__init__.py CHANGED Viewed

@@ -8,4 +8,4 @@ from .expr_eval_node import ExprEvalNode
 from .in_memory_data_node import InMemoryDataNode
 from .media_validation_node import MediaValidationNode
 from .row_update_node import RowUpdateNode
-from .sql_node import SqlLookupNode, SqlScanNode
+from .sql_node import SqlLookupNode, SqlScanNode, SqlAggregationNode, SqlNode

pixeltable/exec/aggregation_node.py CHANGED Viewed

@@ -2,7 +2,7 @@ from __future__ import annotations
 import logging
 import sys
-from typing import Iterable, List, Optional, Any
+from typing import Iterable, Optional, Any, Iterator
 import pixeltable.catalog as catalog
 import pixeltable.exceptions as excs
@@ -13,17 +13,29 @@ from .exec_node import ExecNode
 _logger = logging.getLogger('pixeltable')
 class AggregationNode(ExecNode):
+    """
+    In-memory aggregation for UDAs.
+    At the moment, this returns all results in a single DataRowBatch.
+    """
+    group_by: Optional[list[exprs.Expr]]
+    input_exprs: list[exprs.Expr]
+    agg_fn_eval_ctx: exprs.RowBuilder.EvalCtx
+    agg_fn_calls: list[exprs.FunctionCall]
+    output_batch: DataRowBatch
     def __init__(
-            self, tbl: catalog.TableVersion, row_builder: exprs.RowBuilder, group_by: List[exprs.Expr],
-            agg_fn_calls: List[exprs.FunctionCall], input_exprs: Iterable[exprs.Expr], input: ExecNode
+            self, tbl: catalog.TableVersion, row_builder: exprs.RowBuilder, group_by: Optional[list[exprs.Expr]],
+            agg_fn_calls: list[exprs.FunctionCall], input_exprs: Iterable[exprs.Expr], input: ExecNode
     ):
         super().__init__(row_builder, group_by + agg_fn_calls, input_exprs, input)
         self.input = input
         self.group_by = group_by
         self.input_exprs = list(input_exprs)
-        self.agg_fn_eval_ctx = row_builder.create_eval_ctx(agg_fn_calls, exclude=input_exprs)
+        self.agg_fn_eval_ctx = row_builder.create_eval_ctx(agg_fn_calls, exclude=self.input_exprs)
         # we need to make sure to refer to the same exprs that RowBuilder.eval() will use
         self.agg_fn_calls = self.agg_fn_eval_ctx.target_exprs
+        # create output_batch here, rather than in __iter__(), so we don't need to remember tbl and row_builder
         self.output_batch = DataRowBatch(tbl, row_builder, 0)
     def _reset_agg_state(self, row_num: int) -> None:
@@ -45,17 +57,14 @@ class AggregationNode(ExecNode):
                 input_vals = [row[d.slot_idx] for d in fn_call.dependencies()]
                 raise excs.ExprEvalError(fn_call, expr_msg, e, exc_tb, input_vals, row_num)
-    def __next__(self) -> DataRowBatch:
-        if self.output_batch is None:
-            raise StopIteration
+    def __iter__(self) -> Iterator[DataRowBatch]:
         prev_row: Optional[exprs.DataRow] = None
-        current_group: Optional[List[Any]] = None  # the values of the group-by exprs
+        current_group: Optional[list[Any]] = None  # the values of the group-by exprs
         num_input_rows = 0
         for row_batch in self.input:
             num_input_rows += len(row_batch)
             for row in row_batch:
-                group = [row[e.slot_idx] for e in self.group_by]
+                group = [row[e.slot_idx] for e in self.group_by] if self.group_by is not None else None
                 if current_group is None:
                     current_group = group
                     self._reset_agg_state(0)
@@ -71,9 +80,7 @@ class AggregationNode(ExecNode):
         self.row_builder.eval(prev_row, self.agg_fn_eval_ctx, profile=self.ctx.profile)
         self.output_batch.add_row(prev_row)
-        result = self.output_batch
-        result.flush_imgs(None, self.stored_img_cols, self.flushed_img_slots)
-        self.output_batch = None
-        _logger.debug(f'AggregateNode: consumed {num_input_rows} rows, returning {len(result.rows)} rows')
-        return result
+        self.output_batch.flush_imgs(None, self.stored_img_cols, self.flushed_img_slots)
+        _logger.debug(f'AggregateNode: consumed {num_input_rows} rows, returning {len(self.output_batch.rows)} rows')
+        yield self.output_batch

pixeltable/exec/data_row_batch.py CHANGED Viewed

@@ -14,6 +14,13 @@ class DataRowBatch:
     Contains the metadata needed to initialize DataRows.
     """
+    tbl: Optional[catalog.TableVersion]
+    row_builder: exprs.RowBuilder
+    img_slot_idxs: list[int]
+    media_slot_idxs: list[int]  # non-image media slots
+    array_slot_idxs: list[int]
+    rows: list[exprs.DataRow]
     def __init__(self, tbl: Optional[catalog.TableVersion], row_builder: exprs.RowBuilder, len: int = 0):
         self.tbl = tbl
         self.row_builder = row_builder
@@ -39,13 +46,6 @@ class DataRowBatch:
     def pop_row(self) -> exprs.DataRow:
         return self.rows.pop()
-    def set_row_ids(self, row_ids: List[int]) -> None:
-        """Sets pks for rows in batch"""
-        assert self.tbl is not None
-        assert len(row_ids) == len(self.rows)
-        for row, row_id in zip(self.rows, row_ids):
-            row.set_pk((row_id, self.tbl))
     def __len__(self) -> int:
         return len(self.rows)

pixeltable/exec/exec_node.py CHANGED Viewed

@@ -1,13 +1,25 @@
 from __future__ import annotations
-from typing import Iterable, Optional, List
 import abc
+from typing import Iterable, Optional, List, TYPE_CHECKING, Iterator
+import pixeltable.exprs as exprs
 from .data_row_batch import DataRowBatch
 from .exec_context import ExecContext
-import pixeltable.exprs as exprs
+if TYPE_CHECKING:
+    from pixeltable import exec
 class ExecNode(abc.ABC):
     """Base class of all execution nodes"""
+    output_exprs: Iterable[exprs.Expr]
+    row_builder: exprs.RowBuilder
+    input: Optional[ExecNode]
+    flushed_img_slots: list[int]  # idxs of image slots of our output_exprs dependencies
+    stored_img_cols: list[exprs.ColumnSlotIdx]
+    ctx: Optional[ExecContext]
+    __iter: Optional[Iterator[DataRowBatch]]
     def __init__(
             self, row_builder: exprs.RowBuilder, output_exprs: Iterable[exprs.Expr],
             input_exprs: Iterable[exprs.Expr], input: Optional[ExecNode] = None):
@@ -21,8 +33,9 @@ class ExecNode(abc.ABC):
             e.slot_idx for e in output_dependencies
             if e.col_type.is_image_type() and e.slot_idx not in output_slot_idxs
         ]
-        self.stored_img_cols: List[exprs.ColumnSlotIdx] = []
-        self.ctx: Optional[ExecContext] = None  # all nodes of a tree share the same context
+        self.stored_img_cols = []
+        self.ctx = None  # all nodes of a tree share the same context
+        self.__iter = None
     def set_ctx(self, ctx: ExecContext) -> None:
         self.ctx = ctx
@@ -35,12 +48,15 @@ class ExecNode(abc.ABC):
         if self.input is not None:
             self.input.set_stored_img_cols(stored_img_cols)
-    def __iter__(self):
+    # TODO: make this an abstractmethod when __next__() is removed
+    def __iter__(self) -> Iterator[DataRowBatch]:
         return self
-    @abc.abstractmethod
+    # TODO: remove this and switch every subclass over to implementing __iter__
     def __next__(self) -> DataRowBatch:
-        pass
+        if self.__iter is None:
+            self.__iter = iter(self)
+        return next(self.__iter)
     def open(self) -> None:
         """Bottom-up initialization of nodes for execution. Must be called before __next__."""
@@ -60,3 +76,15 @@ class ExecNode(abc.ABC):
     def _close(self) -> None:
         pass
+    def get_sql_node(self) -> Optional['exec.SqlNode']:
+        from .sql_node import SqlNode
+        if isinstance(self, SqlNode):
+            return self
+        if self.input is not None:
+            return self.input.get_sql_node()
+        return None
+    def set_limit(self, limit: int) -> None:
+        """Default implementation propagates to input"""
+        if self.input is not None:
+            self.input.set_limit(limit)

pixeltable/exec/expr_eval_node.py CHANGED Viewed

@@ -5,10 +5,11 @@ import warnings
 from dataclasses import dataclass
 from typing import Iterable, List, Optional
-from tqdm import tqdm, TqdmWarning
+from tqdm import TqdmWarning, tqdm
 import pixeltable.exprs as exprs
 from pixeltable.func import CallableFunction
 from .data_row_batch import DataRowBatch
 from .exec_node import ExecNode

pixeltable/exec/in_memory_data_node.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import logging
-from typing import Any, Optional
+from typing import Any, Optional, Iterator
 import pixeltable.catalog as catalog
 import pixeltable.exprs as exprs
@@ -18,6 +18,11 @@ class InMemoryDataNode(ExecNode):
     - with the values provided in the input rows
     - if an input row doesn't provide a value, sets the slot to the column default
     """
+    tbl: catalog.TableVersion
+    input_rows: list[dict[str, Any]]
+    start_row_id: int
+    output_rows: Optional[DataRowBatch]
     def __init__(
         self, tbl: catalog.TableVersion, rows: list[dict[str, Any]],
         row_builder: exprs.RowBuilder, start_row_id: int,
@@ -29,8 +34,7 @@ class InMemoryDataNode(ExecNode):
         self.tbl = tbl
         self.input_rows = rows
         self.start_row_id = start_row_id
-        self.has_returned_data = False
-        self.output_rows: Optional[DataRowBatch] = None
+        self.output_rows = None
     def _open(self) -> None:
         """Create row batch and populate with self.input_rows"""
@@ -67,12 +71,8 @@ class InMemoryDataNode(ExecNode):
                 assert col_info is not None
                 self.output_rows[row_idx][col_info.slot_idx] = None
-        self.output_rows.set_row_ids([self.start_row_id + i for i in range(len(self.output_rows))])
         self.ctx.num_rows = len(self.output_rows)
-    def __next__(self) -> DataRowBatch:
-        if self.has_returned_data:
-            raise StopIteration
-        self.has_returned_data = True
+    def __iter__(self) -> Iterator[DataRowBatch]:
         _logger.debug(f'InMemoryDataNode: created row batch with {len(self.output_rows)} output_rows')
-        return self.output_rows
+        yield self.output_rows

pixeltable 0.2.20__py3-none-any.whl → 0.2.21__py3-none-any.whl

Potentially problematic release.

pixeltable 0.2.20__py3-none-any.whl → 0.2.21__py3-none-any.whl