PyPI - pixeltable - Versions diffs - 0.2.13__py3-none-any.whl → 0.2.15__py3-none-any.whl - Mend

pixeltable 0.2.13__py3-none-any.whl → 0.2.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (58)

pixeltable/__init__.py +1 -1
pixeltable/__version__.py +2 -2
pixeltable/catalog/column.py +8 -3
pixeltable/catalog/globals.py +8 -0
pixeltable/catalog/table.py +25 -9
pixeltable/catalog/table_version.py +30 -55
pixeltable/catalog/view.py +1 -1
pixeltable/env.py +4 -4
pixeltable/exec/__init__.py +2 -1
pixeltable/exec/row_update_node.py +61 -0
pixeltable/exec/{sql_scan_node.py → sql_node.py} +120 -56
pixeltable/exprs/__init__.py +1 -1
pixeltable/exprs/arithmetic_expr.py +41 -16
pixeltable/exprs/expr.py +72 -22
pixeltable/exprs/function_call.py +64 -29
pixeltable/exprs/globals.py +5 -1
pixeltable/exprs/inline_array.py +18 -11
pixeltable/exprs/method_ref.py +63 -0
pixeltable/ext/__init__.py +9 -0
pixeltable/ext/functions/__init__.py +8 -0
pixeltable/ext/functions/whisperx.py +45 -5
pixeltable/ext/functions/yolox.py +60 -14
pixeltable/func/callable_function.py +12 -4
pixeltable/func/expr_template_function.py +1 -1
pixeltable/func/function.py +12 -2
pixeltable/func/function_registry.py +24 -9
pixeltable/func/udf.py +32 -4
pixeltable/functions/__init__.py +1 -1
pixeltable/functions/fireworks.py +33 -0
pixeltable/functions/huggingface.py +96 -6
pixeltable/functions/image.py +226 -41
pixeltable/functions/json.py +46 -0
pixeltable/functions/openai.py +214 -0
pixeltable/functions/string.py +195 -218
pixeltable/functions/timestamp.py +210 -0
pixeltable/functions/together.py +106 -0
pixeltable/functions/video.py +2 -2
pixeltable/functions/{eval.py → vision.py} +170 -27
pixeltable/functions/whisper.py +32 -0
pixeltable/io/__init__.py +1 -1
pixeltable/io/external_store.py +2 -2
pixeltable/io/globals.py +133 -1
pixeltable/io/pandas.py +82 -31
pixeltable/iterators/video.py +55 -23
pixeltable/metadata/__init__.py +1 -1
pixeltable/metadata/converters/convert_18.py +39 -0
pixeltable/metadata/notes.py +10 -0
pixeltable/plan.py +76 -1
pixeltable/store.py +65 -28
pixeltable/tool/create_test_db_dump.py +8 -9
pixeltable/tool/doc_plugins/griffe.py +4 -0
pixeltable/type_system.py +84 -63
{pixeltable-0.2.13.dist-info → pixeltable-0.2.15.dist-info}/METADATA +2 -2
{pixeltable-0.2.13.dist-info → pixeltable-0.2.15.dist-info}/RECORD +57 -51
pixeltable/exprs/image_member_access.py +0 -96
{pixeltable-0.2.13.dist-info → pixeltable-0.2.15.dist-info}/LICENSE +0 -0
{pixeltable-0.2.13.dist-info → pixeltable-0.2.15.dist-info}/WHEEL +0 -0
{pixeltable-0.2.13.dist-info → pixeltable-0.2.15.dist-info}/entry_points.txt +0 -0

pixeltable/__init__.py CHANGED Viewed

@@ -21,7 +21,7 @@ from .type_system import (
 )
 from .utils.help import help
-from . import functions, io, iterators
+from . import ext, functions, io, iterators
 from .__version__ import __version__, __version_tuple__
 # This is the safest / most maintainable way to do this: start with the default and "blacklist" stuff that

pixeltable/__version__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 # These version placeholders will be replaced during build.
-__version__ = "0.2.13"
-__version_tuple__ = (0, 2, 13)
+__version__ = "0.2.15"
+__version_tuple__ = (0, 2, 15)

pixeltable/catalog/column.py CHANGED Viewed

@@ -1,13 +1,13 @@
 from __future__ import annotations
 import logging
-from typing import Optional, Union, Callable, Any
-from uuid import UUID
+from typing import Any, Callable, Optional, Union
 import sqlalchemy as sql
 import pixeltable.exceptions as excs
 import pixeltable.type_system as ts
 from .globals import is_valid_identifier
 _logger = logging.getLogger('pixeltable')
@@ -21,7 +21,7 @@ class Column:
     def __init__(
             self, name: Optional[str], col_type: Optional[ts.ColumnType] = None,
             computed_with: Optional[Union['Expr', Callable]] = None,
-            is_pk: bool = False, stored: Optional[bool] = None,
+            is_pk: bool = False, stored: bool = True,
             col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
             schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
             records_errors: Optional[bool] = None, value_expr_dict: Optional[dict[str, Any]] = None,
@@ -152,6 +152,11 @@ class Column:
             return self._records_errors
         return self.is_stored and (self.is_computed or self.col_type.is_media_type())
+    @property
+    def qualified_name(self) -> str:
+        assert self.tbl is not None
+        return f'{self.tbl.name}.{self.name}'
     def source(self) -> None:
         """
         If this is a computed col and the top-level expr is a function call, print the source, if possible.

pixeltable/catalog/globals.py CHANGED Viewed

@@ -19,6 +19,14 @@ class UpdateStatus:
     updated_cols: List[str] = dataclasses.field(default_factory=list)
     cols_with_excs: List[str] = dataclasses.field(default_factory=list)
+    def __iadd__(self, other: 'UpdateStatus') -> 'UpdateStatus':
+        self.num_rows += other.num_rows
+        self.num_computed_values += other.num_computed_values
+        self.num_excs += other.num_excs
+        self.updated_cols = list(dict.fromkeys(self.updated_cols + other.updated_cols))
+        self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
+        return self
 def is_valid_identifier(name: str) -> bool:
     return name.isidentifier() and not name.startswith('_')

pixeltable/catalog/table.py CHANGED Viewed

@@ -3,7 +3,7 @@ from __future__ import annotations
 import json
 import logging
 from pathlib import Path
-from typing import Union, Any, Optional, Callable, Set, Tuple, Iterable, overload, Type
+from typing import Union, Any, Optional, Callable, Set, Tuple, Iterable, overload, Type, Literal
 from uuid import UUID
 import abc
@@ -434,8 +434,8 @@ class Table(SchemaObject):
         for name, spec in schema.items():
             col_type: Optional[ts.ColumnType] = None
             value_expr: Optional[exprs.Expr] = None
-            stored: Optional[bool] = None
             primary_key: Optional[bool] = None
+            stored = True
             if isinstance(spec, ts.ColumnType):
                 # TODO: create copy
@@ -455,7 +455,7 @@ class Table(SchemaObject):
                 if value_expr is not None and isinstance(value_expr, exprs.Expr):
                     # create copy so we can modify it
                     value_expr = value_expr.copy()
-                stored = spec.get('stored')
+                stored = spec.get('stored', True)
                 primary_key = spec.get('primary_key')
             column = Column(
@@ -478,12 +478,10 @@ class Table(SchemaObject):
             raise excs.Error(f'Column name conflicts with a registered query: {col.name!r}')
         if col.stored is False and not (col.is_computed and col.col_type.is_image_type()):
             raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed image columns')
-        if col.stored is False and not (col.col_type.is_image_type() and not col.has_window_fn_call()):
+        if col.stored is False and col.has_window_fn_call():
             raise excs.Error((
                 f'Column {col.name!r}: stored={col.stored} is not valid for image columns computed with a streaming '
                 f'function'))
-        if col.stored is None:
-            col.stored = not (col.is_computed and col.col_type.is_image_type() and not col.has_window_fn_call())
     @classmethod
     def _verify_schema(cls, schema: list[Column]) -> None:
@@ -745,18 +743,34 @@ class Table(SchemaObject):
         self._check_is_dropped()
         return self._tbl_version.update(value_spec, where, cascade)
-    def batch_update(self, rows: Iterable[dict[str, Any]], cascade: bool = True) -> UpdateStatus:
+    def batch_update(
+            self, rows: Iterable[dict[str, Any]], cascade: bool = True,
+            if_not_exists: Literal['error', 'ignore', 'insert'] = 'error'
+    ) -> UpdateStatus:
         """Update rows in this table.
         Args:
             rows: an Iterable of dictionaries containing values for the updated columns plus values for the primary key
                   columns.
             cascade: if True, also update all computed columns that transitively depend on the updated columns.
+            if_not_exists: Specifies the behavior if a row to update does not exist:
+                - `'error'`: Raise an error.
+                - `'ignore'`: Skip the row silently.
+                - `'insert'`: Insert the row.
         Examples:
-            Update the 'name' and 'age' columns for the rows with ids 1 and 2 (assuming 'id' is the primary key):
+            Update the `name` and `age` columns for the rows with ids 1 and 2 (assuming `id` is the primary key).
+            If either row does not exist, this raises an error:
             >>> tbl.update([{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 2, 'name': 'Bob', 'age': 40}])
+            Update the `name` and `age` columns for the row with `id` 1 (assuming `id` is the primary key) and insert
+            the row with new `id` 3 (assuming this key does not exist):
+            >>> tbl.update(
+                [{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 3, 'name': 'Bob', 'age': 40}],
+                if_not_exists='insert')
         """
         if self._tbl_version_path.is_snapshot():
             raise excs.Error('Cannot update a snapshot')
@@ -784,7 +798,9 @@ class Table(SchemaObject):
                     missing_cols = pk_col_names - set(col.name for col in col_vals.keys())
                     raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
             row_updates.append(col_vals)
-        return self._tbl_version.batch_update(row_updates, rowids, cascade)
+        return self._tbl_version.batch_update(
+            row_updates, rowids, error_if_not_exists=if_not_exists == 'error',
+            insert_if_not_exists=if_not_exists == 'insert', cascade=cascade)
     def delete(self, where: Optional['pixeltable.exprs.Expr'] = None) -> UpdateStatus:
         """Delete rows in this table.

pixeltable/catalog/table_version.py CHANGED Viewed

@@ -702,10 +702,18 @@ class TableVersion:
                 raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
         with Env.get().engine.begin() as conn:
-            return self._update(conn, update_spec, where, cascade)
+            plan, updated_cols, recomputed_cols = (
+                Planner.create_update_plan(self.path, update_spec, [], where, cascade)
+            )
+            result = self.propagate_update(
+                plan, where.sql_expr() if where is not None else None, recomputed_cols,
+                base_versions=[], conn=conn, timestamp=time.time(), cascade=cascade, show_progress=True)
+            result.updated_cols = updated_cols
+            return result
     def batch_update(
-            self, batch: list[dict[Column, 'exprs.Expr']], rowids: list[tuple[int, ...]], cascade: bool = True
+            self, batch: list[dict[Column, 'exprs.Expr']], rowids: list[tuple[int, ...]], insert_if_not_exists: bool,
+            error_if_not_exists: bool, cascade: bool = True,
     ) -> UpdateStatus:
         """Update rows in batch.
         Args:
@@ -714,62 +722,26 @@ class TableVersion:
         """
         # if we do lookups of rowids, we must have one for each row in the batch
         assert len(rowids) == 0 or len(rowids) == len(batch)
-        result_status = UpdateStatus()
         cols_with_excs: set[str] = set()
-        updated_cols: set[str] = set()
-        pk_cols = self.primary_key_columns()
-        use_rowids = len(rowids) > 0
         with Env.get().engine.begin() as conn:
-            for i, row in enumerate(batch):
-                where_clause: Optional[exprs.Expr] = None
-                if use_rowids:
-                    # construct Where clause to match rowid
-                    num_rowid_cols = len(self.store_tbl.rowid_columns())
-                    for col_idx in range(num_rowid_cols):
-                        assert len(rowids[i]) == num_rowid_cols, f'len({rowids[i]}) != {num_rowid_cols}'
-                        clause = exprs.RowidRef(self, col_idx) == rowids[i][col_idx]
-                        if where_clause is None:
-                            where_clause = clause
-                        else:
-                            where_clause = where_clause & clause
-                else:
-                    # construct Where clause for primary key columns
-                    for col in pk_cols:
-                        assert col in row
-                        clause = exprs.ColumnRef(col) == row[col]
-                        if where_clause is None:
-                            where_clause = clause
-                        else:
-                            where_clause = where_clause & clause
-                update_targets = {col: row[col] for col in row if col not in pk_cols}
-                status = self._update(conn, update_targets, where_clause, cascade, show_progress=False)
-                result_status.num_rows += status.num_rows
-                result_status.num_excs += status.num_excs
-                result_status.num_computed_values += status.num_computed_values
-                cols_with_excs.update(status.cols_with_excs)
-                updated_cols.update(status.updated_cols)
-            result_status.cols_with_excs = list(cols_with_excs)
-            result_status.updated_cols = list(updated_cols)
-            return result_status
-    def _update(
-            self, conn: sql.engine.Connection, update_targets: dict[Column, 'pixeltable.exprs.Expr'],
-            where_clause: Optional['pixeltable.exprs.Expr'] = None, cascade: bool = True,
-            show_progress: bool = True
-    ) -> UpdateStatus:
-        from pixeltable.plan import Planner
+            from pixeltable.plan import Planner
-        plan, updated_cols, recomputed_cols = (
-            Planner.create_update_plan(self.path, update_targets, [], where_clause, cascade)
-        )
-        result = self.propagate_update(
-            plan, where_clause.sql_expr() if where_clause is not None else None, recomputed_cols,
-            base_versions=[], conn=conn, timestamp=time.time(), cascade=cascade, show_progress=show_progress)
-        result.updated_cols = updated_cols
-        return result
+            plan, row_update_node, delete_where_clause, updated_cols, recomputed_cols = \
+                Planner.create_batch_update_plan(self.path, batch, rowids, cascade=cascade)
+            result = self.propagate_update(
+                plan, delete_where_clause, recomputed_cols, base_versions=[], conn=conn, timestamp=time.time(),
+                cascade=cascade)
+            result.updated_cols = [c.qualified_name for c in updated_cols]
+            unmatched_rows = row_update_node.unmatched_rows()
+            if len(unmatched_rows) > 0:
+                if error_if_not_exists:
+                    raise excs.Error(f'batch_update(): {len(unmatched_rows)} row(s) not found')
+                if insert_if_not_exists:
+                    insert_status = self.insert(unmatched_rows, print_stats=False, fail_on_exception=False)
+                    result += insert_status
+            return result
     def _validate_update_spec(
             self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool
@@ -779,7 +751,10 @@ class TableVersion:
             if not isinstance(col_name, str):
                 raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
             if col_name == _ROWID_COLUMN_NAME:
-                # ignore pseudo-column _rowid
+                # a valid rowid is a list of ints, one per rowid column
+                assert len(val) == len(self.store_tbl.rowid_columns())
+                for el in val:
+                    assert isinstance(el, int)
                 continue
             col = self.path.get_column(col_name, include_bases=False)
             if col is None:

pixeltable/catalog/view.py CHANGED Viewed

@@ -92,7 +92,7 @@ class View(Table):
                 ]
                 sig = func.Signature(InvalidType(), params)
                 from pixeltable.exprs import FunctionCall
-                FunctionCall.check_args(sig, bound_args)
+                FunctionCall.normalize_args(sig, bound_args)
             except TypeError as e:
                 raise Error(f'Cannot instantiate iterator with given arguments: {e}')

pixeltable/env.py CHANGED Viewed

@@ -16,7 +16,7 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Callable, Optional, Dict, Any, List, TYPE_CHECKING
-import pgserver
+import pixeltable_pgserver
 import sqlalchemy as sql
 import yaml
 from tqdm import TqdmWarning
@@ -60,7 +60,7 @@ class Env:
         self._sa_engine: Optional[sql.engine.base.Engine] = None
         self._pgdata_dir: Optional[Path] = None
         self._db_name: Optional[str] = None
-        self._db_server: Optional[pgserver.PostgresServer] = None
+        self._db_server: Optional[pixeltable_pgserver.PostgresServer] = None
         self._db_url: Optional[str] = None
         # info about installed packages that are utilized by some parts of the code;
@@ -266,8 +266,8 @@ class Env:
         self._db_name = os.environ.get('PIXELTABLE_DB', 'pixeltable')
         self._pgdata_dir = Path(os.environ.get('PIXELTABLE_PGDATA', str(self._home / 'pgdata')))
-        # in pgserver.get_server(): cleanup_mode=None will leave db on for debugging purposes
-        self._db_server = pgserver.get_server(self._pgdata_dir, cleanup_mode=None)
+        # in pixeltable_pgserver.get_server(): cleanup_mode=None will leave db on for debugging purposes
+        self._db_server = pixeltable_pgserver.get_server(self._pgdata_dir, cleanup_mode=None)
         self._db_url = self._db_server.get_uri(database=self._db_name)
         if reinit_db:

pixeltable/exec/__init__.py CHANGED Viewed

@@ -5,6 +5,7 @@ from .exec_context import ExecContext
 from .exec_node import ExecNode
 from .expr_eval_node import ExprEvalNode
 from .in_memory_data_node import InMemoryDataNode
-from .sql_scan_node import SqlScanNode
+from .sql_node import SqlScanNode, SqlLookupNode
+from .row_update_node import RowUpdateNode
 from .media_validation_node import MediaValidationNode
 from .data_row_batch import DataRowBatch

pixeltable/exec/row_update_node.py ADDED Viewed

@@ -0,0 +1,61 @@
+import logging
+from typing import Any
+import pixeltable.catalog as catalog
+import pixeltable.exprs as exprs
+from pixeltable.utils.media_store import MediaStore
+from .data_row_batch import DataRowBatch
+from .exec_node import ExecNode
+_logger = logging.getLogger('pixeltable')
+class RowUpdateNode(ExecNode):
+    """
+    Update individual rows in the input batches, identified by key columns.
+    The updates for a row are provided as a dict of column names to new values.
+    The node assumes that all update dicts contain the same keys, and it populates the slots of the columns present in
+    the update list.
+    """
+    def __init__(
+            self, tbl: catalog.TableVersionPath, key_vals_batch: list[tuple], is_rowid_key: bool,
+            col_vals_batch: list[dict[catalog.Column, Any]], row_builder: exprs.RowBuilder, input: ExecNode,
+    ):
+        super().__init__(row_builder, [], [], input)
+        self.updates = {key_vals: col_vals for key_vals, col_vals in zip(key_vals_batch, col_vals_batch)}
+        self.is_rowid_key = is_rowid_key
+        # determine slot idxs of all columns we need to read or write
+        # retrieve ColumnRefs from the RowBuilder (has slot_idx set)
+        all_col_slot_idxs = {
+            col_ref.col: col_ref.slot_idx
+            for col_ref in row_builder.unique_exprs if isinstance(col_ref, exprs.ColumnRef)
+        }
+        self.col_slot_idxs = {col: all_col_slot_idxs[col] for col in col_vals_batch[0].keys()}
+        self.key_slot_idxs = {col: all_col_slot_idxs[col] for col in tbl.tbl_version.primary_key_columns()}
+        self.matched_key_vals: set[tuple] = set()
+    def __next__(self) -> DataRowBatch:
+        batch = next(self.input)
+        for row in batch:
+            key_vals = row.rowid if self.is_rowid_key else \
+                tuple(row[slot_idx] for slot_idx in self.key_slot_idxs.values())
+            if key_vals not in self.updates:
+                continue
+            self.matched_key_vals.add(key_vals)
+            col_vals = self.updates[key_vals]
+            for col, val in col_vals.items():
+                slot_idx = self.col_slot_idxs[col]
+                row[slot_idx] = val
+        return batch
+    def unmatched_rows(self) -> list[dict[str, Any]]:
+        """Return rows that didn't get used in the updates as a list of dicts compatible with TableVersion.insert()."""
+        result: list[dict[str, Any]] = []
+        key_cols = self.key_slot_idxs.keys()
+        for key_vals, col_vals in self.updates.items():
+            if key_vals in self.matched_key_vals:
+                continue
+            row = {col.name: val for col, val in zip(key_cols, key_vals)}
+            row.update({col.name: val for col, val in col_vals.items()})
+            result.append(row)
+        return result

pixeltable/exec/{sql_scan_node.py → sql_node.py} RENAMED Viewed

@@ -13,30 +13,23 @@ import pixeltable.catalog as catalog
 _logger = logging.getLogger('pixeltable')
-class SqlScanNode(ExecNode):
-    """Materializes data from the store via SQL
-    """
+class SqlNode(ExecNode):
+    """Materializes data from the store via a Select stmt."""
     def __init__(
             self, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder,
-            select_list: Iterable[exprs.Expr],
-            where_clause: Optional[exprs.Expr] = None, filter: Optional[exprs.Expr] = None,
-            order_by_items: Optional[List[Tuple[exprs.Expr, bool]]] = None,
-            limit: int = 0, set_pk: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
+            select_list: Iterable[exprs.Expr], set_pk: bool = False
     ):
         """
+        Initialize self.stmt with expressions derived from select_list.
+        This only provides the select list. The subclass is responsible for the From clause and any additional clauses.
         Args:
             select_list: output of the query
-            sql_where_clause: SQL Where clause
-            filter: additional Where-clause predicate that can't be evaluated via SQL
-            limit: max number of rows to return: 0 = no limit
             set_pk: if True, sets the primary for each DataRow
-            exact_version_only: tables for which we only want to see rows created at the current version
         """
         # create Select stmt
-        if order_by_items is None:
-            order_by_items = []
-        if exact_version_only is None:
-            exact_version_only = []
         self.tbl = tbl
         target = tbl.tbl_version  # the stored table we're scanning
         self.sql_exprs = exprs.ExprSet(select_list)
@@ -45,21 +38,15 @@ class SqlScanNode(ExecNode):
             sql_subexprs = iter_arg.subexprs(filter=lambda e: e.sql_expr() is not None, traverse_matches=False)
             [self.sql_exprs.append(e) for e in sql_subexprs]
         super().__init__(row_builder, self.sql_exprs, [], None)  # we materialize self.sql_exprs
-        self.filter = filter
-        self.filter_eval_ctx = \
-            row_builder.create_eval_ctx([filter], exclude=select_list) if filter is not None else None
-        self.limit = limit
         # change rowid refs against a base table to rowid refs against the target table, so that we minimize
         # the number of tables that need to be joined to the target table
         for rowid_ref in [e for e in self.sql_exprs if isinstance(e, exprs.RowidRef)]:
             rowid_ref.set_tbl(tbl)
-        where_clause_tbl_ids = where_clause.tbl_ids() if where_clause is not None else set()
-        refd_tbl_ids = exprs.Expr.list_tbl_ids(self.sql_exprs) | where_clause_tbl_ids
         sql_select_list = [e.sql_expr() for e in self.sql_exprs]
         assert len(sql_select_list) == len(self.sql_exprs)
-        assert all([e is not None for e in sql_select_list])
+        assert all(e is not None for e in sql_select_list)
         self.set_pk = set_pk
         self.num_pk_cols = 0
         if set_pk:
@@ -69,42 +56,12 @@ class SqlScanNode(ExecNode):
             sql_select_list += pk_columns
         self.stmt = sql.select(*sql_select_list)
-        self.stmt = self.create_from_clause(
-            tbl, self.stmt, refd_tbl_ids, exact_version_only={t.id for t in exact_version_only})
-        # change rowid refs against a base table to rowid refs against the target table, so that we minimize
-        # the number of tables that need to be joined to the target table
-        for rowid_ref in [e for e, _ in order_by_items if isinstance(e, exprs.RowidRef)]:
-            rowid_ref.set_tbl(tbl)
-        order_by_clause: List[sql.ClauseElement] = []
-        for e, asc in order_by_items:
-            if isinstance(e, exprs.SimilarityExpr):
-                order_by_clause.append(e.as_order_by_clause(asc))
-            else:
-                order_by_clause.append(e.sql_expr().desc() if not asc else e.sql_expr())
-        if where_clause is not None:
-            sql_where_clause = where_clause.sql_expr()
-            assert sql_where_clause is not None
-            self.stmt = self.stmt.where(sql_where_clause)
-        if len(order_by_clause) > 0:
-            self.stmt = self.stmt.order_by(*order_by_clause)
-        elif target.id in row_builder.unstored_iter_args:
-            # we are referencing unstored iter columns from this view and try to order by our primary key,
-            # which ensures that iterators will see monotonically increasing pos values
-            self.stmt = self.stmt.order_by(*self.tbl.store_tbl.rowid_columns())
-        if limit != 0 and self.filter is None:
-            # if we need to do post-SQL filtering, we can't use LIMIT
-            self.stmt = self.stmt.limit(limit)
+        # additional state
         self.result_cursor: Optional[sql.engine.CursorResult] = None
-        try:
-            # log stmt, if possible
-            stmt_str = str(self.stmt.compile(compile_kwargs={'literal_binds': True}))
-            _logger.debug(f'SqlScanNode stmt:\n{stmt_str}')
-        except Exception as e:
-            pass
+        # the filter is provided by the subclass
+        self.filter: Optional[exprs.Expr] = None
+        self.filter_eval_ctx: Optional[exprs.EvalContext] = None
     @classmethod
     def create_from_clause(
@@ -224,3 +181,110 @@ class SqlScanNode(ExecNode):
         if self.result_cursor is not None:
             self.result_cursor.close()
+class SqlScanNode(SqlNode):
+    """
+    Materializes data from the store via a Select stmt.
+    Supports filtering and ordering.
+    """
+    def __init__(
+            self, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder,
+            select_list: Iterable[exprs.Expr],
+            where_clause: Optional[exprs.Expr] = None, filter: Optional[exprs.Expr] = None,
+            order_by_items: Optional[List[Tuple[exprs.Expr, bool]]] = None,
+            limit: int = 0, set_pk: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
+    ):
+        """
+        Args:
+            select_list: output of the query
+            sql_where_clause: SQL Where clause
+            filter: additional Where-clause predicate that can't be evaluated via SQL
+            limit: max number of rows to return: 0 = no limit
+            set_pk: if True, sets the primary for each DataRow
+            exact_version_only: tables for which we only want to see rows created at the current version
+        """
+        super().__init__(tbl, row_builder, select_list, set_pk=set_pk)
+        # create Select stmt
+        if order_by_items is None:
+            order_by_items = []
+        if exact_version_only is None:
+            exact_version_only = []
+        target = tbl.tbl_version  # the stored table we're scanning
+        self.filter = filter
+        self.filter_eval_ctx = \
+            row_builder.create_eval_ctx([filter], exclude=select_list) if filter is not None else None
+        self.limit = limit
+        where_clause_tbl_ids = where_clause.tbl_ids() if where_clause is not None else set()
+        refd_tbl_ids = exprs.Expr.list_tbl_ids(self.sql_exprs) | where_clause_tbl_ids
+        self.stmt = self.create_from_clause(
+            tbl, self.stmt, refd_tbl_ids, exact_version_only={t.id for t in exact_version_only})
+        # change rowid refs against a base table to rowid refs against the target table, so that we minimize
+        # the number of tables that need to be joined to the target table
+        for rowid_ref in [e for e, _ in order_by_items if isinstance(e, exprs.RowidRef)]:
+            rowid_ref.set_tbl(tbl)
+        order_by_clause: List[sql.ClauseElement] = []
+        for e, asc in order_by_items:
+            if isinstance(e, exprs.SimilarityExpr):
+                order_by_clause.append(e.as_order_by_clause(asc))
+            else:
+                order_by_clause.append(e.sql_expr().desc() if not asc else e.sql_expr())
+        if where_clause is not None:
+            sql_where_clause = where_clause.sql_expr()
+            assert sql_where_clause is not None
+            self.stmt = self.stmt.where(sql_where_clause)
+        if len(order_by_clause) > 0:
+            self.stmt = self.stmt.order_by(*order_by_clause)
+        elif target.id in row_builder.unstored_iter_args:
+            # we are referencing unstored iter columns from this view and try to order by our primary key,
+            # which ensures that iterators will see monotonically increasing pos values
+            self.stmt = self.stmt.order_by(*self.tbl.store_tbl.rowid_columns())
+        if limit != 0 and self.filter is None:
+            # if we need to do post-SQL filtering, we can't use LIMIT
+            self.stmt = self.stmt.limit(limit)
+        try:
+            # log stmt, if possible
+            stmt_str = str(self.stmt.compile(compile_kwargs={'literal_binds': True}))
+            _logger.debug(f'SqlScanNode stmt:\n{stmt_str}')
+        except Exception as e:
+            pass
+class SqlLookupNode(SqlNode):
+    """
+    Materializes data from the store via a Select stmt with a WHERE clause that matches a list of key values
+    """
+    def __init__(
+            self, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder,
+            select_list: Iterable[exprs.Expr], sa_key_cols: list[sql.Column], key_vals: list[tuple],
+    ):
+        """
+        Args:
+            select_list: output of the query
+            sa_key_cols: list of key columns in the store table
+            key_vals: list of key values to look up
+        """
+        super().__init__(tbl, row_builder, select_list, set_pk=True)
+        target = tbl.tbl_version  # the stored table we're scanning
+        refd_tbl_ids = exprs.Expr.list_tbl_ids(self.sql_exprs)
+        self.stmt = self.create_from_clause(tbl, self.stmt, refd_tbl_ids)
+        # Where clause: (key-col-1, key-col-2, ...) IN ((val-1, val-2, ...), ...)
+        self.where_clause = sql.tuple_(*sa_key_cols).in_(key_vals)
+        self.stmt = self.stmt.where(self.where_clause)
+        if target.id in row_builder.unstored_iter_args:
+            # we are referencing unstored iter columns from this view and try to order by our primary key,
+            # which ensures that iterators will see monotonically increasing pos values
+            self.stmt = self.stmt.order_by(*self.tbl.store_tbl.rowid_columns())
+        try:
+            # log stmt, if possible
+            stmt_str = str(self.stmt.compile(compile_kwargs={'literal_binds': True}))
+            _logger.debug(f'SqlLookupNode stmt:\n{stmt_str}')
+        except Exception as e:
+            pass

pixeltable/exprs/__init__.py CHANGED Viewed

@@ -8,7 +8,6 @@ from .data_row import DataRow
 from .expr import Expr
 from .expr_set import ExprSet
 from .function_call import FunctionCall
-from .image_member_access import ImageMemberAccess
 from .in_predicate import InPredicate
 from .inline_array import InlineArray
 from .inline_dict import InlineDict
@@ -16,6 +15,7 @@ from .is_null import IsNull
 from .json_mapper import JsonMapper
 from .json_path import RELATIVE_PATH_ROOT, JsonPath
 from .literal import Literal
+from .method_ref import MethodRef
 from .object_ref import ObjectRef
 from .row_builder import RowBuilder, ColumnSlotIdx, ExecProfile
 from .rowid_ref import RowidRef

pixeltable 0.2.13__py3-none-any.whl → 0.2.15__py3-none-any.whl

Potentially problematic release.

pixeltable 0.2.13__py3-none-any.whl → 0.2.15__py3-none-any.whl