PyPI - pixeltable - Versions diffs - 0.2.13__py3-none-any.whl → 0.2.14__py3-none-any.whl - Mend

pixeltable 0.2.13__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (51) hide show

pixeltable/__init__.py +1 -1
pixeltable/__version__.py +2 -2
pixeltable/catalog/column.py +5 -0
pixeltable/catalog/globals.py +8 -0
pixeltable/catalog/table.py +22 -4
pixeltable/catalog/table_version.py +30 -55
pixeltable/catalog/view.py +1 -1
pixeltable/exec/__init__.py +2 -1
pixeltable/exec/row_update_node.py +61 -0
pixeltable/exec/{sql_scan_node.py → sql_node.py} +120 -56
pixeltable/exprs/__init__.py +1 -1
pixeltable/exprs/expr.py +35 -22
pixeltable/exprs/function_call.py +60 -29
pixeltable/exprs/globals.py +2 -0
pixeltable/exprs/inline_array.py +18 -11
pixeltable/exprs/method_ref.py +63 -0
pixeltable/ext/__init__.py +9 -0
pixeltable/ext/functions/__init__.py +8 -0
pixeltable/ext/functions/whisperx.py +45 -5
pixeltable/ext/functions/yolox.py +60 -14
pixeltable/func/callable_function.py +12 -4
pixeltable/func/expr_template_function.py +1 -1
pixeltable/func/function.py +12 -2
pixeltable/func/function_registry.py +24 -9
pixeltable/func/udf.py +32 -4
pixeltable/functions/__init__.py +1 -1
pixeltable/functions/fireworks.py +33 -0
pixeltable/functions/huggingface.py +96 -6
pixeltable/functions/image.py +226 -41
pixeltable/functions/openai.py +214 -0
pixeltable/functions/string.py +195 -218
pixeltable/functions/timestamp.py +210 -0
pixeltable/functions/together.py +106 -0
pixeltable/functions/video.py +2 -2
pixeltable/functions/whisper.py +32 -0
pixeltable/io/__init__.py +1 -1
pixeltable/io/globals.py +133 -1
pixeltable/io/pandas.py +52 -27
pixeltable/metadata/__init__.py +1 -1
pixeltable/metadata/converters/convert_18.py +39 -0
pixeltable/metadata/notes.py +10 -0
pixeltable/plan.py +76 -1
pixeltable/tool/create_test_db_dump.py +3 -4
pixeltable/tool/doc_plugins/griffe.py +4 -0
pixeltable/type_system.py +15 -14
{pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/METADATA +1 -1
{pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/RECORD +50 -45
pixeltable/exprs/image_member_access.py +0 -96
{pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/LICENSE +0 -0
{pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/WHEEL +0 -0
{pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/entry_points.txt +0 -0

pixeltable/__init__.py CHANGED Viewed

@@ -21,7 +21,7 @@ from .type_system import (
 )
 from .utils.help import help
-from . import functions, io, iterators
+from . import ext, functions, io, iterators
 from .__version__ import __version__, __version_tuple__
 # This is the safest / most maintainable way to do this: start with the default and "blacklist" stuff that

pixeltable/__version__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 # These version placeholders will be replaced during build.
-__version__ = "0.2.13"
-__version_tuple__ = (0, 2, 13)
+__version__ = "0.2.14"
+__version_tuple__ = (0, 2, 14)

pixeltable/catalog/column.py CHANGED Viewed

@@ -152,6 +152,11 @@ class Column:
             return self._records_errors
         return self.is_stored and (self.is_computed or self.col_type.is_media_type())
+    @property
+    def qualified_name(self) -> str:
+        assert self.tbl is not None
+        return f'{self.tbl.name}.{self.name}'
     def source(self) -> None:
         """
         If this is a computed col and the top-level expr is a function call, print the source, if possible.

pixeltable/catalog/globals.py CHANGED Viewed

@@ -19,6 +19,14 @@ class UpdateStatus:
     updated_cols: List[str] = dataclasses.field(default_factory=list)
     cols_with_excs: List[str] = dataclasses.field(default_factory=list)
+    def __iadd__(self, other: 'UpdateStatus') -> 'UpdateStatus':
+        self.num_rows += other.num_rows
+        self.num_computed_values += other.num_computed_values
+        self.num_excs += other.num_excs
+        self.updated_cols = list(dict.fromkeys(self.updated_cols + other.updated_cols))
+        self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
+        return self
 def is_valid_identifier(name: str) -> bool:
     return name.isidentifier() and not name.startswith('_')

pixeltable/catalog/table.py CHANGED Viewed

@@ -3,7 +3,7 @@ from __future__ import annotations
 import json
 import logging
 from pathlib import Path
-from typing import Union, Any, Optional, Callable, Set, Tuple, Iterable, overload, Type
+from typing import Union, Any, Optional, Callable, Set, Tuple, Iterable, overload, Type, Literal
 from uuid import UUID
 import abc
@@ -745,18 +745,34 @@ class Table(SchemaObject):
         self._check_is_dropped()
         return self._tbl_version.update(value_spec, where, cascade)
-    def batch_update(self, rows: Iterable[dict[str, Any]], cascade: bool = True) -> UpdateStatus:
+    def batch_update(
+            self, rows: Iterable[dict[str, Any]], cascade: bool = True,
+            if_not_exists: Literal['error', 'ignore', 'insert'] = 'error'
+    ) -> UpdateStatus:
         """Update rows in this table.
         Args:
             rows: an Iterable of dictionaries containing values for the updated columns plus values for the primary key
                   columns.
             cascade: if True, also update all computed columns that transitively depend on the updated columns.
+            if_not_exists: Specifies the behavior if a row to update does not exist:
+                - `'error'`: Raise an error.
+                - `'ignore'`: Skip the row silently.
+                - `'insert'`: Insert the row.
         Examples:
-            Update the 'name' and 'age' columns for the rows with ids 1 and 2 (assuming 'id' is the primary key):
+            Update the `name` and `age` columns for the rows with ids 1 and 2 (assuming `id` is the primary key).
+            If either row does not exist, this raises an error:
             >>> tbl.update([{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 2, 'name': 'Bob', 'age': 40}])
+            Update the `name` and `age` columns for the row with `id` 1 (assuming `id` is the primary key) and insert
+            the row with new `id` 3 (assuming this key does not exist):
+            >>> tbl.update(
+                [{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 3, 'name': 'Bob', 'age': 40}],
+                if_not_exists='insert')
         """
         if self._tbl_version_path.is_snapshot():
             raise excs.Error('Cannot update a snapshot')
@@ -784,7 +800,9 @@ class Table(SchemaObject):
                     missing_cols = pk_col_names - set(col.name for col in col_vals.keys())
                     raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
             row_updates.append(col_vals)
-        return self._tbl_version.batch_update(row_updates, rowids, cascade)
+        return self._tbl_version.batch_update(
+            row_updates, rowids, error_if_not_exists=if_not_exists == 'error',
+            insert_if_not_exists=if_not_exists == 'insert', cascade=cascade)
     def delete(self, where: Optional['pixeltable.exprs.Expr'] = None) -> UpdateStatus:
         """Delete rows in this table.

pixeltable/catalog/table_version.py CHANGED Viewed

@@ -702,10 +702,18 @@ class TableVersion:
                 raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
         with Env.get().engine.begin() as conn:
-            return self._update(conn, update_spec, where, cascade)
+            plan, updated_cols, recomputed_cols = (
+                Planner.create_update_plan(self.path, update_spec, [], where, cascade)
+            )
+            result = self.propagate_update(
+                plan, where.sql_expr() if where is not None else None, recomputed_cols,
+                base_versions=[], conn=conn, timestamp=time.time(), cascade=cascade, show_progress=True)
+            result.updated_cols = updated_cols
+            return result
     def batch_update(
-            self, batch: list[dict[Column, 'exprs.Expr']], rowids: list[tuple[int, ...]], cascade: bool = True
+            self, batch: list[dict[Column, 'exprs.Expr']], rowids: list[tuple[int, ...]], insert_if_not_exists: bool,
+            error_if_not_exists: bool, cascade: bool = True,
     ) -> UpdateStatus:
         """Update rows in batch.
         Args:
@@ -714,62 +722,26 @@ class TableVersion:
         """
         # if we do lookups of rowids, we must have one for each row in the batch
         assert len(rowids) == 0 or len(rowids) == len(batch)
-        result_status = UpdateStatus()
         cols_with_excs: set[str] = set()
-        updated_cols: set[str] = set()
-        pk_cols = self.primary_key_columns()
-        use_rowids = len(rowids) > 0
         with Env.get().engine.begin() as conn:
-            for i, row in enumerate(batch):
-                where_clause: Optional[exprs.Expr] = None
-                if use_rowids:
-                    # construct Where clause to match rowid
-                    num_rowid_cols = len(self.store_tbl.rowid_columns())
-                    for col_idx in range(num_rowid_cols):
-                        assert len(rowids[i]) == num_rowid_cols, f'len({rowids[i]}) != {num_rowid_cols}'
-                        clause = exprs.RowidRef(self, col_idx) == rowids[i][col_idx]
-                        if where_clause is None:
-                            where_clause = clause
-                        else:
-                            where_clause = where_clause & clause
-                else:
-                    # construct Where clause for primary key columns
-                    for col in pk_cols:
-                        assert col in row
-                        clause = exprs.ColumnRef(col) == row[col]
-                        if where_clause is None:
-                            where_clause = clause
-                        else:
-                            where_clause = where_clause & clause
-                update_targets = {col: row[col] for col in row if col not in pk_cols}
-                status = self._update(conn, update_targets, where_clause, cascade, show_progress=False)
-                result_status.num_rows += status.num_rows
-                result_status.num_excs += status.num_excs
-                result_status.num_computed_values += status.num_computed_values
-                cols_with_excs.update(status.cols_with_excs)
-                updated_cols.update(status.updated_cols)
-            result_status.cols_with_excs = list(cols_with_excs)
-            result_status.updated_cols = list(updated_cols)
-            return result_status
-    def _update(
-            self, conn: sql.engine.Connection, update_targets: dict[Column, 'pixeltable.exprs.Expr'],
-            where_clause: Optional['pixeltable.exprs.Expr'] = None, cascade: bool = True,
-            show_progress: bool = True
-    ) -> UpdateStatus:
-        from pixeltable.plan import Planner
+            from pixeltable.plan import Planner
-        plan, updated_cols, recomputed_cols = (
-            Planner.create_update_plan(self.path, update_targets, [], where_clause, cascade)
-        )
-        result = self.propagate_update(
-            plan, where_clause.sql_expr() if where_clause is not None else None, recomputed_cols,
-            base_versions=[], conn=conn, timestamp=time.time(), cascade=cascade, show_progress=show_progress)
-        result.updated_cols = updated_cols
-        return result
+            plan, row_update_node, delete_where_clause, updated_cols, recomputed_cols = \
+                Planner.create_batch_update_plan(self.path, batch, rowids, cascade=cascade)
+            result = self.propagate_update(
+                plan, delete_where_clause, recomputed_cols, base_versions=[], conn=conn, timestamp=time.time(),
+                cascade=cascade)
+            result.updated_cols = [c.qualified_name for c in updated_cols]
+            unmatched_rows = row_update_node.unmatched_rows()
+            if len(unmatched_rows) > 0:
+                if error_if_not_exists:
+                    raise excs.Error(f'batch_update(): {len(unmatched_rows)} row(s) not found')
+                if insert_if_not_exists:
+                    insert_status = self.insert(unmatched_rows, print_stats=False, fail_on_exception=False)
+                    result += insert_status
+            return result
     def _validate_update_spec(
             self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool
@@ -779,7 +751,10 @@ class TableVersion:
             if not isinstance(col_name, str):
                 raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
             if col_name == _ROWID_COLUMN_NAME:
-                # ignore pseudo-column _rowid
+                # a valid rowid is a list of ints, one per rowid column
+                assert len(val) == len(self.store_tbl.rowid_columns())
+                for el in val:
+                    assert isinstance(el, int)
                 continue
             col = self.path.get_column(col_name, include_bases=False)
             if col is None:

pixeltable/catalog/view.py CHANGED Viewed

@@ -92,7 +92,7 @@ class View(Table):
                 ]
                 sig = func.Signature(InvalidType(), params)
                 from pixeltable.exprs import FunctionCall
-                FunctionCall.check_args(sig, bound_args)
+                FunctionCall.normalize_args(sig, bound_args)
             except TypeError as e:
                 raise Error(f'Cannot instantiate iterator with given arguments: {e}')

pixeltable/exec/__init__.py CHANGED Viewed

@@ -5,6 +5,7 @@ from .exec_context import ExecContext
 from .exec_node import ExecNode
 from .expr_eval_node import ExprEvalNode
 from .in_memory_data_node import InMemoryDataNode
-from .sql_scan_node import SqlScanNode
+from .sql_node import SqlScanNode, SqlLookupNode
+from .row_update_node import RowUpdateNode
 from .media_validation_node import MediaValidationNode
 from .data_row_batch import DataRowBatch

pixeltable/exec/row_update_node.py ADDED Viewed

@@ -0,0 +1,61 @@
+import logging
+from typing import Any
+import pixeltable.catalog as catalog
+import pixeltable.exprs as exprs
+from pixeltable.utils.media_store import MediaStore
+from .data_row_batch import DataRowBatch
+from .exec_node import ExecNode
+_logger = logging.getLogger('pixeltable')
+class RowUpdateNode(ExecNode):
+    """
+    Update individual rows in the input batches, identified by key columns.
+    The updates for a row are provided as a dict of column names to new values.
+    The node assumes that all update dicts contain the same keys, and it populates the slots of the columns present in
+    the update list.
+    """
+    def __init__(
+            self, tbl: catalog.TableVersionPath, key_vals_batch: list[tuple], is_rowid_key: bool,
+            col_vals_batch: list[dict[catalog.Column, Any]], row_builder: exprs.RowBuilder, input: ExecNode,
+    ):
+        super().__init__(row_builder, [], [], input)
+        self.updates = {key_vals: col_vals for key_vals, col_vals in zip(key_vals_batch, col_vals_batch)}
+        self.is_rowid_key = is_rowid_key
+        # determine slot idxs of all columns we need to read or write
+        # retrieve ColumnRefs from the RowBuilder (has slot_idx set)
+        all_col_slot_idxs = {
+            col_ref.col: col_ref.slot_idx
+            for col_ref in row_builder.unique_exprs if isinstance(col_ref, exprs.ColumnRef)
+        }
+        self.col_slot_idxs = {col: all_col_slot_idxs[col] for col in col_vals_batch[0].keys()}
+        self.key_slot_idxs = {col: all_col_slot_idxs[col] for col in tbl.tbl_version.primary_key_columns()}
+        self.matched_key_vals: set[tuple] = set()
+    def __next__(self) -> DataRowBatch:
+        batch = next(self.input)
+        for row in batch:
+            key_vals = row.rowid if self.is_rowid_key else \
+                tuple(row[slot_idx] for slot_idx in self.key_slot_idxs.values())
+            if key_vals not in self.updates:
+                continue
+            self.matched_key_vals.add(key_vals)
+            col_vals = self.updates[key_vals]
+            for col, val in col_vals.items():
+                slot_idx = self.col_slot_idxs[col]
+                row[slot_idx] = val
+        return batch
+    def unmatched_rows(self) -> list[dict[str, Any]]:
+        """Return rows that didn't get used in the updates as a list of dicts compatible with TableVersion.insert()."""
+        result: list[dict[str, Any]] = []
+        key_cols = self.key_slot_idxs.keys()
+        for key_vals, col_vals in self.updates.items():
+            if key_vals in self.matched_key_vals:
+                continue
+            row = {col.name: val for col, val in zip(key_cols, key_vals)}
+            row.update({col.name: val for col, val in col_vals.items()})
+            result.append(row)
+        return result

pixeltable/exec/{sql_scan_node.py → sql_node.py} RENAMED Viewed

@@ -13,30 +13,23 @@ import pixeltable.catalog as catalog
 _logger = logging.getLogger('pixeltable')
-class SqlScanNode(ExecNode):
-    """Materializes data from the store via SQL
-    """
+class SqlNode(ExecNode):
+    """Materializes data from the store via a Select stmt."""
     def __init__(
             self, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder,
-            select_list: Iterable[exprs.Expr],
-            where_clause: Optional[exprs.Expr] = None, filter: Optional[exprs.Expr] = None,
-            order_by_items: Optional[List[Tuple[exprs.Expr, bool]]] = None,
-            limit: int = 0, set_pk: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
+            select_list: Iterable[exprs.Expr], set_pk: bool = False
     ):
         """
+        Initialize self.stmt with expressions derived from select_list.
+        This only provides the select list. The subclass is responsible for the From clause and any additional clauses.
         Args:
             select_list: output of the query
-            sql_where_clause: SQL Where clause
-            filter: additional Where-clause predicate that can't be evaluated via SQL
-            limit: max number of rows to return: 0 = no limit
             set_pk: if True, sets the primary for each DataRow
-            exact_version_only: tables for which we only want to see rows created at the current version
         """
         # create Select stmt
-        if order_by_items is None:
-            order_by_items = []
-        if exact_version_only is None:
-            exact_version_only = []
         self.tbl = tbl
         target = tbl.tbl_version  # the stored table we're scanning
         self.sql_exprs = exprs.ExprSet(select_list)
@@ -45,21 +38,15 @@ class SqlScanNode(ExecNode):
             sql_subexprs = iter_arg.subexprs(filter=lambda e: e.sql_expr() is not None, traverse_matches=False)
             [self.sql_exprs.append(e) for e in sql_subexprs]
         super().__init__(row_builder, self.sql_exprs, [], None)  # we materialize self.sql_exprs
-        self.filter = filter
-        self.filter_eval_ctx = \
-            row_builder.create_eval_ctx([filter], exclude=select_list) if filter is not None else None
-        self.limit = limit
         # change rowid refs against a base table to rowid refs against the target table, so that we minimize
         # the number of tables that need to be joined to the target table
         for rowid_ref in [e for e in self.sql_exprs if isinstance(e, exprs.RowidRef)]:
             rowid_ref.set_tbl(tbl)
-        where_clause_tbl_ids = where_clause.tbl_ids() if where_clause is not None else set()
-        refd_tbl_ids = exprs.Expr.list_tbl_ids(self.sql_exprs) | where_clause_tbl_ids
         sql_select_list = [e.sql_expr() for e in self.sql_exprs]
         assert len(sql_select_list) == len(self.sql_exprs)
-        assert all([e is not None for e in sql_select_list])
+        assert all(e is not None for e in sql_select_list)
         self.set_pk = set_pk
         self.num_pk_cols = 0
         if set_pk:
@@ -69,42 +56,12 @@ class SqlScanNode(ExecNode):
             sql_select_list += pk_columns
         self.stmt = sql.select(*sql_select_list)
-        self.stmt = self.create_from_clause(
-            tbl, self.stmt, refd_tbl_ids, exact_version_only={t.id for t in exact_version_only})
-        # change rowid refs against a base table to rowid refs against the target table, so that we minimize
-        # the number of tables that need to be joined to the target table
-        for rowid_ref in [e for e, _ in order_by_items if isinstance(e, exprs.RowidRef)]:
-            rowid_ref.set_tbl(tbl)
-        order_by_clause: List[sql.ClauseElement] = []
-        for e, asc in order_by_items:
-            if isinstance(e, exprs.SimilarityExpr):
-                order_by_clause.append(e.as_order_by_clause(asc))
-            else:
-                order_by_clause.append(e.sql_expr().desc() if not asc else e.sql_expr())
-        if where_clause is not None:
-            sql_where_clause = where_clause.sql_expr()
-            assert sql_where_clause is not None
-            self.stmt = self.stmt.where(sql_where_clause)
-        if len(order_by_clause) > 0:
-            self.stmt = self.stmt.order_by(*order_by_clause)
-        elif target.id in row_builder.unstored_iter_args:
-            # we are referencing unstored iter columns from this view and try to order by our primary key,
-            # which ensures that iterators will see monotonically increasing pos values
-            self.stmt = self.stmt.order_by(*self.tbl.store_tbl.rowid_columns())
-        if limit != 0 and self.filter is None:
-            # if we need to do post-SQL filtering, we can't use LIMIT
-            self.stmt = self.stmt.limit(limit)
+        # additional state
         self.result_cursor: Optional[sql.engine.CursorResult] = None
-        try:
-            # log stmt, if possible
-            stmt_str = str(self.stmt.compile(compile_kwargs={'literal_binds': True}))
-            _logger.debug(f'SqlScanNode stmt:\n{stmt_str}')
-        except Exception as e:
-            pass
+        # the filter is provided by the subclass
+        self.filter: Optional[exprs.Expr] = None
+        self.filter_eval_ctx: Optional[exprs.EvalContext] = None
     @classmethod
     def create_from_clause(
@@ -224,3 +181,110 @@ class SqlScanNode(ExecNode):
         if self.result_cursor is not None:
             self.result_cursor.close()
+class SqlScanNode(SqlNode):
+    """
+    Materializes data from the store via a Select stmt.
+    Supports filtering and ordering.
+    """
+    def __init__(
+            self, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder,
+            select_list: Iterable[exprs.Expr],
+            where_clause: Optional[exprs.Expr] = None, filter: Optional[exprs.Expr] = None,
+            order_by_items: Optional[List[Tuple[exprs.Expr, bool]]] = None,
+            limit: int = 0, set_pk: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
+    ):
+        """
+        Args:
+            select_list: output of the query
+            sql_where_clause: SQL Where clause
+            filter: additional Where-clause predicate that can't be evaluated via SQL
+            limit: max number of rows to return: 0 = no limit
+            set_pk: if True, sets the primary for each DataRow
+            exact_version_only: tables for which we only want to see rows created at the current version
+        """
+        super().__init__(tbl, row_builder, select_list, set_pk=set_pk)
+        # create Select stmt
+        if order_by_items is None:
+            order_by_items = []
+        if exact_version_only is None:
+            exact_version_only = []
+        target = tbl.tbl_version  # the stored table we're scanning
+        self.filter = filter
+        self.filter_eval_ctx = \
+            row_builder.create_eval_ctx([filter], exclude=select_list) if filter is not None else None
+        self.limit = limit
+        where_clause_tbl_ids = where_clause.tbl_ids() if where_clause is not None else set()
+        refd_tbl_ids = exprs.Expr.list_tbl_ids(self.sql_exprs) | where_clause_tbl_ids
+        self.stmt = self.create_from_clause(
+            tbl, self.stmt, refd_tbl_ids, exact_version_only={t.id for t in exact_version_only})
+        # change rowid refs against a base table to rowid refs against the target table, so that we minimize
+        # the number of tables that need to be joined to the target table
+        for rowid_ref in [e for e, _ in order_by_items if isinstance(e, exprs.RowidRef)]:
+            rowid_ref.set_tbl(tbl)
+        order_by_clause: List[sql.ClauseElement] = []
+        for e, asc in order_by_items:
+            if isinstance(e, exprs.SimilarityExpr):
+                order_by_clause.append(e.as_order_by_clause(asc))
+            else:
+                order_by_clause.append(e.sql_expr().desc() if not asc else e.sql_expr())
+        if where_clause is not None:
+            sql_where_clause = where_clause.sql_expr()
+            assert sql_where_clause is not None
+            self.stmt = self.stmt.where(sql_where_clause)
+        if len(order_by_clause) > 0:
+            self.stmt = self.stmt.order_by(*order_by_clause)
+        elif target.id in row_builder.unstored_iter_args:
+            # we are referencing unstored iter columns from this view and try to order by our primary key,
+            # which ensures that iterators will see monotonically increasing pos values
+            self.stmt = self.stmt.order_by(*self.tbl.store_tbl.rowid_columns())
+        if limit != 0 and self.filter is None:
+            # if we need to do post-SQL filtering, we can't use LIMIT
+            self.stmt = self.stmt.limit(limit)
+        try:
+            # log stmt, if possible
+            stmt_str = str(self.stmt.compile(compile_kwargs={'literal_binds': True}))
+            _logger.debug(f'SqlScanNode stmt:\n{stmt_str}')
+        except Exception as e:
+            pass
+class SqlLookupNode(SqlNode):
+    """
+    Materializes data from the store via a Select stmt with a WHERE clause that matches a list of key values
+    """
+    def __init__(
+            self, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder,
+            select_list: Iterable[exprs.Expr], sa_key_cols: list[sql.Column], key_vals: list[tuple],
+    ):
+        """
+        Args:
+            select_list: output of the query
+            sa_key_cols: list of key columns in the store table
+            key_vals: list of key values to look up
+        """
+        super().__init__(tbl, row_builder, select_list, set_pk=True)
+        target = tbl.tbl_version  # the stored table we're scanning
+        refd_tbl_ids = exprs.Expr.list_tbl_ids(self.sql_exprs)
+        self.stmt = self.create_from_clause(tbl, self.stmt, refd_tbl_ids)
+        # Where clause: (key-col-1, key-col-2, ...) IN ((val-1, val-2, ...), ...)
+        self.where_clause = sql.tuple_(*sa_key_cols).in_(key_vals)
+        self.stmt = self.stmt.where(self.where_clause)
+        if target.id in row_builder.unstored_iter_args:
+            # we are referencing unstored iter columns from this view and try to order by our primary key,
+            # which ensures that iterators will see monotonically increasing pos values
+            self.stmt = self.stmt.order_by(*self.tbl.store_tbl.rowid_columns())
+        try:
+            # log stmt, if possible
+            stmt_str = str(self.stmt.compile(compile_kwargs={'literal_binds': True}))
+            _logger.debug(f'SqlLookupNode stmt:\n{stmt_str}')
+        except Exception as e:
+            pass

pixeltable/exprs/__init__.py CHANGED Viewed

@@ -8,7 +8,6 @@ from .data_row import DataRow
 from .expr import Expr
 from .expr_set import ExprSet
 from .function_call import FunctionCall
-from .image_member_access import ImageMemberAccess
 from .in_predicate import InPredicate
 from .inline_array import InlineArray
 from .inline_dict import InlineDict
@@ -16,6 +15,7 @@ from .is_null import IsNull
 from .json_mapper import JsonMapper
 from .json_path import RELATIVE_PATH_ROOT, JsonPath
 from .literal import Literal
+from .method_ref import MethodRef
 from .object_ref import ObjectRef
 from .row_builder import RowBuilder, ColumnSlotIdx, ExecProfile
 from .rowid_ref import RowidRef

pixeltable/exprs/expr.py CHANGED Viewed

@@ -7,7 +7,6 @@ import inspect
 import json
 import sys
 import typing
-from itertools import islice
 from typing import Union, Optional, List, Callable, Any, Dict, Tuple, Set, Generator, Type
 from uuid import UUID
@@ -16,8 +15,8 @@ import sqlalchemy as sql
 import pixeltable
 import pixeltable.catalog as catalog
 import pixeltable.exceptions as excs
-import pixeltable.type_system as ts
 import pixeltable.func as func
+import pixeltable.type_system as ts
 from .data_row import DataRow
 from .globals import ComparisonOperator, LogicalOperator, LiteralPythonTypes, ArithmeticOperator
@@ -91,8 +90,8 @@ class Expr(abc.ABC):
     def default_column_name(self) -> Optional[str]:
         """
-        Returns:
-            None if this expression lacks a default name,
+        Returns:
+            None if this expression lacks a default name,
             or a valid identifier (according to catalog.is_valid_identifer) otherwise.
         """
         return None
@@ -231,9 +230,8 @@ class Expr(abc.ABC):
             self.components[i] = self.components[i]._retarget(tbl_versions)
         return self
-    @abc.abstractmethod
     def __str__(self) -> str:
-        pass
+        return f'<Expression of type {type(self)}>'
     def display_str(self, inline: bool = True) -> str:
         """
@@ -264,7 +262,7 @@ class Expr(abc.ABC):
         if is_match:
             yield self
-    def contains(self, cls: Optional[Type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None) -> bool:
+    def _contains(self, cls: Optional[Type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None) -> bool:
         """
         Returns True if any subexpr is an instance of cls.
         """
@@ -319,17 +317,20 @@ class Expr(abc.ABC):
         """
         if isinstance(o, Expr):
             return o
-        # try to create a literal
+        # Try to create a literal. We need to check for InlineArray/InlineDict
+        # first, to prevent arrays from inappropriately being interpreted as JsonType
+        # literals.
+        # TODO: general cleanup of InlineArray/InlineDict
+        if isinstance(o, list):
+            from .inline_array import InlineArray
+            return InlineArray(tuple(o))
+        if isinstance(o, dict):
+            from .inline_dict import InlineDict
+            return InlineDict(o)
         obj_type = ts.ColumnType.infer_literal_type(o)
         if obj_type is not None:
             from .literal import Literal
             return Literal(o, col_type=obj_type)
-        if isinstance(o, dict):
-            from .inline_dict import InlineDict
-            return InlineDict(o)
-        elif isinstance(o, list):
-            from .inline_array import InlineArray
-            return InlineArray(tuple(o))
         return None
     @abc.abstractmethod
@@ -427,6 +428,14 @@ class Expr(abc.ABC):
         # Return a `FunctionCall` obtained by passing this `Expr` to the new `function`.
         return function(self)
+    def __dir__(self) -> list[str]:
+        attrs = ['isin', 'astype', 'apply']
+        attrs += [
+            f.name
+            for f in func.FunctionRegistry.get().get_type_methods(self.col_type.type_enum)
+        ]
+        return attrs
     def __getitem__(self, index: object) -> Expr:
         if self.col_type.is_json_type():
             from .json_path import JsonPath
@@ -434,19 +443,23 @@ class Expr(abc.ABC):
         if self.col_type.is_array_type():
             from .array_slice import ArraySlice
             return ArraySlice(self, index)
-        raise excs.Error(f'Type {self.col_type} is not subscriptable')
+        raise AttributeError(f'Type {self.col_type} is not subscriptable')
-    def __getattr__(self, name: str) -> Union['pixeltable.exprs.ImageMemberAccess', 'pixeltable.exprs.JsonPath']:
+    def __getattr__(self, name: str) -> Union['pixeltable.exprs.MethodRef', 'pixeltable.exprs.FunctionCall', 'pixeltable.exprs.JsonPath']:
         """
         ex.: <img col>.rotate(60)
         """
-        if self.col_type.is_image_type():
-            from .image_member_access import ImageMemberAccess
-            return ImageMemberAccess(name, self)
         if self.col_type.is_json_type():
-            from .json_path import JsonPath
-            return JsonPath(self).__getattr__(name)
-        raise excs.Error(f'Member access not supported on type {self.col_type}: {name}')
+            return pixeltable.exprs.JsonPath(self).__getattr__(name)
+        else:
+            method_ref = pixeltable.exprs.MethodRef(self, name)
+            if method_ref.fn.is_property:
+                # Marked as a property, so autoinvoke the method to obtain a `FunctionCall`
+                assert method_ref.fn.arity == 1
+                return method_ref.fn(method_ref.base_expr)
+            else:
+                # Return the `MethodRef` object itself; it requires arguments to become a `FunctionCall`
+                return method_ref
     def __bool__(self) -> bool:
         raise TypeError(

pixeltable 0.2.13__py3-none-any.whl → 0.2.14__py3-none-any.whl

Potentially problematic release.

pixeltable 0.2.13__py3-none-any.whl → 0.2.14__py3-none-any.whl