pixeltable 0.2.13__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (51)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/column.py +5 -0
  4. pixeltable/catalog/globals.py +8 -0
  5. pixeltable/catalog/table.py +22 -4
  6. pixeltable/catalog/table_version.py +30 -55
  7. pixeltable/catalog/view.py +1 -1
  8. pixeltable/exec/__init__.py +2 -1
  9. pixeltable/exec/row_update_node.py +61 -0
  10. pixeltable/exec/{sql_scan_node.py → sql_node.py} +120 -56
  11. pixeltable/exprs/__init__.py +1 -1
  12. pixeltable/exprs/expr.py +35 -22
  13. pixeltable/exprs/function_call.py +60 -29
  14. pixeltable/exprs/globals.py +2 -0
  15. pixeltable/exprs/inline_array.py +18 -11
  16. pixeltable/exprs/method_ref.py +63 -0
  17. pixeltable/ext/__init__.py +9 -0
  18. pixeltable/ext/functions/__init__.py +8 -0
  19. pixeltable/ext/functions/whisperx.py +45 -5
  20. pixeltable/ext/functions/yolox.py +60 -14
  21. pixeltable/func/callable_function.py +12 -4
  22. pixeltable/func/expr_template_function.py +1 -1
  23. pixeltable/func/function.py +12 -2
  24. pixeltable/func/function_registry.py +24 -9
  25. pixeltable/func/udf.py +32 -4
  26. pixeltable/functions/__init__.py +1 -1
  27. pixeltable/functions/fireworks.py +33 -0
  28. pixeltable/functions/huggingface.py +96 -6
  29. pixeltable/functions/image.py +226 -41
  30. pixeltable/functions/openai.py +214 -0
  31. pixeltable/functions/string.py +195 -218
  32. pixeltable/functions/timestamp.py +210 -0
  33. pixeltable/functions/together.py +106 -0
  34. pixeltable/functions/video.py +2 -2
  35. pixeltable/functions/whisper.py +32 -0
  36. pixeltable/io/__init__.py +1 -1
  37. pixeltable/io/globals.py +133 -1
  38. pixeltable/io/pandas.py +52 -27
  39. pixeltable/metadata/__init__.py +1 -1
  40. pixeltable/metadata/converters/convert_18.py +39 -0
  41. pixeltable/metadata/notes.py +10 -0
  42. pixeltable/plan.py +76 -1
  43. pixeltable/tool/create_test_db_dump.py +3 -4
  44. pixeltable/tool/doc_plugins/griffe.py +4 -0
  45. pixeltable/type_system.py +15 -14
  46. {pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/METADATA +1 -1
  47. {pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/RECORD +50 -45
  48. pixeltable/exprs/image_member_access.py +0 -96
  49. {pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/LICENSE +0 -0
  50. {pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/WHEEL +0 -0
  51. {pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/entry_points.txt +0 -0
pixeltable/__init__.py CHANGED
@@ -21,7 +21,7 @@ from .type_system import (
21
21
  )
22
22
  from .utils.help import help
23
23
 
24
- from . import functions, io, iterators
24
+ from . import ext, functions, io, iterators
25
25
  from .__version__ import __version__, __version_tuple__
26
26
 
27
27
  # This is the safest / most maintainable way to do this: start with the default and "blacklist" stuff that
pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = "0.2.13"
3
- __version_tuple__ = (0, 2, 13)
2
+ __version__ = "0.2.14"
3
+ __version_tuple__ = (0, 2, 14)
pixeltable/catalog/column.py CHANGED
@@ -152,6 +152,11 @@ class Column:
152
152
  return self._records_errors
153
153
  return self.is_stored and (self.is_computed or self.col_type.is_media_type())
154
154
 
155
+ @property
156
+ def qualified_name(self) -> str:
157
+ assert self.tbl is not None
158
+ return f'{self.tbl.name}.{self.name}'
159
+
155
160
  def source(self) -> None:
156
161
  """
157
162
  If this is a computed col and the top-level expr is a function call, print the source, if possible.
pixeltable/catalog/globals.py CHANGED
@@ -19,6 +19,14 @@ class UpdateStatus:
19
19
  updated_cols: List[str] = dataclasses.field(default_factory=list)
20
20
  cols_with_excs: List[str] = dataclasses.field(default_factory=list)
21
21
 
22
+ def __iadd__(self, other: 'UpdateStatus') -> 'UpdateStatus':
23
+ self.num_rows += other.num_rows
24
+ self.num_computed_values += other.num_computed_values
25
+ self.num_excs += other.num_excs
26
+ self.updated_cols = list(dict.fromkeys(self.updated_cols + other.updated_cols))
27
+ self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
28
+ return self
29
+
22
30
  def is_valid_identifier(name: str) -> bool:
23
31
  return name.isidentifier() and not name.startswith('_')
24
32
 
pixeltable/catalog/table.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  import json
4
4
  import logging
5
5
  from pathlib import Path
6
- from typing import Union, Any, Optional, Callable, Set, Tuple, Iterable, overload, Type
6
+ from typing import Union, Any, Optional, Callable, Set, Tuple, Iterable, overload, Type, Literal
7
7
  from uuid import UUID
8
8
  import abc
9
9
 
@@ -745,18 +745,34 @@ class Table(SchemaObject):
745
745
  self._check_is_dropped()
746
746
  return self._tbl_version.update(value_spec, where, cascade)
747
747
 
748
- def batch_update(self, rows: Iterable[dict[str, Any]], cascade: bool = True) -> UpdateStatus:
748
+ def batch_update(
749
+ self, rows: Iterable[dict[str, Any]], cascade: bool = True,
750
+ if_not_exists: Literal['error', 'ignore', 'insert'] = 'error'
751
+ ) -> UpdateStatus:
749
752
  """Update rows in this table.
750
753
 
751
754
  Args:
752
755
  rows: an Iterable of dictionaries containing values for the updated columns plus values for the primary key
753
756
  columns.
754
757
  cascade: if True, also update all computed columns that transitively depend on the updated columns.
758
+ if_not_exists: Specifies the behavior if a row to update does not exist:
759
+
760
+ - `'error'`: Raise an error.
761
+ - `'ignore'`: Skip the row silently.
762
+ - `'insert'`: Insert the row.
755
763
 
756
764
  Examples:
757
- Update the 'name' and 'age' columns for the rows with ids 1 and 2 (assuming 'id' is the primary key):
765
+ Update the `name` and `age` columns for the rows with ids 1 and 2 (assuming `id` is the primary key).
766
+ If either row does not exist, this raises an error:
758
767
 
759
768
  >>> tbl.update([{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 2, 'name': 'Bob', 'age': 40}])
769
+
770
+ Update the `name` and `age` columns for the row with `id` 1 (assuming `id` is the primary key) and insert
771
+ the row with new `id` 3 (assuming this key does not exist):
772
+
773
+ >>> tbl.update(
774
+ [{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 3, 'name': 'Bob', 'age': 40}],
775
+ if_not_exists='insert')
760
776
  """
761
777
  if self._tbl_version_path.is_snapshot():
762
778
  raise excs.Error('Cannot update a snapshot')
@@ -784,7 +800,9 @@ class Table(SchemaObject):
784
800
  missing_cols = pk_col_names - set(col.name for col in col_vals.keys())
785
801
  raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
786
802
  row_updates.append(col_vals)
787
- return self._tbl_version.batch_update(row_updates, rowids, cascade)
803
+ return self._tbl_version.batch_update(
804
+ row_updates, rowids, error_if_not_exists=if_not_exists == 'error',
805
+ insert_if_not_exists=if_not_exists == 'insert', cascade=cascade)
788
806
 
789
807
  def delete(self, where: Optional['pixeltable.exprs.Expr'] = None) -> UpdateStatus:
790
808
  """Delete rows in this table.
pixeltable/catalog/table_version.py CHANGED
@@ -702,10 +702,18 @@ class TableVersion:
702
702
  raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
703
703
 
704
704
  with Env.get().engine.begin() as conn:
705
- return self._update(conn, update_spec, where, cascade)
705
+ plan, updated_cols, recomputed_cols = (
706
+ Planner.create_update_plan(self.path, update_spec, [], where, cascade)
707
+ )
708
+ result = self.propagate_update(
709
+ plan, where.sql_expr() if where is not None else None, recomputed_cols,
710
+ base_versions=[], conn=conn, timestamp=time.time(), cascade=cascade, show_progress=True)
711
+ result.updated_cols = updated_cols
712
+ return result
706
713
 
707
714
  def batch_update(
708
- self, batch: list[dict[Column, 'exprs.Expr']], rowids: list[tuple[int, ...]], cascade: bool = True
715
+ self, batch: list[dict[Column, 'exprs.Expr']], rowids: list[tuple[int, ...]], insert_if_not_exists: bool,
716
+ error_if_not_exists: bool, cascade: bool = True,
709
717
  ) -> UpdateStatus:
710
718
  """Update rows in batch.
711
719
  Args:
@@ -714,62 +722,26 @@ class TableVersion:
714
722
  """
715
723
  # if we do lookups of rowids, we must have one for each row in the batch
716
724
  assert len(rowids) == 0 or len(rowids) == len(batch)
717
- result_status = UpdateStatus()
718
725
  cols_with_excs: set[str] = set()
719
- updated_cols: set[str] = set()
720
- pk_cols = self.primary_key_columns()
721
- use_rowids = len(rowids) > 0
722
726
 
723
727
  with Env.get().engine.begin() as conn:
724
- for i, row in enumerate(batch):
725
- where_clause: Optional[exprs.Expr] = None
726
- if use_rowids:
727
- # construct Where clause to match rowid
728
- num_rowid_cols = len(self.store_tbl.rowid_columns())
729
- for col_idx in range(num_rowid_cols):
730
- assert len(rowids[i]) == num_rowid_cols, f'len({rowids[i]}) != {num_rowid_cols}'
731
- clause = exprs.RowidRef(self, col_idx) == rowids[i][col_idx]
732
- if where_clause is None:
733
- where_clause = clause
734
- else:
735
- where_clause = where_clause & clause
736
- else:
737
- # construct Where clause for primary key columns
738
- for col in pk_cols:
739
- assert col in row
740
- clause = exprs.ColumnRef(col) == row[col]
741
- if where_clause is None:
742
- where_clause = clause
743
- else:
744
- where_clause = where_clause & clause
745
-
746
- update_targets = {col: row[col] for col in row if col not in pk_cols}
747
- status = self._update(conn, update_targets, where_clause, cascade, show_progress=False)
748
- result_status.num_rows += status.num_rows
749
- result_status.num_excs += status.num_excs
750
- result_status.num_computed_values += status.num_computed_values
751
- cols_with_excs.update(status.cols_with_excs)
752
- updated_cols.update(status.updated_cols)
753
-
754
- result_status.cols_with_excs = list(cols_with_excs)
755
- result_status.updated_cols = list(updated_cols)
756
- return result_status
757
-
758
- def _update(
759
- self, conn: sql.engine.Connection, update_targets: dict[Column, 'pixeltable.exprs.Expr'],
760
- where_clause: Optional['pixeltable.exprs.Expr'] = None, cascade: bool = True,
761
- show_progress: bool = True
762
- ) -> UpdateStatus:
763
- from pixeltable.plan import Planner
728
+ from pixeltable.plan import Planner
764
729
 
765
- plan, updated_cols, recomputed_cols = (
766
- Planner.create_update_plan(self.path, update_targets, [], where_clause, cascade)
767
- )
768
- result = self.propagate_update(
769
- plan, where_clause.sql_expr() if where_clause is not None else None, recomputed_cols,
770
- base_versions=[], conn=conn, timestamp=time.time(), cascade=cascade, show_progress=show_progress)
771
- result.updated_cols = updated_cols
772
- return result
730
+ plan, row_update_node, delete_where_clause, updated_cols, recomputed_cols = \
731
+ Planner.create_batch_update_plan(self.path, batch, rowids, cascade=cascade)
732
+ result = self.propagate_update(
733
+ plan, delete_where_clause, recomputed_cols, base_versions=[], conn=conn, timestamp=time.time(),
734
+ cascade=cascade)
735
+ result.updated_cols = [c.qualified_name for c in updated_cols]
736
+
737
+ unmatched_rows = row_update_node.unmatched_rows()
738
+ if len(unmatched_rows) > 0:
739
+ if error_if_not_exists:
740
+ raise excs.Error(f'batch_update(): {len(unmatched_rows)} row(s) not found')
741
+ if insert_if_not_exists:
742
+ insert_status = self.insert(unmatched_rows, print_stats=False, fail_on_exception=False)
743
+ result += insert_status
744
+ return result
773
745
 
774
746
  def _validate_update_spec(
775
747
  self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool
@@ -779,7 +751,10 @@ class TableVersion:
779
751
  if not isinstance(col_name, str):
780
752
  raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
781
753
  if col_name == _ROWID_COLUMN_NAME:
782
- # ignore pseudo-column _rowid
754
+ # a valid rowid is a list of ints, one per rowid column
755
+ assert len(val) == len(self.store_tbl.rowid_columns())
756
+ for el in val:
757
+ assert isinstance(el, int)
783
758
  continue
784
759
  col = self.path.get_column(col_name, include_bases=False)
785
760
  if col is None:
pixeltable/catalog/view.py CHANGED
@@ -92,7 +92,7 @@ class View(Table):
92
92
  ]
93
93
  sig = func.Signature(InvalidType(), params)
94
94
  from pixeltable.exprs import FunctionCall
95
- FunctionCall.check_args(sig, bound_args)
95
+ FunctionCall.normalize_args(sig, bound_args)
96
96
  except TypeError as e:
97
97
  raise Error(f'Cannot instantiate iterator with given arguments: {e}')
98
98
 
pixeltable/exec/__init__.py CHANGED
@@ -5,6 +5,7 @@ from .exec_context import ExecContext
5
5
  from .exec_node import ExecNode
6
6
  from .expr_eval_node import ExprEvalNode
7
7
  from .in_memory_data_node import InMemoryDataNode
8
- from .sql_scan_node import SqlScanNode
8
+ from .sql_node import SqlScanNode, SqlLookupNode
9
+ from .row_update_node import RowUpdateNode
9
10
  from .media_validation_node import MediaValidationNode
10
11
  from .data_row_batch import DataRowBatch
pixeltable/exec/row_update_node.py ADDED
@@ -0,0 +1,61 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ import pixeltable.catalog as catalog
5
+ import pixeltable.exprs as exprs
6
+ from pixeltable.utils.media_store import MediaStore
7
+ from .data_row_batch import DataRowBatch
8
+ from .exec_node import ExecNode
9
+
10
+ _logger = logging.getLogger('pixeltable')
11
+
12
+ class RowUpdateNode(ExecNode):
13
+ """
14
+ Update individual rows in the input batches, identified by key columns.
15
+
16
+ The updates for a row are provided as a dict of column names to new values.
17
+ The node assumes that all update dicts contain the same keys, and it populates the slots of the columns present in
18
+ the update list.
19
+ """
20
+ def __init__(
21
+ self, tbl: catalog.TableVersionPath, key_vals_batch: list[tuple], is_rowid_key: bool,
22
+ col_vals_batch: list[dict[catalog.Column, Any]], row_builder: exprs.RowBuilder, input: ExecNode,
23
+ ):
24
+ super().__init__(row_builder, [], [], input)
25
+ self.updates = {key_vals: col_vals for key_vals, col_vals in zip(key_vals_batch, col_vals_batch)}
26
+ self.is_rowid_key = is_rowid_key
27
+ # determine slot idxs of all columns we need to read or write
28
+ # retrieve ColumnRefs from the RowBuilder (has slot_idx set)
29
+ all_col_slot_idxs = {
30
+ col_ref.col: col_ref.slot_idx
31
+ for col_ref in row_builder.unique_exprs if isinstance(col_ref, exprs.ColumnRef)
32
+ }
33
+ self.col_slot_idxs = {col: all_col_slot_idxs[col] for col in col_vals_batch[0].keys()}
34
+ self.key_slot_idxs = {col: all_col_slot_idxs[col] for col in tbl.tbl_version.primary_key_columns()}
35
+ self.matched_key_vals: set[tuple] = set()
36
+
37
+ def __next__(self) -> DataRowBatch:
38
+ batch = next(self.input)
39
+ for row in batch:
40
+ key_vals = row.rowid if self.is_rowid_key else \
41
+ tuple(row[slot_idx] for slot_idx in self.key_slot_idxs.values())
42
+ if key_vals not in self.updates:
43
+ continue
44
+ self.matched_key_vals.add(key_vals)
45
+ col_vals = self.updates[key_vals]
46
+ for col, val in col_vals.items():
47
+ slot_idx = self.col_slot_idxs[col]
48
+ row[slot_idx] = val
49
+ return batch
50
+
51
+ def unmatched_rows(self) -> list[dict[str, Any]]:
52
+ """Return rows that didn't get used in the updates as a list of dicts compatible with TableVersion.insert()."""
53
+ result: list[dict[str, Any]] = []
54
+ key_cols = self.key_slot_idxs.keys()
55
+ for key_vals, col_vals in self.updates.items():
56
+ if key_vals in self.matched_key_vals:
57
+ continue
58
+ row = {col.name: val for col, val in zip(key_cols, key_vals)}
59
+ row.update({col.name: val for col, val in col_vals.items()})
60
+ result.append(row)
61
+ return result
pixeltable/exec/{sql_scan_node.py → sql_node.py} CHANGED
@@ -13,30 +13,23 @@ import pixeltable.catalog as catalog
13
13
 
14
14
  _logger = logging.getLogger('pixeltable')
15
15
 
16
- class SqlScanNode(ExecNode):
17
- """Materializes data from the store via SQL
18
- """
16
+ class SqlNode(ExecNode):
17
+ """Materializes data from the store via a Select stmt."""
18
+
19
19
  def __init__(
20
20
  self, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder,
21
- select_list: Iterable[exprs.Expr],
22
- where_clause: Optional[exprs.Expr] = None, filter: Optional[exprs.Expr] = None,
23
- order_by_items: Optional[List[Tuple[exprs.Expr, bool]]] = None,
24
- limit: int = 0, set_pk: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
21
+ select_list: Iterable[exprs.Expr], set_pk: bool = False
25
22
  ):
26
23
  """
24
+ Initialize self.stmt with expressions derived from select_list.
25
+
26
+ This only provides the select list. The subclass is responsible for the From clause and any additional clauses.
27
+
27
28
  Args:
28
29
  select_list: output of the query
29
- sql_where_clause: SQL Where clause
30
- filter: additional Where-clause predicate that can't be evaluated via SQL
31
- limit: max number of rows to return: 0 = no limit
32
30
  set_pk: if True, sets the primary for each DataRow
33
- exact_version_only: tables for which we only want to see rows created at the current version
34
31
  """
35
32
  # create Select stmt
36
- if order_by_items is None:
37
- order_by_items = []
38
- if exact_version_only is None:
39
- exact_version_only = []
40
33
  self.tbl = tbl
41
34
  target = tbl.tbl_version # the stored table we're scanning
42
35
  self.sql_exprs = exprs.ExprSet(select_list)
@@ -45,21 +38,15 @@ class SqlScanNode(ExecNode):
45
38
  sql_subexprs = iter_arg.subexprs(filter=lambda e: e.sql_expr() is not None, traverse_matches=False)
46
39
  [self.sql_exprs.append(e) for e in sql_subexprs]
47
40
  super().__init__(row_builder, self.sql_exprs, [], None) # we materialize self.sql_exprs
48
- self.filter = filter
49
- self.filter_eval_ctx = \
50
- row_builder.create_eval_ctx([filter], exclude=select_list) if filter is not None else None
51
- self.limit = limit
52
41
 
53
42
  # change rowid refs against a base table to rowid refs against the target table, so that we minimize
54
43
  # the number of tables that need to be joined to the target table
55
44
  for rowid_ref in [e for e in self.sql_exprs if isinstance(e, exprs.RowidRef)]:
56
45
  rowid_ref.set_tbl(tbl)
57
46
 
58
- where_clause_tbl_ids = where_clause.tbl_ids() if where_clause is not None else set()
59
- refd_tbl_ids = exprs.Expr.list_tbl_ids(self.sql_exprs) | where_clause_tbl_ids
60
47
  sql_select_list = [e.sql_expr() for e in self.sql_exprs]
61
48
  assert len(sql_select_list) == len(self.sql_exprs)
62
- assert all([e is not None for e in sql_select_list])
49
+ assert all(e is not None for e in sql_select_list)
63
50
  self.set_pk = set_pk
64
51
  self.num_pk_cols = 0
65
52
  if set_pk:
@@ -69,42 +56,12 @@ class SqlScanNode(ExecNode):
69
56
  sql_select_list += pk_columns
70
57
 
71
58
  self.stmt = sql.select(*sql_select_list)
72
- self.stmt = self.create_from_clause(
73
- tbl, self.stmt, refd_tbl_ids, exact_version_only={t.id for t in exact_version_only})
74
-
75
- # change rowid refs against a base table to rowid refs against the target table, so that we minimize
76
- # the number of tables that need to be joined to the target table
77
- for rowid_ref in [e for e, _ in order_by_items if isinstance(e, exprs.RowidRef)]:
78
- rowid_ref.set_tbl(tbl)
79
- order_by_clause: List[sql.ClauseElement] = []
80
- for e, asc in order_by_items:
81
- if isinstance(e, exprs.SimilarityExpr):
82
- order_by_clause.append(e.as_order_by_clause(asc))
83
- else:
84
- order_by_clause.append(e.sql_expr().desc() if not asc else e.sql_expr())
85
-
86
- if where_clause is not None:
87
- sql_where_clause = where_clause.sql_expr()
88
- assert sql_where_clause is not None
89
- self.stmt = self.stmt.where(sql_where_clause)
90
- if len(order_by_clause) > 0:
91
- self.stmt = self.stmt.order_by(*order_by_clause)
92
- elif target.id in row_builder.unstored_iter_args:
93
- # we are referencing unstored iter columns from this view and try to order by our primary key,
94
- # which ensures that iterators will see monotonically increasing pos values
95
- self.stmt = self.stmt.order_by(*self.tbl.store_tbl.rowid_columns())
96
- if limit != 0 and self.filter is None:
97
- # if we need to do post-SQL filtering, we can't use LIMIT
98
- self.stmt = self.stmt.limit(limit)
99
59
 
60
+ # additional state
100
61
  self.result_cursor: Optional[sql.engine.CursorResult] = None
101
-
102
- try:
103
- # log stmt, if possible
104
- stmt_str = str(self.stmt.compile(compile_kwargs={'literal_binds': True}))
105
- _logger.debug(f'SqlScanNode stmt:\n{stmt_str}')
106
- except Exception as e:
107
- pass
62
+ # the filter is provided by the subclass
63
+ self.filter: Optional[exprs.Expr] = None
64
+ self.filter_eval_ctx: Optional[exprs.EvalContext] = None
108
65
 
109
66
  @classmethod
110
67
  def create_from_clause(
@@ -224,3 +181,110 @@ class SqlScanNode(ExecNode):
224
181
  if self.result_cursor is not None:
225
182
  self.result_cursor.close()
226
183
 
184
+
185
+ class SqlScanNode(SqlNode):
186
+ """
187
+ Materializes data from the store via a Select stmt.
188
+
189
+ Supports filtering and ordering.
190
+ """
191
+ def __init__(
192
+ self, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder,
193
+ select_list: Iterable[exprs.Expr],
194
+ where_clause: Optional[exprs.Expr] = None, filter: Optional[exprs.Expr] = None,
195
+ order_by_items: Optional[List[Tuple[exprs.Expr, bool]]] = None,
196
+ limit: int = 0, set_pk: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
197
+ ):
198
+ """
199
+ Args:
200
+ select_list: output of the query
201
+ sql_where_clause: SQL Where clause
202
+ filter: additional Where-clause predicate that can't be evaluated via SQL
203
+ limit: max number of rows to return: 0 = no limit
204
+ set_pk: if True, sets the primary for each DataRow
205
+ exact_version_only: tables for which we only want to see rows created at the current version
206
+ """
207
+ super().__init__(tbl, row_builder, select_list, set_pk=set_pk)
208
+ # create Select stmt
209
+ if order_by_items is None:
210
+ order_by_items = []
211
+ if exact_version_only is None:
212
+ exact_version_only = []
213
+ target = tbl.tbl_version # the stored table we're scanning
214
+ self.filter = filter
215
+ self.filter_eval_ctx = \
216
+ row_builder.create_eval_ctx([filter], exclude=select_list) if filter is not None else None
217
+ self.limit = limit
218
+
219
+ where_clause_tbl_ids = where_clause.tbl_ids() if where_clause is not None else set()
220
+ refd_tbl_ids = exprs.Expr.list_tbl_ids(self.sql_exprs) | where_clause_tbl_ids
221
+ self.stmt = self.create_from_clause(
222
+ tbl, self.stmt, refd_tbl_ids, exact_version_only={t.id for t in exact_version_only})
223
+
224
+ # change rowid refs against a base table to rowid refs against the target table, so that we minimize
225
+ # the number of tables that need to be joined to the target table
226
+ for rowid_ref in [e for e, _ in order_by_items if isinstance(e, exprs.RowidRef)]:
227
+ rowid_ref.set_tbl(tbl)
228
+ order_by_clause: List[sql.ClauseElement] = []
229
+ for e, asc in order_by_items:
230
+ if isinstance(e, exprs.SimilarityExpr):
231
+ order_by_clause.append(e.as_order_by_clause(asc))
232
+ else:
233
+ order_by_clause.append(e.sql_expr().desc() if not asc else e.sql_expr())
234
+
235
+ if where_clause is not None:
236
+ sql_where_clause = where_clause.sql_expr()
237
+ assert sql_where_clause is not None
238
+ self.stmt = self.stmt.where(sql_where_clause)
239
+ if len(order_by_clause) > 0:
240
+ self.stmt = self.stmt.order_by(*order_by_clause)
241
+ elif target.id in row_builder.unstored_iter_args:
242
+ # we are referencing unstored iter columns from this view and try to order by our primary key,
243
+ # which ensures that iterators will see monotonically increasing pos values
244
+ self.stmt = self.stmt.order_by(*self.tbl.store_tbl.rowid_columns())
245
+ if limit != 0 and self.filter is None:
246
+ # if we need to do post-SQL filtering, we can't use LIMIT
247
+ self.stmt = self.stmt.limit(limit)
248
+
249
+ try:
250
+ # log stmt, if possible
251
+ stmt_str = str(self.stmt.compile(compile_kwargs={'literal_binds': True}))
252
+ _logger.debug(f'SqlScanNode stmt:\n{stmt_str}')
253
+ except Exception as e:
254
+ pass
255
+
256
+
257
+ class SqlLookupNode(SqlNode):
258
+ """
259
+ Materializes data from the store via a Select stmt with a WHERE clause that matches a list of key values
260
+ """
261
+ def __init__(
262
+ self, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder,
263
+ select_list: Iterable[exprs.Expr], sa_key_cols: list[sql.Column], key_vals: list[tuple],
264
+ ):
265
+ """
266
+ Args:
267
+ select_list: output of the query
268
+ sa_key_cols: list of key columns in the store table
269
+ key_vals: list of key values to look up
270
+ """
271
+ super().__init__(tbl, row_builder, select_list, set_pk=True)
272
+ target = tbl.tbl_version # the stored table we're scanning
273
+ refd_tbl_ids = exprs.Expr.list_tbl_ids(self.sql_exprs)
274
+ self.stmt = self.create_from_clause(tbl, self.stmt, refd_tbl_ids)
275
+ # Where clause: (key-col-1, key-col-2, ...) IN ((val-1, val-2, ...), ...)
276
+ self.where_clause = sql.tuple_(*sa_key_cols).in_(key_vals)
277
+ self.stmt = self.stmt.where(self.where_clause)
278
+
279
+ if target.id in row_builder.unstored_iter_args:
280
+ # we are referencing unstored iter columns from this view and try to order by our primary key,
281
+ # which ensures that iterators will see monotonically increasing pos values
282
+ self.stmt = self.stmt.order_by(*self.tbl.store_tbl.rowid_columns())
283
+
284
+ try:
285
+ # log stmt, if possible
286
+ stmt_str = str(self.stmt.compile(compile_kwargs={'literal_binds': True}))
287
+ _logger.debug(f'SqlLookupNode stmt:\n{stmt_str}')
288
+ except Exception as e:
289
+ pass
290
+
pixeltable/exprs/__init__.py CHANGED
@@ -8,7 +8,6 @@ from .data_row import DataRow
8
8
  from .expr import Expr
9
9
  from .expr_set import ExprSet
10
10
  from .function_call import FunctionCall
11
- from .image_member_access import ImageMemberAccess
12
11
  from .in_predicate import InPredicate
13
12
  from .inline_array import InlineArray
14
13
  from .inline_dict import InlineDict
@@ -16,6 +15,7 @@ from .is_null import IsNull
16
15
  from .json_mapper import JsonMapper
17
16
  from .json_path import RELATIVE_PATH_ROOT, JsonPath
18
17
  from .literal import Literal
18
+ from .method_ref import MethodRef
19
19
  from .object_ref import ObjectRef
20
20
  from .row_builder import RowBuilder, ColumnSlotIdx, ExecProfile
21
21
  from .rowid_ref import RowidRef
pixeltable/exprs/expr.py CHANGED
@@ -7,7 +7,6 @@ import inspect
7
7
  import json
8
8
  import sys
9
9
  import typing
10
- from itertools import islice
11
10
  from typing import Union, Optional, List, Callable, Any, Dict, Tuple, Set, Generator, Type
12
11
  from uuid import UUID
13
12
 
@@ -16,8 +15,8 @@ import sqlalchemy as sql
16
15
  import pixeltable
17
16
  import pixeltable.catalog as catalog
18
17
  import pixeltable.exceptions as excs
19
- import pixeltable.type_system as ts
20
18
  import pixeltable.func as func
19
+ import pixeltable.type_system as ts
21
20
  from .data_row import DataRow
22
21
  from .globals import ComparisonOperator, LogicalOperator, LiteralPythonTypes, ArithmeticOperator
23
22
 
@@ -91,8 +90,8 @@ class Expr(abc.ABC):
91
90
 
92
91
  def default_column_name(self) -> Optional[str]:
93
92
  """
94
- Returns:
95
- None if this expression lacks a default name,
93
+ Returns:
94
+ None if this expression lacks a default name,
96
95
  or a valid identifier (according to catalog.is_valid_identifer) otherwise.
97
96
  """
98
97
  return None
@@ -231,9 +230,8 @@ class Expr(abc.ABC):
231
230
  self.components[i] = self.components[i]._retarget(tbl_versions)
232
231
  return self
233
232
 
234
- @abc.abstractmethod
235
233
  def __str__(self) -> str:
236
- pass
234
+ return f'<Expression of type {type(self)}>'
237
235
 
238
236
  def display_str(self, inline: bool = True) -> str:
239
237
  """
@@ -264,7 +262,7 @@ class Expr(abc.ABC):
264
262
  if is_match:
265
263
  yield self
266
264
 
267
- def contains(self, cls: Optional[Type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None) -> bool:
265
+ def _contains(self, cls: Optional[Type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None) -> bool:
268
266
  """
269
267
  Returns True if any subexpr is an instance of cls.
270
268
  """
@@ -319,17 +317,20 @@ class Expr(abc.ABC):
319
317
  """
320
318
  if isinstance(o, Expr):
321
319
  return o
322
- # try to create a literal
320
+ # Try to create a literal. We need to check for InlineArray/InlineDict
321
+ # first, to prevent arrays from inappropriately being interpreted as JsonType
322
+ # literals.
323
+ # TODO: general cleanup of InlineArray/InlineDict
324
+ if isinstance(o, list):
325
+ from .inline_array import InlineArray
326
+ return InlineArray(tuple(o))
327
+ if isinstance(o, dict):
328
+ from .inline_dict import InlineDict
329
+ return InlineDict(o)
323
330
  obj_type = ts.ColumnType.infer_literal_type(o)
324
331
  if obj_type is not None:
325
332
  from .literal import Literal
326
333
  return Literal(o, col_type=obj_type)
327
- if isinstance(o, dict):
328
- from .inline_dict import InlineDict
329
- return InlineDict(o)
330
- elif isinstance(o, list):
331
- from .inline_array import InlineArray
332
- return InlineArray(tuple(o))
333
334
  return None
334
335
 
335
336
  @abc.abstractmethod
@@ -427,6 +428,14 @@ class Expr(abc.ABC):
427
428
  # Return a `FunctionCall` obtained by passing this `Expr` to the new `function`.
428
429
  return function(self)
429
430
 
431
+ def __dir__(self) -> list[str]:
432
+ attrs = ['isin', 'astype', 'apply']
433
+ attrs += [
434
+ f.name
435
+ for f in func.FunctionRegistry.get().get_type_methods(self.col_type.type_enum)
436
+ ]
437
+ return attrs
438
+
430
439
  def __getitem__(self, index: object) -> Expr:
431
440
  if self.col_type.is_json_type():
432
441
  from .json_path import JsonPath
@@ -434,19 +443,23 @@ class Expr(abc.ABC):
434
443
  if self.col_type.is_array_type():
435
444
  from .array_slice import ArraySlice
436
445
  return ArraySlice(self, index)
437
- raise excs.Error(f'Type {self.col_type} is not subscriptable')
446
+ raise AttributeError(f'Type {self.col_type} is not subscriptable')
438
447
 
439
- def __getattr__(self, name: str) -> Union['pixeltable.exprs.ImageMemberAccess', 'pixeltable.exprs.JsonPath']:
448
+ def __getattr__(self, name: str) -> Union['pixeltable.exprs.MethodRef', 'pixeltable.exprs.FunctionCall', 'pixeltable.exprs.JsonPath']:
440
449
  """
441
450
  ex.: <img col>.rotate(60)
442
451
  """
443
- if self.col_type.is_image_type():
444
- from .image_member_access import ImageMemberAccess
445
- return ImageMemberAccess(name, self)
446
452
  if self.col_type.is_json_type():
447
- from .json_path import JsonPath
448
- return JsonPath(self).__getattr__(name)
449
- raise excs.Error(f'Member access not supported on type {self.col_type}: {name}')
453
+ return pixeltable.exprs.JsonPath(self).__getattr__(name)
454
+ else:
455
+ method_ref = pixeltable.exprs.MethodRef(self, name)
456
+ if method_ref.fn.is_property:
457
+ # Marked as a property, so autoinvoke the method to obtain a `FunctionCall`
458
+ assert method_ref.fn.arity == 1
459
+ return method_ref.fn(method_ref.base_expr)
460
+ else:
461
+ # Return the `MethodRef` object itself; it requires arguments to become a `FunctionCall`
462
+ return method_ref
450
463
 
451
464
  def __bool__(self) -> bool:
452
465
  raise TypeError(