pixeltable 0.2.13__py3-none-any.whl → 0.2.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (58) hide show
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/column.py +8 -3
  4. pixeltable/catalog/globals.py +8 -0
  5. pixeltable/catalog/table.py +25 -9
  6. pixeltable/catalog/table_version.py +30 -55
  7. pixeltable/catalog/view.py +1 -1
  8. pixeltable/env.py +4 -4
  9. pixeltable/exec/__init__.py +2 -1
  10. pixeltable/exec/row_update_node.py +61 -0
  11. pixeltable/exec/{sql_scan_node.py → sql_node.py} +120 -56
  12. pixeltable/exprs/__init__.py +1 -1
  13. pixeltable/exprs/arithmetic_expr.py +41 -16
  14. pixeltable/exprs/expr.py +72 -22
  15. pixeltable/exprs/function_call.py +64 -29
  16. pixeltable/exprs/globals.py +5 -1
  17. pixeltable/exprs/inline_array.py +18 -11
  18. pixeltable/exprs/method_ref.py +63 -0
  19. pixeltable/ext/__init__.py +9 -0
  20. pixeltable/ext/functions/__init__.py +8 -0
  21. pixeltable/ext/functions/whisperx.py +45 -5
  22. pixeltable/ext/functions/yolox.py +60 -14
  23. pixeltable/func/callable_function.py +12 -4
  24. pixeltable/func/expr_template_function.py +1 -1
  25. pixeltable/func/function.py +12 -2
  26. pixeltable/func/function_registry.py +24 -9
  27. pixeltable/func/udf.py +32 -4
  28. pixeltable/functions/__init__.py +1 -1
  29. pixeltable/functions/fireworks.py +33 -0
  30. pixeltable/functions/huggingface.py +96 -6
  31. pixeltable/functions/image.py +226 -41
  32. pixeltable/functions/json.py +46 -0
  33. pixeltable/functions/openai.py +214 -0
  34. pixeltable/functions/string.py +195 -218
  35. pixeltable/functions/timestamp.py +210 -0
  36. pixeltable/functions/together.py +106 -0
  37. pixeltable/functions/video.py +2 -2
  38. pixeltable/functions/{eval.py → vision.py} +170 -27
  39. pixeltable/functions/whisper.py +32 -0
  40. pixeltable/io/__init__.py +1 -1
  41. pixeltable/io/external_store.py +2 -2
  42. pixeltable/io/globals.py +133 -1
  43. pixeltable/io/pandas.py +82 -31
  44. pixeltable/iterators/video.py +55 -23
  45. pixeltable/metadata/__init__.py +1 -1
  46. pixeltable/metadata/converters/convert_18.py +39 -0
  47. pixeltable/metadata/notes.py +10 -0
  48. pixeltable/plan.py +76 -1
  49. pixeltable/store.py +65 -28
  50. pixeltable/tool/create_test_db_dump.py +8 -9
  51. pixeltable/tool/doc_plugins/griffe.py +4 -0
  52. pixeltable/type_system.py +84 -63
  53. {pixeltable-0.2.13.dist-info → pixeltable-0.2.15.dist-info}/METADATA +2 -2
  54. {pixeltable-0.2.13.dist-info → pixeltable-0.2.15.dist-info}/RECORD +57 -51
  55. pixeltable/exprs/image_member_access.py +0 -96
  56. {pixeltable-0.2.13.dist-info → pixeltable-0.2.15.dist-info}/LICENSE +0 -0
  57. {pixeltable-0.2.13.dist-info → pixeltable-0.2.15.dist-info}/WHEEL +0 -0
  58. {pixeltable-0.2.13.dist-info → pixeltable-0.2.15.dist-info}/entry_points.txt +0 -0
pixeltable/__init__.py CHANGED
@@ -21,7 +21,7 @@ from .type_system import (
21
21
  )
22
22
  from .utils.help import help
23
23
 
24
- from . import functions, io, iterators
24
+ from . import ext, functions, io, iterators
25
25
  from .__version__ import __version__, __version_tuple__
26
26
 
27
27
  # This is the safest / most maintainable way to do this: start with the default and "blacklist" stuff that
pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = "0.2.13"
3
- __version_tuple__ = (0, 2, 13)
2
+ __version__ = "0.2.15"
3
+ __version_tuple__ = (0, 2, 15)
@@ -1,13 +1,13 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import Optional, Union, Callable, Any
5
- from uuid import UUID
4
+ from typing import Any, Callable, Optional, Union
6
5
 
7
6
  import sqlalchemy as sql
8
7
 
9
8
  import pixeltable.exceptions as excs
10
9
  import pixeltable.type_system as ts
10
+
11
11
  from .globals import is_valid_identifier
12
12
 
13
13
  _logger = logging.getLogger('pixeltable')
@@ -21,7 +21,7 @@ class Column:
21
21
  def __init__(
22
22
  self, name: Optional[str], col_type: Optional[ts.ColumnType] = None,
23
23
  computed_with: Optional[Union['Expr', Callable]] = None,
24
- is_pk: bool = False, stored: Optional[bool] = None,
24
+ is_pk: bool = False, stored: bool = True,
25
25
  col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
26
26
  schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
27
27
  records_errors: Optional[bool] = None, value_expr_dict: Optional[dict[str, Any]] = None,
@@ -152,6 +152,11 @@ class Column:
152
152
  return self._records_errors
153
153
  return self.is_stored and (self.is_computed or self.col_type.is_media_type())
154
154
 
155
+ @property
156
+ def qualified_name(self) -> str:
157
+ assert self.tbl is not None
158
+ return f'{self.tbl.name}.{self.name}'
159
+
155
160
  def source(self) -> None:
156
161
  """
157
162
  If this is a computed col and the top-level expr is a function call, print the source, if possible.
@@ -19,6 +19,14 @@ class UpdateStatus:
19
19
  updated_cols: List[str] = dataclasses.field(default_factory=list)
20
20
  cols_with_excs: List[str] = dataclasses.field(default_factory=list)
21
21
 
22
+ def __iadd__(self, other: 'UpdateStatus') -> 'UpdateStatus':
23
+ self.num_rows += other.num_rows
24
+ self.num_computed_values += other.num_computed_values
25
+ self.num_excs += other.num_excs
26
+ self.updated_cols = list(dict.fromkeys(self.updated_cols + other.updated_cols))
27
+ self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
28
+ return self
29
+
22
30
  def is_valid_identifier(name: str) -> bool:
23
31
  return name.isidentifier() and not name.startswith('_')
24
32
 
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  import json
4
4
  import logging
5
5
  from pathlib import Path
6
- from typing import Union, Any, Optional, Callable, Set, Tuple, Iterable, overload, Type
6
+ from typing import Union, Any, Optional, Callable, Set, Tuple, Iterable, overload, Type, Literal
7
7
  from uuid import UUID
8
8
  import abc
9
9
 
@@ -434,8 +434,8 @@ class Table(SchemaObject):
434
434
  for name, spec in schema.items():
435
435
  col_type: Optional[ts.ColumnType] = None
436
436
  value_expr: Optional[exprs.Expr] = None
437
- stored: Optional[bool] = None
438
437
  primary_key: Optional[bool] = None
438
+ stored = True
439
439
 
440
440
  if isinstance(spec, ts.ColumnType):
441
441
  # TODO: create copy
@@ -455,7 +455,7 @@ class Table(SchemaObject):
455
455
  if value_expr is not None and isinstance(value_expr, exprs.Expr):
456
456
  # create copy so we can modify it
457
457
  value_expr = value_expr.copy()
458
- stored = spec.get('stored')
458
+ stored = spec.get('stored', True)
459
459
  primary_key = spec.get('primary_key')
460
460
 
461
461
  column = Column(
@@ -478,12 +478,10 @@ class Table(SchemaObject):
478
478
  raise excs.Error(f'Column name conflicts with a registered query: {col.name!r}')
479
479
  if col.stored is False and not (col.is_computed and col.col_type.is_image_type()):
480
480
  raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed image columns')
481
- if col.stored is False and not (col.col_type.is_image_type() and not col.has_window_fn_call()):
481
+ if col.stored is False and col.has_window_fn_call():
482
482
  raise excs.Error((
483
483
  f'Column {col.name!r}: stored={col.stored} is not valid for image columns computed with a streaming '
484
484
  f'function'))
485
- if col.stored is None:
486
- col.stored = not (col.is_computed and col.col_type.is_image_type() and not col.has_window_fn_call())
487
485
 
488
486
  @classmethod
489
487
  def _verify_schema(cls, schema: list[Column]) -> None:
@@ -745,18 +743,34 @@ class Table(SchemaObject):
745
743
  self._check_is_dropped()
746
744
  return self._tbl_version.update(value_spec, where, cascade)
747
745
 
748
- def batch_update(self, rows: Iterable[dict[str, Any]], cascade: bool = True) -> UpdateStatus:
746
+ def batch_update(
747
+ self, rows: Iterable[dict[str, Any]], cascade: bool = True,
748
+ if_not_exists: Literal['error', 'ignore', 'insert'] = 'error'
749
+ ) -> UpdateStatus:
749
750
  """Update rows in this table.
750
751
 
751
752
  Args:
752
753
  rows: an Iterable of dictionaries containing values for the updated columns plus values for the primary key
753
754
  columns.
754
755
  cascade: if True, also update all computed columns that transitively depend on the updated columns.
756
+ if_not_exists: Specifies the behavior if a row to update does not exist:
757
+
758
+ - `'error'`: Raise an error.
759
+ - `'ignore'`: Skip the row silently.
760
+ - `'insert'`: Insert the row.
755
761
 
756
762
  Examples:
757
- Update the 'name' and 'age' columns for the rows with ids 1 and 2 (assuming 'id' is the primary key):
763
+ Update the `name` and `age` columns for the rows with ids 1 and 2 (assuming `id` is the primary key).
764
+ If either row does not exist, this raises an error:
758
765
 
759
766
  >>> tbl.update([{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 2, 'name': 'Bob', 'age': 40}])
767
+
768
+ Update the `name` and `age` columns for the row with `id` 1 (assuming `id` is the primary key) and insert
769
+ the row with new `id` 3 (assuming this key does not exist):
770
+
771
+ >>> tbl.update(
772
+ [{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 3, 'name': 'Bob', 'age': 40}],
773
+ if_not_exists='insert')
760
774
  """
761
775
  if self._tbl_version_path.is_snapshot():
762
776
  raise excs.Error('Cannot update a snapshot')
@@ -784,7 +798,9 @@ class Table(SchemaObject):
784
798
  missing_cols = pk_col_names - set(col.name for col in col_vals.keys())
785
799
  raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
786
800
  row_updates.append(col_vals)
787
- return self._tbl_version.batch_update(row_updates, rowids, cascade)
801
+ return self._tbl_version.batch_update(
802
+ row_updates, rowids, error_if_not_exists=if_not_exists == 'error',
803
+ insert_if_not_exists=if_not_exists == 'insert', cascade=cascade)
788
804
 
789
805
  def delete(self, where: Optional['pixeltable.exprs.Expr'] = None) -> UpdateStatus:
790
806
  """Delete rows in this table.
@@ -702,10 +702,18 @@ class TableVersion:
702
702
  raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
703
703
 
704
704
  with Env.get().engine.begin() as conn:
705
- return self._update(conn, update_spec, where, cascade)
705
+ plan, updated_cols, recomputed_cols = (
706
+ Planner.create_update_plan(self.path, update_spec, [], where, cascade)
707
+ )
708
+ result = self.propagate_update(
709
+ plan, where.sql_expr() if where is not None else None, recomputed_cols,
710
+ base_versions=[], conn=conn, timestamp=time.time(), cascade=cascade, show_progress=True)
711
+ result.updated_cols = updated_cols
712
+ return result
706
713
 
707
714
  def batch_update(
708
- self, batch: list[dict[Column, 'exprs.Expr']], rowids: list[tuple[int, ...]], cascade: bool = True
715
+ self, batch: list[dict[Column, 'exprs.Expr']], rowids: list[tuple[int, ...]], insert_if_not_exists: bool,
716
+ error_if_not_exists: bool, cascade: bool = True,
709
717
  ) -> UpdateStatus:
710
718
  """Update rows in batch.
711
719
  Args:
@@ -714,62 +722,26 @@ class TableVersion:
714
722
  """
715
723
  # if we do lookups of rowids, we must have one for each row in the batch
716
724
  assert len(rowids) == 0 or len(rowids) == len(batch)
717
- result_status = UpdateStatus()
718
725
  cols_with_excs: set[str] = set()
719
- updated_cols: set[str] = set()
720
- pk_cols = self.primary_key_columns()
721
- use_rowids = len(rowids) > 0
722
726
 
723
727
  with Env.get().engine.begin() as conn:
724
- for i, row in enumerate(batch):
725
- where_clause: Optional[exprs.Expr] = None
726
- if use_rowids:
727
- # construct Where clause to match rowid
728
- num_rowid_cols = len(self.store_tbl.rowid_columns())
729
- for col_idx in range(num_rowid_cols):
730
- assert len(rowids[i]) == num_rowid_cols, f'len({rowids[i]}) != {num_rowid_cols}'
731
- clause = exprs.RowidRef(self, col_idx) == rowids[i][col_idx]
732
- if where_clause is None:
733
- where_clause = clause
734
- else:
735
- where_clause = where_clause & clause
736
- else:
737
- # construct Where clause for primary key columns
738
- for col in pk_cols:
739
- assert col in row
740
- clause = exprs.ColumnRef(col) == row[col]
741
- if where_clause is None:
742
- where_clause = clause
743
- else:
744
- where_clause = where_clause & clause
745
-
746
- update_targets = {col: row[col] for col in row if col not in pk_cols}
747
- status = self._update(conn, update_targets, where_clause, cascade, show_progress=False)
748
- result_status.num_rows += status.num_rows
749
- result_status.num_excs += status.num_excs
750
- result_status.num_computed_values += status.num_computed_values
751
- cols_with_excs.update(status.cols_with_excs)
752
- updated_cols.update(status.updated_cols)
753
-
754
- result_status.cols_with_excs = list(cols_with_excs)
755
- result_status.updated_cols = list(updated_cols)
756
- return result_status
757
-
758
- def _update(
759
- self, conn: sql.engine.Connection, update_targets: dict[Column, 'pixeltable.exprs.Expr'],
760
- where_clause: Optional['pixeltable.exprs.Expr'] = None, cascade: bool = True,
761
- show_progress: bool = True
762
- ) -> UpdateStatus:
763
- from pixeltable.plan import Planner
728
+ from pixeltable.plan import Planner
764
729
 
765
- plan, updated_cols, recomputed_cols = (
766
- Planner.create_update_plan(self.path, update_targets, [], where_clause, cascade)
767
- )
768
- result = self.propagate_update(
769
- plan, where_clause.sql_expr() if where_clause is not None else None, recomputed_cols,
770
- base_versions=[], conn=conn, timestamp=time.time(), cascade=cascade, show_progress=show_progress)
771
- result.updated_cols = updated_cols
772
- return result
730
+ plan, row_update_node, delete_where_clause, updated_cols, recomputed_cols = \
731
+ Planner.create_batch_update_plan(self.path, batch, rowids, cascade=cascade)
732
+ result = self.propagate_update(
733
+ plan, delete_where_clause, recomputed_cols, base_versions=[], conn=conn, timestamp=time.time(),
734
+ cascade=cascade)
735
+ result.updated_cols = [c.qualified_name for c in updated_cols]
736
+
737
+ unmatched_rows = row_update_node.unmatched_rows()
738
+ if len(unmatched_rows) > 0:
739
+ if error_if_not_exists:
740
+ raise excs.Error(f'batch_update(): {len(unmatched_rows)} row(s) not found')
741
+ if insert_if_not_exists:
742
+ insert_status = self.insert(unmatched_rows, print_stats=False, fail_on_exception=False)
743
+ result += insert_status
744
+ return result
773
745
 
774
746
  def _validate_update_spec(
775
747
  self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool
@@ -779,7 +751,10 @@ class TableVersion:
779
751
  if not isinstance(col_name, str):
780
752
  raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
781
753
  if col_name == _ROWID_COLUMN_NAME:
782
- # ignore pseudo-column _rowid
754
+ # a valid rowid is a list of ints, one per rowid column
755
+ assert len(val) == len(self.store_tbl.rowid_columns())
756
+ for el in val:
757
+ assert isinstance(el, int)
783
758
  continue
784
759
  col = self.path.get_column(col_name, include_bases=False)
785
760
  if col is None:
@@ -92,7 +92,7 @@ class View(Table):
92
92
  ]
93
93
  sig = func.Signature(InvalidType(), params)
94
94
  from pixeltable.exprs import FunctionCall
95
- FunctionCall.check_args(sig, bound_args)
95
+ FunctionCall.normalize_args(sig, bound_args)
96
96
  except TypeError as e:
97
97
  raise Error(f'Cannot instantiate iterator with given arguments: {e}')
98
98
 
pixeltable/env.py CHANGED
@@ -16,7 +16,7 @@ from dataclasses import dataclass
16
16
  from pathlib import Path
17
17
  from typing import Callable, Optional, Dict, Any, List, TYPE_CHECKING
18
18
 
19
- import pgserver
19
+ import pixeltable_pgserver
20
20
  import sqlalchemy as sql
21
21
  import yaml
22
22
  from tqdm import TqdmWarning
@@ -60,7 +60,7 @@ class Env:
60
60
  self._sa_engine: Optional[sql.engine.base.Engine] = None
61
61
  self._pgdata_dir: Optional[Path] = None
62
62
  self._db_name: Optional[str] = None
63
- self._db_server: Optional[pgserver.PostgresServer] = None
63
+ self._db_server: Optional[pixeltable_pgserver.PostgresServer] = None
64
64
  self._db_url: Optional[str] = None
65
65
 
66
66
  # info about installed packages that are utilized by some parts of the code;
@@ -266,8 +266,8 @@ class Env:
266
266
  self._db_name = os.environ.get('PIXELTABLE_DB', 'pixeltable')
267
267
  self._pgdata_dir = Path(os.environ.get('PIXELTABLE_PGDATA', str(self._home / 'pgdata')))
268
268
 
269
- # in pgserver.get_server(): cleanup_mode=None will leave db on for debugging purposes
270
- self._db_server = pgserver.get_server(self._pgdata_dir, cleanup_mode=None)
269
+ # in pixeltable_pgserver.get_server(): cleanup_mode=None will leave db on for debugging purposes
270
+ self._db_server = pixeltable_pgserver.get_server(self._pgdata_dir, cleanup_mode=None)
271
271
  self._db_url = self._db_server.get_uri(database=self._db_name)
272
272
 
273
273
  if reinit_db:
@@ -5,6 +5,7 @@ from .exec_context import ExecContext
5
5
  from .exec_node import ExecNode
6
6
  from .expr_eval_node import ExprEvalNode
7
7
  from .in_memory_data_node import InMemoryDataNode
8
- from .sql_scan_node import SqlScanNode
8
+ from .sql_node import SqlScanNode, SqlLookupNode
9
+ from .row_update_node import RowUpdateNode
9
10
  from .media_validation_node import MediaValidationNode
10
11
  from .data_row_batch import DataRowBatch
@@ -0,0 +1,61 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ import pixeltable.catalog as catalog
5
+ import pixeltable.exprs as exprs
6
+ from pixeltable.utils.media_store import MediaStore
7
+ from .data_row_batch import DataRowBatch
8
+ from .exec_node import ExecNode
9
+
10
+ _logger = logging.getLogger('pixeltable')
11
+
12
+ class RowUpdateNode(ExecNode):
13
+ """
14
+ Update individual rows in the input batches, identified by key columns.
15
+
16
+ The updates for a row are provided as a dict of column names to new values.
17
+ The node assumes that all update dicts contain the same keys, and it populates the slots of the columns present in
18
+ the update list.
19
+ """
20
+ def __init__(
21
+ self, tbl: catalog.TableVersionPath, key_vals_batch: list[tuple], is_rowid_key: bool,
22
+ col_vals_batch: list[dict[catalog.Column, Any]], row_builder: exprs.RowBuilder, input: ExecNode,
23
+ ):
24
+ super().__init__(row_builder, [], [], input)
25
+ self.updates = {key_vals: col_vals for key_vals, col_vals in zip(key_vals_batch, col_vals_batch)}
26
+ self.is_rowid_key = is_rowid_key
27
+ # determine slot idxs of all columns we need to read or write
28
+ # retrieve ColumnRefs from the RowBuilder (has slot_idx set)
29
+ all_col_slot_idxs = {
30
+ col_ref.col: col_ref.slot_idx
31
+ for col_ref in row_builder.unique_exprs if isinstance(col_ref, exprs.ColumnRef)
32
+ }
33
+ self.col_slot_idxs = {col: all_col_slot_idxs[col] for col in col_vals_batch[0].keys()}
34
+ self.key_slot_idxs = {col: all_col_slot_idxs[col] for col in tbl.tbl_version.primary_key_columns()}
35
+ self.matched_key_vals: set[tuple] = set()
36
+
37
+ def __next__(self) -> DataRowBatch:
38
+ batch = next(self.input)
39
+ for row in batch:
40
+ key_vals = row.rowid if self.is_rowid_key else \
41
+ tuple(row[slot_idx] for slot_idx in self.key_slot_idxs.values())
42
+ if key_vals not in self.updates:
43
+ continue
44
+ self.matched_key_vals.add(key_vals)
45
+ col_vals = self.updates[key_vals]
46
+ for col, val in col_vals.items():
47
+ slot_idx = self.col_slot_idxs[col]
48
+ row[slot_idx] = val
49
+ return batch
50
+
51
+ def unmatched_rows(self) -> list[dict[str, Any]]:
52
+ """Return rows that didn't get used in the updates as a list of dicts compatible with TableVersion.insert()."""
53
+ result: list[dict[str, Any]] = []
54
+ key_cols = self.key_slot_idxs.keys()
55
+ for key_vals, col_vals in self.updates.items():
56
+ if key_vals in self.matched_key_vals:
57
+ continue
58
+ row = {col.name: val for col, val in zip(key_cols, key_vals)}
59
+ row.update({col.name: val for col, val in col_vals.items()})
60
+ result.append(row)
61
+ return result
@@ -13,30 +13,23 @@ import pixeltable.catalog as catalog
13
13
 
14
14
  _logger = logging.getLogger('pixeltable')
15
15
 
16
- class SqlScanNode(ExecNode):
17
- """Materializes data from the store via SQL
18
- """
16
+ class SqlNode(ExecNode):
17
+ """Materializes data from the store via a Select stmt."""
18
+
19
19
  def __init__(
20
20
  self, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder,
21
- select_list: Iterable[exprs.Expr],
22
- where_clause: Optional[exprs.Expr] = None, filter: Optional[exprs.Expr] = None,
23
- order_by_items: Optional[List[Tuple[exprs.Expr, bool]]] = None,
24
- limit: int = 0, set_pk: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
21
+ select_list: Iterable[exprs.Expr], set_pk: bool = False
25
22
  ):
26
23
  """
24
+ Initialize self.stmt with expressions derived from select_list.
25
+
26
+ This only provides the select list. The subclass is responsible for the From clause and any additional clauses.
27
+
27
28
  Args:
28
29
  select_list: output of the query
29
- sql_where_clause: SQL Where clause
30
- filter: additional Where-clause predicate that can't be evaluated via SQL
31
- limit: max number of rows to return: 0 = no limit
32
30
  set_pk: if True, sets the primary for each DataRow
33
- exact_version_only: tables for which we only want to see rows created at the current version
34
31
  """
35
32
  # create Select stmt
36
- if order_by_items is None:
37
- order_by_items = []
38
- if exact_version_only is None:
39
- exact_version_only = []
40
33
  self.tbl = tbl
41
34
  target = tbl.tbl_version # the stored table we're scanning
42
35
  self.sql_exprs = exprs.ExprSet(select_list)
@@ -45,21 +38,15 @@ class SqlScanNode(ExecNode):
45
38
  sql_subexprs = iter_arg.subexprs(filter=lambda e: e.sql_expr() is not None, traverse_matches=False)
46
39
  [self.sql_exprs.append(e) for e in sql_subexprs]
47
40
  super().__init__(row_builder, self.sql_exprs, [], None) # we materialize self.sql_exprs
48
- self.filter = filter
49
- self.filter_eval_ctx = \
50
- row_builder.create_eval_ctx([filter], exclude=select_list) if filter is not None else None
51
- self.limit = limit
52
41
 
53
42
  # change rowid refs against a base table to rowid refs against the target table, so that we minimize
54
43
  # the number of tables that need to be joined to the target table
55
44
  for rowid_ref in [e for e in self.sql_exprs if isinstance(e, exprs.RowidRef)]:
56
45
  rowid_ref.set_tbl(tbl)
57
46
 
58
- where_clause_tbl_ids = where_clause.tbl_ids() if where_clause is not None else set()
59
- refd_tbl_ids = exprs.Expr.list_tbl_ids(self.sql_exprs) | where_clause_tbl_ids
60
47
  sql_select_list = [e.sql_expr() for e in self.sql_exprs]
61
48
  assert len(sql_select_list) == len(self.sql_exprs)
62
- assert all([e is not None for e in sql_select_list])
49
+ assert all(e is not None for e in sql_select_list)
63
50
  self.set_pk = set_pk
64
51
  self.num_pk_cols = 0
65
52
  if set_pk:
@@ -69,42 +56,12 @@ class SqlScanNode(ExecNode):
69
56
  sql_select_list += pk_columns
70
57
 
71
58
  self.stmt = sql.select(*sql_select_list)
72
- self.stmt = self.create_from_clause(
73
- tbl, self.stmt, refd_tbl_ids, exact_version_only={t.id for t in exact_version_only})
74
-
75
- # change rowid refs against a base table to rowid refs against the target table, so that we minimize
76
- # the number of tables that need to be joined to the target table
77
- for rowid_ref in [e for e, _ in order_by_items if isinstance(e, exprs.RowidRef)]:
78
- rowid_ref.set_tbl(tbl)
79
- order_by_clause: List[sql.ClauseElement] = []
80
- for e, asc in order_by_items:
81
- if isinstance(e, exprs.SimilarityExpr):
82
- order_by_clause.append(e.as_order_by_clause(asc))
83
- else:
84
- order_by_clause.append(e.sql_expr().desc() if not asc else e.sql_expr())
85
-
86
- if where_clause is not None:
87
- sql_where_clause = where_clause.sql_expr()
88
- assert sql_where_clause is not None
89
- self.stmt = self.stmt.where(sql_where_clause)
90
- if len(order_by_clause) > 0:
91
- self.stmt = self.stmt.order_by(*order_by_clause)
92
- elif target.id in row_builder.unstored_iter_args:
93
- # we are referencing unstored iter columns from this view and try to order by our primary key,
94
- # which ensures that iterators will see monotonically increasing pos values
95
- self.stmt = self.stmt.order_by(*self.tbl.store_tbl.rowid_columns())
96
- if limit != 0 and self.filter is None:
97
- # if we need to do post-SQL filtering, we can't use LIMIT
98
- self.stmt = self.stmt.limit(limit)
99
59
 
60
+ # additional state
100
61
  self.result_cursor: Optional[sql.engine.CursorResult] = None
101
-
102
- try:
103
- # log stmt, if possible
104
- stmt_str = str(self.stmt.compile(compile_kwargs={'literal_binds': True}))
105
- _logger.debug(f'SqlScanNode stmt:\n{stmt_str}')
106
- except Exception as e:
107
- pass
62
+ # the filter is provided by the subclass
63
+ self.filter: Optional[exprs.Expr] = None
64
+ self.filter_eval_ctx: Optional[exprs.EvalContext] = None
108
65
 
109
66
  @classmethod
110
67
  def create_from_clause(
@@ -224,3 +181,110 @@ class SqlScanNode(ExecNode):
224
181
  if self.result_cursor is not None:
225
182
  self.result_cursor.close()
226
183
 
184
+
185
+ class SqlScanNode(SqlNode):
186
+ """
187
+ Materializes data from the store via a Select stmt.
188
+
189
+ Supports filtering and ordering.
190
+ """
191
+ def __init__(
192
+ self, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder,
193
+ select_list: Iterable[exprs.Expr],
194
+ where_clause: Optional[exprs.Expr] = None, filter: Optional[exprs.Expr] = None,
195
+ order_by_items: Optional[List[Tuple[exprs.Expr, bool]]] = None,
196
+ limit: int = 0, set_pk: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
197
+ ):
198
+ """
199
+ Args:
200
+ select_list: output of the query
201
+ sql_where_clause: SQL Where clause
202
+ filter: additional Where-clause predicate that can't be evaluated via SQL
203
+ limit: max number of rows to return: 0 = no limit
204
+ set_pk: if True, sets the primary for each DataRow
205
+ exact_version_only: tables for which we only want to see rows created at the current version
206
+ """
207
+ super().__init__(tbl, row_builder, select_list, set_pk=set_pk)
208
+ # create Select stmt
209
+ if order_by_items is None:
210
+ order_by_items = []
211
+ if exact_version_only is None:
212
+ exact_version_only = []
213
+ target = tbl.tbl_version # the stored table we're scanning
214
+ self.filter = filter
215
+ self.filter_eval_ctx = \
216
+ row_builder.create_eval_ctx([filter], exclude=select_list) if filter is not None else None
217
+ self.limit = limit
218
+
219
+ where_clause_tbl_ids = where_clause.tbl_ids() if where_clause is not None else set()
220
+ refd_tbl_ids = exprs.Expr.list_tbl_ids(self.sql_exprs) | where_clause_tbl_ids
221
+ self.stmt = self.create_from_clause(
222
+ tbl, self.stmt, refd_tbl_ids, exact_version_only={t.id for t in exact_version_only})
223
+
224
+ # change rowid refs against a base table to rowid refs against the target table, so that we minimize
225
+ # the number of tables that need to be joined to the target table
226
+ for rowid_ref in [e for e, _ in order_by_items if isinstance(e, exprs.RowidRef)]:
227
+ rowid_ref.set_tbl(tbl)
228
+ order_by_clause: List[sql.ClauseElement] = []
229
+ for e, asc in order_by_items:
230
+ if isinstance(e, exprs.SimilarityExpr):
231
+ order_by_clause.append(e.as_order_by_clause(asc))
232
+ else:
233
+ order_by_clause.append(e.sql_expr().desc() if not asc else e.sql_expr())
234
+
235
+ if where_clause is not None:
236
+ sql_where_clause = where_clause.sql_expr()
237
+ assert sql_where_clause is not None
238
+ self.stmt = self.stmt.where(sql_where_clause)
239
+ if len(order_by_clause) > 0:
240
+ self.stmt = self.stmt.order_by(*order_by_clause)
241
+ elif target.id in row_builder.unstored_iter_args:
242
+ # we are referencing unstored iter columns from this view and try to order by our primary key,
243
+ # which ensures that iterators will see monotonically increasing pos values
244
+ self.stmt = self.stmt.order_by(*self.tbl.store_tbl.rowid_columns())
245
+ if limit != 0 and self.filter is None:
246
+ # if we need to do post-SQL filtering, we can't use LIMIT
247
+ self.stmt = self.stmt.limit(limit)
248
+
249
+ try:
250
+ # log stmt, if possible
251
+ stmt_str = str(self.stmt.compile(compile_kwargs={'literal_binds': True}))
252
+ _logger.debug(f'SqlScanNode stmt:\n{stmt_str}')
253
+ except Exception as e:
254
+ pass
255
+
256
+
257
+ class SqlLookupNode(SqlNode):
258
+ """
259
+ Materializes data from the store via a Select stmt with a WHERE clause that matches a list of key values
260
+ """
261
+ def __init__(
262
+ self, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder,
263
+ select_list: Iterable[exprs.Expr], sa_key_cols: list[sql.Column], key_vals: list[tuple],
264
+ ):
265
+ """
266
+ Args:
267
+ select_list: output of the query
268
+ sa_key_cols: list of key columns in the store table
269
+ key_vals: list of key values to look up
270
+ """
271
+ super().__init__(tbl, row_builder, select_list, set_pk=True)
272
+ target = tbl.tbl_version # the stored table we're scanning
273
+ refd_tbl_ids = exprs.Expr.list_tbl_ids(self.sql_exprs)
274
+ self.stmt = self.create_from_clause(tbl, self.stmt, refd_tbl_ids)
275
+ # Where clause: (key-col-1, key-col-2, ...) IN ((val-1, val-2, ...), ...)
276
+ self.where_clause = sql.tuple_(*sa_key_cols).in_(key_vals)
277
+ self.stmt = self.stmt.where(self.where_clause)
278
+
279
+ if target.id in row_builder.unstored_iter_args:
280
+ # we are referencing unstored iter columns from this view and try to order by our primary key,
281
+ # which ensures that iterators will see monotonically increasing pos values
282
+ self.stmt = self.stmt.order_by(*self.tbl.store_tbl.rowid_columns())
283
+
284
+ try:
285
+ # log stmt, if possible
286
+ stmt_str = str(self.stmt.compile(compile_kwargs={'literal_binds': True}))
287
+ _logger.debug(f'SqlLookupNode stmt:\n{stmt_str}')
288
+ except Exception as e:
289
+ pass
290
+
@@ -8,7 +8,6 @@ from .data_row import DataRow
8
8
  from .expr import Expr
9
9
  from .expr_set import ExprSet
10
10
  from .function_call import FunctionCall
11
- from .image_member_access import ImageMemberAccess
12
11
  from .in_predicate import InPredicate
13
12
  from .inline_array import InlineArray
14
13
  from .inline_dict import InlineDict
@@ -16,6 +15,7 @@ from .is_null import IsNull
16
15
  from .json_mapper import JsonMapper
17
16
  from .json_path import RELATIVE_PATH_ROOT, JsonPath
18
17
  from .literal import Literal
18
+ from .method_ref import MethodRef
19
19
  from .object_ref import ObjectRef
20
20
  from .row_builder import RowBuilder, ColumnSlotIdx, ExecProfile
21
21
  from .rowid_ref import RowidRef