pixeltable 0.2.16__py3-none-any.whl → 0.2.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic; see the advisory details below for more information.

Files changed (79):
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/catalog.py +8 -7
  3. pixeltable/catalog/column.py +11 -8
  4. pixeltable/catalog/insertable_table.py +1 -1
  5. pixeltable/catalog/path_dict.py +8 -6
  6. pixeltable/catalog/table.py +20 -13
  7. pixeltable/catalog/table_version.py +91 -54
  8. pixeltable/catalog/table_version_path.py +7 -9
  9. pixeltable/catalog/view.py +2 -1
  10. pixeltable/dataframe.py +1 -1
  11. pixeltable/env.py +173 -82
  12. pixeltable/exec/aggregation_node.py +2 -1
  13. pixeltable/exec/component_iteration_node.py +1 -1
  14. pixeltable/exec/sql_node.py +11 -8
  15. pixeltable/exprs/__init__.py +1 -0
  16. pixeltable/exprs/arithmetic_expr.py +4 -4
  17. pixeltable/exprs/array_slice.py +2 -1
  18. pixeltable/exprs/column_property_ref.py +9 -7
  19. pixeltable/exprs/column_ref.py +2 -1
  20. pixeltable/exprs/comparison.py +10 -7
  21. pixeltable/exprs/compound_predicate.py +3 -2
  22. pixeltable/exprs/data_row.py +19 -4
  23. pixeltable/exprs/expr.py +46 -35
  24. pixeltable/exprs/expr_set.py +32 -9
  25. pixeltable/exprs/function_call.py +56 -32
  26. pixeltable/exprs/in_predicate.py +3 -2
  27. pixeltable/exprs/inline_array.py +2 -1
  28. pixeltable/exprs/inline_dict.py +2 -1
  29. pixeltable/exprs/is_null.py +3 -2
  30. pixeltable/exprs/json_mapper.py +5 -4
  31. pixeltable/exprs/json_path.py +7 -1
  32. pixeltable/exprs/literal.py +34 -7
  33. pixeltable/exprs/method_ref.py +3 -3
  34. pixeltable/exprs/object_ref.py +6 -5
  35. pixeltable/exprs/row_builder.py +25 -17
  36. pixeltable/exprs/rowid_ref.py +2 -1
  37. pixeltable/exprs/similarity_expr.py +2 -1
  38. pixeltable/exprs/sql_element_cache.py +30 -0
  39. pixeltable/exprs/type_cast.py +3 -3
  40. pixeltable/exprs/variable.py +2 -1
  41. pixeltable/ext/functions/whisperx.py +4 -4
  42. pixeltable/ext/functions/yolox.py +6 -6
  43. pixeltable/func/aggregate_function.py +1 -0
  44. pixeltable/func/function.py +28 -4
  45. pixeltable/functions/__init__.py +4 -2
  46. pixeltable/functions/anthropic.py +107 -0
  47. pixeltable/functions/fireworks.py +2 -2
  48. pixeltable/functions/globals.py +6 -1
  49. pixeltable/functions/huggingface.py +2 -2
  50. pixeltable/functions/image.py +17 -2
  51. pixeltable/functions/json.py +5 -5
  52. pixeltable/functions/mistralai.py +188 -0
  53. pixeltable/functions/openai.py +6 -10
  54. pixeltable/functions/string.py +3 -2
  55. pixeltable/functions/timestamp.py +95 -7
  56. pixeltable/functions/together.py +5 -5
  57. pixeltable/functions/video.py +2 -2
  58. pixeltable/functions/vision.py +27 -17
  59. pixeltable/functions/whisper.py +1 -1
  60. pixeltable/io/hf_datasets.py +17 -15
  61. pixeltable/io/pandas.py +0 -2
  62. pixeltable/io/parquet.py +15 -14
  63. pixeltable/iterators/document.py +16 -15
  64. pixeltable/metadata/__init__.py +1 -1
  65. pixeltable/metadata/converters/convert_19.py +46 -0
  66. pixeltable/metadata/notes.py +1 -0
  67. pixeltable/metadata/schema.py +5 -4
  68. pixeltable/plan.py +100 -78
  69. pixeltable/store.py +5 -1
  70. pixeltable/tool/create_test_db_dump.py +4 -3
  71. pixeltable/type_system.py +12 -14
  72. pixeltable/utils/documents.py +45 -42
  73. pixeltable/utils/formatter.py +2 -2
  74. {pixeltable-0.2.16.dist-info → pixeltable-0.2.18.dist-info}/METADATA +79 -21
  75. pixeltable-0.2.18.dist-info/RECORD +147 -0
  76. pixeltable-0.2.16.dist-info/RECORD +0 -143
  77. {pixeltable-0.2.16.dist-info → pixeltable-0.2.18.dist-info}/LICENSE +0 -0
  78. {pixeltable-0.2.16.dist-info → pixeltable-0.2.18.dist-info}/WHEEL +0 -0
  79. {pixeltable-0.2.16.dist-info → pixeltable-0.2.18.dist-info}/entry_points.txt +0 -0
pixeltable/plan.py CHANGED
@@ -1,11 +1,11 @@
1
- from typing import Any, Iterable, List, Optional, Sequence, Set, Tuple
1
+ import itertools
2
+ from typing import Any, Iterable, Optional, Sequence
2
3
  from uuid import UUID
3
4
 
4
5
  import sqlalchemy as sql
5
6
 
6
7
  import pixeltable as pxt
7
8
  import pixeltable.exec as exec
8
- import pixeltable.func as func
9
9
  from pixeltable import catalog
10
10
  from pixeltable import exceptions as excs
11
11
  from pixeltable import exprs
@@ -14,11 +14,12 @@ from pixeltable import exprs
14
14
  def _is_agg_fn_call(e: exprs.Expr) -> bool:
15
15
  return isinstance(e, exprs.FunctionCall) and e.is_agg_fn_call and not e.is_window_fn_call
16
16
 
17
+
17
18
  def _get_combined_ordering(
18
- o1: List[Tuple[exprs.Expr, bool]], o2: List[Tuple[exprs.Expr, bool]]
19
- ) -> List[Tuple[exprs.Expr, bool]]:
19
+ o1: list[tuple[exprs.Expr, bool]], o2: list[tuple[exprs.Expr, bool]]
20
+ ) -> list[tuple[exprs.Expr, bool]]:
20
21
  """Returns an ordering that's compatible with both o1 and o2, or an empty list if no such ordering exists"""
21
- result: List[Tuple[exprs.Expr, bool]] = []
22
+ result: list[tuple[exprs.Expr, bool]] = []
22
23
  # determine combined ordering
23
24
  for (e1, asc1), (e2, asc2) in zip(o1, o2):
24
25
  if e1.id != e2.id:
@@ -36,18 +37,42 @@ def _get_combined_ordering(
36
37
  result.extend(o2[prefix_len:])
37
38
  return result
38
39
 
40
+
39
41
  class Analyzer:
40
- """Class to perform semantic analysis of a query and to store the analysis state"""
42
+ """
43
+ Performs semantic analysis of a query and stores the analysis state.
44
+ """
45
+
46
+ tbl: catalog.TableVersionPath
47
+ all_exprs: list[exprs.Expr]
48
+ select_list: list[exprs.Expr]
49
+ group_by_clause: list[exprs.Expr]
50
+ order_by_clause: list[tuple[exprs.Expr, bool]]
51
+
52
+ # exprs that can be expressed in SQL and are retrieved directly from the store
53
+ #sql_exprs: list[exprs.Expr]
54
+
55
+ sql_elements: exprs.SqlElementCache
56
+
57
+ # Where clause of the Select stmt of the SQL scan
58
+ sql_where_clause: Optional[exprs.Expr]
59
+
60
+ # filter predicate applied to output rows of the SQL scan
61
+ filter: Optional[exprs.Expr]
62
+
63
+ agg_fn_calls: list[exprs.FunctionCall]
64
+ agg_order_by: list[exprs.Expr]
41
65
 
42
66
  def __init__(
43
67
  self, tbl: catalog.TableVersionPath, select_list: Sequence[exprs.Expr],
44
- where_clause: Optional[exprs.Expr] = None, group_by_clause: Optional[List[exprs.Expr]] = None,
45
- order_by_clause: Optional[List[Tuple[exprs.Expr, bool]]] = None):
68
+ where_clause: Optional[exprs.Expr] = None, group_by_clause: Optional[list[exprs.Expr]] = None,
69
+ order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None):
46
70
  if group_by_clause is None:
47
71
  group_by_clause = []
48
72
  if order_by_clause is None:
49
73
  order_by_clause = []
50
74
  self.tbl = tbl
75
+ self.sql_elements = exprs.SqlElementCache()
51
76
 
52
77
  # remove references to unstored computed cols
53
78
  self.select_list = [e.resolve_computed_cols() for e in select_list]
@@ -56,14 +81,10 @@ class Analyzer:
56
81
  self.group_by_clause = [e.resolve_computed_cols() for e in group_by_clause]
57
82
  self.order_by_clause = [(e.resolve_computed_cols(), asc) for e, asc in order_by_clause]
58
83
 
59
- # Where clause of the Select stmt of the SQL scan
60
- self.sql_where_clause: Optional[exprs.Expr] = None
61
- # filter predicate applied to output rows of the SQL scan
62
- self.filter: Optional[exprs.Expr] = None
63
- # not executable
64
- #self.similarity_clause: Optional[exprs.ImageSimilarityPredicate] = None
84
+ self.sql_where_clause = None
85
+ self.filter = None
65
86
  if where_clause is not None:
66
- where_clause_conjuncts, self.filter = where_clause.split_conjuncts(lambda e: e.sql_expr() is not None)
87
+ where_clause_conjuncts, self.filter = where_clause.split_conjuncts(self.sql_elements.contains)
67
88
  self.sql_where_clause = exprs.CompoundPredicate.make_conjunction(where_clause_conjuncts)
68
89
 
69
90
  # all exprs that are evaluated in Python; not executable
@@ -72,15 +93,8 @@ class Analyzer:
72
93
  self.all_exprs.extend(e for e, _ in self.order_by_clause)
73
94
  if self.filter is not None:
74
95
  self.all_exprs.append(self.filter)
75
- self.sql_exprs = list(exprs.Expr.list_subexprs(
76
- self.all_exprs, filter=lambda e: e.sql_expr() is not None, traverse_matches=False))
77
-
78
- # sql_exprs: exprs that can be expressed via SQL and are retrieved directly from the store
79
- # (we don't want to materialize literals via SQL, so we remove them here)
80
- self.sql_exprs = [e for e in self.sql_exprs if not isinstance(e, exprs.Literal)]
81
96
 
82
- self.agg_fn_calls: List[exprs.FunctionCall] = []
83
- self.agg_order_by: List[exprs.Expr] = []
97
+ self.agg_order_by = []
84
98
  self._analyze_agg()
85
99
 
86
100
  def _analyze_agg(self) -> None:
@@ -106,7 +120,7 @@ class Analyzer:
106
120
  # check that grouping exprs don't contain aggregates and can be expressed as SQL (we perform sort-based
107
121
  # aggregation and rely on the SqlScanNode returning data in the correct order)
108
122
  for e in self.group_by_clause:
109
- if e.sql_expr() is None:
123
+ if not self.sql_elements.contains(e):
110
124
  raise excs.Error(f'Invalid grouping expression, needs to be expressible in SQL: {e}')
111
125
  if e._contains(filter=lambda e: _is_agg_fn_call(e)):
112
126
  raise excs.Error(f'Grouping expression contains aggregate function: {e}')
@@ -132,7 +146,7 @@ class Analyzer:
132
146
  ))
133
147
  self.agg_order_by = order_by
134
148
 
135
- def _determine_agg_status(self, e: exprs.Expr, grouping_expr_ids: Set[int]) -> Tuple[bool, bool]:
149
+ def _determine_agg_status(self, e: exprs.Expr, grouping_expr_ids: set[int]) -> tuple[bool, bool]:
136
150
  """Determine whether expr is the input to or output of an aggregate function.
137
151
  Returns:
138
152
  (<is output>, <is input>)
@@ -167,17 +181,15 @@ class Analyzer:
167
181
  TODO: add EvalCtx for each expr list?
168
182
  """
169
183
  # maintain original composition of select list
170
- row_builder.substitute_exprs(self.select_list, remove_duplicates=False)
171
- row_builder.substitute_exprs(self.group_by_clause)
184
+ row_builder.set_slot_idxs(self.select_list, remove_duplicates=False)
185
+ row_builder.set_slot_idxs(self.group_by_clause)
172
186
  order_by_exprs = [e for e, _ in self.order_by_clause]
173
- row_builder.substitute_exprs(order_by_exprs)
174
- self.order_by_clause = [(e, asc) for e, (_, asc) in zip(order_by_exprs, self.order_by_clause)]
175
- row_builder.substitute_exprs(self.all_exprs)
176
- row_builder.substitute_exprs(self.sql_exprs)
187
+ row_builder.set_slot_idxs(order_by_exprs)
188
+ row_builder.set_slot_idxs(self.all_exprs)
177
189
  if self.filter is not None:
178
- self.filter = row_builder.unique_exprs[self.filter]
179
- row_builder.substitute_exprs(self.agg_fn_calls)
180
- row_builder.substitute_exprs(self.agg_order_by)
190
+ row_builder.set_slot_idxs([self.filter])
191
+ row_builder.set_slot_idxs(self.agg_fn_calls)
192
+ row_builder.set_slot_idxs(self.agg_order_by)
181
193
 
182
194
 
183
195
  class Planner:
@@ -187,12 +199,12 @@ class Planner:
187
199
  cls, tbl: catalog.TableVersionPath, where_clause: Optional[exprs.Expr] = None
188
200
  ) -> sql.Select:
189
201
  stmt = sql.select(sql.func.count())
190
- refd_tbl_ids: Set[UUID] = set()
202
+ refd_tbl_ids: set[UUID] = set()
191
203
  if where_clause is not None:
192
204
  analyzer = cls.analyze(tbl, where_clause)
193
205
  if analyzer.filter is not None:
194
206
  raise excs.Error(f'Filter {analyzer.filter} not expressible in SQL')
195
- clause_element = analyzer.sql_where_clause.sql_expr()
207
+ clause_element = analyzer.sql_where_clause.sql_expr(analyzer.sql_elements)
196
208
  assert clause_element is not None
197
209
  stmt = stmt.where(clause_element)
198
210
  refd_tbl_ids = where_clause.tbl_ids()
@@ -267,9 +279,9 @@ class Planner:
267
279
  def create_update_plan(
268
280
  cls, tbl: catalog.TableVersionPath,
269
281
  update_targets: dict[catalog.Column, exprs.Expr],
270
- recompute_targets: List[catalog.Column],
282
+ recompute_targets: list[catalog.Column],
271
283
  where_clause: Optional[exprs.Expr], cascade: bool
272
- ) -> Tuple[exec.ExecNode, List[str], List[catalog.Column]]:
284
+ ) -> tuple[exec.ExecNode, list[str], list[catalog.Column]]:
273
285
  """Creates a plan to materialize updated rows.
274
286
  The plan:
275
287
  - retrieves rows that are visible at the current version of the table
@@ -310,7 +322,7 @@ class Planner:
310
322
  select_list.extend(recomputed_exprs)
311
323
 
312
324
  # we need to retrieve the PK columns of the existing rows
313
- plan = cls.create_query_plan(tbl, select_list, where_clause=where_clause, with_pk=True, ignore_errors=True)
325
+ plan = cls.create_query_plan(tbl, select_list, where_clause=where_clause, ignore_errors=True)
314
326
  all_base_cols = copied_cols + updated_cols + list(recomputed_base_cols) # same order as select_list
315
327
  # update row builder with column information
316
328
  for i, col in enumerate(all_base_cols):
@@ -356,7 +368,7 @@ class Planner:
356
368
  copied_cols = [
357
369
  col for col in target.cols if col.is_stored and not col in updated_cols and not col in recomputed_base_cols
358
370
  ]
359
- select_list = [exprs.ColumnRef(col) for col in copied_cols]
371
+ select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in copied_cols]
360
372
  select_list.extend(exprs.ColumnRef(col) for col in updated_cols)
361
373
 
362
374
  recomputed_exprs = \
@@ -369,19 +381,21 @@ class Planner:
369
381
  # - RowUpdateNode to update the retrieved rows
370
382
  # - ExprEvalNode to evaluate the remaining output exprs
371
383
  analyzer = Analyzer(tbl, select_list)
372
- row_builder = exprs.RowBuilder(analyzer.all_exprs, [], analyzer.sql_exprs)
384
+ sql_exprs = list(exprs.Expr.list_subexprs(
385
+ analyzer.all_exprs, filter=analyzer.sql_elements.contains, traverse_matches=False))
386
+ row_builder = exprs.RowBuilder(analyzer.all_exprs, [], sql_exprs)
373
387
  analyzer.finalize(row_builder)
374
- sql_lookup_node = exec.SqlLookupNode(tbl, row_builder, analyzer.sql_exprs, sa_key_cols, key_vals)
388
+ sql_lookup_node = exec.SqlLookupNode(tbl, row_builder, sql_exprs, sa_key_cols, key_vals)
375
389
  delete_where_clause = sql_lookup_node.where_clause
376
390
  col_vals = [{col: row[col].val for col in updated_cols} for row in batch]
377
391
  row_update_node = exec.RowUpdateNode(tbl, key_vals, len(rowids) > 0, col_vals, row_builder, sql_lookup_node)
378
392
  plan: exec.ExecNode = row_update_node
379
- if not cls._is_contained_in(analyzer.select_list, analyzer.sql_exprs):
393
+ if not cls._is_contained_in(analyzer.select_list, sql_exprs):
380
394
  # we need an ExprEvalNode to evaluate the remaining output exprs
381
- plan = exec.ExprEvalNode(row_builder, analyzer.select_list, analyzer.sql_exprs, input=plan)
395
+ plan = exec.ExprEvalNode(row_builder, analyzer.select_list, sql_exprs, input=plan)
382
396
  # update row builder with column information
383
397
  all_base_cols = copied_cols + list(updated_cols) + list(recomputed_base_cols) # same order as select_list
384
- row_builder.substitute_exprs(select_list, remove_duplicates=False)
398
+ row_builder.set_slot_idxs(select_list, remove_duplicates=False)
385
399
  for i, col in enumerate(all_base_cols):
386
400
  plan.row_builder.add_table_column(col, select_list[i].slot_idx)
387
401
 
@@ -396,7 +410,7 @@ class Planner:
396
410
 
397
411
  @classmethod
398
412
  def create_view_update_plan(
399
- cls, view: catalog.TableVersionPath, recompute_targets: List[catalog.Column]
413
+ cls, view: catalog.TableVersionPath, recompute_targets: list[catalog.Column]
400
414
  ) -> exec.ExecNode:
401
415
  """Creates a plan to materialize updated rows for a view, given that the base table has been updated.
402
416
  The plan:
@@ -427,8 +441,7 @@ class Planner:
427
441
 
428
442
  # we need to retrieve the PK columns of the existing rows
429
443
  plan = cls.create_query_plan(
430
- view, select_list, where_clause=target.predicate, with_pk=True, ignore_errors=True,
431
- exact_version_only=view.get_bases())
444
+ view, select_list, where_clause=target.predicate, ignore_errors=True, exact_version_only=view.get_bases())
432
445
  for i, col in enumerate(copied_cols + list(recomputed_cols)): # same order as select_list
433
446
  plan.row_builder.add_table_column(col, select_list[i].slot_idx)
434
447
  # TODO: avoid duplication with view_load_plan() logic (where does this belong?)
@@ -440,7 +453,7 @@ class Planner:
440
453
  @classmethod
441
454
  def create_view_load_plan(
442
455
  cls, view: catalog.TableVersionPath, propagates_insert: bool = False
443
- ) -> Tuple[exec.ExecNode, int]:
456
+ ) -> tuple[exec.ExecNode, int]:
444
457
  """Creates a query plan for populating a view.
445
458
 
446
459
  Args:
@@ -459,7 +472,6 @@ class Planner:
459
472
  # - we can ignore stored non-computed columns because they have a default value that is supplied directly by
460
473
  # the store
461
474
  target = view.tbl_version # the one we need to populate
462
- #stored_cols = [c for c in target.cols if c.is_stored and (c.is_computed or target.is_iterator_column(c))]
463
475
  stored_cols = [c for c in target.cols if c.is_stored]
464
476
  # 2. for component views: iterator args
465
477
  iterator_args = [target.iterator_args] if target.iterator_args is not None else []
@@ -477,8 +489,9 @@ class Planner:
477
489
  ]
478
490
  # if we're propagating an insert, we only want to see those base rows that were created for the current version
479
491
  base_analyzer = Analyzer(view, base_output_exprs, where_clause=target.predicate)
492
+ base_eval_ctx = row_builder.create_eval_ctx(base_analyzer.all_exprs)
480
493
  plan = cls._create_query_plan(
481
- view.base, row_builder=row_builder, analyzer=base_analyzer, with_pk=True,
494
+ view.base, row_builder=row_builder, analyzer=base_analyzer, eval_ctx=base_eval_ctx, with_pk=True,
482
495
  exact_version_only=view.get_bases() if propagates_insert else [])
483
496
  exec_ctx = plan.ctx
484
497
  if target.is_component_view():
@@ -494,9 +507,9 @@ class Planner:
494
507
  return plan, len(row_builder.default_eval_ctx.target_exprs)
495
508
 
496
509
  @classmethod
497
- def _determine_ordering(cls, analyzer: Analyzer) -> List[Tuple[exprs.Expr, bool]]:
510
+ def _determine_ordering(cls, analyzer: Analyzer) -> list[tuple[exprs.Expr, bool]]:
498
511
  """Returns the exprs for the ORDER BY clause of the SqlScanNode"""
499
- order_by_items: List[Tuple[exprs.Expr, Optional[bool]]] = []
512
+ order_by_items: list[tuple[exprs.Expr, Optional[bool]]] = []
500
513
  order_by_origin: Optional[exprs.Expr] = None # the expr that determines the ordering
501
514
 
502
515
 
@@ -576,7 +589,7 @@ class Planner:
576
589
  order_by_origin = unstored_iter_col_refs[0]
577
590
 
578
591
  for e in [e for e, _ in order_by_items]:
579
- if e.sql_expr() is None:
592
+ if not analyzer.sql_elements.contains(e):
580
593
  raise excs.Error(f'order_by element cannot be expressed in SQL: {e}')
581
594
  # we do ascending ordering by default, if not specified otherwise
582
595
  order_by_items = [(e, True) if asc is None else (e, asc) for e, asc in order_by_items]
@@ -590,7 +603,7 @@ class Planner:
590
603
 
591
604
  @classmethod
592
605
  def _insert_prefetch_node(
593
- cls, tbl_id: UUID, output_exprs: List[exprs.Expr], row_builder: exprs.RowBuilder, input: exec.ExecNode
606
+ cls, tbl_id: UUID, output_exprs: list[exprs.Expr], row_builder: exprs.RowBuilder, input: exec.ExecNode
594
607
  ) -> exec.ExecNode:
595
608
  """Returns a CachePrefetchNode into the plan if needed, otherwise returns input"""
596
609
  # we prefetch external files for all media ColumnRefs, even those that aren't part of the dependencies
@@ -608,10 +621,10 @@ class Planner:
608
621
 
609
622
  @classmethod
610
623
  def create_query_plan(
611
- cls, tbl: catalog.TableVersionPath, select_list: Optional[List[exprs.Expr]] = None,
612
- where_clause: Optional[exprs.Expr] = None, group_by_clause: Optional[List[exprs.Expr]] = None,
613
- order_by_clause: Optional[List[Tuple[exprs.Expr, bool]]] = None, limit: Optional[int] = None,
614
- with_pk: bool = False, ignore_errors: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
624
+ cls, tbl: catalog.TableVersionPath, select_list: Optional[list[exprs.Expr]] = None,
625
+ where_clause: Optional[exprs.Expr] = None, group_by_clause: Optional[list[exprs.Expr]] = None,
626
+ order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None, limit: Optional[int] = None,
627
+ ignore_errors: bool = False, exact_version_only: Optional[list[catalog.TableVersion]] = None
615
628
  ) -> exec.ExecNode:
616
629
  """Return plan for executing a query.
617
630
  Updates 'select_list' in place to make it executable.
@@ -628,13 +641,19 @@ class Planner:
628
641
  analyzer = Analyzer(
629
642
  tbl, select_list, where_clause=where_clause, group_by_clause=group_by_clause,
630
643
  order_by_clause=order_by_clause)
631
- row_builder = exprs.RowBuilder(analyzer.all_exprs, [], analyzer.sql_exprs)
644
+ input_exprs = exprs.ExprSet(exprs.Expr.list_subexprs(
645
+ analyzer.all_exprs, filter=analyzer.sql_elements.contains, traverse_matches=False))
646
+ # remove Literals from sql_exprs, we don't want to materialize them via a Select
647
+ input_exprs = exprs.ExprSet(e for e in input_exprs if not isinstance(e, exprs.Literal))
648
+ row_builder = exprs.RowBuilder(analyzer.all_exprs, [], input_exprs)
632
649
 
633
650
  analyzer.finalize(row_builder)
634
651
  # select_list: we need to materialize everything that's been collected
635
652
  # with_pk: for now, we always retrieve the PK, because we need it for the file cache
653
+ eval_ctx = row_builder.create_eval_ctx(analyzer.all_exprs)
636
654
  plan = cls._create_query_plan(
637
- tbl, row_builder, analyzer=analyzer, limit=limit, with_pk=True, exact_version_only=exact_version_only)
655
+ tbl, row_builder, analyzer=analyzer, eval_ctx=eval_ctx, limit=limit, with_pk=True,
656
+ exact_version_only=exact_version_only)
638
657
  plan.ctx.ignore_errors = ignore_errors
639
658
  select_list.clear()
640
659
  select_list.extend(analyzer.select_list)
@@ -643,9 +662,13 @@ class Planner:
643
662
  @classmethod
644
663
  def _create_query_plan(
645
664
  cls, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder, analyzer: Analyzer,
646
- limit: Optional[int] = None, with_pk: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
665
+ eval_ctx: exprs.RowBuilder.EvalCtx,
666
+ limit: Optional[int] = None, with_pk: bool = False,
667
+ exact_version_only: Optional[list[catalog.TableVersion]] = None
647
668
  ) -> exec.ExecNode:
648
669
  """
670
+ Create plan to materialize eval_ctx.
671
+
649
672
  Args:
650
673
  plan_target: if not None, generate a plan that materializes only expression that can be evaluted
651
674
  in the context of that table version (eg, if 'tbl' is a view, 'plan_target' might be the base)
@@ -659,9 +682,11 @@ class Planner:
659
682
 
660
683
  order_by_items = cls._determine_ordering(analyzer)
661
684
  sql_limit = 0 if is_agg_query else limit # if we're aggregating, the limit applies to the agg output
662
- sql_select_list = analyzer.sql_exprs.copy()
685
+ sql_exprs = [
686
+ e for e in eval_ctx.exprs if analyzer.sql_elements.contains(e) and not isinstance(e, exprs.Literal)
687
+ ]
663
688
  plan = exec.SqlScanNode(
664
- tbl, row_builder, select_list=sql_select_list, where_clause=analyzer.sql_where_clause,
689
+ tbl, row_builder, select_list=sql_exprs, where_clause=analyzer.sql_where_clause,
665
690
  filter=analyzer.filter, order_by_items=order_by_items,
666
691
  limit=sql_limit, set_pk=with_pk, exact_version_only=exact_version_only)
667
692
  plan = cls._insert_prefetch_node(tbl.tbl_version.id, analyzer.select_list, row_builder, plan)
@@ -671,29 +696,26 @@ class Planner:
671
696
  # args of the agg fn calls
672
697
  agg_input = exprs.ExprSet(analyzer.group_by_clause.copy())
673
698
  for fn_call in analyzer.agg_fn_calls:
674
- agg_input.extend(fn_call.components)
675
- if not cls._is_contained_in(agg_input, analyzer.sql_exprs):
699
+ agg_input.update(fn_call.components)
700
+ if not exprs.ExprSet(sql_exprs).issuperset(agg_input):
676
701
  # we need an ExprEvalNode
677
- plan = exec.ExprEvalNode(row_builder, agg_input, analyzer.sql_exprs, input=plan)
702
+ plan = exec.ExprEvalNode(row_builder, agg_input, sql_exprs, input=plan)
678
703
 
679
704
  # batch size for aggregation input: this could be the entire table, so we need to divide it into
680
705
  # smaller batches; at the same time, we need to make the batches large enough to amortize the
681
706
  # function call overhead
682
- # TODO: increase this if we have NOS calls in order to reduce the cost of switching models, but take
683
- # into account the amount of memory needed for intermediate images
684
707
  ctx.batch_size = 16
685
708
 
686
709
  plan = exec.AggregationNode(
687
710
  tbl.tbl_version, row_builder, analyzer.group_by_clause, analyzer.agg_fn_calls, agg_input, input=plan)
688
- agg_output = analyzer.group_by_clause + analyzer.agg_fn_calls
689
- if not cls._is_contained_in(analyzer.select_list, agg_output):
711
+ agg_output = exprs.ExprSet(itertools.chain(analyzer.group_by_clause, analyzer.agg_fn_calls))
712
+ if not agg_output.issuperset(exprs.ExprSet(eval_ctx.target_exprs)):
690
713
  # we need an ExprEvalNode to evaluate the remaining output exprs
691
- plan = exec.ExprEvalNode(
692
- row_builder, analyzer.select_list, agg_output, input=plan)
714
+ plan = exec.ExprEvalNode(row_builder, eval_ctx.target_exprs, agg_output, input=plan)
693
715
  else:
694
- if not cls._is_contained_in(analyzer.select_list, analyzer.sql_exprs):
716
+ if not exprs.ExprSet(sql_exprs).issuperset(exprs.ExprSet(eval_ctx.target_exprs)):
695
717
  # we need an ExprEvalNode to evaluate the remaining output exprs
696
- plan = exec.ExprEvalNode(row_builder, analyzer.select_list, analyzer.sql_exprs, input=plan)
718
+ plan = exec.ExprEvalNode(row_builder, eval_ctx.target_exprs, sql_exprs, input=plan)
697
719
  # we're returning everything to the user, so we might as well do it in a single batch
698
720
  ctx.batch_size = 0
699
721
 
@@ -707,17 +729,17 @@ class Planner:
707
729
  @classmethod
708
730
  def create_add_column_plan(
709
731
  cls, tbl: catalog.TableVersionPath, col: catalog.Column
710
- ) -> Tuple[exec.ExecNode, Optional[int]]:
732
+ ) -> tuple[exec.ExecNode, Optional[int]]:
711
733
  """Creates a plan for InsertableTable.add_column()
712
734
  Returns:
713
735
  plan: the plan to execute
714
736
  value_expr slot idx for the plan output (for computed cols)
715
737
  """
716
738
  assert isinstance(tbl, catalog.TableVersionPath)
717
- index_info: List[Tuple[catalog.Column, func.Function]] = []
718
739
  row_builder = exprs.RowBuilder(output_exprs=[], columns=[col], input_exprs=[])
719
740
  analyzer = Analyzer(tbl, row_builder.default_eval_ctx.target_exprs)
720
- plan = cls._create_query_plan(tbl, row_builder=row_builder, analyzer=analyzer, with_pk=True)
741
+ plan = cls._create_query_plan(
742
+ tbl, row_builder=row_builder, analyzer=analyzer, eval_ctx=row_builder.default_eval_ctx, with_pk=True)
721
743
  plan.ctx.batch_size = 16
722
744
  plan.ctx.show_pbar = True
723
745
  plan.ctx.ignore_errors = True
pixeltable/store.py CHANGED
@@ -53,7 +53,6 @@ class StoreBase:
53
53
  def _create_rowid_columns(self) -> List[sql.Column]:
54
54
  """Create and return rowid columns"""
55
55
 
56
- @abc.abstractmethod
57
56
  def _create_system_columns(self) -> List[sql.Column]:
58
57
  """Create and return system columns"""
59
58
  rowid_cols = self._create_rowid_columns()
@@ -432,6 +431,11 @@ class StoreComponentView(StoreView):
432
431
 
433
432
  PK: now also includes pos, the position returned by the ComponentIterator for the base row identified by base_rowid
434
433
  """
434
+
435
+ rowid_cols: list[sql.Column]
436
+ pos_col: sql.Column
437
+ pos_col_idx: int
438
+
435
439
  def __init__(self, catalog_view: catalog.TableVersion):
436
440
  super().__init__(catalog_view)
437
441
 
@@ -5,6 +5,7 @@ import os
5
5
  import pathlib
6
6
  import subprocess
7
7
  from typing import Any
8
+ from zoneinfo import ZoneInfo
8
9
 
9
10
  import pixeltable_pgserver
10
11
  import toml
@@ -15,8 +16,7 @@ from pixeltable.env import Env
15
16
  from pixeltable.func import Batch
16
17
  from pixeltable.io.external_store import Project
17
18
  from pixeltable.tool import embed_udf
18
- from pixeltable.type_system import \
19
- StringType, IntType, FloatType, BoolType, TimestampType, JsonType, ImageType
19
+ from pixeltable.type_system import BoolType, FloatType, ImageType, IntType, JsonType, StringType, TimestampType
20
20
 
21
21
  _logger = logging.getLogger('pixeltable')
22
22
 
@@ -248,7 +248,8 @@ class Dumper:
248
248
  add_column('str_const', 'str')
249
249
  add_column('int_const', 5)
250
250
  add_column('float_const', 5.0)
251
- add_column('timestamp_const_1', datetime.datetime.now(tz=datetime.timezone.utc))
251
+ add_column('timestamp_const_1', datetime.datetime.now())
252
+ add_column('timestamp_const_2', datetime.datetime.now().astimezone(ZoneInfo('America/Anchorage')))
252
253
 
253
254
  # type_cast
254
255
  add_column('astype', t.c2.astype(FloatType()))
pixeltable/type_system.py CHANGED
@@ -15,7 +15,8 @@ import numpy as np
15
15
  import PIL.Image
16
16
  import sqlalchemy as sql
17
17
 
18
- from pixeltable import exceptions as excs
18
+ import pixeltable.exceptions as excs
19
+ from pixeltable.env import Env
19
20
 
20
21
 
21
22
  class ColumnType:
@@ -99,7 +100,7 @@ class ColumnType:
99
100
  if nullable == self.nullable:
100
101
  return self
101
102
  else:
102
- return self.__class__(nullable=nullable)
103
+ return self.__class__(nullable=nullable) # type: ignore[call-arg]
103
104
 
104
105
  @classmethod
105
106
  def serialize_list(cls, type_list: List[ColumnType]) -> str:
@@ -474,7 +475,7 @@ class TimestampType(ColumnType):
474
475
  super().__init__(self.Type.TIMESTAMP, nullable=nullable)
475
476
 
476
477
  def to_sa_type(self) -> sql.types.TypeEngine:
477
- return sql.TIMESTAMP()
478
+ return sql.TIMESTAMP(timezone=True)
478
479
 
479
480
  def _validate_literal(self, val: Any) -> None:
480
481
  if not isinstance(val, datetime.datetime):
@@ -496,7 +497,7 @@ class JsonType(ColumnType):
496
497
  return JsonType(self.type_spec, nullable=nullable)
497
498
 
498
499
  def matches(self, other: ColumnType) -> bool:
499
- return other._type == self.Type.JSON and self.type_spec == other.type_spec
500
+ return isinstance(other, JsonType) and self.type_spec == other.type_spec
500
501
 
501
502
  def supertype(self, other: ColumnType) -> Optional[JsonType]:
502
503
  if not isinstance(other, JsonType):
@@ -558,7 +559,7 @@ class JsonType(ColumnType):
558
559
  raise TypeError(f'That literal is not a valid Pixeltable JSON object: {val}')
559
560
 
560
561
  @classmethod
561
- def __is_valid_literal(cls, val: Any) -> None:
562
+ def __is_valid_literal(cls, val: Any) -> bool:
562
563
  if val is None or isinstance(val, (str, int, float, bool)):
563
564
  return True
564
565
  if isinstance(val, (list, tuple)):
@@ -585,7 +586,7 @@ class ArrayType(ColumnType):
585
586
  return ArrayType(self.shape, self.pxt_dtype, nullable=nullable)
586
587
 
587
588
  def matches(self, other: ColumnType) -> bool:
588
- return other._type == self.Type.ARRAY and self.shape == other.shape and self.dtype == other.dtype
589
+ return isinstance(other, ArrayType) and self.shape == other.shape and self.dtype == other.dtype
589
590
 
590
591
  def supertype(self, other: ColumnType) -> Optional[ArrayType]:
591
592
  if not isinstance(other, ArrayType):
@@ -718,7 +719,7 @@ class ImageType(ColumnType):
718
719
 
719
720
  def matches(self, other: ColumnType) -> bool:
720
721
  return (
721
- other._type == self.Type.IMAGE
722
+ isinstance(other, ImageType)
722
723
  and self.width == other.width
723
724
  and self.height == other.height
724
725
  and self.mode == other.mode
@@ -848,7 +849,7 @@ class DocumentType(ColumnType):
848
849
  return DocumentType(doc_formats=self.doc_formats, nullable=nullable)
849
850
 
850
851
  def matches(self, other: ColumnType) -> bool:
851
- return other._type == self.Type.DOCUMENT and self._doc_formats == other._doc_formats
852
+ return isinstance(other, DocumentType) and self._doc_formats == other._doc_formats
852
853
 
853
854
  def to_sa_type(self) -> sql.types.TypeEngine:
854
855
  # stored as a file path
@@ -860,9 +861,6 @@ class DocumentType(ColumnType):
860
861
  def validate_media(self, val: Any) -> None:
861
862
  assert isinstance(val, str)
862
863
  from pixeltable.utils.documents import get_document_handle
863
- try:
864
- dh = get_document_handle(val)
865
- if dh is None:
866
- raise excs.Error(f'Not a recognized document format: {val}')
867
- except Exception as e:
868
- raise excs.Error(f'Not a recognized document format: {val}') from None
864
+ dh = get_document_handle(val)
865
+ if dh is None:
866
+ raise excs.Error(f'Not a recognized document format: {val}')