pixeltable 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (147)
  1. pixeltable/__init__.py +64 -11
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/catalog.py +50 -27
  5. pixeltable/catalog/column.py +27 -11
  6. pixeltable/catalog/dir.py +6 -4
  7. pixeltable/catalog/globals.py +8 -1
  8. pixeltable/catalog/insertable_table.py +22 -12
  9. pixeltable/catalog/named_function.py +10 -6
  10. pixeltable/catalog/path.py +3 -2
  11. pixeltable/catalog/path_dict.py +8 -6
  12. pixeltable/catalog/schema_object.py +2 -1
  13. pixeltable/catalog/table.py +121 -101
  14. pixeltable/catalog/table_version.py +291 -142
  15. pixeltable/catalog/table_version_path.py +8 -5
  16. pixeltable/catalog/view.py +67 -26
  17. pixeltable/dataframe.py +102 -72
  18. pixeltable/env.py +20 -21
  19. pixeltable/exec/__init__.py +2 -2
  20. pixeltable/exec/aggregation_node.py +10 -4
  21. pixeltable/exec/cache_prefetch_node.py +5 -3
  22. pixeltable/exec/component_iteration_node.py +9 -8
  23. pixeltable/exec/data_row_batch.py +21 -10
  24. pixeltable/exec/exec_context.py +10 -3
  25. pixeltable/exec/exec_node.py +23 -12
  26. pixeltable/exec/expr_eval/evaluators.py +13 -7
  27. pixeltable/exec/expr_eval/expr_eval_node.py +24 -15
  28. pixeltable/exec/expr_eval/globals.py +30 -7
  29. pixeltable/exec/expr_eval/row_buffer.py +5 -6
  30. pixeltable/exec/expr_eval/schedulers.py +151 -31
  31. pixeltable/exec/in_memory_data_node.py +8 -7
  32. pixeltable/exec/row_update_node.py +15 -5
  33. pixeltable/exec/sql_node.py +56 -27
  34. pixeltable/exprs/__init__.py +2 -2
  35. pixeltable/exprs/arithmetic_expr.py +57 -26
  36. pixeltable/exprs/array_slice.py +1 -1
  37. pixeltable/exprs/column_property_ref.py +2 -1
  38. pixeltable/exprs/column_ref.py +20 -15
  39. pixeltable/exprs/comparison.py +6 -2
  40. pixeltable/exprs/compound_predicate.py +1 -3
  41. pixeltable/exprs/data_row.py +2 -2
  42. pixeltable/exprs/expr.py +101 -72
  43. pixeltable/exprs/expr_dict.py +2 -1
  44. pixeltable/exprs/expr_set.py +3 -1
  45. pixeltable/exprs/function_call.py +39 -41
  46. pixeltable/exprs/globals.py +1 -0
  47. pixeltable/exprs/in_predicate.py +2 -2
  48. pixeltable/exprs/inline_expr.py +20 -17
  49. pixeltable/exprs/json_mapper.py +4 -2
  50. pixeltable/exprs/json_path.py +12 -18
  51. pixeltable/exprs/literal.py +5 -9
  52. pixeltable/exprs/method_ref.py +1 -0
  53. pixeltable/exprs/object_ref.py +1 -1
  54. pixeltable/exprs/row_builder.py +32 -17
  55. pixeltable/exprs/rowid_ref.py +14 -5
  56. pixeltable/exprs/similarity_expr.py +11 -6
  57. pixeltable/exprs/sql_element_cache.py +1 -1
  58. pixeltable/exprs/type_cast.py +24 -9
  59. pixeltable/ext/__init__.py +1 -0
  60. pixeltable/ext/functions/__init__.py +1 -0
  61. pixeltable/ext/functions/whisperx.py +2 -2
  62. pixeltable/ext/functions/yolox.py +11 -11
  63. pixeltable/func/aggregate_function.py +17 -13
  64. pixeltable/func/callable_function.py +6 -6
  65. pixeltable/func/expr_template_function.py +15 -14
  66. pixeltable/func/function.py +16 -16
  67. pixeltable/func/function_registry.py +11 -8
  68. pixeltable/func/globals.py +4 -2
  69. pixeltable/func/query_template_function.py +12 -13
  70. pixeltable/func/signature.py +18 -9
  71. pixeltable/func/tools.py +10 -17
  72. pixeltable/func/udf.py +106 -11
  73. pixeltable/functions/__init__.py +21 -2
  74. pixeltable/functions/anthropic.py +16 -12
  75. pixeltable/functions/fireworks.py +63 -5
  76. pixeltable/functions/gemini.py +13 -3
  77. pixeltable/functions/globals.py +18 -6
  78. pixeltable/functions/huggingface.py +20 -38
  79. pixeltable/functions/image.py +7 -3
  80. pixeltable/functions/json.py +1 -0
  81. pixeltable/functions/llama_cpp.py +1 -4
  82. pixeltable/functions/mistralai.py +31 -20
  83. pixeltable/functions/ollama.py +4 -18
  84. pixeltable/functions/openai.py +201 -108
  85. pixeltable/functions/replicate.py +11 -10
  86. pixeltable/functions/string.py +70 -7
  87. pixeltable/functions/timestamp.py +21 -8
  88. pixeltable/functions/together.py +66 -52
  89. pixeltable/functions/video.py +1 -0
  90. pixeltable/functions/vision.py +14 -11
  91. pixeltable/functions/whisper.py +2 -1
  92. pixeltable/globals.py +60 -26
  93. pixeltable/index/__init__.py +1 -1
  94. pixeltable/index/btree.py +5 -3
  95. pixeltable/index/embedding_index.py +15 -14
  96. pixeltable/io/__init__.py +1 -1
  97. pixeltable/io/external_store.py +30 -25
  98. pixeltable/io/fiftyone.py +6 -14
  99. pixeltable/io/globals.py +33 -27
  100. pixeltable/io/hf_datasets.py +2 -1
  101. pixeltable/io/label_studio.py +77 -68
  102. pixeltable/io/pandas.py +33 -9
  103. pixeltable/io/parquet.py +9 -12
  104. pixeltable/iterators/__init__.py +1 -0
  105. pixeltable/iterators/audio.py +205 -0
  106. pixeltable/iterators/document.py +19 -8
  107. pixeltable/iterators/image.py +6 -24
  108. pixeltable/iterators/string.py +3 -6
  109. pixeltable/iterators/video.py +1 -7
  110. pixeltable/metadata/__init__.py +7 -1
  111. pixeltable/metadata/converters/convert_10.py +2 -2
  112. pixeltable/metadata/converters/convert_15.py +1 -5
  113. pixeltable/metadata/converters/convert_16.py +2 -4
  114. pixeltable/metadata/converters/convert_17.py +2 -4
  115. pixeltable/metadata/converters/convert_18.py +2 -4
  116. pixeltable/metadata/converters/convert_19.py +2 -5
  117. pixeltable/metadata/converters/convert_20.py +1 -4
  118. pixeltable/metadata/converters/convert_21.py +4 -6
  119. pixeltable/metadata/converters/convert_22.py +1 -0
  120. pixeltable/metadata/converters/convert_23.py +5 -5
  121. pixeltable/metadata/converters/convert_24.py +12 -13
  122. pixeltable/metadata/converters/convert_26.py +23 -0
  123. pixeltable/metadata/converters/util.py +3 -4
  124. pixeltable/metadata/notes.py +1 -0
  125. pixeltable/metadata/schema.py +13 -2
  126. pixeltable/plan.py +173 -98
  127. pixeltable/store.py +42 -26
  128. pixeltable/type_system.py +62 -54
  129. pixeltable/utils/arrow.py +1 -2
  130. pixeltable/utils/coco.py +16 -17
  131. pixeltable/utils/code.py +1 -1
  132. pixeltable/utils/console_output.py +6 -3
  133. pixeltable/utils/description_helper.py +7 -7
  134. pixeltable/utils/documents.py +3 -1
  135. pixeltable/utils/filecache.py +12 -7
  136. pixeltable/utils/http_server.py +9 -8
  137. pixeltable/utils/media_store.py +2 -1
  138. pixeltable/utils/pytorch.py +11 -14
  139. pixeltable/utils/s3.py +1 -0
  140. pixeltable/utils/sql.py +1 -0
  141. pixeltable/utils/transactional_directory.py +2 -2
  142. {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/METADATA +6 -8
  143. pixeltable-0.3.3.dist-info/RECORD +163 -0
  144. pixeltable-0.3.2.dist-info/RECORD +0 -161
  145. {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/LICENSE +0 -0
  146. {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/WHEEL +0 -0
  147. {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/entry_points.txt +0 -0
pixeltable/plan.py CHANGED
@@ -2,18 +2,15 @@ from __future__ import annotations
2
2
 
3
3
  import dataclasses
4
4
  import enum
5
- from typing import Any, Iterable, Optional, Sequence, Literal
5
+ from typing import Any, Iterable, Literal, Optional, Sequence
6
6
  from uuid import UUID
7
7
 
8
-
9
8
  import sqlalchemy as sql
10
9
 
11
10
  import pixeltable as pxt
12
11
  import pixeltable.exec as exec
13
- from pixeltable import catalog
14
- from pixeltable import exceptions as excs
15
- from pixeltable import exprs
16
- from pixeltable.exec.sql_node import OrderByItem, OrderByClause, combine_order_by_clauses, print_order_by_clause
12
+ from pixeltable import catalog, exceptions as excs, exprs
13
+ from pixeltable.exec.sql_node import OrderByClause, OrderByItem, combine_order_by_clauses, print_order_by_clause
17
14
 
18
15
 
19
16
  def _is_agg_fn_call(e: exprs.Expr) -> bool:
@@ -21,7 +18,7 @@ def _is_agg_fn_call(e: exprs.Expr) -> bool:
21
18
 
22
19
 
23
20
  def _get_combined_ordering(
24
- o1: list[tuple[exprs.Expr, bool]], o2: list[tuple[exprs.Expr, bool]]
21
+ o1: list[tuple[exprs.Expr, bool]], o2: list[tuple[exprs.Expr, bool]]
25
22
  ) -> list[tuple[exprs.Expr, bool]]:
26
23
  """Returns an ordering that's compatible with both o1 and o2, or an empty list if no such ordering exists"""
27
24
  result: list[tuple[exprs.Expr, bool]] = []
@@ -65,13 +62,15 @@ class JoinType(enum.Enum):
65
62
  @dataclasses.dataclass
66
63
  class JoinClause:
67
64
  """Corresponds to a single 'JOIN ... ON (...)' clause in a SELECT statement; excludes the joined table."""
65
+
68
66
  join_type: JoinType
69
67
  join_predicate: Optional[exprs.Expr] # None for join_type == CROSS
70
68
 
71
69
 
72
70
  @dataclasses.dataclass
73
71
  class FromClause:
74
- """Corresponds to the From-clause ('FROM <tbl> JOIN ... ON (...) JOIN ...') of a SELECT statement """
72
+ """Corresponds to the From-clause ('FROM <tbl> JOIN ... ON (...) JOIN ...') of a SELECT statement"""
73
+
75
74
  tbls: list[catalog.TableVersionPath]
76
75
  join_clauses: list[JoinClause] = dataclasses.field(default_factory=list)
77
76
 
@@ -101,9 +100,13 @@ class Analyzer:
101
100
  agg_order_by: list[exprs.Expr]
102
101
 
103
102
  def __init__(
104
- self, from_clause: FromClause, select_list: Sequence[exprs.Expr],
105
- where_clause: Optional[exprs.Expr] = None, group_by_clause: Optional[list[exprs.Expr]] = None,
106
- order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None):
103
+ self,
104
+ from_clause: FromClause,
105
+ select_list: Sequence[exprs.Expr],
106
+ where_clause: Optional[exprs.Expr] = None,
107
+ group_by_clause: Optional[list[exprs.Expr]] = None,
108
+ order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None,
109
+ ):
107
110
  if order_by_clause is None:
108
111
  order_by_clause = []
109
112
  self.from_clause = from_clause
@@ -146,12 +149,17 @@ class Analyzer:
146
149
  candidates = self.select_list
147
150
  agg_fn_calls = exprs.ExprSet(
148
151
  exprs.Expr.list_subexprs(
149
- candidates, expr_class=exprs.FunctionCall,
150
- filter=lambda e: bool(e.is_agg_fn_call and not e.is_window_fn_call)))
152
+ candidates,
153
+ expr_class=exprs.FunctionCall,
154
+ filter=lambda e: bool(e.is_agg_fn_call and not e.is_window_fn_call),
155
+ )
156
+ )
151
157
  self.agg_fn_calls = list(agg_fn_calls)
152
158
  window_fn_calls = exprs.ExprSet(
153
159
  exprs.Expr.list_subexprs(
154
- candidates, expr_class=exprs.FunctionCall, filter=lambda e: bool(e.is_window_fn_call)))
160
+ candidates, expr_class=exprs.FunctionCall, filter=lambda e: bool(e.is_window_fn_call)
161
+ )
162
+ )
155
163
  self.window_fn_calls = list(window_fn_calls)
156
164
  if len(self.agg_fn_calls) == 0:
157
165
  # nothing to do
@@ -165,7 +173,8 @@ class Analyzer:
165
173
  is_agg_output = [self._determine_agg_status(e, grouping_expr_ids)[0] for e in self.select_list]
166
174
  if is_agg_output.count(False) > 0:
167
175
  raise excs.Error(
168
- f'Invalid non-aggregate expression in aggregate query: {self.select_list[is_agg_output.index(False)]}')
176
+ f'Invalid non-aggregate expression in aggregate query: {self.select_list[is_agg_output.index(False)]}'
177
+ )
169
178
 
170
179
  # check that Where clause and filter doesn't contain aggregates
171
180
  if self.sql_where_clause is not None:
@@ -205,7 +214,8 @@ class Analyzer:
205
214
  # an expression such as <grouping expr 1> + <grouping expr 2> can both be the output and input of agg
206
215
  assert len(e.components) > 0
207
216
  component_is_output, component_is_input = zip(
208
- *[self._determine_agg_status(c, grouping_expr_ids) for c in e.components])
217
+ *[self._determine_agg_status(c, grouping_expr_ids) for c in e.components]
218
+ )
209
219
  is_output = component_is_output.count(True) == len(e.components)
210
220
  is_input = component_is_input.count(True) == len(e.components)
211
221
  if not is_output and not is_input:
@@ -234,7 +244,8 @@ class Analyzer:
234
244
  # window functions require ordering by the group_by/order_by clauses
235
245
  group_by_exprs, order_by_exprs = fn_call.get_window_sort_exprs()
236
246
  clause.append(
237
- [OrderByItem(e, None) for e in group_by_exprs] + [OrderByItem(e, True) for e in order_by_exprs])
247
+ [OrderByItem(e, None) for e in group_by_exprs] + [OrderByItem(e, True) for e in order_by_exprs]
248
+ )
238
249
  return combine_order_by_clauses(clause)
239
250
 
240
251
  def has_agg(self) -> bool:
@@ -245,9 +256,7 @@ class Analyzer:
245
256
  class Planner:
246
257
  # TODO: create an exec.CountNode and change this to create_count_plan()
247
258
  @classmethod
248
- def create_count_stmt(
249
- cls, tbl: catalog.TableVersionPath, where_clause: Optional[exprs.Expr] = None
250
- ) -> sql.Select:
259
+ def create_count_stmt(cls, tbl: catalog.TableVersionPath, where_clause: Optional[exprs.Expr] = None) -> sql.Select:
251
260
  stmt = sql.select(sql.func.count())
252
261
  refd_tbl_ids: set[UUID] = set()
253
262
  if where_clause is not None:
@@ -288,23 +297,26 @@ class Planner:
288
297
  if len(computed_exprs) > 0:
289
298
  # add an ExprEvalNode when there are exprs to compute
290
299
  plan = exec.ExprEvalNode(
291
- row_builder, computed_exprs, plan.output_exprs, input=plan, maintain_input_order=False)
300
+ row_builder, computed_exprs, plan.output_exprs, input=plan, maintain_input_order=False
301
+ )
292
302
 
293
303
  stored_col_info = row_builder.output_slot_idxs()
294
304
  stored_img_col_info = [info for info in stored_col_info if info.col.col_type.is_image_type()]
295
305
  plan.set_stored_img_cols(stored_img_col_info)
296
306
  plan.set_ctx(
297
307
  exec.ExecContext(
298
- row_builder, batch_size=0, show_pbar=True, num_computed_exprs=len(computed_exprs),
299
- ignore_errors=ignore_errors))
308
+ row_builder,
309
+ batch_size=0,
310
+ show_pbar=True,
311
+ num_computed_exprs=len(computed_exprs),
312
+ ignore_errors=ignore_errors,
313
+ )
314
+ )
300
315
  return plan
301
316
 
302
317
  @classmethod
303
318
  def create_df_insert_plan(
304
- cls,
305
- tbl: catalog.TableVersion,
306
- df: 'pxt.DataFrame',
307
- ignore_errors: bool
319
+ cls, tbl: catalog.TableVersion, df: 'pxt.DataFrame', ignore_errors: bool
308
320
  ) -> exec.ExecNode:
309
321
  assert not tbl.is_view()
310
322
  plan = df._create_query_plan() # ExecNode constructed by the DataFrame
@@ -321,18 +333,21 @@ class Planner:
321
333
 
322
334
  plan.set_ctx(
323
335
  exec.ExecContext(
324
- plan.row_builder, batch_size=0, show_pbar=True, num_computed_exprs=0,
325
- ignore_errors=ignore_errors))
336
+ plan.row_builder, batch_size=0, show_pbar=True, num_computed_exprs=0, ignore_errors=ignore_errors
337
+ )
338
+ )
326
339
  plan.ctx.num_rows = 0 # Unknown
327
340
 
328
341
  return plan
329
342
 
330
343
  @classmethod
331
344
  def create_update_plan(
332
- cls, tbl: catalog.TableVersionPath,
333
- update_targets: dict[catalog.Column, exprs.Expr],
334
- recompute_targets: list[catalog.Column],
335
- where_clause: Optional[exprs.Expr], cascade: bool
345
+ cls,
346
+ tbl: catalog.TableVersionPath,
347
+ update_targets: dict[catalog.Column, exprs.Expr],
348
+ recompute_targets: list[catalog.Column],
349
+ where_clause: Optional[exprs.Expr],
350
+ cascade: bool,
336
351
  ) -> tuple[exec.ExecNode, list[str], list[catalog.Column]]:
337
352
  """Creates a plan to materialize updated rows.
338
353
  The plan:
@@ -361,14 +376,16 @@ class Planner:
361
376
  recomputed_cols = {c for c in recomputed_cols if c.is_stored}
362
377
  recomputed_base_cols = {col for col in recomputed_cols if col.tbl == target}
363
378
  copied_cols = [
364
- col for col in target.cols_by_id.values()
379
+ col
380
+ for col in target.cols_by_id.values()
365
381
  if col.is_stored and not col in updated_cols and not col in recomputed_base_cols
366
382
  ]
367
383
  select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in copied_cols]
368
384
  select_list.extend(update_targets.values())
369
385
 
370
- recomputed_exprs = \
371
- [c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols]
386
+ recomputed_exprs = [
387
+ c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols
388
+ ]
372
389
  # recomputed cols reference the new values of the updated cols
373
390
  spec: dict[exprs.Expr, exprs.Expr] = {exprs.ColumnRef(col): e for col, e in update_targets.items()}
374
391
  exprs.Expr.list_substitute(recomputed_exprs, spec)
@@ -385,9 +402,11 @@ class Planner:
385
402
 
386
403
  @classmethod
387
404
  def create_batch_update_plan(
388
- cls, tbl: catalog.TableVersionPath,
389
- batch: list[dict[catalog.Column, exprs.Expr]], rowids: list[tuple[int, ...]],
390
- cascade: bool
405
+ cls,
406
+ tbl: catalog.TableVersionPath,
407
+ batch: list[dict[catalog.Column, exprs.Expr]],
408
+ rowids: list[tuple[int, ...]],
409
+ cascade: bool,
391
410
  ) -> tuple[exec.ExecNode, exec.RowUpdateNode, sql.ColumnElement[bool], list[catalog.Column], list[catalog.Column]]:
392
411
  """
393
412
  Returns:
@@ -419,15 +438,17 @@ class Planner:
419
438
  recomputed_cols = {c for c in recomputed_cols if c.is_stored}
420
439
  recomputed_base_cols = {col for col in recomputed_cols if col.tbl == target}
421
440
  copied_cols = [
422
- col for col in target.cols_by_id.values()
441
+ col
442
+ for col in target.cols_by_id.values()
423
443
  if col.is_stored and not col in updated_cols and not col in recomputed_base_cols
424
444
  ]
425
445
  select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in copied_cols]
426
446
  select_list.extend(exprs.ColumnRef(col) for col in updated_cols)
427
447
 
428
- recomputed_exprs = \
429
- [c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols]
430
- # the RowUpdateNode updates columns in-place, ie, in the original ColumnRef; no further sustitution is needed
448
+ recomputed_exprs = [
449
+ c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols
450
+ ]
451
+ # the RowUpdateNode updates columns in-place, ie, in the original ColumnRef; no further substitution is needed
431
452
  select_list.extend(recomputed_exprs)
432
453
 
433
454
  # ExecNode tree (from bottom to top):
@@ -435,8 +456,9 @@ class Planner:
435
456
  # - RowUpdateNode to update the retrieved rows
436
457
  # - ExprEvalNode to evaluate the remaining output exprs
437
458
  analyzer = Analyzer(FromClause(tbls=[tbl]), select_list)
438
- sql_exprs = list(exprs.Expr.list_subexprs(
439
- analyzer.all_exprs, filter=analyzer.sql_elements.contains, traverse_matches=False))
459
+ sql_exprs = list(
460
+ exprs.Expr.list_subexprs(analyzer.all_exprs, filter=analyzer.sql_elements.contains, traverse_matches=False)
461
+ )
440
462
  row_builder = exprs.RowBuilder(analyzer.all_exprs, [], sql_exprs)
441
463
  analyzer.finalize(row_builder)
442
464
  sql_lookup_node = exec.SqlLookupNode(tbl, row_builder, sql_exprs, sa_key_cols, key_vals)
@@ -458,13 +480,16 @@ class Planner:
458
480
  plan.set_ctx(ctx)
459
481
  recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
460
482
  return (
461
- plan, row_update_node, sql_lookup_node.where_clause_element, list(updated_cols) + recomputed_user_cols,
462
- recomputed_user_cols
483
+ plan,
484
+ row_update_node,
485
+ sql_lookup_node.where_clause_element,
486
+ list(updated_cols) + recomputed_user_cols,
487
+ recomputed_user_cols,
463
488
  )
464
489
 
465
490
  @classmethod
466
491
  def create_view_update_plan(
467
- cls, view: catalog.TableVersionPath, recompute_targets: list[catalog.Column]
492
+ cls, view: catalog.TableVersionPath, recompute_targets: list[catalog.Column]
468
493
  ) -> exec.ExecNode:
469
494
  """Creates a plan to materialize updated rows for a view, given that the base table has been updated.
470
495
  The plan:
@@ -489,19 +514,25 @@ class Planner:
489
514
  copied_cols = [col for col in target.cols_by_id.values() if col.is_stored and not col in recomputed_cols]
490
515
  select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in copied_cols]
491
516
  # resolve recomputed exprs to stored columns in the base
492
- recomputed_exprs = \
493
- [c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_cols) for c in recomputed_cols]
517
+ recomputed_exprs = [
518
+ c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_cols) for c in recomputed_cols
519
+ ]
494
520
  select_list.extend(recomputed_exprs)
495
521
 
496
522
  # we need to retrieve the PK columns of the existing rows
497
523
  plan = cls.create_query_plan(
498
- FromClause(tbls=[view]), select_list, where_clause=target.predicate, ignore_errors=True,
499
- exact_version_only=view.get_bases())
524
+ FromClause(tbls=[view]),
525
+ select_list,
526
+ where_clause=target.predicate,
527
+ ignore_errors=True,
528
+ exact_version_only=view.get_bases(),
529
+ )
500
530
  for i, col in enumerate(copied_cols + list(recomputed_cols)): # same order as select_list
501
531
  plan.row_builder.add_table_column(col, select_list[i].slot_idx)
502
532
  # TODO: avoid duplication with view_load_plan() logic (where does this belong?)
503
- stored_img_col_info = \
504
- [info for info in plan.row_builder.output_slot_idxs() if info.col.col_type.is_image_type()]
533
+ stored_img_col_info = [
534
+ info for info in plan.row_builder.output_slot_idxs() if info.col.col_type.is_image_type()
535
+ ]
505
536
  plan.set_stored_img_cols(stored_img_col_info)
506
537
  return plan
507
538
 
@@ -539,21 +570,27 @@ class Planner:
539
570
  # 3. materialize stored view columns that haven't been produced by step 1
540
571
  base_output_exprs = [e for e in row_builder.default_eval_ctx.exprs if e.is_bound_by([view.base])]
541
572
  view_output_exprs = [
542
- e for e in row_builder.default_eval_ctx.target_exprs
573
+ e
574
+ for e in row_builder.default_eval_ctx.target_exprs
543
575
  if e.is_bound_by([view]) and not e.is_bound_by([view.base])
544
576
  ]
545
577
  # if we're propagating an insert, we only want to see those base rows that were created for the current version
546
578
  base_analyzer = Analyzer(FromClause(tbls=[view.base]), base_output_exprs, where_clause=target.predicate)
547
579
  base_eval_ctx = row_builder.create_eval_ctx(base_analyzer.all_exprs)
548
580
  plan = cls._create_query_plan(
549
- row_builder=row_builder, analyzer=base_analyzer, eval_ctx=base_eval_ctx, with_pk=True,
550
- exact_version_only=view.get_bases() if propagates_insert else [])
581
+ row_builder=row_builder,
582
+ analyzer=base_analyzer,
583
+ eval_ctx=base_eval_ctx,
584
+ with_pk=True,
585
+ exact_version_only=view.get_bases() if propagates_insert else [],
586
+ )
551
587
  exec_ctx = plan.ctx
552
588
  if target.is_component_view():
553
589
  plan = exec.ComponentIterationNode(target, plan)
554
590
  if len(view_output_exprs) > 0:
555
591
  plan = exec.ExprEvalNode(
556
- row_builder, output_exprs=view_output_exprs, input_exprs=base_output_exprs, input=plan)
592
+ row_builder, output_exprs=view_output_exprs, input_exprs=base_output_exprs, input=plan
593
+ )
557
594
 
558
595
  stored_img_col_info = [info for info in row_builder.output_slot_idxs() if info.col.col_type.is_image_type()]
559
596
  plan.set_stored_img_cols(stored_img_col_info)
@@ -582,10 +619,9 @@ class Planner:
582
619
  ob_clauses.append(ordering)
583
620
  for fn_call in analyzer.agg_fn_calls:
584
621
  # agg functions with an ordering requirement are implicitly ascending
585
- ordering = (
586
- [OrderByItem(e, None) for e in analyzer.group_by_clause]
587
- + [OrderByItem(e, True) for e in fn_call.get_agg_order_by()]
588
- )
622
+ ordering = [OrderByItem(e, None) for e in analyzer.group_by_clause] + [
623
+ OrderByItem(e, True) for e in fn_call.get_agg_order_by()
624
+ ]
589
625
  ob_clauses.append(ordering)
590
626
  if len(ob_clauses) <= 1:
591
627
  return
@@ -596,7 +632,8 @@ class Planner:
596
632
  if combined is None:
597
633
  raise excs.Error(
598
634
  f'Incompatible ordering requirements: '
599
- f'{print_order_by_clause(combined_ordering)} vs {print_order_by_clause(ordering)}')
635
+ f'{print_order_by_clause(combined_ordering)} vs {print_order_by_clause(ordering)}'
636
+ )
600
637
  combined_ordering = combined
601
638
 
602
639
  @classmethod
@@ -623,10 +660,15 @@ class Planner:
623
660
 
624
661
  @classmethod
625
662
  def create_query_plan(
626
- cls, from_clause: FromClause, select_list: Optional[list[exprs.Expr]] = None,
627
- where_clause: Optional[exprs.Expr] = None, group_by_clause: Optional[list[exprs.Expr]] = None,
628
- order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None, limit: Optional[int] = None,
629
- ignore_errors: bool = False, exact_version_only: Optional[list[catalog.TableVersion]] = None
663
+ cls,
664
+ from_clause: FromClause,
665
+ select_list: Optional[list[exprs.Expr]] = None,
666
+ where_clause: Optional[exprs.Expr] = None,
667
+ group_by_clause: Optional[list[exprs.Expr]] = None,
668
+ order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None,
669
+ limit: Optional[exprs.Expr] = None,
670
+ ignore_errors: bool = False,
671
+ exact_version_only: Optional[list[catalog.TableVersion]] = None,
630
672
  ) -> exec.ExecNode:
631
673
  """Return plan for executing a query.
632
674
  Updates 'select_list' in place to make it executable.
@@ -639,8 +681,12 @@ class Planner:
639
681
  if exact_version_only is None:
640
682
  exact_version_only = []
641
683
  analyzer = Analyzer(
642
- from_clause, select_list, where_clause=where_clause, group_by_clause=group_by_clause,
643
- order_by_clause=order_by_clause)
684
+ from_clause,
685
+ select_list,
686
+ where_clause=where_clause,
687
+ group_by_clause=group_by_clause,
688
+ order_by_clause=order_by_clause,
689
+ )
644
690
  row_builder = exprs.RowBuilder(analyzer.all_exprs, [], [])
645
691
 
646
692
  analyzer.finalize(row_builder)
@@ -648,8 +694,13 @@ class Planner:
648
694
  # with_pk: for now, we always retrieve the PK, because we need it for the file cache
649
695
  eval_ctx = row_builder.create_eval_ctx(analyzer.select_list)
650
696
  plan = cls._create_query_plan(
651
- row_builder=row_builder, analyzer=analyzer, eval_ctx=eval_ctx, limit=limit, with_pk=True,
652
- exact_version_only=exact_version_only)
697
+ row_builder=row_builder,
698
+ analyzer=analyzer,
699
+ eval_ctx=eval_ctx,
700
+ limit=limit,
701
+ with_pk=True,
702
+ exact_version_only=exact_version_only,
703
+ )
653
704
  plan.ctx.ignore_errors = ignore_errors
654
705
  select_list.clear()
655
706
  select_list.extend(analyzer.select_list)
@@ -657,9 +708,13 @@ class Planner:
657
708
 
658
709
  @classmethod
659
710
  def _create_query_plan(
660
- cls, row_builder: exprs.RowBuilder, analyzer: Analyzer, eval_ctx: exprs.RowBuilder.EvalCtx,
661
- limit: Optional[int] = None, with_pk: bool = False,
662
- exact_version_only: Optional[list[catalog.TableVersion]] = None
711
+ cls,
712
+ row_builder: exprs.RowBuilder,
713
+ analyzer: Analyzer,
714
+ eval_ctx: exprs.RowBuilder.EvalCtx,
715
+ limit: Optional[exprs.Expr] = None,
716
+ with_pk: bool = False,
717
+ exact_version_only: Optional[list[catalog.TableVersion]] = None,
663
718
  ) -> exec.ExecNode:
664
719
  """
665
720
  Create plan to materialize eval_ctx.
@@ -672,9 +727,8 @@ class Planner:
672
727
  if exact_version_only is None:
673
728
  exact_version_only = []
674
729
  sql_elements = analyzer.sql_elements
675
- is_python_agg = (
676
- not sql_elements.contains_all(analyzer.agg_fn_calls)
677
- or not sql_elements.contains_all(analyzer.window_fn_calls)
730
+ is_python_agg = not sql_elements.contains_all(analyzer.agg_fn_calls) or not sql_elements.contains_all(
731
+ analyzer.window_fn_calls
678
732
  )
679
733
  ctx = exec.ExecContext(row_builder)
680
734
  cls._verify_ordering(analyzer, verify_agg=is_python_agg)
@@ -686,19 +740,26 @@ class Planner:
686
740
  # - subexprs of Where clause conjuncts that can't be run in SQL
687
741
  # - all grouping exprs, if any aggregate function call can't be run in SQL (in that case, they all have to be
688
742
  # run in Python)
689
- candidates = list(exprs.Expr.list_subexprs(
690
- analyzer.select_list,
691
- filter=lambda e: (
743
+ candidates = list(
744
+ exprs.Expr.list_subexprs(
745
+ analyzer.select_list,
746
+ filter=lambda e: (
692
747
  sql_elements.contains(e)
693
748
  and not e._contains(cls=exprs.FunctionCall, filter=lambda e: bool(e.is_agg_fn_call))
694
- ),
695
- traverse_matches=False))
749
+ ),
750
+ traverse_matches=False,
751
+ )
752
+ )
696
753
  if analyzer.filter is not None:
697
- candidates.extend(exprs.Expr.subexprs(
698
- analyzer.filter, filter=lambda e: sql_elements.contains(e), traverse_matches=False))
754
+ candidates.extend(
755
+ exprs.Expr.subexprs(analyzer.filter, filter=lambda e: sql_elements.contains(e), traverse_matches=False)
756
+ )
699
757
  if is_python_agg and analyzer.group_by_clause is not None:
700
- candidates.extend(exprs.Expr.list_subexprs(
701
- analyzer.group_by_clause, filter=lambda e: sql_elements.contains(e), traverse_matches=False))
758
+ candidates.extend(
759
+ exprs.Expr.list_subexprs(
760
+ analyzer.group_by_clause, filter=lambda e: sql_elements.contains(e), traverse_matches=False
761
+ )
762
+ )
702
763
  # not isinstance(...): we don't want to materialize Literals via a Select
703
764
  sql_exprs = exprs.ExprSet(e for e in candidates if not isinstance(e, exprs.Literal))
704
765
 
@@ -706,7 +767,8 @@ class Planner:
706
767
  join_exprs = exprs.ExprSet(
707
768
  join_clause.join_predicate
708
769
  for join_clause in analyzer.from_clause.join_clauses
709
- if join_clause.join_predicate is not None)
770
+ if join_clause.join_predicate is not None
771
+ )
710
772
  scan_target_exprs = sql_exprs | join_exprs
711
773
  tbl_scan_plans: list[exec.SqlScanNode] = []
712
774
  plan: exec.ExecNode
@@ -716,16 +778,21 @@ class Planner:
716
778
  exprs.Expr.list_subexprs(
717
779
  scan_target_exprs,
718
780
  filter=lambda e: e.is_bound_by([tbl]) and not isinstance(e, exprs.Literal),
719
- traverse_matches=False))
781
+ traverse_matches=False,
782
+ )
783
+ )
720
784
  plan = exec.SqlScanNode(
721
- tbl, row_builder, select_list=tbl_scan_exprs,
722
- set_pk=with_pk, exact_version_only=exact_version_only)
785
+ tbl, row_builder, select_list=tbl_scan_exprs, set_pk=with_pk, exact_version_only=exact_version_only
786
+ )
723
787
  tbl_scan_plans.append(plan)
724
788
 
725
789
  if len(analyzer.from_clause.join_clauses) > 0:
726
790
  plan = exec.SqlJoinNode(
727
- row_builder, inputs=tbl_scan_plans, join_clauses=analyzer.from_clause.join_clauses,
728
- select_list=sql_exprs)
791
+ row_builder,
792
+ inputs=tbl_scan_plans,
793
+ join_clauses=analyzer.from_clause.join_clauses,
794
+ select_list=sql_exprs,
795
+ )
729
796
  else:
730
797
  plan = tbl_scan_plans[0]
731
798
 
@@ -762,11 +829,17 @@ class Planner:
762
829
  and plan.to_cte() is not None
763
830
  ):
764
831
  plan = exec.SqlAggregationNode(
765
- row_builder, input=plan, select_list=analyzer.select_list, group_by_items=analyzer.group_by_clause)
832
+ row_builder, input=plan, select_list=analyzer.select_list, group_by_items=analyzer.group_by_clause
833
+ )
766
834
  else:
767
835
  plan = exec.AggregationNode(
768
- tbl.tbl_version, row_builder, analyzer.group_by_clause,
769
- analyzer.agg_fn_calls + analyzer.window_fn_calls, agg_input, input=plan)
836
+ tbl.tbl_version,
837
+ row_builder,
838
+ analyzer.group_by_clause,
839
+ analyzer.agg_fn_calls + analyzer.window_fn_calls,
840
+ agg_input,
841
+ input=plan,
842
+ )
770
843
  typecheck_dummy = analyzer.grouping_exprs + analyzer.agg_fn_calls + analyzer.window_fn_calls
771
844
  agg_output = exprs.ExprSet(typecheck_dummy)
772
845
  if not agg_output.issuperset(exprs.ExprSet(eval_ctx.target_exprs)):
@@ -794,7 +867,8 @@ class Planner:
794
867
  expr_eval_node.set_input_order(False)
795
868
 
796
869
  if limit is not None:
797
- plan.set_limit(limit)
870
+ assert isinstance(limit, exprs.Literal)
871
+ plan.set_limit(limit.val)
798
872
 
799
873
  plan.set_ctx(ctx)
800
874
  return plan
@@ -805,7 +879,7 @@ class Planner:
805
879
 
806
880
  @classmethod
807
881
  def create_add_column_plan(
808
- cls, tbl: catalog.TableVersionPath, col: catalog.Column
882
+ cls, tbl: catalog.TableVersionPath, col: catalog.Column
809
883
  ) -> tuple[exec.ExecNode, Optional[int]]:
810
884
  """Creates a plan for InsertableTable.add_column()
811
885
  Returns:
@@ -816,7 +890,8 @@ class Planner:
816
890
  row_builder = exprs.RowBuilder(output_exprs=[], columns=[col], input_exprs=[])
817
891
  analyzer = Analyzer(FromClause(tbls=[tbl]), row_builder.default_eval_ctx.target_exprs)
818
892
  plan = cls._create_query_plan(
819
- row_builder=row_builder, analyzer=analyzer, eval_ctx=row_builder.default_eval_ctx, with_pk=True)
893
+ row_builder=row_builder, analyzer=analyzer, eval_ctx=row_builder.default_eval_ctx, with_pk=True
894
+ )
820
895
  plan.ctx.batch_size = 16
821
896
  plan.ctx.show_pbar = True
822
897
  plan.ctx.ignore_errors = True