pixeltable 0.4.15__py3-none-any.whl → 0.4.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +4 -0
- pixeltable/catalog/catalog.py +125 -63
- pixeltable/catalog/column.py +7 -2
- pixeltable/catalog/table.py +1 -0
- pixeltable/catalog/table_metadata.py +4 -0
- pixeltable/catalog/table_version.py +174 -117
- pixeltable/catalog/table_version_handle.py +4 -1
- pixeltable/catalog/table_version_path.py +0 -11
- pixeltable/catalog/view.py +6 -0
- pixeltable/config.py +7 -0
- pixeltable/dataframe.py +10 -5
- pixeltable/env.py +56 -19
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/exec_node.py +1 -1
- pixeltable/exec/expr_eval/evaluators.py +1 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +3 -0
- pixeltable/exec/expr_eval/globals.py +2 -0
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/object_store_save_node.py +1 -4
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +107 -14
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +23 -18
- pixeltable/exprs/column_property_ref.py +10 -10
- pixeltable/exprs/column_ref.py +2 -2
- pixeltable/exprs/data_row.py +106 -37
- pixeltable/exprs/expr.py +9 -0
- pixeltable/exprs/expr_set.py +14 -7
- pixeltable/exprs/inline_expr.py +2 -19
- pixeltable/exprs/json_path.py +45 -12
- pixeltable/exprs/row_builder.py +54 -22
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/bedrock.py +7 -0
- pixeltable/functions/deepseek.py +11 -4
- pixeltable/functions/llama_cpp.py +7 -0
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/ollama.py +7 -0
- pixeltable/functions/openai.py +4 -4
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/video.py +110 -28
- pixeltable/globals.py +10 -4
- pixeltable/io/globals.py +18 -17
- pixeltable/io/parquet.py +1 -1
- pixeltable/io/table_data_conduit.py +47 -22
- pixeltable/iterators/document.py +61 -23
- pixeltable/iterators/video.py +126 -53
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/plan.py +175 -46
- pixeltable/share/packager.py +155 -26
- pixeltable/store.py +2 -3
- pixeltable/type_system.py +5 -3
- pixeltable/utils/arrow.py +6 -6
- pixeltable/utils/av.py +65 -0
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/exception_handler.py +5 -28
- pixeltable/utils/image.py +7 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +16 -1
- pixeltable/utils/s3_store.py +44 -11
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/METADATA +29 -28
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/RECORD +68 -61
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/licenses/LICENSE +0 -0
pixeltable/plan.py
CHANGED
|
@@ -3,9 +3,10 @@ from __future__ import annotations
|
|
|
3
3
|
import dataclasses
|
|
4
4
|
import enum
|
|
5
5
|
from textwrap import dedent
|
|
6
|
-
from typing import Any, Iterable, Literal, Optional, Sequence
|
|
6
|
+
from typing import Any, Iterable, Literal, Optional, Sequence, cast
|
|
7
7
|
from uuid import UUID
|
|
8
8
|
|
|
9
|
+
import pgvector.sqlalchemy # type: ignore[import-untyped]
|
|
9
10
|
import sqlalchemy as sql
|
|
10
11
|
|
|
11
12
|
import pixeltable as pxt
|
|
@@ -385,7 +386,7 @@ class Planner:
|
|
|
385
386
|
TableVersionHandle(tbl.id, tbl.effective_version), rows, row_builder, tbl.next_row_id
|
|
386
387
|
)
|
|
387
388
|
|
|
388
|
-
plan = cls.
|
|
389
|
+
plan = cls._add_prefetch_node(tbl.id, row_builder.input_exprs, input_node=plan)
|
|
389
390
|
|
|
390
391
|
computed_exprs = row_builder.output_exprs - row_builder.input_exprs
|
|
391
392
|
if len(computed_exprs) > 0:
|
|
@@ -393,6 +394,8 @@ class Planner:
|
|
|
393
394
|
plan = exec.ExprEvalNode(
|
|
394
395
|
row_builder, computed_exprs, plan.output_exprs, input=plan, maintain_input_order=False
|
|
395
396
|
)
|
|
397
|
+
if any(c.col_type.is_json_type() or c.col_type.is_array_type() for c in stored_cols):
|
|
398
|
+
plan = exec.CellMaterializationNode(plan)
|
|
396
399
|
|
|
397
400
|
plan.set_ctx(
|
|
398
401
|
exec.ExecContext(
|
|
@@ -403,7 +406,7 @@ class Planner:
|
|
|
403
406
|
ignore_errors=ignore_errors,
|
|
404
407
|
)
|
|
405
408
|
)
|
|
406
|
-
plan = cls.
|
|
409
|
+
plan = cls._add_save_node(plan)
|
|
407
410
|
|
|
408
411
|
return plan
|
|
409
412
|
|
|
@@ -422,10 +425,17 @@ class Planner:
|
|
|
422
425
|
plan = df._create_query_plan() # ExecNode constructed by the DataFrame
|
|
423
426
|
|
|
424
427
|
# Modify the plan RowBuilder to register the output columns
|
|
428
|
+
needs_cell_materialization = False
|
|
425
429
|
for col_name, expr in zip(df.schema.keys(), df._select_list_exprs):
|
|
426
430
|
assert col_name in tbl.cols_by_name
|
|
427
431
|
col = tbl.cols_by_name[col_name]
|
|
428
432
|
plan.row_builder.add_table_column(col, expr.slot_idx)
|
|
433
|
+
needs_cell_materialization = (
|
|
434
|
+
needs_cell_materialization or col.col_type.is_json_type() or col.col_type.is_array_type()
|
|
435
|
+
)
|
|
436
|
+
|
|
437
|
+
if needs_cell_materialization:
|
|
438
|
+
plan = exec.CellMaterializationNode(plan)
|
|
429
439
|
|
|
430
440
|
plan.set_ctx(
|
|
431
441
|
exec.ExecContext(
|
|
@@ -446,12 +456,14 @@ class Planner:
|
|
|
446
456
|
cascade: bool,
|
|
447
457
|
) -> tuple[exec.ExecNode, list[str], list[catalog.Column]]:
|
|
448
458
|
"""Creates a plan to materialize updated rows.
|
|
459
|
+
|
|
449
460
|
The plan:
|
|
450
461
|
- retrieves rows that are visible at the current version of the table
|
|
451
462
|
- materializes all stored columns and the update targets
|
|
452
463
|
- if cascade is True, recomputes all computed columns that transitively depend on the updated columns
|
|
453
464
|
and copies the values of all other stored columns
|
|
454
465
|
- if cascade is False, copies all columns that aren't update targets from the original rows
|
|
466
|
+
|
|
455
467
|
Returns:
|
|
456
468
|
- root node of the plan
|
|
457
469
|
- list of qualified column names that are getting updated
|
|
@@ -477,14 +489,16 @@ class Planner:
|
|
|
477
489
|
|
|
478
490
|
cls.__check_valid_columns(tbl.tbl_version.get(), recomputed_cols, 'updated in')
|
|
479
491
|
|
|
492
|
+
# our query plan
|
|
493
|
+
# - evaluates the update targets and recomputed columns
|
|
494
|
+
# - copies all other stored columns
|
|
480
495
|
recomputed_base_cols = {col for col in recomputed_cols if col.tbl.id == tbl.tbl_version.id}
|
|
481
496
|
copied_cols = [
|
|
482
497
|
col
|
|
483
498
|
for col in target.cols_by_id.values()
|
|
484
499
|
if col.is_stored and col not in updated_cols and col not in recomputed_base_cols
|
|
485
500
|
]
|
|
486
|
-
select_list: list[exprs.Expr] =
|
|
487
|
-
select_list.extend(update_targets.values())
|
|
501
|
+
select_list: list[exprs.Expr] = list(update_targets.values())
|
|
488
502
|
|
|
489
503
|
recomputed_exprs = [
|
|
490
504
|
c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols
|
|
@@ -495,14 +509,22 @@ class Planner:
|
|
|
495
509
|
select_list.extend(recomputed_exprs)
|
|
496
510
|
|
|
497
511
|
# we need to retrieve the PK columns of the existing rows
|
|
498
|
-
plan = cls.create_query_plan(
|
|
499
|
-
|
|
512
|
+
plan = cls.create_query_plan(
|
|
513
|
+
FromClause(tbls=[tbl]),
|
|
514
|
+
select_list=select_list,
|
|
515
|
+
columns=copied_cols,
|
|
516
|
+
where_clause=where_clause,
|
|
517
|
+
ignore_errors=True,
|
|
518
|
+
)
|
|
519
|
+
evaluated_cols = updated_cols + list(recomputed_base_cols) # same order as select_list
|
|
500
520
|
# update row builder with column information
|
|
501
|
-
|
|
521
|
+
plan.row_builder.add_table_columns(copied_cols)
|
|
522
|
+
for i, col in enumerate(evaluated_cols):
|
|
502
523
|
plan.row_builder.add_table_column(col, select_list[i].slot_idx)
|
|
503
524
|
plan.ctx.num_computed_exprs = len(recomputed_exprs)
|
|
504
525
|
|
|
505
|
-
plan = cls.
|
|
526
|
+
plan = cls._add_cell_materialization_node(plan)
|
|
527
|
+
plan = cls._add_save_node(plan)
|
|
506
528
|
|
|
507
529
|
recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
|
|
508
530
|
return plan, [f'{c.tbl.name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols
|
|
@@ -525,6 +547,79 @@ class Planner:
|
|
|
525
547
|
.format(validation_error=col.value_expr.validation_error)
|
|
526
548
|
)
|
|
527
549
|
|
|
550
|
+
@classmethod
|
|
551
|
+
def _cell_md_col_refs(cls, expr_list: Iterable[exprs.Expr]) -> list[exprs.ColumnRef]:
|
|
552
|
+
"""Return list of ColumnRefs that need their cellmd values for reconstruction"""
|
|
553
|
+
json_col_refs = list(
|
|
554
|
+
exprs.Expr.list_subexprs(
|
|
555
|
+
expr_list,
|
|
556
|
+
expr_class=exprs.ColumnRef,
|
|
557
|
+
filter=lambda e: cast(exprs.ColumnRef, e).col.col_type.is_json_type(),
|
|
558
|
+
traverse_matches=False,
|
|
559
|
+
)
|
|
560
|
+
)
|
|
561
|
+
|
|
562
|
+
def needs_reconstruction(e: exprs.Expr) -> bool:
|
|
563
|
+
assert isinstance(e, exprs.ColumnRef)
|
|
564
|
+
# Vector-typed array columns are used for vector indexes, and are stored in the db
|
|
565
|
+
return e.col.col_type.is_array_type() and not isinstance(e.col.sa_col_type, pgvector.sqlalchemy.Vector)
|
|
566
|
+
|
|
567
|
+
array_col_refs = list(
|
|
568
|
+
exprs.Expr.list_subexprs(
|
|
569
|
+
expr_list, expr_class=exprs.ColumnRef, filter=needs_reconstruction, traverse_matches=False
|
|
570
|
+
)
|
|
571
|
+
)
|
|
572
|
+
|
|
573
|
+
return json_col_refs + array_col_refs
|
|
574
|
+
|
|
575
|
+
@classmethod
|
|
576
|
+
def _add_cell_materialization_node(cls, input: exec.ExecNode) -> exec.ExecNode:
|
|
577
|
+
# we need a CellMaterializationNode if any of the evaluated output columns are json or array-typed
|
|
578
|
+
has_target_cols = any(
|
|
579
|
+
col.col_type.is_json_type() or col.col_type.is_array_type()
|
|
580
|
+
for col, slot_idx in input.row_builder.table_columns.items()
|
|
581
|
+
if slot_idx is not None
|
|
582
|
+
)
|
|
583
|
+
if has_target_cols:
|
|
584
|
+
return exec.CellMaterializationNode(input)
|
|
585
|
+
else:
|
|
586
|
+
return input
|
|
587
|
+
|
|
588
|
+
@classmethod
|
|
589
|
+
def _add_cell_reconstruction_node(cls, expr_list: list[exprs.Expr], input: exec.ExecNode) -> exec.ExecNode:
|
|
590
|
+
"""
|
|
591
|
+
Add a CellReconstructionNode, if required by any of the exprs in expr_list.
|
|
592
|
+
|
|
593
|
+
Cell reconstruction is required for
|
|
594
|
+
1) all json-typed ColumnRefs that are not used as part of a JsonPath (the latter does its own reconstruction)
|
|
595
|
+
or as part of a ColumnPropertyRef
|
|
596
|
+
2) all array-typed ColumnRefs that are not used as part of a ColumnPropertyRef
|
|
597
|
+
"""
|
|
598
|
+
|
|
599
|
+
def json_filter(e: exprs.Expr) -> bool:
|
|
600
|
+
if isinstance(e, exprs.JsonPath):
|
|
601
|
+
return not e.is_relative_path() and isinstance(e.anchor, exprs.ColumnRef)
|
|
602
|
+
if isinstance(e, exprs.ColumnPropertyRef):
|
|
603
|
+
return e.col_ref.col.col_type.is_json_type()
|
|
604
|
+
return isinstance(e, exprs.ColumnRef) and e.col.col_type.is_json_type()
|
|
605
|
+
|
|
606
|
+
def array_filter(e: exprs.Expr) -> bool:
|
|
607
|
+
if isinstance(e, exprs.ColumnPropertyRef):
|
|
608
|
+
return e.col_ref.col.col_type.is_array_type()
|
|
609
|
+
if not isinstance(e, exprs.ColumnRef):
|
|
610
|
+
return False
|
|
611
|
+
# Vector-typed array columns are used for vector indexes, and are stored in the db
|
|
612
|
+
return e.col.col_type.is_array_type() and not isinstance(e.col.sa_col_type, pgvector.sqlalchemy.Vector)
|
|
613
|
+
|
|
614
|
+
json_candidates = list(exprs.Expr.list_subexprs(expr_list, filter=json_filter, traverse_matches=False))
|
|
615
|
+
json_refs = [e for e in json_candidates if isinstance(e, exprs.ColumnRef)]
|
|
616
|
+
array_candidates = list(exprs.Expr.list_subexprs(expr_list, filter=array_filter, traverse_matches=False))
|
|
617
|
+
array_refs = [e for e in array_candidates if isinstance(e, exprs.ColumnRef)]
|
|
618
|
+
if len(json_refs) > 0 or len(array_refs) > 0:
|
|
619
|
+
return exec.CellReconstructionNode(json_refs, array_refs, input.row_builder, input=input)
|
|
620
|
+
else:
|
|
621
|
+
return input
|
|
622
|
+
|
|
528
623
|
@classmethod
|
|
529
624
|
def create_batch_update_plan(
|
|
530
625
|
cls,
|
|
@@ -543,8 +638,8 @@ class Planner:
|
|
|
543
638
|
"""
|
|
544
639
|
assert isinstance(tbl, catalog.TableVersionPath)
|
|
545
640
|
target = tbl.tbl_version.get() # the one we need to update
|
|
546
|
-
sa_key_cols: list[sql.Column]
|
|
547
|
-
key_vals: list[tuple]
|
|
641
|
+
sa_key_cols: list[sql.Column]
|
|
642
|
+
key_vals: list[tuple]
|
|
548
643
|
if len(rowids) > 0:
|
|
549
644
|
sa_key_cols = target.store_tbl.rowid_columns()
|
|
550
645
|
key_vals = rowids
|
|
@@ -567,8 +662,7 @@ class Planner:
|
|
|
567
662
|
for col in target.cols_by_id.values()
|
|
568
663
|
if col.is_stored and col not in updated_cols and col not in recomputed_base_cols
|
|
569
664
|
]
|
|
570
|
-
select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in
|
|
571
|
-
select_list.extend(exprs.ColumnRef(col) for col in updated_cols)
|
|
665
|
+
select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in updated_cols]
|
|
572
666
|
|
|
573
667
|
recomputed_exprs = [
|
|
574
668
|
c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols
|
|
@@ -586,23 +680,37 @@ class Planner:
|
|
|
586
680
|
)
|
|
587
681
|
row_builder = exprs.RowBuilder(analyzer.all_exprs, [], sql_exprs, target)
|
|
588
682
|
analyzer.finalize(row_builder)
|
|
589
|
-
|
|
683
|
+
|
|
684
|
+
cell_md_col_refs = cls._cell_md_col_refs(sql_exprs)
|
|
685
|
+
sql_lookup_node = exec.SqlLookupNode(
|
|
686
|
+
tbl,
|
|
687
|
+
row_builder,
|
|
688
|
+
sql_exprs,
|
|
689
|
+
columns=copied_cols,
|
|
690
|
+
sa_key_cols=sa_key_cols,
|
|
691
|
+
key_vals=key_vals,
|
|
692
|
+
cell_md_col_refs=cell_md_col_refs,
|
|
693
|
+
)
|
|
590
694
|
col_vals = [{col: row[col].val for col in updated_cols} for row in batch]
|
|
591
695
|
row_update_node = exec.RowUpdateNode(tbl, key_vals, len(rowids) > 0, col_vals, row_builder, sql_lookup_node)
|
|
592
696
|
plan: exec.ExecNode = row_update_node
|
|
593
697
|
if not cls._is_contained_in(analyzer.select_list, sql_exprs):
|
|
594
698
|
# we need an ExprEvalNode to evaluate the remaining output exprs
|
|
595
699
|
plan = exec.ExprEvalNode(row_builder, analyzer.select_list, sql_exprs, input=plan)
|
|
700
|
+
|
|
596
701
|
# update row builder with column information
|
|
597
|
-
|
|
702
|
+
evaluated_cols = list(updated_cols) + list(recomputed_base_cols) # same order as select_list
|
|
598
703
|
row_builder.set_slot_idxs(select_list, remove_duplicates=False)
|
|
599
|
-
|
|
704
|
+
plan.row_builder.add_table_columns(copied_cols)
|
|
705
|
+
for i, col in enumerate(evaluated_cols):
|
|
600
706
|
plan.row_builder.add_table_column(col, select_list[i].slot_idx)
|
|
601
707
|
ctx = exec.ExecContext(row_builder, num_computed_exprs=len(recomputed_exprs))
|
|
602
|
-
#
|
|
708
|
+
# TODO: correct batch size?
|
|
603
709
|
ctx.batch_size = 0
|
|
604
710
|
plan.set_ctx(ctx)
|
|
605
|
-
|
|
711
|
+
|
|
712
|
+
plan = cls._add_cell_materialization_node(plan)
|
|
713
|
+
plan = cls._add_save_node(plan)
|
|
606
714
|
recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
|
|
607
715
|
return (
|
|
608
716
|
plan,
|
|
@@ -653,10 +761,11 @@ class Planner:
|
|
|
653
761
|
exact_version_only=view.get_bases(),
|
|
654
762
|
)
|
|
655
763
|
plan.ctx.num_computed_exprs = len(recomputed_exprs)
|
|
656
|
-
|
|
764
|
+
materialized_cols = copied_cols + list(recomputed_cols) # same order as select_list
|
|
765
|
+
for i, col in enumerate(materialized_cols):
|
|
657
766
|
plan.row_builder.add_table_column(col, select_list[i].slot_idx)
|
|
658
|
-
|
|
659
|
-
plan = cls.
|
|
767
|
+
plan = cls._add_cell_materialization_node(plan)
|
|
768
|
+
plan = cls._add_save_node(plan)
|
|
660
769
|
|
|
661
770
|
return plan
|
|
662
771
|
|
|
@@ -726,7 +835,9 @@ class Planner:
|
|
|
726
835
|
|
|
727
836
|
exec_ctx.ignore_errors = True
|
|
728
837
|
plan.set_ctx(exec_ctx)
|
|
729
|
-
|
|
838
|
+
if any(c.col_type.is_json_type() or c.col_type.is_array_type() for c in stored_cols):
|
|
839
|
+
plan = exec.CellMaterializationNode(plan)
|
|
840
|
+
plan = cls._add_save_node(plan)
|
|
730
841
|
|
|
731
842
|
return plan, len(row_builder.default_eval_ctx.target_exprs)
|
|
732
843
|
|
|
@@ -773,15 +884,13 @@ class Planner:
|
|
|
773
884
|
return combined_ordering
|
|
774
885
|
|
|
775
886
|
@classmethod
|
|
776
|
-
def
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
if len(stored_media_cols) == 0:
|
|
887
|
+
def _add_save_node(cls, input_node: exec.ExecNode) -> exec.ExecNode:
|
|
888
|
+
"""Add an ObjectStoreSaveNode, if needed."""
|
|
889
|
+
media_col_info = input_node.row_builder.media_output_col_info
|
|
890
|
+
if len(media_col_info) == 0:
|
|
781
891
|
return input_node
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
return save_node
|
|
892
|
+
else:
|
|
893
|
+
return exec.ObjectStoreSaveNode(media_col_info, input_node)
|
|
785
894
|
|
|
786
895
|
@classmethod
|
|
787
896
|
def _is_contained_in(cls, l1: Iterable[exprs.Expr], l2: Iterable[exprs.Expr]) -> bool:
|
|
@@ -789,10 +898,10 @@ class Planner:
|
|
|
789
898
|
return {e.id for e in l1} <= {e.id for e in l2}
|
|
790
899
|
|
|
791
900
|
@classmethod
|
|
792
|
-
def
|
|
901
|
+
def _add_prefetch_node(
|
|
793
902
|
cls, tbl_id: UUID, expressions: Iterable[exprs.Expr], input_node: exec.ExecNode
|
|
794
903
|
) -> exec.ExecNode:
|
|
795
|
-
"""
|
|
904
|
+
"""Add a CachePrefetch node, if needed."""
|
|
796
905
|
# we prefetch external files for all media ColumnRefs, even those that aren't part of the dependencies
|
|
797
906
|
# of output_exprs: if unstored iterator columns are present, we might need to materialize ColumnRefs that
|
|
798
907
|
# aren't explicitly captured as dependencies
|
|
@@ -808,21 +917,30 @@ class Planner:
|
|
|
808
917
|
def create_query_plan(
|
|
809
918
|
cls,
|
|
810
919
|
from_clause: FromClause,
|
|
811
|
-
select_list:
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
920
|
+
select_list: list[exprs.Expr] | None = None,
|
|
921
|
+
columns: list[catalog.Column] | None = None,
|
|
922
|
+
where_clause: exprs.Expr | None = None,
|
|
923
|
+
group_by_clause: list[exprs.Expr] | None = None,
|
|
924
|
+
order_by_clause: list[tuple[exprs.Expr, bool]] | None = None,
|
|
925
|
+
limit: exprs.Expr | None = None,
|
|
926
|
+
sample_clause: SampleClause | None = None,
|
|
817
927
|
ignore_errors: bool = False,
|
|
818
|
-
exact_version_only:
|
|
928
|
+
exact_version_only: list[catalog.TableVersionHandle] | None = None,
|
|
819
929
|
) -> exec.ExecNode:
|
|
820
|
-
"""
|
|
930
|
+
"""
|
|
931
|
+
Return plan for executing a query.
|
|
932
|
+
|
|
933
|
+
The plan:
|
|
934
|
+
- materializes the values of select_list exprs into their respective slots
|
|
935
|
+
- materializes cell values of 'columns' (and their cellmd, if applicable) into DataRow.cell_vals/cell_md
|
|
936
|
+
|
|
821
937
|
Updates 'select_list' in place to make it executable.
|
|
822
938
|
TODO: make exact_version_only a flag and use the versions from tbl
|
|
823
939
|
"""
|
|
824
940
|
if select_list is None:
|
|
825
941
|
select_list = []
|
|
942
|
+
if columns is None:
|
|
943
|
+
columns = []
|
|
826
944
|
if order_by_clause is None:
|
|
827
945
|
order_by_clause = []
|
|
828
946
|
if exact_version_only is None:
|
|
@@ -850,6 +968,7 @@ class Planner:
|
|
|
850
968
|
row_builder=row_builder,
|
|
851
969
|
analyzer=analyzer,
|
|
852
970
|
eval_ctx=eval_ctx,
|
|
971
|
+
columns=columns,
|
|
853
972
|
limit=limit,
|
|
854
973
|
with_pk=True,
|
|
855
974
|
exact_version_only=exact_version_only,
|
|
@@ -865,9 +984,10 @@ class Planner:
|
|
|
865
984
|
row_builder: exprs.RowBuilder,
|
|
866
985
|
analyzer: Analyzer,
|
|
867
986
|
eval_ctx: exprs.RowBuilder.EvalCtx,
|
|
987
|
+
columns: list[catalog.Column] | None = None,
|
|
868
988
|
limit: Optional[exprs.Expr] = None,
|
|
869
989
|
with_pk: bool = False,
|
|
870
|
-
exact_version_only:
|
|
990
|
+
exact_version_only: list[catalog.TableVersionHandle] | None = None,
|
|
871
991
|
) -> exec.ExecNode:
|
|
872
992
|
"""
|
|
873
993
|
Create plan to materialize eval_ctx.
|
|
@@ -877,6 +997,8 @@ class Planner:
|
|
|
877
997
|
in the context of that table version (eg, if 'tbl' is a view, 'plan_target' might be the base)
|
|
878
998
|
TODO: make exact_version_only a flag and use the versions from tbl
|
|
879
999
|
"""
|
|
1000
|
+
if columns is None:
|
|
1001
|
+
columns = []
|
|
880
1002
|
if exact_version_only is None:
|
|
881
1003
|
exact_version_only = []
|
|
882
1004
|
sql_elements = analyzer.sql_elements
|
|
@@ -934,8 +1056,15 @@ class Planner:
|
|
|
934
1056
|
traverse_matches=False,
|
|
935
1057
|
)
|
|
936
1058
|
)
|
|
1059
|
+
|
|
937
1060
|
plan = exec.SqlScanNode(
|
|
938
|
-
tbl,
|
|
1061
|
+
tbl,
|
|
1062
|
+
row_builder,
|
|
1063
|
+
select_list=tbl_scan_exprs,
|
|
1064
|
+
columns=[c for c in columns if c.tbl.id == tbl.tbl_id],
|
|
1065
|
+
set_pk=with_pk,
|
|
1066
|
+
cell_md_col_refs=cls._cell_md_col_refs(tbl_scan_exprs),
|
|
1067
|
+
exact_version_only=exact_version_only,
|
|
939
1068
|
)
|
|
940
1069
|
tbl_scan_plans.append(plan)
|
|
941
1070
|
|
|
@@ -966,7 +1095,8 @@ class Planner:
|
|
|
966
1095
|
stratify_exprs=analyzer.stratify_exprs,
|
|
967
1096
|
)
|
|
968
1097
|
|
|
969
|
-
plan = cls.
|
|
1098
|
+
plan = cls._add_prefetch_node(tbl.tbl_version.id, row_builder.unique_exprs, plan)
|
|
1099
|
+
plan = cls._add_cell_reconstruction_node(analyzer.all_exprs, plan)
|
|
970
1100
|
|
|
971
1101
|
if analyzer.group_by_clause is not None:
|
|
972
1102
|
# we're doing grouping aggregation; the input of the AggregateNode are the grouping exprs plus the
|
|
@@ -1010,7 +1140,7 @@ class Planner:
|
|
|
1010
1140
|
if not agg_output.issuperset(exprs.ExprSet(eval_ctx.target_exprs)):
|
|
1011
1141
|
# we need an ExprEvalNode to evaluate the remaining output exprs
|
|
1012
1142
|
plan = exec.ExprEvalNode(row_builder, eval_ctx.target_exprs, agg_output, input=plan)
|
|
1013
|
-
plan = cls.
|
|
1143
|
+
plan = cls._add_save_node(plan)
|
|
1014
1144
|
else:
|
|
1015
1145
|
if not exprs.ExprSet(sql_exprs).issuperset(exprs.ExprSet(eval_ctx.target_exprs)):
|
|
1016
1146
|
# we need an ExprEvalNode to evaluate the remaining output exprs
|
|
@@ -1062,7 +1192,6 @@ class Planner:
|
|
|
1062
1192
|
plan.ctx.ignore_errors = True
|
|
1063
1193
|
computed_exprs = row_builder.output_exprs - row_builder.input_exprs
|
|
1064
1194
|
plan.ctx.num_computed_exprs = len(computed_exprs) # we are adding a computed column, so we need to evaluate it
|
|
1065
|
-
|
|
1066
|
-
plan = cls._insert_save_node(tbl.tbl_version.id, row_builder.stored_media_cols, input_node=plan)
|
|
1195
|
+
plan = cls._add_save_node(plan)
|
|
1067
1196
|
|
|
1068
1197
|
return plan
|