pixeltable 0.4.15__py3-none-any.whl → 0.4.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic; see the registry listing for more details.

Files changed (68):
  1. pixeltable/__init__.py +4 -0
  2. pixeltable/catalog/catalog.py +125 -63
  3. pixeltable/catalog/column.py +7 -2
  4. pixeltable/catalog/table.py +1 -0
  5. pixeltable/catalog/table_metadata.py +4 -0
  6. pixeltable/catalog/table_version.py +174 -117
  7. pixeltable/catalog/table_version_handle.py +4 -1
  8. pixeltable/catalog/table_version_path.py +0 -11
  9. pixeltable/catalog/view.py +6 -0
  10. pixeltable/config.py +7 -0
  11. pixeltable/dataframe.py +10 -5
  12. pixeltable/env.py +56 -19
  13. pixeltable/exec/__init__.py +2 -0
  14. pixeltable/exec/cell_materialization_node.py +231 -0
  15. pixeltable/exec/cell_reconstruction_node.py +135 -0
  16. pixeltable/exec/exec_node.py +1 -1
  17. pixeltable/exec/expr_eval/evaluators.py +1 -0
  18. pixeltable/exec/expr_eval/expr_eval_node.py +3 -0
  19. pixeltable/exec/expr_eval/globals.py +2 -0
  20. pixeltable/exec/globals.py +32 -0
  21. pixeltable/exec/object_store_save_node.py +1 -4
  22. pixeltable/exec/row_update_node.py +16 -9
  23. pixeltable/exec/sql_node.py +107 -14
  24. pixeltable/exprs/__init__.py +1 -1
  25. pixeltable/exprs/arithmetic_expr.py +23 -18
  26. pixeltable/exprs/column_property_ref.py +10 -10
  27. pixeltable/exprs/column_ref.py +2 -2
  28. pixeltable/exprs/data_row.py +106 -37
  29. pixeltable/exprs/expr.py +9 -0
  30. pixeltable/exprs/expr_set.py +14 -7
  31. pixeltable/exprs/inline_expr.py +2 -19
  32. pixeltable/exprs/json_path.py +45 -12
  33. pixeltable/exprs/row_builder.py +54 -22
  34. pixeltable/functions/__init__.py +1 -0
  35. pixeltable/functions/bedrock.py +7 -0
  36. pixeltable/functions/deepseek.py +11 -4
  37. pixeltable/functions/llama_cpp.py +7 -0
  38. pixeltable/functions/math.py +1 -1
  39. pixeltable/functions/ollama.py +7 -0
  40. pixeltable/functions/openai.py +4 -4
  41. pixeltable/functions/openrouter.py +143 -0
  42. pixeltable/functions/video.py +110 -28
  43. pixeltable/globals.py +10 -4
  44. pixeltable/io/globals.py +18 -17
  45. pixeltable/io/parquet.py +1 -1
  46. pixeltable/io/table_data_conduit.py +47 -22
  47. pixeltable/iterators/document.py +61 -23
  48. pixeltable/iterators/video.py +126 -53
  49. pixeltable/metadata/__init__.py +1 -1
  50. pixeltable/metadata/converters/convert_40.py +73 -0
  51. pixeltable/metadata/notes.py +1 -0
  52. pixeltable/plan.py +175 -46
  53. pixeltable/share/packager.py +155 -26
  54. pixeltable/store.py +2 -3
  55. pixeltable/type_system.py +5 -3
  56. pixeltable/utils/arrow.py +6 -6
  57. pixeltable/utils/av.py +65 -0
  58. pixeltable/utils/console_output.py +4 -1
  59. pixeltable/utils/exception_handler.py +5 -28
  60. pixeltable/utils/image.py +7 -0
  61. pixeltable/utils/misc.py +5 -0
  62. pixeltable/utils/object_stores.py +16 -1
  63. pixeltable/utils/s3_store.py +44 -11
  64. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/METADATA +29 -28
  65. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/RECORD +68 -61
  66. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/WHEEL +0 -0
  67. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/entry_points.txt +0 -0
  68. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/licenses/LICENSE +0 -0
pixeltable/plan.py CHANGED
@@ -3,9 +3,10 @@ from __future__ import annotations
3
3
  import dataclasses
4
4
  import enum
5
5
  from textwrap import dedent
6
- from typing import Any, Iterable, Literal, Optional, Sequence
6
+ from typing import Any, Iterable, Literal, Optional, Sequence, cast
7
7
  from uuid import UUID
8
8
 
9
+ import pgvector.sqlalchemy # type: ignore[import-untyped]
9
10
  import sqlalchemy as sql
10
11
 
11
12
  import pixeltable as pxt
@@ -385,7 +386,7 @@ class Planner:
385
386
  TableVersionHandle(tbl.id, tbl.effective_version), rows, row_builder, tbl.next_row_id
386
387
  )
387
388
 
388
- plan = cls._insert_prefetch_node(tbl.id, row_builder.input_exprs, input_node=plan)
389
+ plan = cls._add_prefetch_node(tbl.id, row_builder.input_exprs, input_node=plan)
389
390
 
390
391
  computed_exprs = row_builder.output_exprs - row_builder.input_exprs
391
392
  if len(computed_exprs) > 0:
@@ -393,6 +394,8 @@ class Planner:
393
394
  plan = exec.ExprEvalNode(
394
395
  row_builder, computed_exprs, plan.output_exprs, input=plan, maintain_input_order=False
395
396
  )
397
+ if any(c.col_type.is_json_type() or c.col_type.is_array_type() for c in stored_cols):
398
+ plan = exec.CellMaterializationNode(plan)
396
399
 
397
400
  plan.set_ctx(
398
401
  exec.ExecContext(
@@ -403,7 +406,7 @@ class Planner:
403
406
  ignore_errors=ignore_errors,
404
407
  )
405
408
  )
406
- plan = cls._insert_save_node(tbl.id, row_builder.stored_media_cols, input_node=plan)
409
+ plan = cls._add_save_node(plan)
407
410
 
408
411
  return plan
409
412
 
@@ -422,10 +425,17 @@ class Planner:
422
425
  plan = df._create_query_plan() # ExecNode constructed by the DataFrame
423
426
 
424
427
  # Modify the plan RowBuilder to register the output columns
428
+ needs_cell_materialization = False
425
429
  for col_name, expr in zip(df.schema.keys(), df._select_list_exprs):
426
430
  assert col_name in tbl.cols_by_name
427
431
  col = tbl.cols_by_name[col_name]
428
432
  plan.row_builder.add_table_column(col, expr.slot_idx)
433
+ needs_cell_materialization = (
434
+ needs_cell_materialization or col.col_type.is_json_type() or col.col_type.is_array_type()
435
+ )
436
+
437
+ if needs_cell_materialization:
438
+ plan = exec.CellMaterializationNode(plan)
429
439
 
430
440
  plan.set_ctx(
431
441
  exec.ExecContext(
@@ -446,12 +456,14 @@ class Planner:
446
456
  cascade: bool,
447
457
  ) -> tuple[exec.ExecNode, list[str], list[catalog.Column]]:
448
458
  """Creates a plan to materialize updated rows.
459
+
449
460
  The plan:
450
461
  - retrieves rows that are visible at the current version of the table
451
462
  - materializes all stored columns and the update targets
452
463
  - if cascade is True, recomputes all computed columns that transitively depend on the updated columns
453
464
  and copies the values of all other stored columns
454
465
  - if cascade is False, copies all columns that aren't update targets from the original rows
466
+
455
467
  Returns:
456
468
  - root node of the plan
457
469
  - list of qualified column names that are getting updated
@@ -477,14 +489,16 @@ class Planner:
477
489
 
478
490
  cls.__check_valid_columns(tbl.tbl_version.get(), recomputed_cols, 'updated in')
479
491
 
492
+ # our query plan
493
+ # - evaluates the update targets and recomputed columns
494
+ # - copies all other stored columns
480
495
  recomputed_base_cols = {col for col in recomputed_cols if col.tbl.id == tbl.tbl_version.id}
481
496
  copied_cols = [
482
497
  col
483
498
  for col in target.cols_by_id.values()
484
499
  if col.is_stored and col not in updated_cols and col not in recomputed_base_cols
485
500
  ]
486
- select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in copied_cols]
487
- select_list.extend(update_targets.values())
501
+ select_list: list[exprs.Expr] = list(update_targets.values())
488
502
 
489
503
  recomputed_exprs = [
490
504
  c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols
@@ -495,14 +509,22 @@ class Planner:
495
509
  select_list.extend(recomputed_exprs)
496
510
 
497
511
  # we need to retrieve the PK columns of the existing rows
498
- plan = cls.create_query_plan(FromClause(tbls=[tbl]), select_list, where_clause=where_clause, ignore_errors=True)
499
- all_base_cols = copied_cols + updated_cols + list(recomputed_base_cols) # same order as select_list
512
+ plan = cls.create_query_plan(
513
+ FromClause(tbls=[tbl]),
514
+ select_list=select_list,
515
+ columns=copied_cols,
516
+ where_clause=where_clause,
517
+ ignore_errors=True,
518
+ )
519
+ evaluated_cols = updated_cols + list(recomputed_base_cols) # same order as select_list
500
520
  # update row builder with column information
501
- for i, col in enumerate(all_base_cols):
521
+ plan.row_builder.add_table_columns(copied_cols)
522
+ for i, col in enumerate(evaluated_cols):
502
523
  plan.row_builder.add_table_column(col, select_list[i].slot_idx)
503
524
  plan.ctx.num_computed_exprs = len(recomputed_exprs)
504
525
 
505
- plan = cls._insert_save_node(tbl.tbl_version.id, plan.row_builder.stored_media_cols, input_node=plan)
526
+ plan = cls._add_cell_materialization_node(plan)
527
+ plan = cls._add_save_node(plan)
506
528
 
507
529
  recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
508
530
  return plan, [f'{c.tbl.name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols
@@ -525,6 +547,79 @@ class Planner:
525
547
  .format(validation_error=col.value_expr.validation_error)
526
548
  )
527
549
 
550
+ @classmethod
551
+ def _cell_md_col_refs(cls, expr_list: Iterable[exprs.Expr]) -> list[exprs.ColumnRef]:
552
+ """Return list of ColumnRefs that need their cellmd values for reconstruction"""
553
+ json_col_refs = list(
554
+ exprs.Expr.list_subexprs(
555
+ expr_list,
556
+ expr_class=exprs.ColumnRef,
557
+ filter=lambda e: cast(exprs.ColumnRef, e).col.col_type.is_json_type(),
558
+ traverse_matches=False,
559
+ )
560
+ )
561
+
562
+ def needs_reconstruction(e: exprs.Expr) -> bool:
563
+ assert isinstance(e, exprs.ColumnRef)
564
+ # Vector-typed array columns are used for vector indexes, and are stored in the db
565
+ return e.col.col_type.is_array_type() and not isinstance(e.col.sa_col_type, pgvector.sqlalchemy.Vector)
566
+
567
+ array_col_refs = list(
568
+ exprs.Expr.list_subexprs(
569
+ expr_list, expr_class=exprs.ColumnRef, filter=needs_reconstruction, traverse_matches=False
570
+ )
571
+ )
572
+
573
+ return json_col_refs + array_col_refs
574
+
575
+ @classmethod
576
+ def _add_cell_materialization_node(cls, input: exec.ExecNode) -> exec.ExecNode:
577
+ # we need a CellMaterializationNode if any of the evaluated output columns are json or array-typed
578
+ has_target_cols = any(
579
+ col.col_type.is_json_type() or col.col_type.is_array_type()
580
+ for col, slot_idx in input.row_builder.table_columns.items()
581
+ if slot_idx is not None
582
+ )
583
+ if has_target_cols:
584
+ return exec.CellMaterializationNode(input)
585
+ else:
586
+ return input
587
+
588
+ @classmethod
589
+ def _add_cell_reconstruction_node(cls, expr_list: list[exprs.Expr], input: exec.ExecNode) -> exec.ExecNode:
590
+ """
591
+ Add a CellReconstructionNode, if required by any of the exprs in expr_list.
592
+
593
+ Cell reconstruction is required for
594
+ 1) all json-typed ColumnRefs that are not used as part of a JsonPath (the latter does its own reconstruction)
595
+ or as part of a ColumnPropertyRef
596
+ 2) all array-typed ColumnRefs that are not used as part of a ColumnPropertyRef
597
+ """
598
+
599
+ def json_filter(e: exprs.Expr) -> bool:
600
+ if isinstance(e, exprs.JsonPath):
601
+ return not e.is_relative_path() and isinstance(e.anchor, exprs.ColumnRef)
602
+ if isinstance(e, exprs.ColumnPropertyRef):
603
+ return e.col_ref.col.col_type.is_json_type()
604
+ return isinstance(e, exprs.ColumnRef) and e.col.col_type.is_json_type()
605
+
606
+ def array_filter(e: exprs.Expr) -> bool:
607
+ if isinstance(e, exprs.ColumnPropertyRef):
608
+ return e.col_ref.col.col_type.is_array_type()
609
+ if not isinstance(e, exprs.ColumnRef):
610
+ return False
611
+ # Vector-typed array columns are used for vector indexes, and are stored in the db
612
+ return e.col.col_type.is_array_type() and not isinstance(e.col.sa_col_type, pgvector.sqlalchemy.Vector)
613
+
614
+ json_candidates = list(exprs.Expr.list_subexprs(expr_list, filter=json_filter, traverse_matches=False))
615
+ json_refs = [e for e in json_candidates if isinstance(e, exprs.ColumnRef)]
616
+ array_candidates = list(exprs.Expr.list_subexprs(expr_list, filter=array_filter, traverse_matches=False))
617
+ array_refs = [e for e in array_candidates if isinstance(e, exprs.ColumnRef)]
618
+ if len(json_refs) > 0 or len(array_refs) > 0:
619
+ return exec.CellReconstructionNode(json_refs, array_refs, input.row_builder, input=input)
620
+ else:
621
+ return input
622
+
528
623
  @classmethod
529
624
  def create_batch_update_plan(
530
625
  cls,
@@ -543,8 +638,8 @@ class Planner:
543
638
  """
544
639
  assert isinstance(tbl, catalog.TableVersionPath)
545
640
  target = tbl.tbl_version.get() # the one we need to update
546
- sa_key_cols: list[sql.Column] = []
547
- key_vals: list[tuple] = []
641
+ sa_key_cols: list[sql.Column]
642
+ key_vals: list[tuple]
548
643
  if len(rowids) > 0:
549
644
  sa_key_cols = target.store_tbl.rowid_columns()
550
645
  key_vals = rowids
@@ -567,8 +662,7 @@ class Planner:
567
662
  for col in target.cols_by_id.values()
568
663
  if col.is_stored and col not in updated_cols and col not in recomputed_base_cols
569
664
  ]
570
- select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in copied_cols]
571
- select_list.extend(exprs.ColumnRef(col) for col in updated_cols)
665
+ select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in updated_cols]
572
666
 
573
667
  recomputed_exprs = [
574
668
  c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols
@@ -586,23 +680,37 @@ class Planner:
586
680
  )
587
681
  row_builder = exprs.RowBuilder(analyzer.all_exprs, [], sql_exprs, target)
588
682
  analyzer.finalize(row_builder)
589
- sql_lookup_node = exec.SqlLookupNode(tbl, row_builder, sql_exprs, sa_key_cols, key_vals)
683
+
684
+ cell_md_col_refs = cls._cell_md_col_refs(sql_exprs)
685
+ sql_lookup_node = exec.SqlLookupNode(
686
+ tbl,
687
+ row_builder,
688
+ sql_exprs,
689
+ columns=copied_cols,
690
+ sa_key_cols=sa_key_cols,
691
+ key_vals=key_vals,
692
+ cell_md_col_refs=cell_md_col_refs,
693
+ )
590
694
  col_vals = [{col: row[col].val for col in updated_cols} for row in batch]
591
695
  row_update_node = exec.RowUpdateNode(tbl, key_vals, len(rowids) > 0, col_vals, row_builder, sql_lookup_node)
592
696
  plan: exec.ExecNode = row_update_node
593
697
  if not cls._is_contained_in(analyzer.select_list, sql_exprs):
594
698
  # we need an ExprEvalNode to evaluate the remaining output exprs
595
699
  plan = exec.ExprEvalNode(row_builder, analyzer.select_list, sql_exprs, input=plan)
700
+
596
701
  # update row builder with column information
597
- all_base_cols = copied_cols + list(updated_cols) + list(recomputed_base_cols) # same order as select_list
702
+ evaluated_cols = list(updated_cols) + list(recomputed_base_cols) # same order as select_list
598
703
  row_builder.set_slot_idxs(select_list, remove_duplicates=False)
599
- for i, col in enumerate(all_base_cols):
704
+ plan.row_builder.add_table_columns(copied_cols)
705
+ for i, col in enumerate(evaluated_cols):
600
706
  plan.row_builder.add_table_column(col, select_list[i].slot_idx)
601
707
  ctx = exec.ExecContext(row_builder, num_computed_exprs=len(recomputed_exprs))
602
- # we're returning everything to the user, so we might as well do it in a single batch
708
+ # TODO: correct batch size?
603
709
  ctx.batch_size = 0
604
710
  plan.set_ctx(ctx)
605
- plan = cls._insert_save_node(tbl.tbl_version.id, plan.row_builder.stored_media_cols, input_node=plan)
711
+
712
+ plan = cls._add_cell_materialization_node(plan)
713
+ plan = cls._add_save_node(plan)
606
714
  recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
607
715
  return (
608
716
  plan,
@@ -653,10 +761,11 @@ class Planner:
653
761
  exact_version_only=view.get_bases(),
654
762
  )
655
763
  plan.ctx.num_computed_exprs = len(recomputed_exprs)
656
- for i, col in enumerate(copied_cols + list(recomputed_cols)): # same order as select_list
764
+ materialized_cols = copied_cols + list(recomputed_cols) # same order as select_list
765
+ for i, col in enumerate(materialized_cols):
657
766
  plan.row_builder.add_table_column(col, select_list[i].slot_idx)
658
- # TODO: avoid duplication with view_load_plan() logic (where does this belong?)
659
- plan = cls._insert_save_node(view.tbl_version.id, plan.row_builder.stored_media_cols, input_node=plan)
767
+ plan = cls._add_cell_materialization_node(plan)
768
+ plan = cls._add_save_node(plan)
660
769
 
661
770
  return plan
662
771
 
@@ -726,7 +835,9 @@ class Planner:
726
835
 
727
836
  exec_ctx.ignore_errors = True
728
837
  plan.set_ctx(exec_ctx)
729
- plan = cls._insert_save_node(view.tbl_version.id, plan.row_builder.stored_media_cols, input_node=plan)
838
+ if any(c.col_type.is_json_type() or c.col_type.is_array_type() for c in stored_cols):
839
+ plan = exec.CellMaterializationNode(plan)
840
+ plan = cls._add_save_node(plan)
730
841
 
731
842
  return plan, len(row_builder.default_eval_ctx.target_exprs)
732
843
 
@@ -773,15 +884,13 @@ class Planner:
773
884
  return combined_ordering
774
885
 
775
886
  @classmethod
776
- def _insert_save_node(
777
- cls, tbl_id: UUID, stored_media_cols: list[exprs.ColumnSlotIdx], input_node: exec.ExecNode
778
- ) -> exec.ExecNode:
779
- """Return an ObjectStoreSaveNode if stored media columns are present, otherwise return input"""
780
- if len(stored_media_cols) == 0:
887
+ def _add_save_node(cls, input_node: exec.ExecNode) -> exec.ExecNode:
888
+ """Add an ObjectStoreSaveNode, if needed."""
889
+ media_col_info = input_node.row_builder.media_output_col_info
890
+ if len(media_col_info) == 0:
781
891
  return input_node
782
- save_node = exec.ObjectStoreSaveNode(tbl_id, stored_media_cols, input_node)
783
- save_node.set_ctx(input_node.ctx)
784
- return save_node
892
+ else:
893
+ return exec.ObjectStoreSaveNode(media_col_info, input_node)
785
894
 
786
895
  @classmethod
787
896
  def _is_contained_in(cls, l1: Iterable[exprs.Expr], l2: Iterable[exprs.Expr]) -> bool:
@@ -789,10 +898,10 @@ class Planner:
789
898
  return {e.id for e in l1} <= {e.id for e in l2}
790
899
 
791
900
  @classmethod
792
- def _insert_prefetch_node(
901
+ def _add_prefetch_node(
793
902
  cls, tbl_id: UUID, expressions: Iterable[exprs.Expr], input_node: exec.ExecNode
794
903
  ) -> exec.ExecNode:
795
- """Return a node to prefetch data if needed, otherwise return input"""
904
+ """Add a CachePrefetch node, if needed."""
796
905
  # we prefetch external files for all media ColumnRefs, even those that aren't part of the dependencies
797
906
  # of output_exprs: if unstored iterator columns are present, we might need to materialize ColumnRefs that
798
907
  # aren't explicitly captured as dependencies
@@ -808,21 +917,30 @@ class Planner:
808
917
  def create_query_plan(
809
918
  cls,
810
919
  from_clause: FromClause,
811
- select_list: Optional[list[exprs.Expr]] = None,
812
- where_clause: Optional[exprs.Expr] = None,
813
- group_by_clause: Optional[list[exprs.Expr]] = None,
814
- order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None,
815
- limit: Optional[exprs.Expr] = None,
816
- sample_clause: Optional[SampleClause] = None,
920
+ select_list: list[exprs.Expr] | None = None,
921
+ columns: list[catalog.Column] | None = None,
922
+ where_clause: exprs.Expr | None = None,
923
+ group_by_clause: list[exprs.Expr] | None = None,
924
+ order_by_clause: list[tuple[exprs.Expr, bool]] | None = None,
925
+ limit: exprs.Expr | None = None,
926
+ sample_clause: SampleClause | None = None,
817
927
  ignore_errors: bool = False,
818
- exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
928
+ exact_version_only: list[catalog.TableVersionHandle] | None = None,
819
929
  ) -> exec.ExecNode:
820
- """Return plan for executing a query.
930
+ """
931
+ Return plan for executing a query.
932
+
933
+ The plan:
934
+ - materializes the values of select_list exprs into their respective slots
935
+ - materializes cell values of 'columns' (and their cellmd, if applicable) into DataRow.cell_vals/cell_md
936
+
821
937
  Updates 'select_list' in place to make it executable.
822
938
  TODO: make exact_version_only a flag and use the versions from tbl
823
939
  """
824
940
  if select_list is None:
825
941
  select_list = []
942
+ if columns is None:
943
+ columns = []
826
944
  if order_by_clause is None:
827
945
  order_by_clause = []
828
946
  if exact_version_only is None:
@@ -850,6 +968,7 @@ class Planner:
850
968
  row_builder=row_builder,
851
969
  analyzer=analyzer,
852
970
  eval_ctx=eval_ctx,
971
+ columns=columns,
853
972
  limit=limit,
854
973
  with_pk=True,
855
974
  exact_version_only=exact_version_only,
@@ -865,9 +984,10 @@ class Planner:
865
984
  row_builder: exprs.RowBuilder,
866
985
  analyzer: Analyzer,
867
986
  eval_ctx: exprs.RowBuilder.EvalCtx,
987
+ columns: list[catalog.Column] | None = None,
868
988
  limit: Optional[exprs.Expr] = None,
869
989
  with_pk: bool = False,
870
- exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
990
+ exact_version_only: list[catalog.TableVersionHandle] | None = None,
871
991
  ) -> exec.ExecNode:
872
992
  """
873
993
  Create plan to materialize eval_ctx.
@@ -877,6 +997,8 @@ class Planner:
877
997
  in the context of that table version (eg, if 'tbl' is a view, 'plan_target' might be the base)
878
998
  TODO: make exact_version_only a flag and use the versions from tbl
879
999
  """
1000
+ if columns is None:
1001
+ columns = []
880
1002
  if exact_version_only is None:
881
1003
  exact_version_only = []
882
1004
  sql_elements = analyzer.sql_elements
@@ -934,8 +1056,15 @@ class Planner:
934
1056
  traverse_matches=False,
935
1057
  )
936
1058
  )
1059
+
937
1060
  plan = exec.SqlScanNode(
938
- tbl, row_builder, select_list=tbl_scan_exprs, set_pk=with_pk, exact_version_only=exact_version_only
1061
+ tbl,
1062
+ row_builder,
1063
+ select_list=tbl_scan_exprs,
1064
+ columns=[c for c in columns if c.tbl.id == tbl.tbl_id],
1065
+ set_pk=with_pk,
1066
+ cell_md_col_refs=cls._cell_md_col_refs(tbl_scan_exprs),
1067
+ exact_version_only=exact_version_only,
939
1068
  )
940
1069
  tbl_scan_plans.append(plan)
941
1070
 
@@ -966,7 +1095,8 @@ class Planner:
966
1095
  stratify_exprs=analyzer.stratify_exprs,
967
1096
  )
968
1097
 
969
- plan = cls._insert_prefetch_node(tbl.tbl_version.id, row_builder.unique_exprs, plan)
1098
+ plan = cls._add_prefetch_node(tbl.tbl_version.id, row_builder.unique_exprs, plan)
1099
+ plan = cls._add_cell_reconstruction_node(analyzer.all_exprs, plan)
970
1100
 
971
1101
  if analyzer.group_by_clause is not None:
972
1102
  # we're doing grouping aggregation; the input of the AggregateNode are the grouping exprs plus the
@@ -1010,7 +1140,7 @@ class Planner:
1010
1140
  if not agg_output.issuperset(exprs.ExprSet(eval_ctx.target_exprs)):
1011
1141
  # we need an ExprEvalNode to evaluate the remaining output exprs
1012
1142
  plan = exec.ExprEvalNode(row_builder, eval_ctx.target_exprs, agg_output, input=plan)
1013
- plan = cls._insert_save_node(tbl.tbl_version.id, row_builder.stored_media_cols, input_node=plan)
1143
+ plan = cls._add_save_node(plan)
1014
1144
  else:
1015
1145
  if not exprs.ExprSet(sql_exprs).issuperset(exprs.ExprSet(eval_ctx.target_exprs)):
1016
1146
  # we need an ExprEvalNode to evaluate the remaining output exprs
@@ -1062,7 +1192,6 @@ class Planner:
1062
1192
  plan.ctx.ignore_errors = True
1063
1193
  computed_exprs = row_builder.output_exprs - row_builder.input_exprs
1064
1194
  plan.ctx.num_computed_exprs = len(computed_exprs) # we are adding a computed column, so we need to evaluate it
1065
-
1066
- plan = cls._insert_save_node(tbl.tbl_version.id, row_builder.stored_media_cols, input_node=plan)
1195
+ plan = cls._add_save_node(plan)
1067
1196
 
1068
1197
  return plan