PyPI - pixeltable - Versions diffs - 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl - Mend

pixeltable 0.4.0rc3 py3-none-any.whl → 0.4.20 py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (202) hide show

pixeltable/__init__.py +23 -5
pixeltable/_version.py +1 -0
pixeltable/catalog/__init__.py +5 -3
pixeltable/catalog/catalog.py +1318 -404
pixeltable/catalog/column.py +186 -115
pixeltable/catalog/dir.py +1 -2
pixeltable/catalog/globals.py +11 -43
pixeltable/catalog/insertable_table.py +167 -79
pixeltable/catalog/path.py +61 -23
pixeltable/catalog/schema_object.py +9 -10
pixeltable/catalog/table.py +626 -308
pixeltable/catalog/table_metadata.py +101 -0
pixeltable/catalog/table_version.py +713 -569
pixeltable/catalog/table_version_handle.py +37 -6
pixeltable/catalog/table_version_path.py +42 -29
pixeltable/catalog/tbl_ops.py +50 -0
pixeltable/catalog/update_status.py +191 -0
pixeltable/catalog/view.py +108 -94
pixeltable/config.py +128 -22
pixeltable/dataframe.py +188 -100
pixeltable/env.py +407 -136
pixeltable/exceptions.py +6 -0
pixeltable/exec/__init__.py +3 -0
pixeltable/exec/aggregation_node.py +7 -8
pixeltable/exec/cache_prefetch_node.py +83 -110
pixeltable/exec/cell_materialization_node.py +231 -0
pixeltable/exec/cell_reconstruction_node.py +135 -0
pixeltable/exec/component_iteration_node.py +4 -3
pixeltable/exec/data_row_batch.py +8 -65
pixeltable/exec/exec_context.py +16 -4
pixeltable/exec/exec_node.py +13 -36
pixeltable/exec/expr_eval/evaluators.py +7 -6
pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
pixeltable/exec/expr_eval/globals.py +8 -5
pixeltable/exec/expr_eval/row_buffer.py +1 -2
pixeltable/exec/expr_eval/schedulers.py +190 -30
pixeltable/exec/globals.py +32 -0
pixeltable/exec/in_memory_data_node.py +18 -18
pixeltable/exec/object_store_save_node.py +293 -0
pixeltable/exec/row_update_node.py +16 -9
pixeltable/exec/sql_node.py +206 -101
pixeltable/exprs/__init__.py +1 -1
pixeltable/exprs/arithmetic_expr.py +27 -22
pixeltable/exprs/array_slice.py +3 -3
pixeltable/exprs/column_property_ref.py +34 -30
pixeltable/exprs/column_ref.py +92 -96
pixeltable/exprs/comparison.py +5 -5
pixeltable/exprs/compound_predicate.py +5 -4
pixeltable/exprs/data_row.py +152 -55
pixeltable/exprs/expr.py +62 -43
pixeltable/exprs/expr_dict.py +3 -3
pixeltable/exprs/expr_set.py +17 -10
pixeltable/exprs/function_call.py +75 -37
pixeltable/exprs/globals.py +1 -2
pixeltable/exprs/in_predicate.py +4 -4
pixeltable/exprs/inline_expr.py +10 -27
pixeltable/exprs/is_null.py +1 -3
pixeltable/exprs/json_mapper.py +8 -8
pixeltable/exprs/json_path.py +56 -22
pixeltable/exprs/literal.py +5 -5
pixeltable/exprs/method_ref.py +2 -2
pixeltable/exprs/object_ref.py +2 -2
pixeltable/exprs/row_builder.py +127 -53
pixeltable/exprs/rowid_ref.py +8 -12
pixeltable/exprs/similarity_expr.py +50 -25
pixeltable/exprs/sql_element_cache.py +4 -4
pixeltable/exprs/string_op.py +5 -5
pixeltable/exprs/type_cast.py +3 -5
pixeltable/func/__init__.py +1 -0
pixeltable/func/aggregate_function.py +8 -8
pixeltable/func/callable_function.py +9 -9
pixeltable/func/expr_template_function.py +10 -10
pixeltable/func/function.py +18 -20
pixeltable/func/function_registry.py +6 -7
pixeltable/func/globals.py +2 -3
pixeltable/func/mcp.py +74 -0
pixeltable/func/query_template_function.py +20 -18
pixeltable/func/signature.py +43 -16
pixeltable/func/tools.py +23 -13
pixeltable/func/udf.py +18 -20
pixeltable/functions/__init__.py +6 -0
pixeltable/functions/anthropic.py +93 -33
pixeltable/functions/audio.py +114 -10
pixeltable/functions/bedrock.py +13 -6
pixeltable/functions/date.py +1 -1
pixeltable/functions/deepseek.py +20 -9
pixeltable/functions/fireworks.py +2 -2
pixeltable/functions/gemini.py +28 -11
pixeltable/functions/globals.py +13 -13
pixeltable/functions/groq.py +108 -0
pixeltable/functions/huggingface.py +1046 -23
pixeltable/functions/image.py +9 -18
pixeltable/functions/llama_cpp.py +23 -8
pixeltable/functions/math.py +3 -4
pixeltable/functions/mistralai.py +4 -15
pixeltable/functions/ollama.py +16 -9
pixeltable/functions/openai.py +104 -82
pixeltable/functions/openrouter.py +143 -0
pixeltable/functions/replicate.py +2 -2
pixeltable/functions/reve.py +250 -0
pixeltable/functions/string.py +21 -28
pixeltable/functions/timestamp.py +13 -14
pixeltable/functions/together.py +4 -6
pixeltable/functions/twelvelabs.py +92 -0
pixeltable/functions/util.py +6 -1
pixeltable/functions/video.py +1388 -106
pixeltable/functions/vision.py +7 -7
pixeltable/functions/whisper.py +15 -7
pixeltable/functions/whisperx.py +179 -0
pixeltable/{ext/functions → functions}/yolox.py +2 -4
pixeltable/globals.py +332 -105
pixeltable/index/base.py +13 -22
pixeltable/index/btree.py +23 -22
pixeltable/index/embedding_index.py +32 -44
pixeltable/io/__init__.py +4 -2
pixeltable/io/datarows.py +7 -6
pixeltable/io/external_store.py +49 -77
pixeltable/io/fiftyone.py +11 -11
pixeltable/io/globals.py +29 -28
pixeltable/io/hf_datasets.py +17 -9
pixeltable/io/label_studio.py +70 -66
pixeltable/io/lancedb.py +3 -0
pixeltable/io/pandas.py +12 -11
pixeltable/io/parquet.py +13 -93
pixeltable/io/table_data_conduit.py +71 -47
pixeltable/io/utils.py +3 -3
pixeltable/iterators/__init__.py +2 -1
pixeltable/iterators/audio.py +21 -11
pixeltable/iterators/document.py +116 -55
pixeltable/iterators/image.py +5 -2
pixeltable/iterators/video.py +293 -13
pixeltable/metadata/__init__.py +4 -2
pixeltable/metadata/converters/convert_18.py +2 -2
pixeltable/metadata/converters/convert_19.py +2 -2
pixeltable/metadata/converters/convert_20.py +2 -2
pixeltable/metadata/converters/convert_21.py +2 -2
pixeltable/metadata/converters/convert_22.py +2 -2
pixeltable/metadata/converters/convert_24.py +2 -2
pixeltable/metadata/converters/convert_25.py +2 -2
pixeltable/metadata/converters/convert_26.py +2 -2
pixeltable/metadata/converters/convert_29.py +4 -4
pixeltable/metadata/converters/convert_34.py +2 -2
pixeltable/metadata/converters/convert_36.py +2 -2
pixeltable/metadata/converters/convert_37.py +15 -0
pixeltable/metadata/converters/convert_38.py +39 -0
pixeltable/metadata/converters/convert_39.py +124 -0
pixeltable/metadata/converters/convert_40.py +73 -0
pixeltable/metadata/converters/util.py +13 -12
pixeltable/metadata/notes.py +4 -0
pixeltable/metadata/schema.py +79 -42
pixeltable/metadata/utils.py +74 -0
pixeltable/mypy/__init__.py +3 -0
pixeltable/mypy/mypy_plugin.py +123 -0
pixeltable/plan.py +274 -223
pixeltable/share/__init__.py +1 -1
pixeltable/share/packager.py +259 -129
pixeltable/share/protocol/__init__.py +34 -0
pixeltable/share/protocol/common.py +170 -0
pixeltable/share/protocol/operation_types.py +33 -0
pixeltable/share/protocol/replica.py +109 -0
pixeltable/share/publish.py +213 -57
pixeltable/store.py +238 -175
pixeltable/type_system.py +104 -63
pixeltable/utils/__init__.py +2 -3
pixeltable/utils/arrow.py +108 -13
pixeltable/utils/av.py +298 -0
pixeltable/utils/azure_store.py +305 -0
pixeltable/utils/code.py +3 -3
pixeltable/utils/console_output.py +4 -1
pixeltable/utils/coroutine.py +6 -23
pixeltable/utils/dbms.py +31 -5
pixeltable/utils/description_helper.py +4 -5
pixeltable/utils/documents.py +5 -6
pixeltable/utils/exception_handler.py +7 -30
pixeltable/utils/filecache.py +6 -6
pixeltable/utils/formatter.py +4 -6
pixeltable/utils/gcs_store.py +283 -0
pixeltable/utils/http_server.py +2 -3
pixeltable/utils/iceberg.py +1 -2
pixeltable/utils/image.py +17 -0
pixeltable/utils/lancedb.py +88 -0
pixeltable/utils/local_store.py +316 -0
pixeltable/utils/misc.py +5 -0
pixeltable/utils/object_stores.py +528 -0
pixeltable/utils/pydantic.py +60 -0
pixeltable/utils/pytorch.py +5 -6
pixeltable/utils/s3_store.py +392 -0
pixeltable-0.4.20.dist-info/METADATA +587 -0
pixeltable-0.4.20.dist-info/RECORD +218 -0
{pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
pixeltable/__version__.py +0 -3
pixeltable/ext/__init__.py +0 -17
pixeltable/ext/functions/__init__.py +0 -11
pixeltable/ext/functions/whisperx.py +0 -77
pixeltable/utils/media_store.py +0 -77
pixeltable/utils/s3.py +0 -17
pixeltable/utils/sample.py +0 -25
pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
{pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0

pixeltable/plan.py CHANGED Viewed

@@ -3,16 +3,16 @@ from __future__ import annotations
 import dataclasses
 import enum
 from textwrap import dedent
-from typing import Any, Iterable, Literal, NamedTuple, Optional, Sequence
+from typing import Any, Iterable, Literal, Sequence, cast
 from uuid import UUID
+import pgvector.sqlalchemy  # type: ignore[import-untyped]
 import sqlalchemy as sql
 import pixeltable as pxt
 from pixeltable import catalog, exceptions as excs, exec, exprs
 from pixeltable.catalog import Column, TableVersionHandle
 from pixeltable.exec.sql_node import OrderByClause, OrderByItem, combine_order_by_clauses, print_order_by_clause
-from pixeltable.utils.sample import sample_key
 def _is_agg_fn_call(e: exprs.Expr) -> bool:
@@ -66,7 +66,7 @@ class JoinClause:
     """Corresponds to a single 'JOIN ... ON (...)' clause in a SELECT statement; excludes the joined table."""
     join_type: JoinType
-    join_predicate: Optional[exprs.Expr]  # None for join_type == CROSS
+    join_predicate: exprs.Expr | None  # None for join_type == CROSS
 @dataclasses.dataclass
@@ -86,25 +86,20 @@ class FromClause:
 class SampleClause:
     """Defines a sampling clause for a table."""
-    version: Optional[int]
-    n: Optional[int]
-    n_per_stratum: Optional[int]
-    fraction: Optional[float]
-    seed: Optional[int]
-    stratify_exprs: Optional[list[exprs.Expr]]
-    # This seed value is used if one is not supplied
-    DEFAULT_SEED = 0
+    version: int | None
+    n: int | None
+    n_per_stratum: int | None
+    fraction: float | None
+    seed: int | None
+    stratify_exprs: list[exprs.Expr] | None
     # The version of the hashing algorithm used for ordering and fractional sampling.
     CURRENT_VERSION = 1
     def __post_init__(self) -> None:
-        """If no version was provided, provide the default version"""
+        # If no version was provided, provide the default version
         if self.version is None:
             self.version = self.CURRENT_VERSION
-        if self.seed is None:
-            self.seed = self.DEFAULT_SEED
     @property
     def is_stratified(self) -> bool:
@@ -159,16 +154,6 @@ class SampleClause:
         return format(threshold_int, '08x') + 'ffffffffffffffffffffffff'
-class SamplingClauses(NamedTuple):
-    """Clauses provided when rewriting a SampleClause"""
-    where: exprs.Expr
-    group_by_clause: Optional[list[exprs.Expr]]
-    order_by_clause: Optional[list[tuple[exprs.Expr, bool]]]
-    limit: Optional[exprs.Expr]
-    sample_clause: Optional[SampleClause]
 class Analyzer:
     """
     Performs semantic analysis of a query and stores the analysis state.
@@ -177,17 +162,19 @@ class Analyzer:
     from_clause: FromClause
     all_exprs: list[exprs.Expr]  # union of all exprs, aside from sql_where_clause
     select_list: list[exprs.Expr]
-    group_by_clause: Optional[list[exprs.Expr]]  # None for non-aggregate queries; [] for agg query w/o grouping
+    group_by_clause: list[exprs.Expr] | None  # None for non-aggregate queries; [] for agg query w/o grouping
     grouping_exprs: list[exprs.Expr]  # [] for non-aggregate queries or agg query w/o grouping
     order_by_clause: OrderByClause
+    stratify_exprs: list[exprs.Expr]  # [] if no stratification is required
+    sample_clause: SampleClause | None  # None if no sampling clause is present
     sql_elements: exprs.SqlElementCache
     # Where clause of the Select stmt of the SQL scan
-    sql_where_clause: Optional[exprs.Expr]
+    sql_where_clause: exprs.Expr | None
     # filter predicate applied to output rows of the SQL scan
-    filter: Optional[exprs.Expr]
+    filter: exprs.Expr | None
     agg_fn_calls: list[exprs.FunctionCall]  # grouping aggregation (ie, not window functions)
     window_fn_calls: list[exprs.FunctionCall]
@@ -197,9 +184,10 @@ class Analyzer:
         self,
         from_clause: FromClause,
         select_list: Sequence[exprs.Expr],
-        where_clause: Optional[exprs.Expr] = None,
-        group_by_clause: Optional[list[exprs.Expr]] = None,
-        order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None,
+        where_clause: exprs.Expr | None = None,
+        group_by_clause: list[exprs.Expr] | None = None,
+        order_by_clause: list[tuple[exprs.Expr, bool]] | None = None,
+        sample_clause: SampleClause | None = None,
     ):
         if order_by_clause is None:
             order_by_clause = []
@@ -213,6 +201,11 @@ class Analyzer:
         self.group_by_clause = (
             [e.resolve_computed_cols() for e in group_by_clause] if group_by_clause is not None else None
         )
+        self.sample_clause = sample_clause
+        if self.sample_clause is not None and self.sample_clause.is_stratified:
+            self.stratify_exprs = [e.resolve_computed_cols() for e in sample_clause.stratify_exprs]
+        else:
+            self.stratify_exprs = []
         self.order_by_clause = [OrderByItem(e.resolve_computed_cols(), asc) for e, asc in order_by_clause]
         self.sql_where_clause = None
@@ -228,8 +221,11 @@ class Analyzer:
                 self.all_exprs.append(join_clause.join_predicate)
         if self.group_by_clause is not None:
             self.all_exprs.extend(self.group_by_clause)
+        self.all_exprs.extend(self.stratify_exprs)
         self.all_exprs.extend(e for e, _ in self.order_by_clause)
         if self.filter is not None:
+            if sample_clause is not None:
+                raise excs.Error(f'Filter {self.filter} not expressible in SQL')
             self.all_exprs.append(self.filter)
         self.agg_order_by = []
@@ -334,7 +330,7 @@ class Analyzer:
         row_builder.set_slot_idxs(self.agg_fn_calls)
         row_builder.set_slot_idxs(self.agg_order_by)
-    def get_window_fn_ob_clause(self) -> Optional[OrderByClause]:
+    def get_window_fn_ob_clause(self) -> OrderByClause | None:
         clause: list[OrderByClause] = []
         for fn_call in self.window_fn_calls:
             # window functions require ordering by the group_by/order_by clauses
@@ -352,7 +348,7 @@ class Analyzer:
 class Planner:
     # TODO: create an exec.CountNode and change this to create_count_plan()
     @classmethod
-    def create_count_stmt(cls, tbl: catalog.TableVersionPath, where_clause: Optional[exprs.Expr] = None) -> sql.Select:
+    def create_count_stmt(cls, tbl: catalog.TableVersionPath, where_clause: exprs.Expr | None = None) -> sql.Select:
         stmt = sql.select(sql.func.count().label('all_count'))
         refd_tbl_ids: set[UUID] = set()
         if where_clause is not None:
@@ -378,21 +374,14 @@ class Planner:
         cls.__check_valid_columns(tbl, stored_cols, 'inserted into')
-        row_builder = exprs.RowBuilder([], stored_cols, [])
+        row_builder = exprs.RowBuilder([], stored_cols, [], tbl)
         # create InMemoryDataNode for 'rows'
         plan: exec.ExecNode = exec.InMemoryDataNode(
             TableVersionHandle(tbl.id, tbl.effective_version), rows, row_builder, tbl.next_row_id
         )
-        media_input_col_info = [
-            exprs.ColumnSlotIdx(col_ref.col, col_ref.slot_idx)
-            for col_ref in row_builder.input_exprs
-            if isinstance(col_ref, exprs.ColumnRef) and col_ref.col_type.is_media_type()
-        ]
-        if len(media_input_col_info) > 0:
-            # prefetch external files for all input column refs
-            plan = exec.CachePrefetchNode(tbl.id, media_input_col_info, input=plan)
+        plan = cls._add_prefetch_node(tbl.id, row_builder.input_exprs, input_node=plan)
         computed_exprs = row_builder.output_exprs - row_builder.input_exprs
         if len(computed_exprs) > 0:
@@ -400,10 +389,9 @@ class Planner:
             plan = exec.ExprEvalNode(
                 row_builder, computed_exprs, plan.output_exprs, input=plan, maintain_input_order=False
             )
+        if any(c.col_type.is_json_type() or c.col_type.is_array_type() for c in stored_cols):
+            plan = exec.CellMaterializationNode(plan)
-        stored_col_info = row_builder.output_slot_idxs()
-        stored_img_col_info = [info for info in stored_col_info if info.col.col_type.is_image_type()]
-        plan.set_stored_img_cols(stored_img_col_info)
         plan.set_ctx(
             exec.ExecContext(
                 row_builder,
@@ -413,10 +401,12 @@ class Planner:
                 ignore_errors=ignore_errors,
             )
         )
+        plan = cls._add_save_node(plan)
         return plan
     @classmethod
-    def rowid_columns(cls, target: TableVersionHandle, num_rowid_cols: Optional[int] = None) -> list[exprs.Expr]:
+    def rowid_columns(cls, target: TableVersionHandle, num_rowid_cols: int | None = None) -> list[exprs.Expr]:
         """Return list of RowidRef for the given number of associated rowids"""
         if num_rowid_cols is None:
             num_rowid_cols = target.get().num_rowid_columns()
@@ -430,14 +420,17 @@ class Planner:
         plan = df._create_query_plan()  # ExecNode constructed by the DataFrame
         # Modify the plan RowBuilder to register the output columns
+        needs_cell_materialization = False
         for col_name, expr in zip(df.schema.keys(), df._select_list_exprs):
             assert col_name in tbl.cols_by_name
             col = tbl.cols_by_name[col_name]
             plan.row_builder.add_table_column(col, expr.slot_idx)
+            needs_cell_materialization = (
+                needs_cell_materialization or col.col_type.is_json_type() or col.col_type.is_array_type()
+            )
-        stored_col_info = plan.row_builder.output_slot_idxs()
-        stored_img_col_info = [info for info in stored_col_info if info.col.col_type.is_image_type()]
-        plan.set_stored_img_cols(stored_img_col_info)
+        if needs_cell_materialization:
+            plan = exec.CellMaterializationNode(plan)
         plan.set_ctx(
             exec.ExecContext(
@@ -454,16 +447,18 @@ class Planner:
         tbl: catalog.TableVersionPath,
         update_targets: dict[catalog.Column, exprs.Expr],
         recompute_targets: list[catalog.Column],
-        where_clause: Optional[exprs.Expr],
+        where_clause: exprs.Expr | None,
         cascade: bool,
     ) -> tuple[exec.ExecNode, list[str], list[catalog.Column]]:
         """Creates a plan to materialize updated rows.
         The plan:
         - retrieves rows that are visible at the current version of the table
         - materializes all stored columns and the update targets
         - if cascade is True, recomputes all computed columns that transitively depend on the updated columns
           and copies the values of all other stored columns
         - if cascade is False, copies all columns that aren't update targets from the original rows
         Returns:
             - root node of the plan
             - list of qualified column names that are getting updated
@@ -473,26 +468,33 @@ class Planner:
         assert isinstance(tbl, catalog.TableVersionPath)
         target = tbl.tbl_version.get()  # the one we need to update
         updated_cols = list(update_targets.keys())
+        recomputed_cols: set[Column]
         if len(recompute_targets) > 0:
-            recomputed_cols = set(recompute_targets)
+            assert len(update_targets) == 0
+            recomputed_cols = {*recompute_targets}
+            if cascade:
+                recomputed_cols |= target.get_dependent_columns(recomputed_cols)
         else:
             recomputed_cols = target.get_dependent_columns(updated_cols) if cascade else set()
-            # regardless of cascade, we need to update all indices on any updated column
-            idx_val_cols = target.get_idx_val_columns(updated_cols)
-            recomputed_cols.update(idx_val_cols)
-            # we only need to recompute stored columns (unstored ones are substituted away)
-            recomputed_cols = {c for c in recomputed_cols if c.is_stored}
+        # regardless of cascade, we need to update all indices on any updated/recomputed column
+        modified_base_cols = [c for c in set(updated_cols) | recomputed_cols if c.get_tbl().id == target.id]
+        idx_val_cols = target.get_idx_val_columns(modified_base_cols)
+        recomputed_cols.update(idx_val_cols)
+        # we only need to recompute stored columns (unstored ones are substituted away)
+        recomputed_cols = {c for c in recomputed_cols if c.is_stored}
         cls.__check_valid_columns(tbl.tbl_version.get(), recomputed_cols, 'updated in')
-        recomputed_base_cols = {col for col in recomputed_cols if col.tbl.id == tbl.tbl_version.id}
+        # our query plan
+        # - evaluates the update targets and recomputed columns
+        # - copies all other stored columns
+        recomputed_base_cols = {col for col in recomputed_cols if col.get_tbl().id == tbl.tbl_version.id}
         copied_cols = [
             col
             for col in target.cols_by_id.values()
             if col.is_stored and col not in updated_cols and col not in recomputed_base_cols
         ]
-        select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in copied_cols]
-        select_list.extend(update_targets.values())
+        select_list: list[exprs.Expr] = list(update_targets.values())
         recomputed_exprs = [
             c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols
@@ -503,13 +505,25 @@ class Planner:
         select_list.extend(recomputed_exprs)
         # we need to retrieve the PK columns of the existing rows
-        plan = cls.create_query_plan(FromClause(tbls=[tbl]), select_list, where_clause=where_clause, ignore_errors=True)
-        all_base_cols = copied_cols + updated_cols + list(recomputed_base_cols)  # same order as select_list
+        plan = cls.create_query_plan(
+            FromClause(tbls=[tbl]),
+            select_list=select_list,
+            columns=copied_cols,
+            where_clause=where_clause,
+            ignore_errors=True,
+        )
+        evaluated_cols = updated_cols + list(recomputed_base_cols)  # same order as select_list
         # update row builder with column information
-        for i, col in enumerate(all_base_cols):
+        plan.row_builder.add_table_columns(copied_cols)
+        for i, col in enumerate(evaluated_cols):
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
+        plan.ctx.num_computed_exprs = len(recomputed_exprs)
+        plan = cls._add_cell_materialization_node(plan)
+        plan = cls._add_save_node(plan)
         recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
-        return plan, [f'{c.tbl.name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols
+        return plan, [f'{c.get_tbl().name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols
     @classmethod
     def __check_valid_columns(
@@ -529,6 +543,79 @@ class Planner:
                     .format(validation_error=col.value_expr.validation_error)
                 )
+    @classmethod
+    def _cell_md_col_refs(cls, expr_list: Iterable[exprs.Expr]) -> list[exprs.ColumnRef]:
+        """Return list of ColumnRefs that need their cellmd values for reconstruction"""
+        json_col_refs = list(
+            exprs.Expr.list_subexprs(
+                expr_list,
+                expr_class=exprs.ColumnRef,
+                filter=lambda e: cast(exprs.ColumnRef, e).col.col_type.is_json_type(),
+                traverse_matches=False,
+            )
+        )
+        def needs_reconstruction(e: exprs.Expr) -> bool:
+            assert isinstance(e, exprs.ColumnRef)
+            # Vector-typed array columns are used for vector indexes, and are stored in the db
+            return e.col.col_type.is_array_type() and not isinstance(e.col.sa_col_type, pgvector.sqlalchemy.Vector)
+        array_col_refs = list(
+            exprs.Expr.list_subexprs(
+                expr_list, expr_class=exprs.ColumnRef, filter=needs_reconstruction, traverse_matches=False
+            )
+        )
+        return json_col_refs + array_col_refs
+    @classmethod
+    def _add_cell_materialization_node(cls, input: exec.ExecNode) -> exec.ExecNode:
+        # we need a CellMaterializationNode if any of the evaluated output columns are json or array-typed
+        has_target_cols = any(
+            col.col_type.is_json_type() or col.col_type.is_array_type()
+            for col, slot_idx in input.row_builder.table_columns.items()
+            if slot_idx is not None
+        )
+        if has_target_cols:
+            return exec.CellMaterializationNode(input)
+        else:
+            return input
+    @classmethod
+    def _add_cell_reconstruction_node(cls, expr_list: list[exprs.Expr], input: exec.ExecNode) -> exec.ExecNode:
+        """
+        Add a CellReconstructionNode, if required by any of the exprs in expr_list.
+        Cell reconstruction is required for
+        1) all json-typed ColumnRefs that are not used as part of a JsonPath (the latter does its own reconstruction)
+           or as part of a ColumnPropertyRef
+        2) all array-typed ColumnRefs that are not used as part of a ColumnPropertyRef
+        """
+        def json_filter(e: exprs.Expr) -> bool:
+            if isinstance(e, exprs.JsonPath):
+                return not e.is_relative_path() and isinstance(e.anchor, exprs.ColumnRef)
+            if isinstance(e, exprs.ColumnPropertyRef):
+                return e.col_ref.col.col_type.is_json_type()
+            return isinstance(e, exprs.ColumnRef) and e.col.col_type.is_json_type()
+        def array_filter(e: exprs.Expr) -> bool:
+            if isinstance(e, exprs.ColumnPropertyRef):
+                return e.col_ref.col.col_type.is_array_type()
+            if not isinstance(e, exprs.ColumnRef):
+                return False
+            # Vector-typed array columns are used for vector indexes, and are stored in the db
+            return e.col.col_type.is_array_type() and not isinstance(e.col.sa_col_type, pgvector.sqlalchemy.Vector)
+        json_candidates = list(exprs.Expr.list_subexprs(expr_list, filter=json_filter, traverse_matches=False))
+        json_refs = [e for e in json_candidates if isinstance(e, exprs.ColumnRef)]
+        array_candidates = list(exprs.Expr.list_subexprs(expr_list, filter=array_filter, traverse_matches=False))
+        array_refs = [e for e in array_candidates if isinstance(e, exprs.ColumnRef)]
+        if len(json_refs) > 0 or len(array_refs) > 0:
+            return exec.CellReconstructionNode(json_refs, array_refs, input.row_builder, input=input)
+        else:
+            return input
     @classmethod
     def create_batch_update_plan(
         cls,
@@ -547,8 +634,8 @@ class Planner:
         """
         assert isinstance(tbl, catalog.TableVersionPath)
         target = tbl.tbl_version.get()  # the one we need to update
-        sa_key_cols: list[sql.Column] = []
-        key_vals: list[tuple] = []
+        sa_key_cols: list[sql.Column]
+        key_vals: list[tuple]
         if len(rowids) > 0:
             sa_key_cols = target.store_tbl.rowid_columns()
             key_vals = rowids
@@ -561,18 +648,18 @@ class Planner:
         updated_cols = batch[0].keys() - target.primary_key_columns()
         recomputed_cols = target.get_dependent_columns(updated_cols) if cascade else set()
         # regardless of cascade, we need to update all indices on any updated column
-        idx_val_cols = target.get_idx_val_columns(updated_cols)
+        modified_base_cols = [c for c in set(updated_cols) | recomputed_cols if c.get_tbl().id == target.id]
+        idx_val_cols = target.get_idx_val_columns(modified_base_cols)
         recomputed_cols.update(idx_val_cols)
         # we only need to recompute stored columns (unstored ones are substituted away)
         recomputed_cols = {c for c in recomputed_cols if c.is_stored}
-        recomputed_base_cols = {col for col in recomputed_cols if col.tbl.id == target.id}
+        recomputed_base_cols = {col for col in recomputed_cols if col.get_tbl().id == target.id}
         copied_cols = [
             col
             for col in target.cols_by_id.values()
             if col.is_stored and col not in updated_cols and col not in recomputed_base_cols
         ]
-        select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in copied_cols]
-        select_list.extend(exprs.ColumnRef(col) for col in updated_cols)
+        select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in updated_cols]
         recomputed_exprs = [
             c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols
@@ -588,25 +675,39 @@ class Planner:
         sql_exprs = list(
             exprs.Expr.list_subexprs(analyzer.all_exprs, filter=analyzer.sql_elements.contains, traverse_matches=False)
         )
-        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], sql_exprs)
+        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], sql_exprs, target)
         analyzer.finalize(row_builder)
-        sql_lookup_node = exec.SqlLookupNode(tbl, row_builder, sql_exprs, sa_key_cols, key_vals)
+        cell_md_col_refs = cls._cell_md_col_refs(sql_exprs)
+        sql_lookup_node = exec.SqlLookupNode(
+            tbl,
+            row_builder,
+            sql_exprs,
+            columns=copied_cols,
+            sa_key_cols=sa_key_cols,
+            key_vals=key_vals,
+            cell_md_col_refs=cell_md_col_refs,
+        )
         col_vals = [{col: row[col].val for col in updated_cols} for row in batch]
         row_update_node = exec.RowUpdateNode(tbl, key_vals, len(rowids) > 0, col_vals, row_builder, sql_lookup_node)
         plan: exec.ExecNode = row_update_node
         if not cls._is_contained_in(analyzer.select_list, sql_exprs):
             # we need an ExprEvalNode to evaluate the remaining output exprs
             plan = exec.ExprEvalNode(row_builder, analyzer.select_list, sql_exprs, input=plan)
         # update row builder with column information
-        all_base_cols = copied_cols + list(updated_cols) + list(recomputed_base_cols)  # same order as select_list
+        evaluated_cols = list(updated_cols) + list(recomputed_base_cols)  # same order as select_list
         row_builder.set_slot_idxs(select_list, remove_duplicates=False)
-        for i, col in enumerate(all_base_cols):
+        plan.row_builder.add_table_columns(copied_cols)
+        for i, col in enumerate(evaluated_cols):
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
-        ctx = exec.ExecContext(row_builder)
-        # we're returning everything to the user, so we might as well do it in a single batch
+        ctx = exec.ExecContext(row_builder, num_computed_exprs=len(recomputed_exprs))
+        # TODO: correct batch size?
         ctx.batch_size = 0
         plan.set_ctx(ctx)
+        plan = cls._add_cell_materialization_node(plan)
+        plan = cls._add_save_node(plan)
         recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
         return (
             plan,
@@ -656,13 +757,13 @@ class Planner:
             ignore_errors=True,
             exact_version_only=view.get_bases(),
         )
-        for i, col in enumerate(copied_cols + list(recomputed_cols)):  # same order as select_list
+        plan.ctx.num_computed_exprs = len(recomputed_exprs)
+        materialized_cols = copied_cols + list(recomputed_cols)  # same order as select_list
+        for i, col in enumerate(materialized_cols):
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
-        # TODO: avoid duplication with view_load_plan() logic (where does this belong?)
-        stored_img_col_info = [
-            info for info in plan.row_builder.output_slot_idxs() if info.col.col_type.is_image_type()
-        ]
-        plan.set_stored_img_cols(stored_img_col_info)
+        plan = cls._add_cell_materialization_node(plan)
+        plan = cls._add_save_node(plan)
         return plan
     @classmethod
@@ -691,25 +792,13 @@ class Planner:
         # 2. for component views: iterator args
         iterator_args = [target.iterator_args] if target.iterator_args is not None else []
-        # If this contains a sample specification, modify / create where, group_by, order_by, and limit clauses
         from_clause = FromClause(tbls=[view.base])
-        where, group_by_clause, order_by_clause, limit, sample_clause = cls.create_sample_clauses(
-            from_clause, target.sample_clause, target.predicate, None, [], None
-        )
-        # if we're propagating an insert, we only want to see those base rows that were created for the current version
         base_analyzer = Analyzer(
-            from_clause,
-            iterator_args,
-            where_clause=where,
-            group_by_clause=group_by_clause,
-            order_by_clause=order_by_clause,
+            from_clause, iterator_args, where_clause=target.predicate, sample_clause=target.sample_clause
         )
-        row_builder = exprs.RowBuilder(base_analyzer.all_exprs, stored_cols, [])
-        if target.sample_clause is not None and base_analyzer.filter is not None:
-            raise excs.Error(f'Filter {base_analyzer.filter} not expressible in SQL')
+        row_builder = exprs.RowBuilder(base_analyzer.all_exprs, stored_cols, [], target)
+        # if we're propagating an insert, we only want to see those base rows that were created for the current version
         # execution plan:
         # 1. materialize exprs computed from the base that are needed for stored view columns
         # 2. if it's an iterator view, expand the base rows into component rows
@@ -723,19 +812,13 @@ class Planner:
         # Create a new analyzer reflecting exactly what is required from the base table
         base_analyzer = Analyzer(
-            from_clause,
-            base_output_exprs,
-            where_clause=where,
-            group_by_clause=group_by_clause,
-            order_by_clause=order_by_clause,
+            from_clause, base_output_exprs, where_clause=target.predicate, sample_clause=target.sample_clause
         )
         base_eval_ctx = row_builder.create_eval_ctx(base_analyzer.all_exprs)
         plan = cls._create_query_plan(
             row_builder=row_builder,
             analyzer=base_analyzer,
             eval_ctx=base_eval_ctx,
-            limit=limit,
-            sample_clause=sample_clause,
             with_pk=True,
             exact_version_only=view.get_bases() if propagates_insert else [],
         )
@@ -747,10 +830,12 @@ class Planner:
                 row_builder, output_exprs=view_output_exprs, input_exprs=base_output_exprs, input=plan
             )
-        stored_img_col_info = [info for info in row_builder.output_slot_idxs() if info.col.col_type.is_image_type()]
-        plan.set_stored_img_cols(stored_img_col_info)
         exec_ctx.ignore_errors = True
         plan.set_ctx(exec_ctx)
+        if any(c.col_type.is_json_type() or c.col_type.is_array_type() for c in stored_cols):
+            plan = exec.CellMaterializationNode(plan)
+        plan = cls._add_save_node(plan)
         return plan, len(row_builder.default_eval_ctx.target_exprs)
     @classmethod
@@ -761,7 +846,7 @@ class Planner:
                 raise excs.Error(f'Join predicate {join_clause.join_predicate} not expressible in SQL')
     @classmethod
-    def _create_combined_ordering(cls, analyzer: Analyzer, verify_agg: bool) -> Optional[OrderByClause]:
+    def _create_combined_ordering(cls, analyzer: Analyzer, verify_agg: bool) -> OrderByClause | None:
         """Verify that the various ordering requirements don't conflict and return a combined ordering"""
         ob_clauses: list[OrderByClause] = [analyzer.order_by_clause.copy()]
@@ -795,22 +880,29 @@ class Planner:
             combined_ordering = combined
         return combined_ordering
+    @classmethod
+    def _add_save_node(cls, input_node: exec.ExecNode) -> exec.ExecNode:
+        """Add an ObjectStoreSaveNode, if needed."""
+        media_col_info = input_node.row_builder.media_output_col_info
+        if len(media_col_info) == 0:
+            return input_node
+        else:
+            return exec.ObjectStoreSaveNode(media_col_info, input_node)
     @classmethod
     def _is_contained_in(cls, l1: Iterable[exprs.Expr], l2: Iterable[exprs.Expr]) -> bool:
         """Returns True if l1 is contained in l2"""
         return {e.id for e in l1} <= {e.id for e in l2}
     @classmethod
-    def _insert_prefetch_node(
-        cls, tbl_id: UUID, row_builder: exprs.RowBuilder, input_node: exec.ExecNode
+    def _add_prefetch_node(
+        cls, tbl_id: UUID, expressions: Iterable[exprs.Expr], input_node: exec.ExecNode
     ) -> exec.ExecNode:
-        """Returns a CachePrefetchNode into the plan if needed, otherwise returns input"""
+        """Add a CachePrefetch node, if needed."""
         # we prefetch external files for all media ColumnRefs, even those that aren't part of the dependencies
         # of output_exprs: if unstored iterator columns are present, we might need to materialize ColumnRefs that
         # aren't explicitly captured as dependencies
-        media_col_refs = [
-            e for e in list(row_builder.unique_exprs) if isinstance(e, exprs.ColumnRef) and e.col_type.is_media_type()
-        ]
+        media_col_refs = [e for e in expressions if isinstance(e, exprs.ColumnRef) and e.col_type.is_media_type()]
         if len(media_col_refs) == 0:
             return input_node
         # we need to prefetch external files for media column types
@@ -818,101 +910,52 @@ class Planner:
         prefetch_node = exec.CachePrefetchNode(tbl_id, file_col_info, input_node)
         return prefetch_node
-    @classmethod
-    def create_sample_clauses(
-        cls,
-        from_clause: FromClause,
-        sample_clause: SampleClause,
-        where_clause: Optional[exprs.Expr],
-        group_by_clause: Optional[list[exprs.Expr]],
-        order_by_clause: Optional[list[tuple[exprs.Expr, bool]]],
-        limit: Optional[exprs.Expr],
-    ) -> SamplingClauses:
-        """tuple[
-            exprs.Expr,
-            Optional[list[exprs.Expr]],
-            Optional[list[tuple[exprs.Expr, bool]]],
-            Optional[exprs.Expr],
-            Optional[SampleClause],
-        ]:"""
-        """Construct clauses required for sampling under various conditions.
-        If there is no sampling, then return the original clauses.
-        If the sample is stratified, then return only the group by clause. The rest of the
-        mechanism for stratified sampling is provided by the SampleSqlNode.
-        If the sample is non-stratified, then rewrite the query to accommodate the supplied where clause,
-        and provide the other clauses required for sampling
-        """
-        # If no sample clause, return the original clauses
-        if sample_clause is None:
-            return SamplingClauses(where_clause, group_by_clause, order_by_clause, limit, None)
-        # If the sample clause is stratified, create a group by clause
-        if sample_clause.is_stratified:
-            group_by = sample_clause.stratify_exprs
-            # Note that limit is not possible here
-            return SamplingClauses(where_clause, group_by, order_by_clause, None, sample_clause)
-        else:
-            # If non-stratified sampling, construct a where clause, order_by, and limit clauses
-            # Construct an expression for sorting rows and limiting row counts
-            s_key = sample_key(
-                exprs.Literal(sample_clause.seed), *cls.rowid_columns(from_clause._first_tbl.tbl_version)
-            )
-            # Construct a suitable where clause
-            where = where_clause
-            if sample_clause.fraction is not None:
-                fraction_md5_hex = exprs.Expr.from_object(
-                    sample_clause.fraction_to_md5_hex(float(sample_clause.fraction))
-                )
-                f_where = s_key < fraction_md5_hex
-                where = where & f_where if where is not None else f_where
-            order_by: list[tuple[exprs.Expr, bool]] = [(s_key, True)]
-            limit = exprs.Literal(sample_clause.n)
-            # Note that group_by is not possible here
-            return SamplingClauses(where, None, order_by, limit, None)
     @classmethod
     def create_query_plan(
         cls,
         from_clause: FromClause,
-        select_list: Optional[list[exprs.Expr]] = None,
-        where_clause: Optional[exprs.Expr] = None,
-        group_by_clause: Optional[list[exprs.Expr]] = None,
-        order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None,
-        limit: Optional[exprs.Expr] = None,
-        sample_clause: Optional[SampleClause] = None,
+        select_list: list[exprs.Expr] | None = None,
+        columns: list[catalog.Column] | None = None,
+        where_clause: exprs.Expr | None = None,
+        group_by_clause: list[exprs.Expr] | None = None,
+        order_by_clause: list[tuple[exprs.Expr, bool]] | None = None,
+        limit: exprs.Expr | None = None,
+        sample_clause: SampleClause | None = None,
         ignore_errors: bool = False,
-        exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
+        exact_version_only: list[catalog.TableVersionHandle] | None = None,
     ) -> exec.ExecNode:
-        """Return plan for executing a query.
+        """
+        Return plan for executing a query.
+        The plan:
+        - materializes the values of select_list exprs into their respective slots
+        - materializes cell values of 'columns' (and their cellmd, if applicable) into DataRow.cell_vals/cell_md
         Updates 'select_list' in place to make it executable.
         TODO: make exact_version_only a flag and use the versions from tbl
         """
         if select_list is None:
             select_list = []
+        if columns is None:
+            columns = []
         if order_by_clause is None:
             order_by_clause = []
         if exact_version_only is None:
             exact_version_only = []
-        # Modify clauses to include sample clause
-        where, group_by_clause, order_by_clause, limit, sample = cls.create_sample_clauses(
-            from_clause, sample_clause, where_clause, group_by_clause, order_by_clause, limit
-        )
         analyzer = Analyzer(
             from_clause,
             select_list,
-            where_clause=where,
+            where_clause=where_clause,
             group_by_clause=group_by_clause,
             order_by_clause=order_by_clause,
+            sample_clause=sample_clause,
         )
-        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], [])
-        if sample_clause is not None and analyzer.filter is not None:
-            raise excs.Error(f'Filter {analyzer.filter} not expressible in SQL')
+        # If the from_clause has a single table, we can use it as the context table for the RowBuilder.
+        # Otherwise there is no context table, but that's ok, because the context table is only needed for
+        # table mutations, which can't happen during a join.
+        context_tbl = from_clause.tbls[0].tbl_version.get() if len(from_clause.tbls) == 1 else None
+        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], [], context_tbl)
         analyzer.finalize(row_builder)
         # select_list: we need to materialize everything that's been collected
@@ -922,8 +965,8 @@ class Planner:
             row_builder=row_builder,
             analyzer=analyzer,
             eval_ctx=eval_ctx,
+            columns=columns,
             limit=limit,
-            sample_clause=sample,
             with_pk=True,
             exact_version_only=exact_version_only,
         )
@@ -938,10 +981,10 @@ class Planner:
         row_builder: exprs.RowBuilder,
         analyzer: Analyzer,
         eval_ctx: exprs.RowBuilder.EvalCtx,
-        limit: Optional[exprs.Expr] = None,
-        sample_clause: Optional[SampleClause] = None,
+        columns: list[catalog.Column] | None = None,
+        limit: exprs.Expr | None = None,
         with_pk: bool = False,
-        exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
+        exact_version_only: list[catalog.TableVersionHandle] | None = None,
     ) -> exec.ExecNode:
         """
         Create plan to materialize eval_ctx.
@@ -951,6 +994,8 @@ class Planner:
                 in the context of that table version (eg, if 'tbl' is a view, 'plan_target' might be the base)
         TODO: make exact_version_only a flag and use the versions from tbl
         """
+        if columns is None:
+            columns = []
         if exact_version_only is None:
             exact_version_only = []
         sql_elements = analyzer.sql_elements
@@ -958,6 +1003,7 @@ class Planner:
             analyzer.window_fn_calls
         )
         ctx = exec.ExecContext(row_builder)
         combined_ordering = cls._create_combined_ordering(analyzer, verify_agg=is_python_agg)
         cls._verify_join_clauses(analyzer)
@@ -966,6 +1012,7 @@ class Planner:
         # - join clause subexprs
         # - subexprs of Where clause conjuncts that can't be run in SQL
         # - all grouping exprs
+        # - all stratify exprs
         candidates = list(
             exprs.Expr.list_subexprs(
                 analyzer.select_list,
@@ -980,10 +1027,12 @@ class Planner:
             candidates.extend(
                 exprs.Expr.subexprs(analyzer.filter, filter=sql_elements.contains, traverse_matches=False)
             )
-        if analyzer.group_by_clause is not None:
-            candidates.extend(
-                exprs.Expr.list_subexprs(analyzer.group_by_clause, filter=sql_elements.contains, traverse_matches=False)
-            )
+        candidates.extend(
+            exprs.Expr.list_subexprs(analyzer.grouping_exprs, filter=sql_elements.contains, traverse_matches=False)
+        )
+        candidates.extend(
+            exprs.Expr.list_subexprs(analyzer.stratify_exprs, filter=sql_elements.contains, traverse_matches=False)
+        )
         # not isinstance(...): we don't want to materialize Literals via a Select
         sql_exprs = exprs.ExprSet(e for e in candidates if not isinstance(e, exprs.Literal))
@@ -1005,8 +1054,15 @@ class Planner:
                     traverse_matches=False,
                 )
             )
             plan = exec.SqlScanNode(
-                tbl, row_builder, select_list=tbl_scan_exprs, set_pk=with_pk, exact_version_only=exact_version_only
+                tbl,
+                row_builder,
+                select_list=tbl_scan_exprs,
+                columns=[c for c in columns if c.get_tbl().id == tbl.tbl_id],
+                set_pk=with_pk,
+                cell_md_col_refs=cls._cell_md_col_refs(tbl_scan_exprs),
+                exact_version_only=exact_version_only,
             )
             tbl_scan_plans.append(plan)
@@ -1028,7 +1084,17 @@ class Planner:
             # we need to order the input for window functions
             plan.set_order_by(analyzer.get_window_fn_ob_clause())
-        plan = cls._insert_prefetch_node(tbl.tbl_version.id, row_builder, plan)
+        if analyzer.sample_clause is not None:
+            plan = exec.SqlSampleNode(
+                row_builder,
+                input=plan,
+                select_list=tbl_scan_exprs,
+                sample_clause=analyzer.sample_clause,
+                stratify_exprs=analyzer.stratify_exprs,
+            )
+        plan = cls._add_prefetch_node(tbl.tbl_version.id, row_builder.unique_exprs, plan)
+        plan = cls._add_cell_reconstruction_node(analyzer.all_exprs, plan)
         if analyzer.group_by_clause is not None:
             # we're doing grouping aggregation; the input of the AggregateNode are the grouping exprs plus the
@@ -1050,26 +1116,12 @@ class Planner:
                 sql_elements.contains_all(analyzer.select_list)
                 and sql_elements.contains_all(analyzer.grouping_exprs)
                 and isinstance(plan, exec.SqlNode)
-                and plan.to_cte(keep_pk=(sample_clause is not None)) is not None
+                and plan.to_cte() is not None
             ):
-                if sample_clause is not None:
-                    plan = exec.SqlSampleNode(
-                        row_builder,
-                        input=plan,
-                        select_list=analyzer.select_list,
-                        stratify_exprs=analyzer.group_by_clause,
-                        sample_clause=sample_clause,
-                    )
-                else:
-                    plan = exec.SqlAggregationNode(
-                        row_builder,
-                        input=plan,
-                        select_list=analyzer.select_list,
-                        group_by_items=analyzer.group_by_clause,
-                    )
+                plan = exec.SqlAggregationNode(
+                    row_builder, input=plan, select_list=analyzer.select_list, group_by_items=analyzer.group_by_clause
+                )
             else:
-                if sample_clause is not None:
-                    raise excs.Error('Sample clause not supported with Python aggregation')
                 input_sql_node = plan.get_node(exec.SqlNode)
                 assert combined_ordering is not None
                 input_sql_node.set_order_by(combined_ordering)
@@ -1086,6 +1138,7 @@ class Planner:
                 if not agg_output.issuperset(exprs.ExprSet(eval_ctx.target_exprs)):
                     # we need an ExprEvalNode to evaluate the remaining output exprs
                     plan = exec.ExprEvalNode(row_builder, eval_ctx.target_exprs, agg_output, input=plan)
+                plan = cls._add_save_node(plan)
         else:
             if not exprs.ExprSet(sql_exprs).issuperset(exprs.ExprSet(eval_ctx.target_exprs)):
                 # we need an ExprEvalNode to evaluate the remaining output exprs
@@ -1119,26 +1172,24 @@ class Planner:
         return Analyzer(FromClause(tbls=[tbl]), [], where_clause=where_clause)
     @classmethod
-    def create_add_column_plan(
-        cls, tbl: catalog.TableVersionPath, col: catalog.Column
-    ) -> tuple[exec.ExecNode, Optional[int]]:
+    def create_add_column_plan(cls, tbl: catalog.TableVersionPath, col: catalog.Column) -> exec.ExecNode:
         """Creates a plan for InsertableTable.add_column()
         Returns:
             plan: the plan to execute
             value_expr slot idx for the plan output (for computed cols)
         """
         assert isinstance(tbl, catalog.TableVersionPath)
-        row_builder = exprs.RowBuilder(output_exprs=[], columns=[col], input_exprs=[])
+        row_builder = exprs.RowBuilder(output_exprs=[], columns=[col], input_exprs=[], tbl=tbl.tbl_version.get())
         analyzer = Analyzer(FromClause(tbls=[tbl]), row_builder.default_eval_ctx.target_exprs)
         plan = cls._create_query_plan(
             row_builder=row_builder, analyzer=analyzer, eval_ctx=row_builder.default_eval_ctx, with_pk=True
         )
         plan.ctx.batch_size = 16
         plan.ctx.show_pbar = True
         plan.ctx.ignore_errors = True
+        computed_exprs = row_builder.output_exprs - row_builder.input_exprs
+        plan.ctx.num_computed_exprs = len(computed_exprs)  # we are adding a computed column, so we need to evaluate it
+        plan = cls._add_save_node(plan)
-        # we want to flush images
-        if col.is_computed and col.is_stored and col.col_type.is_image_type():
-            plan.set_stored_img_cols(row_builder.output_slot_idxs())
-        value_expr_slot_idx = row_builder.output_slot_idxs()[0].slot_idx if col.is_computed else None
-        return plan, value_expr_slot_idx
+        return plan

pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl

Potentially problematic release.

pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl