pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. pixeltable/__init__.py +42 -8
  2. pixeltable/{dataframe.py → _query.py} +470 -206
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +5 -4
  5. pixeltable/catalog/catalog.py +1785 -432
  6. pixeltable/catalog/column.py +190 -113
  7. pixeltable/catalog/dir.py +2 -4
  8. pixeltable/catalog/globals.py +19 -46
  9. pixeltable/catalog/insertable_table.py +191 -98
  10. pixeltable/catalog/path.py +63 -23
  11. pixeltable/catalog/schema_object.py +11 -15
  12. pixeltable/catalog/table.py +843 -436
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +978 -657
  15. pixeltable/catalog/table_version_handle.py +72 -16
  16. pixeltable/catalog/table_version_path.py +112 -43
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +134 -90
  20. pixeltable/config.py +134 -22
  21. pixeltable/env.py +471 -157
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +4 -1
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +11 -7
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +106 -56
  37. pixeltable/exec/globals.py +35 -0
  38. pixeltable/exec/in_memory_data_node.py +19 -19
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +351 -84
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +36 -23
  46. pixeltable/exprs/column_ref.py +213 -89
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +164 -54
  50. pixeltable/exprs/expr.py +70 -44
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +100 -40
  54. pixeltable/exprs/globals.py +2 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +18 -32
  57. pixeltable/exprs/is_null.py +7 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +27 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +167 -67
  64. pixeltable/exprs/rowid_ref.py +25 -10
  65. pixeltable/exprs/similarity_expr.py +58 -40
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +17 -11
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +29 -27
  78. pixeltable/func/signature.py +46 -19
  79. pixeltable/func/tools.py +31 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +16 -0
  82. pixeltable/functions/anthropic.py +123 -77
  83. pixeltable/functions/audio.py +147 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +7 -4
  86. pixeltable/functions/deepseek.py +35 -43
  87. pixeltable/functions/document.py +81 -0
  88. pixeltable/functions/fal.py +76 -0
  89. pixeltable/functions/fireworks.py +11 -20
  90. pixeltable/functions/gemini.py +195 -39
  91. pixeltable/functions/globals.py +142 -14
  92. pixeltable/functions/groq.py +108 -0
  93. pixeltable/functions/huggingface.py +1056 -24
  94. pixeltable/functions/image.py +115 -57
  95. pixeltable/functions/json.py +1 -1
  96. pixeltable/functions/llama_cpp.py +28 -13
  97. pixeltable/functions/math.py +67 -5
  98. pixeltable/functions/mistralai.py +18 -55
  99. pixeltable/functions/net.py +70 -0
  100. pixeltable/functions/ollama.py +20 -13
  101. pixeltable/functions/openai.py +240 -226
  102. pixeltable/functions/openrouter.py +143 -0
  103. pixeltable/functions/replicate.py +4 -4
  104. pixeltable/functions/reve.py +250 -0
  105. pixeltable/functions/string.py +239 -69
  106. pixeltable/functions/timestamp.py +16 -16
  107. pixeltable/functions/together.py +24 -84
  108. pixeltable/functions/twelvelabs.py +188 -0
  109. pixeltable/functions/util.py +6 -1
  110. pixeltable/functions/uuid.py +30 -0
  111. pixeltable/functions/video.py +1515 -107
  112. pixeltable/functions/vision.py +8 -8
  113. pixeltable/functions/voyageai.py +289 -0
  114. pixeltable/functions/whisper.py +16 -8
  115. pixeltable/functions/whisperx.py +179 -0
  116. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  117. pixeltable/globals.py +362 -115
  118. pixeltable/index/base.py +17 -21
  119. pixeltable/index/btree.py +28 -22
  120. pixeltable/index/embedding_index.py +100 -118
  121. pixeltable/io/__init__.py +4 -2
  122. pixeltable/io/datarows.py +8 -7
  123. pixeltable/io/external_store.py +56 -105
  124. pixeltable/io/fiftyone.py +13 -13
  125. pixeltable/io/globals.py +31 -30
  126. pixeltable/io/hf_datasets.py +61 -16
  127. pixeltable/io/label_studio.py +74 -70
  128. pixeltable/io/lancedb.py +3 -0
  129. pixeltable/io/pandas.py +21 -12
  130. pixeltable/io/parquet.py +25 -105
  131. pixeltable/io/table_data_conduit.py +250 -123
  132. pixeltable/io/utils.py +4 -4
  133. pixeltable/iterators/__init__.py +2 -1
  134. pixeltable/iterators/audio.py +26 -25
  135. pixeltable/iterators/base.py +9 -3
  136. pixeltable/iterators/document.py +112 -78
  137. pixeltable/iterators/image.py +12 -15
  138. pixeltable/iterators/string.py +11 -4
  139. pixeltable/iterators/video.py +523 -120
  140. pixeltable/metadata/__init__.py +14 -3
  141. pixeltable/metadata/converters/convert_13.py +2 -2
  142. pixeltable/metadata/converters/convert_18.py +2 -2
  143. pixeltable/metadata/converters/convert_19.py +2 -2
  144. pixeltable/metadata/converters/convert_20.py +2 -2
  145. pixeltable/metadata/converters/convert_21.py +2 -2
  146. pixeltable/metadata/converters/convert_22.py +2 -2
  147. pixeltable/metadata/converters/convert_24.py +2 -2
  148. pixeltable/metadata/converters/convert_25.py +2 -2
  149. pixeltable/metadata/converters/convert_26.py +2 -2
  150. pixeltable/metadata/converters/convert_29.py +4 -4
  151. pixeltable/metadata/converters/convert_30.py +34 -21
  152. pixeltable/metadata/converters/convert_34.py +2 -2
  153. pixeltable/metadata/converters/convert_35.py +9 -0
  154. pixeltable/metadata/converters/convert_36.py +38 -0
  155. pixeltable/metadata/converters/convert_37.py +15 -0
  156. pixeltable/metadata/converters/convert_38.py +39 -0
  157. pixeltable/metadata/converters/convert_39.py +124 -0
  158. pixeltable/metadata/converters/convert_40.py +73 -0
  159. pixeltable/metadata/converters/convert_41.py +12 -0
  160. pixeltable/metadata/converters/convert_42.py +9 -0
  161. pixeltable/metadata/converters/convert_43.py +44 -0
  162. pixeltable/metadata/converters/util.py +20 -31
  163. pixeltable/metadata/notes.py +9 -0
  164. pixeltable/metadata/schema.py +140 -53
  165. pixeltable/metadata/utils.py +74 -0
  166. pixeltable/mypy/__init__.py +3 -0
  167. pixeltable/mypy/mypy_plugin.py +123 -0
  168. pixeltable/plan.py +382 -115
  169. pixeltable/share/__init__.py +1 -1
  170. pixeltable/share/packager.py +547 -83
  171. pixeltable/share/protocol/__init__.py +33 -0
  172. pixeltable/share/protocol/common.py +165 -0
  173. pixeltable/share/protocol/operation_types.py +33 -0
  174. pixeltable/share/protocol/replica.py +119 -0
  175. pixeltable/share/publish.py +257 -59
  176. pixeltable/store.py +311 -194
  177. pixeltable/type_system.py +373 -211
  178. pixeltable/utils/__init__.py +2 -3
  179. pixeltable/utils/arrow.py +131 -17
  180. pixeltable/utils/av.py +298 -0
  181. pixeltable/utils/azure_store.py +346 -0
  182. pixeltable/utils/coco.py +6 -6
  183. pixeltable/utils/code.py +3 -3
  184. pixeltable/utils/console_output.py +4 -1
  185. pixeltable/utils/coroutine.py +6 -23
  186. pixeltable/utils/dbms.py +32 -6
  187. pixeltable/utils/description_helper.py +4 -5
  188. pixeltable/utils/documents.py +7 -18
  189. pixeltable/utils/exception_handler.py +7 -30
  190. pixeltable/utils/filecache.py +6 -6
  191. pixeltable/utils/formatter.py +86 -48
  192. pixeltable/utils/gcs_store.py +295 -0
  193. pixeltable/utils/http.py +133 -0
  194. pixeltable/utils/http_server.py +2 -3
  195. pixeltable/utils/iceberg.py +1 -2
  196. pixeltable/utils/image.py +17 -0
  197. pixeltable/utils/lancedb.py +90 -0
  198. pixeltable/utils/local_store.py +322 -0
  199. pixeltable/utils/misc.py +5 -0
  200. pixeltable/utils/object_stores.py +573 -0
  201. pixeltable/utils/pydantic.py +60 -0
  202. pixeltable/utils/pytorch.py +5 -6
  203. pixeltable/utils/s3_store.py +527 -0
  204. pixeltable/utils/sql.py +26 -0
  205. pixeltable/utils/system.py +30 -0
  206. pixeltable-0.5.7.dist-info/METADATA +579 -0
  207. pixeltable-0.5.7.dist-info/RECORD +227 -0
  208. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  209. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  210. pixeltable/__version__.py +0 -3
  211. pixeltable/catalog/named_function.py +0 -40
  212. pixeltable/ext/__init__.py +0 -17
  213. pixeltable/ext/functions/__init__.py +0 -11
  214. pixeltable/ext/functions/whisperx.py +0 -77
  215. pixeltable/utils/media_store.py +0 -77
  216. pixeltable/utils/s3.py +0 -17
  217. pixeltable-0.3.14.dist-info/METADATA +0 -434
  218. pixeltable-0.3.14.dist-info/RECORD +0 -186
  219. pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
  220. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/plan.py CHANGED
@@ -3,9 +3,10 @@ from __future__ import annotations
 import dataclasses
 import enum
 from textwrap import dedent
-from typing import Any, Iterable, Literal, Optional, Sequence
+from typing import Any, Iterable, Literal, Sequence, cast
 from uuid import UUID
 
+import pgvector.sqlalchemy  # type: ignore[import-untyped]
 import sqlalchemy as sql
 
 import pixeltable as pxt
@@ -65,7 +66,7 @@ class JoinClause:
     """Corresponds to a single 'JOIN ... ON (...)' clause in a SELECT statement; excludes the joined table."""
 
     join_type: JoinType
-    join_predicate: Optional[exprs.Expr]  # None for join_type == CROSS
+    join_predicate: exprs.Expr | None  # None for join_type == CROSS
 
 
 @dataclasses.dataclass
@@ -75,6 +76,83 @@ class FromClause:
     tbls: list[catalog.TableVersionPath]
     join_clauses: list[JoinClause] = dataclasses.field(default_factory=list)
 
+    @property
+    def _first_tbl(self) -> catalog.TableVersionPath:
+        assert len(self.tbls) == 1
+        return self.tbls[0]
+
+
+@dataclasses.dataclass
+class SampleClause:
+    """Defines a sampling clause for a table."""
+
+    version: int | None
+    n: int | None
+    n_per_stratum: int | None
+    fraction: float | None
+    seed: int | None
+    stratify_exprs: list[exprs.Expr] | None
+
+    # The version of the hashing algorithm used for ordering and fractional sampling.
+    CURRENT_VERSION = 1
+
+    def __post_init__(self) -> None:
+        # If no version was provided, provide the default version
+        if self.version is None:
+            self.version = self.CURRENT_VERSION
+
+    @property
+    def is_stratified(self) -> bool:
+        """Check if the sampling is stratified"""
+        return self.stratify_exprs is not None and len(self.stratify_exprs) > 0
+
+    @property
+    def is_repeatable(self) -> bool:
+        """Return true if the same rows will continue to be sampled if source rows are added or deleted."""
+        return not self.is_stratified and self.fraction is not None
+
+    def display_str(self, inline: bool = False) -> str:
+        return str(self)
+
+    def as_dict(self) -> dict:
+        """Return a dictionary representation of the object"""
+        d = dataclasses.asdict(self)
+        d['_classname'] = self.__class__.__name__
+        if self.is_stratified:
+            d['stratify_exprs'] = [e.as_dict() for e in self.stratify_exprs]
+        return d
+
+    @classmethod
+    def from_dict(cls, d: dict) -> SampleClause:
+        """Create a SampleClause from a dictionary representation"""
+        d_cleaned = {key: value for key, value in d.items() if key != '_classname'}
+        s = cls(**d_cleaned)
+        if s.is_stratified:
+            s.stratify_exprs = [exprs.Expr.from_dict(e) for e in d_cleaned.get('stratify_exprs', [])]
+        return s
+
+    def __repr__(self) -> str:
+        s = ','.join(e.display_str(inline=True) for e in self.stratify_exprs)
+        return (
+            f'sample_{self.version}(n={self.n}, n_per_stratum={self.n_per_stratum}, '
+            f'fraction={self.fraction}, seed={self.seed}, [{s}])'
+        )
+
+    @classmethod
+    def fraction_to_md5_hex(cls, fraction: float) -> str:
+        """Return the string representation of an approximation (to ~1e-9) of a fraction of the total space
+        of md5 hash values.
+        This is used for fractional sampling.
+        """
+        # Maximum count for the upper 32 bits of MD5: 2^32
+        max_md5_value = (2**32) - 1
+
+        # Calculate the fraction of this value
+        threshold_int = max_md5_value * int(1_000_000_000 * fraction) // 1_000_000_000
+
+        # Convert to hexadecimal string with padding
+        return format(threshold_int, '08x') + 'ffffffffffffffffffffffff'
+
 
 class Analyzer:
     """
@@ -84,17 +162,19 @@ class Analyzer:
     from_clause: FromClause
     all_exprs: list[exprs.Expr]  # union of all exprs, aside from sql_where_clause
     select_list: list[exprs.Expr]
-    group_by_clause: Optional[list[exprs.Expr]]  # None for non-aggregate queries; [] for agg query w/o grouping
+    group_by_clause: list[exprs.Expr] | None  # None for non-aggregate queries; [] for agg query w/o grouping
     grouping_exprs: list[exprs.Expr]  # [] for non-aggregate queries or agg query w/o grouping
     order_by_clause: OrderByClause
+    stratify_exprs: list[exprs.Expr]  # [] if no stratification is required
+    sample_clause: SampleClause | None  # None if no sampling clause is present
 
     sql_elements: exprs.SqlElementCache
 
     # Where clause of the Select stmt of the SQL scan
-    sql_where_clause: Optional[exprs.Expr]
+    sql_where_clause: exprs.Expr | None
 
     # filter predicate applied to output rows of the SQL scan
-    filter: Optional[exprs.Expr]
+    filter: exprs.Expr | None
 
     agg_fn_calls: list[exprs.FunctionCall]  # grouping aggregation (ie, not window functions)
     window_fn_calls: list[exprs.FunctionCall]
@@ -104,9 +184,10 @@
         self,
         from_clause: FromClause,
         select_list: Sequence[exprs.Expr],
-        where_clause: Optional[exprs.Expr] = None,
-        group_by_clause: Optional[list[exprs.Expr]] = None,
-        order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None,
+        where_clause: exprs.Expr | None = None,
+        group_by_clause: list[exprs.Expr] | None = None,
+        order_by_clause: list[tuple[exprs.Expr, bool]] | None = None,
+        sample_clause: SampleClause | None = None,
     ):
         if order_by_clause is None:
             order_by_clause = []
@@ -120,6 +201,11 @@
         self.group_by_clause = (
             [e.resolve_computed_cols() for e in group_by_clause] if group_by_clause is not None else None
         )
+        self.sample_clause = sample_clause
+        if self.sample_clause is not None and self.sample_clause.is_stratified:
+            self.stratify_exprs = [e.resolve_computed_cols() for e in sample_clause.stratify_exprs]
+        else:
+            self.stratify_exprs = []
         self.order_by_clause = [OrderByItem(e.resolve_computed_cols(), asc) for e, asc in order_by_clause]
 
         self.sql_where_clause = None
@@ -135,8 +221,11 @@
                 self.all_exprs.append(join_clause.join_predicate)
         if self.group_by_clause is not None:
             self.all_exprs.extend(self.group_by_clause)
+        self.all_exprs.extend(self.stratify_exprs)
         self.all_exprs.extend(e for e, _ in self.order_by_clause)
         if self.filter is not None:
+            if sample_clause is not None:
+                raise excs.Error(f'Filter {self.filter} not expressible in SQL')
             self.all_exprs.append(self.filter)
 
         self.agg_order_by = []
@@ -241,7 +330,7 @@
         row_builder.set_slot_idxs(self.agg_fn_calls)
         row_builder.set_slot_idxs(self.agg_order_by)
 
-    def get_window_fn_ob_clause(self) -> Optional[OrderByClause]:
+    def get_window_fn_ob_clause(self) -> OrderByClause | None:
         clause: list[OrderByClause] = []
         for fn_call in self.window_fn_calls:
             # window functions require ordering by the group_by/order_by clauses
@@ -257,21 +346,19 @@
 
 
 class Planner:
-    # TODO: create an exec.CountNode and change this to create_count_plan()
     @classmethod
-    def create_count_stmt(cls, tbl: catalog.TableVersionPath, where_clause: Optional[exprs.Expr] = None) -> sql.Select:
-        stmt = sql.select(sql.func.count())
-        refd_tbl_ids: set[UUID] = set()
-        if where_clause is not None:
-            analyzer = cls.analyze(tbl, where_clause)
-            if analyzer.filter is not None:
-                raise excs.Error(f'Filter {analyzer.filter} not expressible in SQL')
-            clause_element = analyzer.sql_where_clause.sql_expr(analyzer.sql_elements)
-            assert clause_element is not None
-            stmt = stmt.where(clause_element)
-            refd_tbl_ids = where_clause.tbl_ids()
-        stmt = exec.SqlScanNode.create_from_clause(tbl, stmt, refd_tbl_ids)
-        return stmt
+    def create_count_stmt(cls, query: 'pxt.Query') -> sql.Select:
+        """Creates a SQL SELECT COUNT(*) statement for counting rows in a Query."""
+        # Create the query plan
+        plan = query._create_query_plan()
+        sql_node = plan.get_node(exec.SqlNode)
+        assert sql_node is not None
+        if sql_node.py_filter is not None:
+            raise excs.Error('count() cannot be used with Python-only filters. Use collect() instead.')
+        # Get the SQL statement from the SqlNode as a CTE
+        cte, _ = sql_node.to_cte(keep_pk=True)
+        count_stmt = sql.select(sql.func.count().label('all_count')).select_from(cte)
+        return count_stmt
 
     @classmethod
     def create_insert_plan(
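For context on the rewritten create_count_stmt(): instead of assembling its own WHERE clause, it now takes the query's SQL scan as a CTE and counts over it. A minimal standalone SQLAlchemy sketch of the same pattern (the table, columns, and filter here are hypothetical stand-ins for the SqlNode's SELECT):

    import sqlalchemy as sql

    t = sql.table('media', sql.column('id'), sql.column('width'))
    scan = sql.select(t.c.id).where(t.c.width > 512)  # stand-in for sql_node.to_cte()
    cte = scan.cte()
    count_stmt = sql.select(sql.func.count().label('all_count')).select_from(cte)
    # renders roughly as: SELECT count(*) AS all_count FROM (SELECT id FROM media WHERE width > 512) AS anon_1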
@@ -285,21 +372,12 @@
 
         cls.__check_valid_columns(tbl, stored_cols, 'inserted into')
 
-        row_builder = exprs.RowBuilder([], stored_cols, [])
+        row_builder = exprs.RowBuilder([], stored_cols, [], tbl)
 
         # create InMemoryDataNode for 'rows'
-        plan: exec.ExecNode = exec.InMemoryDataNode(
-            TableVersionHandle(tbl.id, tbl.effective_version), rows, row_builder, tbl.next_rowid
-        )
+        plan: exec.ExecNode = exec.InMemoryDataNode(tbl.handle, rows, row_builder, tbl.next_row_id)
 
-        media_input_col_info = [
-            exprs.ColumnSlotIdx(col_ref.col, col_ref.slot_idx)
-            for col_ref in row_builder.input_exprs
-            if isinstance(col_ref, exprs.ColumnRef) and col_ref.col_type.is_media_type()
-        ]
-        if len(media_input_col_info) > 0:
-            # prefetch external files for all input column refs
-            plan = exec.CachePrefetchNode(tbl.id, media_input_col_info, input=plan)
+        plan = cls._add_prefetch_node(tbl.id, row_builder.input_exprs, input_node=plan)
 
         computed_exprs = row_builder.output_exprs - row_builder.input_exprs
         if len(computed_exprs) > 0:
@@ -307,10 +385,9 @@
             plan = exec.ExprEvalNode(
                 row_builder, computed_exprs, plan.output_exprs, input=plan, maintain_input_order=False
             )
+        if any(c.col_type.supports_file_offloading() for c in stored_cols):
+            plan = exec.CellMaterializationNode(plan)
 
-        stored_col_info = row_builder.output_slot_idxs()
-        stored_img_col_info = [info for info in stored_col_info if info.col.col_type.is_image_type()]
-        plan.set_stored_img_cols(stored_img_col_info)
         plan.set_ctx(
             exec.ExecContext(
                 row_builder,
@@ -320,24 +397,34 @@
                 ignore_errors=ignore_errors,
             )
         )
+        plan = cls._add_save_node(plan)
+
         return plan
 
     @classmethod
-    def create_df_insert_plan(
-        cls, tbl: catalog.TableVersion, df: 'pxt.DataFrame', ignore_errors: bool
+    def rowid_columns(cls, target: TableVersionHandle, num_rowid_cols: int | None = None) -> list[exprs.Expr]:
+        """Return list of RowidRef for the given number of associated rowids"""
+        if num_rowid_cols is None:
+            num_rowid_cols = target.get().num_rowid_columns()
+        return [exprs.RowidRef(target, i) for i in range(num_rowid_cols)]
+
+    @classmethod
+    def create_query_insert_plan(
+        cls, tbl: catalog.TableVersion, query: 'pxt.Query', ignore_errors: bool
     ) -> exec.ExecNode:
         assert not tbl.is_view
-        plan = df._create_query_plan()  # ExecNode constructed by the DataFrame
+        plan = query._create_query_plan()  # ExecNode constructed by the Query
 
         # Modify the plan RowBuilder to register the output columns
-        for col_name, expr in zip(df.schema.keys(), df._select_list_exprs):
+        needs_cell_materialization = False
+        for col_name, expr in zip(query.schema.keys(), query._select_list_exprs):
             assert col_name in tbl.cols_by_name
             col = tbl.cols_by_name[col_name]
             plan.row_builder.add_table_column(col, expr.slot_idx)
+            needs_cell_materialization = needs_cell_materialization or col.col_type.supports_file_offloading()
 
-        stored_col_info = plan.row_builder.output_slot_idxs()
-        stored_img_col_info = [info for info in stored_col_info if info.col.col_type.is_image_type()]
-        plan.set_stored_img_cols(stored_img_col_info)
+        if needs_cell_materialization:
+            plan = exec.CellMaterializationNode(plan)
 
         plan.set_ctx(
             exec.ExecContext(
@@ -354,16 +441,18 @@
         tbl: catalog.TableVersionPath,
         update_targets: dict[catalog.Column, exprs.Expr],
         recompute_targets: list[catalog.Column],
-        where_clause: Optional[exprs.Expr],
+        where_clause: exprs.Expr | None,
         cascade: bool,
     ) -> tuple[exec.ExecNode, list[str], list[catalog.Column]]:
         """Creates a plan to materialize updated rows.
+
         The plan:
         - retrieves rows that are visible at the current version of the table
         - materializes all stored columns and the update targets
        - if cascade is True, recomputes all computed columns that transitively depend on the updated columns
           and copies the values of all other stored columns
        - if cascade is False, copies all columns that aren't update targets from the original rows
+
        Returns:
        - root node of the plan
        - list of qualified column names that are getting updated
@@ -373,26 +462,33 @@
         assert isinstance(tbl, catalog.TableVersionPath)
         target = tbl.tbl_version.get()  # the one we need to update
         updated_cols = list(update_targets.keys())
+        recomputed_cols: set[Column]
         if len(recompute_targets) > 0:
-            recomputed_cols = set(recompute_targets)
+            assert len(update_targets) == 0
+            recomputed_cols = {*recompute_targets}
+            if cascade:
+                recomputed_cols |= target.get_dependent_columns(recomputed_cols)
         else:
             recomputed_cols = target.get_dependent_columns(updated_cols) if cascade else set()
-        # regardless of cascade, we need to update all indices on any updated column
-        idx_val_cols = target.get_idx_val_columns(updated_cols)
-        recomputed_cols.update(idx_val_cols)
-        # we only need to recompute stored columns (unstored ones are substituted away)
-        recomputed_cols = {c for c in recomputed_cols if c.is_stored}
+        # regardless of cascade, we need to update all indices on any updated/recomputed column
+        modified_base_cols = [c for c in set(updated_cols) | recomputed_cols if c.get_tbl().id == target.id]
+        idx_val_cols = target.get_idx_val_columns(modified_base_cols)
+        recomputed_cols.update(idx_val_cols)
+        # we only need to recompute stored columns (unstored ones are substituted away)
+        recomputed_cols = {c for c in recomputed_cols if c.is_stored}
 
         cls.__check_valid_columns(tbl.tbl_version.get(), recomputed_cols, 'updated in')
 
-        recomputed_base_cols = {col for col in recomputed_cols if col.tbl == tbl.tbl_version}
+        # our query plan
+        # - evaluates the update targets and recomputed columns
+        # - copies all other stored columns
+        recomputed_base_cols = {col for col in recomputed_cols if col.get_tbl().id == tbl.tbl_version.id}
         copied_cols = [
             col
             for col in target.cols_by_id.values()
             if col.is_stored and col not in updated_cols and col not in recomputed_base_cols
         ]
-        select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in copied_cols]
-        select_list.extend(update_targets.values())
+        select_list: list[exprs.Expr] = list(update_targets.values())
 
         recomputed_exprs = [
             c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols
@@ -403,13 +499,25 @@
         select_list.extend(recomputed_exprs)
 
         # we need to retrieve the PK columns of the existing rows
-        plan = cls.create_query_plan(FromClause(tbls=[tbl]), select_list, where_clause=where_clause, ignore_errors=True)
-        all_base_cols = copied_cols + updated_cols + list(recomputed_base_cols)  # same order as select_list
+        plan = cls.create_query_plan(
+            FromClause(tbls=[tbl]),
+            select_list=select_list,
+            columns=copied_cols,
+            where_clause=where_clause,
+            ignore_errors=True,
+        )
+        evaluated_cols = updated_cols + list(recomputed_base_cols)  # same order as select_list
         # update row builder with column information
-        for i, col in enumerate(all_base_cols):
+        plan.row_builder.add_table_columns(copied_cols)
+        for i, col in enumerate(evaluated_cols):
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
+        plan.ctx.num_computed_exprs = len(recomputed_exprs)
+
+        plan = cls._add_cell_materialization_node(plan)
+        plan = cls._add_save_node(plan)
+
         recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
-        return plan, [f'{c.tbl.get().name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols
+        return plan, [f'{c.get_tbl().name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols
 
     @classmethod
     def __check_valid_columns(
@@ -429,6 +537,94 @@
                 .format(validation_error=col.value_expr.validation_error)
             )
 
+    @classmethod
+    def _cell_md_col_refs(cls, expr_list: Iterable[exprs.Expr]) -> list[exprs.ColumnRef]:
+        """Return list of ColumnRefs that need their cellmd values for reconstruction"""
+        json_col_refs = list(
+            exprs.Expr.list_subexprs(
+                expr_list,
+                expr_class=exprs.ColumnRef,
+                filter=lambda e: cast(exprs.ColumnRef, e).col.col_type.is_json_type(),
+                traverse_matches=False,
+            )
+        )
+
+        def needs_reconstruction(e: exprs.Expr) -> bool:
+            assert isinstance(e, exprs.ColumnRef)
+            # Vector-typed array columns are used for vector indexes, and are stored in the db
+            return e.col.col_type.is_array_type() and not isinstance(e.col.sa_col_type, pgvector.sqlalchemy.Vector)
+
+        array_col_refs = list(
+            exprs.Expr.list_subexprs(
+                expr_list, expr_class=exprs.ColumnRef, filter=needs_reconstruction, traverse_matches=False
+            )
+        )
+
+        binary_col_refs = list(
+            exprs.Expr.list_subexprs(
+                expr_list,
+                expr_class=exprs.ColumnRef,
+                filter=lambda e: cast(exprs.ColumnRef, e).col.col_type.is_binary_type(),
+                traverse_matches=False,
+            )
+        )
+
+        return json_col_refs + array_col_refs + binary_col_refs
+
+    @classmethod
+    def _add_cell_materialization_node(cls, input: exec.ExecNode) -> exec.ExecNode:
+        # we need a CellMaterializationNode if any of the evaluated output columns are json or array-typed
+        has_target_cols = any(
+            col.col_type.supports_file_offloading()
+            for col, slot_idx in input.row_builder.table_columns.items()
+            if slot_idx is not None
+        )
+        if has_target_cols:
+            return exec.CellMaterializationNode(input)
+        else:
+            return input
+
+    @classmethod
+    def _add_cell_reconstruction_node(cls, expr_list: list[exprs.Expr], input: exec.ExecNode) -> exec.ExecNode:
+        """
+        Add a CellReconstructionNode, if required by any of the exprs in expr_list.
+
+        Cell reconstruction is required for
+        1) all json-typed ColumnRefs that are not used as part of a JsonPath (the latter does its own reconstruction)
+           or as part of a ColumnPropertyRef
+        2) all array-typed ColumnRefs that are not used as part of a ColumnPropertyRef
+        """
+
+        def json_filter(e: exprs.Expr) -> bool:
+            if isinstance(e, exprs.JsonPath):
+                return not e.is_relative_path() and isinstance(e.anchor, exprs.ColumnRef)
+            if isinstance(e, exprs.ColumnPropertyRef):
+                return e.col_ref.col.col_type.is_json_type()
+            return isinstance(e, exprs.ColumnRef) and e.col.col_type.is_json_type()
+
+        def array_filter(e: exprs.Expr) -> bool:
+            if isinstance(e, exprs.ColumnPropertyRef):
+                return e.col_ref.col.col_type.is_array_type()
+            if not isinstance(e, exprs.ColumnRef):
+                return False
+            # Vector-typed array columns are used for vector indexes, and are stored in the db
+            return e.col.col_type.is_array_type() and not isinstance(e.col.sa_col_type, pgvector.sqlalchemy.Vector)
+
+        def binary_filter(e: exprs.Expr) -> bool:
+            return isinstance(e, exprs.ColumnRef) and e.col.col_type.is_binary_type()
+
+        json_candidates = list(exprs.Expr.list_subexprs(expr_list, filter=json_filter, traverse_matches=False))
+        json_refs = [e for e in json_candidates if isinstance(e, exprs.ColumnRef)]
+        array_candidates = list(exprs.Expr.list_subexprs(expr_list, filter=array_filter, traverse_matches=False))
+        array_refs = [e for e in array_candidates if isinstance(e, exprs.ColumnRef)]
+        binary_refs = list(
+            exprs.Expr.list_subexprs(expr_list, exprs.ColumnRef, filter=binary_filter, traverse_matches=False)
+        )
+        if len(json_refs) > 0 or len(array_refs) > 0 or len(binary_refs) > 0:
+            return exec.CellReconstructionNode(json_refs, array_refs, binary_refs, input.row_builder, input=input)
+        else:
+            return input
+
     @classmethod
     def create_batch_update_plan(
         cls,
@@ -447,8 +643,8 @@
         """
         assert isinstance(tbl, catalog.TableVersionPath)
         target = tbl.tbl_version.get()  # the one we need to update
-        sa_key_cols: list[sql.Column] = []
-        key_vals: list[tuple] = []
+        sa_key_cols: list[sql.Column]
+        key_vals: list[tuple]
         if len(rowids) > 0:
             sa_key_cols = target.store_tbl.rowid_columns()
             key_vals = rowids
@@ -461,18 +657,18 @@
         updated_cols = batch[0].keys() - target.primary_key_columns()
         recomputed_cols = target.get_dependent_columns(updated_cols) if cascade else set()
         # regardless of cascade, we need to update all indices on any updated column
-        idx_val_cols = target.get_idx_val_columns(updated_cols)
+        modified_base_cols = [c for c in set(updated_cols) | recomputed_cols if c.get_tbl().id == target.id]
+        idx_val_cols = target.get_idx_val_columns(modified_base_cols)
         recomputed_cols.update(idx_val_cols)
         # we only need to recompute stored columns (unstored ones are substituted away)
         recomputed_cols = {c for c in recomputed_cols if c.is_stored}
-        recomputed_base_cols = {col for col in recomputed_cols if col.tbl == target}
+        recomputed_base_cols = {col for col in recomputed_cols if col.get_tbl().id == target.id}
         copied_cols = [
             col
             for col in target.cols_by_id.values()
             if col.is_stored and col not in updated_cols and col not in recomputed_base_cols
         ]
-        select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in copied_cols]
-        select_list.extend(exprs.ColumnRef(col) for col in updated_cols)
+        select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in updated_cols]
 
         recomputed_exprs = [
             c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols
@@ -488,25 +684,39 @@
         sql_exprs = list(
             exprs.Expr.list_subexprs(analyzer.all_exprs, filter=analyzer.sql_elements.contains, traverse_matches=False)
         )
-        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], sql_exprs)
+        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], sql_exprs, target)
         analyzer.finalize(row_builder)
-        sql_lookup_node = exec.SqlLookupNode(tbl, row_builder, sql_exprs, sa_key_cols, key_vals)
+
+        cell_md_col_refs = cls._cell_md_col_refs(sql_exprs)
+        sql_lookup_node = exec.SqlLookupNode(
+            tbl,
+            row_builder,
+            sql_exprs,
+            columns=copied_cols,
+            sa_key_cols=sa_key_cols,
+            key_vals=key_vals,
+            cell_md_col_refs=cell_md_col_refs,
+        )
         col_vals = [{col: row[col].val for col in updated_cols} for row in batch]
         row_update_node = exec.RowUpdateNode(tbl, key_vals, len(rowids) > 0, col_vals, row_builder, sql_lookup_node)
         plan: exec.ExecNode = row_update_node
         if not cls._is_contained_in(analyzer.select_list, sql_exprs):
             # we need an ExprEvalNode to evaluate the remaining output exprs
             plan = exec.ExprEvalNode(row_builder, analyzer.select_list, sql_exprs, input=plan)
+
         # update row builder with column information
-        all_base_cols = copied_cols + list(updated_cols) + list(recomputed_base_cols)  # same order as select_list
+        evaluated_cols = list(updated_cols) + list(recomputed_base_cols)  # same order as select_list
         row_builder.set_slot_idxs(select_list, remove_duplicates=False)
-        for i, col in enumerate(all_base_cols):
+        plan.row_builder.add_table_columns(copied_cols)
+        for i, col in enumerate(evaluated_cols):
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
-
-        ctx = exec.ExecContext(row_builder)
-        # we're returning everything to the user, so we might as well do it in a single batch
+        ctx = exec.ExecContext(row_builder, num_computed_exprs=len(recomputed_exprs))
+        # TODO: correct batch size?
         ctx.batch_size = 0
         plan.set_ctx(ctx)
+
+        plan = cls._add_cell_materialization_node(plan)
+        plan = cls._add_save_node(plan)
         recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
         return (
             plan,
@@ -556,13 +766,13 @@
             ignore_errors=True,
             exact_version_only=view.get_bases(),
         )
-        for i, col in enumerate(copied_cols + list(recomputed_cols)):  # same order as select_list
+        plan.ctx.num_computed_exprs = len(recomputed_exprs)
+        materialized_cols = copied_cols + list(recomputed_cols)  # same order as select_list
+        for i, col in enumerate(materialized_cols):
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
-        # TODO: avoid duplication with view_load_plan() logic (where does this belong?)
-        stored_img_col_info = [
-            info for info in plan.row_builder.output_slot_idxs() if info.col.col_type.is_image_type()
-        ]
-        plan.set_stored_img_cols(stored_img_col_info)
+        plan = cls._add_cell_materialization_node(plan)
+        plan = cls._add_save_node(plan)
+
         return plan
 
     @classmethod
@@ -591,8 +801,13 @@
         # 2. for component views: iterator args
         iterator_args = [target.iterator_args] if target.iterator_args is not None else []
 
-        row_builder = exprs.RowBuilder(iterator_args, stored_cols, [])
+        from_clause = FromClause(tbls=[view.base])
+        base_analyzer = Analyzer(
+            from_clause, iterator_args, where_clause=target.predicate, sample_clause=target.sample_clause
+        )
+        row_builder = exprs.RowBuilder(base_analyzer.all_exprs, stored_cols, [], target, for_view_load=True)
 
+        # if we're propagating an insert, we only want to see those base rows that were created for the current version
         # execution plan:
         # 1. materialize exprs computed from the base that are needed for stored view columns
         # 2. if it's an iterator view, expand the base rows into component rows
@@ -603,8 +818,11 @@
             for e in row_builder.default_eval_ctx.target_exprs
             if e.is_bound_by([view]) and not e.is_bound_by([view.base])
         ]
-        # if we're propagating an insert, we only want to see those base rows that were created for the current version
-        base_analyzer = Analyzer(FromClause(tbls=[view.base]), base_output_exprs, where_clause=target.predicate)
+
+        # Create a new analyzer reflecting exactly what is required from the base table
+        base_analyzer = Analyzer(
+            from_clause, base_output_exprs, where_clause=target.predicate, sample_clause=target.sample_clause
+        )
         base_eval_ctx = row_builder.create_eval_ctx(base_analyzer.all_exprs)
         plan = cls._create_query_plan(
             row_builder=row_builder,
@@ -621,10 +839,12 @@
             row_builder, output_exprs=view_output_exprs, input_exprs=base_output_exprs, input=plan
         )
 
-        stored_img_col_info = [info for info in row_builder.output_slot_idxs() if info.col.col_type.is_image_type()]
-        plan.set_stored_img_cols(stored_img_col_info)
         exec_ctx.ignore_errors = True
         plan.set_ctx(exec_ctx)
+        if any(c.col_type.supports_file_offloading() for c in stored_cols):
+            plan = exec.CellMaterializationNode(plan)
+        plan = cls._add_save_node(plan)
+
        return plan, len(row_builder.default_eval_ctx.target_exprs)
 
     @classmethod
@@ -635,7 +855,7 @@
             raise excs.Error(f'Join predicate {join_clause.join_predicate} not expressible in SQL')
 
     @classmethod
-    def _create_combined_ordering(cls, analyzer: Analyzer, verify_agg: bool) -> Optional[OrderByClause]:
+    def _create_combined_ordering(cls, analyzer: Analyzer, verify_agg: bool) -> OrderByClause | None:
         """Verify that the various ordering requirements don't conflict and return a combined ordering"""
         ob_clauses: list[OrderByClause] = [analyzer.order_by_clause.copy()]
@@ -669,22 +889,29 @@
             combined_ordering = combined
         return combined_ordering
 
+    @classmethod
+    def _add_save_node(cls, input_node: exec.ExecNode) -> exec.ExecNode:
+        """Add an ObjectStoreSaveNode, if needed."""
+        media_col_info = input_node.row_builder.media_output_col_info
+        if len(media_col_info) == 0:
+            return input_node
+        else:
+            return exec.ObjectStoreSaveNode(media_col_info, input_node)
+
     @classmethod
     def _is_contained_in(cls, l1: Iterable[exprs.Expr], l2: Iterable[exprs.Expr]) -> bool:
         """Returns True if l1 is contained in l2"""
         return {e.id for e in l1} <= {e.id for e in l2}
 
     @classmethod
-    def _insert_prefetch_node(
-        cls, tbl_id: UUID, row_builder: exprs.RowBuilder, input_node: exec.ExecNode
+    def _add_prefetch_node(
+        cls, tbl_id: UUID, expressions: Iterable[exprs.Expr], input_node: exec.ExecNode
     ) -> exec.ExecNode:
-        """Returns a CachePrefetchNode into the plan if needed, otherwise returns input"""
+        """Add a CachePrefetch node, if needed."""
         # we prefetch external files for all media ColumnRefs, even those that aren't part of the dependencies
         # of output_exprs: if unstored iterator columns are present, we might need to materialize ColumnRefs that
         # aren't explicitly captured as dependencies
-        media_col_refs = [
-            e for e in list(row_builder.unique_exprs) if isinstance(e, exprs.ColumnRef) and e.col_type.is_media_type()
-        ]
+        media_col_refs = [e for e in expressions if isinstance(e, exprs.ColumnRef) and e.col_type.is_media_type()]
         if len(media_col_refs) == 0:
             return input_node
         # we need to prefetch external files for media column types
@@ -696,32 +923,48 @@
     def create_query_plan(
         cls,
         from_clause: FromClause,
-        select_list: Optional[list[exprs.Expr]] = None,
-        where_clause: Optional[exprs.Expr] = None,
-        group_by_clause: Optional[list[exprs.Expr]] = None,
-        order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None,
-        limit: Optional[exprs.Expr] = None,
+        select_list: list[exprs.Expr] | None = None,
+        columns: list[catalog.Column] | None = None,
+        where_clause: exprs.Expr | None = None,
+        group_by_clause: list[exprs.Expr] | None = None,
+        order_by_clause: list[tuple[exprs.Expr, bool]] | None = None,
+        limit: exprs.Expr | None = None,
+        sample_clause: SampleClause | None = None,
         ignore_errors: bool = False,
-        exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
+        exact_version_only: list[catalog.TableVersionHandle] | None = None,
     ) -> exec.ExecNode:
-        """Return plan for executing a query.
+        """
+        Return plan for executing a query.
+
+        The plan:
+        - materializes the values of select_list exprs into their respective slots
+        - materializes cell values of 'columns' (and their cellmd, if applicable) into DataRow.cell_vals/cell_md
+
         Updates 'select_list' in place to make it executable.
        TODO: make exact_version_only a flag and use the versions from tbl
        """
        if select_list is None:
            select_list = []
+        if columns is None:
+            columns = []
        if order_by_clause is None:
            order_by_clause = []
        if exact_version_only is None:
            exact_version_only = []
+
        analyzer = Analyzer(
            from_clause,
            select_list,
            where_clause=where_clause,
            group_by_clause=group_by_clause,
            order_by_clause=order_by_clause,
+            sample_clause=sample_clause,
        )
-        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], [])
+        # If the from_clause has a single table, we can use it as the context table for the RowBuilder.
+        # Otherwise there is no context table, but that's ok, because the context table is only needed for
+        # table mutations, which can't happen during a join.
+        context_tbl = from_clause.tbls[0].tbl_version.get() if len(from_clause.tbls) == 1 else None
+        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], [], context_tbl)
 
         analyzer.finalize(row_builder)
         # select_list: we need to materialize everything that's been collected
@@ -731,6 +974,7 @@
             row_builder=row_builder,
             analyzer=analyzer,
             eval_ctx=eval_ctx,
+            columns=columns,
             limit=limit,
             with_pk=True,
             exact_version_only=exact_version_only,
@@ -746,9 +990,10 @@
         row_builder: exprs.RowBuilder,
         analyzer: Analyzer,
         eval_ctx: exprs.RowBuilder.EvalCtx,
-        limit: Optional[exprs.Expr] = None,
+        columns: list[catalog.Column] | None = None,
+        limit: exprs.Expr | None = None,
         with_pk: bool = False,
-        exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
+        exact_version_only: list[catalog.TableVersionHandle] | None = None,
     ) -> exec.ExecNode:
         """
         Create plan to materialize eval_ctx.
@@ -758,6 +1003,8 @@
         in the context of that table version (eg, if 'tbl' is a view, 'plan_target' might be the base)
         TODO: make exact_version_only a flag and use the versions from tbl
         """
+        if columns is None:
+            columns = []
         if exact_version_only is None:
             exact_version_only = []
         sql_elements = analyzer.sql_elements
@@ -765,6 +1012,7 @@
             analyzer.window_fn_calls
         )
         ctx = exec.ExecContext(row_builder)
+
         combined_ordering = cls._create_combined_ordering(analyzer, verify_agg=is_python_agg)
         cls._verify_join_clauses(analyzer)
 
@@ -773,6 +1021,7 @@
         # - join clause subexprs
         # - subexprs of Where clause conjuncts that can't be run in SQL
         # - all grouping exprs
+        # - all stratify exprs
         candidates = list(
             exprs.Expr.list_subexprs(
                 analyzer.select_list,
@@ -787,10 +1036,12 @@
             candidates.extend(
                 exprs.Expr.subexprs(analyzer.filter, filter=sql_elements.contains, traverse_matches=False)
             )
-        if analyzer.group_by_clause is not None:
-            candidates.extend(
-                exprs.Expr.list_subexprs(analyzer.group_by_clause, filter=sql_elements.contains, traverse_matches=False)
-            )
+        candidates.extend(
+            exprs.Expr.list_subexprs(analyzer.grouping_exprs, filter=sql_elements.contains, traverse_matches=False)
+        )
+        candidates.extend(
+            exprs.Expr.list_subexprs(analyzer.stratify_exprs, filter=sql_elements.contains, traverse_matches=False)
+        )
         # not isinstance(...): we don't want to materialize Literals via a Select
         sql_exprs = exprs.ExprSet(e for e in candidates if not isinstance(e, exprs.Literal))
 
@@ -812,8 +1063,15 @@
                     traverse_matches=False,
                 )
             )
+
             plan = exec.SqlScanNode(
-                tbl, row_builder, select_list=tbl_scan_exprs, set_pk=with_pk, exact_version_only=exact_version_only
+                tbl,
+                row_builder,
+                select_list=tbl_scan_exprs,
+                columns=[c for c in columns if c.get_tbl().id == tbl.tbl_id],
+                set_pk=with_pk,
+                cell_md_col_refs=cls._cell_md_col_refs(tbl_scan_exprs),
+                exact_version_only=exact_version_only,
             )
             tbl_scan_plans.append(plan)
 
@@ -835,7 +1093,17 @@
             # we need to order the input for window functions
             plan.set_order_by(analyzer.get_window_fn_ob_clause())
 
-        plan = cls._insert_prefetch_node(tbl.tbl_version.id, row_builder, plan)
+        if analyzer.sample_clause is not None:
+            plan = exec.SqlSampleNode(
+                row_builder,
+                input=plan,
+                select_list=tbl_scan_exprs,
+                sample_clause=analyzer.sample_clause,
+                stratify_exprs=analyzer.stratify_exprs,
+            )
+
+        plan = cls._add_prefetch_node(tbl.tbl_version.id, row_builder.unique_exprs, plan)
+        plan = cls._add_cell_reconstruction_node(analyzer.all_exprs, plan)
 
         if analyzer.group_by_clause is not None:
             # we're doing grouping aggregation; the input of the AggregateNode are the grouping exprs plus the
@@ -879,6 +1147,7 @@
             if not agg_output.issuperset(exprs.ExprSet(eval_ctx.target_exprs)):
                 # we need an ExprEvalNode to evaluate the remaining output exprs
                 plan = exec.ExprEvalNode(row_builder, eval_ctx.target_exprs, agg_output, input=plan)
+            plan = cls._add_save_node(plan)
         else:
             if not exprs.ExprSet(sql_exprs).issuperset(exprs.ExprSet(eval_ctx.target_exprs)):
                 # we need an ExprEvalNode to evaluate the remaining output exprs
@@ -912,26 +1181,24 @@
         return Analyzer(FromClause(tbls=[tbl]), [], where_clause=where_clause)
 
     @classmethod
-    def create_add_column_plan(
-        cls, tbl: catalog.TableVersionPath, col: catalog.Column
-    ) -> tuple[exec.ExecNode, Optional[int]]:
+    def create_add_column_plan(cls, tbl: catalog.TableVersionPath, col: catalog.Column) -> exec.ExecNode:
         """Creates a plan for InsertableTable.add_column()
         Returns:
             plan: the plan to execute
             value_expr slot idx for the plan output (for computed cols)
         """
         assert isinstance(tbl, catalog.TableVersionPath)
-        row_builder = exprs.RowBuilder(output_exprs=[], columns=[col], input_exprs=[])
+        row_builder = exprs.RowBuilder(output_exprs=[], columns=[col], input_exprs=[], tbl=tbl.tbl_version.get())
         analyzer = Analyzer(FromClause(tbls=[tbl]), row_builder.default_eval_ctx.target_exprs)
         plan = cls._create_query_plan(
             row_builder=row_builder, analyzer=analyzer, eval_ctx=row_builder.default_eval_ctx, with_pk=True
         )
+
         plan.ctx.batch_size = 16
         plan.ctx.show_pbar = True
         plan.ctx.ignore_errors = True
+        computed_exprs = row_builder.output_exprs - row_builder.input_exprs
+        plan.ctx.num_computed_exprs = len(computed_exprs)  # we are adding a computed column, so we need to evaluate it
+        plan = cls._add_save_node(plan)
 
-        # we want to flush images
-        if col.is_computed and col.is_stored and col.col_type.is_image_type():
-            plan.set_stored_img_cols(row_builder.output_slot_idxs())
-        value_expr_slot_idx = row_builder.output_slot_idxs()[0].slot_idx if col.is_computed else None
-        return plan, value_expr_slot_idx
+        return plan
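Usage note on the new sampling metadata: a fraction-based, non-stratified SampleClause is the 'repeatable' case, and it round-trips through as_dict()/from_dict(). A small sketch using only what this diff defines (import path assumed as above):

    from pixeltable.plan import SampleClause

    clause = SampleClause(version=None, n=None, n_per_stratum=None, fraction=0.1, seed=42, stratify_exprs=None)
    assert clause.version == SampleClause.CURRENT_VERSION  # __post_init__ fills in the default version
    assert not clause.is_stratified and clause.is_repeatable
    restored = SampleClause.from_dict(clause.as_dict())  # '_classname' is stripped on deserialization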