pixeltable 0.4.14__py3-none-any.whl → 0.4.16__py3-none-any.whl
This diff shows the changes between publicly available package versions as released to their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of pixeltable might be problematic.
- pixeltable/__init__.py +6 -1
- pixeltable/catalog/catalog.py +107 -45
- pixeltable/catalog/column.py +7 -2
- pixeltable/catalog/table.py +1 -0
- pixeltable/catalog/table_metadata.py +5 -0
- pixeltable/catalog/table_version.py +100 -106
- pixeltable/catalog/table_version_handle.py +4 -1
- pixeltable/catalog/update_status.py +12 -0
- pixeltable/config.py +6 -0
- pixeltable/dataframe.py +11 -5
- pixeltable/env.py +52 -19
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/exec_node.py +1 -1
- pixeltable/exec/expr_eval/evaluators.py +1 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +14 -0
- pixeltable/exec/expr_eval/globals.py +2 -0
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/object_store_save_node.py +1 -4
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +107 -14
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +10 -11
- pixeltable/exprs/column_property_ref.py +10 -10
- pixeltable/exprs/column_ref.py +2 -2
- pixeltable/exprs/data_row.py +106 -37
- pixeltable/exprs/expr.py +9 -0
- pixeltable/exprs/expr_set.py +14 -7
- pixeltable/exprs/inline_expr.py +2 -19
- pixeltable/exprs/json_path.py +45 -12
- pixeltable/exprs/row_builder.py +54 -22
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/bedrock.py +7 -0
- pixeltable/functions/deepseek.py +11 -4
- pixeltable/functions/llama_cpp.py +7 -0
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/ollama.py +7 -0
- pixeltable/functions/openai.py +4 -4
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/video.py +123 -9
- pixeltable/functions/whisperx.py +2 -0
- pixeltable/functions/yolox.py +2 -0
- pixeltable/globals.py +56 -31
- pixeltable/io/__init__.py +1 -0
- pixeltable/io/globals.py +16 -15
- pixeltable/io/table_data_conduit.py +46 -21
- pixeltable/iterators/__init__.py +1 -0
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/plan.py +175 -46
- pixeltable/share/publish.py +0 -1
- pixeltable/store.py +2 -2
- pixeltable/type_system.py +5 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/exception_handler.py +5 -28
- pixeltable/utils/image.py +7 -0
- pixeltable/utils/misc.py +5 -0
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/METADATA +2 -1
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/RECORD +64 -57
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/licenses/LICENSE +0 -0
pixeltable/plan.py
CHANGED
@@ -3,9 +3,10 @@ from __future__ import annotations
 import dataclasses
 import enum
 from textwrap import dedent
-from typing import Any, Iterable, Literal, Optional, Sequence
+from typing import Any, Iterable, Literal, Optional, Sequence, cast
 from uuid import UUID

+import pgvector.sqlalchemy  # type: ignore[import-untyped]
 import sqlalchemy as sql

 import pixeltable as pxt
@@ -385,7 +386,7 @@ class Planner:
             TableVersionHandle(tbl.id, tbl.effective_version), rows, row_builder, tbl.next_row_id
         )

-        plan = cls.
+        plan = cls._add_prefetch_node(tbl.id, row_builder.input_exprs, input_node=plan)

        computed_exprs = row_builder.output_exprs - row_builder.input_exprs
        if len(computed_exprs) > 0:
@@ -393,6 +394,8 @@ class Planner:
             plan = exec.ExprEvalNode(
                 row_builder, computed_exprs, plan.output_exprs, input=plan, maintain_input_order=False
             )
+        if any(c.col_type.is_json_type() or c.col_type.is_array_type() for c in stored_cols):
+            plan = exec.CellMaterializationNode(plan)

         plan.set_ctx(
             exec.ExecContext(
@@ -403,7 +406,7 @@ class Planner:
                 ignore_errors=ignore_errors,
             )
         )
-        plan = cls.
+        plan = cls._add_save_node(plan)

         return plan

@@ -422,10 +425,17 @@ class Planner:
         plan = df._create_query_plan()  # ExecNode constructed by the DataFrame

         # Modify the plan RowBuilder to register the output columns
+        needs_cell_materialization = False
         for col_name, expr in zip(df.schema.keys(), df._select_list_exprs):
             assert col_name in tbl.cols_by_name
             col = tbl.cols_by_name[col_name]
             plan.row_builder.add_table_column(col, expr.slot_idx)
+            needs_cell_materialization = (
+                needs_cell_materialization or col.col_type.is_json_type() or col.col_type.is_array_type()
+            )
+
+        if needs_cell_materialization:
+            plan = exec.CellMaterializationNode(plan)

         plan.set_ctx(
             exec.ExecContext(
@@ -446,12 +456,14 @@ class Planner:
         cascade: bool,
     ) -> tuple[exec.ExecNode, list[str], list[catalog.Column]]:
         """Creates a plan to materialize updated rows.
+
         The plan:
         - retrieves rows that are visible at the current version of the table
         - materializes all stored columns and the update targets
         - if cascade is True, recomputes all computed columns that transitively depend on the updated columns
           and copies the values of all other stored columns
         - if cascade is False, copies all columns that aren't update targets from the original rows
+
         Returns:
         - root node of the plan
         - list of qualified column names that are getting updated
@@ -477,14 +489,16 @@ class Planner:

         cls.__check_valid_columns(tbl.tbl_version.get(), recomputed_cols, 'updated in')

+        # our query plan
+        # - evaluates the update targets and recomputed columns
+        # - copies all other stored columns
         recomputed_base_cols = {col for col in recomputed_cols if col.tbl.id == tbl.tbl_version.id}
         copied_cols = [
             col
             for col in target.cols_by_id.values()
             if col.is_stored and col not in updated_cols and col not in recomputed_base_cols
         ]
-        select_list: list[exprs.Expr] =
-        select_list.extend(update_targets.values())
+        select_list: list[exprs.Expr] = list(update_targets.values())

         recomputed_exprs = [
             c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols
@@ -495,14 +509,22 @@ class Planner:
         select_list.extend(recomputed_exprs)

         # we need to retrieve the PK columns of the existing rows
-        plan = cls.create_query_plan(
-
+        plan = cls.create_query_plan(
+            FromClause(tbls=[tbl]),
+            select_list=select_list,
+            columns=copied_cols,
+            where_clause=where_clause,
+            ignore_errors=True,
+        )
+        evaluated_cols = updated_cols + list(recomputed_base_cols)  # same order as select_list
         # update row builder with column information
-
+        plan.row_builder.add_table_columns(copied_cols)
+        for i, col in enumerate(evaluated_cols):
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
         plan.ctx.num_computed_exprs = len(recomputed_exprs)

-        plan = cls.
+        plan = cls._add_cell_materialization_node(plan)
+        plan = cls._add_save_node(plan)

         recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
         return plan, [f'{c.tbl.name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols
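The cascade semantics in the docstring above ("recomputes all computed columns that transitively depend on the updated columns") amount to a transitive-closure walk over the column-dependency graph. A toy sketch of that idea, using a plain dict and made-up column names rather than pixeltable's actual dependency tracking:

    # Toy model: each computed column maps to its direct dependencies.
    # Updating a column recomputes every computed column that depends on
    # it, directly or transitively.
    deps: dict[str, set[str]] = {'c2': {'c1'}, 'c3': {'c2'}, 'c4': {'x'}}

    def transitive_dependents(updated: set[str]) -> set[str]:
        out: set[str] = set()
        changed = True
        while changed:
            changed = False
            for col, direct in deps.items():
                if col not in out and direct & (updated | out):
                    out.add(col)
                    changed = True
        return out

    # updating c1 recomputes c2, then c3 via c2; c4 is merely copied
    assert transitive_dependents({'c1'}) == {'c2', 'c3'}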
@@ -525,6 +547,79 @@ class Planner:
                 .format(validation_error=col.value_expr.validation_error)
             )

+    @classmethod
+    def _cell_md_col_refs(cls, expr_list: Iterable[exprs.Expr]) -> list[exprs.ColumnRef]:
+        """Return list of ColumnRefs that need their cellmd values for reconstruction"""
+        json_col_refs = list(
+            exprs.Expr.list_subexprs(
+                expr_list,
+                expr_class=exprs.ColumnRef,
+                filter=lambda e: cast(exprs.ColumnRef, e).col.col_type.is_json_type(),
+                traverse_matches=False,
+            )
+        )
+
+        def needs_reconstruction(e: exprs.Expr) -> bool:
+            assert isinstance(e, exprs.ColumnRef)
+            # Vector-typed array columns are used for vector indexes, and are stored in the db
+            return e.col.col_type.is_array_type() and not isinstance(e.col.sa_col_type, pgvector.sqlalchemy.Vector)
+
+        array_col_refs = list(
+            exprs.Expr.list_subexprs(
+                expr_list, expr_class=exprs.ColumnRef, filter=needs_reconstruction, traverse_matches=False
+            )
+        )
+
+        return json_col_refs + array_col_refs
+
+    @classmethod
+    def _add_cell_materialization_node(cls, input: exec.ExecNode) -> exec.ExecNode:
+        # we need a CellMaterializationNode if any of the evaluated output columns are json or array-typed
+        has_target_cols = any(
+            col.col_type.is_json_type() or col.col_type.is_array_type()
+            for col, slot_idx in input.row_builder.table_columns.items()
+            if slot_idx is not None
+        )
+        if has_target_cols:
+            return exec.CellMaterializationNode(input)
+        else:
+            return input
+
+    @classmethod
+    def _add_cell_reconstruction_node(cls, expr_list: list[exprs.Expr], input: exec.ExecNode) -> exec.ExecNode:
+        """
+        Add a CellReconstructionNode, if required by any of the exprs in expr_list.
+
+        Cell reconstruction is required for
+        1) all json-typed ColumnRefs that are not used as part of a JsonPath (the latter does its own reconstruction)
+           or as part of a ColumnPropertyRef
+        2) all array-typed ColumnRefs that are not used as part of a ColumnPropertyRef
+        """
+
+        def json_filter(e: exprs.Expr) -> bool:
+            if isinstance(e, exprs.JsonPath):
+                return not e.is_relative_path() and isinstance(e.anchor, exprs.ColumnRef)
+            if isinstance(e, exprs.ColumnPropertyRef):
+                return e.col_ref.col.col_type.is_json_type()
+            return isinstance(e, exprs.ColumnRef) and e.col.col_type.is_json_type()
+
+        def array_filter(e: exprs.Expr) -> bool:
+            if isinstance(e, exprs.ColumnPropertyRef):
+                return e.col_ref.col.col_type.is_array_type()
+            if not isinstance(e, exprs.ColumnRef):
+                return False
+            # Vector-typed array columns are used for vector indexes, and are stored in the db
+            return e.col.col_type.is_array_type() and not isinstance(e.col.sa_col_type, pgvector.sqlalchemy.Vector)
+
+        json_candidates = list(exprs.Expr.list_subexprs(expr_list, filter=json_filter, traverse_matches=False))
+        json_refs = [e for e in json_candidates if isinstance(e, exprs.ColumnRef)]
+        array_candidates = list(exprs.Expr.list_subexprs(expr_list, filter=array_filter, traverse_matches=False))
+        array_refs = [e for e in array_candidates if isinstance(e, exprs.ColumnRef)]
+        if len(json_refs) > 0 or len(array_refs) > 0:
+            return exec.CellReconstructionNode(json_refs, array_refs, input.row_builder, input=input)
+        else:
+            return input
+
     @classmethod
     def create_batch_update_plan(
         cls,
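The three `_add_*` helpers above share one shape: inspect the plan, then either wrap it in another ExecNode or hand it back unchanged, so call sites can chain `plan = cls._add_...(plan)` unconditionally. A minimal sketch of that wrap-if-needed pattern, with stand-in classes rather than pixeltable's real ExecNode hierarchy:

    # Stand-in classes only; not pixeltable's actual API.
    from __future__ import annotations
    from typing import Optional

    class Node:
        def __init__(self, input: Optional['Node'] = None) -> None:
            self.input = input

    class CellMaterializationNode(Node): ...
    class ObjectStoreSaveNode(Node): ...

    def add_cell_materialization_node(plan: Node, needs_it: bool) -> Node:
        # wrap only if some output column is json- or array-typed
        return CellMaterializationNode(plan) if needs_it else plan

    def add_save_node(plan: Node, has_media_cols: bool) -> Node:
        # media persistence is the outermost stage, added last
        return ObjectStoreSaveNode(plan) if has_media_cols else plan

    plan: Node = Node()
    plan = add_cell_materialization_node(plan, needs_it=True)
    plan = add_save_node(plan, has_media_cols=False)  # returns plan unchanged
    assert isinstance(plan, CellMaterializationNode)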
@@ -543,8 +638,8 @@ class Planner:
         """
         assert isinstance(tbl, catalog.TableVersionPath)
         target = tbl.tbl_version.get()  # the one we need to update
-        sa_key_cols: list[sql.Column]
-        key_vals: list[tuple]
+        sa_key_cols: list[sql.Column]
+        key_vals: list[tuple]
         if len(rowids) > 0:
             sa_key_cols = target.store_tbl.rowid_columns()
             key_vals = rowids
@@ -567,8 +662,7 @@ class Planner:
             for col in target.cols_by_id.values()
             if col.is_stored and col not in updated_cols and col not in recomputed_base_cols
         ]
-        select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in
-        select_list.extend(exprs.ColumnRef(col) for col in updated_cols)
+        select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in updated_cols]

         recomputed_exprs = [
             c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols
@@ -586,23 +680,37 @@ class Planner:
         )
         row_builder = exprs.RowBuilder(analyzer.all_exprs, [], sql_exprs, target)
         analyzer.finalize(row_builder)
-
+
+        cell_md_col_refs = cls._cell_md_col_refs(sql_exprs)
+        sql_lookup_node = exec.SqlLookupNode(
+            tbl,
+            row_builder,
+            sql_exprs,
+            columns=copied_cols,
+            sa_key_cols=sa_key_cols,
+            key_vals=key_vals,
+            cell_md_col_refs=cell_md_col_refs,
+        )
         col_vals = [{col: row[col].val for col in updated_cols} for row in batch]
         row_update_node = exec.RowUpdateNode(tbl, key_vals, len(rowids) > 0, col_vals, row_builder, sql_lookup_node)
         plan: exec.ExecNode = row_update_node
         if not cls._is_contained_in(analyzer.select_list, sql_exprs):
             # we need an ExprEvalNode to evaluate the remaining output exprs
             plan = exec.ExprEvalNode(row_builder, analyzer.select_list, sql_exprs, input=plan)
+
         # update row builder with column information
-
+        evaluated_cols = list(updated_cols) + list(recomputed_base_cols)  # same order as select_list
         row_builder.set_slot_idxs(select_list, remove_duplicates=False)
-
+        plan.row_builder.add_table_columns(copied_cols)
+        for i, col in enumerate(evaluated_cols):
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
         ctx = exec.ExecContext(row_builder, num_computed_exprs=len(recomputed_exprs))
-        #
+        # TODO: correct batch size?
         ctx.batch_size = 0
         plan.set_ctx(ctx)
-
+
+        plan = cls._add_cell_materialization_node(plan)
+        plan = cls._add_save_node(plan)
         recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
         return (
             plan,
@@ -653,10 +761,11 @@ class Planner:
             exact_version_only=view.get_bases(),
         )
         plan.ctx.num_computed_exprs = len(recomputed_exprs)
-
+        materialized_cols = copied_cols + list(recomputed_cols)  # same order as select_list
+        for i, col in enumerate(materialized_cols):
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
-
-        plan = cls.
+        plan = cls._add_cell_materialization_node(plan)
+        plan = cls._add_save_node(plan)

         return plan

@@ -726,7 +835,9 @@ class Planner:

         exec_ctx.ignore_errors = True
         plan.set_ctx(exec_ctx)
-
+        if any(c.col_type.is_json_type() or c.col_type.is_array_type() for c in stored_cols):
+            plan = exec.CellMaterializationNode(plan)
+        plan = cls._add_save_node(plan)

         return plan, len(row_builder.default_eval_ctx.target_exprs)

@@ -773,15 +884,13 @@ class Planner:
         return combined_ordering

     @classmethod
-    def
-
-
-
-        if len(stored_media_cols) == 0:
+    def _add_save_node(cls, input_node: exec.ExecNode) -> exec.ExecNode:
+        """Add an ObjectStoreSaveNode, if needed."""
+        media_col_info = input_node.row_builder.media_output_col_info
+        if len(media_col_info) == 0:
             return input_node
-
-
-        return save_node
+        else:
+            return exec.ObjectStoreSaveNode(media_col_info, input_node)

     @classmethod
     def _is_contained_in(cls, l1: Iterable[exprs.Expr], l2: Iterable[exprs.Expr]) -> bool:
@@ -789,10 +898,10 @@ class Planner:
         return {e.id for e in l1} <= {e.id for e in l2}

     @classmethod
-    def
+    def _add_prefetch_node(
         cls, tbl_id: UUID, expressions: Iterable[exprs.Expr], input_node: exec.ExecNode
     ) -> exec.ExecNode:
-        """
+        """Add a CachePrefetch node, if needed."""
         # we prefetch external files for all media ColumnRefs, even those that aren't part of the dependencies
         # of output_exprs: if unstored iterator columns are present, we might need to materialize ColumnRefs that
         # aren't explicitly captured as dependencies
@@ -808,21 +917,30 @@ class Planner:
     def create_query_plan(
         cls,
         from_clause: FromClause,
-        select_list:
-
-
-
-
-
+        select_list: list[exprs.Expr] | None = None,
+        columns: list[catalog.Column] | None = None,
+        where_clause: exprs.Expr | None = None,
+        group_by_clause: list[exprs.Expr] | None = None,
+        order_by_clause: list[tuple[exprs.Expr, bool]] | None = None,
+        limit: exprs.Expr | None = None,
+        sample_clause: SampleClause | None = None,
         ignore_errors: bool = False,
-        exact_version_only:
+        exact_version_only: list[catalog.TableVersionHandle] | None = None,
     ) -> exec.ExecNode:
-        """
+        """
+        Return plan for executing a query.
+
+        The plan:
+        - materializes the values of select_list exprs into their respective slots
+        - materializes cell values of 'columns' (and their cellmd, if applicable) into DataRow.cell_vals/cell_md
+
         Updates 'select_list' in place to make it executable.
         TODO: make exact_version_only a flag and use the versions from tbl
         """
         if select_list is None:
             select_list = []
+        if columns is None:
+            columns = []
         if order_by_clause is None:
             order_by_clause = []
         if exact_version_only is None:
@@ -850,6 +968,7 @@
             row_builder=row_builder,
             analyzer=analyzer,
             eval_ctx=eval_ctx,
+            columns=columns,
             limit=limit,
             with_pk=True,
             exact_version_only=exact_version_only,
@@ -865,9 +984,10 @@
         row_builder: exprs.RowBuilder,
         analyzer: Analyzer,
         eval_ctx: exprs.RowBuilder.EvalCtx,
+        columns: list[catalog.Column] | None = None,
         limit: Optional[exprs.Expr] = None,
         with_pk: bool = False,
-        exact_version_only:
+        exact_version_only: list[catalog.TableVersionHandle] | None = None,
     ) -> exec.ExecNode:
         """
         Create plan to materialize eval_ctx.
@@ -877,6 +997,8 @@
         in the context of that table version (eg, if 'tbl' is a view, 'plan_target' might be the base)
         TODO: make exact_version_only a flag and use the versions from tbl
         """
+        if columns is None:
+            columns = []
         if exact_version_only is None:
             exact_version_only = []
         sql_elements = analyzer.sql_elements
@@ -934,8 +1056,15 @@
                 traverse_matches=False,
             )
         )
+
         plan = exec.SqlScanNode(
-            tbl,
+            tbl,
+            row_builder,
+            select_list=tbl_scan_exprs,
+            columns=[c for c in columns if c.tbl.id == tbl.tbl_id],
+            set_pk=with_pk,
+            cell_md_col_refs=cls._cell_md_col_refs(tbl_scan_exprs),
+            exact_version_only=exact_version_only,
         )
         tbl_scan_plans.append(plan)

@@ -966,7 +1095,8 @@
             stratify_exprs=analyzer.stratify_exprs,
         )

-        plan = cls.
+        plan = cls._add_prefetch_node(tbl.tbl_version.id, row_builder.unique_exprs, plan)
+        plan = cls._add_cell_reconstruction_node(analyzer.all_exprs, plan)

         if analyzer.group_by_clause is not None:
             # we're doing grouping aggregation; the input of the AggregateNode are the grouping exprs plus the
@@ -1010,7 +1140,7 @@
             if not agg_output.issuperset(exprs.ExprSet(eval_ctx.target_exprs)):
                 # we need an ExprEvalNode to evaluate the remaining output exprs
                 plan = exec.ExprEvalNode(row_builder, eval_ctx.target_exprs, agg_output, input=plan)
-            plan = cls.
+            plan = cls._add_save_node(plan)
         else:
             if not exprs.ExprSet(sql_exprs).issuperset(exprs.ExprSet(eval_ctx.target_exprs)):
                 # we need an ExprEvalNode to evaluate the remaining output exprs
@@ -1062,7 +1192,6 @@
         plan.ctx.ignore_errors = True
         computed_exprs = row_builder.output_exprs - row_builder.input_exprs
         plan.ctx.num_computed_exprs = len(computed_exprs)  # we are adding a computed column, so we need to evaluate it
-
-        plan = cls._insert_save_node(tbl.tbl_version.id, row_builder.stored_media_cols, input_node=plan)
+        plan = cls._add_save_node(plan)

         return plan
pixeltable/share/publish.py
CHANGED
@@ -254,7 +254,6 @@ def _download_from_presigned_url(
     session.close()


-# TODO: This will be replaced by drop_table with cloud table uri
 def delete_replica(dest_path: str) -> None:
     """Delete cloud replica"""
     delete_request_json = {'operation_type': 'delete_snapshot', 'table_uri': dest_path}
pixeltable/store.py
CHANGED
@@ -274,7 +274,7 @@ class StoreBase:
             self.sa_md.remove(tmp_tbl)
             tmp_tbl.drop(bind=conn)

-        run_cleanup(remove_tmp_tbl, raise_error=
+        run_cleanup(remove_tmp_tbl, raise_error=False)

         return num_excs

@@ -321,7 +321,7 @@ class StoreBase:
                 table_row, num_row_exc = row_builder.create_store_table_row(row, cols_with_excs, pk)
                 num_excs += num_row_exc

-                if show_progress:
+                if show_progress and Env.get().verbosity >= 1:
                     if progress_bar is None:
                         warnings.simplefilter('ignore', category=TqdmWarning)
                         progress_bar = tqdm(
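The second hunk additionally gates progress output on the configured verbosity, not just the caller's `show_progress` flag. A small sketch of the same gating, where the `verbosity` variable stands in for `Env.get().verbosity`:

    # The tqdm bar is created lazily, and only when the caller asked for
    # progress output and verbosity is at least 1.
    from tqdm import tqdm

    verbosity = 0  # stand-in for Env.get().verbosity
    show_progress = True

    progress_bar = None
    for _ in range(1000):
        if show_progress and verbosity >= 1:
            if progress_bar is None:
                progress_bar = tqdm(total=1000, unit='rows')
            progress_bar.update(1)
    if progress_bar is not None:
        progress_bar.close()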
pixeltable/type_system.py
CHANGED
@@ -25,6 +25,7 @@ import sqlalchemy as sql
 from typing_extensions import _AnnotatedAlias

 import pixeltable.exceptions as excs
+from pixeltable.env import Env
 from pixeltable.utils import parse_local_file_path


@@ -673,8 +674,9 @@ class TimestampType(ColumnType):
     def _create_literal(self, val: Any) -> Any:
         if isinstance(val, str):
             return datetime.datetime.fromisoformat(val)
-
-
+        # Place naive timestamps in the default time zone
+        if isinstance(val, datetime.datetime) and val.tzinfo is None:
+            return val.replace(tzinfo=Env.get().default_time_zone)
         return val


@@ -760,7 +762,7 @@ class JsonType(ColumnType):

     @classmethod
     def __is_valid_json(cls, val: Any) -> bool:
-        if val is None or isinstance(val, (str, int, float, bool)):
+        if val is None or isinstance(val, (str, int, float, bool, np.ndarray, PIL.Image.Image)):
             return True
         if isinstance(val, (list, tuple)):
             return all(cls.__is_valid_json(v) for v in val)
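The TimestampType change localizes naive datetimes with `replace(tzinfo=...)`, which keeps the wall-clock fields and only attaches zone information (no conversion). An illustration of that behavior, with a fixed zone standing in for `Env.get().default_time_zone`:

    import datetime
    from zoneinfo import ZoneInfo

    default_tz = ZoneInfo('America/Los_Angeles')  # stand-in for Env.get().default_time_zone

    naive = datetime.datetime(2024, 1, 1, 12, 0)  # tzinfo is None
    aware = naive.replace(tzinfo=default_tz)      # same wall clock, now zone-aware
    assert aware.hour == naive.hour and aware.tzinfo is not None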
pixeltable/utils/console_output.py
CHANGED

@@ -1,6 +1,8 @@
 import logging
 from typing import TextIO

+from pixeltable import exceptions as excs
+

 def map_level(verbosity: int) -> int:
     """
@@ -19,7 +21,8 @@ def map_level(verbosity: int) -> int:
         return logging.INFO
     if verbosity == 2:
         return logging.DEBUG
-
+
+    raise excs.Error(f'Invalid verbosity level: {verbosity}')


 class ConsoleOutputHandler(logging.StreamHandler):
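With the added raise, `map_level` no longer falls through silently for out-of-range verbosity values. A hypothetical caller-side check of the stricter contract:

    import logging

    from pixeltable import exceptions as excs
    from pixeltable.utils.console_output import map_level

    assert map_level(2) == logging.DEBUG
    try:
        map_level(3)  # out of range
    except excs.Error:
        pass  # invalid verbosity now raises instead of falling through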
pixeltable/utils/exception_handler.py
CHANGED

@@ -1,32 +1,9 @@
 import logging
-import sys
 from typing import Any, Callable, Optional, TypeVar

 R = TypeVar('R')

-
-def _is_in_exception() -> bool:
-    """
-    Check if code is currently executing within an exception context.
-    """
-    current_exception = sys.exc_info()[1]
-    return current_exception is not None
-
-
-def run_cleanup_on_exception(cleanup_func: Callable[..., R], *args: Any, **kwargs: Any) -> Optional[R]:
-    """
-    Runs cleanup only when running in exception context.
-
-    The function `run_cleanup_on_exception()` should be used to clean up resources when an operation fails.
-    This is typically done using a try, except, and finally block, with the resource cleanup logic placed within
-    the except block. However, this pattern may not handle KeyboardInterrupt exceptions.
-    To ensure that resources are always cleaned up at least once when an exception or KeyboardInterrupt occurs,
-    create an idempotent function for cleaning up resources and pass it to the `run_cleanup_on_exception()` function
-    from the finally block.
-    """
-    if _is_in_exception():
-        return run_cleanup(cleanup_func, *args, raise_error=False, **kwargs)
-    return None
+logger = logging.getLogger('pixeltable')


 def run_cleanup(cleanup_func: Callable[..., R], *args: Any, raise_error: bool = True, **kwargs: Any) -> Optional[R]:
@@ -40,20 +17,20 @@ def run_cleanup(cleanup_func: Callable[..., R], *args: Any, raise_error: bool =
         raise_error: raise an exception if an error occurs during cleanup.
     """
     try:
-
+        logger.debug(f'Running cleanup function: {cleanup_func.__name__!r}')
         return cleanup_func(*args, **kwargs)
     except KeyboardInterrupt as interrupt:
         # Save original exception and re-attempt cleanup
         original_exception = interrupt
-
+        logger.debug(f'Cleanup {cleanup_func.__name__!r} interrupted, retrying')
         try:
             return cleanup_func(*args, **kwargs)
         except Exception as e:
             # Suppress this exception
-
+            logger.error(f'Cleanup {cleanup_func.__name__!r} failed with exception {e.__class__}: {e}')
             raise KeyboardInterrupt from original_exception
     except Exception as e:
-
+        logger.error(f'Cleanup {cleanup_func.__name__!r} failed with exception {e.__class__}: {e}')
         if raise_error:
             raise e
         return None
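`run_cleanup_on_exception` is gone; what remains is `run_cleanup`, which logs each attempt, retries once if a KeyboardInterrupt arrives mid-cleanup, and optionally swallows errors. A usage sketch under those semantics (the cleanup function should be idempotent, since it may run twice):

    import os
    import tempfile

    from pixeltable.utils.exception_handler import run_cleanup

    fd, tmp_path = tempfile.mkstemp()
    os.close(fd)

    def remove_tmp_file() -> None:
        # idempotent: safe to call again if the first attempt was interrupted
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    try:
        pass  # ... work with tmp_path ...
    finally:
        # mirrors the store.py call site above: log failures, don't re-raise
        run_cleanup(remove_tmp_file, raise_error=False)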
pixeltable/utils/misc.py
ADDED
{pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pixeltable
-Version: 0.4.14
+Version: 0.4.16
 Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
 Project-URL: homepage, https://pixeltable.com/
 Project-URL: repository, https://github.com/pixeltable/pixeltable
@@ -53,6 +53,7 @@ Requires-Dist: sqlalchemy>=2.0.23
 Requires-Dist: tenacity>=8.2
 Requires-Dist: toml>=0.10
 Requires-Dist: tqdm>=4.64
+Requires-Dist: tzlocal>=5.0
 Description-Content-Type: text/markdown

 <picture class="github-only">