pixeltable 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- pixeltable/__init__.py +4 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/catalog.py +7 -9
- pixeltable/catalog/column.py +49 -0
- pixeltable/catalog/insertable_table.py +0 -7
- pixeltable/catalog/schema_object.py +1 -14
- pixeltable/catalog/table.py +180 -67
- pixeltable/catalog/table_version.py +42 -146
- pixeltable/catalog/table_version_path.py +6 -5
- pixeltable/catalog/view.py +2 -1
- pixeltable/config.py +24 -9
- pixeltable/dataframe.py +5 -6
- pixeltable/env.py +113 -21
- pixeltable/exec/aggregation_node.py +1 -1
- pixeltable/exec/cache_prefetch_node.py +4 -3
- pixeltable/exec/exec_node.py +0 -8
- pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
- pixeltable/exec/expr_eval/globals.py +1 -0
- pixeltable/exec/expr_eval/schedulers.py +52 -19
- pixeltable/exec/in_memory_data_node.py +2 -3
- pixeltable/exprs/array_slice.py +2 -2
- pixeltable/exprs/data_row.py +15 -2
- pixeltable/exprs/expr.py +9 -9
- pixeltable/exprs/function_call.py +61 -23
- pixeltable/exprs/globals.py +1 -2
- pixeltable/exprs/json_path.py +3 -3
- pixeltable/exprs/row_builder.py +25 -21
- pixeltable/exprs/string_op.py +3 -3
- pixeltable/func/expr_template_function.py +6 -3
- pixeltable/func/query_template_function.py +2 -2
- pixeltable/func/signature.py +30 -3
- pixeltable/func/tools.py +2 -2
- pixeltable/functions/anthropic.py +76 -27
- pixeltable/functions/deepseek.py +5 -1
- pixeltable/functions/gemini.py +11 -2
- pixeltable/functions/globals.py +2 -2
- pixeltable/functions/huggingface.py +6 -12
- pixeltable/functions/llama_cpp.py +9 -1
- pixeltable/functions/openai.py +76 -55
- pixeltable/functions/video.py +59 -6
- pixeltable/functions/vision.py +2 -2
- pixeltable/globals.py +86 -13
- pixeltable/io/datarows.py +3 -3
- pixeltable/io/fiftyone.py +7 -7
- pixeltable/io/globals.py +3 -3
- pixeltable/io/hf_datasets.py +4 -4
- pixeltable/io/label_studio.py +2 -1
- pixeltable/io/pandas.py +6 -6
- pixeltable/io/parquet.py +3 -3
- pixeltable/io/table_data_conduit.py +2 -2
- pixeltable/io/utils.py +2 -2
- pixeltable/iterators/audio.py +3 -2
- pixeltable/iterators/document.py +2 -8
- pixeltable/iterators/video.py +49 -9
- pixeltable/plan.py +0 -16
- pixeltable/share/packager.py +51 -42
- pixeltable/share/publish.py +134 -7
- pixeltable/store.py +5 -25
- pixeltable/type_system.py +5 -8
- pixeltable/utils/__init__.py +2 -2
- pixeltable/utils/arrow.py +5 -5
- pixeltable/utils/description_helper.py +3 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/media_store.py +131 -66
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/METADATA +238 -122
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/RECORD +69 -69
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/licenses/LICENSE +0 -0
pixeltable/io/hf_datasets.py
CHANGED
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import typing
-from typing import Any, Optional
+from typing import Any, Optional
 
 import pixeltable as pxt
 import pixeltable.type_system as ts
@@ -66,7 +66,7 @@ def _to_pixeltable_type(feature_type: Any, nullable: bool) -> Optional[ts.Column
     return None
 
 
-def _get_hf_schema(dataset:
+def _get_hf_schema(dataset: datasets.Dataset | datasets.DatasetDict) -> datasets.Features:
     """Get the schema of a huggingface dataset as a dictionary."""
     import datasets
 
@@ -91,10 +91,10 @@ def huggingface_schema_to_pxt_schema(
 
 def import_huggingface_dataset(
     table_path: str,
-    dataset:
+    dataset: datasets.Dataset | datasets.DatasetDict,
     *,
     schema_overrides: Optional[dict[str, Any]] = None,
-    primary_key:
+    primary_key: str | list[str] | None = None,
     **kwargs: Any,
 ) -> pxt.Table:
     """Create a new base table from a Huggingface dataset, or dataset dict with multiple splits.
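For reference, a minimal usage sketch of the updated signature (dataset and table names are illustrative; requires the `datasets` package, and imports the function from the module shown above):

    import datasets

    from pixeltable.io.hf_datasets import import_huggingface_dataset

    # `dataset` may now be a Dataset or a DatasetDict, and `primary_key`
    # accepts str | list[str] | None.
    ds = datasets.load_dataset('rotten_tomatoes')  # a DatasetDict with several splits
    tbl = import_huggingface_dataset('reviews', ds)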
pixeltable/io/label_studio.py
CHANGED
@@ -19,6 +19,7 @@ from pixeltable.config import Config
 from pixeltable.exprs import ColumnRef, DataRow, Expr
 from pixeltable.io.external_store import Project
 from pixeltable.utils import coco
+from pixeltable.utils.media_store import TempStore
 
 # label_studio_sdk>=1 and label_studio_sdk<1 are not compatible, so we need to try
 # the import two different ways to insure intercompatibility
@@ -215,7 +216,7 @@ class LabelStudioProject(Project):
         else:
             # No localpath; create a temp file and upload it
             assert isinstance(row[media_col_idx], PIL.Image.Image)
-            file =
+            file = TempStore.create_path(extension='.png')
             row[media_col_idx].save(file, format='png')
             task_id = self.project.import_tasks(file)[0]
             os.remove(file)
pixeltable/io/pandas.py
CHANGED
@@ -1,5 +1,5 @@
 import os
-from typing import Any, Optional
+from typing import Any, Optional
 
 import numpy as np
 import pandas as pd
@@ -17,7 +17,7 @@ def import_pandas(
     df: pd.DataFrame,
     *,
     schema_overrides: Optional[dict[str, Any]] = None,
-    primary_key:
+    primary_key: str | list[str] | None = None,
     num_retained_versions: int = 10,
     comment: str = '',
 ) -> pxt.Table:
@@ -55,9 +55,9 @@ def import_pandas(
 
 def import_csv(
     tbl_name: str,
-    filepath_or_buffer:
+    filepath_or_buffer: str | os.PathLike,
     schema_overrides: Optional[dict[str, Any]] = None,
-    primary_key:
+    primary_key: str | list[str] | None = None,
     num_retained_versions: int = 10,
     comment: str = '',
     **kwargs: Any,
@@ -84,10 +84,10 @@ def import_csv(
 
 def import_excel(
     tbl_name: str,
-    io:
+    io: str | os.PathLike,
     *,
     schema_overrides: Optional[dict[str, Any]] = None,
-    primary_key:
+    primary_key: str | list[str] | None = None,
     num_retained_versions: int = 10,
     comment: str = '',
     **kwargs: Any,
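A minimal sketch of the loosened `import_csv` signature (file, table, and column names are illustrative):

    from pixeltable.io.pandas import import_csv

    # `filepath_or_buffer` is now annotated str | os.PathLike, and
    # `primary_key` accepts str | list[str] | None.
    tbl = import_csv('my_table', 'data.csv', primary_key='id')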
pixeltable/io/parquet.py
CHANGED
@@ -7,7 +7,7 @@ import logging
 import typing
 from collections import deque
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any, Optional
 
 import numpy as np
 import PIL.Image
@@ -42,7 +42,7 @@ def _write_batch(value_batch: dict[str, deque], schema: pa.Schema, output_path:
 
 
 def export_parquet(
-    table_or_df:
+    table_or_df: pxt.Table | pxt.DataFrame,
     parquet_path: Path,
     partition_size_bytes: int = 100_000_000,
     inline_images: bool = False,
@@ -152,7 +152,7 @@ def import_parquet(
     *,
     parquet_path: str,
     schema_overrides: Optional[dict[str, Any]] = None,
-    primary_key:
+    primary_key: str | list[str] | None = None,
     **kwargs: Any,
 ) -> pxt.Table:
     """Creates a new base table from a Parquet file or set of files. Requires pyarrow to be installed.
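A round-trip sketch under the new annotations (names and paths are illustrative; assumes an existing table and an installed pyarrow):

    from pathlib import Path

    import pixeltable as pxt
    from pixeltable.io.parquet import export_parquet, import_parquet

    tbl = pxt.get_table('reviews')  # `table_or_df` accepts pxt.Table | pxt.DataFrame
    export_parquet(tbl, Path('/tmp/reviews_pq'))
    restored = import_parquet('reviews_copy', parquet_path='/tmp/reviews_pq')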
pixeltable/io/table_data_conduit.py
CHANGED
@@ -8,7 +8,7 @@ import urllib.parse
 import urllib.request
 from dataclasses import dataclass, field, fields
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional,
+from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, cast
 
 import pandas as pd
 from pyarrow.parquet import ParquetDataset
@@ -325,7 +325,7 @@ class JsonTableDataConduit(TableDataConduit):
 
 
 class HFTableDataConduit(TableDataConduit):
-    hf_ds:
+    hf_ds: datasets.Dataset | datasets.DatasetDict | None = None
     column_name_for_split: Optional[str] = None
     categorical_features: dict[str, dict[int, str]]
     dataset_dict: dict[str, datasets.Dataset] = None
pixeltable/io/utils.py
CHANGED
@@ -1,5 +1,5 @@
 from keyword import iskeyword as is_python_keyword
-from typing import Any, Optional
+from typing import Any, Optional
 
 import pixeltable as pxt
 import pixeltable.exceptions as excs
@@ -21,7 +21,7 @@ def normalize_pxt_col_name(name: str) -> str:
     return id
 
 
-def normalize_primary_key_parameter(primary_key:
+def normalize_primary_key_parameter(primary_key: str | list[str] | None = None) -> list[str]:
     if primary_key is None:
         primary_key = []
     elif isinstance(primary_key, str):
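Based on the visible branches, the normalization presumably behaves as follows (the list-handling tail is truncated above, so this is an inferred sketch):

    from pixeltable.io.utils import normalize_primary_key_parameter

    normalize_primary_key_parameter(None)          # -> []
    normalize_primary_key_parameter('id')          # -> ['id'] (str presumably wrapped in a list)
    normalize_primary_key_parameter(['id', 'ts'])  # -> ['id', 'ts']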
pixeltable/iterators/audio.py
CHANGED
@@ -5,7 +5,8 @@ from typing import Any, ClassVar, Optional
 
 import av
 
-from pixeltable import
+from pixeltable import exceptions as excs, type_system as ts
+from pixeltable.utils.media_store import TempStore
 
 from .base import ComponentIterator
 
@@ -149,7 +150,7 @@ class AudioSplitter(ComponentIterator):
         target_chunk_start, target_chunk_end = self.chunks_to_extract_in_pts[self.next_pos]
         chunk_start_pts = 0
         chunk_end_pts = 0
-        chunk_file = str(
+        chunk_file = str(TempStore.create_path(extension=self.audio_path.suffix))
         output_container = av.open(chunk_file, mode='w')
         input_stream = self.container.streams.audio[0]
         codec_name = AudioSplitter.__codec_map.get(input_stream.codec_context.name, input_stream.codec_context.name)
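The surrounding chunk-extraction logic (only partially shown here) writes each chunk into a fresh container at the temp path. A standalone pyav sketch of that pattern, with illustrative paths and no Pixeltable dependencies:

    import av

    # Copy roughly the first 5 seconds of an audio file into a new container.
    with av.open('input.mp3') as in_container, av.open('/tmp/chunk.mp3', mode='w') as out_container:
        in_stream = in_container.streams.audio[0]
        out_stream = out_container.add_stream('mp3', rate=in_stream.rate)
        for frame in in_container.decode(in_stream):
            if frame.time is not None and frame.time > 5.0:
                break
            for packet in out_stream.encode(frame):
                out_container.mux(packet)
        for packet in out_stream.encode(None):  # flush the encoder
            out_container.mux(packet)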
pixeltable/iterators/document.py
CHANGED
@@ -1,7 +1,7 @@
 import dataclasses
 import enum
 import logging
-from typing import Any, ClassVar, Iterable, Iterator, Optional
+from typing import Any, ClassVar, Iterable, Iterator, Optional
 
 import ftfy
 
@@ -213,12 +213,6 @@ class DocumentSplitter(ComponentIterator):
         if kwargs.get('limit') is None:
             raise Error('limit is required with "token_limit"/"char_limit" separators')
 
-        # check dependencies at the end
-        if Separator.SENTENCE in separators:
-            _ = Env.get().spacy_nlp
-        if Separator.TOKEN_LIMIT in separators:
-            Env.get().require_package('tiktoken')
-
         return schema, []
 
     def __next__(self) -> dict[str, Any]:
@@ -273,7 +267,7 @@ class DocumentSplitter(ComponentIterator):
                 yield DocumentSection(text=full_text, metadata=md)
                 accumulated_text = []
 
-        def process_element(el:
+        def process_element(el: bs4.element.Tag | bs4.NavigableString) -> Iterator[DocumentSection]:
             # process the element and emit sections as necessary
             nonlocal accumulated_text, headings, sourceline, emit_on_heading, emit_on_paragraph
 
pixeltable/iterators/video.py
CHANGED
@@ -29,12 +29,29 @@ class FrameIterator(ComponentIterator):
             extracted). If `fps` is greater than the frame rate of the video, an error will be raised.
         num_frames: Exact number of frames to extract. The frames will be spaced as evenly as possible. If
             `num_frames` is greater than the number of frames in the video, all frames will be extracted.
+        all_frame_attrs:
+            If True, outputs a `pxt.Json` column `frame_attrs` with the following `pyav`-provided attributes
+            (for more information, see `pyav`'s documentation on
+            [VideoFrame](https://pyav.org/docs/develop/api/video.html#module-av.video.frame) and
+            [Frame](https://pyav.org/docs/develop/api/frame.html)):
+
+            * `index` (`int`)
+            * `pts` (`Optional[int]`)
+            * `dts` (`Optional[int]`)
+            * `time` (`Optional[float]`)
+            * `is_corrupt` (`bool`)
+            * `key_frame` (`bool`)
+            * `pict_type` (`int`)
+            * `interlaced_frame` (`bool`)
+
+            If False, only outputs frame attributes `frame_idx`, `pos_msec`, and `pos_frame` as separate columns.
     """
 
     # Input parameters
     video_path: Path
     fps: Optional[float]
     num_frames: Optional[int]
+    all_frame_attrs: bool
 
     # Video info
     container: av.container.input.InputContainer
@@ -50,7 +67,14 @@ class FrameIterator(ComponentIterator):
     # frame index in the video. Otherwise, the corresponding video index is `frames_to_extract[next_pos]`.
     next_pos: int
 
-    def __init__(
+    def __init__(
+        self,
+        video: str,
+        *,
+        fps: Optional[float] = None,
+        num_frames: Optional[int] = None,
+        all_frame_attrs: bool = False,
+    ):
         if fps is not None and num_frames is not None:
             raise excs.Error('At most one of `fps` or `num_frames` may be specified')
 
@@ -60,6 +84,7 @@ class FrameIterator(ComponentIterator):
         self.container = av.open(str(video_path))
         self.fps = fps
         self.num_frames = num_frames
+        self.all_frame_attrs = all_frame_attrs
 
         self.video_framerate = self.container.streams.video[0].average_rate
         self.video_time_base = self.container.streams.video[0].time_base
@@ -115,16 +140,17 @@ class FrameIterator(ComponentIterator):
             'video': ts.VideoType(nullable=False),
             'fps': ts.FloatType(nullable=True),
             'num_frames': ts.IntType(nullable=True),
+            'all_frame_attrs': ts.BoolType(nullable=False),
         }
 
     @classmethod
     def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ts.ColumnType], list[str]]:
-
-
-        '
-
-        '
-        }, ['frame']
+        attrs: dict[str, ts.ColumnType]
+        if kwargs.get('all_frame_attrs'):
+            attrs = {'frame_attrs': ts.JsonType()}
+        else:
+            attrs = {'frame_idx': ts.IntType(), 'pos_msec': ts.FloatType(), 'pos_frame': ts.IntType()}
+        return {**attrs, 'frame': ts.ImageType()}, ['frame']
 
     def __next__(self) -> dict[str, Any]:
         # Determine the frame index in the video corresponding to the iterator index `next_pos`;
@@ -164,8 +190,22 @@ class FrameIterator(ComponentIterator):
             raise excs.Error(f'Frame {next_video_idx} is missing from the video (video file is corrupt)')
         img = frame.to_image()
         assert isinstance(img, PIL.Image.Image)
-
-        result
+        pts_msec = float(pts * self.video_time_base * 1000)
+        result: dict[str, Any] = {'frame': img}
+        if self.all_frame_attrs:
+            attrs = {
+                'index': video_idx,
+                'pts': frame.pts,
+                'dts': frame.dts,
+                'time': frame.time,
+                'is_corrupt': frame.is_corrupt,
+                'key_frame': frame.key_frame,
+                'pict_type': frame.pict_type,
+                'interlaced_frame': frame.interlaced_frame,
+            }
+            result['frame_attrs'] = attrs
+        else:
+            result.update({'frame_idx': self.next_pos, 'pos_msec': pts_msec, 'pos_frame': video_idx})
         self.next_pos += 1
         return result
 
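A usage sketch for the new `all_frame_attrs` flag (table and view names are illustrative):

    import pixeltable as pxt
    from pixeltable.iterators import FrameIterator

    videos = pxt.create_table('videos', {'video': pxt.Video})
    frames = pxt.create_view(
        'frames',
        videos,
        iterator=FrameIterator.create(video=videos.video, fps=1.0, all_frame_attrs=True),
    )
    # Each frame row now carries a Json `frame_attrs` column instead of the
    # separate frame_idx / pos_msec / pos_frame columns.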
pixeltable/plan.py
CHANGED
@@ -394,9 +394,6 @@ class Planner:
             row_builder, computed_exprs, plan.output_exprs, input=plan, maintain_input_order=False
         )
 
-        stored_col_info = row_builder.output_slot_idxs()
-        stored_img_col_info = [info for info in stored_col_info if info.col.col_type.is_image_type()]
-        plan.set_stored_img_cols(stored_img_col_info)
         plan.set_ctx(
             exec.ExecContext(
                 row_builder,
@@ -428,10 +425,6 @@
         col = tbl.cols_by_name[col_name]
         plan.row_builder.add_table_column(col, expr.slot_idx)
 
-        stored_col_info = plan.row_builder.output_slot_idxs()
-        stored_img_col_info = [info for info in stored_col_info if info.col.col_type.is_image_type()]
-        plan.set_stored_img_cols(stored_img_col_info)
-
         plan.set_ctx(
             exec.ExecContext(
                 plan.row_builder, batch_size=0, show_pbar=True, num_computed_exprs=0, ignore_errors=ignore_errors
@@ -657,10 +650,6 @@
         for i, col in enumerate(copied_cols + list(recomputed_cols)):  # same order as select_list
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
         # TODO: avoid duplication with view_load_plan() logic (where does this belong?)
-        stored_img_col_info = [
-            info for info in plan.row_builder.output_slot_idxs() if info.col.col_type.is_image_type()
-        ]
-        plan.set_stored_img_cols(stored_img_col_info)
         return plan
 
     @classmethod
@@ -727,8 +716,6 @@
             row_builder, output_exprs=view_output_exprs, input_exprs=base_output_exprs, input=plan
         )
 
-        stored_img_col_info = [info for info in row_builder.output_slot_idxs() if info.col.col_type.is_image_type()]
-        plan.set_stored_img_cols(stored_img_col_info)
         exec_ctx.ignore_errors = True
         plan.set_ctx(exec_ctx)
         return plan, len(row_builder.default_eval_ctx.target_exprs)
@@ -1053,7 +1040,4 @@
         computed_exprs = row_builder.output_exprs - row_builder.input_exprs
         plan.ctx.num_computed_exprs = len(computed_exprs)  # we are adding a computed column, so we need to evaluate it
 
-        # we want to flush images
-        if col.is_computed and col.is_stored and col.col_type.is_image_type():
-            plan.set_stored_img_cols(row_builder.output_slot_idxs())
         return plan
pixeltable/share/packager.py
CHANGED
@@ -24,7 +24,7 @@ from pixeltable.env import Env
 from pixeltable.metadata import schema
 from pixeltable.utils import sha256sum
 from pixeltable.utils.formatter import Formatter
-from pixeltable.utils.media_store import MediaStore
+from pixeltable.utils.media_store import MediaStore, TempStore
 
 _logger = logging.getLogger('pixeltable')
 
@@ -57,7 +57,7 @@ class TablePackager:
 
     def __init__(self, table: catalog.Table, additional_md: Optional[dict[str, Any]] = None) -> None:
         self.table = table
-        self.tmp_dir =
+        self.tmp_dir = TempStore.create_path()
         self.media_files = {}
 
         # Load metadata
@@ -92,10 +92,10 @@ class TablePackager:
         self.bundle_path = self.__build_tarball()
 
         _logger.info('Extracting preview data.')
-        self.md['
+        self.md['row_count'] = self.table.count()
         preview_header, preview = self.__extract_preview_data()
         self.md['preview_header'] = preview_header
-        self.md['
+        self.md['preview_data'] = preview
 
         _logger.info(f'Packaging complete: {self.bundle_path}')
         return self.bundle_path
@@ -335,7 +335,7 @@ class TableRestorer:
     def __init__(self, tbl_path: str, md: Optional[dict[str, Any]] = None) -> None:
         self.tbl_path = tbl_path
         self.md = md
-        self.tmp_dir =
+        self.tmp_dir = TempStore.create_path()
         self.media_files = {}
 
     def restore(self, bundle_path: Path) -> pxt.Table:
@@ -459,42 +459,51 @@ class TableRestorer:
             for col_name, col in temp_cols.items()
             if col_name not in system_col_names and col_name not in media_col_names
         ]
-        [36 removed lines whose content was not captured in this diff view; only a closing ')' survives]
+
+        q: sql.Executable
+
+        assert len(value_store_cols) == len(value_temp_cols)
+        if len(value_store_cols) > 0:
+            mismatch_predicates = [
+                store_col != temp_col for store_col, temp_col in zip(value_store_cols, value_temp_cols)
+            ]
+            mismatch_clause = sql.or_(*mismatch_predicates)
+
+            # This query looks for rows that have matching primary keys (rowid + pos_k + v_min), but differ in at least
+            # one value column. Pseudo-SQL:
+            #
+            # SELECT store_tbl.col_0, ..., store_tbl.col_n, temp_tbl.col_0, ..., temp_tbl.col_n
+            # FROM store_tbl, temp_tbl
+            # WHERE store_tbl.rowid = temp_tbl.rowid
+            #   AND store_tbl.pos_0 = temp_tbl.pos_0
+            #   AND ... AND store_tbl.pos_k = temp_tbl.pos_k
+            #   AND store_tbl.v_min = temp_tbl.v_min
+            #   AND (
+            #     store_tbl.col_0 != temp_tbl.col_0
+            #     OR store_tbl.col_1 != temp_tbl.col_1
+            #     OR ... OR store_tbl.col_n != temp_tbl.col_n
+            #   )
+            #
+            # The value column comparisons (store_tbl.col_0 != temp_tbl.col_0, etc.) will always be false for rows where
+            # either column is NULL; this is what we want, since it may indicate a column that is present in one version
+            # but not the other.
+            q = sql.select(*value_store_cols, *value_temp_cols).where(pk_clause).where(mismatch_clause)
+            _logger.debug(q.compile())
+            result = conn.execute(q)
+            if result.rowcount > 0:
+                _logger.debug(
+                    f'Data corruption error between {temp_sa_tbl_name!r} and {store_sa_tbl_name!r}: '
+                    f'{result.rowcount} inconsistent row(s).'
+                )
+                row = result.first()
+                _logger.debug('Example mismatch:')
+                _logger.debug(f'{store_sa_tbl_name}: {row[: len(value_store_cols)]}')
+                _logger.debug(f'{temp_sa_tbl_name}: {row[len(value_store_cols) :]}')
+                raise excs.Error(
+                    'Data corruption error: '
+                    'the replica data are inconsistent with data retrieved from a previous replica.'
+                )
+
         _logger.debug(f'Verified data integrity between {store_sa_tbl_name!r} and {temp_sa_tbl_name!r}.')
 
         # Now rectify the v_max values in the temporary table.
@@ -610,7 +619,7 @@
             # in self.media_files.
             src_path = self.tmp_dir / 'media' / parsed_url.netloc
             # Move the file to the media store and update the URL.
-            self.media_files[url] = MediaStore.relocate_local_media_file(src_path, media_col)
+            self.media_files[url] = MediaStore.get().relocate_local_media_file(src_path, media_col)
             return self.media_files[url]
         # For any type of URL other than a local file, just return the URL as-is.
         return url