pixeltable 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pixeltable might be problematic.
- pixeltable/__init__.py +1 -0
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +3 -11
- pixeltable/catalog/catalog.py +575 -220
- pixeltable/catalog/column.py +22 -23
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +2 -148
- pixeltable/catalog/insertable_table.py +15 -13
- pixeltable/catalog/path.py +6 -0
- pixeltable/catalog/schema_object.py +9 -4
- pixeltable/catalog/table.py +96 -85
- pixeltable/catalog/table_version.py +257 -174
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/tbl_ops.py +44 -0
- pixeltable/catalog/update_status.py +179 -0
- pixeltable/catalog/view.py +50 -56
- pixeltable/config.py +76 -12
- pixeltable/dataframe.py +19 -6
- pixeltable/env.py +50 -4
- pixeltable/exec/data_row_batch.py +3 -1
- pixeltable/exec/exec_node.py +7 -24
- pixeltable/exec/expr_eval/schedulers.py +134 -7
- pixeltable/exec/in_memory_data_node.py +6 -7
- pixeltable/exprs/column_property_ref.py +21 -9
- pixeltable/exprs/column_ref.py +7 -2
- pixeltable/exprs/function_call.py +2 -2
- pixeltable/exprs/row_builder.py +10 -9
- pixeltable/exprs/rowid_ref.py +0 -4
- pixeltable/func/function.py +3 -3
- pixeltable/functions/audio.py +36 -9
- pixeltable/functions/gemini.py +4 -4
- pixeltable/functions/openai.py +1 -2
- pixeltable/functions/video.py +59 -16
- pixeltable/globals.py +109 -24
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/datarows.py +2 -1
- pixeltable/io/external_store.py +3 -55
- pixeltable/io/globals.py +4 -4
- pixeltable/io/hf_datasets.py +10 -2
- pixeltable/io/label_studio.py +16 -16
- pixeltable/io/pandas.py +1 -0
- pixeltable/io/table_data_conduit.py +12 -13
- pixeltable/iterators/audio.py +17 -8
- pixeltable/iterators/image.py +5 -2
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_39.py +125 -0
- pixeltable/metadata/converters/util.py +3 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +50 -1
- pixeltable/plan.py +4 -0
- pixeltable/share/packager.py +20 -38
- pixeltable/store.py +40 -51
- pixeltable/type_system.py +2 -2
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/media_store.py +50 -0
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/METADATA +1 -1
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/RECORD +60 -57
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/LICENSE +0 -0
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/entry_points.txt +0 -0
pixeltable/io/table_data_conduit.py CHANGED

@@ -47,13 +47,13 @@ class TableDataConduitFormat(str, enum.Enum):
 
 @dataclass
 class TableDataConduit:
-    source: TableDataSource
+    source: 'TableDataSource'
     source_format: Optional[str] = None
     source_column_map: Optional[dict[str, str]] = None
     if_row_exists: Literal['update', 'ignore', 'error'] = 'error'
-    pxt_schema: Optional[dict[str,
-    src_schema_overrides: Optional[dict[str,
-    src_schema: Optional[dict[str,
+    pxt_schema: Optional[dict[str, ts.ColumnType]] = None
+    src_schema_overrides: Optional[dict[str, ts.ColumnType]] = None
+    src_schema: Optional[dict[str, ts.ColumnType]] = None
     pxt_pk: Optional[list[str]] = None
     src_pk: Optional[list[str]] = None
     valid_rows: Optional[RowData] = None

@@ -87,7 +87,7 @@ class TableDataConduit:
         for name, coltype in self.pxt_schema.items():
             self.pxt_schema[name] = ts.ColumnType.normalize_type(coltype)
 
-    def infer_schema(self) -> dict[str,
+    def infer_schema(self) -> dict[str, ts.ColumnType]:
         raise NotImplementedError
 
     def valid_row_batch(self) -> Iterator[RowData]:

@@ -137,7 +137,7 @@ class DFTableDataConduit(TableDataConduit):
         t.pxt_df = tds.source
         return t
 
-    def infer_schema(self) -> dict[str,
+    def infer_schema(self) -> dict[str, ts.ColumnType]:
         self.pxt_schema = self.pxt_df.schema
         self.pxt_pk = self.src_pk
         return self.pxt_schema

@@ -168,7 +168,7 @@ class RowDataTableDataConduit(TableDataConduit):
         t.batch_count = 0
         return t
 
-    def infer_schema(self) -> dict[str,
+    def infer_schema(self) -> dict[str, ts.ColumnType]:
         from .datarows import _infer_schema_from_rows
 
         if self.source_column_map is None:

@@ -239,7 +239,7 @@ class PandasTableDataConduit(TableDataConduit):
         t.batch_count = 0
         return t
 
-    def infer_schema_part1(self) -> tuple[dict[str,
+    def infer_schema_part1(self) -> tuple[dict[str, ts.ColumnType], list[str]]:
         """Return inferred schema, inferred primary key, and source column map"""
         if self.source_column_map is None:
             if self.src_schema_overrides is None:

@@ -252,7 +252,7 @@ class PandasTableDataConduit(TableDataConduit):
         else:
             raise NotImplementedError()
 
-    def infer_schema(self) -> dict[str,
+    def infer_schema(self) -> dict[str, ts.ColumnType]:
         self.pxt_schema, self.pxt_pk = self.infer_schema_part1()
         self.normalize_pxt_schema_types()
         _df_check_primary_key_values(self.pd_df, self.src_pk)

@@ -328,7 +328,6 @@ class HFTableDataConduit(TableDataConduit):
     hf_ds: Optional[Union[datasets.Dataset, datasets.DatasetDict]] = None
     column_name_for_split: Optional[str] = None
     categorical_features: dict[str, dict[int, str]]
-    hf_schema: dict[str, Any] = None
     dataset_dict: dict[str, datasets.Dataset] = None
     hf_schema_source: dict[str, Any] = None

@@ -356,7 +355,7 @@ class HFTableDataConduit(TableDataConduit):
         except ImportError:
             return False
 
-    def infer_schema_part1(self) -> tuple[dict[str,
+    def infer_schema_part1(self) -> tuple[dict[str, ts.ColumnType], list[str]]:
         from pixeltable.io.hf_datasets import _get_hf_schema, huggingface_schema_to_pxt_schema
 
         if self.source_column_map is None:

@@ -469,7 +468,7 @@ class ParquetTableDataConduit(TableDataConduit):
         t.pq_ds = parquet.ParquetDataset(str(input_path))
         return t
 
-    def infer_schema_part1(self) -> tuple[dict[str,
+    def infer_schema_part1(self) -> tuple[dict[str, ts.ColumnType], list[str]]:
         from pixeltable.utils.arrow import ar_infer_schema
 
         if self.source_column_map is None:

@@ -483,7 +482,7 @@ class ParquetTableDataConduit(TableDataConduit):
         else:
             raise NotImplementedError()
 
-    def infer_schema(self) -> dict[str,
+    def infer_schema(self) -> dict[str, ts.ColumnType]:
         self.pxt_schema, self.pxt_pk = self.infer_schema_part1()
         self.normalize_pxt_schema_types()
         self.prepare_insert()
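The recurring change across these hunks is that the previously truncated `dict[str, ...]` return annotations are now spelled out as `dict[str, ts.ColumnType]`, and `source` becomes a string forward reference so the annotation need not be resolvable at class-definition time. A minimal sketch of the same pattern, with hypothetical stand-in names rather than Pixeltable's actual classes:

from dataclasses import dataclass
from typing import Optional

# Hypothetical stand-ins for pixeltable's ColumnType / TableDataSource.
class ColumnType: ...
class TableDataSource: ...

@dataclass
class Conduit:
    # A string annotation defers evaluation, so TableDataSource can be
    # imported late (or only under TYPE_CHECKING) without a NameError.
    source: 'TableDataSource'
    pxt_schema: Optional[dict[str, ColumnType]] = None

    def infer_schema(self) -> dict[str, ColumnType]:
        # Subclasses (pandas, Parquet, HF datasets, ...) override this.
        raise NotImplementedError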
pixeltable/iterators/audio.py CHANGED

@@ -1,5 +1,4 @@
 import logging
-import uuid
 from fractions import Fraction
 from pathlib import Path
 from typing import Any, ClassVar, Optional

@@ -55,12 +54,9 @@ class AudioSplitter(ComponentIterator):
     def __init__(
         self, audio: str, chunk_duration_sec: float, *, overlap_sec: float = 0.0, min_chunk_duration_sec: float = 0.0
     ):
-        if chunk_duration_sec <= 0.0:
-            raise excs.Error('chunk_duration_sec must be a positive number')
-        if chunk_duration_sec < min_chunk_duration_sec:
-            raise excs.Error('chunk_duration_sec must be at least min_chunk_duration_sec')
-        if overlap_sec >= chunk_duration_sec:
-            raise excs.Error('overlap_sec must be less than chunk_duration_sec')
+        assert chunk_duration_sec > 0.0
+        assert chunk_duration_sec >= min_chunk_duration_sec
+        assert overlap_sec < chunk_duration_sec
         audio_path = Path(audio)
         assert audio_path.exists() and audio_path.is_file()
         self.audio_path = audio_path

@@ -128,6 +124,19 @@ class AudioSplitter(ComponentIterator):
 
     @classmethod
     def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ts.ColumnType], list[str]]:
+        param_names = ['chunk_duration_sec', 'min_chunk_duration_sec', 'overlap_sec']
+        params = dict(zip(param_names, args))
+        params.update(kwargs)
+
+        chunk_duration_sec = params['chunk_duration_sec']
+        min_chunk_duration_sec = params.get('min_chunk_duration_sec', 0.0)
+        overlap_sec = params.get('overlap_sec', 0.0)
+        if chunk_duration_sec <= 0.0:
+            raise excs.Error('chunk_duration_sec must be a positive number')
+        if chunk_duration_sec < min_chunk_duration_sec:
+            raise excs.Error('chunk_duration_sec must be at least min_chunk_duration_sec')
+        if overlap_sec >= chunk_duration_sec:
+            raise excs.Error('overlap_sec must be less than chunk_duration_sec')
         return {
             'start_time_sec': ts.FloatType(),
             'end_time_sec': ts.FloatType(),

@@ -140,7 +149,7 @@ class AudioSplitter(ComponentIterator):
         target_chunk_start, target_chunk_end = self.chunks_to_extract_in_pts[self.next_pos]
         chunk_start_pts = 0
         chunk_end_pts = 0
-        chunk_file = str(env.Env.get().
+        chunk_file = str(env.Env.get().create_tmp_path(self.audio_path.suffix))
         output_container = av.open(chunk_file, mode='w')
         input_stream = self.container.streams.audio[0]
         codec_name = AudioSplitter.__codec_map.get(input_stream.codec_context.name, input_stream.codec_context.name)
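The user-facing validation moves from `__init__` (now plain asserts) into `output_schema`, which re-maps positional args onto parameter names so the checks can run when the iterator's schema is declared. A sketch of that args/kwargs normalization, standalone and outside Pixeltable:

from typing import Any

def validate_chunk_params(*args: Any, **kwargs: Any) -> None:
    # Positional args are matched to their names via dict(zip(...)),
    # then keyword args override; mirrors the diff's output_schema logic.
    param_names = ['chunk_duration_sec', 'min_chunk_duration_sec', 'overlap_sec']
    params = dict(zip(param_names, args))
    params.update(kwargs)

    chunk = params['chunk_duration_sec']
    min_chunk = params.get('min_chunk_duration_sec', 0.0)
    overlap = params.get('overlap_sec', 0.0)
    if chunk <= 0.0:
        raise ValueError('chunk_duration_sec must be a positive number')
    if chunk < min_chunk:
        raise ValueError('chunk_duration_sec must be at least min_chunk_duration_sec')
    if overlap >= chunk:
        raise ValueError('overlap_sec must be less than chunk_duration_sec')

validate_chunk_params(5.0, overlap_sec=1.0)   # ok
# validate_chunk_params(0.0)                  # would raise ValueError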
pixeltable/iterators/image.py CHANGED

@@ -31,8 +31,7 @@ class TileIterator(ComponentIterator):
     __j: int
 
     def __init__(self, image: PIL.Image.Image, *, tile_size: tuple[int, int], overlap: tuple[int, int] = (0, 0)):
-        if overlap[0] >= tile_size[0] or overlap[1] >= tile_size[1]:
-            raise excs.Error(f'overlap dimensions {overlap} are not strictly smaller than tile size {tile_size}')
+        assert overlap[0] < tile_size[0] and overlap[1] < tile_size[1]
 
         self.__image = image
         self.__image.load()

@@ -79,4 +78,8 @@ class TileIterator(ComponentIterator):
 
     @classmethod
     def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ts.ColumnType], list[str]]:
+        tile_size = kwargs.get('tile_size')
+        overlap = kwargs.get('overlap', (0, 0))
+        if overlap[0] >= tile_size[0] or overlap[1] >= tile_size[1]:
+            raise excs.Error(f'overlap dimensions {overlap} are not strictly smaller than tile size {tile_size}')
        return {'tile': ts.ImageType(), 'tile_coord': ts.JsonType(), 'tile_box': ts.JsonType()}, ['tile']
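Same pattern as the audio iterator: the strict-inequality check between overlap and tile size moves into `output_schema`. A toy version of just the check, with illustrative names:

def check_tile_params(tile_size: tuple[int, int], overlap: tuple[int, int] = (0, 0)) -> None:
    # Each overlap dimension must be strictly smaller than the tile dimension;
    # otherwise the tiling window would never advance across the image.
    if overlap[0] >= tile_size[0] or overlap[1] >= tile_size[1]:
        raise ValueError(f'overlap dimensions {overlap} are not strictly smaller than tile size {tile_size}')

check_tile_params((224, 224), (32, 32))    # ok
# check_tile_params((224, 224), (224, 0))  # would raise ValueError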
pixeltable/metadata/__init__.py CHANGED

@@ -18,7 +18,7 @@ _console_logger = ConsoleLogger(logging.getLogger('pixeltable'))
 _logger = logging.getLogger('pixeltable')
 
 # current version of the metadata; this is incremented whenever the metadata schema changes
-VERSION = 39
+VERSION = 40
 
 
 def create_system_info(engine: sql.engine.Engine) -> None:
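The bump to VERSION = 40 only works because a converter is registered for the previous version (convert_39.py below). The registry pattern presumably looks roughly like this sketch; it is not the actual pixeltable.metadata internals:

from typing import Any, Callable

CONVERTERS: dict[int, Callable[..., None]] = {}

def register_converter(version: int) -> Callable[[Callable[..., None]], Callable[..., None]]:
    # Each converter upgrades metadata from `version` to `version + 1`;
    # converters are applied in sequence until the current VERSION is reached.
    def decorator(fn: Callable[..., None]) -> Callable[..., None]:
        CONVERTERS[version] = fn
        return fn
    return decorator

@register_converter(version=39)
def _(engine: Any) -> None:
    ...  # migrate v39 metadata to v40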
pixeltable/metadata/converters/convert_39.py ADDED

@@ -0,0 +1,125 @@
+import logging
+from typing import Optional
+from uuid import UUID
+
+import sqlalchemy as sql
+
+from pixeltable.metadata import register_converter
+from pixeltable.metadata.converters.util import convert_table_md
+
+_logger = logging.getLogger('pixeltable')
+
+
+@register_converter(version=39)
+def _(engine: sql.engine.Engine) -> None:
+    convert_table_md(engine, table_modifier=__table_modifier)
+
+
+def __table_modifier(conn: sql.Connection, tbl_id: UUID, orig_table_md: dict, updated_table_md: dict) -> None:
+    store_prefix = 'view' if orig_table_md['view_md'] is not None else 'tbl'
+    store_name = f'{store_prefix}_{tbl_id.hex}'
+
+    # Get the list of column names that need to be migrated
+    col_names = find_error_columns(conn=conn, store_name=store_name)
+    if len(col_names) == 0:
+        _logger.info(f'No error columns found in table {store_name}. Skipping migration.')
+        return
+
+    # Check if the table exists, outside of the metadata we were given.
+    # There seem to be cases where the metadata is present in the catalog,
+    # but the table itself is not in the database.
+    check_table_sql = sql.text(f"""
+        SELECT EXISTS (
+            SELECT 1
+            FROM information_schema.tables
+            WHERE table_name = '{store_name}'
+        )
+    """)
+    table_exists = conn.execute(check_table_sql).scalar()
+    if not table_exists:
+        _logger.warning(f'Table {store_name} does not exist. Skipping migration.')
+        return
+
+    return migrate_error_to_cellmd_columns(conn, store_name, col_names)
+
+
+def find_error_columns(conn: sql.Connection, store_name: str) -> list[str]:
+    """
+    Return any errormsg or errortype columns in the given table.
+
+    Args:
+        conn: SQLAlchemy connection
+        store_name: Name of the table to check
+
+    Returns:
+        List of column name roots (root_errormsg, root_errortype)
+    """
+    check_columns_sql = sql.text(f"""
+        SELECT column_name
+        FROM information_schema.columns
+        WHERE table_name = '{store_name}'
+    """)
+    found_columns = [
+        row[0]
+        for row in conn.execute(check_columns_sql)
+        if row[0].endswith('_errormsg') or row[0].endswith('_errortype')
+    ]
+    column_roots = {s.removesuffix('_errormsg').removesuffix('_errortype') for s in found_columns}
+    return [*column_roots]
+
+
+def migrate_error_to_cellmd_columns(
+    conn: sql.Connection, store_name: str, col_names: list[str], backup_table: Optional[str] = None
+) -> None:
+    """
+    Safe version with error handling and optional backup.
+
+    Args:
+        conn: SQLAlchemy connection
+        store_name: Name of the table to modify
+        col_names: List of column name prefixes
+        backup_table: Optional name for backup table
+
+    Usage:
+        migrate_error_to_cellmd_columns(conn, 'my_table', ['columnname'], 'my_table_backup')
+    """
+    try:
+        # Optional: Create backup
+        if backup_table:
+            backup_sql = sql.text(f"""
+                CREATE TABLE {backup_table} AS SELECT * FROM {store_name}
+            """)
+            conn.execute(backup_sql)
+            _logger.info(f'Backup created: {backup_table}')
+
+        # Step 1: Add new columns
+        add_column_str = ', '.join(f'ADD COLUMN {col}_cellmd JSONB DEFAULT NULL' for col in col_names)
+        add_column_sql = sql.text(f'ALTER TABLE {store_name} {add_column_str}')
+        conn.execute(add_column_sql)
+        _logger.info(f'Added columns: {", ".join(f"{col}_cellmd" for col in col_names)}')
+
+        # Step 2: Populate new columns
+        set_column_str = ', '.join(
+            [
+                f'{col}_cellmd = CASE WHEN {col}_errormsg IS NULL OR {col}_errortype IS NULL '
+                f"THEN NULL ELSE jsonb_build_object('errormsg', {col}_errormsg, 'errortype', {col}_errortype) END"
+                for col in col_names
+            ]
+        )
+        populate_sql = sql.text(f'UPDATE {store_name} SET {set_column_str}')
+        result = conn.execute(populate_sql)
+        _logger.info(f'Updated {result.rowcount} rows')
+
+        # Step 3: Drop old columns
+        drop_columns_str = ', '.join(
+            [f'DROP COLUMN IF EXISTS {col}_errormsg, DROP COLUMN IF EXISTS {col}_errortype' for col in col_names]
+        )
+        drop_columns_sql = sql.text(f'ALTER TABLE {store_name} {drop_columns_str}')
+        conn.execute(drop_columns_sql)
+        _logger.info(f'Dropped columns: {", ".join(f"{col}_errormsg, {col}_errortype" for col in col_names)}')
+        _logger.info(f'Migration completed successfully for table: {store_name}')
+
+    except sql.exc.SQLAlchemyError as e:
+        _logger.error(f'Migration for table {store_name} failed: {e}')
+        raise
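The net effect of the migration SQL above, per cell: a pair of `<col>_errormsg` / `<col>_errortype` values collapses into a single JSONB `<col>_cellmd` object that is NULL unless both parts are present. In Python terms, an illustration of the CASE expression (not code from the package):

from typing import Optional

def to_cellmd(errormsg: Optional[str], errortype: Optional[str]) -> Optional[dict]:
    # Mirrors: CASE WHEN errormsg IS NULL OR errortype IS NULL THEN NULL
    #          ELSE jsonb_build_object('errormsg', ..., 'errortype', ...) END
    if errormsg is None or errortype is None:
        return None
    return {'errormsg': errormsg, 'errortype': errortype}

assert to_cellmd(None, None) is None
assert to_cellmd('division by zero', 'ZeroDivisionError') == {
    'errormsg': 'division by zero',
    'errortype': 'ZeroDivisionError',
}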
pixeltable/metadata/converters/util.py CHANGED

@@ -16,6 +16,7 @@ def convert_table_md(
     column_md_updater: Optional[Callable[[dict], None]] = None,
     external_store_md_updater: Optional[Callable[[dict], None]] = None,
     substitution_fn: Optional[Callable[[Optional[str], Any], Optional[tuple[Optional[str], Any]]]] = None,
+    table_modifier: Optional[Callable[[sql.Connection, UUID, dict, dict], None]] = None,
 ) -> None:
     """
     Converts schema.TableMd dicts based on the specified conversion functions.

@@ -50,6 +51,8 @@ def convert_table_md(
         if updated_table_md != table_md:
             __logger.info(f'Updating schema for table: {tbl_id}')
             conn.execute(sql.update(Table).where(Table.id == tbl_id).values(md=updated_table_md))
+        if table_modifier is not None:
+            table_modifier(conn, tbl_id, table_md, updated_table_md)
 
     for row in conn.execute(sql.select(Function)):
         fn_id = row[0]
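The new `table_modifier` hook runs once per table, after the metadata row has (potentially) been rewritten, and receives both the original and the updated dicts. A hedged usage sketch — the callback signature comes from the diff, the body is purely illustrative:

import sqlalchemy as sql
from uuid import UUID

def my_modifier(conn: sql.Connection, tbl_id: UUID, orig_md: dict, updated_md: dict) -> None:
    # Runs on the same connection/transaction as the metadata update, so
    # physical store-table changes stay consistent with the metadata rewrite.
    if orig_md != updated_md:
        ...  # e.g. ALTER the store table to match the new metadata

# convert_table_md(engine, table_modifier=my_modifier)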
pixeltable/metadata/notes.py CHANGED

@@ -2,6 +2,7 @@
 # rather than as a comment, so that the existence of a description can be enforced by
 # the unit tests when new versions are added.
 VERSION_NOTES = {
+    40: 'Convert error property columns to cellmd columns',
     39: 'ColumnHandles in external stores',
     38: 'Added TableMd.view_sn',
     37: 'Add support for the sample() method on DataFrames',
pixeltable/metadata/schema.py CHANGED

@@ -8,6 +8,8 @@ from sqlalchemy import BigInteger, ForeignKey, Integer, LargeBinary, orm
 from sqlalchemy.dialects.postgresql import JSONB, UUID
 from sqlalchemy.orm.decl_api import DeclarativeMeta
 
+from ..catalog.update_status import UpdateStatus
+
 # Base has to be marked explicitly as a type, in order to be used elsewhere as a type hint. But in addition to being
 # a type, it's also a `DeclarativeMeta`. The following pattern enables us to expose both `Base` and `Base.metadata`
 # outside of the module in a typesafe way.

@@ -180,6 +182,7 @@ class TableMd:
     # sequence number to track changes in the set of mutable views of this table (ie, this table = the view base)
     # - incremented for each add/drop of a mutable view
     # - only maintained for mutable tables
+    # TODO: replace with mutable_views: list[UUID] to help with debugging
     view_sn: int
 
     # Metadata format for external stores:

@@ -191,6 +194,26 @@ class TableMd:
     view_md: Optional[ViewMd]
     additional_md: dict[str, Any]
 
+    has_pending_ops: bool = False
+
+    @property
+    def is_snapshot(self) -> bool:
+        return self.view_md is not None and self.view_md.is_snapshot
+
+    @property
+    def is_mutable(self) -> bool:
+        return not self.is_snapshot and not self.is_replica
+
+    @property
+    def is_pure_snapshot(self) -> bool:
+        return (
+            self.view_md is not None
+            and self.view_md.is_snapshot
+            and self.view_md.sample_clause is None
+            and self.view_md.predicate is None
+            and len(self.column_md) == 0
+        )
+
 
 class Table(Base):
     """

@@ -219,7 +242,9 @@ class TableVersionMd:
     created_at: float  # time.time()
     version: int
     schema_version: int
-
+    user: Optional[str] = None  # User that created this version
+    update_status: Optional[UpdateStatus] = None  # UpdateStatus of the change that created this version
+    additional_md: dict[str, Any] = dataclasses.field(default_factory=dict)
 
 
 class TableVersion(Base):

@@ -275,6 +300,22 @@ class TableSchemaVersion(Base):
     md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)  # TableSchemaVersionMd
 
 
+class PendingTableOp(Base):
+    """
+    Table operation that needs to be completed before the table can be used.
+
+    Operations need to be completed in order of increasing seq_num.
+    """
+
+    __tablename__ = 'pendingtableops'
+
+    tbl_id: orm.Mapped[uuid.UUID] = orm.mapped_column(
+        UUID(as_uuid=True), ForeignKey('tables.id'), primary_key=True, nullable=False
+    )
+    op_sn: orm.Mapped[int] = orm.mapped_column(Integer, primary_key=True, nullable=False)  # catalog.TableOp.op_sn
+    op: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)  # catalog.TableOp
+
+
 @dataclasses.dataclass
 class FunctionMd:
     name: str

@@ -308,6 +349,14 @@ class FullTableMd(NamedTuple):
     version_md: TableVersionMd
     schema_version_md: TableSchemaVersionMd
 
+    @property
+    def is_pure_snapshot(self) -> bool:
+        return (
+            self.tbl_md.view_md is not None
+            and self.tbl_md.view_md.predicate is None
+            and len(self.schema_version_md.columns) == 0
+        )
+
     def as_dict(self) -> dict[str, Any]:
         return {
             'table_id': self.tbl_md.tbl_id,
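PendingTableOp gives crash recovery a durable work queue: a table with has_pending_ops must have its ops applied in increasing op_sn order before it can be used. A sketch of how such a queue might be drained, assuming the PendingTableOp model above and standard SQLAlchemy session handling (this is not Catalog's actual code):

import uuid

import sqlalchemy as sql

def pending_ops(conn: sql.Connection, tbl_id: uuid.UUID) -> list[dict]:
    # Fetch this table's pending ops in the order they must be completed.
    stmt = (
        sql.select(PendingTableOp.op)
        .where(PendingTableOp.tbl_id == tbl_id)
        .order_by(PendingTableOp.op_sn)
    )
    return [row[0] for row in conn.execute(stmt)]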
pixeltable/plan.py CHANGED

@@ -512,6 +512,7 @@ class Planner:
         # update row builder with column information
         for i, col in enumerate(all_base_cols):
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
+        plan.ctx.num_computed_exprs = len(recomputed_exprs)
         recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
         return plan, [f'{c.tbl.name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols
 

@@ -659,6 +660,7 @@
             ignore_errors=True,
             exact_version_only=view.get_bases(),
         )
+        plan.ctx.num_computed_exprs = len(recomputed_exprs)
         for i, col in enumerate(copied_cols + list(recomputed_cols)):  # same order as select_list
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
         # TODO: avoid duplication with view_load_plan() logic (where does this belong?)

@@ -1057,6 +1059,8 @@
         plan.ctx.batch_size = 16
         plan.ctx.show_pbar = True
         plan.ctx.ignore_errors = True
+        computed_exprs = row_builder.output_exprs - row_builder.input_exprs
+        plan.ctx.num_computed_exprs = len(computed_exprs)  # we are adding a computed column, so we need to evaluate it
 
         # we want to flush images
         if col.is_computed and col.is_stored and col.col_type.is_image_type():
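In each of these plans, `num_computed_exprs` counts only the expressions the row builder must actually evaluate; in the add-column case that is the outputs minus the inputs. A toy illustration of the set-difference semantics (the real operands are expression collections, not strings):

# Toy: exprs that must be computed are outputs not already supplied as inputs.
input_exprs = {'c1', 'c2'}
output_exprs = {'c1', 'c2', 'c3_computed'}
num_computed_exprs = len(output_exprs - input_exprs)
assert num_computed_exprs == 1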
pixeltable/share/packager.py CHANGED

@@ -361,49 +361,32 @@ class TableRestorer:
         )
 
         tbl_md = [schema.FullTableMd.from_dict(t) for t in self.md['md']['tables']]
+        for md in tbl_md:
+            md.tbl_md.is_replica = True
 
-        # Create the replica table
-        # The logic here needs to be completely restructured in order to make it concurrency-safe.
-        # - Catalog.create_replica() needs to write the metadata and also create the physical store tables
-        #   and populate them, otherwise concurrent readers will see an inconsistent state (table metadata w/o
-        #   an actual table)
-        # - this could be done one replica at a time (instead of the entire hierarchy)
         cat = catalog.Catalog.get()
-        [15 removed lines truncated in the source diff]
-        else:
-            ancestor_md = tbl_md  # Not a pure snapshot; include replica_tbl
-
-        # Instantiate data from the Parquet tables.
-        with Env.get().begin_xact():
-            for md in ancestor_md[::-1]:  # Base table first
-                # Create a TableVersion instance (and a store table) for this ancestor.
-                tv = catalog.TableVersion.create_replica(md)
-                # Now import data from Parquet.
-                _logger.info(f'Importing table {tv.name!r}.')
-                self.__import_table(self.tmp_dir, tv, md)
-
-        with cat.begin_xact(for_write=False):
+
+        with cat.begin_xact(for_write=True):
+            # Create (or update) the replica table and its ancestors, along with TableVersion instances for any
+            # versions that have not been seen before.
+            cat.create_replica(catalog.Path(self.tbl_path), tbl_md)
+
+            # Now we need to load data for replica_tbl and its ancestors, except that we skip
+            # replica_tbl itself if it's a pure snapshot.
+            for md in tbl_md[::-1]:  # Base table first
+                if not md.is_pure_snapshot:
+                    tv = cat.get_tbl_version(UUID(md.tbl_md.tbl_id), md.version_md.version)
+                    # Import data from Parquet.
+                    _logger.info(f'Importing table {tv.name!r}.')
+                    self.__import_table(self.tmp_dir, tv, md)
 
         return cat.get_table_by_id(UUID(tbl_md[0].tbl_md.tbl_id))
 
     def __import_table(self, bundle_path: Path, tv: catalog.TableVersion, tbl_md: schema.FullTableMd) -> None:
         """
         Import the Parquet table into the Pixeltable catalog.
         """
-        tbl_id =
+        tbl_id = UUID(tbl_md.tbl_md.tbl_id)
         parquet_dir = bundle_path / 'tables' / f'tbl_{tbl_id.hex}'
         parquet_table = pq.read_table(str(parquet_dir))
         replica_version = tv.version

@@ -626,9 +609,8 @@ class TableRestorer:
             # First time seeing this pxtmedia:// URL. Relocate the file to the media store and record the mapping
             # in self.media_files.
             src_path = self.tmp_dir / 'media' / parsed_url.netloc
-
-            src_path.
-            self.media_files[url] = urllib.parse.urljoin('file:', urllib.request.pathname2url(str(dest_path)))
+            # Move the file to the media store and update the URL.
+            self.media_files[url] = MediaStore.relocate_local_media_file(src_path, tv.id, media_col_id, tv.version)
             return self.media_files[url]
         # For any type of URL other than a local file, just return the URL as-is.
         return url
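The pxtmedia:// scheme decouples bundled media from absolute paths: the URL's netloc names a file under tmp_dir/media, which is relocated exactly once and memoized in self.media_files. A sketch of that memoization, with a hypothetical `relocate` callable standing in for MediaStore.relocate_local_media_file:

import urllib.parse
from pathlib import Path
from typing import Callable

media_files: dict[str, str] = {}

def resolve(url: str, tmp_dir: Path, relocate: Callable[[Path], str]) -> str:
    parsed = urllib.parse.urlparse(url)
    if parsed.scheme != 'pxtmedia':
        return url  # non-bundled URLs pass through unchanged
    if url not in media_files:
        src_path = tmp_dir / 'media' / parsed.netloc
        media_files[url] = relocate(src_path)  # move into the media store, record new URL
    return media_files[url]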