pixeltable 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pixeltable has been flagged as potentially problematic by the registry.
- pixeltable/__init__.py +2 -27
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +309 -59
- pixeltable/catalog/globals.py +5 -5
- pixeltable/catalog/insertable_table.py +13 -1
- pixeltable/catalog/path.py +13 -6
- pixeltable/catalog/table.py +28 -41
- pixeltable/catalog/table_version.py +100 -72
- pixeltable/catalog/view.py +35 -9
- pixeltable/dataframe.py +2 -2
- pixeltable/exceptions.py +20 -2
- pixeltable/exec/expr_eval/evaluators.py +0 -4
- pixeltable/exec/expr_eval/expr_eval_node.py +0 -1
- pixeltable/exec/sql_node.py +3 -3
- pixeltable/exprs/json_path.py +1 -5
- pixeltable/func/__init__.py +1 -1
- pixeltable/func/aggregate_function.py +1 -1
- pixeltable/func/callable_function.py +1 -1
- pixeltable/func/expr_template_function.py +2 -2
- pixeltable/func/function.py +3 -4
- pixeltable/func/query_template_function.py +87 -4
- pixeltable/func/tools.py +1 -1
- pixeltable/func/udf.py +1 -1
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +1 -1
- pixeltable/functions/bedrock.py +130 -0
- pixeltable/functions/huggingface.py +7 -6
- pixeltable/functions/image.py +15 -16
- pixeltable/functions/mistralai.py +3 -2
- pixeltable/functions/openai.py +9 -8
- pixeltable/functions/together.py +4 -3
- pixeltable/globals.py +7 -2
- pixeltable/io/datarows.py +4 -3
- pixeltable/io/label_studio.py +17 -17
- pixeltable/io/pandas.py +13 -12
- pixeltable/io/table_data_conduit.py +8 -2
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +26 -1
- pixeltable/plan.py +2 -3
- pixeltable/share/packager.py +9 -25
- pixeltable/share/publish.py +20 -9
- pixeltable/store.py +7 -4
- pixeltable/utils/exception_handler.py +59 -0
- {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/METADATA +1 -1
- {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/RECORD +53 -48
- {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/WHEEL +1 -1
- {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/entry_points.txt +0 -0
pixeltable/globals.py CHANGED

```diff
@@ -616,9 +616,14 @@ def _extract_paths(
         matches = [name for name, entry in dir_entries.items() if entry.dir is not None]
     else:
         matches = [name for name, entry in dir_entries.items() if entry.table is not None]
+
+    # Filter out system paths
+    matches = [name for name in matches if catalog.is_valid_identifier(name)]
     result = [parent.append(name) for name in matches]
-
-
+
+    for name, entry in dir_entries.items():
+        if len(entry.dir_entries) > 0 and catalog.is_valid_identifier(name):
+            result.extend(_extract_paths(entry.dir_entries, parent=parent.append(name), entry_type=entry_type))
     return result
 
 
```
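The new lines do two things: they drop entries whose names are not valid identifiers (system paths), and they recurse into non-empty subdirectories. A minimal standalone sketch of that traversal pattern follows; `DirEntry` and the leading-underscore convention for system names are hypothetical stand-ins, not Pixeltable's actual catalog types or rules:

```python
from dataclasses import dataclass, field

@dataclass
class DirEntry:
    # hypothetical stand-in for the catalog's directory-entry type
    children: dict[str, 'DirEntry'] = field(default_factory=dict)

def is_valid_identifier(name: str) -> bool:
    # assumption: system paths are distinguished by a leading underscore
    return name.isidentifier() and not name.startswith('_')

def extract_paths(entries: dict[str, DirEntry], parent: str = '') -> list[str]:
    # keep only user-visible names at this level
    result = [f'{parent}.{n}' if parent else n for n in entries if is_valid_identifier(n)]
    # recurse into non-empty subdirectories, mirroring the new loop above
    for name, entry in entries.items():
        if entry.children and is_valid_identifier(name):
            result.extend(extract_paths(entry.children, f'{parent}.{name}' if parent else name))
    return result

root = {'docs': DirEntry({'films': DirEntry()}), '_system': DirEntry()}
print(extract_paths(root))  # ['docs', 'docs.films']
```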
pixeltable/io/datarows.py CHANGED

```diff
@@ -3,13 +3,14 @@ from __future__ import annotations
 from typing import Any, Iterable, Optional, Union
 
 import pixeltable as pxt
+import pixeltable.type_system as ts
 from pixeltable import exceptions as excs
 
 
 def _infer_schema_from_rows(
     rows: Iterable[dict[str, Any]], schema_overrides: dict[str, Any], primary_key: list[str]
-) -> dict[str, pxt.ColumnType]:
-    schema: dict[str, pxt.ColumnType] = {}
+) -> dict[str, ts.ColumnType]:
+    schema: dict[str, ts.ColumnType] = {}
     cols_with_nones: set[str] = set()
 
     for n, row in enumerate(rows):
@@ -23,7 +24,7 @@ def _infer_schema_from_rows(
         elif value is not None:
             # If `key` is not in `schema_overrides`, then we infer its type from the data.
             # The column type will always be nullable by default.
-            col_type = pxt.ColumnType.infer_literal_type(value, nullable=col_name not in primary_key)
+            col_type = ts.ColumnType.infer_literal_type(value, nullable=col_name not in primary_key)
             if col_type is None:
                 raise excs.Error(
                     f'Could not infer type for column `{col_name}`; the value in row {n} '
```
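For context, here is a toy version of the inference contract used above: a literal maps to a column type, and primary-key columns come out non-nullable. The names below are illustrative stand-ins, not Pixeltable's real API:

```python
from typing import Any, Iterable, Optional

def infer_literal_type(value: Any, nullable: bool) -> Optional[str]:
    # maps a Python literal to a type name; None if unsupported (bool checked before int,
    # since bool is a subclass of int)
    for py_type, name in ((bool, 'Bool'), (int, 'Int'), (float, 'Float'), (str, 'String')):
        if isinstance(value, py_type):
            return f'{name}(nullable={nullable})'
    return None

def infer_schema(rows: Iterable[dict[str, Any]], primary_key: list[str]) -> dict[str, str]:
    schema: dict[str, str] = {}
    for row in rows:
        for col_name, value in row.items():
            if col_name not in schema and value is not None:
                # primary-key columns are inferred as non-nullable
                col_type = infer_literal_type(value, nullable=col_name not in primary_key)
                if col_type is None:
                    raise ValueError(f'could not infer type for column {col_name!r}')
                schema[col_name] = col_type
    return schema

print(infer_schema([{'id': 1, 'name': 'a'}], primary_key=['id']))
# {'id': 'Int(nullable=False)', 'name': 'String(nullable=True)'}
```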
pixeltable/io/label_studio.py CHANGED

````diff
@@ -11,7 +11,7 @@ import label_studio_sdk  # type: ignore[import-untyped]
 import PIL.Image
 from requests.exceptions import HTTPError
 
-import pixeltable as pxt
+import pixeltable.type_system as ts
 from pixeltable import Column, Table, env, exceptions as excs
 from pixeltable.config import Config
 from pixeltable.exprs import ColumnRef, DataRow, Expr
@@ -89,21 +89,21 @@ class LabelStudioProject(Project):
     def __project_config(self) -> '_LabelStudioConfig':
         return self.__parse_project_config(self.project_params['label_config'])
 
-    def get_export_columns(self) -> dict[str, pxt.ColumnType]:
+    def get_export_columns(self) -> dict[str, ts.ColumnType]:
         """
         The data keys and preannotation fields specified in this Label Studio project.
         """
         return self.__project_config.export_columns
 
-    def get_import_columns(self) -> dict[str, pxt.ColumnType]:
+    def get_import_columns(self) -> dict[str, ts.ColumnType]:
         """
         Always contains a single entry:
 
         ```
-        {"annotations": pxt.JsonType(nullable=True)}
+        {"annotations": ts.JsonType(nullable=True)}
         ```
         """
-        return {ANNOTATIONS_COLUMN: pxt.JsonType(nullable=True)}
+        return {ANNOTATIONS_COLUMN: ts.JsonType(nullable=True)}
 
     def sync(self, t: Table, export_data: bool, import_data: bool) -> SyncStatus:
         _logger.info(
@@ -412,8 +412,8 @@ class LabelStudioProject(Project):
         # TODO(aaron-siegel): Simplify this once propagation is properly implemented in batch_update
         ancestor = t
         while local_annotations_col not in ancestor._tbl_version.get().cols:
-            assert ancestor._base is not None
-            ancestor = ancestor._base
+            assert ancestor._base_table is not None
+            ancestor = ancestor._base_table
         update_status = ancestor.batch_update(updates)
         env.Env.get().console_logger.info(f'Updated annotation(s) from {len(updates)} task(s) in {self}.')
         return SyncStatus(pxt_rows_updated=update_status.num_rows, num_excs=update_status.num_excs)
@@ -577,10 +577,10 @@ class LabelStudioProject(Project):
         else:
             local_annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
         if local_annotations_column not in t._schema:
-            t.add_columns({local_annotations_column: pxt.JsonType(nullable=True)})
+            t.add_columns({local_annotations_column: ts.JsonType(nullable=True)})
 
         resolved_col_mapping = cls.validate_columns(
-            t, config.export_columns, {ANNOTATIONS_COLUMN: pxt.JsonType(nullable=True)}, col_mapping
+            t, config.export_columns, {ANNOTATIONS_COLUMN: ts.JsonType(nullable=True)}, col_mapping
         )
 
         # Perform some additional validation
@@ -649,7 +649,7 @@ class LabelStudioProject(Project):
 @dataclass(frozen=True)
 class _DataKey:
     name: Optional[str]  # The 'name' attribute of the data key; may differ from the field name
-    column_type: pxt.ColumnType
+    column_type: ts.ColumnType
 
 
 @dataclass(frozen=True)
@@ -673,18 +673,18 @@ class _LabelStudioConfig:
     )
 
     @property
-    def export_columns(self) -> dict[str, pxt.ColumnType]:
+    def export_columns(self) -> dict[str, ts.ColumnType]:
         data_key_cols = {key_id: key_info.column_type for key_id, key_info in self.data_keys.items()}
-        rl_cols = {name: pxt.JsonType() for name in self.rectangle_labels}
+        rl_cols = {name: ts.JsonType() for name in self.rectangle_labels}
         return {**data_key_cols, **rl_cols}
 
 
 ANNOTATIONS_COLUMN = 'annotations'
 _PAGE_SIZE = 100  # This is the default used in the LS SDK
 _LS_TAG_MAP = {
-    'header': pxt.StringType(),
-    'text': pxt.StringType(),
-    'image': pxt.ImageType(),
-    'video': pxt.VideoType(),
-    'audio': pxt.AudioType(),
+    'header': ts.StringType(),
+    'text': ts.StringType(),
+    'image': ts.ImageType(),
+    'video': ts.VideoType(),
+    'audio': ts.AudioType(),
 }
````
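The tag map is what ties a Label Studio config to Pixeltable column types. A sketch of how `export_columns` combines it with `RectangleLabels` controls, assuming pixeltable is installed; the standalone helper function here is illustrative, not the project's actual method:

```python
import pixeltable.type_system as ts

_LS_TAG_MAP = {
    'header': ts.StringType(),
    'text': ts.StringType(),
    'image': ts.ImageType(),
    'video': ts.VideoType(),
    'audio': ts.AudioType(),
}

def export_columns(data_keys: dict[str, str], rectangle_labels: list[str]) -> dict[str, ts.ColumnType]:
    # each data key maps to the Pixeltable type of its Label Studio tag;
    # each <RectangleLabels> control contributes a JSON column of annotations
    data_key_cols = {name: _LS_TAG_MAP[tag] for name, tag in data_keys.items()}
    rl_cols = {name: ts.JsonType() for name in rectangle_labels}
    return {**data_key_cols, **rl_cols}

print(export_columns({'frame': 'image'}, ['bboxes']))
```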
pixeltable/io/pandas.py CHANGED

```diff
@@ -8,6 +8,7 @@ from pandas.api.types import is_datetime64_any_dtype, is_extension_array_dtype
 
 import pixeltable as pxt
 import pixeltable.exceptions as excs
+import pixeltable.type_system as ts
 
 
 def import_pandas(
@@ -119,15 +120,15 @@ def _df_check_primary_key_values(df: pd.DataFrame, primary_key: list[str]) -> None:
 
 
 def df_infer_schema(
-    df: pd.DataFrame, schema_overrides: dict[str, pxt.ColumnType], primary_key: list[str]
-) -> dict[str, pxt.ColumnType]:
+    df: pd.DataFrame, schema_overrides: dict[str, ts.ColumnType], primary_key: list[str]
+) -> dict[str, ts.ColumnType]:
     """
     Infers a Pixeltable schema from a Pandas DataFrame.
 
     Returns:
         A tuple containing a Pixeltable schema and a list of primary key column names.
     """
-    pd_schema: dict[str, pxt.ColumnType] = {}
+    pd_schema: dict[str, ts.ColumnType] = {}
     for pd_name, pd_dtype in zip(df.columns, df.dtypes):
         if pd_name in schema_overrides:
             pxt_type = schema_overrides[pd_name]
@@ -138,7 +139,7 @@ def df_infer_schema(
     return pd_schema
 
 
-def __pd_dtype_to_pxt_type(pd_dtype: DtypeObj, nullable: bool) -> Optional[pxt.ColumnType]:
+def __pd_dtype_to_pxt_type(pd_dtype: DtypeObj, nullable: bool) -> Optional[ts.ColumnType]:
     """
     Determines a pixeltable ColumnType from a pandas dtype
 
@@ -146,21 +147,21 @@ def __pd_dtype_to_pxt_type(pd_dtype: DtypeObj, nullable: bool) -> Optional[pxt.ColumnType]:
         pd_dtype: A pandas dtype object
 
     Returns:
-        pxt.ColumnType: A pixeltable ColumnType
+        ts.ColumnType: A pixeltable ColumnType
     """
     # Pandas extension arrays / types (Int64, boolean, string[pyarrow], etc.) are not directly
     # compatible with NumPy dtypes
     # The timezone-aware datetime64[ns, tz=] dtype is a pandas extension dtype
     if is_datetime64_any_dtype(pd_dtype):
-        return pxt.TimestampType(nullable=nullable)
+        return ts.TimestampType(nullable=nullable)
     if is_extension_array_dtype(pd_dtype):
         return None
     # Most other pandas dtypes are directly NumPy compatible
     assert isinstance(pd_dtype, np.dtype)
-    return pxt.ArrayType.from_np_dtype(pd_dtype, nullable)
+    return ts.ArrayType.from_np_dtype(pd_dtype, nullable)
 
 
-def __pd_coltype_to_pxt_type(pd_dtype: DtypeObj, data_col: pd.Series, nullable: bool) -> pxt.ColumnType:
+def __pd_coltype_to_pxt_type(pd_dtype: DtypeObj, data_col: pd.Series, nullable: bool) -> ts.ColumnType:
     """
     Infers a Pixeltable type based on a pandas dtype.
     """
@@ -176,12 +177,12 @@ def __pd_coltype_to_pxt_type(pd_dtype: DtypeObj, data_col: pd.Series, nullable:
 
     if len(data_col) == 0:
         # No non-null values; default to FloatType (the Pandas type of an all-NaN column)
-        return pxt.FloatType(nullable=nullable)
+        return ts.FloatType(nullable=nullable)
 
-    inferred_type = pxt.ColumnType.infer_common_literal_type(data_col)
+    inferred_type = ts.ColumnType.infer_common_literal_type(data_col)
     if inferred_type is None:
         # Fallback on StringType if everything else fails
-        return pxt.StringType(nullable=nullable)
+        return ts.StringType(nullable=nullable)
     else:
         return inferred_type.copy(nullable=nullable)
 
@@ -189,7 +190,7 @@ def __pd_coltype_to_pxt_type(pd_dtype: DtypeObj, data_col: pd.Series, nullable:
 
 
 def _df_row_to_pxt_row(
-    row: tuple[Any, ...], schema: dict[str, pxt.ColumnType], col_mapping: Optional[dict[str, str]]
+    row: tuple[Any, ...], schema: dict[str, ts.ColumnType], col_mapping: Optional[dict[str, str]]
 ) -> dict[str, Any]:
     """Convert a row to insertable format"""
     pxt_row: dict[str, Any] = {}
```
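The branch order in `__pd_dtype_to_pxt_type` matters: timezone-aware datetimes are pandas extension dtypes, so the datetime check must run before the extension-dtype check. A standalone sketch of the same decision tree, with illustrative type names standing in for the Pixeltable types:

```python
from typing import Optional

import numpy as np
import pandas as pd
from pandas.api.types import is_datetime64_any_dtype, is_extension_array_dtype

def pd_dtype_to_type_name(pd_dtype) -> Optional[str]:
    if is_datetime64_any_dtype(pd_dtype):  # also matches tz-aware datetime64[ns, tz]
        return 'Timestamp'
    if is_extension_array_dtype(pd_dtype):  # Int64, boolean, string[pyarrow], ...
        return None  # resolved later from the actual column values
    assert isinstance(pd_dtype, np.dtype)  # plain NumPy dtype
    return f'Array[{pd_dtype.name}]'

df = pd.DataFrame({
    't': pd.to_datetime(['2024-01-01']),
    'x': [1.0],
    'n': pd.array([1], dtype='Int64'),
})
print({c: pd_dtype_to_type_name(dt) for c, dt in zip(df.columns, df.dtypes)})
# {'t': 'Timestamp', 'x': 'Array[float64]', 'n': None}
```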
pixeltable/io/table_data_conduit.py CHANGED

```diff
@@ -15,6 +15,7 @@ from pyarrow.parquet import ParquetDataset
 
 import pixeltable as pxt
 import pixeltable.exceptions as excs
+import pixeltable.type_system as ts
 from pixeltable.io.pandas import _df_check_primary_key_values, _df_row_to_pxt_row, df_infer_schema
 from pixeltable.utils import parse_local_file_path
 
@@ -72,6 +73,11 @@ class TableDataConduit:
     def check_source_format(self) -> None:
         assert self.source_format is None or TableDataConduitFormat.is_valid(self.source_format)
 
+    def __post_init__(self) -> None:
+        """If no extra_fields were provided, initialize to empty dict"""
+        if self.extra_fields is None:
+            self.extra_fields = {}
+
     @classmethod
     def is_rowdata_structure(cls, d: TableDataSource) -> bool:
         if not isinstance(d, list) or len(d) == 0:
@@ -83,7 +89,7 @@ class TableDataConduit:
 
     def normalize_pxt_schema_types(self) -> None:
         for name, coltype in self.pxt_schema.items():
-            self.pxt_schema[name] = pxt.ColumnType.normalize_type(coltype)
+            self.pxt_schema[name] = ts.ColumnType.normalize_type(coltype)
 
     def infer_schema(self) -> dict[str, Any]:
         raise NotImplementedError
@@ -393,7 +399,7 @@ class HFTableDataConduit(TableDataConduit):
                 f'Column name `{self.column_name_for_split}` already exists in dataset schema;'
                 f'provide a different `column_name_for_split`'
             )
-        self.src_schema[self.column_name_for_split] = pxt.StringType(nullable=True)
+        self.src_schema[self.column_name_for_split] = ts.StringType(nullable=True)
 
         inferred_schema, inferred_pk, self.source_column_map = normalize_schema_names(
             self.src_schema, self.src_pk, self.src_schema_overrides, True
```
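The new `__post_init__` is the standard dataclass idiom for defaulting an optional mutable field after construction. A minimal generic illustration (not the real `TableDataConduit` class):

```python
from dataclasses import dataclass
from typing import Any, Optional

@dataclass
class Conduit:
    extra_fields: Optional[dict[str, Any]] = None

    def __post_init__(self) -> None:
        # if no extra_fields were provided, initialize to an empty dict;
        # a plain `= {}` field default would be shared across instances
        if self.extra_fields is None:
            self.extra_fields = {}

print(Conduit().extra_fields)                    # {}
print(Conduit(extra_fields={'a': 1}).extra_fields)  # {'a': 1}
```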
pixeltable/metadata/__init__.py CHANGED

```diff
@@ -16,7 +16,7 @@ _console_logger = ConsoleLogger(logging.getLogger('pixeltable'))
 
 
 # current version of the metadata; this is incremented whenever the metadata schema changes
-VERSION = 31
+VERSION = 34
 
 
 def create_system_info(engine: sql.engine.Engine) -> None:
```
pixeltable/metadata/converters/convert_19.py CHANGED

```diff
@@ -3,7 +3,7 @@ from typing import Any, Optional
 
 import sqlalchemy as sql
 
-import pixeltable as pxt
+import pixeltable.type_system as ts
 from pixeltable.metadata import register_converter, schema
 from pixeltable.metadata.converters.util import convert_table_md
 
@@ -34,7 +34,7 @@ def __update_timestamp_literals(k: Any, v: Any) -> Optional[tuple[Any, Any]]:
     # timestamp literal, which (in version 19) is stored in the DB as a naive datetime.
     # We convert it to an aware datetime, stored in UTC.
     assert v['_classname'] == 'Literal'
-    assert v['val_t'] == pxt.ColumnType.Type.TIMESTAMP.name
+    assert v['val_t'] == ts.ColumnType.Type.TIMESTAMP.name
     assert isinstance(v['val'], str)
     dt = datetime.datetime.fromisoformat(v['val'])
     assert dt.tzinfo is None  # In version 19 all timestamps are naive
```
pixeltable/metadata/converters/convert_31.py ADDED

```diff
@@ -0,0 +1,11 @@
+import sqlalchemy as sql
+
+from pixeltable.metadata import register_converter
+
+
+@register_converter(version=31)
+def _(engine: sql.engine.Engine) -> None:
+    # Add a column "lock_dummy: int8" to the dirs table in the store
+    # This column is the target of an UPDATE operation to synchronize directory operations
+    with engine.begin() as conn:
+        conn.execute(sql.text('ALTER TABLE dirs ADD COLUMN lock_dummy int8'))
```
pixeltable/metadata/converters/convert_32.py ADDED

```diff
@@ -0,0 +1,15 @@
+from uuid import UUID
+
+import sqlalchemy as sql
+
+from pixeltable.metadata import register_converter
+from pixeltable.metadata.converters.util import convert_table_md
+
+
+@register_converter(version=32)
+def _(engine: sql.engine.Engine) -> None:
+    convert_table_md(engine, table_md_updater=__update_table_md)
+
+
+def __update_table_md(table_md: dict, table_id: UUID) -> None:
+    table_md['is_replica'] = False
```
pixeltable/metadata/converters/convert_33.py ADDED

```diff
@@ -0,0 +1,17 @@
+from uuid import UUID
+
+import sqlalchemy as sql
+
+from pixeltable.metadata import register_converter
+from pixeltable.metadata.converters.util import convert_table_md
+
+
+@register_converter(version=33)
+def _(engine: sql.engine.Engine) -> None:
+    convert_table_md(engine, table_md_updater=__update_table_md)
+
+
+def __update_table_md(table_md: dict, table_id: UUID) -> None:
+    """Set default value of 'is_pk' field in column metadata to False"""
+    for col_md in table_md['column_md'].values():
+        col_md['is_pk'] = False if col_md['is_pk'] is None else col_md['is_pk']
```
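These converters follow the migration scheme implied by `register_converter(version=N)`: the converter registered at version N upgrades metadata from N to N+1, so a store at version 31 runs all three in sequence to reach 34. A toy registry showing the mechanism (not pixeltable's actual implementation):

```python
from typing import Callable

_converters: dict[int, Callable[[dict], None]] = {}

def register_converter(version: int):
    # decorator that records the converter for one version step
    def decorator(fn: Callable[[dict], None]):
        _converters[version] = fn
        return fn
    return decorator

@register_converter(version=33)
def _(md: dict) -> None:
    # mirrors convert_33: give 'is_pk' an explicit False default
    for col_md in md['column_md'].values():
        col_md['is_pk'] = False if col_md['is_pk'] is None else col_md['is_pk']

def upgrade(md: dict, from_version: int, to_version: int) -> None:
    # run each registered step in order
    for v in range(from_version, to_version):
        _converters[v](md)

md = {'column_md': {'0': {'is_pk': None}}}
upgrade(md, from_version=33, to_version=34)
print(md)  # {'column_md': {'0': {'is_pk': False}}}
```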
pixeltable/metadata/notes.py CHANGED

```diff
@@ -2,6 +2,9 @@
 # rather than as a comment, so that the existence of a description can be enforced by
 # the unit tests when new versions are added.
 VERSION_NOTES = {
+    34: 'Set default value for is_pk field in column metadata to False',
+    33: 'Add is_replica field to table metadata',
+    32: 'Add the lock_dummy BIGINT column to the dirs table',
     31: 'Add table ids to metadata structs',
     30: 'Store default values and constant arguments as literals',
     29: 'Add user and additional_md fields to metadata structs',
```
pixeltable/metadata/schema.py CHANGED

```diff
@@ -1,7 +1,7 @@
 import dataclasses
 import typing
 import uuid
-from typing import Any, Optional, TypeVar, Union, get_type_hints
+from typing import Any, NamedTuple, Optional, TypeVar, Union, get_type_hints
 
 import sqlalchemy as sql
 from sqlalchemy import BigInteger, ForeignKey, Integer, LargeBinary, orm
@@ -84,6 +84,8 @@ class Dir(Base):
     )
     parent_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
     md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)  # DirMd
+    # This field is updated to synchronize database operations across multiple sessions
+    lock_dummy: orm.Mapped[int] = orm.mapped_column(BigInteger, nullable=True)
 
 
 @dataclasses.dataclass
@@ -155,6 +157,7 @@ class ViewMd:
 class TableMd:
     tbl_id: str  # uuid.UUID
     name: str
+    is_replica: bool
 
     user: Optional[str]
 
@@ -286,3 +289,25 @@ class Function(Base):
     dir_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
     md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)  # FunctionMd
     binary_obj: orm.Mapped[Optional[bytes]] = orm.mapped_column(LargeBinary, nullable=True)
+
+
+class FullTableMd(NamedTuple):
+    tbl_md: TableMd
+    version_md: TableVersionMd
+    schema_version_md: TableSchemaVersionMd
+
+    def as_dict(self) -> dict[str, Any]:
+        return {
+            'table_id': self.tbl_md.tbl_id,
+            'table_md': dataclasses.asdict(self.tbl_md),
+            'table_version_md': dataclasses.asdict(self.version_md),
+            'table_schema_version_md': dataclasses.asdict(self.schema_version_md),
+        }
+
+    @classmethod
+    def from_dict(cls, data_dict: dict[str, Any]) -> 'FullTableMd':
+        return FullTableMd(
+            tbl_md=md_from_dict(TableMd, data_dict['table_md']),
+            version_md=md_from_dict(TableVersionMd, data_dict['table_version_md']),
+            schema_version_md=md_from_dict(TableSchemaVersionMd, data_dict['table_schema_version_md']),
+        )
```
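`FullTableMd` is the serialization unit used by the packager and by `clone_snapshot` below: `as_dict()` produces the wire format, `from_dict()` reconstitutes it. A toy mirror of that round-trip contract, with a placeholder field so the sketch runs standalone (the real tuple carries the three metadata dataclasses and deserializes via `md_from_dict`):

```python
import dataclasses
from typing import Any, NamedTuple

@dataclasses.dataclass
class TableMd:
    # placeholder fields; the real TableMd has many more
    tbl_id: str
    name: str
    is_replica: bool

class FullTableMd(NamedTuple):
    tbl_md: TableMd

    def as_dict(self) -> dict[str, Any]:
        return {'table_id': self.tbl_md.tbl_id, 'table_md': dataclasses.asdict(self.tbl_md)}

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> 'FullTableMd':
        return cls(tbl_md=TableMd(**d['table_md']))

md = FullTableMd(TableMd('0123', 'films', is_replica=False))
assert FullTableMd.from_dict(md.as_dict()) == md  # lossless round trip
```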
pixeltable/plan.py CHANGED

```diff
@@ -768,8 +768,7 @@ class Planner:
         # - select list subexprs that aren't aggregates
         # - join clause subexprs
         # - subexprs of Where clause conjuncts that can't be run in SQL
-        # - all grouping exprs
-        # run in Python)
+        # - all grouping exprs
         candidates = list(
             exprs.Expr.list_subexprs(
                 analyzer.select_list,
@@ -784,7 +783,7 @@ class Planner:
         candidates.extend(
             exprs.Expr.subexprs(analyzer.filter, filter=sql_elements.contains, traverse_matches=False)
         )
-        if …
+        if analyzer.group_by_clause is not None:
             candidates.extend(
                 exprs.Expr.list_subexprs(analyzer.group_by_clause, filter=sql_elements.contains, traverse_matches=False)
             )
```
pixeltable/share/packager.py CHANGED

```diff
@@ -1,4 +1,3 @@
-import dataclasses
 import io
 import json
 import logging
@@ -6,7 +5,6 @@ import tarfile
 import urllib.parse
 import urllib.request
 import uuid
-from datetime import datetime
 from pathlib import Path
 from typing import Any, Iterator, Optional
 
@@ -58,28 +56,14 @@ class TablePackager:
         self.tmp_dir = Path(Env.get().create_tmp_path())
         self.media_files = {}
 
-        # …
-        …
-        # These are temporary; will replace with a better solution once the concurrency
-        # changes to catalog have been merged
-                'table_md': dataclasses.asdict(t._tbl_version.get()._create_tbl_md()),
-                'table_version_md': dataclasses.asdict(
-                    t._tbl_version.get()._create_version_md(datetime.now().timestamp())
-                ),
-                'table_schema_version_md': dataclasses.asdict(
-                    t._tbl_version.get()._create_schema_version_md(0)
-                ),
-            }
-            for t in (table, *table._bases)
-        ]
-        },
-        }
+        # Load metadata
+        with Env.get().begin_xact():
+            tbl_md = catalog.Catalog.get().load_replica_md(table)
+        self.md = {
+            'pxt_version': pxt.__version__,
+            'pxt_md_version': metadata.VERSION,
+            'md': {'tables': [md.as_dict() for md in tbl_md]},
+        }
         if additional_md is not None:
             self.md.update(additional_md)
 
@@ -94,7 +78,7 @@ class TablePackager:
             json.dump(self.md, fp)
         self.iceberg_catalog = sqlite_catalog(self.tmp_dir / 'warehouse')
         with Env.get().begin_xact():
-            ancestors = (self.table, *self.table._bases)
+            ancestors = (self.table, *self.table._base_tables)
             for t in ancestors:
                 _logger.info(f"Exporting table '{t._path}'.")
                 self.__export_table(t)
```
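Putting the packager changes together, the bundle metadata now has roughly this shape. Values are placeholders: `tables` carries one `FullTableMd.as_dict()` entry per table in the snapshot's ancestor chain, and callers can merge extra keys via `additional_md`:

```python
bundle_md = {
    'pxt_version': '0.3.13',
    'pxt_md_version': 34,
    'md': {
        'tables': [
            {  # one FullTableMd.as_dict() per ancestor table
                'table_id': '<uuid>',
                'table_md': {'...': '...'},
                'table_version_md': {'...': '...'},
                'table_schema_version_md': {'...': '...'},
            }
        ]
    },
    'table_uri': 'pxt://user/snapshot',  # merged in via additional_md by publish_snapshot
}
```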
pixeltable/share/publish.py CHANGED

```diff
@@ -1,4 +1,3 @@
-import os
 import sys
 import urllib.parse
 import urllib.request
@@ -10,22 +9,22 @@ from tqdm import tqdm
 import pixeltable as pxt
 from pixeltable import exceptions as excs
 from pixeltable.env import Env
+from pixeltable.metadata.schema import FullTableMd
 from pixeltable.utils import sha256sum
 
 from .packager import TablePackager
 
 # These URLs are abstracted out for now, but will be replaced with actual (hard-coded) URLs once the
 # pixeltable.com URLs are available.
-…
-…
+
+PIXELTABLE_API_URL = 'https://internal-api.pixeltable.com'
 
 
 def publish_snapshot(dest_tbl_uri: str, src_tbl: pxt.Table) -> str:
     packager = TablePackager(src_tbl, additional_md={'table_uri': dest_tbl_uri})
-    request_json = packager.md
-    headers_json = {'X-api-key': Env.get().pxt_api_key}
-
-    response = requests.post(_PUBLISH_URL, json=request_json, headers=headers_json)
+    request_json = packager.md | {'operation_type': 'publish_snapshot'}
+    headers_json = {'X-api-key': Env.get().pxt_api_key, 'Content-Type': 'application/json'}
+    response = requests.post(PIXELTABLE_API_URL, json=request_json, headers=headers_json)
     if response.status_code != 200:
         raise excs.Error(f'Error publishing snapshot: {response.text}')
     response_json = response.json()
@@ -47,14 +46,14 @@ def publish_snapshot(dest_tbl_uri: str, src_tbl: pxt.Table) -> str:
     Env.get().console_logger.info('Finalizing snapshot ...')
 
     finalize_request_json = {
+        'operation_type': 'finalize_snapshot',
         'upload_id': upload_id,
         'datafile': bundle.name,
         'size': bundle.stat().st_size,
         'sha256': sha256sum(bundle),  # Generate our own SHA for independent verification
     }
-
     # TODO: Use Pydantic for validation
-    finalize_response = requests.post(…)
+    finalize_response = requests.post(PIXELTABLE_API_URL, json=finalize_request_json, headers=headers_json)
     if finalize_response.status_code != 200:
         raise excs.Error(f'Error finalizing snapshot: {finalize_response.text}')
     finalize_response_json = finalize_response.json()
@@ -66,6 +65,18 @@ def publish_snapshot(dest_tbl_uri: str, src_tbl: pxt.Table) -> str:
     return confirmed_tbl_uri
 
 
+def clone_snapshot(dest_tbl_uri: str) -> list[FullTableMd]:
+    headers_json = {'X-api-key': Env.get().pxt_api_key, 'Content-Type': 'application/json'}
+    clone_request_json = {'operation_type': 'clone_snapshot', 'table_uri': dest_tbl_uri}
+    response = requests.post(PIXELTABLE_API_URL, json=clone_request_json, headers=headers_json)
+    if response.status_code != 200:
+        raise excs.Error(f'Error cloning snapshot: {response.text}')
+    response_json = response.json()
+    if not isinstance(response_json, dict) or 'table_uri' not in response_json:
+        raise excs.Error(f'Unexpected response from server.\n{response_json}')
+    return [FullTableMd.from_dict(t) for t in response_json['md']['tables']]
+
+
 def _upload_bundle_to_s3(bundle: Path, parsed_location: urllib.parse.ParseResult) -> None:
     from pixeltable.utils.s3 import get_client
 
```
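The publish, finalize, and clone calls above all hit the single `PIXELTABLE_API_URL` endpoint and are dispatched by an `operation_type` field in the JSON body. A condensed sketch of that request pattern; the API key, helper name, and error handling here are simplified stand-ins, not the module's actual helpers:

```python
import requests

API_URL = 'https://internal-api.pixeltable.com'
HEADERS = {'X-api-key': '<your-api-key>', 'Content-Type': 'application/json'}

def call_api(operation_type: str, **payload) -> dict:
    # every operation posts to the same endpoint; the body's operation_type selects it
    response = requests.post(API_URL, json={'operation_type': operation_type, **payload}, headers=HEADERS)
    if response.status_code != 200:
        raise RuntimeError(f'API error: {response.text}')
    return response.json()

# e.g. cloning a published snapshot returns the table metadata bundle:
# tables_md = call_api('clone_snapshot', table_uri='pxt://user/snapshot')['md']['tables']
```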
pixeltable/store.py CHANGED

```diff
@@ -16,6 +16,7 @@ from pixeltable import catalog, exceptions as excs, exprs
 from pixeltable.env import Env
 from pixeltable.exec import ExecNode
 from pixeltable.metadata import schema
+from pixeltable.utils.exception_handler import run_cleanup
 from pixeltable.utils.media_store import MediaStore
 from pixeltable.utils.sql import log_explain, log_stmt
 
@@ -232,7 +233,6 @@ class StoreBase:
         assert col.tbl.id == self.tbl_version.id
         num_excs = 0
         num_rows = 0
-
         # create temp table to store output of exec_plan, with the same primary key as the store table
         tmp_name = f'temp_{self._storage_name()}'
         tmp_pk_cols = [sql.Column(col.name, col.type, primary_key=True) for col in self.pk_columns()]
@@ -301,10 +301,13 @@ class StoreBase:
             )
             log_explain(_logger, update_stmt, conn)
             conn.execute(update_stmt)
-
         finally:
-            self.sa_md.remove(tmp_tbl)
-            tmp_tbl.drop(bind=conn)
+
+            def remove_tmp_tbl() -> None:
+                self.sa_md.remove(tmp_tbl)
+                tmp_tbl.drop(bind=conn)
+
+            run_cleanup(remove_tmp_tbl, raise_error=True)
         return num_excs
 
     def insert_rows(
```
pixeltable/utils/exception_handler.py ADDED

```diff
@@ -0,0 +1,59 @@
+import logging
+import sys
+from typing import Any, Callable, Optional, TypeVar
+
+R = TypeVar('R')
+
+
+def _is_in_exception() -> bool:
+    """
+    Check if code is currently executing within an exception context.
+    """
+    current_exception = sys.exc_info()[1]
+    return current_exception is not None
+
+
+def run_cleanup_on_exception(cleanup_func: Callable[..., R], *args: Any, **kwargs: Any) -> Optional[R]:
+    """
+    Runs cleanup only when running in an exception context.
+
+    The function `run_cleanup_on_exception()` should be used to clean up resources when an operation fails.
+    This is typically done using a try, except, and finally block, with the resource cleanup logic placed within
+    the except block. However, this pattern may not handle KeyboardInterrupt exceptions.
+    To ensure that resources are always cleaned up at least once when an exception or KeyboardInterrupt occurs,
+    create an idempotent function for cleaning up resources and pass it to the `run_cleanup_on_exception()` function
+    from the finally block.
+    """
+    if _is_in_exception():
+        return run_cleanup(cleanup_func, *args, raise_error=False, **kwargs)
+    return None
+
+
+def run_cleanup(cleanup_func: Callable[..., R], *args: Any, raise_error: bool = True, **kwargs: Any) -> Optional[R]:
+    """
+    Runs a cleanup function. If interrupted, retry cleanup.
+    The `run_cleanup()` function ensures that the `cleanup_func()` function executes at least once.
+    If the `cleanup_func()` is interrupted during execution, it will be retried.
+
+    Args:
+        cleanup_func: an idempotent function
+        raise_error: raise an exception if an error occurs during cleanup.
+    """
+    try:
+        logging.debug(f'Running cleanup function: {cleanup_func.__name__!r}')
+        return cleanup_func(*args, **kwargs)
+    except KeyboardInterrupt as interrupt:
+        # Save original exception and re-attempt cleanup
+        original_exception = interrupt
+        logging.debug(f'Cleanup {cleanup_func.__name__!r} interrupted, retrying')
+        try:
+            return cleanup_func(*args, **kwargs)
+        except Exception as e:
+            # Suppress this exception
+            logging.error(f'Cleanup {cleanup_func.__name__!r} failed with exception {e}')
+        raise KeyboardInterrupt from original_exception
+    except Exception as e:
+        logging.error(f'Cleanup {cleanup_func.__name__!r} failed with exception {e}')
+        if raise_error:
+            raise e
+        return None
```
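Following the docstrings above, a hypothetical usage sketch: pair `run_cleanup_on_exception()` with a `finally:` block so an idempotent cleanup runs exactly when the operation failed, including on Ctrl-C. The file-copy operation and helper names here are illustrative, not from the package:

```python
import os

from pixeltable.utils.exception_handler import run_cleanup_on_exception

def copy_with_cleanup(src: str, dst: str) -> None:
    # hypothetical operation; the cleanup below is idempotent, as required
    def remove_partial_output() -> None:
        if os.path.exists(dst):
            os.remove(dst)

    try:
        with open(src, 'rb') as f_in, open(dst, 'wb') as f_out:
            f_out.write(f_in.read())
    finally:
        # no-op on success; removes the partial file when an exception
        # (including KeyboardInterrupt) is propagating out of the try block
        run_cleanup_on_exception(remove_partial_output)
```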
{pixeltable-0.3.11.dist-info → pixeltable-0.3.13.dist-info}/METADATA CHANGED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: pixeltable
-Version: 0.3.11
+Version: 0.3.13
 Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
 License: Apache-2.0
 Keywords: data-science,machine-learning,database,ai,computer-vision,chatbot,ml,artificial-intelligence,feature-engineering,multimodal,mlops,feature-store,vector-database,llm,genai
```