PyPI - pixeltable - Versions diffs - 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl - Mend

pixeltable 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (60)

pixeltable/__init__.py +1 -0
pixeltable/__version__.py +2 -2
pixeltable/catalog/__init__.py +3 -11
pixeltable/catalog/catalog.py +575 -220
pixeltable/catalog/column.py +22 -23
pixeltable/catalog/dir.py +1 -2
pixeltable/catalog/globals.py +2 -148
pixeltable/catalog/insertable_table.py +15 -13
pixeltable/catalog/path.py +6 -0
pixeltable/catalog/schema_object.py +9 -4
pixeltable/catalog/table.py +96 -85
pixeltable/catalog/table_version.py +257 -174
pixeltable/catalog/table_version_path.py +1 -1
pixeltable/catalog/tbl_ops.py +44 -0
pixeltable/catalog/update_status.py +179 -0
pixeltable/catalog/view.py +50 -56
pixeltable/config.py +76 -12
pixeltable/dataframe.py +19 -6
pixeltable/env.py +50 -4
pixeltable/exec/data_row_batch.py +3 -1
pixeltable/exec/exec_node.py +7 -24
pixeltable/exec/expr_eval/schedulers.py +134 -7
pixeltable/exec/in_memory_data_node.py +6 -7
pixeltable/exprs/column_property_ref.py +21 -9
pixeltable/exprs/column_ref.py +7 -2
pixeltable/exprs/function_call.py +2 -2
pixeltable/exprs/row_builder.py +10 -9
pixeltable/exprs/rowid_ref.py +0 -4
pixeltable/func/function.py +3 -3
pixeltable/functions/audio.py +36 -9
pixeltable/functions/gemini.py +4 -4
pixeltable/functions/openai.py +1 -2
pixeltable/functions/video.py +59 -16
pixeltable/globals.py +109 -24
pixeltable/io/__init__.py +1 -1
pixeltable/io/datarows.py +2 -1
pixeltable/io/external_store.py +3 -55
pixeltable/io/globals.py +4 -4
pixeltable/io/hf_datasets.py +10 -2
pixeltable/io/label_studio.py +16 -16
pixeltable/io/pandas.py +1 -0
pixeltable/io/table_data_conduit.py +12 -13
pixeltable/iterators/audio.py +17 -8
pixeltable/iterators/image.py +5 -2
pixeltable/metadata/__init__.py +1 -1
pixeltable/metadata/converters/convert_39.py +125 -0
pixeltable/metadata/converters/util.py +3 -0
pixeltable/metadata/notes.py +1 -0
pixeltable/metadata/schema.py +50 -1
pixeltable/plan.py +4 -0
pixeltable/share/packager.py +20 -38
pixeltable/store.py +40 -51
pixeltable/type_system.py +2 -2
pixeltable/utils/coroutine.py +6 -23
pixeltable/utils/media_store.py +50 -0
{pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/METADATA +1 -1
{pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/RECORD +60 -57
{pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/LICENSE +0 -0
{pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/WHEEL +0 -0
{pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/entry_points.txt +0 -0

pixeltable/functions/video.py CHANGED Viewed

@@ -1,19 +1,7 @@
 """
 Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `VideoType`.
-Example:
-```python
-import pixeltable as pxt
-import pixeltable.functions as pxtf
-t = pxt.get_table(...)
-t.select(pxtf.video.extract_audio(t.video_col)).collect()
-```
 """
-import tempfile
-import uuid
-from pathlib import Path
 from typing import Any, Optional
 import av
@@ -68,8 +56,7 @@ class make_video(pxt.Aggregator):
         if frame is None:
             return
         if self.container is None:
-            (_, output_filename) = tempfile.mkstemp(suffix='.mp4', dir=str(env.Env.get().tmp_dir))
-            self.out_file = Path(output_filename)
+            self.out_file = env.Env.get().create_tmp_path('.mp4')
             self.container = av.open(str(self.out_file), mode='w')
             self.stream = self.container.add_stream('h264', rate=self.fps)
             self.stream.pix_fmt = 'yuv420p'
@@ -92,12 +79,22 @@ def extract_audio(
     video_path: pxt.Video, stream_idx: int = 0, format: str = 'wav', codec: Optional[str] = None
 ) -> pxt.Audio:
     """
-    Extract an audio stream from a video file, save it as a media file and return its path.
+    Extract an audio stream from a video.
     Args:
         stream_idx: Index of the audio stream to extract.
         format: The target audio format. (`'wav'`, `'mp3'`, `'flac'`).
         codec: The codec to use for the audio stream. If not provided, a default codec will be used.
+    Returns:
+        The extracted audio.
+    Examples:
+        Add a computed column to a table `tbl` that extracts audio from an existing column `video_col`:
+        >>> tbl.add_computed_column(
+        ...     extracted_audio=tbl.video_col.extract_audio(format='flac')
+        ... )
     """
     if format not in _format_defaults:
         raise ValueError(f'extract_audio(): unsupported audio format: {format}')
@@ -108,7 +105,7 @@ def extract_audio(
             return None
         audio_stream = container.streams.audio[stream_idx]
         # create this in our tmp directory, so it'll get cleaned up if it's being generated as part of a query
-        output_filename = str(env.Env.get().tmp_dir / f'{uuid.uuid4()}.{ext}')
+        output_filename = str(env.Env.get().create_tmp_path(f'.{ext}'))
         with av.open(output_filename, 'w', format=format) as output_container:
             output_stream = output_container.add_stream(codec or default_codec)
@@ -124,6 +121,52 @@ def extract_audio(
 def get_metadata(video: pxt.Video) -> dict:
     """
     Gets various metadata associated with a video file and returns it as a dictionary.
+    Args:
+        video: The video to get metadata for.
+    Returns:
+        A `dict` such as the following:
+            ```json
+            {
+                'bit_exact': False,
+                'bit_rate': 967260,
+                'size': 2234371,
+                'metadata': {
+                    'encoder': 'Lavf60.16.100',
+                    'major_brand': 'isom',
+                    'minor_version': '512',
+                    'compatible_brands': 'isomiso2avc1mp41',
+                },
+                'streams': [
+                    {
+                        'type': 'video',
+                        'width': 640,
+                        'height': 360,
+                        'frames': 462,
+                        'time_base': 1.0 / 12800,
+                        'duration': 236544,
+                        'duration_seconds': 236544.0 / 12800,
+                        'average_rate': 25.0,
+                        'base_rate': 25.0,
+                        'guessed_rate': 25.0,
+                        'metadata': {
+                            'language': 'und',
+                            'handler_name': 'L-SMASH Video Handler',
+                            'vendor_id': '[0][0][0][0]',
+                            'encoder': 'Lavc60.31.102 libx264',
+                        },
+                        'codec_context': {'name': 'h264', 'codec_tag': 'avc1', 'profile': 'High', 'pix_fmt': 'yuv420p'},
+                    }
+                ],
+            }
+            ```
+    Examples:
+        Extract metadata for files in the `video_col` column of the table `tbl`:
+        >>> tbl.select(tbl.video_col.get_metadata()).collect()
     """
     return _get_metadata(video)

pixeltable/globals.py CHANGED Viewed

@@ -8,9 +8,10 @@ from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, Un
 import pandas as pd
 from pandas.io.formats.style import Styler
-from pixeltable import DataFrame, catalog, exceptions as excs, exprs, func, share
+from pixeltable import DataFrame, catalog, exceptions as excs, exprs, func, share, type_system as ts
 from pixeltable.catalog import Catalog, TableVersionPath
 from pixeltable.catalog.insertable_table import OnErrorParameter
+from pixeltable.config import Config
 from pixeltable.env import Env
 from pixeltable.iterators import ComponentIterator
@@ -34,13 +35,16 @@ if TYPE_CHECKING:
 _logger = logging.getLogger('pixeltable')
-def init() -> None:
+def init(config_overrides: Optional[dict[str, Any]] = None) -> None:
     """Initializes the Pixeltable environment."""
+    if config_overrides is None:
+        config_overrides = {}
+    Config.init(config_overrides)
     _ = Catalog.get()
 def create_table(
-    path_str: str,
+    path: str,
     schema: Optional[dict[str, Any]] = None,
     *,
     source: Optional[TableDataSource] = None,
@@ -54,14 +58,24 @@ def create_table(
     if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
     extra_args: Optional[dict[str, Any]] = None,  # Additional arguments to data source provider
 ) -> catalog.Table:
-    """Create a new base table.
+    """Create a new base table. Exactly one of `schema` or `source` must be provided.
+    If a `schema` is provided, then an empty table will be created with the specified schema.
+    If a `source` is provided, then Pixeltable will attempt to infer a data source format and table schema from the
+    contents of the specified data, and the data will be imported from the specified source into the new table. The
+    source format and/or schema can be specified directly via the `source_format` and `schema_overrides` parameters.
     Args:
-        path_str: Path to the table.
-        schema: A dictionary that maps column names to column types
-        source: A data source from which a table schema can be inferred and data imported
-        source_format: A hint to the format of the source data
-        schema_overrides: If specified, then columns in `schema_overrides` will be given the specified types
+        path: Pixeltable path (qualified name) of the table, such as `'my_table'` or `'my_dir.my_subdir.my_table'`.
+        schema: Schema for the new table, mapping column names to Pixeltable types.
+        source: A data source (file, URL, DataFrame, or list of rows) to import from.
+        source_format: Must be used in conjunction with a `source`.
+            If specified, then the given format will be used to read the source data. (Otherwise,
+            Pixeltable will attempt to infer the format from the source data.)
+        schema_overrides: Must be used in conjunction with a `source`.
+            If specified, then columns in `schema_overrides` will be given the specified types.
+            (Pixeltable will attempt to infer the types of any columns not specified.)
         on_error: Determines the behavior if an error occurs while evaluating a computed column or detecting an
             invalid media file (such as a corrupt image) for one of the inserted rows.
@@ -77,14 +91,15 @@ def create_table(
             - `'on_read'`: validate media files at query time
             - `'on_write'`: validate media files during insert/update operations
-        if_exists: Directive regarding how to handle if the path already exists.
-            Must be one of the following:
+        if_exists: Determines the behavior if a table already exists at the specified path location.
             - `'error'`: raise an error
             - `'ignore'`: do nothing and return the existing table handle
-            - `'replace'`: if the existing table has no views, drop and replace it with a new one
-            - `'replace_force'`: drop the existing table and all its views, and create a new one
-        extra_args: Additional arguments to pass to the source data provider
+            - `'replace'`: if the existing table has no views or snapshots, drop and replace it with a new one;
+                raise an error if the existing table has views or snapshots
+            - `'replace_force'`: drop the existing table and all its views and snapshots, and create a new one
+        extra_args: Must be used in conjunction with a `source`. If specified, then additional arguments will be
+            passed along to the source data provider.
     Returns:
         A handle to the newly created table, or to an already existing table at the path when `if_exists='ignore'`.
@@ -110,7 +125,7 @@ def create_table(
         >>> tbl1 = pxt.get_table('orig_table')
         ... tbl2 = pxt.create_table('new_table', tbl1.where(tbl1.col1 < 10).select(tbl1.col2))
-        Create a table if does not already exist, otherwise get the existing table:
+        Create a table if it does not already exist, otherwise get the existing table:
         >>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.String}, if_exists='ignore')
@@ -126,12 +141,12 @@ def create_table(
     from pixeltable.io.utils import normalize_primary_key_parameter
     if (schema is None) == (source is None):
-        raise excs.Error('Must provide either a `schema` or a `source`')
+        raise excs.Error('Either a `schema` or a `source` must be provided (but not both)')
     if schema is not None and (len(schema) == 0 or not isinstance(schema, dict)):
         raise excs.Error('`schema` must be a non-empty dictionary')
-    path_obj = catalog.Path(path_str)
+    path_obj = catalog.Path(path)
     if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
     media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')
     primary_key: Optional[list[str]] = normalize_primary_key_parameter(primary_key)
@@ -142,7 +157,14 @@ def create_table(
         tds = UnkTableDataConduit(source, source_format=source_format, extra_fields=extra_args)
         tds.check_source_format()
         data_source = tds.specialize()
-        data_source.src_schema_overrides = schema_overrides
+        src_schema_overrides: dict[str, ts.ColumnType] = {}
+        if schema_overrides is not None:
+            for col_name, py_type in schema_overrides.items():
+                col_type = ts.ColumnType.normalize_type(py_type, nullable_default=True, allow_builtin_types=False)
+                if col_type is None:
+                    raise excs.Error(f'Invalid type for column {col_name!r} in `schema_overrides`: {py_type}')
+                src_schema_overrides[col_name] = col_type
+        data_source.src_schema_overrides = src_schema_overrides
         data_source.src_pk = primary_key
         data_source.infer_schema()
         schema = data_source.pxt_schema
@@ -251,9 +273,7 @@ def create_view(
         tbl_version_path = base._tbl_version_path
         sample_clause = None
     elif isinstance(base, DataFrame):
-        base._validate_mutable('create_view', allow_select=True)
-        if len(base._from_clause.tbls) > 1:
-            raise excs.Error('Cannot create a view of a join')
+        base._validate_mutable_op_sequence('create_view', allow_select=True)
         tbl_version_path = base._from_clause.tbls[0]
         where = base.where_clause
         sample_clause = base.sample_clause
@@ -533,9 +553,12 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
         >>> pxt.list_tables('dir1')
     """
-    path_obj = catalog.Path(dir_path, empty_is_valid=True)  # validate format
-    cat = Catalog.get()
-    contents = cat.get_dir_contents(path_obj, recursive=recursive)
+    return _list_tables(dir_path, recursive=recursive, allow_system_paths=False)
+def _list_tables(dir_path: str = '', recursive: bool = True, allow_system_paths: bool = False) -> list[str]:
+    path_obj = catalog.Path(dir_path, empty_is_valid=True, allow_system_paths=allow_system_paths)
+    contents = Catalog.get().get_dir_contents(path_obj, recursive=recursive)
     return [str(p) for p in _extract_paths(contents, parent=path_obj, entry_type=catalog.Table)]
@@ -633,6 +656,68 @@ def drop_dir(path: str, force: bool = False, if_not_exists: Literal['error', 'ig
     Catalog.get().drop_dir(path_obj, if_not_exists=if_not_exists_, force=force)
+def ls(path: str = '') -> pd.DataFrame:
+    """
+    List the contents of a Pixeltable directory.
+    This function returns a Pandas DataFrame representing a human-readable listing of the specified directory,
+    including various attributes such as version and base table, as appropriate.
+    To get a programmatic list of tables and/or directories, use [list_tables()][pixeltable.list_tables] and/or
+    [list_dirs()][pixeltable.list_dirs] instead.
+    """
+    from pixeltable.catalog import retry_loop
+    from pixeltable.metadata import schema
+    cat = Catalog.get()
+    path_obj = catalog.Path(path, empty_is_valid=True)
+    dir_entries = cat.get_dir_contents(path_obj)
+    @retry_loop(for_write=False)
+    def op() -> list[list[str]]:
+        rows: list[list[str]] = []
+        for name, entry in dir_entries.items():
+            if name.startswith('_'):
+                continue
+            if entry.dir is not None:
+                kind = 'dir'
+                version = ''
+                base = ''
+            else:
+                assert entry.table is not None
+                assert isinstance(entry.table, schema.Table)
+                tbl = cat.get_table_by_id(entry.table.id)
+                md = tbl.get_metadata()
+                base = md['base'] or ''
+                if base.startswith('_'):
+                    base = '<anonymous base table>'
+                if md['is_snapshot']:
+                    kind = 'snapshot'
+                elif md['is_view']:
+                    kind = 'view'
+                else:
+                    kind = 'table'
+                version = '' if kind == 'snapshot' else md['version']
+                if md['is_replica']:
+                    kind = f'{kind}-replica'
+            rows.append([name, kind, version, base])
+        return rows
+    rows = op()
+    rows = sorted(rows, key=lambda x: x[0])
+    df = pd.DataFrame(
+        {
+            'Name': [row[0] for row in rows],
+            'Kind': [row[1] for row in rows],
+            'Version': [row[2] for row in rows],
+            'Base': [row[3] for row in rows],
+        },
+        index=([''] * len(rows)),
+    )
+    return df
 def _extract_paths(
     dir_entries: dict[str, Catalog.DirEntry],
     parent: catalog.Path,

pixeltable/io/__init__.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # ruff: noqa: F401
 from .datarows import import_json, import_rows
-from .external_store import ExternalStore, SyncStatus
+from .external_store import ExternalStore
 from .globals import create_label_studio_project, export_images_as_fo_dataset
 from .hf_datasets import import_huggingface_dataset
 from .pandas import import_csv, import_excel, import_pandas

pixeltable/io/datarows.py CHANGED Viewed

@@ -8,7 +8,7 @@ from pixeltable import exceptions as excs
 def _infer_schema_from_rows(
-    rows: Iterable[dict[str, Any]], schema_overrides: dict[str, Any], primary_key: list[str]
+    rows: Iterable[dict[str, Any]], schema_overrides: dict[str, ts.ColumnType], primary_key: list[str]
 ) -> dict[str, ts.ColumnType]:
     schema: dict[str, ts.ColumnType] = {}
     cols_with_nones: set[str] = set()
@@ -20,6 +20,7 @@ def _infer_schema_from_rows(
                 # in which the column names are encountered in the input data, even if `schema_overrides`
                 # is specified.
                 if col_name not in schema:
+                    assert isinstance(schema_overrides[col_name], ts.ColumnType)
                     schema[col_name] = schema_overrides[col_name]
             elif value is not None:
                 # If `key` is not in `schema_overrides`, then we infer its type from the data.

pixeltable/io/external_store.py CHANGED Viewed

@@ -3,14 +3,13 @@ from __future__ import annotations
 import abc
 import itertools
 import logging
-from dataclasses import dataclass, field
 from typing import Any, Optional
 import pixeltable.exceptions as excs
 import pixeltable.type_system as ts
 from pixeltable import Column, Table
 from pixeltable.catalog import ColumnHandle, TableVersion
-from pixeltable.catalog.globals import RowCountStats, UpdateStatus
+from pixeltable.catalog.update_status import UpdateStatus
 _logger = logging.getLogger('pixeltable')
@@ -46,7 +45,7 @@ class ExternalStore(abc.ABC):
         """
     @abc.abstractmethod
-    def sync(self, t: Table, export_data: bool, import_data: bool) -> SyncStatus:
+    def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
         """
         Called by `Table.sync()` to implement store-specific synchronization logic.
         """
@@ -263,57 +262,6 @@ class Project(ExternalStore, abc.ABC):
         return resolved_col_mapping
-@dataclass(frozen=True)
-class SyncStatus:
-    # stats for the rows affected by the operation in the external store
-    ext_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
-    # stats for the rows affected by the operation
-    row_count_stats: RowCountStats = field(default_factory=RowCountStats)
-    @property
-    def num_excs(self) -> int:
-        """
-        Returns the total number of Pixeltable exceptions that occurred during the operation.
-        """
-        return self.row_count_stats.num_excs
-    @property
-    def pxt_rows_updated(self) -> int:
-        """
-        Returns the number of Pixeltable rows that were updated as a result of the operation.
-        """
-        return self.row_count_stats.upd_rows
-    @property
-    def external_rows_updated(self) -> int:
-        return self.ext_row_count_stats.upd_rows
-    @property
-    def external_rows_created(self) -> int:
-        return self.ext_row_count_stats.ins_rows
-    @property
-    def external_rows_deleted(self) -> int:
-        return self.ext_row_count_stats.del_rows
-    def __add__(self, other: 'SyncStatus') -> 'SyncStatus':
-        """
-        Add the sync status from two SyncStatus objects together.
-        """
-        return SyncStatus(
-            ext_row_count_stats=self.ext_row_count_stats + other.ext_row_count_stats,
-            row_count_stats=self.row_count_stats + other.row_count_stats,
-        )
-    @classmethod
-    def from_update_status(cls, us: UpdateStatus) -> 'SyncStatus':
-        """
-        Copy information from an UpdateStatus to a SyncStatus.
-        """
-        return SyncStatus(row_count_stats=us.row_count_stats + us.cascade_row_count_stats)
 class MockProject(Project):
     """A project that cannot be synced, used mainly for testing."""
@@ -348,7 +296,7 @@ class MockProject(Project):
     def get_import_columns(self) -> dict[str, ts.ColumnType]:
         return self.import_cols
-    def sync(self, t: Table, export_data: bool, import_data: bool) -> SyncStatus:
+    def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
         raise NotImplementedError()
     def delete(self) -> None:

pixeltable/io/globals.py CHANGED Viewed

@@ -5,8 +5,8 @@ from typing import TYPE_CHECKING, Any, Literal, Optional, Union
 import pixeltable as pxt
 import pixeltable.exceptions as excs
 from pixeltable import Table, exprs
+from pixeltable.catalog.update_status import UpdateStatus
 from pixeltable.env import Env
-from pixeltable.io.external_store import SyncStatus
 if TYPE_CHECKING:
     import fiftyone as fo  # type: ignore[import-untyped]
@@ -22,7 +22,7 @@ def create_label_studio_project(
     sync_immediately: bool = True,
     s3_configuration: Optional[dict[str, Any]] = None,
     **kwargs: Any,
-) -> SyncStatus:
+) -> UpdateStatus:
     """
     Create a new Label Studio project and link it to the specified [`Table`][pixeltable.Table].
@@ -96,7 +96,7 @@ def create_label_studio_project(
             [Label Studio start_project docs](https://labelstud.io/sdk/project.html#label_studio_sdk.project.Project.start_project).
     Returns:
-        A `SyncStatus` representing the status of any synchronization operations that occurred.
+        An `UpdateStatus` representing the status of any synchronization operations that occurred.
     Examples:
         Create a Label Studio project whose tasks correspond to videos stored in the `video_col`
@@ -136,7 +136,7 @@ def create_label_studio_project(
     if sync_immediately:
         return t.sync()
     else:
-        return SyncStatus()
+        return UpdateStatus()
 def export_images_as_fo_dataset(

pixeltable/io/hf_datasets.py CHANGED Viewed

@@ -50,10 +50,18 @@ def _to_pixeltable_type(feature_type: Any, nullable: bool) -> Optional[ts.Column
     elif isinstance(feature_type, datasets.Sequence):
         # example: cohere wiki. Sequence(feature=Value(dtype='float32', id=None), length=-1, id=None)
         dtype = _to_pixeltable_type(feature_type.feature, nullable)
-        length = feature_type.length if feature_type.length != -1 else None
-        return ts.ArrayType(shape=(length,), dtype=dtype)
+        if dtype is None:
+            return None
+        if dtype.is_int_type() or dtype.is_float_type() or dtype.is_bool_type() or dtype.is_string_type():
+            length = feature_type.length if feature_type.length != -1 else None
+            return ts.ArrayType(shape=(length,), dtype=dtype, nullable=nullable)
+        else:
+            # Sequence of dicts must be cast as Json
+            return ts.JsonType(nullable=nullable)
     elif isinstance(feature_type, datasets.Image):
         return ts.ImageType(nullable=nullable)
+    elif isinstance(feature_type, dict):
+        return ts.JsonType(nullable=nullable)
     else:
         return None

pixeltable/io/label_studio.py CHANGED Viewed

@@ -14,10 +14,10 @@ from requests.exceptions import HTTPError
 import pixeltable.type_system as ts
 from pixeltable import Column, Table, env, exceptions as excs
 from pixeltable.catalog import ColumnHandle
-from pixeltable.catalog.globals import RowCountStats
+from pixeltable.catalog.update_status import RowCountStats, UpdateStatus
 from pixeltable.config import Config
 from pixeltable.exprs import ColumnRef, DataRow, Expr
-from pixeltable.io.external_store import Project, SyncStatus
+from pixeltable.io.external_store import Project
 from pixeltable.utils import coco
 # label_studio_sdk>=1 and label_studio_sdk<1 are not compatible, so we need to try
@@ -111,14 +111,14 @@ class LabelStudioProject(Project):
         """
         return {ANNOTATIONS_COLUMN: ts.JsonType(nullable=True)}
-    def sync(self, t: Table, export_data: bool, import_data: bool) -> SyncStatus:
+    def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
         _logger.info(
             f'Syncing Label Studio project "{self.project_title}" with table `{t._name}`'
             f' (export: {export_data}, import: {import_data}).'
         )
         # Collect all existing tasks into a dict with entries `rowid: task`
         tasks = {tuple(task['meta']['rowid']): task for task in self.__fetch_all_tasks()}
-        sync_status = SyncStatus()
+        sync_status = UpdateStatus()
         if export_data:
             export_sync_status = self.__update_tasks(t, tasks)
             sync_status += export_sync_status
@@ -148,7 +148,7 @@ class LabelStudioProject(Project):
                 f'Label Studio project {self.project_title!r}.'
             )
-    def __update_tasks(self, t: Table, existing_tasks: dict[tuple, dict]) -> SyncStatus:
+    def __update_tasks(self, t: Table, existing_tasks: dict[tuple, dict]) -> UpdateStatus:
         """
         Updates all tasks in this Label Studio project based on the Pixeltable data:
         - Creates new tasks for rows that don't map to any existing task;
@@ -161,7 +161,7 @@ class LabelStudioProject(Project):
         t_data_cols = [t_col for t_col, ext_col_name in self.col_mapping.items() if ext_col_name in config.data_keys]
         if len(t_data_cols) == 0:
-            return SyncStatus()
+            return UpdateStatus()
         # Columns in `t` that map to `rectanglelabels` preannotations
         t_rl_cols = [
@@ -192,7 +192,7 @@ class LabelStudioProject(Project):
         media_col: ColumnHandle,
         t_rl_cols: list[ColumnHandle],
         rl_info: list['_RectangleLabel'],
-    ) -> SyncStatus:
+    ) -> UpdateStatus:
         is_stored = media_col.get().is_stored
         # If it's a stored column, we can use `localpath`
         localpath_col_opt = [t[media_col.get().name].localpath] if is_stored else []
@@ -238,7 +238,7 @@ class LabelStudioProject(Project):
         env.Env.get().console_logger.info(f'Created {tasks_created} new task(s) in {self}.')
-        sync_status = SyncStatus(ext_row_count_stats=RowCountStats(ins_rows=tasks_created))
+        sync_status = UpdateStatus(ext_row_count_stats=RowCountStats(ins_rows=tasks_created))
         deletion_sync_status = self.__delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)
         sync_status += deletion_sync_status
@@ -251,7 +251,7 @@ class LabelStudioProject(Project):
         t_data_cols: list[ColumnHandle],
         t_rl_cols: list[ColumnHandle],
         rl_info: list['_RectangleLabel'],
-    ) -> SyncStatus:
+    ) -> UpdateStatus:
         ext_data_cols = [self.col_mapping[col] for col in t_data_cols]
         expr_refs: dict[str, Expr] = {}  # kwargs for the select statement
         for col in t_data_cols:
@@ -342,7 +342,7 @@ class LabelStudioProject(Project):
             f'Created {tasks_created} new task(s) and updated {tasks_updated} existing task(s) in {self}.'
         )
-        sync_status = SyncStatus(ext_row_count_stats=RowCountStats(ins_rows=tasks_created, upd_rows=tasks_updated))
+        sync_status = UpdateStatus(ext_row_count_stats=RowCountStats(ins_rows=tasks_created, upd_rows=tasks_updated))
         deletion_sync_status = self.__delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)
         sync_status += deletion_sync_status
@@ -367,7 +367,7 @@ class LabelStudioProject(Project):
     def __delete_stale_tasks(
         self, existing_tasks: dict[tuple, dict], row_ids_in_pxt: set[tuple], tasks_created: int
-    ) -> SyncStatus:
+    ) -> UpdateStatus:
         deleted_rowids = set(existing_tasks.keys()) - row_ids_in_pxt
         # Sanity check the math
         assert len(deleted_rowids) == len(existing_tasks) + tasks_created - len(row_ids_in_pxt)
@@ -383,11 +383,11 @@ class LabelStudioProject(Project):
         for rowid in deleted_rowids:
             del existing_tasks[rowid]
-        return SyncStatus(ext_row_count_stats=RowCountStats(del_rows=len(deleted_rowids)))
+        return UpdateStatus(ext_row_count_stats=RowCountStats(del_rows=len(deleted_rowids)))
-    def __update_table_from_tasks(self, t: Table, tasks: dict[tuple, dict]) -> SyncStatus:
+    def __update_table_from_tasks(self, t: Table, tasks: dict[tuple, dict]) -> UpdateStatus:
         if ANNOTATIONS_COLUMN not in self.col_mapping.values():
-            return SyncStatus()
+            return UpdateStatus()
         annotations = {
             # Replace [] by None to indicate no annotations. We do want to sync rows with no annotations,
@@ -422,9 +422,9 @@ class LabelStudioProject(Project):
                 ancestor = ancestor._get_base_table()
             update_status = ancestor.batch_update(updates)
             env.Env.get().console_logger.info(f'Updated annotation(s) from {len(updates)} task(s) in {self}.')
-            return SyncStatus.from_update_status(update_status)
+            return update_status
         else:
-            return SyncStatus()
+            return UpdateStatus()
     def as_dict(self) -> dict[str, Any]:
         return {

pixeltable/io/pandas.py CHANGED Viewed

@@ -132,6 +132,7 @@ def df_infer_schema(
     pd_schema: dict[str, ts.ColumnType] = {}
     for pd_name, pd_dtype in zip(df.columns, df.dtypes):
         if pd_name in schema_overrides:
+            assert isinstance(schema_overrides[pd_name], ts.ColumnType)
             pxt_type = schema_overrides[pd_name]
         else:
             pxt_type = __pd_coltype_to_pxt_type(pd_dtype, df[pd_name], pd_name not in primary_key)

pixeltable 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl

Potentially problematic release.

pixeltable 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl