PyPI - pixeltable - Versions diffs - 0.4.12__py3-none-any.whl → 0.4.13__py3-none-any.whl - Mend

pixeltable-0.4.12-py3-none-any.whl → pixeltable-0.4.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (17)

pixeltable/__init__.py +11 -1
pixeltable/catalog/__init__.py +2 -1
pixeltable/catalog/table.py +72 -116
pixeltable/catalog/table_metadata.py +96 -0
pixeltable/env.py +1 -0
pixeltable/globals.py +3 -4
pixeltable/io/__init__.py +2 -1
pixeltable/io/lancedb.py +3 -0
pixeltable/io/parquet.py +9 -89
pixeltable/io/table_data_conduit.py +2 -2
pixeltable/utils/arrow.py +97 -2
pixeltable/utils/lancedb.py +88 -0
{pixeltable-0.4.12.dist-info → pixeltable-0.4.13.dist-info}/METADATA +162 -127
{pixeltable-0.4.12.dist-info → pixeltable-0.4.13.dist-info}/RECORD +17 -14
{pixeltable-0.4.12.dist-info → pixeltable-0.4.13.dist-info}/WHEEL +0 -0
{pixeltable-0.4.12.dist-info → pixeltable-0.4.13.dist-info}/entry_points.txt +0 -0
{pixeltable-0.4.12.dist-info → pixeltable-0.4.13.dist-info}/licenses/LICENSE +0 -0

pixeltable/__init__.py CHANGED Viewed

@@ -1,7 +1,17 @@
 # ruff: noqa: F401
 from .__version__ import __version__, __version_tuple__
-from .catalog import Column, ColumnMetadata, IndexMetadata, InsertableTable, Table, TableMetadata, UpdateStatus, View
+from .catalog import (
+    Column,
+    ColumnMetadata,
+    IndexMetadata,
+    InsertableTable,
+    Table,
+    TableMetadata,
+    UpdateStatus,
+    VersionMetadata,
+    View,
+)
 from .dataframe import DataFrame
 from .exceptions import Error, ExprEvalError, PixeltableWarning
 from .func import Aggregator, Function, Tool, ToolChoice, Tools, expr_udf, mcp_udfs, query, retrieval_udf, uda, udf

pixeltable/catalog/__init__.py CHANGED Viewed

@@ -8,7 +8,8 @@ from .insertable_table import InsertableTable
 from .named_function import NamedFunction
 from .path import Path
 from .schema_object import SchemaObject
-from .table import ColumnMetadata, IndexMetadata, Table, TableMetadata
+from .table import Table
+from .table_metadata import ColumnMetadata, IndexMetadata, TableMetadata, VersionMetadata
 from .table_version import TableVersion
 from .table_version_handle import ColumnHandle, TableVersionHandle
 from .table_version_path import TableVersionPath

pixeltable/catalog/table.py CHANGED Viewed

@@ -7,9 +7,7 @@ import json
 import logging
 from keyword import iskeyword as is_python_keyword
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, ClassVar, Iterable, Literal, Optional, TypedDict, overload
-from typing import _GenericAlias  # type: ignore[attr-defined]  # isort: skip
+from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, overload
 from uuid import UUID
 import pandas as pd
@@ -17,6 +15,13 @@ import sqlalchemy as sql
 import pixeltable as pxt
 from pixeltable import catalog, env, exceptions as excs, exprs, index, type_system as ts
+from pixeltable.catalog.table_metadata import (
+    ColumnMetadata,
+    EmbeddingIndexParams,
+    IndexMetadata,
+    TableMetadata,
+    VersionMetadata,
+)
 from pixeltable.metadata import schema
 from pixeltable.metadata.utils import MetadataUtils
@@ -37,6 +42,9 @@ from .table_version_handle import TableVersionHandle
 from .table_version_path import TableVersionPath
 from .update_status import UpdateStatus
+from typing import _GenericAlias  # type: ignore[attr-defined]  # isort: skip
 if TYPE_CHECKING:
     import torch.utils.data
@@ -95,7 +103,7 @@ class Table(SchemaObject):
         return op()
-    def _get_metadata(self) -> 'TableMetadata':
+    def _get_metadata(self) -> TableMetadata:
         columns = self._tbl_version_path.columns()
         column_info: dict[str, ColumnMetadata] = {}
         for col in columns:
@@ -1690,43 +1698,35 @@ class Table(SchemaObject):
     def _ipython_key_completions_(self) -> list[str]:
         return list(self._get_schema().keys())
-    _REPORT_SCHEMA: ClassVar[dict[str, ts.ColumnType]] = {
-        'version': ts.IntType(),
-        'created_at': ts.TimestampType(),
-        'user': ts.StringType(nullable=True),
-        'note': ts.StringType(),
-        'inserts': ts.IntType(nullable=True),
-        'updates': ts.IntType(nullable=True),
-        'deletes': ts.IntType(nullable=True),
-        'errors': ts.IntType(nullable=True),
-        'computed': ts.IntType(),
-        'schema_change': ts.StringType(),
-    }
-    def history(self, n: Optional[int] = None) -> pixeltable.dataframe.DataFrameResultSet:
-        """Returns rows of information about the versions of this table, most recent first.
+    def get_versions(self, n: Optional[int] = None) -> list[VersionMetadata]:
+        """
+        Returns information about versions of this table, most recent first.
+        `get_versions()` is intended for programmatic access to version metadata; for human-readable
+        output, use [`history()`][pixeltable.Table.history] instead.
         Args:
-            n: a limit to the number of versions listed
+            n: if specified, will return at most `n` versions
-        Examples:
-            Report history:
+        Returns:
+            A list of [VersionMetadata][pixeltable.VersionMetadata] dictionaries, one per version retrieved, most
+            recent first.
-            >>> tbl.history()
+        Examples:
+            Retrieve metadata about all versions of the table `tbl`:
-            Report only the most recent 5 changes to the table:
+            >>> tbl.get_versions()
-            >>> tbl.history(n=5)
+            Retrieve metadata about the most recent 5 versions of the table `tbl`:
-        Returns:
-            A list of information about each version, ordered from most recent to oldest version.
+            >>> tbl.get_versions(n=5)
         """
         from pixeltable.catalog import Catalog
         if n is None:
             n = 1_000_000_000
         if not isinstance(n, int) or n < 1:
-            raise excs.Error(f'Invalid value for n: {n}')
+            raise excs.Error(f'Invalid value for `n`: {n}')
         # Retrieve the table history components from the catalog
         tbl_id = self._id
@@ -1744,104 +1744,60 @@ class Table(SchemaObject):
         else:
             over_count = 0
-        report_lines: list[list[Any]] = []
+        metadata_dicts: list[VersionMetadata] = []
         for vers_md in vers_list[0 : len(vers_list) - over_count]:
             version = vers_md.version_md.version
-            schema_change = md_dict.get(version, '')
+            schema_change = md_dict.get(version, None)
             update_status = vers_md.version_md.update_status
             if update_status is None:
                 update_status = UpdateStatus()
-            change_type = 'schema' if schema_change != '' else ''
-            if change_type == '':
-                change_type = 'data'
+            change_type: Literal['schema', 'data'] = 'schema' if schema_change is not None else 'data'
             rcs = update_status.row_count_stats + update_status.cascade_row_count_stats
-            report_line = [
-                version,
-                datetime.datetime.fromtimestamp(vers_md.version_md.created_at),
-                vers_md.version_md.user,
-                change_type,
-                rcs.ins_rows,
-                rcs.upd_rows,
-                rcs.del_rows,
-                rcs.num_excs,
-                rcs.computed_values,
-                schema_change,
-            ]
-            report_lines.append(report_line)
+            metadata_dicts.append(
+                VersionMetadata(
+                    version=version,
+                    created_at=datetime.datetime.fromtimestamp(vers_md.version_md.created_at, tz=datetime.timezone.utc),
+                    user=vers_md.version_md.user,
+                    change_type=change_type,
+                    inserts=rcs.ins_rows,
+                    updates=rcs.upd_rows,
+                    deletes=rcs.del_rows,
+                    errors=rcs.num_excs,
+                    computed=rcs.computed_values,
+                    schema_change=schema_change,
+                )
+            )
-        return pxt.dataframe.DataFrameResultSet(report_lines, self._REPORT_SCHEMA)
+        return metadata_dicts
+    def history(self, n: Optional[int] = None) -> pd.DataFrame:
+        """
+        Returns a human-readable report about versions of this table.
+        `history()` is intended for human-readable output of version metadata; for programmatic access,
+        use [`get_versions()`][pixeltable.Table.get_versions] instead.
+        Args:
+            n: if specified, will return at most `n` versions
+        Returns:
+            A report with information about each version, one per row, most recent first.
+        Examples:
+            Report all versions of the table:
+            >>> tbl.history()
+            Report only the most recent 5 changes to the table:
+            >>> tbl.history(n=5)
+        """
+        versions = self.get_versions(n)
+        assert len(versions) > 0
+        return pd.DataFrame([list(v.values()) for v in versions], columns=list(versions[0].keys()))
     def __check_mutable(self, op_descr: str) -> None:
         if self._tbl_version_path.is_snapshot():
             raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a snapshot.')
         if self._tbl_version_path.is_replica():
             raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a {self._display_name()}.')
-class ColumnMetadata(TypedDict):
-    """Metadata for a column of a Pixeltable table."""
-    name: str
-    """The name of the column."""
-    type_: str
-    """The type specifier of the column."""
-    version_added: int
-    """The table version when this column was added."""
-    is_stored: bool
-    """`True` if this is a stored column; `False` if it is dynamically computed."""
-    is_primary_key: bool
-    """`True` if this column is part of the table's primary key."""
-    media_validation: Optional[Literal['on_read', 'on_write']]
-    """The media validation policy for this column."""
-    computed_with: Optional[str]
-    """Expression used to compute this column; `None` if this is not a computed column."""
-class IndexMetadata(TypedDict):
-    """Metadata for a column of a Pixeltable table."""
-    name: str
-    """The name of the index."""
-    columns: list[str]
-    """The table columns that are indexed."""
-    index_type: Literal['embedding']
-    """The type of index (currently only `'embedding'` is supported, but others will be added in the future)."""
-    parameters: EmbeddingIndexParams
-class EmbeddingIndexParams(TypedDict):
-    metric: Literal['cosine', 'ip', 'l2']
-    """Index metric."""
-    embeddings: list[str]
-    """List of embeddings defined for this index."""
-class TableMetadata(TypedDict):
-    """Metadata for a Pixeltable table."""
-    name: str
-    """The name of the table (ex: `'my_table'`)."""
-    path: str
-    """The full path of the table (ex: `'my_dir.my_subdir.my_table'`)."""
-    columns: dict[str, ColumnMetadata]
-    """Column metadata for all of the visible columns of the table."""
-    indices: dict[str, IndexMetadata]
-    """Index metadata for all of the indices of the table."""
-    is_replica: bool
-    """`True` if this table is a replica of another (shared) table."""
-    is_view: bool
-    """`True` if this table is a view."""
-    is_snapshot: bool
-    """`True` if this table is a snapshot."""
-    version: int
-    """The current version of the table."""
-    version_created: datetime.datetime
-    """The timestamp when this table version was created."""
-    schema_version: int
-    """The current schema version of the table."""
-    comment: Optional[str]
-    """User-provided table comment, if one exists."""
-    media_validation: Literal['on_read', 'on_write']
-    """The media validation policy for this table."""
-    base: Optional[str]
-    """If this table is a view or snapshot, the full path of its base table; otherwise `None`."""

pixeltable/catalog/table_metadata.py ADDED Viewed

@@ -0,0 +1,96 @@
+import datetime
+from typing import Literal, Optional, TypedDict
+class ColumnMetadata(TypedDict):
+    """Metadata for a column of a Pixeltable table."""
+    name: str
+    """The name of the column."""
+    type_: str
+    """The type specifier of the column."""
+    version_added: int
+    """The table version when this column was added."""
+    is_stored: bool
+    """`True` if this is a stored column; `False` if it is dynamically computed."""
+    is_primary_key: bool
+    """`True` if this column is part of the table's primary key."""
+    media_validation: Optional[Literal['on_read', 'on_write']]
+    """The media validation policy for this column."""
+    computed_with: Optional[str]
+    """Expression used to compute this column; `None` if this is not a computed column."""
+class EmbeddingIndexParams(TypedDict):
+    metric: Literal['cosine', 'ip', 'l2']
+    """Index metric."""
+    embeddings: list[str]
+    """List of embeddings defined for this index."""
+class IndexMetadata(TypedDict):
+    """Metadata for a column of a Pixeltable table."""
+    name: str
+    """The name of the index."""
+    columns: list[str]
+    """The table columns that are indexed."""
+    index_type: Literal['embedding']
+    """The type of index (currently only `'embedding'` is supported, but others will be added in the future)."""
+    parameters: EmbeddingIndexParams
+class TableMetadata(TypedDict):
+    """Metadata for a Pixeltable table."""
+    name: str
+    """The name of the table (ex: `'my_table'`)."""
+    path: str
+    """The full path of the table (ex: `'my_dir.my_subdir.my_table'`)."""
+    columns: dict[str, ColumnMetadata]
+    """Column metadata for all of the visible columns of the table."""
+    indices: dict[str, IndexMetadata]
+    """Index metadata for all of the indices of the table."""
+    is_replica: bool
+    """`True` if this table is a replica of another (shared) table."""
+    is_view: bool
+    """`True` if this table is a view."""
+    is_snapshot: bool
+    """`True` if this table is a snapshot."""
+    version: int
+    """The current version of the table."""
+    version_created: datetime.datetime
+    """The timestamp when this table version was created."""
+    schema_version: int
+    """The current schema version of the table."""
+    comment: Optional[str]
+    """User-provided table comment, if one exists."""
+    media_validation: Literal['on_read', 'on_write']
+    """The media validation policy for this table."""
+    base: Optional[str]
+    """If this table is a view or snapshot, the full path of its base table; otherwise `None`."""
+class VersionMetadata(TypedDict):
+    """Metadata for a specific version of a Pixeltable table."""
+    """The version number."""
+    version: int
+    """The timestamp when this version was created."""
+    created_at: datetime.datetime
+    """The user who created this version, if defined."""
+    user: str | None
+    """The type of table transformation that this version represents (`'data'` or `'schema'`)."""
+    change_type: Literal['data', 'schema']
+    """The number of rows inserted in this version."""
+    inserts: int
+    """The number of rows updated in this version."""
+    updates: int
+    """The number of rows deleted in this version."""
+    deletes: int
+    """The number of errors encountered during this version."""
+    errors: int
+    """The number of computed values calculated in this version."""
+    computed: int
+    """A description of the schema change that occurred in this version, if any."""
+    schema_change: str | None

pixeltable/env.py CHANGED Viewed

@@ -743,6 +743,7 @@ class Env:
         self.__register_package('whisper', library_name='openai-whisper')
         self.__register_package('whisperx')
         self.__register_package('yolox', library_name='pixeltable-yolox')
+        self.__register_package('lancedb')
     def __register_package(self, package_name: str, library_name: Optional[str] = None) -> None:
         is_installed: bool

pixeltable/globals.py CHANGED Viewed

@@ -3,7 +3,7 @@ from __future__ import annotations
 import logging
 import os
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, NamedTuple, Optional, Sequence, Union
+from typing import TYPE_CHECKING, Any, Iterable, Literal, NamedTuple, Optional, Union
 import pandas as pd
 import pydantic
@@ -24,9 +24,8 @@ if TYPE_CHECKING:
         str,
         os.PathLike,
         Path,  # OS paths, filenames, URLs
-        Iterator[dict[str, Any]],  # iterator producing dictionaries of values
-        RowData,  # list of dictionaries
-        Sequence[pydantic.BaseModel],  # list of Pydantic models
+        Iterable[dict[str, Any]],  # dictionaries of values
+        Iterable[pydantic.BaseModel],  # Pydantic model instances
         DataFrame,  # Pixeltable DataFrame
         pd.DataFrame,  # pandas DataFrame
         datasets.Dataset,

pixeltable/io/__init__.py CHANGED Viewed

@@ -4,11 +4,12 @@ from .datarows import import_json, import_rows
 from .external_store import ExternalStore
 from .globals import create_label_studio_project, export_images_as_fo_dataset
 from .hf_datasets import import_huggingface_dataset
+from .lancedb import export_lancedb
 from .pandas import import_csv, import_excel, import_pandas
 from .parquet import export_parquet, import_parquet
 __default_dir = {symbol for symbol in dir() if not symbol.startswith('_')}
-__removed_symbols = {'globals', 'hf_datasets', 'pandas', 'parquet', 'datarows'}
+__removed_symbols = {'globals', 'hf_datasets', 'pandas', 'parquet', 'datarows', 'lancedb'}
 __all__ = sorted(__default_dir - __removed_symbols)

pixeltable/io/lancedb.py ADDED Viewed

@@ -0,0 +1,3 @@
+from pixeltable.utils.lancedb import export_lancedb
+__all__ = ['export_lancedb']

pixeltable/io/parquet.py CHANGED Viewed

@@ -1,46 +1,22 @@
 from __future__ import annotations
-import datetime
-import io
 import json
 import logging
 import typing
-from collections import deque
 from pathlib import Path
 from typing import Any, Optional
-import numpy as np
-import PIL.Image
 import pixeltable as pxt
 import pixeltable.exceptions as excs
 from pixeltable.catalog import Catalog
 from pixeltable.utils.transactional_directory import transactional_directory
 if typing.TYPE_CHECKING:
-    import pyarrow as pa
     import pixeltable as pxt
 _logger = logging.getLogger('pixeltable')
-def _write_batch(value_batch: dict[str, deque], schema: pa.Schema, output_path: Path) -> None:
-    import pyarrow as pa
-    from pyarrow import parquet
-    pydict = {}
-    for field in schema:
-        if isinstance(field.type, pa.FixedShapeTensorType):
-            stacked_arr = np.stack(value_batch[field.name])
-            pydict[field.name] = pa.FixedShapeTensorArray.from_numpy_ndarray(stacked_arr)
-        else:
-            pydict[field.name] = value_batch[field.name]
-    tab = pa.Table.from_pydict(pydict, schema=schema)
-    parquet.write_table(tab, str(output_path))
 def export_parquet(
     table_or_df: pxt.Table | pxt.DataFrame,
     parquet_path: Path,
@@ -63,7 +39,9 @@ def export_parquet(
                         If False, will raise an error if the Dataframe has any image column.
                         Default False.
     """
-    from pixeltable.utils.arrow import to_arrow_schema
+    import pyarrow as pa
+    from pixeltable.utils.arrow import to_record_batches
     df: pxt.DataFrame
     if isinstance(table_or_df, pxt.catalog.Table):
@@ -71,9 +49,6 @@ def export_parquet(
     else:
         df = table_or_df
-    type_dict = {k: v.as_dict() for k, v in df.schema.items()}
-    arrow_schema = to_arrow_schema(df.schema)
     if not inline_images and any(col_type.is_image_type() for col_type in df.schema.values()):
         raise excs.Error('Cannot export Dataframe with image columns when inline_images is False')
@@ -81,70 +56,15 @@ def export_parquet(
     with transactional_directory(parquet_path) as temp_path:
         # dump metadata json file so we can inspect what was the source of the parquet file later on.
         json.dump(df.as_dict(), (temp_path / '.pixeltable.json').open('w'))
+        type_dict = {k: v.as_dict() for k, v in df.schema.items()}
         json.dump(type_dict, (temp_path / '.pixeltable.column_types.json').open('w'))  # keep type metadata
         batch_num = 0
-        current_value_batch: dict[str, deque] = {k: deque() for k in df.schema}
-        current_byte_estimate = 0
         with Catalog.get().begin_xact(for_write=False):
-            for data_row in df._exec():
-                for (col_name, col_type), e in zip(df.schema.items(), df._select_list_exprs):
-                    val = data_row[e.slot_idx]
-                    if val is None:
-                        current_value_batch[col_name].append(val)
-                        continue
-                    assert val is not None
-                    if col_type.is_image_type():
-                        # images get inlined into the parquet file
-                        if data_row.file_paths is not None and data_row.file_paths[e.slot_idx] is not None:
-                            # if there is a file, read directly to preserve information
-                            with open(data_row.file_paths[e.slot_idx], 'rb') as f:
-                                val = f.read()
-                        elif isinstance(val, PIL.Image.Image):
-                            # if no file available, eg. bc it is computed, convert to png
-                            buf = io.BytesIO()
-                            val.save(buf, format='PNG')
-                            val = buf.getvalue()
-                        else:
-                            raise excs.Error(f'unknown image type {type(val)}')
-                        length = len(val)
-                    elif col_type.is_string_type():
-                        length = len(val)
-                    elif col_type.is_video_type() or col_type.is_audio_type():
-                        if data_row.file_paths is not None and data_row.file_paths[e.slot_idx] is not None:
-                            val = data_row.file_paths[e.slot_idx]
-                        else:
-                            raise excs.Error(f'unknown audio/video type {type(val)}')
-                        length = len(val)
-                    elif col_type.is_json_type():
-                        val = json.dumps(val)
-                        length = len(val)
-                    elif col_type.is_array_type():
-                        length = val.nbytes
-                    elif col_type.is_int_type() or col_type.is_float_type():
-                        length = 8
-                    elif col_type.is_bool_type():
-                        length = 1
-                    elif col_type.is_date_type():
-                        length = 4
-                    elif col_type.is_timestamp_type():
-                        val = val.astimezone(datetime.timezone.utc)
-                        length = 8
-                    else:
-                        raise excs.Error(f'unknown type {col_type} for {col_name}')
-                    current_value_batch[col_name].append(val)
-                    current_byte_estimate += length
-                if current_byte_estimate > partition_size_bytes:
-                    assert batch_num < 100_000, 'wrote too many parquet files, unclear ordering'
-                    _write_batch(current_value_batch, arrow_schema, temp_path / f'part-{batch_num:05d}.parquet')
-                    batch_num += 1
-                    current_value_batch = {k: deque() for k in df.schema}
-                    current_byte_estimate = 0
-            _write_batch(current_value_batch, arrow_schema, temp_path / f'part-{batch_num:05d}.parquet')
+            for record_batch in to_record_batches(df, partition_size_bytes):
+                output_path = temp_path / f'part-{batch_num:05d}.parquet'
+                arrow_tbl = pa.Table.from_batches([record_batch])  # type: ignore
+                pa.parquet.write_table(arrow_tbl, str(output_path))
+                batch_num += 1
 def import_parquet(

pixeltable/io/table_data_conduit.py CHANGED Viewed

@@ -469,12 +469,12 @@ class ParquetTableDataConduit(TableDataConduit):
         return t
     def infer_schema_part1(self) -> tuple[dict[str, ts.ColumnType], list[str]]:
-        from pixeltable.utils.arrow import ar_infer_schema
+        from pixeltable.utils.arrow import to_pxt_schema
         if self.source_column_map is None:
             if self.src_schema_overrides is None:
                 self.src_schema_overrides = {}
-            self.src_schema = ar_infer_schema(self.pq_ds.schema, self.src_schema_overrides, self.src_pk)
+            self.src_schema = to_pxt_schema(self.pq_ds.schema, self.src_schema_overrides, self.src_pk)
             inferred_schema, inferred_pk, self.source_column_map = normalize_schema_names(
                 self.src_schema, self.src_pk, self.src_schema_overrides
             )

pixeltable/utils/arrow.py CHANGED Viewed

@@ -1,11 +1,18 @@
 import datetime
-from typing import Any, Iterator, Optional
+import io
+import json
+from typing import TYPE_CHECKING, Any, Iterator, Optional, cast
 import numpy as np
+import PIL.Image
 import pyarrow as pa
+import pixeltable.exceptions as excs
 import pixeltable.type_system as ts
+if TYPE_CHECKING:
+    import pixeltable as pxt
 PA_TO_PXT_TYPES: dict[pa.DataType, ts.ColumnType] = {
     pa.string(): ts.StringType(nullable=True),
     pa.large_string(): ts.StringType(nullable=True),
@@ -71,7 +78,7 @@ def to_arrow_type(pixeltable_type: ts.ColumnType) -> Optional[pa.DataType]:
         return None
-def ar_infer_schema(
+def to_pxt_schema(
     arrow_schema: pa.Schema, schema_overrides: dict[str, Any], primary_key: list[str]
 ) -> dict[str, ts.ColumnType]:
     """Convert a pyarrow Schema to a schema using pyarrow names and pixeltable types."""
@@ -88,6 +95,94 @@ def to_arrow_schema(pixeltable_schema: dict[str, Any]) -> pa.Schema:
     return pa.schema((name, to_arrow_type(typ)) for name, typ in pixeltable_schema.items())  # type: ignore[misc]
+def _to_record_batch(column_vals: dict[str, list[Any]], schema: pa.Schema) -> pa.RecordBatch:
+    import pyarrow as pa
+    pa_arrays: list[pa.Array] = []
+    for field in schema:
+        if isinstance(field.type, pa.FixedShapeTensorType):
+            stacked_arr = np.stack(column_vals[field.name])
+            pa_arrays.append(pa.FixedShapeTensorArray.from_numpy_ndarray(stacked_arr))
+        else:
+            pa_array = cast(pa.Array, pa.array(column_vals[field.name]))
+            pa_arrays.append(pa_array)
+    return pa.RecordBatch.from_arrays(pa_arrays, schema=schema)  # type: ignore
+def to_record_batches(df: 'pxt.DataFrame', batch_size_bytes: int) -> Iterator[pa.RecordBatch]:
+    arrow_schema = to_arrow_schema(df.schema)
+    batch_columns: dict[str, list[Any]] = {k: [] for k in df.schema}
+    current_byte_estimate = 0
+    num_batch_rows = 0
+    # TODO: in order to avoid having to deal with ExprEvalError here, DataFrameResultSet should be an iterator
+    # over _exec()
+    try:
+        for data_row in df._exec():
+            num_batch_rows += 1
+            for (col_name, col_type), e in zip(df.schema.items(), df._select_list_exprs):
+                val = data_row[e.slot_idx]
+                val_size_bytes: int
+                if val is None:
+                    batch_columns[col_name].append(val)
+                    continue
+                assert val is not None
+                if col_type.is_image_type():
+                    # images get inlined into the parquet file
+                    if data_row.file_paths[e.slot_idx] is not None:
+                        # if there is a file, read directly to preserve information
+                        with open(data_row.file_paths[e.slot_idx], 'rb') as f:
+                            val = f.read()
+                    elif isinstance(val, PIL.Image.Image):
+                        # no file available: save as png
+                        buf = io.BytesIO()
+                        val.save(buf, format='png')
+                        val = buf.getvalue()
+                    else:
+                        raise excs.Error(f'unknown image type {type(val)}')
+                    val_size_bytes = len(val)
+                elif col_type.is_string_type():
+                    val_size_bytes = len(val)
+                elif col_type.is_media_type():
+                    assert data_row.file_paths[e.slot_idx] is not None
+                    val = data_row.file_paths[e.slot_idx]
+                    val_size_bytes = len(val)
+                elif col_type.is_json_type():
+                    val = json.dumps(val)
+                    val_size_bytes = len(val)
+                elif col_type.is_array_type():
+                    val_size_bytes = val.nbytes
+                elif col_type.is_int_type() or col_type.is_float_type():
+                    val_size_bytes = 8
+                elif col_type.is_bool_type():
+                    val_size_bytes = 1
+                elif col_type.is_date_type():
+                    val_size_bytes = 4
+                elif col_type.is_timestamp_type():
+                    val = val.astimezone(datetime.timezone.utc)
+                    val_size_bytes = 8
+                else:
+                    raise excs.Error(f'unknown type {col_type} for {col_name}')
+                batch_columns[col_name].append(val)
+                current_byte_estimate += val_size_bytes
+            if current_byte_estimate > batch_size_bytes and num_batch_rows > 0:
+                record_batch = _to_record_batch(batch_columns, arrow_schema)
+                yield record_batch
+                batch_columns = {k: [] for k in df.schema}
+                current_byte_estimate = 0
+                num_batch_rows = 0
+    except excs.ExprEvalError as e:
+        df._raise_expr_eval_err(e)
+    if num_batch_rows > 0:
+        record_batch = _to_record_batch(batch_columns, arrow_schema)
+        yield record_batch
 def to_pydict(batch: pa.Table | pa.RecordBatch) -> dict[str, list | np.ndarray]:
     """Convert a RecordBatch to a dictionary of lists, unlike pa.lib.RecordBatch.to_pydict,
     this function will not convert numpy arrays to lists, and will preserve the original numpy dtype.

pixeltable/utils/lancedb.py ADDED Viewed

@@ -0,0 +1,88 @@
+from __future__ import annotations
+import logging
+import shutil
+from pathlib import Path
+from typing import Literal
+import pixeltable as pxt
+import pixeltable.exceptions as excs
+from pixeltable.catalog import Catalog
+from pixeltable.env import Env
+_logger = logging.getLogger('pixeltable')
+def export_lancedb(
+    table_or_df: pxt.Table | pxt.DataFrame,
+    db_uri: Path,
+    table_name: str,
+    batch_size_bytes: int = 128 * 2**20,
+    if_exists: Literal['error', 'overwrite', 'append'] = 'error',
+) -> None:
+    """
+    Exports a dataframe's data to a LanceDB table.
+    This utilizes LanceDB's streaming interface for efficient table creation, via a sequence of in-memory pyarrow
+    `RecordBatches`, the size of which can be controlled with the `batch_size_bytes` parameter.
+    __Requirements:__
+    - `pip install lancedb`
+    Args:
+        table_or_df : Table or Dataframe to export.
+        db_uri: Local Path to the LanceDB database.
+        table_name : Name of the table in the LanceDB database.
+        batch_size_bytes : Maximum size in bytes for each batch.
+        if_exists: Determines the behavior if the table already exists. Must be one of the following:
+            - `'error'`: raise an error
+            - `'overwrite'`: overwrite the existing table
+            - `'append'`: append to the existing table
+    """
+    Env.get().require_package('lancedb')
+    import lancedb  # type: ignore[import-untyped]
+    from pixeltable.utils.arrow import to_arrow_schema, to_record_batches
+    if if_exists not in ('error', 'overwrite', 'append'):
+        raise excs.Error("export_lancedb(): 'if_exists' must be one of: ['error', 'overwrite', 'append']")
+    df: pxt.DataFrame
+    if isinstance(table_or_df, pxt.catalog.Table):
+        df = table_or_df._df()
+    else:
+        df = table_or_df
+    db_exists = False
+    if db_uri.exists():
+        if not db_uri.is_dir():
+            raise excs.Error(f"export_lancedb(): '{db_uri!s}' exists and is not a directory")
+        db_exists = True
+    try:
+        db = lancedb.connect(str(db_uri))
+        lance_tbl: lancedb.LanceTable | None = None
+        try:
+            lance_tbl = db.open_table(table_name)
+            if if_exists == 'error':
+                raise excs.Error(f'export_lancedb(): table {table_name!r} already exists in {db_uri!r}')
+        except ValueError:
+            # table doesn't exist
+            pass
+        with Catalog.get().begin_xact(for_write=False):
+            if lance_tbl is None or if_exists == 'overwrite':
+                mode = 'overwrite' if lance_tbl is not None else 'create'
+                arrow_schema = to_arrow_schema(df.schema)
+                _ = db.create_table(table_name, to_record_batches(df, batch_size_bytes), schema=arrow_schema, mode=mode)
+            else:
+                lance_tbl.add(to_record_batches(df, batch_size_bytes))
+    except Exception as e:
+        # cleanup
+        if not db_exists:
+            shutil.rmtree(db_uri)
+        raise e

{pixeltable-0.4.12.dist-info → pixeltable-0.4.13.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pixeltable
-Version: 0.4.12
+Version: 0.4.13
 Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
 Project-URL: homepage, https://pixeltable.com/
 Project-URL: repository, https://github.com/pixeltable/pixeltable
@@ -55,44 +55,41 @@ Requires-Dist: toml>=0.10
 Requires-Dist: tqdm>=4.64
 Description-Content-Type: text/markdown
-<div align="center">
-<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/resources/pixeltable-logo-large.png"
-     alt="Pixeltable Logo" width="50%" />
-<br></br>
+<picture class="github-only">
+  <source media="(prefers-color-scheme: light)" srcset="https://github.com/user-attachments/assets/e9bf82b2-cace-4bd8-9523-b65495eb8131">
+  <source media="(prefers-color-scheme: dark)" srcset="https://github.com/user-attachments/assets/c5ab123e-806c-49bf-93e7-151353719b16">
+  <img alt="Pixeltable Logo" src="https://github.com/user-attachments/assets/e9bf82b2-cace-4bd8-9523-b65495eb8131" width="40%">
+</picture>
-<h2>Declarative Data Infrastructure for Multimodal AI Apps</h2>
+<div>
+<br>
+</div>
+The only open source Python library providing declarative data infrastructure for building multimodal AI applications, enabling incremental storage, transformation, indexing, retrieval, and orchestration of data.
 [![License](https://img.shields.io/badge/License-Apache%202.0-0530AD.svg)](https://opensource.org/licenses/Apache-2.0)
-![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pixeltable?logo=python&logoColor=white&)
-![Platform Support](https://img.shields.io/badge/platform-Linux%20%7C%20macOS%20%7C%20Windows-E5DDD4)
-<br>
 [![tests status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml)
 [![nightly status](https://github.com/pixeltable/pixeltable/actions/workflows/nightly.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions/workflows/nightly.yml)
 [![stress-tests status](https://github.com/pixeltable/pixeltable/actions/workflows/stress-tests.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions/workflows/stress-tests.yml)
 [![PyPI Package](https://img.shields.io/pypi/v/pixeltable?color=4D148C)](https://pypi.org/project/pixeltable/)
 [![My Discord (1306431018890166272)](https://img.shields.io/badge/💬-Discord-%235865F2.svg)](https://discord.gg/QPyqFYx2UN)
-[**Installation**](https://docs.pixeltable.com/docs/overview/installation) |
 [**Quick Start**](https://docs.pixeltable.com/docs/overview/quick-start) |
 [**Documentation**](https://docs.pixeltable.com/) |
 [**API Reference**](https://pixeltable.github.io/pixeltable/) |
-[**Examples**](https://docs.pixeltable.com/docs/examples/use-cases) |
+[**Sample Apps**](https://github.com/pixeltable/pixeltable/tree/main/docs/sample-apps) |
 [**Discord Community**](https://discord.gg/QPyqFYx2UN)
-</div>
 ---
-## 💾 Installation
+## Installation
 ```bash
 pip install pixeltable
 ```
+Pixeltable replaces the complex multi-system architecture typically needed for AI applications (databases, file storage, vector DBs, APIs, orchestration) with a single declarative table interface that natively handles multimodal data like images, videos, and documents.
-**Pixeltable unifies storage, retrieval, and orchestration for multimodal data.**
-It stores metadata and computed results persistently, typically in a `.pixeltable` directory in your workspace.
-## Pixeltable Demo
+## Demo
 https://github.com/user-attachments/assets/b50fd6df-5169-4881-9dbe-1b6e5d06cede
@@ -152,7 +149,7 @@ results = t.select(
 ).collect()
 ```
-## ✨ What Happened?
+## What Happened?
 * **Data Ingestion & Storage:** References [files](https://docs.pixeltable.com/docs/datastore/bringing-data)
     (images, videos, audio, docs) in place, handles structured data.
@@ -174,7 +171,7 @@ as in the `insert` statement above, Pixeltable caches them locally before proces
 [Working with External Files](https://github.com/pixeltable/pixeltable/blob/main/docs/notebooks/feature-guides/working-with-external-files.ipynb)
 notebook for more details.
-## 🗄️ Where Did My Data Go?
+## Where Did My Data Go?
 Pixeltable workloads generate various outputs, including both structured outputs (such as bounding boxes for detected
 objects) and/or unstructured outputs (such as generated images or video). By default, everything resides in your
@@ -186,125 +183,163 @@ a unified table interface over both structured and unstructured data.
 In general, the user is not expected to interact directly with the data in `~/.pixeltable`; the data store is fully
 managed by Pixeltable and is intended to be accessed through the Pixeltable Python SDK.
-## ⚖️ Key Principles
+## Key Principles
-* **[Unified Multimodal Interface:](https://docs.pixeltable.com/docs/datastore/tables-and-operations)** `pxt.Image`,
-    `pxt.Video`, `pxt.Audio`, `pxt.Document`, etc. – manage diverse data consistently.
+**[Unified Multimodal Interface:](https://docs.pixeltable.com/docs/datastore/tables-and-operations)** `pxt.Image`,
+`pxt.Video`, `pxt.Audio`, `pxt.Document`, etc. – manage diverse data consistently.
-    ```python
-    t = pxt.create_table(
-        'media',
-        {
-            'img': pxt.Image,
-            'video': pxt.Video
-        }
-    )
-    ```
+```python
+t = pxt.create_table(
+   'media',
+   {
+       'img': pxt.Image,
+       'video': pxt.Video
+   }
+)
+```
-* **[Declarative Computed Columns:](https://docs.pixeltable.com/docs/datastore/computed-columns)** Define processing
-    steps once; they run automatically on new/updated data.
+**[Declarative Computed Columns:](https://docs.pixeltable.com/docs/datastore/computed-columns)** Define processing
+steps once; they run automatically on new/updated data.
-    ```python
-    t.add_computed_column(
-        classification=huggingface.vit_for_image_classification(
-            t.image
-        )
-    )
-    ```
+```python
+t.add_computed_column(
+   classification=huggingface.vit_for_image_classification(
+       t.image
+   )
+)
+```
-* **[Built-in Vector Search:](https://docs.pixeltable.com/docs/datastore/embedding-index)** Add embedding indexes and
-    perform similarity searches directly on tables/views.
+**[Built-in Vector Search:](https://docs.pixeltable.com/docs/datastore/embedding-index)** Add embedding indexes and
+perform similarity searches directly on tables/views.
-    ```python
-    t.add_embedding_index(
-        'img',
-        embedding=clip.using(
-            model_id='openai/clip-vit-base-patch32'
-        )
-    )
+```python
+t.add_embedding_index(
+   'img',
+   embedding=clip.using(
+       model_id='openai/clip-vit-base-patch32'
+   )
+)
-    sim = t.img.similarity("cat playing with yarn")
-    ```
+sim = t.img.similarity("cat playing with yarn")
+```
-* **[On-the-Fly Data Views:](https://docs.pixeltable.com/docs/datastore/views)** Create virtual tables using iterators
-    for efficient processing without data duplication.
+**[Incremental View Maintenance:](https://docs.pixeltable.com/docs/datastore/views)** Create virtual tables using iterators
+for efficient processing without data duplication.
-    ```python
-    frames = pxt.create_view(
-        'frames',
-        videos,
-        iterator=FrameIterator.create(
-            video=videos.video,
-            fps=1
-        )
-    )
-    ```
+```python
+# Document chunking with overlap & metadata and many more options to build your own iterator
+chunks = pxt.create_view('chunks', docs,
+   iterator=DocumentSplitter.create(
+       document=docs.doc,
+       separators='sentence,token_limit',
+       overlap=50, limit=500
+   ))
+# Video frame extraction
+frames = pxt.create_view('frames', videos,
+   iterator=FrameIterator.create(video=videos.video, fps=0.5))
+```
-* **[Seamless AI Integration:](https://docs.pixeltable.com/docs/integrations/frameworks)** Built-in functions for
-    OpenAI, Anthropic, Hugging Face, CLIP, YOLOX, and more.
+**[Seamless AI Integration:](https://docs.pixeltable.com/docs/integrations/frameworks)** Built-in functions for
+OpenAI, Anthropic, Hugging Face, CLIP, YOLOX, and more.
-    ```python
-    t.add_computed_column(
-        response=openai.chat_completions(
-            messages=[{"role": "user", "content": t.prompt}]
-        )
-    )
-    ```
+```python
+# LLM integration (OpenAI, Anthropic, etc.)
+t.add_computed_column(
+   response=openai.chat_completions(
+       messages=[{"role": "user", "content": t.prompt}], model='gpt-4o-mini'
+   )
+)
-* **[Bring Your Own Code:](https://docs.pixeltable.com/docs/datastore/custom-functions)** Extend Pixeltable with simple
-    Python User-Defined Functions.
+# Computer vision (YOLOX object detection)
+t.add_computed_column(
+   detections=yolox(t.image, model_id='yolox_s', threshold=0.5)
+)
-    ```python
-    @pxt.udf
-    def format_prompt(context: list, question: str) -> str:
-        return f"Context: {context}\nQuestion: {question}"
-    ```
+# Embedding models (Hugging Face, CLIP)
+t.add_computed_column(
+   embeddings=huggingface.sentence_transformer(
+       t.text, model_id='all-MiniLM-L6-v2'
+   )
+)
+```
-* **[Agentic Workflows / Tool Calling:](https://docs.pixeltable.com/docs/examples/chat/tools)** Register `@pxt.udf` or
-    `@pxt.query` functions as tools and orchestrate LLM-based tool use (incl. multimodal).
+**[Bring Your Own Code:](https://docs.pixeltable.com/docs/datastore/custom-functions)** Extend Pixeltable with UDFs, batch processing, and custom aggregators.
+```python
+@pxt.udf
+def format_prompt(context: list, question: str) -> str:
+   return f"Context: {context}\nQuestion: {question}"
+```
-    ```python
-    # Example tools: a UDF and a Query function for RAG
-    tools = pxt.tools(get_weather_udf, search_context_query)
+**[Agentic Workflows / Tool Calling:](https://docs.pixeltable.com/docs/examples/chat/tools)** Register `@pxt.udf`,
+`@pxt.query` functions, or **MCP tools** as tools.
-    # LLM decides which tool to call; Pixeltable executes it
-    t.add_computed_column(
-        tool_output=invoke_tools(tools, t.llm_tool_choice)
-    )
-    ```
-* **[Data Persistence:](https://docs.pixeltable.com/docs/datastore/tables-and-operations#data-operations)** All data,
-    metadata, and computed results are automatically stored and versioned.
-    ```python
-    t = pxt.get_table('my_table')  # Get a handle to an existing table
-    t.select(t.account, t.balance).collect()  # Query its contents
-    t.revert()  # Undo the last modification to the table and restore its previous state
-    ```
-* **[Time Travel:](https://docs.pixeltable.com/docs/datastore/tables-and-operations#data-operations)** By default,
-    Pixeltable preserves the full change history of each table, and any prior version can be selected and queried.
-    ```python
-    t.history()  # Display a human-readable list of all prior versions of the table
-    old_version = pxt.get_table('my_table:472')  # Get a handle to a specific table version
-    old_version.select(t.account, t.balance).collect()  # Query the older version
-    ```
-* **[SQL-like Python Querying:](https://docs.pixeltable.com/docs/datastore/filtering-and-selecting)** Familiar syntax
-    combined with powerful AI capabilities.
-    ```python
-    results = (
-        t.where(t.score > 0.8)
-        .order_by(t.timestamp)
-        .select(t.image, score=t.score)
-        .limit(10)
-        .collect()
-    )
-    ```
+```python
+# Example tools: UDFs, Query functions, and MCP tools
+mcp_tools = pxt.mcp_udfs('http://localhost:8000/mcp')  # Load from MCP server
+tools = pxt.tools(get_weather_udf, search_context_query, *mcp_tools)
+# LLM decides which tool to call; Pixeltable executes it
+t.add_computed_column(
+   tool_output=invoke_tools(tools, t.llm_tool_choice)
+)
+```
+**[Data Persistence:](https://docs.pixeltable.com/docs/datastore/tables-and-operations#data-operations)** All data,
+metadata, and computed results are automatically stored and versioned.
+```python
+t = pxt.get_table('my_table')  # Get a handle to an existing table
+t.select(t.account, t.balance).collect()  # Query its contents
+t.revert()  # Undo the last modification to the table and restore its previous state
+```
+**[Time Travel:](https://docs.pixeltable.com/docs/datastore/tables-and-operations#data-operations)** By default,
+Pixeltable preserves the full change history of each table, and any prior version can be selected and queried.
+```python
+t.history()  # Display a human-readable list of all prior versions of the table
+old_version = pxt.get_table('my_table:472')  # Get a handle to a specific table version
+old_version.select(t.account, t.balance).collect()  # Query the older version
+```
+**[SQL-like Python Querying:](https://docs.pixeltable.com/docs/datastore/filtering-and-selecting)** Familiar syntax
+combined with powerful AI capabilities.
+```python
+results = (
+   t.where(t.score > 0.8)
+   .order_by(t.timestamp)
+   .select(t.image, score=t.score)
+   .limit(10)
+   .collect()
+)
+```
+**[I/O & Integration:](https://pixeltable.github.io/pixeltable/pixeltable/io/)** Export to multiple
+formats and integrate with the ML/AI tools ecosystem.
+```python
+# Export to analytics/ML formats
+pxt.export_parquet(table, 'data.parquet', partition_size_bytes=100_000_000)
+pxt.export_lancedb(table, Path('vector_db'), 'my_table')
+# DataFrame conversions
+results = table.select(table.image, table.labels).collect()
+df = results.to_pandas()                           # → pandas DataFrame
+models = results.to_pydantic(MyModel)              # → Pydantic models
+# Specialized ML dataset formats
+coco_path = table.to_coco_dataset()                # → COCO annotations
+pytorch_ds = table.to_pytorch_dataset('pt')        # → PyTorch DataLoader ready
+# ML tool integrations
+pxt.create_label_studio_project(table, label_config)  # Annotation
+pxt.export_images_as_fo_dataset(table, table.image)   # FiftyOne
+```
-## 💡 Key Examples
+## Key Examples
 *(See the [Full Quick Start](https://docs.pixeltable.com/docs/overview/quick-start) or
 [Notebook Gallery](#notebook-gallery) for more details)*
@@ -497,7 +532,7 @@ print("--- Final Answer ---")
 print(qa.select(qa.answer).collect())
 ```
-## 📚 Notebook Gallery
+## Notebook Gallery
 Explore Pixeltable's capabilities interactively:
@@ -514,7 +549,7 @@ Explore Pixeltable's capabilities interactively:
 | Object Detection | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/use-cases/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Image/Text Search | <a target="_blank" href="https://github.com/pixeltable/pixeltable/tree/main/docs/sample-apps/text-and-image-similarity-search-nextjs-fastapi">  <img src="https://img.shields.io/badge/🖥️%20App-black.svg" alt="GitHub App"/> |
 | Audio Transcription | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/use-cases/audio-transcriptions.ipynb">  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> | Discord Bot | <a target="_blank" href="https://github.com/pixeltable/pixeltable/blob/main/docs/sample-apps/context-aware-discord-bot"> <img src="https://img.shields.io/badge/%F0%9F%92%AC%20Bot-%235865F2.svg" alt="GitHub App"/></a> |
-## 🚨 Maintaining Production-Ready Multimodal AI Apps is Still Too Hard
+## Maintaining Production-Ready Multimodal AI Apps is Still Too Hard
 Building robust AI applications, especially [multimodal](https://docs.pixeltable.com/docs/datastore/bringing-data) ones,
 requires stitching together numerous tools:
@@ -528,7 +563,7 @@ requires stitching together numerous tools:
 This complex "data plumbing" slows down development, increases costs, and makes applications brittle and hard to reproduce.
-## 🔮 Roadmap (2025)
+## Roadmap (2025)
 ### Cloud Infrastructure and Deployment
@@ -538,13 +573,13 @@ We're working on a hosted Pixeltable service that will:
 * Provide a persistent cloud instance
 * Turn Pixeltable workflows (Tables, Queries, UDFs) into API endpoints/[MCP Servers](https://github.com/pixeltable/pixeltable-mcp-server)
-## 🤝 Contributing
+## Contributing
 We love contributions! Whether it's reporting bugs, suggesting features, improving documentation, or submitting code
 changes, please check out our [Contributing Guide](CONTRIBUTING.md) and join the
 [Discussions](https://github.com/pixeltable/pixeltable/discussions) or our
 [Discord Server](https://discord.gg/QPyqFYx2UN).
-## 🏢 License
+## License
 Pixeltable is licensed under the Apache 2.0 License.

{pixeltable-0.4.12.dist-info → pixeltable-0.4.13.dist-info}/RECORD RENAMED Viewed

@@ -1,15 +1,15 @@
-pixeltable/__init__.py,sha256=wJ_4oQdkBAaaVKM8XiZKKSsWPnoemZxh34o6_5vDcxk,1562
+pixeltable/__init__.py,sha256=PDfphK_WypPopRbBNhJ0wXiX5T9Vp4Vq9Hf8Oz_oXZA,1620
 pixeltable/__version__.py,sha256=LnMIuAxx6nAQDMev_jnZyUdgsaiE3F8lulfXQBRl9qQ,112
 pixeltable/config.py,sha256=-aoSVF0Aak83IC-u-XANw3if76TDq5VnnWNWoFDR5Hc,8390
 pixeltable/dataframe.py,sha256=XbrzPjnPgZKJ5lVgPO71cK-nRHCpqGCGWFc52kUO8_E,64213
-pixeltable/env.py,sha256=FlE7s649xBiE5WSs65WwQ4bKbPjMYQaF0Z0HeuEuCs4,44160
+pixeltable/env.py,sha256=LUTOi3DcinsVFoqiOmsG8Dlhe8yWBEfgIdY9rOlJMME,44203
 pixeltable/exceptions.py,sha256=Gm8d3TL2iiv6Pj2DLd29wp_j41qNBhxXL9iTQnL4Nk4,1116
-pixeltable/globals.py,sha256=nR6XJKFlsb12oo_wOWAoAMlnPbHY7FhM3dgEKoM9iSM,39262
+pixeltable/globals.py,sha256=dktqUbpsiLorB4-1VjYDp7LH0rfqfh_3c8OD819K_H4,39183
 pixeltable/plan.py,sha256=4yAe7ExAqaSvkFxwK7LPH_HpmoumwqoLeOo7czJ8CyQ,48001
 pixeltable/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pixeltable/store.py,sha256=CneWUmgN-EwaPYLcizlAxONC7WYwMr8SNpSFeNBBmOA,22885
 pixeltable/type_system.py,sha256=UfPZZy4zJ2kGvdHXI9rqxOGAjgIxCZ9QGvvidPWcq-M,56153
-pixeltable/catalog/__init__.py,sha256=zw6hiyAIjMBxCExtsr7G51ul2XQ9fTQQKcs45rIy7xA,682
+pixeltable/catalog/__init__.py,sha256=GL0MLxqCBHlhKWqhC3e9B4kwTazagTOiqBHHRjyWbTg,726
 pixeltable/catalog/catalog.py,sha256=gaq10XFwkr6jyv8yVi5xV3_oiDkPvqVe55vxOo14W6k,93853
 pixeltable/catalog/column.py,sha256=MXa5o3ku94T8ZFEL7wnAvqvlk65fOmmHPqIvrUVf3uo,13514
 pixeltable/catalog/dir.py,sha256=VYTscPlKR6XhupPTXlJ8txAHxS5GSpPJ3LIleDJagVQ,2047
@@ -18,7 +18,8 @@ pixeltable/catalog/insertable_table.py,sha256=VUuJ8z7OtMqgy_LMzkn1KzeLXdR-9poTtt
 pixeltable/catalog/named_function.py,sha256=vZ-j7P4HugWh9OmUzBMwyRYvO3tQn9jWyJz_1stPavU,1210
 pixeltable/catalog/path.py,sha256=O3FfxrvyX2crijBhp_2k4-3mG3BFxwba-tlPB74QtJQ,3780
 pixeltable/catalog/schema_object.py,sha256=rQ6-3rzqnOHyEEHi97kai2S7BO3D9AkH7rirnfbGc14,1785
-pixeltable/catalog/table.py,sha256=Ug65hRZhzjp3sIUSppA-mXUEWLXgPK22bq22f7WFy0M,81816
+pixeltable/catalog/table.py,sha256=phOf59IZJO7xPPR91F2trJpA4TC9lic-dd13mbiUz5Q,80222
+pixeltable/catalog/table_metadata.py,sha256=MVxJLS6Tz2PVOerlnoOOjjhq6LxUdDLeN0BUJf42Smw,3518
 pixeltable/catalog/table_version.py,sha256=SRF2ACp_DcPMLTbc4dbZSgYEfW6-o-UzDOBehecKbb0,65073
 pixeltable/catalog/table_version_handle.py,sha256=FTPRqcGY-h-POcWyZbd9b8P2D5zIw5OSUvwF_dbyCGo,3608
 pixeltable/catalog/table_version_path.py,sha256=IaFVDH06_6ZMuBv5eLNCRTlWizpvz95jgAzqp4OVx_o,9713
@@ -113,16 +114,17 @@ pixeltable/index/__init__.py,sha256=97aFuxiP_oz1ldn5iq8IWApkOV8XG6ZIBW5-9rkS0vM,
 pixeltable/index/base.py,sha256=200s7v3Zy810bRlbSAYzxxaEjVssl6r8esTHiSvWRwQ,1704
 pixeltable/index/btree.py,sha256=8B06D67ay0DFUtEBC5q4bLjxMq7ILpKyyoLAiSaamzA,2503
 pixeltable/index/embedding_index.py,sha256=B_k_3UJmSv7t2ljUg8GC_D4t1jc03PVsTAvxqiTmHBA,11754
-pixeltable/io/__init__.py,sha256=chVGh3ygtZwSY6g_skIyCsjxwzo2847jDq9YGObAY98,608
+pixeltable/io/__init__.py,sha256=SO9xvWuQHfg_YyVahDmstB3lSuMoPKRarW8qgUR81jM,655
 pixeltable/io/datarows.py,sha256=s2fDQTttGxq7cS5JwKFEJRSKn6WsXTaGdmm9VJSl_2M,6154
 pixeltable/io/external_store.py,sha256=rOYBwTqcZZVU2toWxJ_9Iy2w2YO0DhuABrM2xGmqHSo,14787
 pixeltable/io/fiftyone.py,sha256=JcAL9zFszSTcsws6ioF1KZZJFmUeg-11W-c4Gyh3FyQ,6891
 pixeltable/io/globals.py,sha256=B9ubI9Z0m2wGPZXWmZm10vlaP0UCuUsVyrMWvyudZSc,11360
 pixeltable/io/hf_datasets.py,sha256=5WfWfXoQppG1Bx_pS5n44KO1Vo_mEb_S82PLB8cLfAU,5606
 pixeltable/io/label_studio.py,sha256=OCQBVgGjXRSdukFQv2ZKdaBmpxanqH9ibDLxZd1L3mc,31469
+pixeltable/io/lancedb.py,sha256=kNcYXptieMlJ6yxEIZHVFklEMOEB2mrSyp7XZmOw4qs,82
 pixeltable/io/pandas.py,sha256=xQmkwbqE9_fjbbPUgeG5yNICrbVVK73UHxDL-cgrQw0,9007
-pixeltable/io/parquet.py,sha256=qoVDuCoW-Tq14IlzN_psoNP7z83hIQ3ZEg_pKzHSqoY,7796
-pixeltable/io/table_data_conduit.py,sha256=--UWwG6agBtOA5PLPfjxp2XKoAQ-f5nSPJqOgA5DAAI,22062
+pixeltable/io/parquet.py,sha256=qVvg9nixJnK9gXYxZocD8HE13SznyLrgW9IsehtT4j4,4101
+pixeltable/io/table_data_conduit.py,sha256=8jwQ3IOoOBS-8j2TEfgiqsFUD85kEP5IjoC0dg2uPEk,22058
 pixeltable/io/utils.py,sha256=qzBTmqdIawXMt2bfXQOraYnEstL69eC2Z33nl8RrwJk,4244
 pixeltable/iterators/__init__.py,sha256=hI937cmBRU3eWbfJ7miFthAGUo_xmcYciw6gAjOCg9g,470
 pixeltable/iterators/audio.py,sha256=HYE8JcqaJsTGdrq4NkwV5tn7lcyMp6Fjrm59efOLzb0,9671
@@ -171,7 +173,7 @@ pixeltable/share/__init__.py,sha256=PTX1mw61Ss4acEOI-sUlu0HaoVsosLqwDfh0ldn8Hkg,
 pixeltable/share/packager.py,sha256=5rSKnQCs3YP5h48d79bXEK4L8tLUSeTSbXaB8X9SmBI,31265
 pixeltable/share/publish.py,sha256=VE_H3ux56gdSHd8_ganxCnNYtxrjaalMPgwAIYmdbE8,11300
 pixeltable/utils/__init__.py,sha256=45qEM20L2VuIe-Cc3BTKWFqQb-S7A8qDtmmgl77zYK0,1728
-pixeltable/utils/arrow.py,sha256=Rooa02GL5k--D2utlKATtYKrrlsHbbi6JmkarXMux1M,6384
+pixeltable/utils/arrow.py,sha256=U7vb_ffPCR7zv-phyBMPMDosPdKN6LK4IVMpfm2mRy8,10424
 pixeltable/utils/av.py,sha256=omJufz62dzaTTwlR7quKfcT7apf8KkBLJ9cQ9240dt0,4016
 pixeltable/utils/coco.py,sha256=Y1DWVYguZD4VhKyf7JruYfHWvhkJLq39fzbiSm5cdyY,7304
 pixeltable/utils/code.py,sha256=3CZMVJm69JIG5sxmd56mjB4Fo4L-s0_Y8YvQeJIj0F0,1280
@@ -185,14 +187,15 @@ pixeltable/utils/filecache.py,sha256=3TTEqhGg0pEAP_l0GKn34uspC4dha1jPab1Ka9_oTBM
 pixeltable/utils/formatter.py,sha256=tbMxE9rBw6wdKUnJhNZ8h9uAF8dZKcihQ2KesqAag9A,10096
 pixeltable/utils/http_server.py,sha256=6khOAtpVj1lDIm9Dx8VIECLm87cFEp4IFbAg8T92A2o,2441
 pixeltable/utils/iceberg.py,sha256=COeNqqy5RRMkDGLS8CTnaUeAccG10x2fwP3e1veuqIA,522
+pixeltable/utils/lancedb.py,sha256=Otr-t47YACRo0Cq9-FyelcUuan1Kgs4gxCOpLOckj3s,2988
 pixeltable/utils/media_store.py,sha256=-rYfpZOUrWU1YtEFrxdrn9Na0NeyRW3HJYsOdH-kJO4,10898
 pixeltable/utils/pydantic.py,sha256=-ztUsuRXA7B6bywb5Yy1h5pNQ2DnsT1d0oHMxqtK3WY,2011
 pixeltable/utils/pytorch.py,sha256=564VHRdDHwD9h0v5lBHEDTJ8c6zx8wuzWYx8ZYjBxlI,3621
 pixeltable/utils/s3.py,sha256=pxip2MlCqd2Qon2dzJXzfxvwtZyc-BAsjAnLL4J_OXY,587
 pixeltable/utils/sql.py,sha256=Sa4Lh-VGe8GToU5W7DRiWf2lMl9B6saPqemiT0ZdHEc,806
 pixeltable/utils/transactional_directory.py,sha256=OFKmu90oP7KwBAljwjnzP_w8euGdAXob3y4Nx9SCNHA,1357
-pixeltable-0.4.12.dist-info/METADATA,sha256=mJSdwTquIKJqX6iziu0ZKHMg0guXT1HrGwqNCGvjZJw,24248
-pixeltable-0.4.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-pixeltable-0.4.12.dist-info/entry_points.txt,sha256=rrKugZmxDtGnXCnEQ5UJMaaSYY7-g1cLjUZ4W1moIhM,98
-pixeltable-0.4.12.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-pixeltable-0.4.12.dist-info/RECORD,,
+pixeltable-0.4.13.dist-info/METADATA,sha256=VSQp0eAebSMwoxcFkjAwTQbtuLISMx-PZ-LoCJo55hg,25631
+pixeltable-0.4.13.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+pixeltable-0.4.13.dist-info/entry_points.txt,sha256=rrKugZmxDtGnXCnEQ5UJMaaSYY7-g1cLjUZ4W1moIhM,98
+pixeltable-0.4.13.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+pixeltable-0.4.13.dist-info/RECORD,,

{pixeltable-0.4.12.dist-info → pixeltable-0.4.13.dist-info}/WHEEL RENAMED Viewed

File without changes

{pixeltable-0.4.12.dist-info → pixeltable-0.4.13.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{pixeltable-0.4.12.dist-info → pixeltable-0.4.13.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

pixeltable 0.4.12__py3-none-any.whl → 0.4.13__py3-none-any.whl

Potentially problematic release.

pixeltable 0.4.12py3-none-any.whl → 0.4.13py3-none-any.whl