PyPI - pixeltable - Versions diffs - 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl - Mend

pixeltable 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (39) hide show

pixeltable/__version__.py +2 -2
pixeltable/catalog/catalog.py +106 -71
pixeltable/catalog/path.py +59 -20
pixeltable/catalog/schema_object.py +1 -0
pixeltable/catalog/table.py +6 -0
pixeltable/catalog/table_version.py +2 -1
pixeltable/catalog/view.py +21 -10
pixeltable/config.py +12 -4
pixeltable/dataframe.py +57 -1
pixeltable/env.py +25 -13
pixeltable/exec/aggregation_node.py +1 -1
pixeltable/exec/cache_prefetch_node.py +2 -6
pixeltable/exec/component_iteration_node.py +4 -3
pixeltable/exec/data_row_batch.py +10 -53
pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
pixeltable/exec/in_memory_data_node.py +13 -11
pixeltable/exec/sql_node.py +6 -7
pixeltable/exprs/data_row.py +13 -13
pixeltable/exprs/row_builder.py +16 -4
pixeltable/exprs/string_op.py +1 -1
pixeltable/func/expr_template_function.py +1 -4
pixeltable/functions/date.py +1 -1
pixeltable/functions/math.py +1 -1
pixeltable/functions/openai.py +8 -4
pixeltable/functions/timestamp.py +6 -6
pixeltable/globals.py +14 -10
pixeltable/metadata/schema.py +1 -1
pixeltable/plan.py +5 -14
pixeltable/share/packager.py +13 -13
pixeltable/store.py +9 -6
pixeltable/type_system.py +2 -1
pixeltable/utils/filecache.py +1 -1
pixeltable/utils/http_server.py +2 -3
pixeltable/utils/media_store.py +84 -39
{pixeltable-0.4.4.dist-info → pixeltable-0.4.5.dist-info}/METADATA +1 -1
{pixeltable-0.4.4.dist-info → pixeltable-0.4.5.dist-info}/RECORD +39 -39
{pixeltable-0.4.4.dist-info → pixeltable-0.4.5.dist-info}/LICENSE +0 -0
{pixeltable-0.4.4.dist-info → pixeltable-0.4.5.dist-info}/WHEEL +0 -0
{pixeltable-0.4.4.dist-info → pixeltable-0.4.5.dist-info}/entry_points.txt +0 -0

pixeltable/functions/timestamp.py CHANGED Viewed

@@ -237,12 +237,12 @@ def _(
     microsecond: sql.ColumnElement = _SQL_ZERO,
 ) -> sql.ColumnElement:
     return sql.func.make_timestamptz(
-        sql.cast(year, sql.Integer),
-        sql.cast(month, sql.Integer),
-        sql.cast(day, sql.Integer),
-        sql.cast(hour, sql.Integer),
-        sql.cast(minute, sql.Integer),
-        sql.cast(second + microsecond / 1000000.0, sql.Float),
+        year.cast(sql.Integer),
+        month.cast(sql.Integer),
+        day.cast(sql.Integer),
+        hour.cast(sql.Integer),
+        minute.cast(sql.Integer),
+        (second + microsecond / 1000000.0).cast(sql.Float),
     )

pixeltable/globals.py CHANGED Viewed

@@ -146,7 +146,7 @@ def create_table(
     if schema is not None and (len(schema) == 0 or not isinstance(schema, dict)):
         raise excs.Error('`schema` must be a non-empty dictionary')
-    path_obj = catalog.Path(path)
+    path_obj = catalog.Path.parse(path)
     if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
     media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')
     primary_key: Optional[list[str]] = normalize_primary_key_parameter(primary_key)
@@ -284,7 +284,7 @@ def create_view(
         raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
     assert isinstance(base, (catalog.Table, DataFrame))
-    path_obj = catalog.Path(path)
+    path_obj = catalog.Path.parse(path)
     if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
     media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')
@@ -445,8 +445,12 @@ def get_table(path: str) -> catalog.Table:
         Handles to views and snapshots are retrieved in the same way:
         >>> tbl = pxt.get_table('my_snapshot')
+        Get a handle to a specific version of a table:
+        >>> tbl = pxt.get_table('my_table:722')
     """
-    path_obj = catalog.Path(path)
+    path_obj = catalog.Path.parse(path, allow_versioned_path=True)
     tbl = Catalog.get().get_table(path_obj)
     return tbl
@@ -472,7 +476,7 @@ def move(path: str, new_path: str) -> None:
     """
     if path == new_path:
         raise excs.Error('move(): source and destination cannot be identical')
-    path_obj, new_path_obj = catalog.Path(path), catalog.Path(new_path)
+    path_obj, new_path_obj = catalog.Path.parse(path), catalog.Path.parse(new_path)
     if path_obj.is_ancestor(new_path_obj):
         raise excs.Error(f'move(): cannot move {path!r} into its own subdirectory')
     cat = Catalog.get()
@@ -525,7 +529,7 @@ def drop_table(
         assert isinstance(table, str)
         tbl_path = table
-    path_obj = catalog.Path(tbl_path)
+    path_obj = catalog.Path.parse(tbl_path)
     if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
     Catalog.get().drop_table(path_obj, force=force, if_not_exists=if_not_exists_)
@@ -557,7 +561,7 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
 def _list_tables(dir_path: str = '', recursive: bool = True, allow_system_paths: bool = False) -> list[str]:
-    path_obj = catalog.Path(dir_path, empty_is_valid=True, allow_system_paths=allow_system_paths)
+    path_obj = catalog.Path.parse(dir_path, allow_empty_path=True, allow_system_path=allow_system_paths)
     contents = Catalog.get().get_dir_contents(path_obj, recursive=recursive)
     return [str(p) for p in _extract_paths(contents, parent=path_obj, entry_type=catalog.Table)]
@@ -609,7 +613,7 @@ def create_dir(
         >>> pxt.create_dir('parent1.parent2.sub_dir', parents=True)
     """
-    path_obj = catalog.Path(path)
+    path_obj = catalog.Path.parse(path)
     if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
     return Catalog.get().create_dir(path_obj, if_exists=if_exists_, parents=parents)
@@ -651,7 +655,7 @@ def drop_dir(path: str, force: bool = False, if_not_exists: Literal['error', 'ig
         >>> pxt.drop_dir('my_dir', force=True)
     """
-    path_obj = catalog.Path(path)  # validate format
+    path_obj = catalog.Path.parse(path)  # validate format
     if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
     Catalog.get().drop_dir(path_obj, if_not_exists=if_not_exists_, force=force)
@@ -670,7 +674,7 @@ def ls(path: str = '') -> pd.DataFrame:
     from pixeltable.metadata import schema
     cat = Catalog.get()
-    path_obj = catalog.Path(path, empty_is_valid=True)
+    path_obj = catalog.Path.parse(path, allow_empty_path=True)
     dir_entries = cat.get_dir_contents(path_obj)
     @retry_loop(for_write=False)
@@ -759,7 +763,7 @@ def list_dirs(path: str = '', recursive: bool = True) -> list[str]:
         >>> cl.list_dirs('my_dir', recursive=True)
         ['my_dir', 'my_dir.sub_dir1']
     """
-    path_obj = catalog.Path(path, empty_is_valid=True)  # validate format
+    path_obj = catalog.Path.parse(path, allow_empty_path=True)  # validate format
     cat = Catalog.get()
     contents = cat.get_dir_contents(path_obj, recursive=recursive)
     return [str(p) for p in _extract_paths(contents, parent=path_obj, entry_type=catalog.Dir)]

pixeltable/metadata/schema.py CHANGED Viewed

@@ -24,7 +24,7 @@ def md_from_dict(data_class_type: type[T], data: Any) -> T:
     """Re-instantiate a dataclass instance that contains nested dataclasses from a dict."""
     if dataclasses.is_dataclass(data_class_type):
         fieldtypes = get_type_hints(data_class_type)
-        return data_class_type(**{f: md_from_dict(fieldtypes[f], data[f]) for f in data})  # type: ignore[return-value]
+        return data_class_type(**{f: md_from_dict(fieldtypes[f], data[f]) for f in data})
     origin = typing.get_origin(data_class_type)
     if origin is not None:

pixeltable/plan.py CHANGED Viewed

@@ -385,14 +385,7 @@ class Planner:
             TableVersionHandle(tbl.id, tbl.effective_version), rows, row_builder, tbl.next_row_id
         )
-        media_input_col_info = [
-            exprs.ColumnSlotIdx(col_ref.col, col_ref.slot_idx)
-            for col_ref in row_builder.input_exprs
-            if isinstance(col_ref, exprs.ColumnRef) and col_ref.col_type.is_media_type()
-        ]
-        if len(media_input_col_info) > 0:
-            # prefetch external files for all input column refs
-            plan = exec.CachePrefetchNode(tbl.id, media_input_col_info, input=plan)
+        plan = cls._insert_prefetch_node(tbl.id, row_builder.input_exprs, input_node=plan)
         computed_exprs = row_builder.output_exprs - row_builder.input_exprs
         if len(computed_exprs) > 0:
@@ -789,15 +782,13 @@ class Planner:
     @classmethod
     def _insert_prefetch_node(
-        cls, tbl_id: UUID, row_builder: exprs.RowBuilder, input_node: exec.ExecNode
+        cls, tbl_id: UUID, expressions: Iterable[exprs.Expr], input_node: exec.ExecNode
     ) -> exec.ExecNode:
-        """Returns a CachePrefetchNode into the plan if needed, otherwise returns input"""
+        """Return a CachePrefetchNode if needed, otherwise return input"""
         # we prefetch external files for all media ColumnRefs, even those that aren't part of the dependencies
         # of output_exprs: if unstored iterator columns are present, we might need to materialize ColumnRefs that
         # aren't explicitly captured as dependencies
-        media_col_refs = [
-            e for e in list(row_builder.unique_exprs) if isinstance(e, exprs.ColumnRef) and e.col_type.is_media_type()
-        ]
+        media_col_refs = [e for e in expressions if isinstance(e, exprs.ColumnRef) and e.col_type.is_media_type()]
         if len(media_col_refs) == 0:
             return input_node
         # we need to prefetch external files for media column types
@@ -967,7 +958,7 @@ class Planner:
                 stratify_exprs=analyzer.stratify_exprs,
             )
-        plan = cls._insert_prefetch_node(tbl.tbl_version.id, row_builder, plan)
+        plan = cls._insert_prefetch_node(tbl.tbl_version.id, row_builder.unique_exprs, plan)
         if analyzer.group_by_clause is not None:
             # we're doing grouping aggregation; the input of the AggregateNode are the grouping exprs plus the

pixeltable/share/packager.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import base64
 import datetime
 import io
-import itertools
 import json
 import logging
 import tarfile
@@ -237,8 +236,7 @@ class TablePackager:
         - Videos are replaced by their first frame and resized as above
         - Documents are replaced by a thumbnail as a base64-encoded webp
         """
-        # First 8 columns
-        preview_cols = dict(itertools.islice(self.table._get_schema().items(), 0, 8))
+        preview_cols = self.table._get_schema()
         select_list = [self.table[col_name] for col_name in preview_cols]
         # First 5 rows
         rows = list(self.table.select(*select_list).head(n=5))
@@ -369,7 +367,7 @@ class TableRestorer:
         with cat.begin_xact(for_write=True):
             # Create (or update) the replica table and its ancestors, along with TableVersion instances for any
             # versions that have not been seen before.
-            cat.create_replica(catalog.Path(self.tbl_path), tbl_md)
+            cat.create_replica(catalog.Path.parse(self.tbl_path), tbl_md)
             # Now we need to load data for replica_tbl and its ancestors, except that we skip
             # replica_tbl itself if it's a pure snapshot.
@@ -572,16 +570,18 @@ class TableRestorer:
         for col_name in pydict:
             assert col_name in tv.store_tbl.sa_tbl.columns
             sql_types[col_name] = tv.store_tbl.sa_tbl.columns[col_name].type
-        media_col_ids: dict[str, int] = {}
+        media_cols: dict[str, catalog.Column] = {}
         for col in tv.cols:
             if col.is_stored and col.col_type.is_media_type():
-                media_col_ids[col.store_name()] = col.id
+                assert tv.id == col.tbl.id
+                assert tv.version == col.tbl.version
+                media_cols[col.store_name()] = col
         row_count = len(next(iter(pydict.values())))
         rows: list[dict[str, Any]] = []
         for i in range(row_count):
             row = {
-                col_name: self.__from_pa_value(tv, col_vals[i], sql_types[col_name], media_col_ids.get(col_name))
+                col_name: self.__from_pa_value(col_vals[i], sql_types[col_name], media_cols.get(col_name))
                 for col_name, col_vals in pydict.items()
             }
             rows.append(row)
@@ -589,19 +589,19 @@ class TableRestorer:
         return rows
     def __from_pa_value(
-        self, tv: catalog.TableVersion, val: Any, sql_type: sql.types.TypeEngine[Any], media_col_id: Optional[int]
+        self, val: Any, sql_type: sql.types.TypeEngine[Any], media_col: Optional[catalog.Column]
     ) -> Any:
         if val is None:
             return None
         if isinstance(sql_type, sql.JSON):
             return json.loads(val)
-        if media_col_id is not None:
-            assert isinstance(val, str)
-            return self.__relocate_media_file(tv, media_col_id, val)
+        if media_col is not None:
+            return self.__relocate_media_file(media_col, val)
         return val
-    def __relocate_media_file(self, tv: catalog.TableVersion, media_col_id: int, url: str) -> str:
+    def __relocate_media_file(self, media_col: catalog.Column, url: str) -> str:
         # If this is a pxtmedia:// URL, relocate it
+        assert isinstance(url, str)
         parsed_url = urllib.parse.urlparse(url)
         assert parsed_url.scheme != 'file'  # These should all have been converted to pxtmedia:// URLs
         if parsed_url.scheme == 'pxtmedia':
@@ -610,7 +610,7 @@ class TableRestorer:
                 # in self.media_files.
                 src_path = self.tmp_dir / 'media' / parsed_url.netloc
                 # Move the file to the media store and update the URL.
-                self.media_files[url] = MediaStore.relocate_local_media_file(src_path, tv.id, media_col_id, tv.version)
+                self.media_files[url] = MediaStore.relocate_local_media_file(src_path, media_col)
             return self.media_files[url]
         # For any type of URL other than a local file, just return the URL as-is.
         return url

pixeltable/store.py CHANGED Viewed

@@ -123,15 +123,20 @@ class StoreBase:
     def _storage_name(self) -> str:
         """Return the name of the data store table"""
-    def _move_tmp_media_file(self, file_url: Optional[str], col: catalog.Column, v_min: int) -> str:
-        return MediaStore.move_tmp_media_file(file_url, self.tbl_version.id, col.id, v_min)
+    def _move_tmp_media_file(self, file_url: Optional[str], col: catalog.Column) -> str:
+        src_path = MediaStore.resolve_tmp_url(file_url)
+        if src_path is None:
+            return file_url
+        assert col.tbl.id == self.tbl_version.id  # Ensure the column belongs to the same table as this store
+        new_file_url = MediaStore.relocate_local_media_file(src_path, col)
+        return new_file_url
     def _move_tmp_media_files(
         self, table_row: list[Any], media_cols_by_sql_idx: dict[int, catalog.Column], v_min: int
     ) -> None:
         """Move tmp media files that we generated to a permanent location"""
         for n, col in media_cols_by_sql_idx.items():
-            table_row[n] = self._move_tmp_media_file(table_row[n], col, v_min)
+            table_row[n] = self._move_tmp_media_file(table_row[n], col)
     def count(self) -> int:
         """Return the number of rows visible in self.tbl_version"""
@@ -259,9 +264,7 @@ class StoreBase:
                         raise excs.Error(f'Error while evaluating computed column {col.name!r}:\n{exc}') from exc
                     table_row, num_row_exc = row_builder.create_table_row(row, None, row.pk)
                     if col.col_type.is_media_type():
-                        table_row[tmp_val_col_sql_idx] = self._move_tmp_media_file(
-                            table_row[tmp_val_col_sql_idx], col, row.pk[-1]
-                        )
+                        table_row[tmp_val_col_sql_idx] = self._move_tmp_media_file(table_row[tmp_val_col_sql_idx], col)
                     num_excs += num_row_exc
                     batch_table_rows.append(tuple(table_row))

pixeltable/type_system.py CHANGED Viewed

@@ -5,6 +5,7 @@ import datetime
 import enum
 import io
 import json
+import types
 import typing
 import urllib.parse
 import urllib.request
@@ -307,7 +308,7 @@ class ColumnType:
         """
         origin = typing.get_origin(t)
         type_args = typing.get_args(t)
-        if origin is typing.Union:
+        if origin in (typing.Union, types.UnionType):
             # Check if `t` has the form Optional[T].
             if len(type_args) == 2 and type(None) in type_args:
                 # `t` is a type of the form Optional[T] (equivalently, Union[T, None] or Union[None, T]).

pixeltable/utils/filecache.py CHANGED Viewed

@@ -214,7 +214,7 @@ class FileCache:
         new_path = entry.path
         os.rename(str(path), str(new_path))
         new_path.touch(exist_ok=True)
-        _logger.debug(f'added entry for cell {url} to file cache')
+        _logger.debug(f'FileCache: cached url {url} with file name {new_path}')
         return new_path
     def ensure_capacity(self, size: int) -> None:

pixeltable/utils/http_server.py CHANGED Viewed

@@ -2,7 +2,7 @@ import http
 import http.server
 import logging
 import pathlib
-import urllib
+import urllib.request
 from typing import Any
 _logger = logging.getLogger('pixeltable.http.server')
@@ -36,8 +36,7 @@ class AbsolutePathHandler(http.server.SimpleHTTPRequestHandler):
         path = path.split('?', 1)[0]
         path = path.split('#', 1)[0]
-        path = pathlib.Path(urllib.request.url2pathname(path))
-        return str(path)
+        return str(pathlib.Path(urllib.request.url2pathname(path)))
     def log_message(self, format: str, *args: Any) -> None:
         """override logging to stderr in http.server.BaseHTTPRequestHandler"""

pixeltable/utils/media_store.py CHANGED Viewed

@@ -1,102 +1,147 @@
+from __future__ import annotations
 import glob
 import os
 import re
 import shutil
-import urllib
+import urllib.parse
+import urllib.request
 import uuid
 from collections import defaultdict
 from pathlib import Path
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
 from uuid import UUID
-from pixeltable.env import Env
+import PIL.Image
+from pixeltable import env
+if TYPE_CHECKING:
+    from pixeltable.catalog import Column
 class MediaStore:
     """
     Utilities to manage media files stored in Env.media_dir
-    Media file names are a composite of: table id, column id, version, uuid:
-    the table id/column id/version are redundant but useful for identifying all files for a table
+    Media file names are a composite of: table id, column id, tbl_version, new uuid:
+    the table id/column id/tbl_version are redundant but useful for identifying all files for a table
     or all files created for a particular version of a table
     """
     pattern = re.compile(r'([0-9a-fA-F]+)_(\d+)_(\d+)_([0-9a-fA-F]+)')  # tbl_id, col_id, version, uuid
     @classmethod
-    def prepare_media_path(cls, tbl_id: UUID, col_id: int, version: int, ext: Optional[str] = None) -> Path:
+    def _media_dir(cls) -> Path:
+        """Returns the media directory path."""
+        return env.Env.get().media_dir
+    @classmethod
+    def _tmp_dir(cls) -> Path:
+        """Returns the temporary directory path."""
+        return env.Env.get().tmp_dir
+    @classmethod
+    def _prepare_media_path(cls, col: Column, ext: Optional[str] = None) -> Path:
         """
         Construct a new, unique Path name for a persisted media file, and create the parent directory
         for the new Path if it does not already exist. The Path will reside in
         the environment's media_dir.
         """
         id_hex = uuid.uuid4().hex
-        parent = Env.get().media_dir / tbl_id.hex / id_hex[:2] / id_hex[:4]
+        parent = cls._media_dir() / col.tbl.id.hex / id_hex[:2] / id_hex[:4]
         parent.mkdir(parents=True, exist_ok=True)
-        return parent / f'{tbl_id.hex}_{col_id}_{version}_{id_hex}{ext or ""}'
+        return parent / f'{col.tbl.id.hex}_{col.id}_{col.tbl.version}_{id_hex}{ext or ""}'
     @classmethod
-    def move_tmp_media_file(cls, file_url: Optional[str], tbl_id: UUID, col_id: int, v_min: int) -> Optional[str]:
-        """Move a tmp media file with given url into the MediaStore, and return new url
-        If it is not a tmp file in the tmp_dir, return the original url.
+    def resolve_tmp_url(cls, file_url: Optional[str]) -> Optional[Path]:
+        """Return path if the given url is a tmp file.
         Args:
-            file_url: URL of the tmp media file to move
-            tbl_id: Table ID to associate with the media file
-            col_id: Column ID to associate with the media file
-            v_min: Version number to associate with the media file
+            file_url: URL of the tmp media file to check
         Returns:
-            URL of the media final location of the file
+            If the file_url is a tmp file, return a Path() to the tmp file, None, otherwise
         """
         if file_url is None:
             return None
         assert isinstance(file_url, str), type(file_url)
-        pxt_tmp_dir = str(Env.get().tmp_dir)
         parsed = urllib.parse.urlparse(file_url)
         # We should never be passed a local file path here. The "len > 1" ensures that Windows
         # file paths aren't mistaken for URLs with a single-character scheme.
         assert len(parsed.scheme) > 1, file_url
         if parsed.scheme != 'file':
             # remote url
-            return file_url
-        file_path = urllib.parse.unquote(urllib.request.url2pathname(parsed.path))
-        if not file_path.startswith(pxt_tmp_dir):
+            return None
+        src_path = urllib.parse.unquote(urllib.request.url2pathname(parsed.path))
+        pxt_tmp_dir = str(cls._tmp_dir())
+        if not src_path.startswith(pxt_tmp_dir):
             # not a tmp file
-            return file_url
-        new_file_url = cls.relocate_local_media_file(Path(file_path), tbl_id, col_id, v_min)
-        return new_file_url
+            return None
+        return Path(src_path)
     @classmethod
-    def relocate_local_media_file(cls, src_path: Path, tbl_id: UUID, col_id: int, tbl_version: int) -> str:
-        dest_path = MediaStore.prepare_media_path(tbl_id, col_id, tbl_version, ext=src_path.suffix)
+    def relocate_local_media_file(cls, src_path: Path, col: Column) -> str:
+        """Relocate a local file to the MediaStore, and return its new URL"""
+        dest_path = cls._prepare_media_path(col, ext=src_path.suffix)
         src_path.rename(dest_path)
         return urllib.parse.urljoin('file:', urllib.request.pathname2url(str(dest_path)))
     @classmethod
-    def save_media_file(cls, file_data: bytes, tbl_id: UUID, col_id: int, tbl_version: int) -> Path:
-        """Save a media binary data to a file in the MediaStore."""
+    def save_media_object(cls, data: bytes | PIL.Image.Image, col: Column, format: Optional[str]) -> tuple[Path, str]:
+        """Save a media data to a file in the MediaStore
+        Returns:
+            dest_path: Path to the saved media file
+            url: URL of the saved media file
+        """
+        assert col.col_type.is_media_type(), f'MediaStore: request to store non media_type Column {col.name}'
+        dest_path = cls._prepare_media_path(col)
+        if isinstance(data, bytes):
+            dest_path = cls._save_binary_media_file(data, dest_path, format)
+        elif isinstance(data, PIL.Image.Image):
+            dest_path = cls._save_pil_image_file(data, dest_path, format)
+        else:
+            raise ValueError(f'Unsupported media object type: {type(data)}')
+        url = urllib.parse.urljoin('file:', urllib.request.pathname2url(str(dest_path)))
+        return dest_path, url
+    @classmethod
+    def _save_binary_media_file(cls, file_data: bytes, dest_path: Path, format: Optional[str]) -> Path:
+        """Save a media binary data to a file in the MediaStore. format is ignored for binary data."""
         assert isinstance(file_data, bytes)
-        media_path = cls.prepare_media_path(tbl_id, col_id, tbl_version)
-        with open(media_path, 'wb') as f:
+        with open(dest_path, 'wb') as f:
             f.write(file_data)
             f.flush()  # Ensures Python buffers are written to OS
             os.fsync(f.fileno())  # Forces OS to write to physical storage
-        return media_path
+        return dest_path
+    @classmethod
+    def _save_pil_image_file(cls, image: PIL.Image.Image, dest_path: Path, format: Optional[str]) -> Path:
+        """Save a PIL Image to a file in the MediaStore with the specified format."""
+        if dest_path.suffix != f'.{format}':
+            dest_path = dest_path.with_name(f'{dest_path.name}.{format}')
+        with open(dest_path, 'wb') as f:
+            image.save(f, format=format)
+            f.flush()  # Ensures Python buffers are written to OS
+            os.fsync(f.fileno())  # Forces OS to write to physical storage
+        return dest_path
     @classmethod
-    def delete(cls, tbl_id: UUID, version: Optional[int] = None) -> None:
-        """Delete all files belonging to tbl_id. If version is not None, delete
-        only those files belonging to the specified version."""
+    def delete(cls, tbl_id: UUID, tbl_version: Optional[int] = None) -> None:
+        """Delete all files belonging to tbl_id. If tbl_version is not None, delete
+        only those files belonging to the specified tbl_version."""
         assert tbl_id is not None
-        if version is None:
+        if tbl_version is None:
             # Remove the entire folder for this table id.
-            path = Env.get().media_dir / tbl_id.hex
+            path = cls._media_dir() / tbl_id.hex
             if path.exists():
                 shutil.rmtree(path)
         else:
-            # Remove only the elements for the specified version.
-            paths = glob.glob(str(Env.get().media_dir / tbl_id.hex) + f'/**/{tbl_id.hex}_*_{version}_*', recursive=True)
+            # Remove only the elements for the specified tbl_version.
+            paths = glob.glob(
+                str(cls._media_dir() / tbl_id.hex) + f'/**/{tbl_id.hex}_*_{tbl_version}_*', recursive=True
+            )
             for p in paths:
                 os.remove(p)
@@ -105,12 +150,12 @@ class MediaStore:
         """
         Return number of files for given tbl_id.
         """
-        paths = glob.glob(str(Env.get().media_dir / tbl_id.hex) + f'/**/{tbl_id.hex}_*', recursive=True)
+        paths = glob.glob(str(cls._media_dir() / tbl_id.hex) + f'/**/{tbl_id.hex}_*', recursive=True)
         return len(paths)
     @classmethod
     def stats(cls) -> list[tuple[UUID, int, int, int]]:
-        paths = glob.glob(str(Env.get().media_dir) + '/**', recursive=True)
+        paths = glob.glob(str(cls._media_dir()) + '/**', recursive=True)
         # key: (tbl_id, col_id), value: (num_files, size)
         d: dict[tuple[UUID, int], list[int]] = defaultdict(lambda: [0, 0])
         for p in paths:

{pixeltable-0.4.4.dist-info → pixeltable-0.4.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: pixeltable
-Version: 0.4.4
+Version: 0.4.5
 Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
 License: Apache-2.0
 Keywords: data-science,machine-learning,database,ai,computer-vision,chatbot,ml,artificial-intelligence,feature-engineering,multimodal,mlops,feature-store,vector-database,llm,genai

pixeltable 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl

Potentially problematic release.

pixeltable 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl