pixeltable 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +1 -0
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +3 -11
- pixeltable/catalog/catalog.py +575 -220
- pixeltable/catalog/column.py +22 -23
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +2 -148
- pixeltable/catalog/insertable_table.py +15 -13
- pixeltable/catalog/path.py +6 -0
- pixeltable/catalog/schema_object.py +9 -4
- pixeltable/catalog/table.py +96 -85
- pixeltable/catalog/table_version.py +257 -174
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/tbl_ops.py +44 -0
- pixeltable/catalog/update_status.py +179 -0
- pixeltable/catalog/view.py +50 -56
- pixeltable/config.py +76 -12
- pixeltable/dataframe.py +19 -6
- pixeltable/env.py +50 -4
- pixeltable/exec/data_row_batch.py +3 -1
- pixeltable/exec/exec_node.py +7 -24
- pixeltable/exec/expr_eval/schedulers.py +134 -7
- pixeltable/exec/in_memory_data_node.py +6 -7
- pixeltable/exprs/column_property_ref.py +21 -9
- pixeltable/exprs/column_ref.py +7 -2
- pixeltable/exprs/function_call.py +2 -2
- pixeltable/exprs/row_builder.py +10 -9
- pixeltable/exprs/rowid_ref.py +0 -4
- pixeltable/func/function.py +3 -3
- pixeltable/functions/audio.py +36 -9
- pixeltable/functions/gemini.py +4 -4
- pixeltable/functions/openai.py +1 -2
- pixeltable/functions/video.py +59 -16
- pixeltable/globals.py +109 -24
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/datarows.py +2 -1
- pixeltable/io/external_store.py +3 -55
- pixeltable/io/globals.py +4 -4
- pixeltable/io/hf_datasets.py +10 -2
- pixeltable/io/label_studio.py +16 -16
- pixeltable/io/pandas.py +1 -0
- pixeltable/io/table_data_conduit.py +12 -13
- pixeltable/iterators/audio.py +17 -8
- pixeltable/iterators/image.py +5 -2
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_39.py +125 -0
- pixeltable/metadata/converters/util.py +3 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +50 -1
- pixeltable/plan.py +4 -0
- pixeltable/share/packager.py +20 -38
- pixeltable/store.py +40 -51
- pixeltable/type_system.py +2 -2
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/media_store.py +50 -0
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/METADATA +1 -1
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/RECORD +60 -57
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/LICENSE +0 -0
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/entry_points.txt +0 -0

pixeltable/env.py
CHANGED

@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import asyncio
 import datetime
 import glob
 import http.server
@@ -19,9 +20,10 @@ from contextlib import contextmanager
 from dataclasses import dataclass, field
 from pathlib import Path
 from sys import stdout
-from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, TypeVar
+from typing import TYPE_CHECKING, Any, Callable, Iterator, Literal, Optional, TypeVar
 from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
 
+import nest_asyncio  # type: ignore[import-untyped]
 import pixeltable_pgserver
 import sqlalchemy as sql
 from pillow_heif import register_heif_opener  # type: ignore[import-untyped]
@@ -84,7 +86,9 @@ class Env:
     _resource_pool_info: dict[str, Any]
     _current_conn: Optional[sql.Connection]
     _current_session: Optional[sql.orm.Session]
+    _current_isolation_level: Optional[Literal['REPEATABLE_READ', 'SERIALIZABLE']]
     _dbms: Optional[Dbms]
+    _event_loop: Optional[asyncio.AbstractEventLoop]  # event loop for ExecNode
 
     @classmethod
     def get(cls) -> Env:
@@ -96,6 +100,7 @@
     def _init_env(cls, reinit_db: bool = False) -> None:
         assert not cls.__initializing, 'Circular env initialization detected.'
         cls.__initializing = True
+        cls._instance = None
         env = Env()
         env._set_up(reinit_db=reinit_db)
         env._upgrade_metadata()
@@ -139,7 +144,34 @@
         self._resource_pool_info = {}
         self._current_conn = None
         self._current_session = None
+        self._current_isolation_level = None
         self._dbms = None
+        self._event_loop = None
+
+    def _init_event_loop(self) -> None:
+        try:
+            # check if we are already in an event loop (eg, Jupyter's); if so, patch it to allow
+            # multiple run_until_complete()
+            running_loop = asyncio.get_running_loop()
+            self._event_loop = running_loop
+            _logger.debug('Patched running loop')
+        except RuntimeError:
+            self._event_loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(self._event_loop)
+            # we set a deliberately long duration to avoid warnings getting printed to the console in debug mode
+            self._event_loop.slow_callback_duration = 3600
+
+        # always allow nested event loops, we need that to run async udfs synchronously (eg, for SimilarityExpr);
+        # see run_coroutine_synchronously()
+        nest_asyncio.apply()
+        if _logger.isEnabledFor(logging.DEBUG):
+            self._event_loop.set_debug(True)
+
+    @property
+    def event_loop(self) -> asyncio.AbstractEventLoop:
+        if self._event_loop is None:
+            self._init_event_loop()
+        return self._event_loop
 
     @property
     def db_url(self) -> str:
@@ -201,20 +233,34 @@
         return self._db_server is not None
 
     @contextmanager
-    def begin_xact(self) -> Iterator[sql.Connection]:
-        """
+    def begin_xact(self, for_write: bool = False) -> Iterator[sql.Connection]:
+        """
+        Call Catalog.begin_xact() instead, unless there is a specific reason to call this directly.
+
+        for_write: if True, uses serializable isolation; if False, uses repeatable_read
+
+        TODO: repeatable read is not available in Cockroachdb; instead, run queries against a snapshot TVP
+        that avoids tripping over any pending ops
+        """
         if self._current_conn is None:
             assert self._current_session is None
             try:
-
+                self._current_isolation_level = 'SERIALIZABLE' if for_write else 'REPEATABLE_READ'
+                with (
+                    self.engine.connect().execution_options(isolation_level=self._current_isolation_level) as conn,
+                    sql.orm.Session(conn) as session,
+                    conn.begin(),
+                ):
                     self._current_conn = conn
                     self._current_session = session
                     yield conn
             finally:
                 self._current_session = None
                 self._current_conn = None
+                self._current_isolation_level = None
         else:
             assert self._current_session is not None
+            assert for_write == (self._current_isolation_level == 'serializable')
             yield self._current_conn
 
     def configure_logging(

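The new _init_event_loop/event_loop pair centralizes what ExecNode previously did inline (see the next file): reuse an already-running loop (e.g., Jupyter's) or create one, and apply nest_asyncio so run_until_complete() can nest. A minimal standalone sketch of the same pattern, assuming only that nest_asyncio is installed:

    import asyncio

    import nest_asyncio

    def get_or_create_loop() -> asyncio.AbstractEventLoop:
        try:
            loop = asyncio.get_running_loop()  # reuse e.g. Jupyter's loop
        except RuntimeError:
            loop = asyncio.new_event_loop()  # plain script: create and install one
            asyncio.set_event_loop(loop)
        nest_asyncio.apply()  # make run_until_complete() re-entrant
        return loop

    async def compute() -> int:
        return 42

    print(get_or_create_loop().run_until_complete(compute()))  # 42
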
pixeltable/exec/data_row_batch.py
CHANGED

@@ -90,7 +90,9 @@ class DataRowBatch:
             idx_range = slice(0, len(self.rows))
         for row in self.rows[idx_range]:
             for info in stored_img_info:
-
+                col = info.col
+                assert col.tbl.id == self.tbl.id
+                filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.version))
                 row.flush_img(info.slot_idx, filepath)
             for slot_idx in flushed_slot_idxs:
                 row.flush_img(slot_idx)

pixeltable/exec/exec_node.py
CHANGED

@@ -1,11 +1,11 @@
 from __future__ import annotations
 
 import abc
-import asyncio
 import logging
 from typing import AsyncIterator, Iterable, Iterator, Optional, TypeVar
 
 from pixeltable import exprs
+from pixeltable.env import Env
 
 from .data_row_batch import DataRowBatch
 from .exec_context import ExecContext
@@ -59,26 +59,7 @@ class ExecNode(abc.ABC):
         pass
 
     def __iter__(self) -> Iterator[DataRowBatch]:
-
-        loop: asyncio.AbstractEventLoop
-        try:
-            # check if we are already in an event loop (eg, Jupyter's); if so, patch it to allow
-            # multiple run_until_complete()
-            running_loop = asyncio.get_running_loop()
-            import nest_asyncio  # type: ignore[import-untyped]
-
-            nest_asyncio.apply()
-            loop = running_loop
-            _logger.debug('Patched running loop')
-        except RuntimeError:
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
-            # we set a deliberately long duration to avoid warnings getting printed to the console in debug mode
-            loop.slow_callback_duration = 3600
-
-        if _logger.isEnabledFor(logging.DEBUG):
-            loop.set_debug(True)
-
+        loop = Env.get().event_loop
         aiter = self.__aiter__()
         try:
             while True:
@@ -86,9 +67,11 @@
                 yield batch
         except StopAsyncIteration:
             pass
-
-
-
+        # TODO:
+        # - we seem to have some tasks that aren't accounted for by ExprEvalNode and don't get cancelled by the time
+        #   we end up here
+        # - however, blindly cancelling all pending tasks doesn't work when running in a jupyter environment, which
+        #   creates tasks on its own
 
     def open(self) -> None:
         """Bottom-up initialization of nodes for execution. Must be called before __next__."""

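__iter__ now simply drives the shared loop, pulling one batch at a time out of the async iterator. A toy sketch of that sync-over-async bridge, with a stub async generator standing in for __aiter__():

    import asyncio
    from typing import Iterator

    async def batches():  # stand-in for ExecNode.__aiter__()
        for i in range(3):
            yield f'batch-{i}'

    def iterate(loop: asyncio.AbstractEventLoop) -> Iterator[str]:
        aiter = batches().__aiter__()
        try:
            while True:
                # synchronously pull the next item out of the async iterator
                yield loop.run_until_complete(aiter.__anext__())
        except StopAsyncIteration:
            pass

    loop = asyncio.new_event_loop()  # Pixeltable uses Env.get().event_loop here
    print(list(iterate(loop)))  # ['batch-0', 'batch-1', 'batch-2']
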
pixeltable/exec/expr_eval/schedulers.py
CHANGED

@@ -4,9 +4,10 @@ import asyncio
 import datetime
 import inspect
 import logging
+import re
 import sys
 import time
-from typing import Awaitable, Collection, Optional
+from typing import Any, Awaitable, Collection, Optional
 
 from pixeltable import env, func
 from pixeltable.config import Config
@@ -250,8 +251,20 @@ class RequestRateScheduler(Scheduler):
     total_retried: int
 
     TIME_FORMAT = '%H:%M.%S %f'
-    MAX_RETRIES =
+    MAX_RETRIES = 3
     DEFAULT_RATE_LIMIT = 600  # requests per minute
+    RATE_LIMIT_INDICATORS = ('rate limit', 'too many requests', '429', 'quota exceeded', 'throttled', 'rate exceeded')
+    RETRY_AFTER_PATTERNS = (
+        r'retry after (\d+(?:\.\d+)?)\s*seconds?',
+        r'try again in (\d+(?:\.\d+)?)\s*seconds?',
+        r'wait (\d+(?:\.\d+)?)\s*seconds?',
+        r'retry-after:\s*(\d+(?:\.\d+)?)',
+    )
+
+    # Exponential backoff defaults
+    BASE_RETRY_DELAY = 1.0  # in seconds
+    MAX_RETRY_DELAY = 60.0  # in seconds
+    RETRY_BACKOFF_MULTIPLIER = 2.0
 
     def __init__(self, resource_pool: str, dispatcher: Dispatcher):
         super().__init__(resource_pool, dispatcher)
@@ -337,11 +350,12 @@
                 self.dispatcher.dispatch(request.rows, exec_ctx)
 
         except Exception as exc:
-
-
-
-
-
+            _logger.debug(f'exception for {self.resource_pool}: type={type(exc)}\n{exc}')
+            is_rate_limit_error, retry_after = self._is_rate_limit_error(exc)
+            if is_rate_limit_error and num_retries < self.MAX_RETRIES:
+                retry_delay = self._compute_retry_delay(num_retries, retry_after)
+                _logger.debug(f'scheduler {self.resource_pool}: retrying after {retry_delay}')
+                await asyncio.sleep(retry_delay)
                 self.queue.put_nowait(self.QueueItem(request, num_retries + 1, exec_ctx))
                 return
 
@@ -358,6 +372,119 @@
         if is_task:
             self.num_in_flight -= 1
 
+    def _is_rate_limit_error(self, exc: Exception) -> tuple[bool, Optional[float]]:
+        """Returns True if the exception indicates a rate limit error, and the retry delay in seconds."""
+        from http import HTTPStatus
+
+        # Check for HTTP status TOO_MANY_REQUESTS in various exception classes.
+        # We look for attributes that contain status codes, instead of checking the type of the exception,
+        # in order to handle a wider variety of exception classes.
+        is_rate_limit_error = False
+        retry_delay: Optional[float] = None
+
+        # requests.HTTPError/httpx.HTTPStatusError
+        if (
+            hasattr(exc, 'response')
+            and hasattr(exc.response, 'status_code')
+            and exc.response.status_code == HTTPStatus.TOO_MANY_REQUESTS.value
+        ):
+            is_rate_limit_error = True
+            retry_delay = self._extract_retry_delay_from_headers(exc.response.headers)
+        elif (
+            # urllib.error.HTTPError
+            (hasattr(exc, 'code') and exc.code == HTTPStatus.TOO_MANY_REQUESTS.value)
+            # aiohttp.ClientResponseError
+            or (hasattr(exc, 'status') and exc.status == HTTPStatus.TOO_MANY_REQUESTS.value)
+        ) and hasattr(exc, 'headers'):
+            is_rate_limit_error = True
+            retry_delay = self._extract_retry_delay_from_headers(exc.headers)
+
+        if is_rate_limit_error:
+            return True, retry_delay
+
+        # Check common rate limit keywords in exception message
+        error_msg = str(exc).lower()
+        if any(indicator in error_msg for indicator in self.RATE_LIMIT_INDICATORS):
+            retry_delay = self._extract_retry_delay_from_message(error_msg)
+            return True, retry_delay
+
+        return False, None
+
+    def _extract_retry_delay_from_headers(self, headers: Optional[Any]) -> Optional[float]:
+        """Extract retry delay from HTTP headers."""
+        if headers is None:
+            return None
+
+        # convert headers to dict-like object for consistent access
+        header_dict: dict
+        if hasattr(headers, 'get'):
+            header_dict = headers
+        else:
+            # headers are a list of tuples or other format
+            try:
+                header_dict = dict(headers)
+            except (TypeError, ValueError):
+                return None
+        # normalize dict keys: lowercase and remove dashes
+        header_dict = {k.lower().replace('-', ''): v for k, v in header_dict.items()}
+
+        # check Retry-After header
+        retry_after = header_dict.get('retryafter')
+        if retry_after is not None:
+            try:
+                return float(retry_after)
+            except (ValueError, TypeError):
+                pass
+
+        # check X-RateLimit-Reset (Unix timestamp)
+        reset_time = header_dict.get('xratelimitreset')
+        if reset_time is not None:
+            try:
+                reset_timestamp = float(reset_time)
+                delay = max(0, reset_timestamp - time.time())
+                return delay
+            except (ValueError, TypeError):
+                pass
+
+        # check X-RateLimit-Reset-After (seconds from now)
+        reset_after = header_dict.get('xratelimitresetafter')
+        if reset_after is not None:
+            try:
+                return float(reset_after)
+            except (ValueError, TypeError):
+                pass
+
+        return None
+
+    def _extract_retry_delay_from_message(self, msg: str) -> Optional[float]:
+        msg_lower = msg.lower()
+        for pattern in self.RETRY_AFTER_PATTERNS:
+            match = re.search(pattern, msg_lower)
+            if match is not None:
+                try:
+                    return float(match.group(1))
+                except (ValueError, TypeError):
+                    continue
+        return None
+
+    def _compute_retry_delay(self, num_retries: int, retry_after: Optional[float] = None) -> float:
+        """
+        Calculate exponential backoff delay for rate limit errors.
+
+        Args:
+            retry_count: Number of retries attempted (0-based)
+            retry_after: Suggested delay from Retry-After header
+
+        Returns:
+            Delay in seconds
+        """
+        if retry_after is not None and retry_after > 0:
+            # Use server-suggested delay, but cap it at max_delay
+            return max(min(retry_after, self.MAX_RETRY_DELAY), self.BASE_RETRY_DELAY)
+        else:
+            delay = self.BASE_RETRY_DELAY * (self.RETRY_BACKOFF_MULTIPLIER**num_retries)
+            return max(min(delay, self.MAX_RETRY_DELAY), self.BASE_RETRY_DELAY)
+
 
 # all concrete Scheduler subclasses that implement matches()
 SCHEDULERS = [RateLimitsScheduler, RequestRateScheduler]

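With these constants, _compute_retry_delay is a standard capped exponential backoff: absent a server hint the delays are 1 s, 2 s, 4 s (so with MAX_RETRIES = 3 a request waits at most 7 s across retries), and a Retry-After hint is used directly, clamped to [1 s, 60 s]. A self-contained check of that arithmetic:

    from typing import Optional

    BASE_RETRY_DELAY, MAX_RETRY_DELAY, RETRY_BACKOFF_MULTIPLIER = 1.0, 60.0, 2.0

    def compute_retry_delay(num_retries: int, retry_after: Optional[float] = None) -> float:
        if retry_after is not None and retry_after > 0:
            return max(min(retry_after, MAX_RETRY_DELAY), BASE_RETRY_DELAY)  # clamp the server hint
        delay = BASE_RETRY_DELAY * RETRY_BACKOFF_MULTIPLIER ** num_retries
        return max(min(delay, MAX_RETRY_DELAY), BASE_RETRY_DELAY)

    print([compute_retry_delay(n) for n in range(3)])  # [1.0, 2.0, 4.0]
    print(compute_retry_delay(0, retry_after=90.0))    # 60.0 (capped)
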
pixeltable/exec/in_memory_data_node.py
CHANGED

@@ -63,13 +63,12 @@ class InMemoryDataNode(ExecNode):
             for col_name, val in input_row.items():
                 col_info = user_cols_by_name.get(col_name)
                 assert col_info is not None
-
-                if
-                    # this is a literal
-
-
-
-                    self.output_rows[row_idx][col_info.slot_idx] = path
+                col = col_info.col
+                if col.col_type.is_image_type() and isinstance(val, bytes):
+                    # this is a literal media file, ie, a sequence of bytes; save it as a binary file and store the path
+                    assert col.tbl.id == self.tbl.id
+                    path = MediaStore.save_media_file(val, col.tbl.id, col.id, col.tbl.version)
+                    self.output_rows[row_idx][col_info.slot_idx] = str(path)
                 else:
                     self.output_rows[row_idx][col_info.slot_idx] = val
 

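The new bytes branch goes through MediaStore.save_media_file; a generic sketch of the same idea, with a hypothetical helper writing to the system temp dir instead of Pixeltable's media store:

    import tempfile
    import uuid
    from pathlib import Path

    def save_media_bytes(val: bytes, suffix: str = '.png') -> str:
        # an image slot stores a path/URL, not raw bytes, so persist the
        # literal as a file and record its location
        path = Path(tempfile.gettempdir()) / f'{uuid.uuid4().hex}{suffix}'
        path.write_bytes(val)
        return str(path)

    print(save_media_bytes(b'\x89PNG\r\n\x1a\n'))  # e.g. /tmp/3f2a...png
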
pixeltable/exprs/column_property_ref.py
CHANGED

@@ -26,6 +26,7 @@ class ColumnPropertyRef(Expr):
     ERRORMSG = 1
     FILEURL = 2
     LOCALPATH = 3
+    CELLMD = 4  # JSON metadata for the cell, e.g. errortype, errormsg for media columns
 
     def __init__(self, col_ref: ColumnRef, prop: Property):
         super().__init__(ts.StringType(nullable=True))
@@ -51,8 +52,8 @@
     def __repr__(self) -> str:
         return f'{self._col_ref}.{self.prop.name.lower()}'
 
-    def
-        return self.prop in (self.Property.ERRORTYPE, self.Property.ERRORMSG)
+    def is_cellmd_prop(self) -> bool:
+        return self.prop in (self.Property.ERRORTYPE, self.Property.ERRORMSG, self.Property.CELLMD)
 
     def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
         if not self._col_ref.col_handle.get().is_stored:
@@ -63,21 +64,27 @@
         if (
             col.col_type.is_media_type()
             and col.media_validation == catalog.MediaValidation.ON_READ
-            and self.
+            and self.is_cellmd_prop()
         ):
             return None
 
         if self.prop == self.Property.ERRORTYPE:
-
-            return col.sa_errortype_col
+            return col.sa_cellmd_col.op('->>')('errortype')
         if self.prop == self.Property.ERRORMSG:
-
-
+            return col.sa_cellmd_col.op('->>')('errormsg')
+        if self.prop == self.Property.CELLMD:
+            assert col.sa_cellmd_col is not None
+            return col.sa_cellmd_col
         if self.prop == self.Property.FILEURL:
             # the file url is stored as the column value
             return sql_elements.get(self._col_ref)
         return None
 
+    @classmethod
+    def create_cellmd_exc(cls, exc: Exception) -> dict[str, str]:
+        """Create a cellmd value from an exception."""
+        return {'errortype': type(exc).__name__, 'errormsg': str(exc)}
+
     def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
         if self.prop == self.Property.FILEURL:
             assert data_row.has_val[self._col_ref.slot_idx]
@@ -87,14 +94,19 @@
             assert data_row.has_val[self._col_ref.slot_idx]
             data_row[self.slot_idx] = data_row.file_paths[self._col_ref.slot_idx]
             return
-        elif self.
+        elif self.is_cellmd_prop():
             exc = data_row.get_exc(self._col_ref.slot_idx)
             if exc is None:
                 data_row[self.slot_idx] = None
             elif self.prop == self.Property.ERRORTYPE:
                 data_row[self.slot_idx] = type(exc).__name__
-
+            elif self.prop == self.Property.ERRORMSG:
                 data_row[self.slot_idx] = str(exc)
+            elif self.prop == self.Property.CELLMD:
+                data_row[self.slot_idx] = self.create_cellmd_exc(exc)
+            else:
+                raise AssertionError(f'Unknown property {self.prop}')
+            return
         else:
             raise AssertionError()
 

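The op('->>') calls build the Postgres JSON operator that extracts a field as text, which is how errortype/errormsg are now read out of the consolidated cellmd column. A generic SQLAlchemy snippet (toy table, not Pixeltable's schema) showing what such an expression compiles to:

    import sqlalchemy as sql

    t = sql.table('t', sql.column('cellmd'))
    errortype = t.c.cellmd.op('->>')('errortype')
    print(errortype)  # t.cellmd ->> :cellmd_1, i.e. the 'errortype' field as text
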
pixeltable/exprs/column_ref.py
CHANGED

@@ -115,11 +115,15 @@ class ColumnRef(Expr):
         from .column_property_ref import ColumnPropertyRef
 
         # resolve column properties
+        if name == ColumnPropertyRef.Property.CELLMD.name.lower():
+            # This is not user accessible, but used internally to store cell metadata
+            return super().__getattr__(name)
+
         if (
             name == ColumnPropertyRef.Property.ERRORTYPE.name.lower()
             or name == ColumnPropertyRef.Property.ERRORMSG.name.lower()
         ):
-            property_is_present = self.col.
+            property_is_present = self.col.stores_cellmd
             if not property_is_present:
                 raise excs.Error(f'{name} only valid for a stored computed or media column: {self}')
             return ColumnPropertyRef(self, ColumnPropertyRef.Property[name.upper()])
@@ -321,7 +325,8 @@
     @classmethod
     def get_column(cls, d: dict) -> catalog.Column:
         tbl_id, version, col_id = UUID(d['tbl_id']), d['tbl_version'], d['col_id']
-
+        # validate_initialized=False: this gets called as part of TableVersion.init()
+        tbl_version = catalog.Catalog.get().get_tbl_version(tbl_id, version, validate_initialized=False)
         # don't use tbl_version.cols_by_id here, this might be a snapshot reference to a column that was then dropped
         col = next(col for col in tbl_version.cols if col.id == col_id)
         return col

pixeltable/exprs/function_call.py
CHANGED

@@ -446,11 +446,11 @@ class FunctionCall(Expr):
             dedent(
                 f"""
                 The UDF '{fn.self_path}' cannot be located, because
-                {{
+                {{error_msg}}
                 """
             )
             .strip()
-            .format(
+            .format(error_msg=fn.error_msg)
         )
         return cls(fn, args, kwargs, return_type, is_method_call=is_method_call, validation_error=validation_error)
 

pixeltable/exprs/row_builder.py
CHANGED

@@ -209,7 +209,7 @@ class RowBuilder:
                 # this is input and therefore doesn't depend on other exprs
                 continue
             # error properties don't have exceptions themselves
-            if isinstance(expr, ColumnPropertyRef) and expr.
+            if isinstance(expr, ColumnPropertyRef) and expr.is_cellmd_prop():
                 continue
             dependency_idxs = [d.slot_idx for d in expr.dependencies()]
             self.dependencies[expr.slot_idx, dependency_idxs] = True
@@ -444,6 +444,8 @@
         Return tuple[list of row values in `self.table_columns` order, # of exceptions]
         This excludes system columns.
         """
+        from pixeltable.exprs.column_property_ref import ColumnPropertyRef
+
         num_excs = 0
         table_row: list[Any] = list(pk)
         for info in self.table_columns:
@@ -454,9 +456,9 @@
                 if cols_with_excs is not None:
                     cols_with_excs.add(col.id)
                 table_row.append(None)
-                if col.
-                    # exceptions get stored in the errortype/-msg
-                    table_row.
+                if col.stores_cellmd:
+                    # exceptions get stored in the errortype/-msg properties of the cellmd column
+                    table_row.append(ColumnPropertyRef.create_cellmd_exc(exc))
             else:
                 if col.col_type.is_image_type() and data_row.file_urls[slot_idx] is None:
                     # we have yet to store this image
@@ -464,8 +466,8 @@
                     data_row.flush_img(slot_idx, filepath)
                 val = data_row.get_stored_val(slot_idx, col.get_sa_col_type())
                 table_row.append(val)
-                if col.
-                    table_row.
+                if col.stores_cellmd:
+                    table_row.append(None)  # placeholder for cellmd column
 
         return table_row, num_excs
 
@@ -483,8 +485,7 @@
             if col.col.col_type.is_media_type():
                 media_cols[len(store_col_names)] = col.col
             store_col_names.append(col.col.store_name())
-            if col.col.
-                store_col_names.append(col.col.
-                store_col_names.append(col.col.errormsg_store_name())
+            if col.col.stores_cellmd:
+                store_col_names.append(col.col.cellmd_store_name())
 
         return store_col_names, media_cols

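Taken together with create_cellmd_exc above, the cellmd value stored for a failed cell is a plain two-key dict; for example:

    try:
        int('not a number')
    except Exception as exc:
        cellmd = {'errortype': type(exc).__name__, 'errormsg': str(exc)}
        print(cellmd)
    # {'errortype': 'ValueError', 'errormsg': "invalid literal for int() with base 10: 'not a number'"}
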
pixeltable/exprs/rowid_ref.py
CHANGED

@@ -105,10 +105,6 @@ class RowidRef(Expr):
         assert self.rowid_component_idx <= len(rowid_cols), (
             f'{self.rowid_component_idx} not consistent with {rowid_cols}'
         )
-        # _logger.debug(
-        #     f'RowidRef.sql_expr: tbl={tbl.id}{tbl.effective_version} sa_tbl={id(tbl.store_tbl.sa_tbl):x} '
-        #     f'tv={id(tbl):x}'
-        # )
         return rowid_cols[self.rowid_component_idx]
 
     def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:

pixeltable/func/function.py
CHANGED

@@ -504,12 +504,12 @@ class Function(ABC):
 
 class InvalidFunction(Function):
     fn_dict: dict[str, Any]
-
+    error_msg: str
 
-    def __init__(self, self_path: str, fn_dict: dict[str, Any],
+    def __init__(self, self_path: str, fn_dict: dict[str, Any], error_msg: str):
         super().__init__([], self_path)
         self.fn_dict = fn_dict
-        self.
+        self.error_msg = error_msg
 
     def _as_dict(self) -> dict:
         """

pixeltable/functions/audio.py
CHANGED

@@ -1,14 +1,5 @@
 """
 Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `AudioType`.
-
-Example:
-```python
-import pixeltable as pxt
-import pixeltable.functions as pxtf
-
-t = pxt.get_table(...)
-t.select(pxtf.audio.get_metadata()).collect()
-```
 """
 
 import pixeltable as pxt
@@ -19,6 +10,42 @@ from pixeltable.utils.code import local_public_names
 def get_metadata(audio: pxt.Audio) -> dict:
     """
     Gets various metadata associated with an audio file and returns it as a dictionary.
+
+    Args:
+        audio: The audio to get metadata for.
+
+    Returns:
+        A `dict` such as the following:
+
+        ```json
+        {
+            'size': 2568827,
+            'streams': [
+                {
+                    'type': 'audio',
+                    'frames': 0,
+                    'duration': 2646000,
+                    'metadata': {},
+                    'time_base': 2.2675736961451248e-05,
+                    'codec_context': {
+                        'name': 'flac',
+                        'profile': None,
+                        'channels': 1,
+                        'codec_tag': '\\x00\\x00\\x00\\x00',
+                    },
+                    'duration_seconds': 60.0,
+                }
+            ],
+            'bit_rate': 342510,
+            'metadata': {'encoder': 'Lavf61.1.100'},
+            'bit_exact': False,
+        }
+        ```
+
+    Examples:
+        Extract metadata for files in the `audio_col` column of the table `tbl`:
+
+        >>> tbl.select(tbl.audio_col.get_metadata()).collect()
     """
     return pxt.functions.video._get_metadata(audio)
 

pixeltable/functions/gemini.py
CHANGED

@@ -7,7 +7,6 @@ the [Working with Gemini](https://pixeltable.readme.io/docs/working-with-gemini)
 
 import asyncio
 import io
-import tempfile
 from pathlib import Path
 from typing import TYPE_CHECKING, Optional
 
@@ -215,9 +214,10 @@ async def generate_videos(
     video_bytes = await _genai_client().aio.files.download(file=video.video)  # type: ignore[arg-type]
     assert video_bytes is not None
 
-
-
-
+    # Create a temporary file to store the video bytes
+    output_path = env.Env.get().create_tmp_path('.mp4')
+    Path(output_path).write_bytes(video_bytes)
+    return str(output_path)
 
 
 @generate_videos.resource_pool

pixeltable/functions/openai.py
CHANGED

@@ -13,7 +13,6 @@ import logging
 import math
 import pathlib
 import re
-import uuid
 from typing import TYPE_CHECKING, Any, Callable, Optional, Type
 
 import httpx
@@ -207,7 +206,7 @@ async def speech(input: str, *, model: str, voice: str, model_kwargs: Optional[d
 
     content = await _openai_client().audio.speech.create(input=input, model=model, voice=voice, **model_kwargs)
     ext = model_kwargs.get('response_format', 'mp3')
-    output_filename = str(env.Env.get().
+    output_filename = str(env.Env.get().create_tmp_path(f'.{ext}'))
     content.write_to_file(output_filename)
     return output_filename
 

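This change and the gemini.py one above both replace ad-hoc tempfile/uuid usage with env.Env.get().create_tmp_path(suffix). Assuming it returns a unique, not-yet-created path with the given extension inside Pixeltable's managed temp directory, a rough standalone equivalent would be:

    import tempfile
    import uuid
    from pathlib import Path

    def create_tmp_path(suffix: str) -> Path:
        # unique path in the temp dir; creating the file is left to the caller
        return Path(tempfile.gettempdir()) / f'{uuid.uuid4().hex}{suffix}'

    out = create_tmp_path('.mp3')
    out.write_bytes(b'ID3')  # placeholder payload
    print(out)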