PyPI - pixeltable - Versions diffs - 0.4.6__py3-none-any.whl → 0.4.7__py3-none-any.whl - Mend

pixeltable 0.4.6__py3-none-any.whl → 0.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (53)

pixeltable/__init__.py +4 -2
pixeltable/catalog/__init__.py +1 -1
pixeltable/catalog/catalog.py +3 -3
pixeltable/catalog/column.py +49 -0
pixeltable/catalog/insertable_table.py +0 -7
pixeltable/catalog/schema_object.py +1 -14
pixeltable/catalog/table.py +139 -53
pixeltable/catalog/table_version.py +30 -138
pixeltable/catalog/view.py +2 -1
pixeltable/dataframe.py +2 -3
pixeltable/env.py +43 -5
pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
pixeltable/exec/expr_eval/schedulers.py +36 -15
pixeltable/exprs/array_slice.py +2 -2
pixeltable/exprs/data_row.py +13 -0
pixeltable/exprs/expr.py +9 -9
pixeltable/exprs/function_call.py +2 -2
pixeltable/exprs/globals.py +1 -2
pixeltable/exprs/json_path.py +3 -3
pixeltable/exprs/row_builder.py +14 -16
pixeltable/exprs/string_op.py +3 -3
pixeltable/func/query_template_function.py +2 -2
pixeltable/func/signature.py +30 -3
pixeltable/func/tools.py +2 -2
pixeltable/functions/anthropic.py +75 -25
pixeltable/functions/globals.py +2 -2
pixeltable/functions/llama_cpp.py +9 -1
pixeltable/functions/openai.py +74 -54
pixeltable/functions/video.py +54 -1
pixeltable/functions/vision.py +2 -2
pixeltable/globals.py +74 -12
pixeltable/io/datarows.py +3 -3
pixeltable/io/fiftyone.py +4 -4
pixeltable/io/globals.py +3 -3
pixeltable/io/hf_datasets.py +4 -4
pixeltable/io/pandas.py +6 -6
pixeltable/io/parquet.py +3 -3
pixeltable/io/table_data_conduit.py +2 -2
pixeltable/io/utils.py +2 -2
pixeltable/iterators/document.py +2 -2
pixeltable/iterators/video.py +49 -9
pixeltable/share/packager.py +45 -36
pixeltable/store.py +5 -25
pixeltable/type_system.py +5 -8
pixeltable/utils/__init__.py +2 -2
pixeltable/utils/arrow.py +5 -5
pixeltable/utils/description_helper.py +3 -3
pixeltable/utils/iceberg.py +1 -2
{pixeltable-0.4.6.dist-info → pixeltable-0.4.7.dist-info}/METADATA +70 -19
{pixeltable-0.4.6.dist-info → pixeltable-0.4.7.dist-info}/RECORD +53 -53
{pixeltable-0.4.6.dist-info → pixeltable-0.4.7.dist-info}/WHEEL +0 -0
{pixeltable-0.4.6.dist-info → pixeltable-0.4.7.dist-info}/entry_points.txt +0 -0
{pixeltable-0.4.6.dist-info → pixeltable-0.4.7.dist-info}/licenses/LICENSE +0 -0

pixeltable/exprs/json_path.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from __future__ import annotations
-from typing import Any, Optional, Union
+from typing import Any, Optional
 import jmespath
 import sqlalchemy as sql
@@ -18,7 +18,7 @@ from .sql_element_cache import SqlElementCache
 class JsonPath(Expr):
     def __init__(
-        self, anchor: Optional[Expr], path_elements: Optional[list[Union[str, int, slice]]] = None, scope_idx: int = 0
+        self, anchor: Optional[Expr], path_elements: Optional[list[str | int | slice]] = None, scope_idx: int = 0
     ) -> None:
         """
         anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
@@ -30,7 +30,7 @@ class JsonPath(Expr):
         super().__init__(ts.JsonType(nullable=True))  # JsonPath expressions are always nullable
         if anchor is not None:
             self.components = [anchor]
-        self.path_elements: list[Union[str, int, slice]] = path_elements
+        self.path_elements: list[str | int | slice] = path_elements
         self.compiled_path = jmespath.compile(self._json_path()) if len(path_elements) > 0 else None
         self.scope_idx = scope_idx
         # NOTE: the _create_id() result will change if set_anchor() gets called;

pixeltable/exprs/row_builder.py CHANGED Viewed

@@ -3,7 +3,7 @@ from __future__ import annotations
 import sys
 import time
 from dataclasses import dataclass
-from typing import Any, Iterable, Optional, Sequence
+from typing import Any, Iterable, NamedTuple, Optional, Sequence
 from uuid import UUID
 import numpy as np
@@ -34,8 +34,7 @@ class ExecProfile:
             )
-@dataclass
-class ColumnSlotIdx:
+class ColumnSlotIdx(NamedTuple):
     """Info for how to locate materialized column in DataRow
     TODO: can this be integrated into RowBuilder directly?
     """
@@ -127,7 +126,7 @@ class RowBuilder:
         )
         # if init(columns):
-        # - we are creating table rows and need to record columns for create_table_row()
+        # - we are creating table rows and need to record columns for create_store_table_row()
         # - output_exprs materialize those columns
         # - input_exprs are ColumnRefs of the non-computed columns (ie, what needs to be provided as input)
         # - media validation:
@@ -445,20 +444,20 @@ class RowBuilder:
                         expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0
                     ) from exc
-    def create_table_row(
+    def create_store_table_row(
         self, data_row: DataRow, cols_with_excs: Optional[set[int]], pk: tuple[int, ...]
     ) -> tuple[list[Any], int]:
-        """Create a table row from the slots that have an output column assigned
+        """Create a store table row from the slots that have an output column assigned
         Return tuple[list of row values in `self.table_columns` order, # of exceptions]
             This excludes system columns.
+            Row values are converted to their store type.
         """
         from pixeltable.exprs.column_property_ref import ColumnPropertyRef
         num_excs = 0
         table_row: list[Any] = list(pk)
-        for info in self.table_columns:
-            col, slot_idx = info.col, info.slot_idx
+        for col, slot_idx in self.table_columns:
             if data_row.has_exc(slot_idx):
                 exc = data_row.get_exc(slot_idx)
                 num_excs += 1
@@ -469,9 +468,11 @@ class RowBuilder:
                     # exceptions get stored in the errortype/-msg properties of the cellmd column
                     table_row.append(ColumnPropertyRef.create_cellmd_exc(exc))
             else:
-                if col.col_type.is_image_type() and data_row.file_urls[slot_idx] is None:
-                    # we have yet to store this image
-                    data_row.flush_img(slot_idx, col)
+                if col.col_type.is_media_type():
+                    if col.col_type.is_image_type() and data_row.file_urls[slot_idx] is None:
+                        # we have yet to store this image
+                        data_row.flush_img(slot_idx, col)
+                    data_row.move_tmp_media_file(slot_idx, col)
                 val = data_row.get_stored_val(slot_idx, col.get_sa_col_type())
                 table_row.append(val)
                 if col.stores_cellmd:
@@ -479,7 +480,7 @@ class RowBuilder:
         return table_row, num_excs
-    def store_column_names(self) -> tuple[list[str], dict[int, catalog.Column]]:
+    def store_column_names(self) -> list[str]:
         """
         Returns the list of store column names corresponding to the table_columns of this RowBuilder.
         The second tuple element of the return value is a dictionary containing all media columns in the
@@ -487,16 +488,13 @@ class RowBuilder:
         """
         assert self.tbl is not None, self.table_columns
         store_col_names: list[str] = [pk_col.name for pk_col in self.tbl.store_tbl.pk_columns()]
-        media_cols: dict[int, catalog.Column] = {}
         for col in self.table_columns:
-            if col.col.col_type.is_media_type():
-                media_cols[len(store_col_names)] = col.col
             store_col_names.append(col.col.store_name())
             if col.col.stores_cellmd:
                 store_col_names.append(col.col.cellmd_store_name())
-        return store_col_names, media_cols
+        return store_col_names
     def make_row(self) -> exprs.DataRow:
         """Creates a new DataRow with the current row_builder's configuration."""

pixeltable/exprs/string_op.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from __future__ import annotations
-from typing import Any, Optional, Union
+from typing import Any, Optional
 import sqlalchemy as sql
@@ -76,7 +76,7 @@ class StringOp(Expr):
         op2_val = data_row[self._op2.slot_idx]
         data_row[self.slot_idx] = self.eval_nullable(op1_val, op2_val)
-    def eval_nullable(self, op1_val: Union[str, None], op2_val: Union[int, str, None]) -> Union[str, None]:
+    def eval_nullable(self, op1_val: str | None, op2_val: int | str | None) -> str | None:
         """
         Return the result of evaluating the expression on two nullable int/float operands,
         None is interpreted as SQL NULL
@@ -85,7 +85,7 @@ class StringOp(Expr):
             return None
         return self.eval_non_null(op1_val, op2_val)
-    def eval_non_null(self, op1_val: str, op2_val: Union[int, str]) -> str:
+    def eval_non_null(self, op1_val: str, op2_val: int | str) -> str:
         """
         Return the result of evaluating the expression on two int/float operands
         """

pixeltable/func/query_template_function.py CHANGED Viewed

@@ -2,7 +2,7 @@ from __future__ import annotations
 import inspect
 from functools import reduce
-from typing import TYPE_CHECKING, Any, Callable, Iterable, Optional, Union, overload
+from typing import TYPE_CHECKING, Any, Callable, Iterable, Optional, overload
 from pixeltable import catalog, exceptions as excs, exprs, func, type_system as ts
@@ -129,7 +129,7 @@ def retrieval_udf(
     table: catalog.Table,
     name: Optional[str] = None,
     description: Optional[str] = None,
-    parameters: Optional[Iterable[Union[str, exprs.ColumnRef]]] = None,
+    parameters: Optional[Iterable[str | exprs.ColumnRef]] = None,
     limit: Optional[int] = 10,
 ) -> func.QueryTemplateFunction:
     """

pixeltable/func/signature.py CHANGED Viewed

@@ -84,8 +84,28 @@ class Signature:
     """
     SPECIAL_PARAM_NAMES: ClassVar[list[str]] = ['group_by', 'order_by']
-    def __init__(self, return_type: ts.ColumnType, parameters: list[Parameter], is_batched: bool = False):
+    SYSTEM_PARAM_NAMES: ClassVar[list[str]] = ['_runtime_ctx']
+    return_type: ts.ColumnType
+    is_batched: bool
+    parameters: dict[str, Parameter]  # name -> Parameter
+    parameters_by_pos: list[Parameter]  # ordered by position in the signature
+    constant_parameters: list[Parameter]  # parameters that are not batched
+    batched_parameters: list[Parameter]  # parameters that are batched
+    required_parameters: list[Parameter]  # parameters that do not have a default value
+    # the names of recognized system parameters in the signature; these are excluded from self.parameters
+    system_parameters: list[str]
+    py_signature: inspect.Signature
+    def __init__(
+        self,
+        return_type: ts.ColumnType,
+        parameters: list[Parameter],
+        is_batched: bool = False,
+        system_parameters: Optional[list[str]] = None,
+    ):
         assert isinstance(return_type, ts.ColumnType)
         self.return_type = return_type
         self.is_batched = is_batched
@@ -95,6 +115,7 @@ class Signature:
         self.constant_parameters = [p for p in parameters if not p.is_batched]
         self.batched_parameters = [p for p in parameters if p.is_batched]
         self.required_parameters = [p for p in parameters if not p.has_default()]
+        self.system_parameters = system_parameters if system_parameters is not None else []
         self.py_signature = inspect.Signature([p.to_py_param() for p in self.parameters_by_pos])
     def get_return_type(self) -> ts.ColumnType:
@@ -237,6 +258,7 @@ class Signature:
         type_substitutions: Optional[dict] = None,
         is_cls_method: bool = False,
     ) -> list[Parameter]:
+        """Ignores parameters starting with '_'."""
         from pixeltable import exprs
         assert (py_fn is None) != (py_params is None)
@@ -251,6 +273,10 @@ class Signature:
         for idx, param in enumerate(py_params):
             if is_cls_method and idx == 0:
                 continue  # skip 'self' or 'cls' parameter
+            if param.name in cls.SYSTEM_PARAM_NAMES:
+                continue  # skip system parameters
+            if param.name.startswith('_'):
+                raise excs.Error(f"{param.name!r}: parameters starting with '_' are reserved")
             if param.name in cls.SPECIAL_PARAM_NAMES:
                 raise excs.Error(f'{param.name!r} is a reserved parameter name')
             if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
@@ -308,5 +334,6 @@ class Signature:
                 raise excs.Error('Cannot infer pixeltable return type')
         else:
             _, return_is_batched = cls._infer_type(sig.return_annotation)
+        system_params = [param_name for param_name in sig.parameters if param_name in cls.SYSTEM_PARAM_NAMES]
-        return Signature(return_type, parameters, return_is_batched)
+        return Signature(return_type, parameters, return_is_batched, system_parameters=system_params)

pixeltable/func/tools.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import json
-from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union
+from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar
 import pydantic
@@ -100,7 +100,7 @@ class Tools(pydantic.BaseModel):
         self,
         auto: bool = False,
         required: bool = False,
-        tool: Union[str, Function, None] = None,
+        tool: str | Function | None = None,
         parallel_tool_calls: bool = True,
     ) -> ToolChoice:
         if sum([auto, required, tool is not None]) != 1:

pixeltable/functions/anthropic.py CHANGED Viewed

@@ -38,6 +38,53 @@ def _anthropic_client() -> 'anthropic.AsyncAnthropic':
     return env.Env.get().get_client('anthropic')
+def _get_header_info(
+    headers: httpx.Headers,
+) -> tuple[
+    Optional[tuple[int, int, datetime.datetime]],
+    Optional[tuple[int, int, datetime.datetime]],
+    Optional[tuple[int, int, datetime.datetime]],
+]:
+    """Extract rate limit info from Anthropic API response headers."""
+    requests_limit_str = headers.get('anthropic-ratelimit-requests-limit')
+    requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
+    requests_remaining_str = headers.get('anthropic-ratelimit-requests-remaining')
+    requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
+    requests_reset_str = headers.get('anthropic-ratelimit-requests-reset')
+    requests_reset = (
+        datetime.datetime.fromisoformat(requests_reset_str.replace('Z', '+00:00')) if requests_reset_str else None
+    )
+    requests_info = (requests_limit, requests_remaining, requests_reset) if requests_reset else None
+    input_tokens_limit_str = headers.get('anthropic-ratelimit-input-tokens-limit')
+    input_tokens_limit = int(input_tokens_limit_str) if input_tokens_limit_str is not None else None
+    input_tokens_remaining_str = headers.get('anthropic-ratelimit-input-tokens-remaining')
+    input_tokens_remaining = int(input_tokens_remaining_str) if input_tokens_remaining_str is not None else None
+    input_tokens_reset_str = headers.get('anthropic-ratelimit-input-tokens-reset')
+    input_tokens_reset = (
+        datetime.datetime.fromisoformat(input_tokens_reset_str.replace('Z', '+00:00'))
+        if input_tokens_reset_str
+        else None
+    )
+    input_tokens_info = (input_tokens_limit, input_tokens_remaining, input_tokens_reset) if input_tokens_reset else None
+    output_tokens_limit_str = headers.get('anthropic-ratelimit-output-tokens-limit')
+    output_tokens_limit = int(output_tokens_limit_str) if output_tokens_limit_str is not None else None
+    output_tokens_remaining_str = headers.get('anthropic-ratelimit-output-tokens-remaining')
+    output_tokens_remaining = int(output_tokens_remaining_str) if output_tokens_remaining_str is not None else None
+    output_tokens_reset_str = headers.get('anthropic-ratelimit-output-tokens-reset')
+    output_tokens_reset = (
+        datetime.datetime.fromisoformat(output_tokens_reset_str.replace('Z', '+00:00'))
+        if output_tokens_reset_str
+        else None
+    )
+    output_tokens_info = (
+        (output_tokens_limit, output_tokens_remaining, output_tokens_reset) if output_tokens_reset else None
+    )
+    return requests_info, input_tokens_info, output_tokens_info
 class AnthropicRateLimitsInfo(env.RateLimitsInfo):
     def __init__(self) -> None:
         super().__init__(self._get_request_resources)
@@ -51,6 +98,27 @@ class AnthropicRateLimitsInfo(env.RateLimitsInfo):
                 input_len += len(message['content'])
         return {'requests': 1, 'input_tokens': int(input_len / 4), 'output_tokens': max_tokens}
+    def record_exc(self, exc: Exception) -> None:
+        import anthropic
+        if (
+            not isinstance(exc, anthropic.APIError)
+            or not hasattr(exc, 'response')
+            or not hasattr(exc.response, 'headers')
+        ):
+            return
+        requests_info, input_tokens_info, output_tokens_info = _get_header_info(exc.response.headers)
+        _logger.debug(
+            f'record_exc(): requests_info={requests_info} input_tokens_info={input_tokens_info} '
+            f'output_tokens_info={output_tokens_info}'
+        )
+        self.record(requests=requests_info, input_tokens=input_tokens_info, output_tokens=output_tokens_info)
+        self.has_exc = True
+        retry_after_str = exc.response.headers.get('retry-after')
+        if retry_after_str is not None:
+            _logger.debug(f'retry-after: {retry_after_str}')
     def get_retry_delay(self, exc: Exception) -> Optional[float]:
         import anthropic
@@ -77,6 +145,7 @@ async def messages(
     model_kwargs: Optional[dict[str, Any]] = None,
     tools: Optional[list[dict[str, Any]]] = None,
     tool_choice: Optional[dict[str, Any]] = None,
+    _runtime_ctx: Optional[env.RuntimeCtx] = None,
 ) -> dict:
     """
     Create a Message.
@@ -151,32 +220,13 @@ async def messages(
         messages=cast(Iterable[MessageParam], messages), model=model, max_tokens=max_tokens, **model_kwargs
     )
-    requests_limit_str = result.headers.get('anthropic-ratelimit-requests-limit')
-    requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
-    requests_remaining_str = result.headers.get('anthropic-ratelimit-requests-remaining')
-    requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
-    requests_reset_str = result.headers.get('anthropic-ratelimit-requests-reset')
-    requests_reset = datetime.datetime.fromisoformat(requests_reset_str.replace('Z', '+00:00'))
-    input_tokens_limit_str = result.headers.get('anthropic-ratelimit-input-tokens-limit')
-    input_tokens_limit = int(input_tokens_limit_str) if input_tokens_limit_str is not None else None
-    input_tokens_remaining_str = result.headers.get('anthropic-ratelimit-input-tokens-remaining')
-    input_tokens_remaining = int(input_tokens_remaining_str) if input_tokens_remaining_str is not None else None
-    input_tokens_reset_str = result.headers.get('anthropic-ratelimit-input-tokens-reset')
-    input_tokens_reset = datetime.datetime.fromisoformat(input_tokens_reset_str.replace('Z', '+00:00'))
-    output_tokens_limit_str = result.headers.get('anthropic-ratelimit-output-tokens-limit')
-    output_tokens_limit = int(output_tokens_limit_str) if output_tokens_limit_str is not None else None
-    output_tokens_remaining_str = result.headers.get('anthropic-ratelimit-output-tokens-remaining')
-    output_tokens_remaining = int(output_tokens_remaining_str) if output_tokens_remaining_str is not None else None
-    output_tokens_reset_str = result.headers.get('anthropic-ratelimit-output-tokens-reset')
-    output_tokens_reset = datetime.datetime.fromisoformat(output_tokens_reset_str.replace('Z', '+00:00'))
-    retry_after_str = result.headers.get('retry-after')
-    if retry_after_str is not None:
-        _logger.debug(f'retry-after: {retry_after_str}')
+    requests_info, input_tokens_info, output_tokens_info = _get_header_info(result.headers)
+    # retry_after_str = result.headers.get('retry-after')
+    # if retry_after_str is not None:
+    #     _logger.debug(f'retry-after: {retry_after_str}')
+    is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
     rate_limits_info.record(
-        requests=(requests_limit, requests_remaining, requests_reset),
-        input_tokens=(input_tokens_limit, input_tokens_remaining, input_tokens_reset),
-        output_tokens=(output_tokens_limit, output_tokens_remaining, output_tokens_reset),
+        requests=requests_info, input_tokens=input_tokens_info, output_tokens=output_tokens_info, reset_exc=is_retry
     )
     result_dict = json.loads(result.text)

pixeltable/functions/globals.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import builtins
 import typing
-from typing import Any, Callable, Optional, Union
+from typing import Any, Callable, Optional
 import sqlalchemy as sql
@@ -11,7 +11,7 @@ from typing import _GenericAlias  # type: ignore[attr-defined]  # isort: skip
 # TODO: remove and replace calls with astype()
-def cast(expr: exprs.Expr, target_type: Union[ts.ColumnType, type, _GenericAlias]) -> exprs.Expr:
+def cast(expr: exprs.Expr, target_type: ts.ColumnType | type | _GenericAlias) -> exprs.Expr:
     expr.col_type = ts.ColumnType.normalize_type(target_type)
     return expr

pixeltable/functions/llama_cpp.py CHANGED Viewed

@@ -93,10 +93,18 @@ def _lookup_pretrained_model(repo_id: str, filename: Optional[str], n_gpu_layers
     return _model_cache[key]
-_model_cache: dict[tuple[str, str, int], Any] = {}
+_model_cache: dict[tuple[str, str, int], 'llama_cpp.Llama'] = {}
 _IS_GPU_AVAILABLE: Optional[bool] = None
+def cleanup() -> None:
+    for model in _model_cache.values():
+        if model._sampler is not None:
+            model._sampler.close()
+        model.close()
+    _model_cache.clear()
 __all__ = local_public_names(__name__)

pixeltable/functions/openai.py CHANGED Viewed

@@ -91,6 +91,49 @@ def _rate_limits_pool(model: str) -> str:
     return f'rate-limits:openai:{model}'
+# RE pattern for duration in '*-reset' headers;
+# examples: 1d2h3ms, 4m5.6s; # fractional seconds can be reported as 0.5s or 500ms
+_header_duration_pattern = re.compile(r'(?:(\d+)d)?(?:(\d+)h)?(?:(\d+)ms)|(?:(\d+)m)?(?:([\d.]+)s)?')
+def _parse_header_duration(duration_str: str) -> datetime.timedelta:
+    match = _header_duration_pattern.match(duration_str)
+    if not match:
+        raise ValueError(f'Invalid duration format: {duration_str}')
+    days = int(match.group(1) or 0)
+    hours = int(match.group(2) or 0)
+    milliseconds = int(match.group(3) or 0)
+    minutes = int(match.group(4) or 0)
+    seconds = float(match.group(5) or 0)
+    return datetime.timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds, milliseconds=milliseconds)
+def _get_header_info(
+    headers: httpx.Headers,
+) -> tuple[Optional[tuple[int, int, datetime.datetime]], Optional[tuple[int, int, datetime.datetime]]]:
+    now = datetime.datetime.now(tz=datetime.timezone.utc)
+    requests_limit_str = headers.get('x-ratelimit-limit-requests')
+    requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
+    requests_remaining_str = headers.get('x-ratelimit-remaining-requests')
+    requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
+    requests_reset_str = headers.get('x-ratelimit-reset-requests', '5s')  # Default to 5 seconds
+    requests_reset_ts = now + _parse_header_duration(requests_reset_str)
+    requests_info = (requests_limit, requests_remaining, requests_reset_ts)
+    tokens_limit_str = headers.get('x-ratelimit-limit-tokens')
+    tokens_limit = int(tokens_limit_str) if tokens_limit_str is not None else None
+    tokens_remaining_str = headers.get('x-ratelimit-remaining-tokens')
+    tokens_remaining = int(tokens_remaining_str) if tokens_remaining_str is not None else None
+    tokens_reset_str = headers.get('x-ratelimit-reset-tokens', '5s')  # Default to 5 seconds
+    tokens_reset_ts = now + _parse_header_duration(tokens_reset_str)
+    tokens_info = (tokens_limit, tokens_remaining, tokens_reset_ts)
+    return requests_info, tokens_info
 class OpenAIRateLimitsInfo(env.RateLimitsInfo):
     retryable_errors: tuple[Type[Exception], ...]
@@ -111,61 +154,24 @@ class OpenAIRateLimitsInfo(env.RateLimitsInfo):
             openai.InternalServerError,
         )
+    def record_exc(self, exc: Exception) -> None:
+        import openai
+        _ = isinstance(exc, openai.APIError)
+        if not isinstance(exc, openai.APIError) or not hasattr(exc, 'response') or not hasattr(exc.response, 'headers'):
+            return
+        requests_info, tokens_info = _get_header_info(exc.response.headers)
+        _logger.debug(f'record_exc(): requests_info={requests_info} tokens_info={tokens_info}')
+        self.record(requests=requests_info, tokens=tokens_info)
+        self.has_exc = True
     def get_retry_delay(self, exc: Exception) -> Optional[float]:
         import openai
         if not isinstance(exc, self.retryable_errors):
             return None
         assert isinstance(exc, openai.APIError)
-        return 1.0
-# RE pattern for duration in '*-reset' headers;
-# examples: 1d2h3ms, 4m5.6s; # fractional seconds can be reported as 0.5s or 500ms
-_header_duration_pattern = re.compile(r'(?:(\d+)d)?(?:(\d+)h)?(?:(\d+)ms)|(?:(\d+)m)?(?:([\d.]+)s)?')
-def _parse_header_duration(duration_str: str) -> datetime.timedelta:
-    match = _header_duration_pattern.match(duration_str)
-    if not match:
-        raise ValueError(f'Invalid duration format: {duration_str}')
-    days = int(match.group(1) or 0)
-    hours = int(match.group(2) or 0)
-    milliseconds = int(match.group(3) or 0)
-    minutes = int(match.group(4) or 0)
-    seconds = float(match.group(5) or 0)
-    return datetime.timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds, milliseconds=milliseconds)
-def _get_header_info(
-    headers: httpx.Headers, *, requests: bool = True, tokens: bool = True
-) -> tuple[Optional[tuple[int, int, datetime.datetime]], Optional[tuple[int, int, datetime.datetime]]]:
-    assert requests or tokens
-    now = datetime.datetime.now(tz=datetime.timezone.utc)
-    requests_info: Optional[tuple[int, int, datetime.datetime]] = None
-    if requests:
-        requests_limit_str = headers.get('x-ratelimit-limit-requests')
-        requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
-        requests_remaining_str = headers.get('x-ratelimit-remaining-requests')
-        requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
-        requests_reset_str = headers.get('x-ratelimit-reset-requests', '5s')  # Default to 5 seconds
-        requests_reset_ts = now + _parse_header_duration(requests_reset_str)
-        requests_info = (requests_limit, requests_remaining, requests_reset_ts)
-    tokens_info: Optional[tuple[int, int, datetime.datetime]] = None
-    if tokens:
-        tokens_limit_str = headers.get('x-ratelimit-limit-tokens')
-        tokens_limit = int(tokens_limit_str) if tokens_limit_str is not None else None
-        tokens_remaining_str = headers.get('x-ratelimit-remaining-tokens')
-        tokens_remaining = int(tokens_remaining_str) if tokens_remaining_str is not None else None
-        tokens_reset_str = headers.get('x-ratelimit-reset-tokens', '5s')  # Default to 5 seconds
-        tokens_reset_ts = now + _parse_header_duration(tokens_reset_str)
-        tokens_info = (tokens_limit, tokens_remaining, tokens_reset_ts)
-    return requests_info, tokens_info
+        return super().get_retry_delay(exc)
 #####################################
@@ -355,6 +361,7 @@ async def chat_completions(
     model_kwargs: Optional[dict[str, Any]] = None,
     tools: Optional[list[dict[str, Any]]] = None,
     tool_choice: Optional[dict[str, Any]] = None,
+    _runtime_ctx: Optional[env.RuntimeCtx] = None,
 ) -> dict:
     """
     Creates a model response for the given chat conversation.
@@ -418,7 +425,8 @@ async def chat_completions(
     )
     requests_info, tokens_info = _get_header_info(result.headers)
-    rate_limits_info.record(requests=requests_info, tokens=tokens_info)
+    is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
+    rate_limits_info.record(requests=requests_info, tokens=tokens_info, reset_exc=is_retry)
     return json.loads(result.text)
@@ -461,7 +469,12 @@ def _vision_get_request_resources(
 @pxt.udf
 async def vision(
-    prompt: str, image: PIL.Image.Image, *, model: str, model_kwargs: Optional[dict[str, Any]] = None
+    prompt: str,
+    image: PIL.Image.Image,
+    *,
+    model: str,
+    model_kwargs: Optional[dict[str, Any]] = None,
+    _runtime_ctx: Optional[env.RuntimeCtx] = None,
 ) -> str:
     """
     Analyzes an image with the OpenAI vision capability. This is a convenience function that takes an image and
@@ -521,8 +534,10 @@ async def vision(
         **model_kwargs,
     )
+    # _logger.debug(f'vision(): headers={result.headers}')
     requests_info, tokens_info = _get_header_info(result.headers)
-    rate_limits_info.record(requests=requests_info, tokens=tokens_info)
+    is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
+    rate_limits_info.record(requests=requests_info, tokens=tokens_info, reset_exc=is_retry)
     result = json.loads(result.text)
     return result['choices'][0]['message']['content']
@@ -545,7 +560,11 @@ def _embeddings_get_request_resources(input: list[str]) -> dict[str, int]:
 @pxt.udf(batch_size=32)
 async def embeddings(
-    input: Batch[str], *, model: str, model_kwargs: Optional[dict[str, Any]] = None
+    input: Batch[str],
+    *,
+    model: str,
+    model_kwargs: Optional[dict[str, Any]] = None,
+    _runtime_ctx: Optional[env.RuntimeCtx] = None,
 ) -> Batch[pxt.Array[(None,), pxt.Float]]:
     """
     Creates an embedding vector representing the input text.
@@ -592,7 +611,8 @@ async def embeddings(
         input=input, model=model, encoding_format='float', **model_kwargs
     )
     requests_info, tokens_info = _get_header_info(result.headers)
-    rate_limits_info.record(requests=requests_info, tokens=tokens_info)
+    is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
+    rate_limits_info.record(requests=requests_info, tokens=tokens_info, reset_exc=is_retry)
     return [np.array(data['embedding'], dtype=np.float64) for data in json.loads(result.content)['data']]

pixeltable/functions/video.py CHANGED Viewed

@@ -12,7 +12,7 @@ import pixeltable as pxt
 from pixeltable import env
 from pixeltable.utils.code import local_public_names
-_format_defaults = {  # format -> (codec, ext)
+_format_defaults: dict[str, tuple[str, str]] = {  # format -> (codec, ext)
     'wav': ('pcm_s16le', 'wav'),
     'mp3': ('libmp3lame', 'mp3'),
     'flac': ('flac', 'flac'),
@@ -40,6 +40,59 @@ _format_defaults = {  # format -> (codec, ext)
 class make_video(pxt.Aggregator):
     """
     Aggregator that creates a video from a sequence of images.
+    Creates an H.264 encoded MP4 video from a sequence of PIL Image frames. This aggregator requires the input
+    frames to be ordered (typically by frame position) and is commonly used with `FrameIterator` views to
+    reconstruct videos from processed frames.
+    Args:
+        fps: Frames per second for the output video. Default is 25. This is set when the aggregator is created.
+    Returns:
+    - A `pxt.Video` containing the created video file path.
+    Examples:
+        Create a video from frames extracted using FrameIterator:
+        >>> import pixeltable as pxt
+        >>> from pixeltable.functions.video import make_video
+        >>> from pixeltable.iterators import FrameIterator
+        >>>
+        >>> # Create base table for videos
+        >>> videos_table = pxt.create_table('videos', {'video': pxt.Video})
+        >>>
+        >>> # Create view to extract frames
+        >>> frames_view = pxt.create_view(
+        ...     'video_frames',
+        ...     videos_table,
+        ...     iterator=FrameIterator.create(video=videos_table.video, fps=1)
+        ... )
+        >>>
+        >>> # Reconstruct video from frames
+        >>> frames_view.group_by(videos_table).select(
+        ...     make_video(frames_view.pos, frames_view.frame)
+        ... ).show()
+        Apply transformations to frames before creating a video:
+        >>> # Add computed column with transformed frames
+        >>> frames_view.add_computed_column(
+        ...     rotated_frame=frames_view.frame.rotate(30),
+        ...     stored=True
+        ... )
+        >>>
+        >>> # Create video from transformed frames
+        >>> frames_view.group_by(videos_table).select(
+        ...     make_video(frames_view.pos, frames_view.rotated_frame)
+        ... ).show()
+        Compare multiple processed versions side-by-side:
+        >>> frames_view.group_by(videos_table).select(
+        ...     make_video(frames_view.pos, frames_view.frame),
+        ...     make_video(frames_view.pos, frames_view.rotated_frame)
+        ... ).show()
     """
     container: Optional[av.container.OutputContainer]

pixeltable 0.4.6__py3-none-any.whl → 0.4.7__py3-none-any.whl

Potentially problematic release.

pixeltable 0.4.6__py3-none-any.whl → 0.4.7__py3-none-any.whl