PyPI - pixeltable - Versions diffs - 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl - Mend

pixeltable 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (69)

pixeltable/__init__.py +4 -2
pixeltable/catalog/__init__.py +1 -1
pixeltable/catalog/catalog.py +7 -9
pixeltable/catalog/column.py +49 -0
pixeltable/catalog/insertable_table.py +0 -7
pixeltable/catalog/schema_object.py +1 -14
pixeltable/catalog/table.py +180 -67
pixeltable/catalog/table_version.py +42 -146
pixeltable/catalog/table_version_path.py +6 -5
pixeltable/catalog/view.py +2 -1
pixeltable/config.py +24 -9
pixeltable/dataframe.py +5 -6
pixeltable/env.py +113 -21
pixeltable/exec/aggregation_node.py +1 -1
pixeltable/exec/cache_prefetch_node.py +4 -3
pixeltable/exec/exec_node.py +0 -8
pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
pixeltable/exec/expr_eval/globals.py +1 -0
pixeltable/exec/expr_eval/schedulers.py +52 -19
pixeltable/exec/in_memory_data_node.py +2 -3
pixeltable/exprs/array_slice.py +2 -2
pixeltable/exprs/data_row.py +15 -2
pixeltable/exprs/expr.py +9 -9
pixeltable/exprs/function_call.py +61 -23
pixeltable/exprs/globals.py +1 -2
pixeltable/exprs/json_path.py +3 -3
pixeltable/exprs/row_builder.py +25 -21
pixeltable/exprs/string_op.py +3 -3
pixeltable/func/expr_template_function.py +6 -3
pixeltable/func/query_template_function.py +2 -2
pixeltable/func/signature.py +30 -3
pixeltable/func/tools.py +2 -2
pixeltable/functions/anthropic.py +76 -27
pixeltable/functions/deepseek.py +5 -1
pixeltable/functions/gemini.py +11 -2
pixeltable/functions/globals.py +2 -2
pixeltable/functions/huggingface.py +6 -12
pixeltable/functions/llama_cpp.py +9 -1
pixeltable/functions/openai.py +76 -55
pixeltable/functions/video.py +59 -6
pixeltable/functions/vision.py +2 -2
pixeltable/globals.py +86 -13
pixeltable/io/datarows.py +3 -3
pixeltable/io/fiftyone.py +7 -7
pixeltable/io/globals.py +3 -3
pixeltable/io/hf_datasets.py +4 -4
pixeltable/io/label_studio.py +2 -1
pixeltable/io/pandas.py +6 -6
pixeltable/io/parquet.py +3 -3
pixeltable/io/table_data_conduit.py +2 -2
pixeltable/io/utils.py +2 -2
pixeltable/iterators/audio.py +3 -2
pixeltable/iterators/document.py +2 -8
pixeltable/iterators/video.py +49 -9
pixeltable/plan.py +0 -16
pixeltable/share/packager.py +51 -42
pixeltable/share/publish.py +134 -7
pixeltable/store.py +5 -25
pixeltable/type_system.py +5 -8
pixeltable/utils/__init__.py +2 -2
pixeltable/utils/arrow.py +5 -5
pixeltable/utils/description_helper.py +3 -3
pixeltable/utils/iceberg.py +1 -2
pixeltable/utils/media_store.py +131 -66
{pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/METADATA +238 -122
{pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/RECORD +69 -69
{pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/WHEEL +0 -0
{pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/entry_points.txt +0 -0
{pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/licenses/LICENSE +0 -0

pixeltable/exprs/function_call.py CHANGED Viewed

@@ -4,7 +4,7 @@ import inspect
 import logging
 import sys
 from textwrap import dedent
-from typing import Any, Optional, Sequence, Union
+from typing import Any, Optional, Sequence
 import sqlalchemy as sql
@@ -36,7 +36,7 @@ class FunctionCall(Expr):
     # - a component index, if the parameter is a non-variadic parameter
     # - a list of component indices, if the parameter is a variadic positional parameter
     # - a dict mapping keyword names to component indices, if the parameter is a variadic keyword parameter
-    bound_idxs: dict[str, Union[int, list[int], dict[str, int]]]
+    bound_idxs: dict[str, int | list[int] | dict[str, int]]
     return_type: ts.ColumnType
     group_by_start_idx: int
@@ -115,6 +115,7 @@ class FunctionCall(Expr):
         self._validation_error = validation_error
         if validation_error is not None:
+            self.bound_idxs = {}
             self.resource_pool = None
             return
@@ -300,8 +301,16 @@ class FunctionCall(Expr):
         """
         res = super().substitute(spec)
         assert res is self
-        self.return_type = self.fn.call_return_type(self.bound_args)
-        self.col_type = self.return_type
+        if self.is_valid:
+            # If this FunctionCall is valid, re-evaluate the call_return_type of the substituted expression. If the
+            # FunctionCall is not valid, it isn't safe to do this. (Really we should be asserting that it *is* valid,
+            # but we still need to be able to do substitutions on invalid FunctionCalls, because loading an
+            # EmbeddingIndex from the db involves reconstructing the requisite (substituted) FunctionCalls. We could
+            # fix this by separately persisting the FunctionCall instances held by EmbeddingIndex to the db. That's
+            # probably a good idea, but it's also probably not urgent, since it only affects Functions that have a
+            # conditional_return_type implemented.)
+            self.return_type = self.fn.call_return_type(self.bound_args)
+            self.col_type = self.return_type
         return self
     def update(self, data_row: DataRow) -> None:
@@ -480,25 +489,54 @@ class FunctionCall(Expr):
             ).strip()
         else:
             # Evaluate the call_return_type as defined in the current codebase.
-            call_return_type = resolved_fn.call_return_type(bound_args)
-            if return_type is None:
-                # Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious way to
-                # infer it during DB migration, so we might encounter a stored return_type of None. In that case, we use
-                # the call_return_type that we just inferred (which matches the deserialization behavior prior to
-                # version 25).
-                return_type = call_return_type
-            elif not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
-                # There is a return_type stored in metadata (schema version >= 25),
-                # and the stored return_type of the UDF call doesn't match the column type of the FunctionCall.
-                validation_error = dedent(
-                    f"""
-                    The return type stored in the database for a UDF call to {fn.self_path!r} no longer
-                    matches its return type as currently defined in the code. This probably means that the
-                    code for {fn.self_path!r} has changed in a backward-incompatible way.
-                    Return type of UDF call in the database: {return_type}
-                    Return type of UDF as currently defined in code: {call_return_type}
-                    """
-                ).strip()
+            call_return_type: Optional[ts.ColumnType] = None
+            if isinstance(resolved_fn, func.ExprTemplateFunction) and not resolved_fn.template.expr.is_valid:
+                # The FunctionCall is based on an ExprTemplateFunction, but the template expression is not valid
+                # (because it in turn contains an invalid FunctionCall). In this case, inherit the validation error
+                # from the template expression.
+                validation_error = resolved_fn.template.expr.validation_error
+            else:
+                try:
+                    call_return_type = resolved_fn.call_return_type(bound_args)
+                except ImportError as exc:
+                    validation_error = dedent(
+                        f"""
+                        A UDF call to {fn.self_path!r} could not be fully resolved, because a module required
+                        by the UDF could not be imported:
+                        {exc}
+                        """
+                    )
+            assert (call_return_type is None) != (validation_error is None)
+            if call_return_type is None and return_type is None:
+                # Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious
+                # way to infer it during DB migration, so we might encounter a stored return_type of None. If the
+                # resolution of call_return_type also fails, then we're out of luck; we have no choice but to
+                # fail-fast.
+                raise excs.Error(validation_error)
+            if call_return_type is not None:
+                # call_return_type resolution succeeded.
+                if return_type is None:
+                    # Schema versions prior to 25 did not store the return_type in metadata (as mentioned above), so
+                    # fall back on the call_return_type.
+                    return_type = call_return_type
+                elif not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
+                    # There is a return_type stored in metadata (schema version >= 25),
+                    # and the stored return_type of the UDF call doesn't match the column type of the FunctionCall.
+                    validation_error = dedent(
+                        f"""
+                        The return type stored in the database for a UDF call to {fn.self_path!r} no longer
+                        matches its return type as currently defined in the code. This probably means that the
+                        code for {fn.self_path!r} has changed in a backward-incompatible way.
+                        Return type of UDF call in the database: {return_type}
+                        Return type of UDF as currently defined in code: {call_return_type}
+                        """
+                    ).strip()
+        assert return_type is not None  # Guaranteed by the above logic.
         fn_call = cls(
             resolved_fn,

pixeltable/exprs/globals.py CHANGED Viewed

@@ -2,10 +2,9 @@ from __future__ import annotations
 import datetime
 import enum
-from typing import Union
 # Python types corresponding to our literal types
-LiteralPythonTypes = Union[str, int, float, bool, datetime.datetime, datetime.date]
+LiteralPythonTypes = str | int | float | bool | datetime.datetime | datetime.date
 def print_slice(s: slice) -> str:

pixeltable/exprs/json_path.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from __future__ import annotations
-from typing import Any, Optional, Union
+from typing import Any, Optional
 import jmespath
 import sqlalchemy as sql
@@ -18,7 +18,7 @@ from .sql_element_cache import SqlElementCache
 class JsonPath(Expr):
     def __init__(
-        self, anchor: Optional[Expr], path_elements: Optional[list[Union[str, int, slice]]] = None, scope_idx: int = 0
+        self, anchor: Optional[Expr], path_elements: Optional[list[str | int | slice]] = None, scope_idx: int = 0
     ) -> None:
         """
         anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
@@ -30,7 +30,7 @@ class JsonPath(Expr):
         super().__init__(ts.JsonType(nullable=True))  # JsonPath expressions are always nullable
         if anchor is not None:
             self.components = [anchor]
-        self.path_elements: list[Union[str, int, slice]] = path_elements
+        self.path_elements: list[str | int | slice] = path_elements
         self.compiled_path = jmespath.compile(self._json_path()) if len(path_elements) > 0 else None
         self.scope_idx = scope_idx
         # NOTE: the _create_id() result will change if set_anchor() gets called;

pixeltable/exprs/row_builder.py CHANGED Viewed

@@ -3,7 +3,7 @@ from __future__ import annotations
 import sys
 import time
 from dataclasses import dataclass
-from typing import Any, Iterable, Optional, Sequence
+from typing import Any, Iterable, NamedTuple, Optional, Sequence
 from uuid import UUID
 import numpy as np
@@ -34,8 +34,7 @@ class ExecProfile:
             )
-@dataclass
-class ColumnSlotIdx:
+class ColumnSlotIdx(NamedTuple):
     """Info for how to locate materialized column in DataRow
     TODO: can this be integrated into RowBuilder directly?
     """
@@ -87,6 +86,8 @@ class RowBuilder:
     img_slot_idxs: list[int]  # Indices of image slots
     media_slot_idxs: list[int]  # Indices of non-image media slots
     array_slot_idxs: list[int]  # Indices of array slots
+    stored_img_cols: list[exprs.ColumnSlotIdx]
+    stored_media_cols: list[exprs.ColumnSlotIdx]
     @dataclass
     class EvalCtx:
@@ -113,6 +114,8 @@ class RowBuilder:
         """
         self.unique_exprs: ExprSet[Expr] = ExprSet()  # dependencies precede their dependents
         self.next_slot_idx = 0
+        self.stored_img_cols = []
+        self.stored_media_cols = []
         # record input and output exprs; make copies to avoid reusing execution state
         unique_input_exprs = [self._record_unique_expr(e.copy(), recursive=False) for e in input_exprs]
@@ -127,7 +130,7 @@ class RowBuilder:
         )
         # if init(columns):
-        # - we are creating table rows and need to record columns for create_table_row()
+        # - we are creating table rows and need to record columns for create_store_table_row()
         # - output_exprs materialize those columns
         # - input_exprs are ColumnRefs of the non-computed columns (ie, what needs to be provided as input)
         # - media validation:
@@ -247,11 +250,13 @@ class RowBuilder:
     def add_table_column(self, col: catalog.Column, slot_idx: int) -> None:
         """Record a column that is part of the table row"""
         assert self.tbl is not None
-        self.table_columns.append(ColumnSlotIdx(col, slot_idx))
-    def output_slot_idxs(self) -> list[ColumnSlotIdx]:
-        """Return ColumnSlotIdx for output columns"""
-        return self.table_columns
+        assert col.is_stored
+        info = ColumnSlotIdx(col, slot_idx)
+        self.table_columns.append(info)
+        if col.col_type.is_media_type():
+            self.stored_media_cols.append(info)
+            if col.col_type.is_image_type():
+                self.stored_img_cols.append(info)
     @property
     def num_materialized(self) -> int:
@@ -445,20 +450,20 @@ class RowBuilder:
                         expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0
                     ) from exc
-    def create_table_row(
+    def create_store_table_row(
         self, data_row: DataRow, cols_with_excs: Optional[set[int]], pk: tuple[int, ...]
     ) -> tuple[list[Any], int]:
-        """Create a table row from the slots that have an output column assigned
+        """Create a store table row from the slots that have an output column assigned
         Return tuple[list of row values in `self.table_columns` order, # of exceptions]
             This excludes system columns.
+            Row values are converted to their store type.
         """
         from pixeltable.exprs.column_property_ref import ColumnPropertyRef
         num_excs = 0
         table_row: list[Any] = list(pk)
-        for info in self.table_columns:
-            col, slot_idx = info.col, info.slot_idx
+        for col, slot_idx in self.table_columns:
             if data_row.has_exc(slot_idx):
                 exc = data_row.get_exc(slot_idx)
                 num_excs += 1
@@ -469,9 +474,11 @@ class RowBuilder:
                     # exceptions get stored in the errortype/-msg properties of the cellmd column
                     table_row.append(ColumnPropertyRef.create_cellmd_exc(exc))
             else:
-                if col.col_type.is_image_type() and data_row.file_urls[slot_idx] is None:
-                    # we have yet to store this image
-                    data_row.flush_img(slot_idx, col)
+                if col.col_type.is_media_type():
+                    if col.col_type.is_image_type() and data_row.file_urls[slot_idx] is None:
+                        # we have yet to store this image
+                        data_row.flush_img(slot_idx, col)
+                    data_row.move_tmp_media_file(slot_idx, col)
                 val = data_row.get_stored_val(slot_idx, col.get_sa_col_type())
                 table_row.append(val)
                 if col.stores_cellmd:
@@ -479,7 +486,7 @@ class RowBuilder:
         return table_row, num_excs
-    def store_column_names(self) -> tuple[list[str], dict[int, catalog.Column]]:
+    def store_column_names(self) -> list[str]:
         """
         Returns the list of store column names corresponding to the table_columns of this RowBuilder.
         The second tuple element of the return value is a dictionary containing all media columns in the
@@ -487,16 +494,13 @@ class RowBuilder:
         """
         assert self.tbl is not None, self.table_columns
         store_col_names: list[str] = [pk_col.name for pk_col in self.tbl.store_tbl.pk_columns()]
-        media_cols: dict[int, catalog.Column] = {}
         for col in self.table_columns:
-            if col.col.col_type.is_media_type():
-                media_cols[len(store_col_names)] = col.col
             store_col_names.append(col.col.store_name())
             if col.col.stores_cellmd:
                 store_col_names.append(col.col.cellmd_store_name())
-        return store_col_names, media_cols
+        return store_col_names
     def make_row(self) -> exprs.DataRow:
         """Creates a new DataRow with the current row_builder's configuration."""

pixeltable/exprs/string_op.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from __future__ import annotations
-from typing import Any, Optional, Union
+from typing import Any, Optional
 import sqlalchemy as sql
@@ -76,7 +76,7 @@ class StringOp(Expr):
         op2_val = data_row[self._op2.slot_idx]
         data_row[self.slot_idx] = self.eval_nullable(op1_val, op2_val)
-    def eval_nullable(self, op1_val: Union[str, None], op2_val: Union[int, str, None]) -> Union[str, None]:
+    def eval_nullable(self, op1_val: str | None, op2_val: int | str | None) -> str | None:
         """
         Return the result of evaluating the expression on two nullable int/float operands,
         None is interpreted as SQL NULL
@@ -85,7 +85,7 @@ class StringOp(Expr):
             return None
         return self.eval_non_null(op1_val, op2_val)
-    def eval_non_null(self, op1_val: str, op2_val: Union[int, str]) -> str:
+    def eval_non_null(self, op1_val: str, op2_val: int | str) -> str:
         """
         Return the result of evaluating the expression on two int/float operands
         """

pixeltable/func/expr_template_function.py CHANGED Viewed

@@ -85,13 +85,16 @@ class ExprTemplateFunction(Function):
         conditional_return_type).
         """
         assert not self.is_polymorphic
-        template = self.template
         with_defaults = bound_args.copy()
         with_defaults.update(
-            {param_name: default for param_name, default in template.defaults.items() if param_name not in bound_args}
+            {
+                param_name: default
+                for param_name, default in self.template.defaults.items()
+                if param_name not in bound_args
+            }
         )
         substituted_expr = self.template.expr.copy().substitute(
-            {template.param_exprs[name]: expr for name, expr in with_defaults.items()}
+            {self.template.param_exprs[name]: expr for name, expr in with_defaults.items()}
         )
         return substituted_expr.col_type

pixeltable/func/query_template_function.py CHANGED Viewed

@@ -2,7 +2,7 @@ from __future__ import annotations
 import inspect
 from functools import reduce
-from typing import TYPE_CHECKING, Any, Callable, Iterable, Optional, Union, overload
+from typing import TYPE_CHECKING, Any, Callable, Iterable, Optional, overload
 from pixeltable import catalog, exceptions as excs, exprs, func, type_system as ts
@@ -129,7 +129,7 @@ def retrieval_udf(
     table: catalog.Table,
     name: Optional[str] = None,
     description: Optional[str] = None,
-    parameters: Optional[Iterable[Union[str, exprs.ColumnRef]]] = None,
+    parameters: Optional[Iterable[str | exprs.ColumnRef]] = None,
     limit: Optional[int] = 10,
 ) -> func.QueryTemplateFunction:
     """

pixeltable/func/signature.py CHANGED Viewed

@@ -84,8 +84,28 @@ class Signature:
     """
     SPECIAL_PARAM_NAMES: ClassVar[list[str]] = ['group_by', 'order_by']
-    def __init__(self, return_type: ts.ColumnType, parameters: list[Parameter], is_batched: bool = False):
+    SYSTEM_PARAM_NAMES: ClassVar[list[str]] = ['_runtime_ctx']
+    return_type: ts.ColumnType
+    is_batched: bool
+    parameters: dict[str, Parameter]  # name -> Parameter
+    parameters_by_pos: list[Parameter]  # ordered by position in the signature
+    constant_parameters: list[Parameter]  # parameters that are not batched
+    batched_parameters: list[Parameter]  # parameters that are batched
+    required_parameters: list[Parameter]  # parameters that do not have a default value
+    # the names of recognized system parameters in the signature; these are excluded from self.parameters
+    system_parameters: list[str]
+    py_signature: inspect.Signature
+    def __init__(
+        self,
+        return_type: ts.ColumnType,
+        parameters: list[Parameter],
+        is_batched: bool = False,
+        system_parameters: Optional[list[str]] = None,
+    ):
         assert isinstance(return_type, ts.ColumnType)
         self.return_type = return_type
         self.is_batched = is_batched
@@ -95,6 +115,7 @@ class Signature:
         self.constant_parameters = [p for p in parameters if not p.is_batched]
         self.batched_parameters = [p for p in parameters if p.is_batched]
         self.required_parameters = [p for p in parameters if not p.has_default()]
+        self.system_parameters = system_parameters if system_parameters is not None else []
         self.py_signature = inspect.Signature([p.to_py_param() for p in self.parameters_by_pos])
     def get_return_type(self) -> ts.ColumnType:
@@ -237,6 +258,7 @@ class Signature:
         type_substitutions: Optional[dict] = None,
         is_cls_method: bool = False,
     ) -> list[Parameter]:
+        """Ignores parameters starting with '_'."""
         from pixeltable import exprs
         assert (py_fn is None) != (py_params is None)
@@ -251,6 +273,10 @@ class Signature:
         for idx, param in enumerate(py_params):
             if is_cls_method and idx == 0:
                 continue  # skip 'self' or 'cls' parameter
+            if param.name in cls.SYSTEM_PARAM_NAMES:
+                continue  # skip system parameters
+            if param.name.startswith('_'):
+                raise excs.Error(f"{param.name!r}: parameters starting with '_' are reserved")
             if param.name in cls.SPECIAL_PARAM_NAMES:
                 raise excs.Error(f'{param.name!r} is a reserved parameter name')
             if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
@@ -308,5 +334,6 @@ class Signature:
                 raise excs.Error('Cannot infer pixeltable return type')
         else:
             _, return_is_batched = cls._infer_type(sig.return_annotation)
+        system_params = [param_name for param_name in sig.parameters if param_name in cls.SYSTEM_PARAM_NAMES]
-        return Signature(return_type, parameters, return_is_batched)
+        return Signature(return_type, parameters, return_is_batched, system_parameters=system_params)

pixeltable/func/tools.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import json
-from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union
+from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar
 import pydantic
@@ -100,7 +100,7 @@ class Tools(pydantic.BaseModel):
         self,
         auto: bool = False,
         required: bool = False,
-        tool: Union[str, Function, None] = None,
+        tool: str | Function | None = None,
         parallel_tool_calls: bool = True,
     ) -> ToolChoice:
         if sum([auto, required, tool is not None]) != 1:

pixeltable/functions/anthropic.py CHANGED Viewed

@@ -38,6 +38,53 @@ def _anthropic_client() -> 'anthropic.AsyncAnthropic':
     return env.Env.get().get_client('anthropic')
+def _get_header_info(
+    headers: httpx.Headers,
+) -> tuple[
+    Optional[tuple[int, int, datetime.datetime]],
+    Optional[tuple[int, int, datetime.datetime]],
+    Optional[tuple[int, int, datetime.datetime]],
+]:
+    """Extract rate limit info from Anthropic API response headers."""
+    requests_limit_str = headers.get('anthropic-ratelimit-requests-limit')
+    requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
+    requests_remaining_str = headers.get('anthropic-ratelimit-requests-remaining')
+    requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
+    requests_reset_str = headers.get('anthropic-ratelimit-requests-reset')
+    requests_reset = (
+        datetime.datetime.fromisoformat(requests_reset_str.replace('Z', '+00:00')) if requests_reset_str else None
+    )
+    requests_info = (requests_limit, requests_remaining, requests_reset) if requests_reset else None
+    input_tokens_limit_str = headers.get('anthropic-ratelimit-input-tokens-limit')
+    input_tokens_limit = int(input_tokens_limit_str) if input_tokens_limit_str is not None else None
+    input_tokens_remaining_str = headers.get('anthropic-ratelimit-input-tokens-remaining')
+    input_tokens_remaining = int(input_tokens_remaining_str) if input_tokens_remaining_str is not None else None
+    input_tokens_reset_str = headers.get('anthropic-ratelimit-input-tokens-reset')
+    input_tokens_reset = (
+        datetime.datetime.fromisoformat(input_tokens_reset_str.replace('Z', '+00:00'))
+        if input_tokens_reset_str
+        else None
+    )
+    input_tokens_info = (input_tokens_limit, input_tokens_remaining, input_tokens_reset) if input_tokens_reset else None
+    output_tokens_limit_str = headers.get('anthropic-ratelimit-output-tokens-limit')
+    output_tokens_limit = int(output_tokens_limit_str) if output_tokens_limit_str is not None else None
+    output_tokens_remaining_str = headers.get('anthropic-ratelimit-output-tokens-remaining')
+    output_tokens_remaining = int(output_tokens_remaining_str) if output_tokens_remaining_str is not None else None
+    output_tokens_reset_str = headers.get('anthropic-ratelimit-output-tokens-reset')
+    output_tokens_reset = (
+        datetime.datetime.fromisoformat(output_tokens_reset_str.replace('Z', '+00:00'))
+        if output_tokens_reset_str
+        else None
+    )
+    output_tokens_info = (
+        (output_tokens_limit, output_tokens_remaining, output_tokens_reset) if output_tokens_reset else None
+    )
+    return requests_info, input_tokens_info, output_tokens_info
 class AnthropicRateLimitsInfo(env.RateLimitsInfo):
     def __init__(self) -> None:
         super().__init__(self._get_request_resources)
@@ -51,6 +98,27 @@ class AnthropicRateLimitsInfo(env.RateLimitsInfo):
                 input_len += len(message['content'])
         return {'requests': 1, 'input_tokens': int(input_len / 4), 'output_tokens': max_tokens}
+    def record_exc(self, exc: Exception) -> None:
+        import anthropic
+        if (
+            not isinstance(exc, anthropic.APIError)
+            or not hasattr(exc, 'response')
+            or not hasattr(exc.response, 'headers')
+        ):
+            return
+        requests_info, input_tokens_info, output_tokens_info = _get_header_info(exc.response.headers)
+        _logger.debug(
+            f'record_exc(): requests_info={requests_info} input_tokens_info={input_tokens_info} '
+            f'output_tokens_info={output_tokens_info}'
+        )
+        self.record(requests=requests_info, input_tokens=input_tokens_info, output_tokens=output_tokens_info)
+        self.has_exc = True
+        retry_after_str = exc.response.headers.get('retry-after')
+        if retry_after_str is not None:
+            _logger.debug(f'retry-after: {retry_after_str}')
     def get_retry_delay(self, exc: Exception) -> Optional[float]:
         import anthropic
@@ -64,8 +132,7 @@ class AnthropicRateLimitsInfo(env.RateLimitsInfo):
         should_retry_str = exc.response.headers.get('x-should-retry', '')
         if should_retry_str.lower() != 'true':
             return None
-        retry_after_str = exc.response.headers.get('retry-after', '1')
-        return int(retry_after_str)
+        return super().get_retry_delay(exc)
 @pxt.udf
@@ -77,6 +144,7 @@ async def messages(
     model_kwargs: Optional[dict[str, Any]] = None,
     tools: Optional[list[dict[str, Any]]] = None,
     tool_choice: Optional[dict[str, Any]] = None,
+    _runtime_ctx: Optional[env.RuntimeCtx] = None,
 ) -> dict:
     """
     Create a Message.
@@ -151,32 +219,13 @@ async def messages(
         messages=cast(Iterable[MessageParam], messages), model=model, max_tokens=max_tokens, **model_kwargs
     )
-    requests_limit_str = result.headers.get('anthropic-ratelimit-requests-limit')
-    requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
-    requests_remaining_str = result.headers.get('anthropic-ratelimit-requests-remaining')
-    requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
-    requests_reset_str = result.headers.get('anthropic-ratelimit-requests-reset')
-    requests_reset = datetime.datetime.fromisoformat(requests_reset_str.replace('Z', '+00:00'))
-    input_tokens_limit_str = result.headers.get('anthropic-ratelimit-input-tokens-limit')
-    input_tokens_limit = int(input_tokens_limit_str) if input_tokens_limit_str is not None else None
-    input_tokens_remaining_str = result.headers.get('anthropic-ratelimit-input-tokens-remaining')
-    input_tokens_remaining = int(input_tokens_remaining_str) if input_tokens_remaining_str is not None else None
-    input_tokens_reset_str = result.headers.get('anthropic-ratelimit-input-tokens-reset')
-    input_tokens_reset = datetime.datetime.fromisoformat(input_tokens_reset_str.replace('Z', '+00:00'))
-    output_tokens_limit_str = result.headers.get('anthropic-ratelimit-output-tokens-limit')
-    output_tokens_limit = int(output_tokens_limit_str) if output_tokens_limit_str is not None else None
-    output_tokens_remaining_str = result.headers.get('anthropic-ratelimit-output-tokens-remaining')
-    output_tokens_remaining = int(output_tokens_remaining_str) if output_tokens_remaining_str is not None else None
-    output_tokens_reset_str = result.headers.get('anthropic-ratelimit-output-tokens-reset')
-    output_tokens_reset = datetime.datetime.fromisoformat(output_tokens_reset_str.replace('Z', '+00:00'))
-    retry_after_str = result.headers.get('retry-after')
-    if retry_after_str is not None:
-        _logger.debug(f'retry-after: {retry_after_str}')
+    requests_info, input_tokens_info, output_tokens_info = _get_header_info(result.headers)
+    # retry_after_str = result.headers.get('retry-after')
+    # if retry_after_str is not None:
+    #     _logger.debug(f'retry-after: {retry_after_str}')
+    is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
     rate_limits_info.record(
-        requests=(requests_limit, requests_remaining, requests_reset),
-        input_tokens=(input_tokens_limit, input_tokens_remaining, input_tokens_reset),
-        output_tokens=(output_tokens_limit, output_tokens_remaining, output_tokens_reset),
+        requests=requests_info, input_tokens=input_tokens_info, output_tokens=output_tokens_info, reset_exc=is_retry
     )
     result_dict = json.loads(result.text)

pixeltable/functions/deepseek.py CHANGED Viewed

@@ -26,7 +26,7 @@ def _deepseek_client() -> 'openai.AsyncOpenAI':
     return env.Env.get().get_client('deepseek')
-@pxt.udf
+@pxt.udf(resource_pool='request-rate:deepseek')
 async def chat_completions(
     messages: list,
     *,
@@ -43,6 +43,10 @@ async def chat_completions(
     Deepseek uses the OpenAI SDK, so you will need to install the `openai` package to use this UDF.
+    Request throttling:
+    Applies the rate limit set in the config (section `deepseek`, key `rate_limit`). If no rate
+    limit is configured, uses a default of 600 RPM.
     __Requirements:__
     - `pip install openai`

pixeltable/functions/gemini.py CHANGED Viewed

@@ -14,6 +14,7 @@ import PIL.Image
 import pixeltable as pxt
 from pixeltable import env, exceptions as excs, exprs
+from pixeltable.utils.media_store import TempStore
 if TYPE_CHECKING:
     from google import genai
@@ -39,7 +40,7 @@ async def generate_content(
     <https://ai.google.dev/gemini-api/docs/text-generation>
     Request throttling:
-    Applies the rate limit set in the config (section `gemini`, key `rate_limit`). If no rate
+    Applies the rate limit set in the config (section `gemini.rate_limits`; use the model id as the key). If no rate
     limit is configured, uses a default of 600 RPM.
     __Requirements:__
@@ -126,6 +127,10 @@ async def generate_images(prompt: str, *, model: str, config: Optional[dict] = N
     Generates images based on a text description and configuration. For additional details, see:
     <https://ai.google.dev/gemini-api/docs/image-generation>
+    Request throttling:
+    Applies the rate limit set in the config (section `imagen.rate_limits`; use the model id as the key). If no rate
+    limit is configured, uses a default of 600 RPM.
     __Requirements:__
     - `pip install google-genai`
@@ -167,6 +172,10 @@ async def generate_videos(
     Generates videos based on a text description and configuration. For additional details, see:
     <https://ai.google.dev/gemini-api/docs/video-generation>
+    Request throttling:
+    Applies the rate limit set in the config (section `veo.rate_limits`; use the model id as the key). If no rate
+    limit is configured, uses a default of 600 RPM.
     __Requirements:__
     - `pip install google-genai`
@@ -215,7 +224,7 @@ async def generate_videos(
     assert video_bytes is not None
     # Create a temporary file to store the video bytes
-    output_path = env.Env.get().create_tmp_path('.mp4')
+    output_path = TempStore.create_path(extension='.mp4')
     Path(output_path).write_bytes(video_bytes)
     return str(output_path)

pixeltable/functions/globals.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import builtins
 import typing
-from typing import Any, Callable, Optional, Union
+from typing import Any, Callable, Optional
 import sqlalchemy as sql
@@ -11,7 +11,7 @@ from typing import _GenericAlias  # type: ignore[attr-defined]  # isort: skip
 # TODO: remove and replace calls with astype()
-def cast(expr: exprs.Expr, target_type: Union[ts.ColumnType, type, _GenericAlias]) -> exprs.Expr:
+def cast(expr: exprs.Expr, target_type: ts.ColumnType | type | _GenericAlias) -> exprs.Expr:
     expr.col_type = ts.ColumnType.normalize_type(target_type)
     return expr

pixeltable 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl

Potentially problematic release.

pixeltable 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl