PyPI - pixeltable - Versions diffs - 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl - Mend

pixeltable 0.3.14-py3-none-any.whl → 0.5.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (220) hide show

pixeltable/__init__.py +42 -8
pixeltable/{dataframe.py → _query.py} +470 -206
pixeltable/_version.py +1 -0
pixeltable/catalog/__init__.py +5 -4
pixeltable/catalog/catalog.py +1785 -432
pixeltable/catalog/column.py +190 -113
pixeltable/catalog/dir.py +2 -4
pixeltable/catalog/globals.py +19 -46
pixeltable/catalog/insertable_table.py +191 -98
pixeltable/catalog/path.py +63 -23
pixeltable/catalog/schema_object.py +11 -15
pixeltable/catalog/table.py +843 -436
pixeltable/catalog/table_metadata.py +103 -0
pixeltable/catalog/table_version.py +978 -657
pixeltable/catalog/table_version_handle.py +72 -16
pixeltable/catalog/table_version_path.py +112 -43
pixeltable/catalog/tbl_ops.py +53 -0
pixeltable/catalog/update_status.py +191 -0
pixeltable/catalog/view.py +134 -90
pixeltable/config.py +134 -22
pixeltable/env.py +471 -157
pixeltable/exceptions.py +6 -0
pixeltable/exec/__init__.py +4 -1
pixeltable/exec/aggregation_node.py +7 -8
pixeltable/exec/cache_prefetch_node.py +83 -110
pixeltable/exec/cell_materialization_node.py +268 -0
pixeltable/exec/cell_reconstruction_node.py +168 -0
pixeltable/exec/component_iteration_node.py +4 -3
pixeltable/exec/data_row_batch.py +8 -65
pixeltable/exec/exec_context.py +16 -4
pixeltable/exec/exec_node.py +13 -36
pixeltable/exec/expr_eval/evaluators.py +11 -7
pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
pixeltable/exec/expr_eval/globals.py +8 -5
pixeltable/exec/expr_eval/row_buffer.py +1 -2
pixeltable/exec/expr_eval/schedulers.py +106 -56
pixeltable/exec/globals.py +35 -0
pixeltable/exec/in_memory_data_node.py +19 -19
pixeltable/exec/object_store_save_node.py +293 -0
pixeltable/exec/row_update_node.py +16 -9
pixeltable/exec/sql_node.py +351 -84
pixeltable/exprs/__init__.py +1 -1
pixeltable/exprs/arithmetic_expr.py +27 -22
pixeltable/exprs/array_slice.py +3 -3
pixeltable/exprs/column_property_ref.py +36 -23
pixeltable/exprs/column_ref.py +213 -89
pixeltable/exprs/comparison.py +5 -5
pixeltable/exprs/compound_predicate.py +5 -4
pixeltable/exprs/data_row.py +164 -54
pixeltable/exprs/expr.py +70 -44
pixeltable/exprs/expr_dict.py +3 -3
pixeltable/exprs/expr_set.py +17 -10
pixeltable/exprs/function_call.py +100 -40
pixeltable/exprs/globals.py +2 -2
pixeltable/exprs/in_predicate.py +4 -4
pixeltable/exprs/inline_expr.py +18 -32
pixeltable/exprs/is_null.py +7 -3
pixeltable/exprs/json_mapper.py +8 -8
pixeltable/exprs/json_path.py +56 -22
pixeltable/exprs/literal.py +27 -5
pixeltable/exprs/method_ref.py +2 -2
pixeltable/exprs/object_ref.py +2 -2
pixeltable/exprs/row_builder.py +167 -67
pixeltable/exprs/rowid_ref.py +25 -10
pixeltable/exprs/similarity_expr.py +58 -40
pixeltable/exprs/sql_element_cache.py +4 -4
pixeltable/exprs/string_op.py +5 -5
pixeltable/exprs/type_cast.py +3 -5
pixeltable/func/__init__.py +1 -0
pixeltable/func/aggregate_function.py +8 -8
pixeltable/func/callable_function.py +9 -9
pixeltable/func/expr_template_function.py +17 -11
pixeltable/func/function.py +18 -20
pixeltable/func/function_registry.py +6 -7
pixeltable/func/globals.py +2 -3
pixeltable/func/mcp.py +74 -0
pixeltable/func/query_template_function.py +29 -27
pixeltable/func/signature.py +46 -19
pixeltable/func/tools.py +31 -13
pixeltable/func/udf.py +18 -20
pixeltable/functions/__init__.py +16 -0
pixeltable/functions/anthropic.py +123 -77
pixeltable/functions/audio.py +147 -10
pixeltable/functions/bedrock.py +13 -6
pixeltable/functions/date.py +7 -4
pixeltable/functions/deepseek.py +35 -43
pixeltable/functions/document.py +81 -0
pixeltable/functions/fal.py +76 -0
pixeltable/functions/fireworks.py +11 -20
pixeltable/functions/gemini.py +195 -39
pixeltable/functions/globals.py +142 -14
pixeltable/functions/groq.py +108 -0
pixeltable/functions/huggingface.py +1056 -24
pixeltable/functions/image.py +115 -57
pixeltable/functions/json.py +1 -1
pixeltable/functions/llama_cpp.py +28 -13
pixeltable/functions/math.py +67 -5
pixeltable/functions/mistralai.py +18 -55
pixeltable/functions/net.py +70 -0
pixeltable/functions/ollama.py +20 -13
pixeltable/functions/openai.py +240 -226
pixeltable/functions/openrouter.py +143 -0
pixeltable/functions/replicate.py +4 -4
pixeltable/functions/reve.py +250 -0
pixeltable/functions/string.py +239 -69
pixeltable/functions/timestamp.py +16 -16
pixeltable/functions/together.py +24 -84
pixeltable/functions/twelvelabs.py +188 -0
pixeltable/functions/util.py +6 -1
pixeltable/functions/uuid.py +30 -0
pixeltable/functions/video.py +1515 -107
pixeltable/functions/vision.py +8 -8
pixeltable/functions/voyageai.py +289 -0
pixeltable/functions/whisper.py +16 -8
pixeltable/functions/whisperx.py +179 -0
pixeltable/{ext/functions → functions}/yolox.py +2 -4
pixeltable/globals.py +362 -115
pixeltable/index/base.py +17 -21
pixeltable/index/btree.py +28 -22
pixeltable/index/embedding_index.py +100 -118
pixeltable/io/__init__.py +4 -2
pixeltable/io/datarows.py +8 -7
pixeltable/io/external_store.py +56 -105
pixeltable/io/fiftyone.py +13 -13
pixeltable/io/globals.py +31 -30
pixeltable/io/hf_datasets.py +61 -16
pixeltable/io/label_studio.py +74 -70
pixeltable/io/lancedb.py +3 -0
pixeltable/io/pandas.py +21 -12
pixeltable/io/parquet.py +25 -105
pixeltable/io/table_data_conduit.py +250 -123
pixeltable/io/utils.py +4 -4
pixeltable/iterators/__init__.py +2 -1
pixeltable/iterators/audio.py +26 -25
pixeltable/iterators/base.py +9 -3
pixeltable/iterators/document.py +112 -78
pixeltable/iterators/image.py +12 -15
pixeltable/iterators/string.py +11 -4
pixeltable/iterators/video.py +523 -120
pixeltable/metadata/__init__.py +14 -3
pixeltable/metadata/converters/convert_13.py +2 -2
pixeltable/metadata/converters/convert_18.py +2 -2
pixeltable/metadata/converters/convert_19.py +2 -2
pixeltable/metadata/converters/convert_20.py +2 -2
pixeltable/metadata/converters/convert_21.py +2 -2
pixeltable/metadata/converters/convert_22.py +2 -2
pixeltable/metadata/converters/convert_24.py +2 -2
pixeltable/metadata/converters/convert_25.py +2 -2
pixeltable/metadata/converters/convert_26.py +2 -2
pixeltable/metadata/converters/convert_29.py +4 -4
pixeltable/metadata/converters/convert_30.py +34 -21
pixeltable/metadata/converters/convert_34.py +2 -2
pixeltable/metadata/converters/convert_35.py +9 -0
pixeltable/metadata/converters/convert_36.py +38 -0
pixeltable/metadata/converters/convert_37.py +15 -0
pixeltable/metadata/converters/convert_38.py +39 -0
pixeltable/metadata/converters/convert_39.py +124 -0
pixeltable/metadata/converters/convert_40.py +73 -0
pixeltable/metadata/converters/convert_41.py +12 -0
pixeltable/metadata/converters/convert_42.py +9 -0
pixeltable/metadata/converters/convert_43.py +44 -0
pixeltable/metadata/converters/util.py +20 -31
pixeltable/metadata/notes.py +9 -0
pixeltable/metadata/schema.py +140 -53
pixeltable/metadata/utils.py +74 -0
pixeltable/mypy/__init__.py +3 -0
pixeltable/mypy/mypy_plugin.py +123 -0
pixeltable/plan.py +382 -115
pixeltable/share/__init__.py +1 -1
pixeltable/share/packager.py +547 -83
pixeltable/share/protocol/__init__.py +33 -0
pixeltable/share/protocol/common.py +165 -0
pixeltable/share/protocol/operation_types.py +33 -0
pixeltable/share/protocol/replica.py +119 -0
pixeltable/share/publish.py +257 -59
pixeltable/store.py +311 -194
pixeltable/type_system.py +373 -211
pixeltable/utils/__init__.py +2 -3
pixeltable/utils/arrow.py +131 -17
pixeltable/utils/av.py +298 -0
pixeltable/utils/azure_store.py +346 -0
pixeltable/utils/coco.py +6 -6
pixeltable/utils/code.py +3 -3
pixeltable/utils/console_output.py +4 -1
pixeltable/utils/coroutine.py +6 -23
pixeltable/utils/dbms.py +32 -6
pixeltable/utils/description_helper.py +4 -5
pixeltable/utils/documents.py +7 -18
pixeltable/utils/exception_handler.py +7 -30
pixeltable/utils/filecache.py +6 -6
pixeltable/utils/formatter.py +86 -48
pixeltable/utils/gcs_store.py +295 -0
pixeltable/utils/http.py +133 -0
pixeltable/utils/http_server.py +2 -3
pixeltable/utils/iceberg.py +1 -2
pixeltable/utils/image.py +17 -0
pixeltable/utils/lancedb.py +90 -0
pixeltable/utils/local_store.py +322 -0
pixeltable/utils/misc.py +5 -0
pixeltable/utils/object_stores.py +573 -0
pixeltable/utils/pydantic.py +60 -0
pixeltable/utils/pytorch.py +5 -6
pixeltable/utils/s3_store.py +527 -0
pixeltable/utils/sql.py +26 -0
pixeltable/utils/system.py +30 -0
pixeltable-0.5.7.dist-info/METADATA +579 -0
pixeltable-0.5.7.dist-info/RECORD +227 -0
{pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
pixeltable/__version__.py +0 -3
pixeltable/catalog/named_function.py +0 -40
pixeltable/ext/__init__.py +0 -17
pixeltable/ext/functions/__init__.py +0 -11
pixeltable/ext/functions/whisperx.py +0 -77
pixeltable/utils/media_store.py +0 -77
pixeltable/utils/s3.py +0 -17
pixeltable-0.3.14.dist-info/METADATA +0 -434
pixeltable-0.3.14.dist-info/RECORD +0 -186
pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
{pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0

pixeltable/functions/anthropic.py CHANGED Viewed

@@ -1,14 +1,14 @@
 """
-Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
+Pixeltable UDFs
 that wrap various endpoints from the Anthropic API. In order to use them, you must
 first `pip install anthropic` and configure your Anthropic credentials, as described in
-the [Working with Anthropic](https://pixeltable.readme.io/docs/working-with-anthropic) tutorial.
+the [Working with Anthropic](https://docs.pixeltable.com/notebooks/integrations/working-with-anthropic) tutorial.
 """
 import datetime
 import json
 import logging
-from typing import TYPE_CHECKING, Any, Iterable, Optional, TypeVar, Union, cast
+from typing import TYPE_CHECKING, Any, Iterable, cast
 import httpx
@@ -16,6 +16,7 @@ import pixeltable as pxt
 from pixeltable import env, exprs
 from pixeltable.func import Tools
 from pixeltable.utils.code import local_public_names
+from pixeltable.utils.http import exponential_backoff
 if TYPE_CHECKING:
     import anthropic
@@ -38,6 +39,64 @@ def _anthropic_client() -> 'anthropic.AsyncAnthropic':
     return env.Env.get().get_client('anthropic')
+def _get_header_info(
+    headers: httpx.Headers,
+) -> tuple[
+    tuple[int, int, datetime.datetime] | None,
+    tuple[int, int, datetime.datetime] | None,
+    tuple[int, int, datetime.datetime] | None,
+]:
+    """Extract rate limit info from Anthropic API response headers."""
+    requests_limit_str = headers.get('anthropic-ratelimit-requests-limit')
+    requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
+    requests_remaining_str = headers.get('anthropic-ratelimit-requests-remaining')
+    requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
+    requests_reset_str = headers.get('anthropic-ratelimit-requests-reset')
+    requests_reset = (
+        datetime.datetime.fromisoformat(requests_reset_str.replace('Z', '+00:00')) if requests_reset_str else None
+    )
+    requests_info = (
+        (requests_limit, requests_remaining, requests_reset) if requests_reset and requests_remaining else None
+    )
+    input_tokens_limit_str = headers.get('anthropic-ratelimit-input-tokens-limit')
+    input_tokens_limit = int(input_tokens_limit_str) if input_tokens_limit_str is not None else None
+    input_tokens_remaining_str = headers.get('anthropic-ratelimit-input-tokens-remaining')
+    input_tokens_remaining = int(input_tokens_remaining_str) if input_tokens_remaining_str is not None else None
+    input_tokens_reset_str = headers.get('anthropic-ratelimit-input-tokens-reset')
+    input_tokens_reset = (
+        datetime.datetime.fromisoformat(input_tokens_reset_str.replace('Z', '+00:00'))
+        if input_tokens_reset_str
+        else None
+    )
+    input_tokens_info = (
+        (input_tokens_limit, input_tokens_remaining, input_tokens_reset)
+        if input_tokens_reset and input_tokens_remaining
+        else None
+    )
+    output_tokens_limit_str = headers.get('anthropic-ratelimit-output-tokens-limit')
+    output_tokens_limit = int(output_tokens_limit_str) if output_tokens_limit_str is not None else None
+    output_tokens_remaining_str = headers.get('anthropic-ratelimit-output-tokens-remaining')
+    output_tokens_remaining = int(output_tokens_remaining_str) if output_tokens_remaining_str is not None else None
+    output_tokens_reset_str = headers.get('anthropic-ratelimit-output-tokens-reset')
+    output_tokens_reset = (
+        datetime.datetime.fromisoformat(output_tokens_reset_str.replace('Z', '+00:00'))
+        if output_tokens_reset_str
+        else None
+    )
+    output_tokens_info = (
+        (output_tokens_limit, output_tokens_remaining, output_tokens_reset)
+        if output_tokens_reset and output_tokens_remaining
+        else None
+    )
+    if requests_info is None or input_tokens_info is None or output_tokens_info is None:
+        _logger.debug(f'get_header_info(): incomplete rate limit info: {headers}')
+    return requests_info, input_tokens_info, output_tokens_info
 class AnthropicRateLimitsInfo(env.RateLimitsInfo):
     def __init__(self) -> None:
         super().__init__(self._get_request_resources)
@@ -51,12 +110,38 @@ class AnthropicRateLimitsInfo(env.RateLimitsInfo):
                 input_len += len(message['content'])
         return {'requests': 1, 'input_tokens': int(input_len / 4), 'output_tokens': max_tokens}
-    def get_retry_delay(self, exc: Exception) -> Optional[float]:
+    def record_exc(self, request_ts: datetime.datetime, exc: Exception) -> None:
+        import anthropic
+        if (
+            not isinstance(exc, anthropic.APIError)
+            or not hasattr(exc, 'response')
+            or not hasattr(exc.response, 'headers')
+        ):
+            return
+        requests_info, input_tokens_info, output_tokens_info = _get_header_info(exc.response.headers)
+        _logger.debug(
+            f'record_exc(): request_ts: {request_ts}, requests_info={requests_info} '
+            f'input_tokens_info={input_tokens_info} output_tokens_info={output_tokens_info}'
+        )
+        self.record(
+            request_ts=request_ts,
+            requests=requests_info,
+            input_tokens=input_tokens_info,
+            output_tokens=output_tokens_info,
+        )
+        self.has_exc = True
+        retry_after_str = exc.response.headers.get('retry-after')
+        if retry_after_str is not None:
+            _logger.debug(f'retry-after: {retry_after_str}')
+    def get_retry_delay(self, exc: Exception, attempt: int) -> float | None:
         import anthropic
         # deal with timeouts separately, they don't come with headers
         if isinstance(exc, anthropic.APITimeoutError):
-            return 1.0
+            return exponential_backoff(attempt)
         if not isinstance(exc, anthropic.APIStatusError):
             return None
@@ -64,8 +149,7 @@ class AnthropicRateLimitsInfo(env.RateLimitsInfo):
         should_retry_str = exc.response.headers.get('x-should-retry', '')
         if should_retry_str.lower() != 'true':
             return None
-        retry_after_str = exc.response.headers.get('retry-after', '1')
-        return int(retry_after_str)
+        return super().get_retry_delay(exc, attempt)
 @pxt.udf
@@ -73,16 +157,11 @@ async def messages(
     messages: list[dict[str, str]],
     *,
     model: str,
-    max_tokens: int = 1024,
-    metadata: Optional[dict[str, Any]] = None,
-    stop_sequences: Optional[list[str]] = None,
-    system: Optional[str] = None,
-    temperature: Optional[float] = None,
-    tool_choice: Optional[dict] = None,
-    tools: Optional[list[dict]] = None,
-    top_k: Optional[int] = None,
-    top_p: Optional[float] = None,
-    timeout: Optional[float] = None,
+    max_tokens: int,
+    model_kwargs: dict[str, Any] | None = None,
+    tools: list[dict[str, Any]] | None = None,
+    tool_choice: dict[str, Any] | None = None,
+    _runtime_ctx: env.RuntimeCtx | None = None,
 ) -> dict:
     """
     Create a Message.
@@ -101,25 +180,27 @@ async def messages(
     Args:
         messages: Input messages.
         model: The model that will complete your prompt.
-    For details on the other parameters, see: <https://docs.anthropic.com/en/api/messages>
+        model_kwargs: Additional keyword args for the Anthropic `messages` API.
+            For details on the available parameters, see: <https://docs.anthropic.com/en/api/messages>
+        tools: An optional list of Pixeltable tools to use for the request.
+        tool_choice: An optional tool choice configuration.
     Returns:
         A dictionary containing the response and other metadata.
     Examples:
-        Add a computed column that applies the model `claude-3-haiku-20240307`
+        Add a computed column that applies the model `claude-3-5-sonnet-20241022`
         to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
         >>> msgs = [{'role': 'user', 'content': tbl.prompt}]
-        ... tbl.add_computed_column(response=messages(msgs, model='claude-3-haiku-20240307'))
+        ... tbl.add_computed_column(response=messages(msgs, model='claude-3-5-sonnet-20241022'))
     """
-    # it doesn't look like count_tokens() actually exists in the current version of the library
+    if model_kwargs is None:
+        model_kwargs = {}
     if tools is not None:
         # Reformat `tools` into Anthropic format
-        tools = [
+        model_kwargs['tools'] = [
             {
                 'name': tool['name'],
                 'description': tool['description'],
@@ -132,17 +213,16 @@ async def messages(
             for tool in tools
         ]
-    tool_choice_: Optional[dict] = None
     if tool_choice is not None:
         if tool_choice['auto']:
-            tool_choice_ = {'type': 'auto'}
+            model_kwargs['tool_choice'] = {'type': 'auto'}
         elif tool_choice['required']:
-            tool_choice_ = {'type': 'any'}
+            model_kwargs['tool_choice'] = {'type': 'any'}
         else:
             assert tool_choice['tool'] is not None
-            tool_choice_ = {'type': 'tool', 'name': tool_choice['tool']}
+            model_kwargs['tool_choice'] = {'type': 'tool', 'name': tool_choice['tool']}
         if not tool_choice['parallel_tool_calls']:
-            tool_choice_['disable_parallel_tool_use'] = True
+            model_kwargs['tool_choice']['disable_parallel_tool_use'] = True
     # make sure the pool info exists prior to making the request
     resource_pool_id = f'rate-limits:anthropic:{model}'
@@ -152,48 +232,23 @@ async def messages(
     # TODO: timeouts should be set system-wide and be user-configurable
     from anthropic.types import MessageParam
-    # cast(Any, ...): avoid mypy errors
+    start_ts = datetime.datetime.now(tz=datetime.timezone.utc)
     result = await _anthropic_client().messages.with_raw_response.create(
-        messages=cast(Iterable[MessageParam], messages),
-        model=model,
-        max_tokens=max_tokens,
-        metadata=_opt(cast(Any, metadata)),
-        stop_sequences=_opt(stop_sequences),
-        system=_opt(system),
-        temperature=_opt(cast(Any, temperature)),
-        tools=_opt(cast(Any, tools)),
-        tool_choice=_opt(cast(Any, tool_choice_)),
-        top_k=_opt(top_k),
-        top_p=_opt(top_p),
-        timeout=_opt(timeout),
+        messages=cast(Iterable[MessageParam], messages), model=model, max_tokens=max_tokens, **model_kwargs
     )
-    requests_limit_str = result.headers.get('anthropic-ratelimit-requests-limit')
-    requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
-    requests_remaining_str = result.headers.get('anthropic-ratelimit-requests-remaining')
-    requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
-    requests_reset_str = result.headers.get('anthropic-ratelimit-requests-reset')
-    requests_reset = datetime.datetime.fromisoformat(requests_reset_str.replace('Z', '+00:00'))
-    input_tokens_limit_str = result.headers.get('anthropic-ratelimit-input-tokens-limit')
-    input_tokens_limit = int(input_tokens_limit_str) if input_tokens_limit_str is not None else None
-    input_tokens_remaining_str = result.headers.get('anthropic-ratelimit-input-tokens-remaining')
-    input_tokens_remaining = int(input_tokens_remaining_str) if input_tokens_remaining_str is not None else None
-    input_tokens_reset_str = result.headers.get('anthropic-ratelimit-input-tokens-reset')
-    input_tokens_reset = datetime.datetime.fromisoformat(input_tokens_reset_str.replace('Z', '+00:00'))
-    output_tokens_limit_str = result.headers.get('anthropic-ratelimit-output-tokens-limit')
-    output_tokens_limit = int(output_tokens_limit_str) if output_tokens_limit_str is not None else None
-    output_tokens_remaining_str = result.headers.get('anthropic-ratelimit-output-tokens-remaining')
-    output_tokens_remaining = int(output_tokens_remaining_str) if output_tokens_remaining_str is not None else None
-    output_tokens_reset_str = result.headers.get('anthropic-ratelimit-output-tokens-reset')
-    output_tokens_reset = datetime.datetime.fromisoformat(output_tokens_reset_str.replace('Z', '+00:00'))
-    retry_after_str = result.headers.get('retry-after')
-    if retry_after_str is not None:
-        _logger.debug(f'retry-after: {retry_after_str}')
+    requests_info, input_tokens_info, output_tokens_info = _get_header_info(result.headers)
+    # retry_after_str = result.headers.get('retry-after')
+    # if retry_after_str is not None:
+    #     _logger.debug(f'retry-after: {retry_after_str}')
+    is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
     rate_limits_info.record(
-        requests=(requests_limit, requests_remaining, requests_reset),
-        input_tokens=(input_tokens_limit, input_tokens_remaining, input_tokens_reset),
-        output_tokens=(output_tokens_limit, output_tokens_remaining, output_tokens_reset),
+        request_ts=start_ts,
+        requests=requests_info,
+        input_tokens=input_tokens_info,
+        output_tokens=output_tokens_info,
+        reset_exc=is_retry,
     )
     result_dict = json.loads(result.text)
@@ -211,7 +266,7 @@ def invoke_tools(tools: Tools, response: exprs.Expr) -> exprs.InlineDict:
 @pxt.udf
-def _anthropic_response_to_pxt_tool_calls(response: dict) -> Optional[dict]:
+def _anthropic_response_to_pxt_tool_calls(response: dict) -> dict | None:
     anthropic_tool_calls = [r for r in response['content'] if r['type'] == 'tool_use']
     if len(anthropic_tool_calls) == 0:
         return None
@@ -224,15 +279,6 @@ def _anthropic_response_to_pxt_tool_calls(response: dict) -> Optional[dict]:
     return pxt_tool_calls
-_T = TypeVar('_T')
-def _opt(arg: _T) -> Union[_T, 'anthropic.NotGiven']:
-    import anthropic
-    return arg if arg is not None else anthropic.NOT_GIVEN
 __all__ = local_public_names(__name__)

pixeltable/functions/audio.py CHANGED Viewed

@@ -1,26 +1,163 @@
 """
-Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `AudioType`.
+Pixeltable UDFs for `AudioType`.
+"""
-Example:
-```python
-import pixeltable as pxt
-import pixeltable.functions as pxtf
+from typing import Any
-t = pxt.get_table(...)
-t.select(pxtf.audio.get_metadata()).collect()
-```
-"""
+import av
+import numpy as np
 import pixeltable as pxt
+import pixeltable.utils.av as av_utils
 from pixeltable.utils.code import local_public_names
+from pixeltable.utils.local_store import TempStore
 @pxt.udf(is_method=True)
 def get_metadata(audio: pxt.Audio) -> dict:
     """
     Gets various metadata associated with an audio file and returns it as a dictionary.
+    Args:
+        audio: The audio to get metadata for.
+    Returns:
+        A `dict` such as the following:
+            ```json
+            {
+                'size': 2568827,
+                'streams': [
+                    {
+                        'type': 'audio',
+                        'frames': 0,
+                        'duration': 2646000,
+                        'metadata': {},
+                        'time_base': 2.2675736961451248e-05,
+                        'codec_context': {
+                            'name': 'flac',
+                            'profile': None,
+                            'channels': 1,
+                            'codec_tag': '\\x00\\x00\\x00\\x00',
+                        },
+                        'duration_seconds': 60.0,
+                    }
+                ],
+                'bit_rate': 342510,
+                'metadata': {'encoder': 'Lavf61.1.100'},
+                'bit_exact': False,
+            }
+            ```
+    Examples:
+        Extract metadata for files in the `audio_col` column of the table `tbl`:
+        >>> tbl.select(tbl.audio_col.get_metadata()).collect()
+    """
+    return av_utils.get_metadata(audio)
+@pxt.udf()
+def encode_audio(
+    audio_data: pxt.Array[pxt.Float], *, input_sample_rate: int, format: str, output_sample_rate: int | None = None
+) -> pxt.Audio:
+    """
+    Encodes an audio clip represented as an array into a specified audio format.
+    Parameters:
+        audio_data: An array of sampled amplitudes. The accepted array shapes are `(N,)` or `(1, N)` for mono audio
+            or `(2, N)` for stereo.
+        input_sample_rate: The sample rate of the input audio data.
+        format: The desired output audio format. The supported formats are 'wav', 'mp3', 'flac', and 'mp4'.
+        output_sample_rate: The desired sample rate for the output audio. Defaults to the input sample rate if
+            unspecified.
+    Examples:
+        Add a computed column with encoded FLAC audio files to a table with audio data (as arrays of floats) and sample
+        rates:
+        >>> t.add_computed_column(
+        ...     audio_file=encode_audio(
+        ...         t.audio_data, input_sample_rate=t.sample_rate, format='flac'
+        ...     )
+        ... )
+    """
+    if format not in av_utils.AUDIO_FORMATS:
+        raise pxt.Error(f'Only the following formats are supported: {av_utils.AUDIO_FORMATS.keys()}')
+    if output_sample_rate is None:
+        output_sample_rate = input_sample_rate
+    codec, ext = av_utils.AUDIO_FORMATS[format]
+    output_path = str(TempStore.create_path(extension=f'.{ext}'))
+    match audio_data.shape:
+        case (_,):
+            # Mono audio as 1D array, reshape for pyav
+            layout = 'mono'
+            audio_data_transformed = audio_data[None, :]
+        case (1, _):
+            # Mono audio as 2D array, simply reshape and transpose the input for pyav
+            layout = 'mono'
+            audio_data_transformed = audio_data.reshape(-1, 1).transpose()
+        case (2, _):
+            # Stereo audio. Input layout: [[L0, L1, L2, ...],[R0, R1, R2, ...]],
+            # pyav expects: [L0, R0, L1, R1, L2, R2, ...]
+            layout = 'stereo'
+            audio_data_transformed = np.empty(audio_data.shape[1] * 2, dtype=audio_data.dtype)
+            audio_data_transformed[0::2] = audio_data[0]
+            audio_data_transformed[1::2] = audio_data[1]
+            audio_data_transformed = audio_data_transformed.reshape(1, -1)
+        case _:
+            raise pxt.Error(
+                f'Supported input array shapes are (N,), (1, N) for mono and (2, N) for stereo, got {audio_data.shape}'
+            )
+    with av.open(output_path, mode='w') as output_container:
+        stream = output_container.add_stream(codec, rate=output_sample_rate)
+        assert isinstance(stream, av.AudioStream)
+        frame = av.AudioFrame.from_ndarray(audio_data_transformed, format='flt', layout=layout)
+        frame.sample_rate = input_sample_rate
+        for packet in stream.encode(frame):
+            output_container.mux(packet)
+        for packet in stream.encode():
+            output_container.mux(packet)
+        return output_path
+def audio_splitter(
+    audio: Any, chunk_duration_sec: float, *, overlap_sec: float = 0.0, min_chunk_duration_sec: float = 0.0
+) -> tuple[type[pxt.iterators.ComponentIterator], dict[str, Any]]:
+    """
+    Iterator over chunks of an audio file. The audio file is split into smaller chunks,
+    where the duration of each chunk is determined by chunk_duration_sec.
+    The iterator yields audio chunks as pxt.Audio, along with the start and end time of each chunk.
+    If the input contains no audio, no chunks are yielded.
+    Args:
+        chunk_duration_sec: Audio chunk duration in seconds
+        overlap_sec: Overlap between consecutive chunks in seconds
+        min_chunk_duration_sec: Drop the last chunk if it is smaller than min_chunk_duration_sec
+    Examples:
+        This example assumes an existing table `tbl` with a column `audio` of type `pxt.Audio`.
+        Create a view that splits all audio files into chunks of 30 seconds with 5 seconds overlap:
+        >>> pxt.create_view(
+        ...     'audio_chunks',
+        ...     tbl,
+        ...     iterator=audio_splitter(tbl.audio, chunk_duration_sec=30.0, overlap_sec=5.0)
+        ... )
     """
-    return pxt.functions.video._get_metadata(audio)
+    kwargs: dict[str, Any] = {}
+    if overlap_sec != 0.0:
+        kwargs['overlap_sec'] = overlap_sec
+    if min_chunk_duration_sec != 0.0:
+        kwargs['min_chunk_duration_sec'] = min_chunk_duration_sec
+    return pxt.iterators.AudioSplitter._create(audio=audio, chunk_duration_sec=chunk_duration_sec, **kwargs)
 __all__ = local_public_names(__name__)

pixeltable/functions/bedrock.py CHANGED Viewed

@@ -1,5 +1,12 @@
+"""
+Pixeltable UDFs for AWS Bedrock AI models.
+Provides integration with AWS Bedrock for accessing various foundation models
+including Anthropic Claude, Amazon Titan, and other providers.
+"""
 import logging
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Any
 import pixeltable as pxt
 from pixeltable import env, exprs
@@ -29,10 +36,10 @@ def converse(
     messages: list[dict[str, Any]],
     *,
     model_id: str,
-    system: Optional[list[dict[str, Any]]] = None,
-    inference_config: Optional[dict] = None,
-    additional_model_request_fields: Optional[dict] = None,
-    tool_config: Optional[list[dict]] = None,
+    system: list[dict[str, Any]] | None = None,
+    inference_config: dict | None = None,
+    additional_model_request_fields: dict | None = None,
+    tool_config: list[dict] | None = None,
 ) -> dict:
     """
     Generate a conversation response.
@@ -104,7 +111,7 @@ def invoke_tools(tools: Tools, response: exprs.Expr) -> exprs.InlineDict:
 @pxt.udf
-def _bedrock_response_to_pxt_tool_calls(response: dict) -> Optional[dict]:
+def _bedrock_response_to_pxt_tool_calls(response: dict) -> dict | None:
     if response.get('stopReason') != 'tool_use':
         return None

pixeltable/functions/date.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
-Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `DateType`.
+Pixeltable UDFs for `DateType`.
 Usage example:
 ```python
@@ -28,8 +28,11 @@ _SQL_ZERO = sql.literal(0)
 @pxt.udf(is_property=True)
 def year(self: date) -> int:
     """
-    Between [`MINYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MINYEAR) and
-    [`MAXYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MAXYEAR) inclusive.
+    Between 1 and 9999 inclusive.
+    (Between [`MINYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MINYEAR) and
+    [`MAXYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MAXYEAR) as defined by the Python `datetime`
+    library).
     Equivalent to [`date.year`](https://docs.python.org/3/library/datetime.html#datetime.date.year).
     """
@@ -83,7 +86,7 @@ def make_date(year: int, month: int, day: int) -> date:
 @make_date.to_sql
 def _(year: sql.ColumnElement, month: sql.ColumnElement, day: sql.ColumnElement) -> sql.ColumnElement:
-    return sql.func.make_date(sql.cast(year, sql.Integer), sql.cast(month, sql.Integer), sql.cast(day, sql.Integer))
+    return sql.func.make_date(year.cast(sql.Integer), month.cast(sql.Integer), day.cast(sql.Integer))
 @pxt.udf(is_method=True)

pixeltable/functions/deepseek.py CHANGED Viewed

@@ -1,5 +1,12 @@
+"""
+Pixeltable UDFs for Deepseek AI models.
+Provides integration with Deepseek's language models for chat completions
+and other AI capabilities.
+"""
 import json
-from typing import TYPE_CHECKING, Any, Optional, Union, cast
+from typing import TYPE_CHECKING, Any
 import httpx
@@ -7,8 +14,6 @@ import pixeltable as pxt
 from pixeltable import env
 from pixeltable.utils.code import local_public_names
-from .openai import _opt
 if TYPE_CHECKING:
     import openai
@@ -28,22 +33,14 @@ def _deepseek_client() -> 'openai.AsyncOpenAI':
     return env.Env.get().get_client('deepseek')
-@pxt.udf
+@pxt.udf(resource_pool='request-rate:deepseek')
 async def chat_completions(
     messages: list,
     *,
     model: str,
-    frequency_penalty: Optional[float] = None,
-    logprobs: Optional[bool] = None,
-    top_logprobs: Optional[int] = None,
-    max_tokens: Optional[int] = None,
-    presence_penalty: Optional[float] = None,
-    response_format: Optional[dict] = None,
-    stop: Optional[list[str]] = None,
-    temperature: Optional[float] = None,
-    tools: Optional[list[dict]] = None,
-    tool_choice: Optional[dict] = None,
-    top_p: Optional[float] = None,
+    model_kwargs: dict[str, Any] | None = None,
+    tools: list[dict[str, Any]] | None = None,
+    tool_choice: dict[str, Any] | None = None,
 ) -> dict:
     """
     Creates a model response for the given chat conversation.
@@ -53,6 +50,10 @@ async def chat_completions(
     Deepseek uses the OpenAI SDK, so you will need to install the `openai` package to use this UDF.
+    Request throttling:
+    Applies the rate limit set in the config (section `deepseek`, key `rate_limit`). If no rate
+    limit is configured, uses a default of 600 RPM.
     __Requirements:__
     - `pip install openai`
@@ -60,8 +61,10 @@ async def chat_completions(
     Args:
         messages: A list of messages to use for chat completion, as described in the Deepseek API documentation.
         model: The model to use for chat completion.
-    For details on the other parameters, see: <https://api-docs.deepseek.com/api/create-chat-completion>
+        model_kwargs: Additional keyword args for the Deepseek `chat/completions` API.
+            For details on the available parameters, see: <https://api-docs.deepseek.com/api/create-chat-completion>
+        tools: An optional list of Pixeltable tools to use for the request.
+        tool_choice: An optional tool choice configuration.
     Returns:
         A dictionary containing the response and other metadata.
@@ -71,44 +74,33 @@ async def chat_completions(
         of the table `tbl`:
         >>> messages = [
-                {'role': 'system', 'content': 'You are a helpful assistant.'},
-                {'role': 'user', 'content': tbl.prompt}
-            ]
-            tbl.add_computed_column(response=chat_completions(messages, model='deepseek-chat'))
+        ...     {'role': 'system', 'content': 'You are a helpful assistant.'},
+        ...     {'role': 'user', 'content': tbl.prompt}
+        ... ]
+        >>> tbl.add_computed_column(response=chat_completions(messages, model='deepseek-chat'))
     """
+    if model_kwargs is None:
+        model_kwargs = {}
     if tools is not None:
-        tools = [{'type': 'function', 'function': tool} for tool in tools]
+        model_kwargs['tools'] = [{'type': 'function', 'function': tool} for tool in tools]
-    tool_choice_: Union[str, dict, None] = None
     if tool_choice is not None:
         if tool_choice['auto']:
-            tool_choice_ = 'auto'
+            model_kwargs['tool_choice'] = 'auto'
         elif tool_choice['required']:
-            tool_choice_ = 'required'
+            model_kwargs['tool_choice'] = 'required'
         else:
             assert tool_choice['tool'] is not None
-            tool_choice_ = {'type': 'function', 'function': {'name': tool_choice['tool']}}
+            model_kwargs['tool_choice'] = {'type': 'function', 'function': {'name': tool_choice['tool']}}
-    extra_body: Optional[dict[str, Any]] = None
     if tool_choice is not None and not tool_choice['parallel_tool_calls']:
-        extra_body = {'parallel_tool_calls': False}
+        if 'extra_body' not in model_kwargs:
+            model_kwargs['extra_body'] = {}
+        model_kwargs['extra_body']['parallel_tool_calls'] = False
-    # cast(Any, ...): avoid mypy errors
     result = await _deepseek_client().chat.completions.with_raw_response.create(
-        messages=messages,
-        model=model,
-        frequency_penalty=_opt(frequency_penalty),
-        logprobs=_opt(logprobs),
-        top_logprobs=_opt(top_logprobs),
-        max_tokens=_opt(max_tokens),
-        presence_penalty=_opt(presence_penalty),
-        response_format=_opt(cast(Any, response_format)),
-        stop=_opt(stop),
-        temperature=_opt(temperature),
-        tools=_opt(cast(Any, tools)),
-        tool_choice=_opt(cast(Any, tool_choice_)),
-        top_p=_opt(top_p),
-        extra_body=extra_body,
+        messages=messages, model=model, **model_kwargs
     )
     return json.loads(result.text)

pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl

pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl