PyPI - pixeltable - Versions diffs - 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl - Mend

pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (245)

pixeltable/__init__.py +83 -19
pixeltable/_query.py +1444 -0
pixeltable/_version.py +1 -0
pixeltable/catalog/__init__.py +7 -4
pixeltable/catalog/catalog.py +2394 -119
pixeltable/catalog/column.py +225 -104
pixeltable/catalog/dir.py +38 -9
pixeltable/catalog/globals.py +53 -34
pixeltable/catalog/insertable_table.py +265 -115
pixeltable/catalog/path.py +80 -17
pixeltable/catalog/schema_object.py +28 -43
pixeltable/catalog/table.py +1270 -677
pixeltable/catalog/table_metadata.py +103 -0
pixeltable/catalog/table_version.py +1270 -751
pixeltable/catalog/table_version_handle.py +109 -0
pixeltable/catalog/table_version_path.py +137 -42
pixeltable/catalog/tbl_ops.py +53 -0
pixeltable/catalog/update_status.py +191 -0
pixeltable/catalog/view.py +251 -134
pixeltable/config.py +215 -0
pixeltable/env.py +736 -285
pixeltable/exceptions.py +26 -2
pixeltable/exec/__init__.py +7 -2
pixeltable/exec/aggregation_node.py +39 -21
pixeltable/exec/cache_prefetch_node.py +87 -109
pixeltable/exec/cell_materialization_node.py +268 -0
pixeltable/exec/cell_reconstruction_node.py +168 -0
pixeltable/exec/component_iteration_node.py +25 -28
pixeltable/exec/data_row_batch.py +11 -46
pixeltable/exec/exec_context.py +26 -11
pixeltable/exec/exec_node.py +35 -27
pixeltable/exec/expr_eval/__init__.py +3 -0
pixeltable/exec/expr_eval/evaluators.py +365 -0
pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
pixeltable/exec/expr_eval/globals.py +200 -0
pixeltable/exec/expr_eval/row_buffer.py +74 -0
pixeltable/exec/expr_eval/schedulers.py +413 -0
pixeltable/exec/globals.py +35 -0
pixeltable/exec/in_memory_data_node.py +35 -27
pixeltable/exec/object_store_save_node.py +293 -0
pixeltable/exec/row_update_node.py +44 -29
pixeltable/exec/sql_node.py +414 -115
pixeltable/exprs/__init__.py +8 -5
pixeltable/exprs/arithmetic_expr.py +79 -45
pixeltable/exprs/array_slice.py +5 -5
pixeltable/exprs/column_property_ref.py +40 -26
pixeltable/exprs/column_ref.py +254 -61
pixeltable/exprs/comparison.py +14 -9
pixeltable/exprs/compound_predicate.py +9 -10
pixeltable/exprs/data_row.py +213 -72
pixeltable/exprs/expr.py +270 -104
pixeltable/exprs/expr_dict.py +6 -5
pixeltable/exprs/expr_set.py +20 -11
pixeltable/exprs/function_call.py +383 -284
pixeltable/exprs/globals.py +18 -5
pixeltable/exprs/in_predicate.py +7 -7
pixeltable/exprs/inline_expr.py +37 -37
pixeltable/exprs/is_null.py +8 -4
pixeltable/exprs/json_mapper.py +120 -54
pixeltable/exprs/json_path.py +90 -60
pixeltable/exprs/literal.py +61 -16
pixeltable/exprs/method_ref.py +7 -6
pixeltable/exprs/object_ref.py +19 -8
pixeltable/exprs/row_builder.py +238 -75
pixeltable/exprs/rowid_ref.py +53 -15
pixeltable/exprs/similarity_expr.py +65 -50
pixeltable/exprs/sql_element_cache.py +5 -5
pixeltable/exprs/string_op.py +107 -0
pixeltable/exprs/type_cast.py +25 -13
pixeltable/exprs/variable.py +2 -2
pixeltable/func/__init__.py +9 -5
pixeltable/func/aggregate_function.py +197 -92
pixeltable/func/callable_function.py +119 -35
pixeltable/func/expr_template_function.py +101 -48
pixeltable/func/function.py +375 -62
pixeltable/func/function_registry.py +20 -19
pixeltable/func/globals.py +6 -5
pixeltable/func/mcp.py +74 -0
pixeltable/func/query_template_function.py +151 -35
pixeltable/func/signature.py +178 -49
pixeltable/func/tools.py +164 -0
pixeltable/func/udf.py +176 -53
pixeltable/functions/__init__.py +44 -4
pixeltable/functions/anthropic.py +226 -47
pixeltable/functions/audio.py +148 -11
pixeltable/functions/bedrock.py +137 -0
pixeltable/functions/date.py +188 -0
pixeltable/functions/deepseek.py +113 -0
pixeltable/functions/document.py +81 -0
pixeltable/functions/fal.py +76 -0
pixeltable/functions/fireworks.py +72 -20
pixeltable/functions/gemini.py +249 -0
pixeltable/functions/globals.py +208 -53
pixeltable/functions/groq.py +108 -0
pixeltable/functions/huggingface.py +1088 -95
pixeltable/functions/image.py +155 -84
pixeltable/functions/json.py +8 -11
pixeltable/functions/llama_cpp.py +31 -19
pixeltable/functions/math.py +169 -0
pixeltable/functions/mistralai.py +50 -75
pixeltable/functions/net.py +70 -0
pixeltable/functions/ollama.py +29 -36
pixeltable/functions/openai.py +548 -160
pixeltable/functions/openrouter.py +143 -0
pixeltable/functions/replicate.py +15 -14
pixeltable/functions/reve.py +250 -0
pixeltable/functions/string.py +310 -85
pixeltable/functions/timestamp.py +37 -19
pixeltable/functions/together.py +77 -120
pixeltable/functions/twelvelabs.py +188 -0
pixeltable/functions/util.py +7 -2
pixeltable/functions/uuid.py +30 -0
pixeltable/functions/video.py +1528 -117
pixeltable/functions/vision.py +26 -26
pixeltable/functions/voyageai.py +289 -0
pixeltable/functions/whisper.py +19 -10
pixeltable/functions/whisperx.py +179 -0
pixeltable/functions/yolox.py +112 -0
pixeltable/globals.py +716 -236
pixeltable/index/__init__.py +3 -1
pixeltable/index/base.py +17 -21
pixeltable/index/btree.py +32 -22
pixeltable/index/embedding_index.py +155 -92
pixeltable/io/__init__.py +12 -7
pixeltable/io/datarows.py +140 -0
pixeltable/io/external_store.py +83 -125
pixeltable/io/fiftyone.py +24 -33
pixeltable/io/globals.py +47 -182
pixeltable/io/hf_datasets.py +96 -127
pixeltable/io/label_studio.py +171 -156
pixeltable/io/lancedb.py +3 -0
pixeltable/io/pandas.py +136 -115
pixeltable/io/parquet.py +40 -153
pixeltable/io/table_data_conduit.py +702 -0
pixeltable/io/utils.py +100 -0
pixeltable/iterators/__init__.py +8 -4
pixeltable/iterators/audio.py +207 -0
pixeltable/iterators/base.py +9 -3
pixeltable/iterators/document.py +144 -87
pixeltable/iterators/image.py +17 -38
pixeltable/iterators/string.py +15 -12
pixeltable/iterators/video.py +523 -127
pixeltable/metadata/__init__.py +33 -8
pixeltable/metadata/converters/convert_10.py +2 -3
pixeltable/metadata/converters/convert_13.py +2 -2
pixeltable/metadata/converters/convert_15.py +15 -11
pixeltable/metadata/converters/convert_16.py +4 -5
pixeltable/metadata/converters/convert_17.py +4 -5
pixeltable/metadata/converters/convert_18.py +4 -6
pixeltable/metadata/converters/convert_19.py +6 -9
pixeltable/metadata/converters/convert_20.py +3 -6
pixeltable/metadata/converters/convert_21.py +6 -8
pixeltable/metadata/converters/convert_22.py +3 -2
pixeltable/metadata/converters/convert_23.py +33 -0
pixeltable/metadata/converters/convert_24.py +55 -0
pixeltable/metadata/converters/convert_25.py +19 -0
pixeltable/metadata/converters/convert_26.py +23 -0
pixeltable/metadata/converters/convert_27.py +29 -0
pixeltable/metadata/converters/convert_28.py +13 -0
pixeltable/metadata/converters/convert_29.py +110 -0
pixeltable/metadata/converters/convert_30.py +63 -0
pixeltable/metadata/converters/convert_31.py +11 -0
pixeltable/metadata/converters/convert_32.py +15 -0
pixeltable/metadata/converters/convert_33.py +17 -0
pixeltable/metadata/converters/convert_34.py +21 -0
pixeltable/metadata/converters/convert_35.py +9 -0
pixeltable/metadata/converters/convert_36.py +38 -0
pixeltable/metadata/converters/convert_37.py +15 -0
pixeltable/metadata/converters/convert_38.py +39 -0
pixeltable/metadata/converters/convert_39.py +124 -0
pixeltable/metadata/converters/convert_40.py +73 -0
pixeltable/metadata/converters/convert_41.py +12 -0
pixeltable/metadata/converters/convert_42.py +9 -0
pixeltable/metadata/converters/convert_43.py +44 -0
pixeltable/metadata/converters/util.py +44 -18
pixeltable/metadata/notes.py +21 -0
pixeltable/metadata/schema.py +185 -42
pixeltable/metadata/utils.py +74 -0
pixeltable/mypy/__init__.py +3 -0
pixeltable/mypy/mypy_plugin.py +123 -0
pixeltable/plan.py +616 -225
pixeltable/share/__init__.py +3 -0
pixeltable/share/packager.py +797 -0
pixeltable/share/protocol/__init__.py +33 -0
pixeltable/share/protocol/common.py +165 -0
pixeltable/share/protocol/operation_types.py +33 -0
pixeltable/share/protocol/replica.py +119 -0
pixeltable/share/publish.py +349 -0
pixeltable/store.py +398 -232
pixeltable/type_system.py +730 -267
pixeltable/utils/__init__.py +40 -0
pixeltable/utils/arrow.py +201 -29
pixeltable/utils/av.py +298 -0
pixeltable/utils/azure_store.py +346 -0
pixeltable/utils/coco.py +26 -27
pixeltable/utils/code.py +4 -4
pixeltable/utils/console_output.py +46 -0
pixeltable/utils/coroutine.py +24 -0
pixeltable/utils/dbms.py +92 -0
pixeltable/utils/description_helper.py +11 -12
pixeltable/utils/documents.py +60 -61
pixeltable/utils/exception_handler.py +36 -0
pixeltable/utils/filecache.py +38 -22
pixeltable/utils/formatter.py +88 -51
pixeltable/utils/gcs_store.py +295 -0
pixeltable/utils/http.py +133 -0
pixeltable/utils/http_server.py +14 -13
pixeltable/utils/iceberg.py +13 -0
pixeltable/utils/image.py +17 -0
pixeltable/utils/lancedb.py +90 -0
pixeltable/utils/local_store.py +322 -0
pixeltable/utils/misc.py +5 -0
pixeltable/utils/object_stores.py +573 -0
pixeltable/utils/pydantic.py +60 -0
pixeltable/utils/pytorch.py +20 -20
pixeltable/utils/s3_store.py +527 -0
pixeltable/utils/sql.py +32 -5
pixeltable/utils/system.py +30 -0
pixeltable/utils/transactional_directory.py +4 -3
pixeltable-0.5.7.dist-info/METADATA +579 -0
pixeltable-0.5.7.dist-info/RECORD +227 -0
{pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
pixeltable/__version__.py +0 -3
pixeltable/catalog/named_function.py +0 -36
pixeltable/catalog/path_dict.py +0 -141
pixeltable/dataframe.py +0 -894
pixeltable/exec/expr_eval_node.py +0 -232
pixeltable/ext/__init__.py +0 -14
pixeltable/ext/functions/__init__.py +0 -8
pixeltable/ext/functions/whisperx.py +0 -77
pixeltable/ext/functions/yolox.py +0 -157
pixeltable/tool/create_test_db_dump.py +0 -311
pixeltable/tool/create_test_video.py +0 -81
pixeltable/tool/doc_plugins/griffe.py +0 -50
pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
pixeltable/tool/embed_udf.py +0 -9
pixeltable/tool/mypy_plugin.py +0 -55
pixeltable/utils/media_store.py +0 -76
pixeltable/utils/s3.py +0 -16
pixeltable-0.2.26.dist-info/METADATA +0 -400
pixeltable-0.2.26.dist-info/RECORD +0 -156
pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
{pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0

pixeltable/functions/anthropic.py CHANGED Viewed

@@ -1,55 +1,167 @@
 """
-Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
+Pixeltable UDFs
 that wrap various endpoints from the Anthropic API. In order to use them, you must
 first `pip install anthropic` and configure your Anthropic credentials, as described in
-the [Working with Anthropic](https://pixeltable.readme.io/docs/working-with-anthropic) tutorial.
+the [Working with Anthropic](https://docs.pixeltable.com/notebooks/integrations/working-with-anthropic) tutorial.
 """
-from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union
+import datetime
+import json
+import logging
+from typing import TYPE_CHECKING, Any, Iterable, cast
-import tenacity
+import httpx
 import pixeltable as pxt
-from pixeltable import env
+from pixeltable import env, exprs
+from pixeltable.func import Tools
 from pixeltable.utils.code import local_public_names
+from pixeltable.utils.http import exponential_backoff
 if TYPE_CHECKING:
     import anthropic
+_logger = logging.getLogger('pixeltable')
 @env.register_client('anthropic')
-def _(api_key: str) -> 'anthropic.Anthropic':
+def _(api_key: str) -> 'anthropic.AsyncAnthropic':
     import anthropic
-    return anthropic.Anthropic(api_key=api_key)
+    return anthropic.AsyncAnthropic(
+        api_key=api_key,
+        # recommended to increase limits for async client to avoid connection errors
+        http_client=httpx.AsyncClient(limits=httpx.Limits(max_keepalive_connections=100, max_connections=500)),
+    )
-def _anthropic_client() -> 'anthropic.Anthropic':
+def _anthropic_client() -> 'anthropic.AsyncAnthropic':
     return env.Env.get().get_client('anthropic')
-def _retry(fn: Callable) -> Callable:
-    import anthropic
-    return tenacity.retry(
-        retry=tenacity.retry_if_exception_type(anthropic.RateLimitError),
-        wait=tenacity.wait_random_exponential(multiplier=1, max=60),
-        stop=tenacity.stop_after_attempt(20),
-    )(fn)
+def _get_header_info(
+    headers: httpx.Headers,
+) -> tuple[
+    tuple[int, int, datetime.datetime] | None,
+    tuple[int, int, datetime.datetime] | None,
+    tuple[int, int, datetime.datetime] | None,
+]:
+    """Extract rate limit info from Anthropic API response headers."""
+    requests_limit_str = headers.get('anthropic-ratelimit-requests-limit')
+    requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
+    requests_remaining_str = headers.get('anthropic-ratelimit-requests-remaining')
+    requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
+    requests_reset_str = headers.get('anthropic-ratelimit-requests-reset')
+    requests_reset = (
+        datetime.datetime.fromisoformat(requests_reset_str.replace('Z', '+00:00')) if requests_reset_str else None
+    )
+    requests_info = (
+        (requests_limit, requests_remaining, requests_reset) if requests_reset and requests_remaining else None
+    )
+    input_tokens_limit_str = headers.get('anthropic-ratelimit-input-tokens-limit')
+    input_tokens_limit = int(input_tokens_limit_str) if input_tokens_limit_str is not None else None
+    input_tokens_remaining_str = headers.get('anthropic-ratelimit-input-tokens-remaining')
+    input_tokens_remaining = int(input_tokens_remaining_str) if input_tokens_remaining_str is not None else None
+    input_tokens_reset_str = headers.get('anthropic-ratelimit-input-tokens-reset')
+    input_tokens_reset = (
+        datetime.datetime.fromisoformat(input_tokens_reset_str.replace('Z', '+00:00'))
+        if input_tokens_reset_str
+        else None
+    )
+    input_tokens_info = (
+        (input_tokens_limit, input_tokens_remaining, input_tokens_reset)
+        if input_tokens_reset and input_tokens_remaining
+        else None
+    )
+    output_tokens_limit_str = headers.get('anthropic-ratelimit-output-tokens-limit')
+    output_tokens_limit = int(output_tokens_limit_str) if output_tokens_limit_str is not None else None
+    output_tokens_remaining_str = headers.get('anthropic-ratelimit-output-tokens-remaining')
+    output_tokens_remaining = int(output_tokens_remaining_str) if output_tokens_remaining_str is not None else None
+    output_tokens_reset_str = headers.get('anthropic-ratelimit-output-tokens-reset')
+    output_tokens_reset = (
+        datetime.datetime.fromisoformat(output_tokens_reset_str.replace('Z', '+00:00'))
+        if output_tokens_reset_str
+        else None
+    )
+    output_tokens_info = (
+        (output_tokens_limit, output_tokens_remaining, output_tokens_reset)
+        if output_tokens_reset and output_tokens_remaining
+        else None
+    )
+    if requests_info is None or input_tokens_info is None or output_tokens_info is None:
+        _logger.debug(f'get_header_info(): incomplete rate limit info: {headers}')
+    return requests_info, input_tokens_info, output_tokens_info
+class AnthropicRateLimitsInfo(env.RateLimitsInfo):
+    def __init__(self) -> None:
+        super().__init__(self._get_request_resources)
+    def _get_request_resources(self, messages: dict, max_tokens: int) -> dict[str, int]:
+        input_len = 0
+        for message in messages:
+            if 'role' in message:
+                input_len += len(message['role'])
+            if 'content' in message:
+                input_len += len(message['content'])
+        return {'requests': 1, 'input_tokens': int(input_len / 4), 'output_tokens': max_tokens}
+    def record_exc(self, request_ts: datetime.datetime, exc: Exception) -> None:
+        import anthropic
+        if (
+            not isinstance(exc, anthropic.APIError)
+            or not hasattr(exc, 'response')
+            or not hasattr(exc.response, 'headers')
+        ):
+            return
+        requests_info, input_tokens_info, output_tokens_info = _get_header_info(exc.response.headers)
+        _logger.debug(
+            f'record_exc(): request_ts: {request_ts}, requests_info={requests_info} '
+            f'input_tokens_info={input_tokens_info} output_tokens_info={output_tokens_info}'
+        )
+        self.record(
+            request_ts=request_ts,
+            requests=requests_info,
+            input_tokens=input_tokens_info,
+            output_tokens=output_tokens_info,
+        )
+        self.has_exc = True
+        retry_after_str = exc.response.headers.get('retry-after')
+        if retry_after_str is not None:
+            _logger.debug(f'retry-after: {retry_after_str}')
+    def get_retry_delay(self, exc: Exception, attempt: int) -> float | None:
+        import anthropic
+        # deal with timeouts separately, they don't come with headers
+        if isinstance(exc, anthropic.APITimeoutError):
+            return exponential_backoff(attempt)
+        if not isinstance(exc, anthropic.APIStatusError):
+            return None
+        _logger.debug(f'headers={exc.response.headers}')
+        should_retry_str = exc.response.headers.get('x-should-retry', '')
+        if should_retry_str.lower() != 'true':
+            return None
+        return super().get_retry_delay(exc, attempt)
 @pxt.udf
-def messages(
+async def messages(
     messages: list[dict[str, str]],
     *,
     model: str,
-    max_tokens: int = 1024,
-    metadata: Optional[dict[str, Any]] = None,
-    stop_sequences: Optional[list[str]] = None,
-    system: Optional[str] = None,
-    temperature: Optional[float] = None,
-    tool_choice: Optional[list[dict]] = None,
-    tools: Optional[dict] = None,
-    top_k: Optional[int] = None,
-    top_p: Optional[float] = None,
+    max_tokens: int,
+    model_kwargs: dict[str, Any] | None = None,
+    tools: list[dict[str, Any]] | None = None,
+    tool_choice: dict[str, Any] | None = None,
+    _runtime_ctx: env.RuntimeCtx | None = None,
 ) -> dict:
     """
     Create a Message.
@@ -57,6 +169,10 @@ def messages(
     Equivalent to the Anthropic `messages` API endpoint.
     For additional details, see: <https://docs.anthropic.com/en/api/messages>
+    Request throttling:
+    Uses the rate limit-related headers returned by the API to throttle requests adaptively, based on available
+    request and token capacity. No configuration is necessary.
     __Requirements:__
     - `pip install anthropic`
@@ -64,44 +180,107 @@ def messages(
     Args:
         messages: Input messages.
         model: The model that will complete your prompt.
-    For details on the other parameters, see: <https://docs.anthropic.com/en/api/messages>
+        model_kwargs: Additional keyword args for the Anthropic `messages` API.
+            For details on the available parameters, see: <https://docs.anthropic.com/en/api/messages>
+        tools: An optional list of Pixeltable tools to use for the request.
+        tool_choice: An optional tool choice configuration.
     Returns:
         A dictionary containing the response and other metadata.
     Examples:
-        Add a computed column that applies the model `claude-3-haiku-20240307`
+        Add a computed column that applies the model `claude-3-5-sonnet-20241022`
         to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
         >>> msgs = [{'role': 'user', 'content': tbl.prompt}]
-        ... tbl['response'] = messages(msgs, model='claude-3-haiku-20240307')
+        ... tbl.add_computed_column(response=messages(msgs, model='claude-3-5-sonnet-20241022'))
     """
-    return _retry(_anthropic_client().messages.create)(
-        messages=messages,
-        model=model,
-        max_tokens=max_tokens,
-        metadata=_opt(metadata),
-        stop_sequences=_opt(stop_sequences),
-        system=_opt(system),
-        temperature=_opt(temperature),
-        tool_choice=_opt(tool_choice),
-        tools=_opt(tools),
-        top_k=_opt(top_k),
-        top_p=_opt(top_p),
-    ).dict()
+    if model_kwargs is None:
+        model_kwargs = {}
+    if tools is not None:
+        # Reformat `tools` into Anthropic format
+        model_kwargs['tools'] = [
+            {
+                'name': tool['name'],
+                'description': tool['description'],
+                'input_schema': {
+                    'type': 'object',
+                    'properties': tool['parameters']['properties'],
+                    'required': tool['required'],
+                },
+            }
+            for tool in tools
+        ]
-_T = TypeVar('_T')
+    if tool_choice is not None:
+        if tool_choice['auto']:
+            model_kwargs['tool_choice'] = {'type': 'auto'}
+        elif tool_choice['required']:
+            model_kwargs['tool_choice'] = {'type': 'any'}
+        else:
+            assert tool_choice['tool'] is not None
+            model_kwargs['tool_choice'] = {'type': 'tool', 'name': tool_choice['tool']}
+        if not tool_choice['parallel_tool_calls']:
+            model_kwargs['tool_choice']['disable_parallel_tool_use'] = True
+    # make sure the pool info exists prior to making the request
+    resource_pool_id = f'rate-limits:anthropic:{model}'
+    rate_limits_info = env.Env.get().get_resource_pool_info(resource_pool_id, AnthropicRateLimitsInfo)
+    assert isinstance(rate_limits_info, env.RateLimitsInfo)
-def _opt(arg: _T) -> Union[_T, 'anthropic.NotGiven']:
-    import anthropic
-    return arg if arg is not None else anthropic.NOT_GIVEN
+    # TODO: timeouts should be set system-wide and be user-configurable
+    from anthropic.types import MessageParam
+    start_ts = datetime.datetime.now(tz=datetime.timezone.utc)
+    result = await _anthropic_client().messages.with_raw_response.create(
+        messages=cast(Iterable[MessageParam], messages), model=model, max_tokens=max_tokens, **model_kwargs
+    )
+    requests_info, input_tokens_info, output_tokens_info = _get_header_info(result.headers)
+    # retry_after_str = result.headers.get('retry-after')
+    # if retry_after_str is not None:
+    #     _logger.debug(f'retry-after: {retry_after_str}')
+    is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
+    rate_limits_info.record(
+        request_ts=start_ts,
+        requests=requests_info,
+        input_tokens=input_tokens_info,
+        output_tokens=output_tokens_info,
+        reset_exc=is_retry,
+    )
+    result_dict = json.loads(result.text)
+    return result_dict
+@messages.resource_pool
+def _(model: str) -> str:
+    return f'rate-limits:anthropic:{model}'
+def invoke_tools(tools: Tools, response: exprs.Expr) -> exprs.InlineDict:
+    """Converts an Anthropic response dict to Pixeltable tool invocation format and calls `tools._invoke()`."""
+    return tools._invoke(_anthropic_response_to_pxt_tool_calls(response))
+@pxt.udf
+def _anthropic_response_to_pxt_tool_calls(response: dict) -> dict | None:
+    anthropic_tool_calls = [r for r in response['content'] if r['type'] == 'tool_use']
+    if len(anthropic_tool_calls) == 0:
+        return None
+    pxt_tool_calls: dict[str, list[dict[str, Any]]] = {}
+    for tool_call in anthropic_tool_calls:
+        tool_name = tool_call['name']
+        if tool_name not in pxt_tool_calls:
+            pxt_tool_calls[tool_name] = []
+        pxt_tool_calls[tool_name].append({'args': tool_call['input']})
+    return pxt_tool_calls
 __all__ = local_public_names(__name__)
-def __dir__():
+def __dir__() -> list[str]:
     return __all__

pixeltable/functions/audio.py CHANGED Viewed

@@ -1,30 +1,167 @@
 """
-Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `AudioType`.
+Pixeltable UDFs for `AudioType`.
+"""
-Example:
-```python
-import pixeltable as pxt
-import pixeltable.functions as pxtf
+from typing import Any
-t = pxt.get_table(...)
-t.select(pxtf.audio.get_metadata()).collect()
-```
-"""
+import av
+import numpy as np
 import pixeltable as pxt
+import pixeltable.utils.av as av_utils
 from pixeltable.utils.code import local_public_names
+from pixeltable.utils.local_store import TempStore
 @pxt.udf(is_method=True)
 def get_metadata(audio: pxt.Audio) -> dict:
     """
     Gets various metadata associated with an audio file and returns it as a dictionary.
+    Args:
+        audio: The audio to get metadata for.
+    Returns:
+        A `dict` such as the following:
+            ```json
+            {
+                'size': 2568827,
+                'streams': [
+                    {
+                        'type': 'audio',
+                        'frames': 0,
+                        'duration': 2646000,
+                        'metadata': {},
+                        'time_base': 2.2675736961451248e-05,
+                        'codec_context': {
+                            'name': 'flac',
+                            'profile': None,
+                            'channels': 1,
+                            'codec_tag': '\\x00\\x00\\x00\\x00',
+                        },
+                        'duration_seconds': 60.0,
+                    }
+                ],
+                'bit_rate': 342510,
+                'metadata': {'encoder': 'Lavf61.1.100'},
+                'bit_exact': False,
+            }
+            ```
+    Examples:
+        Extract metadata for files in the `audio_col` column of the table `tbl`:
+        >>> tbl.select(tbl.audio_col.get_metadata()).collect()
+    """
+    return av_utils.get_metadata(audio)
+@pxt.udf()
+def encode_audio(
+    audio_data: pxt.Array[pxt.Float], *, input_sample_rate: int, format: str, output_sample_rate: int | None = None
+) -> pxt.Audio:
+    """
+    Encodes an audio clip represented as an array into a specified audio format.
+    Parameters:
+        audio_data: An array of sampled amplitudes. The accepted array shapes are `(N,)` or `(1, N)` for mono audio
+            or `(2, N)` for stereo.
+        input_sample_rate: The sample rate of the input audio data.
+        format: The desired output audio format. The supported formats are 'wav', 'mp3', 'flac', and 'mp4'.
+        output_sample_rate: The desired sample rate for the output audio. Defaults to the input sample rate if
+            unspecified.
+    Examples:
+        Add a computed column with encoded FLAC audio files to a table with audio data (as arrays of floats) and sample
+        rates:
+        >>> t.add_computed_column(
+        ...     audio_file=encode_audio(
+        ...         t.audio_data, input_sample_rate=t.sample_rate, format='flac'
+        ...     )
+        ... )
+    """
+    if format not in av_utils.AUDIO_FORMATS:
+        raise pxt.Error(f'Only the following formats are supported: {av_utils.AUDIO_FORMATS.keys()}')
+    if output_sample_rate is None:
+        output_sample_rate = input_sample_rate
+    codec, ext = av_utils.AUDIO_FORMATS[format]
+    output_path = str(TempStore.create_path(extension=f'.{ext}'))
+    match audio_data.shape:
+        case (_,):
+            # Mono audio as 1D array, reshape for pyav
+            layout = 'mono'
+            audio_data_transformed = audio_data[None, :]
+        case (1, _):
+            # Mono audio as 2D array, simply reshape and transpose the input for pyav
+            layout = 'mono'
+            audio_data_transformed = audio_data.reshape(-1, 1).transpose()
+        case (2, _):
+            # Stereo audio. Input layout: [[L0, L1, L2, ...],[R0, R1, R2, ...]],
+            # pyav expects: [L0, R0, L1, R1, L2, R2, ...]
+            layout = 'stereo'
+            audio_data_transformed = np.empty(audio_data.shape[1] * 2, dtype=audio_data.dtype)
+            audio_data_transformed[0::2] = audio_data[0]
+            audio_data_transformed[1::2] = audio_data[1]
+            audio_data_transformed = audio_data_transformed.reshape(1, -1)
+        case _:
+            raise pxt.Error(
+                f'Supported input array shapes are (N,), (1, N) for mono and (2, N) for stereo, got {audio_data.shape}'
+            )
+    with av.open(output_path, mode='w') as output_container:
+        stream = output_container.add_stream(codec, rate=output_sample_rate)
+        assert isinstance(stream, av.AudioStream)
+        frame = av.AudioFrame.from_ndarray(audio_data_transformed, format='flt', layout=layout)
+        frame.sample_rate = input_sample_rate
+        for packet in stream.encode(frame):
+            output_container.mux(packet)
+        for packet in stream.encode():
+            output_container.mux(packet)
+        return output_path
+def audio_splitter(
+    audio: Any, chunk_duration_sec: float, *, overlap_sec: float = 0.0, min_chunk_duration_sec: float = 0.0
+) -> tuple[type[pxt.iterators.ComponentIterator], dict[str, Any]]:
+    """
+    Iterator over chunks of an audio file. The audio file is split into smaller chunks,
+    where the duration of each chunk is determined by chunk_duration_sec.
+    The iterator yields audio chunks as pxt.Audio, along with the start and end time of each chunk.
+    If the input contains no audio, no chunks are yielded.
+    Args:
+        chunk_duration_sec: Audio chunk duration in seconds
+        overlap_sec: Overlap between consecutive chunks in seconds
+        min_chunk_duration_sec: Drop the last chunk if it is smaller than min_chunk_duration_sec
+    Examples:
+        This example assumes an existing table `tbl` with a column `audio` of type `pxt.Audio`.
+        Create a view that splits all audio files into chunks of 30 seconds with 5 seconds overlap:
+        >>> pxt.create_view(
+        ...     'audio_chunks',
+        ...     tbl,
+        ...     iterator=audio_splitter(tbl.audio, chunk_duration_sec=30.0, overlap_sec=5.0)
+        ... )
     """
-    return pxt.functions.video._get_metadata(audio)
+    kwargs: dict[str, Any] = {}
+    if overlap_sec != 0.0:
+        kwargs['overlap_sec'] = overlap_sec
+    if min_chunk_duration_sec != 0.0:
+        kwargs['min_chunk_duration_sec'] = min_chunk_duration_sec
+    return pxt.iterators.AudioSplitter._create(audio=audio, chunk_duration_sec=chunk_duration_sec, **kwargs)
 __all__ = local_public_names(__name__)
-def __dir__():
+def __dir__() -> list[str]:
     return __all__

pixeltable/functions/bedrock.py ADDED Viewed

@@ -0,0 +1,137 @@
+"""
+Pixeltable UDFs for AWS Bedrock AI models.
+Provides integration with AWS Bedrock for accessing various foundation models
+including Anthropic Claude, Amazon Titan, and other providers.
+"""
+import logging
+from typing import TYPE_CHECKING, Any
+import pixeltable as pxt
+from pixeltable import env, exprs
+from pixeltable.func import Tools
+from pixeltable.utils.code import local_public_names
+if TYPE_CHECKING:
+    from botocore.client import BaseClient
+_logger = logging.getLogger('pixeltable')
+@env.register_client('bedrock')
+def _() -> 'BaseClient':
+    import boto3
+    return boto3.client(service_name='bedrock-runtime')
+# boto3 typing is weird; type information is dynamically defined, so the best we can do for the static checker is `Any`
+def _bedrock_client() -> Any:
+    return env.Env.get().get_client('bedrock')
+@pxt.udf
+def converse(
+    messages: list[dict[str, Any]],
+    *,
+    model_id: str,
+    system: list[dict[str, Any]] | None = None,
+    inference_config: dict | None = None,
+    additional_model_request_fields: dict | None = None,
+    tool_config: list[dict] | None = None,
+) -> dict:
+    """
+    Generate a conversation response.
+    Equivalent to the AWS Bedrock `converse` API endpoint.
+    For additional details, see: <https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html>
+    __Requirements:__
+    - `pip install boto3`
+    Args:
+        messages: Input messages.
+        model_id: The model that will complete your prompt.
+        system: An optional system prompt.
+        inference_config: Base inference parameters to use.
+        additional_model_request_fields: Additional inference parameters to use.
+    For details on the optional parameters, see:
+    <https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html>
+    Returns:
+        A dictionary containing the response and other metadata.
+    Examples:
+        Add a computed column that applies the model `anthropic.claude-3-haiku-20240307-v1:0`
+        to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
+        >>> msgs = [{'role': 'user', 'content': [{'text': tbl.prompt}]}]
+        ... tbl.add_computed_column(response=converse(msgs, model_id='anthropic.claude-3-haiku-20240307-v1:0'))
+    """
+    kwargs: dict[str, Any] = {'messages': messages, 'modelId': model_id}
+    if system is not None:
+        kwargs['system'] = system
+    if inference_config is not None:
+        kwargs['inferenceConfig'] = inference_config
+    if additional_model_request_fields is not None:
+        kwargs['additionalModelRequestFields'] = additional_model_request_fields
+    if tool_config is not None:
+        tool_config_ = {
+            'tools': [
+                {
+                    'toolSpec': {
+                        'name': tool['name'],
+                        'description': tool['description'],
+                        'inputSchema': {
+                            'json': {
+                                'type': 'object',
+                                'properties': tool['parameters']['properties'],
+                                'required': tool['required'],
+                            }
+                        },
+                    }
+                }
+                for tool in tool_config
+            ]
+        }
+        kwargs['toolConfig'] = tool_config_
+    return _bedrock_client().converse(**kwargs)
+def invoke_tools(tools: Tools, response: exprs.Expr) -> exprs.InlineDict:
+    """Converts a Bedrock response dict to Pixeltable tool invocation format and calls `tools._invoke()`."""
+    return tools._invoke(_bedrock_response_to_pxt_tool_calls(response))
+@pxt.udf
+def _bedrock_response_to_pxt_tool_calls(response: dict) -> dict | None:
+    if response.get('stopReason') != 'tool_use':
+        return None
+    pxt_tool_calls: dict[str, list[dict[str, Any]]] = {}
+    for message in response['output']['message']['content']:
+        if 'toolUse' in message:
+            tool_call = message['toolUse']
+            tool_name = tool_call['name']
+            if tool_name not in pxt_tool_calls:
+                pxt_tool_calls[tool_name] = []
+            pxt_tool_calls[tool_name].append({'args': tool_call['input']})
+    if len(pxt_tool_calls) == 0:
+        return None
+    return pxt_tool_calls
+__all__ = local_public_names(__name__)
+def __dir__() -> list[str]:
+    return __all__

pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl