PyPI - pixeltable - Versions diffs - 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl - Mend

pixeltable 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (147) hide show

pixeltable/__init__.py +64 -11
pixeltable/__version__.py +2 -2
pixeltable/catalog/__init__.py +1 -1
pixeltable/catalog/catalog.py +50 -27
pixeltable/catalog/column.py +27 -11
pixeltable/catalog/dir.py +6 -4
pixeltable/catalog/globals.py +8 -1
pixeltable/catalog/insertable_table.py +22 -12
pixeltable/catalog/named_function.py +10 -6
pixeltable/catalog/path.py +3 -2
pixeltable/catalog/path_dict.py +8 -6
pixeltable/catalog/schema_object.py +2 -1
pixeltable/catalog/table.py +121 -101
pixeltable/catalog/table_version.py +291 -142
pixeltable/catalog/table_version_path.py +8 -5
pixeltable/catalog/view.py +67 -26
pixeltable/dataframe.py +102 -72
pixeltable/env.py +20 -21
pixeltable/exec/__init__.py +2 -2
pixeltable/exec/aggregation_node.py +10 -4
pixeltable/exec/cache_prefetch_node.py +5 -3
pixeltable/exec/component_iteration_node.py +9 -8
pixeltable/exec/data_row_batch.py +21 -10
pixeltable/exec/exec_context.py +10 -3
pixeltable/exec/exec_node.py +23 -12
pixeltable/exec/expr_eval/evaluators.py +13 -7
pixeltable/exec/expr_eval/expr_eval_node.py +24 -15
pixeltable/exec/expr_eval/globals.py +30 -7
pixeltable/exec/expr_eval/row_buffer.py +5 -6
pixeltable/exec/expr_eval/schedulers.py +151 -31
pixeltable/exec/in_memory_data_node.py +8 -7
pixeltable/exec/row_update_node.py +15 -5
pixeltable/exec/sql_node.py +56 -27
pixeltable/exprs/__init__.py +2 -2
pixeltable/exprs/arithmetic_expr.py +57 -26
pixeltable/exprs/array_slice.py +1 -1
pixeltable/exprs/column_property_ref.py +2 -1
pixeltable/exprs/column_ref.py +20 -15
pixeltable/exprs/comparison.py +6 -2
pixeltable/exprs/compound_predicate.py +1 -3
pixeltable/exprs/data_row.py +2 -2
pixeltable/exprs/expr.py +101 -72
pixeltable/exprs/expr_dict.py +2 -1
pixeltable/exprs/expr_set.py +3 -1
pixeltable/exprs/function_call.py +39 -41
pixeltable/exprs/globals.py +1 -0
pixeltable/exprs/in_predicate.py +2 -2
pixeltable/exprs/inline_expr.py +20 -17
pixeltable/exprs/json_mapper.py +4 -2
pixeltable/exprs/json_path.py +12 -18
pixeltable/exprs/literal.py +5 -9
pixeltable/exprs/method_ref.py +1 -0
pixeltable/exprs/object_ref.py +1 -1
pixeltable/exprs/row_builder.py +32 -17
pixeltable/exprs/rowid_ref.py +14 -5
pixeltable/exprs/similarity_expr.py +11 -6
pixeltable/exprs/sql_element_cache.py +1 -1
pixeltable/exprs/type_cast.py +24 -9
pixeltable/ext/__init__.py +1 -0
pixeltable/ext/functions/__init__.py +1 -0
pixeltable/ext/functions/whisperx.py +2 -2
pixeltable/ext/functions/yolox.py +11 -11
pixeltable/func/aggregate_function.py +17 -13
pixeltable/func/callable_function.py +6 -6
pixeltable/func/expr_template_function.py +15 -14
pixeltable/func/function.py +16 -16
pixeltable/func/function_registry.py +11 -8
pixeltable/func/globals.py +4 -2
pixeltable/func/query_template_function.py +12 -13
pixeltable/func/signature.py +18 -9
pixeltable/func/tools.py +10 -17
pixeltable/func/udf.py +106 -11
pixeltable/functions/__init__.py +21 -2
pixeltable/functions/anthropic.py +16 -12
pixeltable/functions/fireworks.py +63 -5
pixeltable/functions/gemini.py +13 -3
pixeltable/functions/globals.py +18 -6
pixeltable/functions/huggingface.py +20 -38
pixeltable/functions/image.py +7 -3
pixeltable/functions/json.py +1 -0
pixeltable/functions/llama_cpp.py +1 -4
pixeltable/functions/mistralai.py +31 -20
pixeltable/functions/ollama.py +4 -18
pixeltable/functions/openai.py +201 -108
pixeltable/functions/replicate.py +11 -10
pixeltable/functions/string.py +70 -7
pixeltable/functions/timestamp.py +21 -8
pixeltable/functions/together.py +66 -52
pixeltable/functions/video.py +1 -0
pixeltable/functions/vision.py +14 -11
pixeltable/functions/whisper.py +2 -1
pixeltable/globals.py +60 -26
pixeltable/index/__init__.py +1 -1
pixeltable/index/btree.py +5 -3
pixeltable/index/embedding_index.py +15 -14
pixeltable/io/__init__.py +1 -1
pixeltable/io/external_store.py +30 -25
pixeltable/io/fiftyone.py +6 -14
pixeltable/io/globals.py +33 -27
pixeltable/io/hf_datasets.py +2 -1
pixeltable/io/label_studio.py +77 -68
pixeltable/io/pandas.py +33 -9
pixeltable/io/parquet.py +9 -12
pixeltable/iterators/__init__.py +1 -0
pixeltable/iterators/audio.py +205 -0
pixeltable/iterators/document.py +19 -8
pixeltable/iterators/image.py +6 -24
pixeltable/iterators/string.py +3 -6
pixeltable/iterators/video.py +1 -7
pixeltable/metadata/__init__.py +7 -1
pixeltable/metadata/converters/convert_10.py +2 -2
pixeltable/metadata/converters/convert_15.py +1 -5
pixeltable/metadata/converters/convert_16.py +2 -4
pixeltable/metadata/converters/convert_17.py +2 -4
pixeltable/metadata/converters/convert_18.py +2 -4
pixeltable/metadata/converters/convert_19.py +2 -5
pixeltable/metadata/converters/convert_20.py +1 -4
pixeltable/metadata/converters/convert_21.py +4 -6
pixeltable/metadata/converters/convert_22.py +1 -0
pixeltable/metadata/converters/convert_23.py +5 -5
pixeltable/metadata/converters/convert_24.py +12 -13
pixeltable/metadata/converters/convert_26.py +23 -0
pixeltable/metadata/converters/util.py +3 -4
pixeltable/metadata/notes.py +1 -0
pixeltable/metadata/schema.py +13 -2
pixeltable/plan.py +173 -98
pixeltable/store.py +42 -26
pixeltable/type_system.py +62 -54
pixeltable/utils/arrow.py +1 -2
pixeltable/utils/coco.py +16 -17
pixeltable/utils/code.py +1 -1
pixeltable/utils/console_output.py +6 -3
pixeltable/utils/description_helper.py +7 -7
pixeltable/utils/documents.py +3 -1
pixeltable/utils/filecache.py +12 -7
pixeltable/utils/http_server.py +9 -8
pixeltable/utils/media_store.py +2 -1
pixeltable/utils/pytorch.py +11 -14
pixeltable/utils/s3.py +1 -0
pixeltable/utils/sql.py +1 -0
pixeltable/utils/transactional_directory.py +2 -2
{pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/METADATA +6 -8
pixeltable-0.3.3.dist-info/RECORD +163 -0
pixeltable-0.3.2.dist-info/RECORD +0 -161
{pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/LICENSE +0 -0
{pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/WHEEL +0 -0
{pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/entry_points.txt +0 -0

pixeltable/func/tools.py CHANGED Viewed

@@ -21,6 +21,7 @@ if TYPE_CHECKING:
 # `Function`, which is not natively JSON-serializable; Pydantic provides a way of customizing its default
 # serialization behavior, whereas dataclasses do not.)
 class Tool(pydantic.BaseModel):
     # Allow arbitrary types so that we can include a Pixeltable function in the schema.
     # We will implement a model_serializer to ensure the Tool model can be serialized.
@@ -41,24 +42,16 @@ class Tool(pydantic.BaseModel):
             'description': self.description or self.fn._docstring(),
             'parameters': {
                 'type': 'object',
-                'properties': {
-                    param.name: param.col_type._to_json_schema()
-                    for param in self.parameters.values()
-                }
+                'properties': {param.name: param.col_type._to_json_schema() for param in self.parameters.values()},
             },
-            'required': [
-                param.name for param in self.parameters.values() if not param.col_type.nullable
-            ],
+            'required': [param.name for param in self.parameters.values() if not param.col_type.nullable],
             'additionalProperties': False,  # TODO Handle kwargs?
         }
     # `tool_calls` must be in standardized tool invocation format:
     # {tool_name: {'args': {name1: value1, name2: value2, ...}}, ...}
     def invoke(self, tool_calls: 'exprs.Expr') -> 'exprs.FunctionCall':
-        kwargs = {
-            param.name: self.__extract_tool_arg(param, tool_calls)
-            for param in self.parameters.values()
-        }
+        kwargs = {param.name: self.__extract_tool_arg(param, tool_calls) for param in self.parameters.values()}
         return self.fn(**kwargs)
     def __extract_tool_arg(self, param: Parameter, tool_calls: 'exprs.Expr') -> 'exprs.Expr':
@@ -93,10 +86,7 @@ class Tools(pydantic.BaseModel):
     def _invoke(self, tool_calls: 'exprs.Expr') -> 'exprs.InlineDict':
         from pixeltable import exprs
-        return exprs.InlineDict({
-            tool.name or tool.fn.name: tool.invoke(tool_calls)
-            for tool in self.tools
-        })
+        return exprs.InlineDict({tool.name or tool.fn.name: tool.invoke(tool_calls) for tool in self.tools})
     def choice(
         self,
@@ -111,7 +101,8 @@ class Tools(pydantic.BaseModel):
         if tool is not None:
             try:
                 tool_obj = next(
-                    t for t in self.tools
+                    t
+                    for t in self.tools
                     if (isinstance(tool, Function) and t.fn == tool)
                     or (isinstance(tool, str) and (t.name or t.fn.name) == tool)
                 )
@@ -144,7 +135,9 @@ def _extract_bool_tool_arg(tool_calls: dict[str, Any], func_name: str, param_nam
 T = TypeVar('T')
-def _extract_arg(eval_fn: Callable[[Any], T], tool_calls: dict[str, Any], func_name: str, param_name: str) -> Optional[T]:
+def _extract_arg(
+    eval_fn: Callable[[Any], T], tool_calls: dict[str, Any], func_name: str, param_name: str
+) -> Optional[T]:
     if func_name in tool_calls:
         arguments = tool_calls[func_name]['args']
         if param_name in arguments:

pixeltable/func/udf.py CHANGED Viewed

@@ -1,16 +1,20 @@
 from __future__ import annotations
-from typing import Any, Callable, Optional, Sequence, overload
+import inspect
+from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence, overload
 import pixeltable.exceptions as excs
 import pixeltable.type_system as ts
+from pixeltable import catalog
 from .callable_function import CallableFunction
-from .expr_template_function import ExprTemplateFunction, ExprTemplate
-from .function import Function
+from .expr_template_function import ExprTemplate, ExprTemplateFunction
 from .function_registry import FunctionRegistry
 from .globals import validate_symbol_path
-from .signature import Signature
+from .signature import Parameter, Signature
+if TYPE_CHECKING:
+    from pixeltable import exprs
 # Decorator invoked without parentheses: @pxt.udf
@@ -28,10 +32,17 @@ def udf(
     is_property: bool = False,
     resource_pool: Optional[str] = None,
     type_substitutions: Optional[Sequence[dict]] = None,
-    _force_stored: bool = False
+    _force_stored: bool = False,
 ) -> Callable[[Callable], CallableFunction]: ...
+# pxt.udf() called explicitly on a Table:
+@overload
+def udf(
+    table: catalog.Table, /, *, return_value: Any = None, description: Optional[str] = None
+) -> ExprTemplateFunction: ...
 def udf(*args, **kwargs):
     """A decorator to create a Function from a function definition.
@@ -41,13 +52,19 @@ def udf(*args, **kwargs):
         ...    return x + 1
     """
     if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):
         # Decorator invoked without parentheses: @pxt.udf
         # Simply call make_function with defaults.
         return make_function(decorated_fn=args[0])
-    else:
+    elif len(args) == 1 and isinstance(args[0], catalog.Table):
+        # pxt.udf() called explicitly on a Table
+        return_value = kwargs.pop('return_value', None)
+        description = kwargs.pop('description', None)
+        if len(kwargs) > 0:
+            raise excs.Error(f'Invalid udf kwargs: {", ".join(kwargs.keys())}')
+        return from_table(args[0], return_value, description)
+    else:
         # Decorator schema invoked with parentheses: @pxt.udf(**kwargs)
         # Create a decorator for the specified schema.
         batch_size = kwargs.pop('batch_size', None)
@@ -71,7 +88,7 @@ def udf(*args, **kwargs):
                 is_property=is_property,
                 resource_pool=resource_pool,
                 type_substitutions=type_substitutions,
-                force_stored=force_stored
+                force_stored=force_stored,
             )
         return decorator
@@ -88,7 +105,7 @@ def make_function(
     resource_pool: Optional[str] = None,
     type_substitutions: Optional[Sequence[dict]] = None,
     function_name: Optional[str] = None,
-    force_stored: bool = False
+    force_stored: bool = False,
 ) -> CallableFunction:
     """
     Constructs a `CallableFunction` from the specified parameters.
@@ -129,7 +146,7 @@ def make_function(
             raise excs.Error(f'Cannot specify both `is_method` and `is_property` (in function `{function_name}`)')
         if is_property and len(sig.parameters) != 1:
             raise excs.Error(
-                f"`is_property=True` expects a UDF with exactly 1 parameter, but `{function_name}` has {len(sig.parameters)}"
+                f'`is_property=True` expects a UDF with exactly 1 parameter, but `{function_name}` has {len(sig.parameters)}'
             )
         if (is_method or is_property) and function_path is None:
             raise excs.Error('Stored functions cannot be declared using `is_method` or `is_property`')
@@ -164,7 +181,7 @@ def make_function(
         self_name=function_name,
         batch_size=batch_size,
         is_method=is_method,
-        is_property=is_property
+        is_property=is_property,
     )
     if resource_pool is not None:
         result.resource_pool(lambda: resource_pool)
@@ -177,12 +194,15 @@ def make_function(
     return result
 @overload
 def expr_udf(py_fn: Callable) -> ExprTemplateFunction: ...
 @overload
 def expr_udf(*, param_types: Optional[list[ts.ColumnType]] = None) -> Callable[[Callable], ExprTemplateFunction]: ...
 def expr_udf(*args: Any, **kwargs: Any) -> Any:
     def make_expr_template(py_fn: Callable, param_types: Optional[list[ts.ColumnType]]) -> ExprTemplateFunction:
         if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
@@ -197,6 +217,7 @@ def expr_udf(*args: Any, **kwargs: Any) -> Any:
         # construct Signature from the function signature
         sig = Signature.create(py_fn=py_fn, param_types=param_types, return_type=ts.InvalidType())
         import pixeltable.exprs as exprs
         var_exprs = [exprs.Variable(param.name, param.col_type) for param in sig.parameters.values()]
         # call the function with the parameter expressions to construct an Expr with parameters
         expr = py_fn(*var_exprs)
@@ -212,3 +233,77 @@ def expr_udf(*args: Any, **kwargs: Any) -> Any:
     else:
         assert len(args) == 0 and len(kwargs) == 1 and 'param_types' in kwargs
         return lambda py_fn: make_expr_template(py_fn, kwargs['param_types'])
+def from_table(
+    tbl: catalog.Table, return_value: Optional['exprs.Expr'], description: Optional[str]
+) -> ExprTemplateFunction:
+    """
+    Constructs an `ExprTemplateFunction` from a `Table`.
+    The constructed function will have one parameter for each data column in the table, which is optional (with
+    default None) if and only if its column type is nullable. The output of the function is a dict of the form
+    {
+        'data_col_1': Variable('data_col_1', col_type_1),
+        'data_col_2': Variable('data_col_2', col_type_2),
+        ...,
+        'computed_col_1': computed_expr_1,
+        'computed_col_2': computed_expr_2,
+        ...
+    }
+    where the computed expressions correspond to fully substituted expressions for the computed columns of the
+    table. In the substitution, ColumnRefs of data columns are replaced by Variable expressions, and ColumnRefs of
+    computed columns are replaced by the (previously constructed) expressions for those columns.
+    If an optional `return_value` is specified, then it is used as the return value of the function in place of
+    the default dict. The same substitutions will be applied to the `return_value` expression.
+    """
+    from pixeltable import exprs
+    ancestors = [tbl] + tbl._bases
+    ancestors.reverse()  # We must traverse the ancestors in order from base to derived
+    subst: dict[exprs.Expr, exprs.Expr] = {}
+    result_dict: dict[str, exprs.Expr] = {}
+    params: list[Parameter] = []
+    for t in ancestors:
+        for name, col in t._tbl_version.cols_by_name.items():
+            assert name not in result_dict, f'Column name is not unique: {name}'
+            if col.is_computed:
+                # Computed column. Apply any existing substitutions and add the new expression to the subst dict.
+                new_expr = col.value_expr.copy()
+                new_expr.substitute(subst)
+                subst[t[name]] = new_expr  # Substitute new_expr for ColumnRefs to this column
+                result_dict[name] = new_expr
+            else:
+                # Data column. Include it as a parameter and add a variable expression as the subst dict.
+                var = exprs.Variable(name, col.col_type)
+                subst[t[name]] = var  # Substitute var for ColumnRefs to this column
+                result_dict[name] = var
+                # Since this is a data column, it becomes a UDF parameter.
+                # If the column is nullable, then the parameter will have a default value of None.
+                default_value = None if col.col_type.nullable else inspect.Parameter.empty
+                param = Parameter(name, col.col_type, inspect._ParameterKind.POSITIONAL_OR_KEYWORD, default_value)
+                params.append(param)
+    if return_value is None:
+        return_value = exprs.InlineDict(result_dict)
+    else:
+        return_value = exprs.Expr.from_object(return_value)
+        return_value = return_value.copy().substitute(subst)
+    if description is None:
+        # Default description is the table comment
+        description = tbl._comment
+        if len(description) == 0:
+            description = f"UDF for table '{tbl._name}'"
+    # TODO: Use column comments as parameter descriptions, when we have them
+    argstring = '\n'.join(f'    {param.name}: of type `{param.col_type}`' for param in params)
+    docstring = f'{description}\n\nArgs:\n{argstring}'
+    template = ExprTemplate(return_value, Signature(return_value.col_type, params))
+    fn = ExprTemplateFunction([template], name=tbl._name)
+    fn.__doc__ = docstring
+    return fn

pixeltable/functions/__init__.py CHANGED Viewed

@@ -1,7 +1,26 @@
 from pixeltable.utils.code import local_public_names
-from . import (anthropic, audio, fireworks, gemini, huggingface, image, json, llama_cpp, math, mistralai, ollama,
-               openai, string, timestamp, together, video, vision, whisper)
+from . import (
+    anthropic,
+    audio,
+    fireworks,
+    gemini,
+    huggingface,
+    image,
+    json,
+    llama_cpp,
+    math,
+    mistralai,
+    ollama,
+    openai,
+    replicate,
+    string,
+    timestamp,
+    together,
+    video,
+    vision,
+    whisper,
+)
 from .globals import count, max, mean, min, sum
 __all__ = local_public_names(__name__, exclude=['globals']) + local_public_names(globals.__name__)

pixeltable/functions/anthropic.py CHANGED Viewed

@@ -8,7 +8,7 @@ the [Working with Anthropic](https://pixeltable.readme.io/docs/working-with-anth
 import datetime
 import json
 import logging
-from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union, cast, Iterable
+from typing import TYPE_CHECKING, Any, Iterable, Optional, TypeVar, Union, cast
 import httpx
@@ -22,13 +22,16 @@ if TYPE_CHECKING:
 _logger = logging.getLogger('pixeltable')
 @env.register_client('anthropic')
 def _(api_key: str) -> 'anthropic.AsyncAnthropic':
     import anthropic
     return anthropic.AsyncAnthropic(
         api_key=api_key,
         # recommended to increase limits for async client to avoid connection errors
-        http_client = httpx.AsyncClient(limits=httpx.Limits(max_keepalive_connections=100, max_connections=500)))
+        http_client=httpx.AsyncClient(limits=httpx.Limits(max_keepalive_connections=100, max_connections=500)),
+    )
 def _anthropic_client() -> 'anthropic.AsyncAnthropic':
@@ -36,7 +39,6 @@ def _anthropic_client() -> 'anthropic.AsyncAnthropic':
 class AnthropicRateLimitsInfo(env.RateLimitsInfo):
     def __init__(self):
         super().__init__(self._get_request_resources)
@@ -80,6 +82,7 @@ async def messages(
     tools: Optional[list[dict]] = None,
     top_k: Optional[int] = None,
     top_p: Optional[float] = None,
+    timeout: Optional[float] = None,
 ) -> dict:
     """
     Create a Message.
@@ -87,6 +90,10 @@ async def messages(
     Equivalent to the Anthropic `messages` API endpoint.
     For additional details, see: <https://docs.anthropic.com/en/api/messages>
+    Request throttling:
+    Uses the rate limit-related headers returned by the API to throttle requests adaptively, based on available
+    request and token capacity. No configuration is necessary.
     __Requirements:__
     - `pip install anthropic`
@@ -105,7 +112,7 @@ async def messages(
         to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
         >>> msgs = [{'role': 'user', 'content': tbl.prompt}]
-        ... tbl['response'] = messages(msgs, model='claude-3-haiku-20240307')
+        ... tbl.add_computed_column(response= messages(msgs, model='claude-3-haiku-20240307'))
     """
     # it doesn't look like count_tokens() actually exists in the current version of the library
@@ -158,7 +165,7 @@ async def messages(
         tool_choice=_opt(cast(Any, tool_choice_)),
         top_k=_opt(top_k),
         top_p=_opt(top_p),
-        timeout=10,
+        timeout=_opt(timeout),
     )
     requests_limit_str = result.headers.get('anthropic-ratelimit-requests-limit')
@@ -186,7 +193,8 @@ async def messages(
     rate_limits_info.record(
         requests=(requests_limit, requests_remaining, requests_reset),
         input_tokens=(input_tokens_limit, input_tokens_remaining, input_tokens_reset),
-        output_tokens=(output_tokens_limit, output_tokens_remaining, output_tokens_reset))
+        output_tokens=(output_tokens_limit, output_tokens_remaining, output_tokens_reset),
+    )
     result_dict = json.loads(result.text)
     return result_dict
@@ -206,12 +214,7 @@ def invoke_tools(tools: Tools, response: exprs.Expr) -> exprs.InlineDict:
 def _anthropic_response_to_pxt_tool_calls(response: dict) -> Optional[dict]:
     anthropic_tool_calls = [r for r in response['content'] if r['type'] == 'tool_use']
     if len(anthropic_tool_calls) > 0:
-        return {
-            tool_call['name']: {
-                'args': tool_call['input']
-            }
-            for tool_call in anthropic_tool_calls
-        }
+        return {tool_call['name']: {'args': tool_call['input']} for tool_call in anthropic_tool_calls}
     return None
@@ -220,6 +223,7 @@ _T = TypeVar('_T')
 def _opt(arg: _T) -> Union[_T, 'anthropic.NotGiven']:
     import anthropic
     return arg if arg is not None else anthropic.NOT_GIVEN

pixeltable/functions/fireworks.py CHANGED Viewed

@@ -5,7 +5,7 @@ first `pip install fireworks-ai` and configure your Fireworks AI credentials, as
 the [Working with Fireworks](https://pixeltable.readme.io/docs/working-with-fireworks) tutorial.
 """
-from typing import Optional, TYPE_CHECKING
+from typing import TYPE_CHECKING, Optional
 import pixeltable as pxt
 from pixeltable import env
@@ -26,8 +26,8 @@ def _fireworks_client() -> 'fireworks.client.Fireworks':
     return env.Env.get().get_client('fireworks')
-@pxt.udf
-def chat_completions(
+@pxt.udf(resource_pool='request-rate:fireworks')
+async def chat_completions(
     messages: list[dict[str, str]],
     *,
     model: str,
@@ -35,6 +35,7 @@ def chat_completions(
     top_k: Optional[int] = None,
     top_p: Optional[float] = None,
     temperature: Optional[float] = None,
+    request_timeout: Optional[int] = None,
 ) -> dict:
     """
     Creates a model response for the given chat conversation.
@@ -42,6 +43,10 @@ def chat_completions(
     Equivalent to the Fireworks AI `chat/completions` API endpoint.
     For additional details, see: [https://docs.fireworks.ai/api-reference/post-chatcompletions](https://docs.fireworks.ai/api-reference/post-chatcompletions)
+    Request throttling:
+    Applies the rate limit set in the config (section `fireworks`, key `rate_limit`). If no rate
+    limit is configured, uses a default of 600 RPM.
     __Requirements:__
     - `pip install fireworks-ai`
@@ -60,11 +65,64 @@ def chat_completions(
         to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
         >>> messages = [{'role': 'user', 'content': tbl.prompt}]
-        ... tbl['response'] = chat_completions(messages, model='accounts/fireworks/models/mixtral-8x22b-instruct')
+        ... tbl.add_computed_column(response=chat_completions(messages, model='accounts/fireworks/models/mixtral-8x22b-instruct'))
     """
     kwargs = {'max_tokens': max_tokens, 'top_k': top_k, 'top_p': top_p, 'temperature': temperature}
     kwargs_not_none = {k: v for k, v in kwargs.items() if v is not None}
-    return _fireworks_client().chat.completions.create(model=model, messages=messages, **kwargs_not_none).dict()
+    # for debugging purposes:
+    # res_sync = _fireworks_client().chat.completions.create(model=model, messages=messages, **kwargs_not_none)
+    # res_sync_dict = res_sync.dict()
+    if request_timeout is None:
+        request_timeout = env.Env.get().config.get_int_value('timeout', section='fireworks') or 600
+    # TODO: this timeout doesn't really work, I think it only applies to returning the stream, but not to the timing
+    # of the chunks; addressing this would require a timeout for the task running this udf
+    stream = _fireworks_client().chat.completions.acreate(
+        model=model, messages=messages, request_timeout=request_timeout, **kwargs_not_none
+    )
+    chunks = []
+    async for chunk in stream:
+        chunks.append(chunk)
+    res = {
+        'id': chunks[0].id,
+        'object': 'chat.completion',
+        'created': chunks[0].created,
+        'model': chunks[0].model,
+        'choices': [
+            {
+                'index': 0,
+                'message': {
+                    'role': None,
+                    'content': '',
+                    'tool_calls': None,
+                    'tool_call_id': None,
+                    'function': None,
+                    'name': None,
+                },
+                'finish_reason': None,
+                'logprobs': None,
+                'raw_output': None,
+            }
+        ],
+        'usage': {},
+    }
+    for chunk in chunks:
+        d = chunk.dict()
+        if 'usage' in d and d['usage'] is not None:
+            res['usage'] = d['usage']
+        if chunk.choices[0].finish_reason is not None:
+            res['choices'][0]['finish_reason'] = chunk.choices[0].finish_reason
+        if chunk.choices[0].delta.role is not None:
+            res['choices'][0]['message']['role'] = chunk.choices[0].delta.role
+        if chunk.choices[0].delta.content is not None:
+            res['choices'][0]['message']['content'] += chunk.choices[0].delta.content
+        if chunk.choices[0].delta.tool_calls is not None:
+            res['choices'][0]['message']['tool_calls'] = chunk.choices[0].delta.tool_calls
+        if chunk.choices[0].delta.function is not None:
+            res['choices'][0]['message']['function'] = chunk.choices[0].delta.function
+    return res
 __all__ = local_public_names(__name__)

pixeltable/functions/gemini.py CHANGED Viewed

@@ -14,6 +14,7 @@ from pixeltable import env
 @env.register_client('gemini')
 def _(api_key: str) -> None:
     import google.generativeai as genai
     genai.configure(api_key=api_key)
@@ -21,8 +22,8 @@ def _ensure_loaded() -> None:
     env.Env.get().get_client('gemini')
-@pxt.udf
-def generate_content(
+@pxt.udf(resource_pool='request-rate:gemini')
+async def generate_content(
     contents: str,
     *,
     model_name: str,
@@ -41,6 +42,10 @@ def generate_content(
     Generate content from the specified model. For additional details, see:
     <https://ai.google.dev/gemini-api/docs>
+    Request throttling:
+    Applies the rate limit set in the config (section `gemini`, key `rate_limit`). If no rate
+    limit is configured, uses a default of 600 RPM.
     __Requirements:__
     - `pip install google-generativeai`
@@ -77,5 +82,10 @@ def generate_content(
         presence_penalty=presence_penalty,
         frequency_penalty=frequency_penalty,
     )
-    response = model.generate_content(contents, generation_config=gc)
+    response = await model.generate_content_async(contents, generation_config=gc)
     return response.to_dict()
+@generate_content.resource_pool
+def _(model_name: str) -> str:
+    return f'request-rate:gemini:{model_name}'

pixeltable/functions/globals.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import builtins
-from typing import _GenericAlias  # type: ignore[attr-defined]
-from typing import Optional, Union
 import typing
+from typing import _GenericAlias  # type: ignore[attr-defined]  # isort: skip
+from typing import Optional, Union
 import sqlalchemy as sql
 import pixeltable.func as func
@@ -23,6 +24,7 @@ T = typing.TypeVar('T')
 @func.uda(allows_window=True, type_substitutions=({T: Optional[int]}, {T: Optional[float]}))  # type: ignore[misc]
 class sum(func.Aggregator, typing.Generic[T]):
     """Sums the selected integers or floats."""
     def __init__(self):
         self.sum: T = None
@@ -52,8 +54,18 @@ def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
     # TODO: should we have an "Any" type that can be used here?
     type_substitutions=tuple(
         {T: Optional[t]}  # type: ignore[misc]
-        for t in (ts.String, ts.Int, ts.Float, ts.Bool, ts.Timestamp,
-                  ts.Json, ts.Image, ts.Video, ts.Audio, ts.Document)
+        for t in (
+            ts.String,
+            ts.Int,
+            ts.Float,
+            ts.Bool,
+            ts.Timestamp,
+            ts.Json,
+            ts.Image,
+            ts.Video,
+            ts.Audio,
+            ts.Document,
+        )
     ),
 )
 class count(func.Aggregator, typing.Generic[T]):
@@ -75,7 +87,7 @@ def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
 @func.uda(
     allows_window=True,
-    type_substitutions=tuple({T: Optional[t]} for t in (str, int, float, bool, ts.Timestamp))  # type: ignore[misc]
+    type_substitutions=tuple({T: Optional[t]} for t in (str, int, float, bool, ts.Timestamp)),  # type: ignore[misc]
 )
 class min(func.Aggregator, typing.Generic[T]):
     def __init__(self):
@@ -105,7 +117,7 @@ def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
 @func.uda(
     allows_window=True,
-    type_substitutions=tuple({T: Optional[t]} for t in (str, int, float, bool, ts.Timestamp))  # type: ignore[misc]
+    type_substitutions=tuple({T: Optional[t]} for t in (str, int, float, bool, ts.Timestamp)),  # type: ignore[misc]
 )
 class max(func.Aggregator, typing.Generic[T]):
     def __init__(self):

pixeltable 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

Potentially problematic release.

pixeltable 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl