pixeltable 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (147)
  1. pixeltable/__init__.py +64 -11
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/catalog.py +50 -27
  5. pixeltable/catalog/column.py +27 -11
  6. pixeltable/catalog/dir.py +6 -4
  7. pixeltable/catalog/globals.py +8 -1
  8. pixeltable/catalog/insertable_table.py +25 -15
  9. pixeltable/catalog/named_function.py +10 -6
  10. pixeltable/catalog/path.py +3 -2
  11. pixeltable/catalog/path_dict.py +8 -6
  12. pixeltable/catalog/schema_object.py +2 -1
  13. pixeltable/catalog/table.py +123 -103
  14. pixeltable/catalog/table_version.py +292 -143
  15. pixeltable/catalog/table_version_path.py +8 -5
  16. pixeltable/catalog/view.py +68 -27
  17. pixeltable/dataframe.py +102 -72
  18. pixeltable/env.py +39 -23
  19. pixeltable/exec/__init__.py +2 -2
  20. pixeltable/exec/aggregation_node.py +10 -4
  21. pixeltable/exec/cache_prefetch_node.py +5 -3
  22. pixeltable/exec/component_iteration_node.py +9 -8
  23. pixeltable/exec/data_row_batch.py +21 -10
  24. pixeltable/exec/exec_context.py +10 -3
  25. pixeltable/exec/exec_node.py +23 -12
  26. pixeltable/exec/expr_eval/evaluators.py +18 -17
  27. pixeltable/exec/expr_eval/expr_eval_node.py +29 -16
  28. pixeltable/exec/expr_eval/globals.py +33 -11
  29. pixeltable/exec/expr_eval/row_buffer.py +5 -6
  30. pixeltable/exec/expr_eval/schedulers.py +170 -42
  31. pixeltable/exec/in_memory_data_node.py +8 -7
  32. pixeltable/exec/row_update_node.py +15 -5
  33. pixeltable/exec/sql_node.py +56 -27
  34. pixeltable/exprs/__init__.py +2 -2
  35. pixeltable/exprs/arithmetic_expr.py +57 -26
  36. pixeltable/exprs/array_slice.py +1 -1
  37. pixeltable/exprs/column_property_ref.py +2 -1
  38. pixeltable/exprs/column_ref.py +20 -15
  39. pixeltable/exprs/comparison.py +6 -2
  40. pixeltable/exprs/compound_predicate.py +1 -3
  41. pixeltable/exprs/data_row.py +2 -2
  42. pixeltable/exprs/expr.py +101 -72
  43. pixeltable/exprs/expr_dict.py +2 -1
  44. pixeltable/exprs/expr_set.py +3 -1
  45. pixeltable/exprs/function_call.py +39 -41
  46. pixeltable/exprs/globals.py +1 -0
  47. pixeltable/exprs/in_predicate.py +2 -2
  48. pixeltable/exprs/inline_expr.py +20 -17
  49. pixeltable/exprs/json_mapper.py +4 -2
  50. pixeltable/exprs/json_path.py +12 -18
  51. pixeltable/exprs/literal.py +5 -9
  52. pixeltable/exprs/method_ref.py +1 -0
  53. pixeltable/exprs/object_ref.py +1 -1
  54. pixeltable/exprs/row_builder.py +31 -16
  55. pixeltable/exprs/rowid_ref.py +14 -5
  56. pixeltable/exprs/similarity_expr.py +11 -6
  57. pixeltable/exprs/sql_element_cache.py +1 -1
  58. pixeltable/exprs/type_cast.py +24 -9
  59. pixeltable/ext/__init__.py +1 -0
  60. pixeltable/ext/functions/__init__.py +1 -0
  61. pixeltable/ext/functions/whisperx.py +2 -2
  62. pixeltable/ext/functions/yolox.py +11 -11
  63. pixeltable/func/aggregate_function.py +17 -13
  64. pixeltable/func/callable_function.py +6 -6
  65. pixeltable/func/expr_template_function.py +15 -14
  66. pixeltable/func/function.py +16 -16
  67. pixeltable/func/function_registry.py +11 -8
  68. pixeltable/func/globals.py +4 -2
  69. pixeltable/func/query_template_function.py +12 -13
  70. pixeltable/func/signature.py +18 -9
  71. pixeltable/func/tools.py +10 -17
  72. pixeltable/func/udf.py +106 -11
  73. pixeltable/functions/__init__.py +21 -2
  74. pixeltable/functions/anthropic.py +21 -15
  75. pixeltable/functions/fireworks.py +63 -5
  76. pixeltable/functions/gemini.py +13 -3
  77. pixeltable/functions/globals.py +18 -6
  78. pixeltable/functions/huggingface.py +20 -38
  79. pixeltable/functions/image.py +7 -3
  80. pixeltable/functions/json.py +1 -0
  81. pixeltable/functions/llama_cpp.py +1 -4
  82. pixeltable/functions/mistralai.py +31 -20
  83. pixeltable/functions/ollama.py +4 -18
  84. pixeltable/functions/openai.py +214 -109
  85. pixeltable/functions/replicate.py +11 -10
  86. pixeltable/functions/string.py +70 -7
  87. pixeltable/functions/timestamp.py +21 -8
  88. pixeltable/functions/together.py +66 -52
  89. pixeltable/functions/video.py +1 -0
  90. pixeltable/functions/vision.py +14 -11
  91. pixeltable/functions/whisper.py +2 -1
  92. pixeltable/globals.py +61 -28
  93. pixeltable/index/__init__.py +1 -1
  94. pixeltable/index/btree.py +5 -3
  95. pixeltable/index/embedding_index.py +15 -14
  96. pixeltable/io/__init__.py +1 -1
  97. pixeltable/io/external_store.py +30 -25
  98. pixeltable/io/fiftyone.py +6 -14
  99. pixeltable/io/globals.py +33 -27
  100. pixeltable/io/hf_datasets.py +3 -2
  101. pixeltable/io/label_studio.py +80 -71
  102. pixeltable/io/pandas.py +33 -9
  103. pixeltable/io/parquet.py +10 -13
  104. pixeltable/iterators/__init__.py +1 -0
  105. pixeltable/iterators/audio.py +205 -0
  106. pixeltable/iterators/document.py +19 -8
  107. pixeltable/iterators/image.py +6 -24
  108. pixeltable/iterators/string.py +3 -6
  109. pixeltable/iterators/video.py +1 -7
  110. pixeltable/metadata/__init__.py +9 -2
  111. pixeltable/metadata/converters/convert_10.py +2 -2
  112. pixeltable/metadata/converters/convert_15.py +1 -5
  113. pixeltable/metadata/converters/convert_16.py +2 -4
  114. pixeltable/metadata/converters/convert_17.py +2 -4
  115. pixeltable/metadata/converters/convert_18.py +2 -4
  116. pixeltable/metadata/converters/convert_19.py +2 -5
  117. pixeltable/metadata/converters/convert_20.py +1 -4
  118. pixeltable/metadata/converters/convert_21.py +4 -6
  119. pixeltable/metadata/converters/convert_22.py +1 -0
  120. pixeltable/metadata/converters/convert_23.py +5 -5
  121. pixeltable/metadata/converters/convert_24.py +12 -13
  122. pixeltable/metadata/converters/convert_26.py +23 -0
  123. pixeltable/metadata/converters/util.py +3 -4
  124. pixeltable/metadata/notes.py +1 -0
  125. pixeltable/metadata/schema.py +13 -2
  126. pixeltable/plan.py +173 -98
  127. pixeltable/store.py +42 -26
  128. pixeltable/type_system.py +130 -85
  129. pixeltable/utils/arrow.py +1 -7
  130. pixeltable/utils/coco.py +16 -17
  131. pixeltable/utils/code.py +1 -1
  132. pixeltable/utils/console_output.py +44 -0
  133. pixeltable/utils/description_helper.py +7 -7
  134. pixeltable/utils/documents.py +3 -1
  135. pixeltable/utils/filecache.py +13 -8
  136. pixeltable/utils/http_server.py +9 -8
  137. pixeltable/utils/media_store.py +2 -1
  138. pixeltable/utils/pytorch.py +11 -14
  139. pixeltable/utils/s3.py +1 -0
  140. pixeltable/utils/sql.py +1 -0
  141. pixeltable/utils/transactional_directory.py +2 -2
  142. {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/METADATA +7 -8
  143. pixeltable-0.3.3.dist-info/RECORD +163 -0
  144. pixeltable-0.3.1.dist-info/RECORD +0 -160
  145. {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/LICENSE +0 -0
  146. {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/WHEEL +0 -0
  147. {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/entry_points.txt +0 -0
pixeltable/func/tools.py CHANGED
@@ -21,6 +21,7 @@ if TYPE_CHECKING:
21
21
  # `Function`, which is not natively JSON-serializable; Pydantic provides a way of customizing its default
22
22
  # serialization behavior, whereas dataclasses do not.)
23
23
 
24
+
24
25
  class Tool(pydantic.BaseModel):
25
26
  # Allow arbitrary types so that we can include a Pixeltable function in the schema.
26
27
  # We will implement a model_serializer to ensure the Tool model can be serialized.
@@ -41,24 +42,16 @@ class Tool(pydantic.BaseModel):
41
42
  'description': self.description or self.fn._docstring(),
42
43
  'parameters': {
43
44
  'type': 'object',
44
- 'properties': {
45
- param.name: param.col_type._to_json_schema()
46
- for param in self.parameters.values()
47
- }
45
+ 'properties': {param.name: param.col_type._to_json_schema() for param in self.parameters.values()},
48
46
  },
49
- 'required': [
50
- param.name for param in self.parameters.values() if not param.col_type.nullable
51
- ],
47
+ 'required': [param.name for param in self.parameters.values() if not param.col_type.nullable],
52
48
  'additionalProperties': False, # TODO Handle kwargs?
53
49
  }
54
50
 
55
51
  # `tool_calls` must be in standardized tool invocation format:
56
52
  # {tool_name: {'args': {name1: value1, name2: value2, ...}}, ...}
57
53
  def invoke(self, tool_calls: 'exprs.Expr') -> 'exprs.FunctionCall':
58
- kwargs = {
59
- param.name: self.__extract_tool_arg(param, tool_calls)
60
- for param in self.parameters.values()
61
- }
54
+ kwargs = {param.name: self.__extract_tool_arg(param, tool_calls) for param in self.parameters.values()}
62
55
  return self.fn(**kwargs)
63
56
 
64
57
  def __extract_tool_arg(self, param: Parameter, tool_calls: 'exprs.Expr') -> 'exprs.Expr':
@@ -93,10 +86,7 @@ class Tools(pydantic.BaseModel):
93
86
  def _invoke(self, tool_calls: 'exprs.Expr') -> 'exprs.InlineDict':
94
87
  from pixeltable import exprs
95
88
 
96
- return exprs.InlineDict({
97
- tool.name or tool.fn.name: tool.invoke(tool_calls)
98
- for tool in self.tools
99
- })
89
+ return exprs.InlineDict({tool.name or tool.fn.name: tool.invoke(tool_calls) for tool in self.tools})
100
90
 
101
91
  def choice(
102
92
  self,
@@ -111,7 +101,8 @@ class Tools(pydantic.BaseModel):
111
101
  if tool is not None:
112
102
  try:
113
103
  tool_obj = next(
114
- t for t in self.tools
104
+ t
105
+ for t in self.tools
115
106
  if (isinstance(tool, Function) and t.fn == tool)
116
107
  or (isinstance(tool, str) and (t.name or t.fn.name) == tool)
117
108
  )
@@ -144,7 +135,9 @@ def _extract_bool_tool_arg(tool_calls: dict[str, Any], func_name: str, param_nam
144
135
  T = TypeVar('T')
145
136
 
146
137
 
147
- def _extract_arg(eval_fn: Callable[[Any], T], tool_calls: dict[str, Any], func_name: str, param_name: str) -> Optional[T]:
138
+ def _extract_arg(
139
+ eval_fn: Callable[[Any], T], tool_calls: dict[str, Any], func_name: str, param_name: str
140
+ ) -> Optional[T]:
148
141
  if func_name in tool_calls:
149
142
  arguments = tool_calls[func_name]['args']
150
143
  if param_name in arguments:
pixeltable/func/udf.py CHANGED
@@ -1,16 +1,20 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Callable, Optional, Sequence, overload
3
+ import inspect
4
+ from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence, overload
4
5
 
5
6
  import pixeltable.exceptions as excs
6
7
  import pixeltable.type_system as ts
8
+ from pixeltable import catalog
7
9
 
8
10
  from .callable_function import CallableFunction
9
- from .expr_template_function import ExprTemplateFunction, ExprTemplate
10
- from .function import Function
11
+ from .expr_template_function import ExprTemplate, ExprTemplateFunction
11
12
  from .function_registry import FunctionRegistry
12
13
  from .globals import validate_symbol_path
13
- from .signature import Signature
14
+ from .signature import Parameter, Signature
15
+
16
+ if TYPE_CHECKING:
17
+ from pixeltable import exprs
14
18
 
15
19
 
16
20
  # Decorator invoked without parentheses: @pxt.udf
@@ -28,10 +32,17 @@ def udf(
28
32
  is_property: bool = False,
29
33
  resource_pool: Optional[str] = None,
30
34
  type_substitutions: Optional[Sequence[dict]] = None,
31
- _force_stored: bool = False
35
+ _force_stored: bool = False,
32
36
  ) -> Callable[[Callable], CallableFunction]: ...
33
37
 
34
38
 
39
+ # pxt.udf() called explicitly on a Table:
40
+ @overload
41
+ def udf(
42
+ table: catalog.Table, /, *, return_value: Any = None, description: Optional[str] = None
43
+ ) -> ExprTemplateFunction: ...
44
+
45
+
35
46
  def udf(*args, **kwargs):
36
47
  """A decorator to create a Function from a function definition.
37
48
 
@@ -41,13 +52,19 @@ def udf(*args, **kwargs):
41
52
  ... return x + 1
42
53
  """
43
54
  if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):
44
-
45
55
  # Decorator invoked without parentheses: @pxt.udf
46
56
  # Simply call make_function with defaults.
47
57
  return make_function(decorated_fn=args[0])
48
58
 
49
- else:
59
+ elif len(args) == 1 and isinstance(args[0], catalog.Table):
60
+ # pxt.udf() called explicitly on a Table
61
+ return_value = kwargs.pop('return_value', None)
62
+ description = kwargs.pop('description', None)
63
+ if len(kwargs) > 0:
64
+ raise excs.Error(f'Invalid udf kwargs: {", ".join(kwargs.keys())}')
65
+ return from_table(args[0], return_value, description)
50
66
 
67
+ else:
51
68
  # Decorator schema invoked with parentheses: @pxt.udf(**kwargs)
52
69
  # Create a decorator for the specified schema.
53
70
  batch_size = kwargs.pop('batch_size', None)
@@ -71,7 +88,7 @@ def udf(*args, **kwargs):
71
88
  is_property=is_property,
72
89
  resource_pool=resource_pool,
73
90
  type_substitutions=type_substitutions,
74
- force_stored=force_stored
91
+ force_stored=force_stored,
75
92
  )
76
93
 
77
94
  return decorator
@@ -88,7 +105,7 @@ def make_function(
88
105
  resource_pool: Optional[str] = None,
89
106
  type_substitutions: Optional[Sequence[dict]] = None,
90
107
  function_name: Optional[str] = None,
91
- force_stored: bool = False
108
+ force_stored: bool = False,
92
109
  ) -> CallableFunction:
93
110
  """
94
111
  Constructs a `CallableFunction` from the specified parameters.
@@ -129,7 +146,7 @@ def make_function(
129
146
  raise excs.Error(f'Cannot specify both `is_method` and `is_property` (in function `{function_name}`)')
130
147
  if is_property and len(sig.parameters) != 1:
131
148
  raise excs.Error(
132
- f"`is_property=True` expects a UDF with exactly 1 parameter, but `{function_name}` has {len(sig.parameters)}"
149
+ f'`is_property=True` expects a UDF with exactly 1 parameter, but `{function_name}` has {len(sig.parameters)}'
133
150
  )
134
151
  if (is_method or is_property) and function_path is None:
135
152
  raise excs.Error('Stored functions cannot be declared using `is_method` or `is_property`')
@@ -164,7 +181,7 @@ def make_function(
164
181
  self_name=function_name,
165
182
  batch_size=batch_size,
166
183
  is_method=is_method,
167
- is_property=is_property
184
+ is_property=is_property,
168
185
  )
169
186
  if resource_pool is not None:
170
187
  result.resource_pool(lambda: resource_pool)
@@ -177,12 +194,15 @@ def make_function(
177
194
 
178
195
  return result
179
196
 
197
+
180
198
  @overload
181
199
  def expr_udf(py_fn: Callable) -> ExprTemplateFunction: ...
182
200
 
201
+
183
202
  @overload
184
203
  def expr_udf(*, param_types: Optional[list[ts.ColumnType]] = None) -> Callable[[Callable], ExprTemplateFunction]: ...
185
204
 
205
+
186
206
  def expr_udf(*args: Any, **kwargs: Any) -> Any:
187
207
  def make_expr_template(py_fn: Callable, param_types: Optional[list[ts.ColumnType]]) -> ExprTemplateFunction:
188
208
  if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
@@ -197,6 +217,7 @@ def expr_udf(*args: Any, **kwargs: Any) -> Any:
197
217
  # construct Signature from the function signature
198
218
  sig = Signature.create(py_fn=py_fn, param_types=param_types, return_type=ts.InvalidType())
199
219
  import pixeltable.exprs as exprs
220
+
200
221
  var_exprs = [exprs.Variable(param.name, param.col_type) for param in sig.parameters.values()]
201
222
  # call the function with the parameter expressions to construct an Expr with parameters
202
223
  expr = py_fn(*var_exprs)
@@ -212,3 +233,77 @@ def expr_udf(*args: Any, **kwargs: Any) -> Any:
212
233
  else:
213
234
  assert len(args) == 0 and len(kwargs) == 1 and 'param_types' in kwargs
214
235
  return lambda py_fn: make_expr_template(py_fn, kwargs['param_types'])
236
+
237
+
238
def from_table(
    tbl: catalog.Table, return_value: Optional['exprs.Expr'], description: Optional[str]
) -> ExprTemplateFunction:
    """
    Constructs an `ExprTemplateFunction` from a `Table`.

    The constructed function will have one parameter for each data column in the table, which is optional (with
    default None) if and only if its column type is nullable. The output of the function is a dict of the form
    {
        'data_col_1': Variable('data_col_1', col_type_1),
        'data_col_2': Variable('data_col_2', col_type_2),
        ...,
        'computed_col_1': computed_expr_1,
        'computed_col_2': computed_expr_2,
        ...
    }
    where the computed expressions correspond to fully substituted expressions for the computed columns of the
    table. In the substitution, ColumnRefs of data columns are replaced by Variable expressions, and ColumnRefs of
    computed columns are replaced by the (previously constructed) expressions for those columns.

    If an optional `return_value` is specified, then it is used as the return value of the function in place of
    the default dict. The same substitutions will be applied to the `return_value` expression.

    Args:
        tbl: The table to build the function from; its bases (`tbl._bases`) are included as well.
        return_value: Optional expression (or object convertible via `Expr.from_object`) to return instead of
            the default dict of all columns.
        description: Optional description used as the first part of the function's docstring. If None, the
            table comment is used; if that is empty or missing, a generic description naming the table.

    Returns:
        An `ExprTemplateFunction` named after the table, with a generated docstring listing its parameters.
    """
    from pixeltable import exprs

    # We must traverse the ancestors in order from base to derived, so that substitutions for base columns
    # already exist when the computed columns of derived tables are processed.
    ancestors = [tbl] + tbl._bases
    ancestors.reverse()

    subst: dict[exprs.Expr, exprs.Expr] = {}
    result_dict: dict[str, exprs.Expr] = {}
    params: list[Parameter] = []

    for t in ancestors:
        for name, col in t._tbl_version.cols_by_name.items():
            assert name not in result_dict, f'Column name is not unique: {name}'
            if col.is_computed:
                # Computed column. Apply any existing substitutions and add the new expression to the subst dict.
                # Bind the value returned by substitute() (as is done for `return_value` below) rather than
                # relying on in-place mutation: if the root of the expression is itself a substituted
                # ColumnRef, the returned expression is presumably a different object than the receiver.
                # NOTE(review): confirm against Expr.substitute().
                new_expr = col.value_expr.copy().substitute(subst)
                subst[t[name]] = new_expr  # Substitute new_expr for ColumnRefs to this column
                result_dict[name] = new_expr
            else:
                # Data column. Include it as a parameter and add a variable expression to the subst dict.
                var = exprs.Variable(name, col.col_type)
                subst[t[name]] = var  # Substitute var for ColumnRefs to this column
                result_dict[name] = var
                # Since this is a data column, it becomes a UDF parameter.
                # If the column is nullable, then the parameter will have a default value of None.
                default_value = None if col.col_type.nullable else inspect.Parameter.empty
                # Use the public alias for the parameter kind instead of the private `inspect._ParameterKind`.
                param = Parameter(name, col.col_type, inspect.Parameter.POSITIONAL_OR_KEYWORD, default_value)
                params.append(param)

    if return_value is None:
        return_value = exprs.InlineDict(result_dict)
    else:
        return_value = exprs.Expr.from_object(return_value)
        return_value = return_value.copy().substitute(subst)

    if description is None:
        # Default description is the table comment
        description = tbl._comment
    if not description:
        # Covers both an empty string and a missing (None) table comment.
        description = f"UDF for table '{tbl._name}'"

    # TODO: Use column comments as parameter descriptions, when we have them
    argstring = '\n'.join(f' {param.name}: of type `{param.col_type}`' for param in params)
    docstring = f'{description}\n\nArgs:\n{argstring}'

    template = ExprTemplate(return_value, Signature(return_value.col_type, params))
    fn = ExprTemplateFunction([template], name=tbl._name)
    fn.__doc__ = docstring
    return fn
@@ -1,7 +1,26 @@
1
1
  from pixeltable.utils.code import local_public_names
2
2
 
3
- from . import (anthropic, audio, fireworks, gemini, huggingface, image, json, llama_cpp, math, mistralai, ollama,
4
- openai, string, timestamp, together, video, vision, whisper)
3
+ from . import (
4
+ anthropic,
5
+ audio,
6
+ fireworks,
7
+ gemini,
8
+ huggingface,
9
+ image,
10
+ json,
11
+ llama_cpp,
12
+ math,
13
+ mistralai,
14
+ ollama,
15
+ openai,
16
+ replicate,
17
+ string,
18
+ timestamp,
19
+ together,
20
+ video,
21
+ vision,
22
+ whisper,
23
+ )
5
24
  from .globals import count, max, mean, min, sum
6
25
 
7
26
  __all__ = local_public_names(__name__, exclude=['globals']) + local_public_names(globals.__name__)
@@ -8,7 +8,7 @@ the [Working with Anthropic](https://pixeltable.readme.io/docs/working-with-anth
8
8
  import datetime
9
9
  import json
10
10
  import logging
11
- from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union, cast, Iterable
11
+ from typing import TYPE_CHECKING, Any, Iterable, Optional, TypeVar, Union, cast
12
12
 
13
13
  import httpx
14
14
 
@@ -22,13 +22,16 @@ if TYPE_CHECKING:
22
22
 
23
23
  _logger = logging.getLogger('pixeltable')
24
24
 
25
+
25
26
  @env.register_client('anthropic')
26
27
  def _(api_key: str) -> 'anthropic.AsyncAnthropic':
27
28
  import anthropic
29
+
28
30
  return anthropic.AsyncAnthropic(
29
31
  api_key=api_key,
30
32
  # recommended to increase limits for async client to avoid connection errors
31
- http_client = httpx.AsyncClient(limits=httpx.Limits(max_keepalive_connections=100, max_connections=500)))
33
+ http_client=httpx.AsyncClient(limits=httpx.Limits(max_keepalive_connections=100, max_connections=500)),
34
+ )
32
35
 
33
36
 
34
37
  def _anthropic_client() -> 'anthropic.AsyncAnthropic':
@@ -36,7 +39,6 @@ def _anthropic_client() -> 'anthropic.AsyncAnthropic':
36
39
 
37
40
 
38
41
  class AnthropicRateLimitsInfo(env.RateLimitsInfo):
39
-
40
42
  def __init__(self):
41
43
  super().__init__(self._get_request_resources)
42
44
 
@@ -80,6 +82,7 @@ async def messages(
80
82
  tools: Optional[list[dict]] = None,
81
83
  top_k: Optional[int] = None,
82
84
  top_p: Optional[float] = None,
85
+ timeout: Optional[float] = None,
83
86
  ) -> dict:
84
87
  """
85
88
  Create a Message.
@@ -87,6 +90,10 @@ async def messages(
87
90
  Equivalent to the Anthropic `messages` API endpoint.
88
91
  For additional details, see: <https://docs.anthropic.com/en/api/messages>
89
92
 
93
+ Request throttling:
94
+ Uses the rate limit-related headers returned by the API to throttle requests adaptively, based on available
95
+ request and token capacity. No configuration is necessary.
96
+
90
97
  __Requirements:__
91
98
 
92
99
  - `pip install anthropic`
@@ -105,7 +112,7 @@ async def messages(
105
112
  to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
106
113
 
107
114
  >>> msgs = [{'role': 'user', 'content': tbl.prompt}]
108
- ... tbl['response'] = messages(msgs, model='claude-3-haiku-20240307')
115
+ ... tbl.add_computed_column(response=messages(msgs, model='claude-3-haiku-20240307'))
109
116
  """
110
117
 
111
118
  # it doesn't look like count_tokens() actually exists in the current version of the library
@@ -137,6 +144,11 @@ async def messages(
137
144
  if not tool_choice['parallel_tool_calls']:
138
145
  tool_choice_['disable_parallel_tool_use'] = True
139
146
 
147
+ # make sure the pool info exists prior to making the request
148
+ resource_pool_id = f'rate-limits:anthropic:{model}'
149
+ rate_limits_info = env.Env.get().get_resource_pool_info(resource_pool_id, AnthropicRateLimitsInfo)
150
+ assert isinstance(rate_limits_info, env.RateLimitsInfo)
151
+
140
152
  # TODO: timeouts should be set system-wide and be user-configurable
141
153
  from anthropic.types import MessageParam
142
154
 
@@ -153,7 +165,7 @@ async def messages(
153
165
  tool_choice=_opt(cast(Any, tool_choice_)),
154
166
  top_k=_opt(top_k),
155
167
  top_p=_opt(top_p),
156
- timeout=10,
168
+ timeout=_opt(timeout),
157
169
  )
158
170
 
159
171
  requests_limit_str = result.headers.get('anthropic-ratelimit-requests-limit')
@@ -178,13 +190,11 @@ async def messages(
178
190
  if retry_after_str is not None:
179
191
  _logger.debug(f'retry-after: {retry_after_str}')
180
192
 
181
- resource_pool_id = f'rate-limits:anthropic:{model}'
182
- rate_limits_info = env.Env.get().get_resource_pool_info(resource_pool_id, AnthropicRateLimitsInfo)
183
- assert isinstance(rate_limits_info, env.RateLimitsInfo)
184
193
  rate_limits_info.record(
185
194
  requests=(requests_limit, requests_remaining, requests_reset),
186
195
  input_tokens=(input_tokens_limit, input_tokens_remaining, input_tokens_reset),
187
- output_tokens=(output_tokens_limit, output_tokens_remaining, output_tokens_reset))
196
+ output_tokens=(output_tokens_limit, output_tokens_remaining, output_tokens_reset),
197
+ )
188
198
 
189
199
  result_dict = json.loads(result.text)
190
200
  return result_dict
@@ -204,12 +214,7 @@ def invoke_tools(tools: Tools, response: exprs.Expr) -> exprs.InlineDict:
204
214
  def _anthropic_response_to_pxt_tool_calls(response: dict) -> Optional[dict]:
205
215
  anthropic_tool_calls = [r for r in response['content'] if r['type'] == 'tool_use']
206
216
  if len(anthropic_tool_calls) > 0:
207
- return {
208
- tool_call['name']: {
209
- 'args': tool_call['input']
210
- }
211
- for tool_call in anthropic_tool_calls
212
- }
217
+ return {tool_call['name']: {'args': tool_call['input']} for tool_call in anthropic_tool_calls}
213
218
  return None
214
219
 
215
220
 
@@ -218,6 +223,7 @@ _T = TypeVar('_T')
218
223
 
219
224
  def _opt(arg: _T) -> Union[_T, 'anthropic.NotGiven']:
220
225
  import anthropic
226
+
221
227
  return arg if arg is not None else anthropic.NOT_GIVEN
222
228
 
223
229
 
@@ -5,7 +5,7 @@ first `pip install fireworks-ai` and configure your Fireworks AI credentials, as
5
5
  the [Working with Fireworks](https://pixeltable.readme.io/docs/working-with-fireworks) tutorial.
6
6
  """
7
7
 
8
- from typing import Optional, TYPE_CHECKING
8
+ from typing import TYPE_CHECKING, Optional
9
9
 
10
10
  import pixeltable as pxt
11
11
  from pixeltable import env
@@ -26,8 +26,8 @@ def _fireworks_client() -> 'fireworks.client.Fireworks':
26
26
  return env.Env.get().get_client('fireworks')
27
27
 
28
28
 
29
- @pxt.udf
30
- def chat_completions(
29
+ @pxt.udf(resource_pool='request-rate:fireworks')
30
+ async def chat_completions(
31
31
  messages: list[dict[str, str]],
32
32
  *,
33
33
  model: str,
@@ -35,6 +35,7 @@ def chat_completions(
35
35
  top_k: Optional[int] = None,
36
36
  top_p: Optional[float] = None,
37
37
  temperature: Optional[float] = None,
38
+ request_timeout: Optional[int] = None,
38
39
  ) -> dict:
39
40
  """
40
41
  Creates a model response for the given chat conversation.
@@ -42,6 +43,10 @@ def chat_completions(
42
43
  Equivalent to the Fireworks AI `chat/completions` API endpoint.
43
44
  For additional details, see: [https://docs.fireworks.ai/api-reference/post-chatcompletions](https://docs.fireworks.ai/api-reference/post-chatcompletions)
44
45
 
46
+ Request throttling:
47
+ Applies the rate limit set in the config (section `fireworks`, key `rate_limit`). If no rate
48
+ limit is configured, uses a default of 600 RPM.
49
+
45
50
  __Requirements:__
46
51
 
47
52
  - `pip install fireworks-ai`
@@ -60,11 +65,64 @@ def chat_completions(
60
65
  to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
61
66
 
62
67
  >>> messages = [{'role': 'user', 'content': tbl.prompt}]
63
- ... tbl['response'] = chat_completions(messages, model='accounts/fireworks/models/mixtral-8x22b-instruct')
68
+ ... tbl.add_computed_column(response=chat_completions(messages, model='accounts/fireworks/models/mixtral-8x22b-instruct'))
64
69
  """
65
70
  kwargs = {'max_tokens': max_tokens, 'top_k': top_k, 'top_p': top_p, 'temperature': temperature}
66
71
  kwargs_not_none = {k: v for k, v in kwargs.items() if v is not None}
67
- return _fireworks_client().chat.completions.create(model=model, messages=messages, **kwargs_not_none).dict()
72
+
73
+ # for debugging purposes:
74
+ # res_sync = _fireworks_client().chat.completions.create(model=model, messages=messages, **kwargs_not_none)
75
+ # res_sync_dict = res_sync.dict()
76
+
77
+ if request_timeout is None:
78
+ request_timeout = env.Env.get().config.get_int_value('timeout', section='fireworks') or 600
79
+ # TODO: this timeout doesn't really work, I think it only applies to returning the stream, but not to the timing
80
+ # of the chunks; addressing this would require a timeout for the task running this udf
81
+ stream = _fireworks_client().chat.completions.acreate(
82
+ model=model, messages=messages, request_timeout=request_timeout, **kwargs_not_none
83
+ )
84
+ chunks = []
85
+ async for chunk in stream:
86
+ chunks.append(chunk)
87
+
88
+ res = {
89
+ 'id': chunks[0].id,
90
+ 'object': 'chat.completion',
91
+ 'created': chunks[0].created,
92
+ 'model': chunks[0].model,
93
+ 'choices': [
94
+ {
95
+ 'index': 0,
96
+ 'message': {
97
+ 'role': None,
98
+ 'content': '',
99
+ 'tool_calls': None,
100
+ 'tool_call_id': None,
101
+ 'function': None,
102
+ 'name': None,
103
+ },
104
+ 'finish_reason': None,
105
+ 'logprobs': None,
106
+ 'raw_output': None,
107
+ }
108
+ ],
109
+ 'usage': {},
110
+ }
111
+ for chunk in chunks:
112
+ d = chunk.dict()
113
+ if 'usage' in d and d['usage'] is not None:
114
+ res['usage'] = d['usage']
115
+ if chunk.choices[0].finish_reason is not None:
116
+ res['choices'][0]['finish_reason'] = chunk.choices[0].finish_reason
117
+ if chunk.choices[0].delta.role is not None:
118
+ res['choices'][0]['message']['role'] = chunk.choices[0].delta.role
119
+ if chunk.choices[0].delta.content is not None:
120
+ res['choices'][0]['message']['content'] += chunk.choices[0].delta.content
121
+ if chunk.choices[0].delta.tool_calls is not None:
122
+ res['choices'][0]['message']['tool_calls'] = chunk.choices[0].delta.tool_calls
123
+ if chunk.choices[0].delta.function is not None:
124
+ res['choices'][0]['message']['function'] = chunk.choices[0].delta.function
125
+ return res
68
126
 
69
127
 
70
128
  __all__ = local_public_names(__name__)
@@ -14,6 +14,7 @@ from pixeltable import env
14
14
  @env.register_client('gemini')
15
15
  def _(api_key: str) -> None:
16
16
  import google.generativeai as genai
17
+
17
18
  genai.configure(api_key=api_key)
18
19
 
19
20
 
@@ -21,8 +22,8 @@ def _ensure_loaded() -> None:
21
22
  env.Env.get().get_client('gemini')
22
23
 
23
24
 
24
- @pxt.udf
25
- def generate_content(
25
+ @pxt.udf(resource_pool='request-rate:gemini')
26
+ async def generate_content(
26
27
  contents: str,
27
28
  *,
28
29
  model_name: str,
@@ -41,6 +42,10 @@ def generate_content(
41
42
  Generate content from the specified model. For additional details, see:
42
43
  <https://ai.google.dev/gemini-api/docs>
43
44
 
45
+ Request throttling:
46
+ Applies the rate limit set in the config (section `gemini`, key `rate_limit`). If no rate
47
+ limit is configured, uses a default of 600 RPM.
48
+
44
49
  __Requirements:__
45
50
 
46
51
  - `pip install google-generativeai`
@@ -77,5 +82,10 @@ def generate_content(
77
82
  presence_penalty=presence_penalty,
78
83
  frequency_penalty=frequency_penalty,
79
84
  )
80
- response = model.generate_content(contents, generation_config=gc)
85
+ response = await model.generate_content_async(contents, generation_config=gc)
81
86
  return response.to_dict()
87
+
88
+
89
+ @generate_content.resource_pool
90
+ def _(model_name: str) -> str:
91
+ return f'request-rate:gemini:{model_name}'
@@ -1,8 +1,9 @@
1
1
  import builtins
2
- from typing import _GenericAlias # type: ignore[attr-defined]
3
- from typing import Optional, Union
4
2
  import typing
5
3
 
4
+ from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
5
+ from typing import Optional, Union
6
+
6
7
  import sqlalchemy as sql
7
8
 
8
9
  import pixeltable.func as func
@@ -23,6 +24,7 @@ T = typing.TypeVar('T')
23
24
  @func.uda(allows_window=True, type_substitutions=({T: Optional[int]}, {T: Optional[float]})) # type: ignore[misc]
24
25
  class sum(func.Aggregator, typing.Generic[T]):
25
26
  """Sums the selected integers or floats."""
27
+
26
28
  def __init__(self):
27
29
  self.sum: T = None
28
30
 
@@ -52,8 +54,18 @@ def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
52
54
  # TODO: should we have an "Any" type that can be used here?
53
55
  type_substitutions=tuple(
54
56
  {T: Optional[t]} # type: ignore[misc]
55
- for t in (ts.String, ts.Int, ts.Float, ts.Bool, ts.Timestamp,
56
- ts.Json, ts.Image, ts.Video, ts.Audio, ts.Document)
57
+ for t in (
58
+ ts.String,
59
+ ts.Int,
60
+ ts.Float,
61
+ ts.Bool,
62
+ ts.Timestamp,
63
+ ts.Json,
64
+ ts.Image,
65
+ ts.Video,
66
+ ts.Audio,
67
+ ts.Document,
68
+ )
57
69
  ),
58
70
  )
59
71
  class count(func.Aggregator, typing.Generic[T]):
@@ -75,7 +87,7 @@ def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
75
87
 
76
88
  @func.uda(
77
89
  allows_window=True,
78
- type_substitutions=tuple({T: Optional[t]} for t in (str, int, float, bool, ts.Timestamp)) # type: ignore[misc]
90
+ type_substitutions=tuple({T: Optional[t]} for t in (str, int, float, bool, ts.Timestamp)), # type: ignore[misc]
79
91
  )
80
92
  class min(func.Aggregator, typing.Generic[T]):
81
93
  def __init__(self):
@@ -105,7 +117,7 @@ def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
105
117
 
106
118
  @func.uda(
107
119
  allows_window=True,
108
- type_substitutions=tuple({T: Optional[t]} for t in (str, int, float, bool, ts.Timestamp)) # type: ignore[misc]
120
+ type_substitutions=tuple({T: Optional[t]} for t in (str, int, float, bool, ts.Timestamp)), # type: ignore[misc]
109
121
  )
110
122
  class max(func.Aggregator, typing.Generic[T]):
111
123
  def __init__(self):