pixeltable 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +64 -11
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/catalog.py +50 -27
- pixeltable/catalog/column.py +27 -11
- pixeltable/catalog/dir.py +6 -4
- pixeltable/catalog/globals.py +8 -1
- pixeltable/catalog/insertable_table.py +22 -12
- pixeltable/catalog/named_function.py +10 -6
- pixeltable/catalog/path.py +3 -2
- pixeltable/catalog/path_dict.py +8 -6
- pixeltable/catalog/schema_object.py +2 -1
- pixeltable/catalog/table.py +121 -101
- pixeltable/catalog/table_version.py +291 -142
- pixeltable/catalog/table_version_path.py +8 -5
- pixeltable/catalog/view.py +67 -26
- pixeltable/dataframe.py +102 -72
- pixeltable/env.py +20 -21
- pixeltable/exec/__init__.py +2 -2
- pixeltable/exec/aggregation_node.py +10 -4
- pixeltable/exec/cache_prefetch_node.py +5 -3
- pixeltable/exec/component_iteration_node.py +9 -8
- pixeltable/exec/data_row_batch.py +21 -10
- pixeltable/exec/exec_context.py +10 -3
- pixeltable/exec/exec_node.py +23 -12
- pixeltable/exec/expr_eval/evaluators.py +13 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +24 -15
- pixeltable/exec/expr_eval/globals.py +30 -7
- pixeltable/exec/expr_eval/row_buffer.py +5 -6
- pixeltable/exec/expr_eval/schedulers.py +151 -31
- pixeltable/exec/in_memory_data_node.py +8 -7
- pixeltable/exec/row_update_node.py +15 -5
- pixeltable/exec/sql_node.py +56 -27
- pixeltable/exprs/__init__.py +2 -2
- pixeltable/exprs/arithmetic_expr.py +57 -26
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +2 -1
- pixeltable/exprs/column_ref.py +20 -15
- pixeltable/exprs/comparison.py +6 -2
- pixeltable/exprs/compound_predicate.py +1 -3
- pixeltable/exprs/data_row.py +2 -2
- pixeltable/exprs/expr.py +101 -72
- pixeltable/exprs/expr_dict.py +2 -1
- pixeltable/exprs/expr_set.py +3 -1
- pixeltable/exprs/function_call.py +39 -41
- pixeltable/exprs/globals.py +1 -0
- pixeltable/exprs/in_predicate.py +2 -2
- pixeltable/exprs/inline_expr.py +20 -17
- pixeltable/exprs/json_mapper.py +4 -2
- pixeltable/exprs/json_path.py +12 -18
- pixeltable/exprs/literal.py +5 -9
- pixeltable/exprs/method_ref.py +1 -0
- pixeltable/exprs/object_ref.py +1 -1
- pixeltable/exprs/row_builder.py +32 -17
- pixeltable/exprs/rowid_ref.py +14 -5
- pixeltable/exprs/similarity_expr.py +11 -6
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/type_cast.py +24 -9
- pixeltable/ext/__init__.py +1 -0
- pixeltable/ext/functions/__init__.py +1 -0
- pixeltable/ext/functions/whisperx.py +2 -2
- pixeltable/ext/functions/yolox.py +11 -11
- pixeltable/func/aggregate_function.py +17 -13
- pixeltable/func/callable_function.py +6 -6
- pixeltable/func/expr_template_function.py +15 -14
- pixeltable/func/function.py +16 -16
- pixeltable/func/function_registry.py +11 -8
- pixeltable/func/globals.py +4 -2
- pixeltable/func/query_template_function.py +12 -13
- pixeltable/func/signature.py +18 -9
- pixeltable/func/tools.py +10 -17
- pixeltable/func/udf.py +106 -11
- pixeltable/functions/__init__.py +21 -2
- pixeltable/functions/anthropic.py +16 -12
- pixeltable/functions/fireworks.py +63 -5
- pixeltable/functions/gemini.py +13 -3
- pixeltable/functions/globals.py +18 -6
- pixeltable/functions/huggingface.py +20 -38
- pixeltable/functions/image.py +7 -3
- pixeltable/functions/json.py +1 -0
- pixeltable/functions/llama_cpp.py +1 -4
- pixeltable/functions/mistralai.py +31 -20
- pixeltable/functions/ollama.py +4 -18
- pixeltable/functions/openai.py +201 -108
- pixeltable/functions/replicate.py +11 -10
- pixeltable/functions/string.py +70 -7
- pixeltable/functions/timestamp.py +21 -8
- pixeltable/functions/together.py +66 -52
- pixeltable/functions/video.py +1 -0
- pixeltable/functions/vision.py +14 -11
- pixeltable/functions/whisper.py +2 -1
- pixeltable/globals.py +60 -26
- pixeltable/index/__init__.py +1 -1
- pixeltable/index/btree.py +5 -3
- pixeltable/index/embedding_index.py +15 -14
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/external_store.py +30 -25
- pixeltable/io/fiftyone.py +6 -14
- pixeltable/io/globals.py +33 -27
- pixeltable/io/hf_datasets.py +2 -1
- pixeltable/io/label_studio.py +77 -68
- pixeltable/io/pandas.py +33 -9
- pixeltable/io/parquet.py +9 -12
- pixeltable/iterators/__init__.py +1 -0
- pixeltable/iterators/audio.py +205 -0
- pixeltable/iterators/document.py +19 -8
- pixeltable/iterators/image.py +6 -24
- pixeltable/iterators/string.py +3 -6
- pixeltable/iterators/video.py +1 -7
- pixeltable/metadata/__init__.py +7 -1
- pixeltable/metadata/converters/convert_10.py +2 -2
- pixeltable/metadata/converters/convert_15.py +1 -5
- pixeltable/metadata/converters/convert_16.py +2 -4
- pixeltable/metadata/converters/convert_17.py +2 -4
- pixeltable/metadata/converters/convert_18.py +2 -4
- pixeltable/metadata/converters/convert_19.py +2 -5
- pixeltable/metadata/converters/convert_20.py +1 -4
- pixeltable/metadata/converters/convert_21.py +4 -6
- pixeltable/metadata/converters/convert_22.py +1 -0
- pixeltable/metadata/converters/convert_23.py +5 -5
- pixeltable/metadata/converters/convert_24.py +12 -13
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/util.py +3 -4
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +13 -2
- pixeltable/plan.py +173 -98
- pixeltable/store.py +42 -26
- pixeltable/type_system.py +62 -54
- pixeltable/utils/arrow.py +1 -2
- pixeltable/utils/coco.py +16 -17
- pixeltable/utils/code.py +1 -1
- pixeltable/utils/console_output.py +6 -3
- pixeltable/utils/description_helper.py +7 -7
- pixeltable/utils/documents.py +3 -1
- pixeltable/utils/filecache.py +12 -7
- pixeltable/utils/http_server.py +9 -8
- pixeltable/utils/media_store.py +2 -1
- pixeltable/utils/pytorch.py +11 -14
- pixeltable/utils/s3.py +1 -0
- pixeltable/utils/sql.py +1 -0
- pixeltable/utils/transactional_directory.py +2 -2
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/METADATA +6 -8
- pixeltable-0.3.3.dist-info/RECORD +163 -0
- pixeltable-0.3.2.dist-info/RECORD +0 -161
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/entry_points.txt +0 -0
pixeltable/func/tools.py
CHANGED
|
@@ -21,6 +21,7 @@ if TYPE_CHECKING:
|
|
|
21
21
|
# `Function`, which is not natively JSON-serializable; Pydantic provides a way of customizing its default
|
|
22
22
|
# serialization behavior, whereas dataclasses do not.)
|
|
23
23
|
|
|
24
|
+
|
|
24
25
|
class Tool(pydantic.BaseModel):
|
|
25
26
|
# Allow arbitrary types so that we can include a Pixeltable function in the schema.
|
|
26
27
|
# We will implement a model_serializer to ensure the Tool model can be serialized.
|
|
@@ -41,24 +42,16 @@ class Tool(pydantic.BaseModel):
|
|
|
41
42
|
'description': self.description or self.fn._docstring(),
|
|
42
43
|
'parameters': {
|
|
43
44
|
'type': 'object',
|
|
44
|
-
'properties': {
|
|
45
|
-
param.name: param.col_type._to_json_schema()
|
|
46
|
-
for param in self.parameters.values()
|
|
47
|
-
}
|
|
45
|
+
'properties': {param.name: param.col_type._to_json_schema() for param in self.parameters.values()},
|
|
48
46
|
},
|
|
49
|
-
'required': [
|
|
50
|
-
param.name for param in self.parameters.values() if not param.col_type.nullable
|
|
51
|
-
],
|
|
47
|
+
'required': [param.name for param in self.parameters.values() if not param.col_type.nullable],
|
|
52
48
|
'additionalProperties': False, # TODO Handle kwargs?
|
|
53
49
|
}
|
|
54
50
|
|
|
55
51
|
# `tool_calls` must be in standardized tool invocation format:
|
|
56
52
|
# {tool_name: {'args': {name1: value1, name2: value2, ...}}, ...}
|
|
57
53
|
def invoke(self, tool_calls: 'exprs.Expr') -> 'exprs.FunctionCall':
|
|
58
|
-
kwargs = {
|
|
59
|
-
param.name: self.__extract_tool_arg(param, tool_calls)
|
|
60
|
-
for param in self.parameters.values()
|
|
61
|
-
}
|
|
54
|
+
kwargs = {param.name: self.__extract_tool_arg(param, tool_calls) for param in self.parameters.values()}
|
|
62
55
|
return self.fn(**kwargs)
|
|
63
56
|
|
|
64
57
|
def __extract_tool_arg(self, param: Parameter, tool_calls: 'exprs.Expr') -> 'exprs.Expr':
|
|
@@ -93,10 +86,7 @@ class Tools(pydantic.BaseModel):
|
|
|
93
86
|
def _invoke(self, tool_calls: 'exprs.Expr') -> 'exprs.InlineDict':
|
|
94
87
|
from pixeltable import exprs
|
|
95
88
|
|
|
96
|
-
return exprs.InlineDict({
|
|
97
|
-
tool.name or tool.fn.name: tool.invoke(tool_calls)
|
|
98
|
-
for tool in self.tools
|
|
99
|
-
})
|
|
89
|
+
return exprs.InlineDict({tool.name or tool.fn.name: tool.invoke(tool_calls) for tool in self.tools})
|
|
100
90
|
|
|
101
91
|
def choice(
|
|
102
92
|
self,
|
|
@@ -111,7 +101,8 @@ class Tools(pydantic.BaseModel):
|
|
|
111
101
|
if tool is not None:
|
|
112
102
|
try:
|
|
113
103
|
tool_obj = next(
|
|
114
|
-
t
|
|
104
|
+
t
|
|
105
|
+
for t in self.tools
|
|
115
106
|
if (isinstance(tool, Function) and t.fn == tool)
|
|
116
107
|
or (isinstance(tool, str) and (t.name or t.fn.name) == tool)
|
|
117
108
|
)
|
|
@@ -144,7 +135,9 @@ def _extract_bool_tool_arg(tool_calls: dict[str, Any], func_name: str, param_nam
|
|
|
144
135
|
T = TypeVar('T')
|
|
145
136
|
|
|
146
137
|
|
|
147
|
-
def _extract_arg(
|
|
138
|
+
def _extract_arg(
|
|
139
|
+
eval_fn: Callable[[Any], T], tool_calls: dict[str, Any], func_name: str, param_name: str
|
|
140
|
+
) -> Optional[T]:
|
|
148
141
|
if func_name in tool_calls:
|
|
149
142
|
arguments = tool_calls[func_name]['args']
|
|
150
143
|
if param_name in arguments:
|
pixeltable/func/udf.py
CHANGED
|
@@ -1,16 +1,20 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
import inspect
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence, overload
|
|
4
5
|
|
|
5
6
|
import pixeltable.exceptions as excs
|
|
6
7
|
import pixeltable.type_system as ts
|
|
8
|
+
from pixeltable import catalog
|
|
7
9
|
|
|
8
10
|
from .callable_function import CallableFunction
|
|
9
|
-
from .expr_template_function import
|
|
10
|
-
from .function import Function
|
|
11
|
+
from .expr_template_function import ExprTemplate, ExprTemplateFunction
|
|
11
12
|
from .function_registry import FunctionRegistry
|
|
12
13
|
from .globals import validate_symbol_path
|
|
13
|
-
from .signature import Signature
|
|
14
|
+
from .signature import Parameter, Signature
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from pixeltable import exprs
|
|
14
18
|
|
|
15
19
|
|
|
16
20
|
# Decorator invoked without parentheses: @pxt.udf
|
|
@@ -28,10 +32,17 @@ def udf(
|
|
|
28
32
|
is_property: bool = False,
|
|
29
33
|
resource_pool: Optional[str] = None,
|
|
30
34
|
type_substitutions: Optional[Sequence[dict]] = None,
|
|
31
|
-
_force_stored: bool = False
|
|
35
|
+
_force_stored: bool = False,
|
|
32
36
|
) -> Callable[[Callable], CallableFunction]: ...
|
|
33
37
|
|
|
34
38
|
|
|
39
|
+
# pxt.udf() called explicitly on a Table:
|
|
40
|
+
@overload
|
|
41
|
+
def udf(
|
|
42
|
+
table: catalog.Table, /, *, return_value: Any = None, description: Optional[str] = None
|
|
43
|
+
) -> ExprTemplateFunction: ...
|
|
44
|
+
|
|
45
|
+
|
|
35
46
|
def udf(*args, **kwargs):
|
|
36
47
|
"""A decorator to create a Function from a function definition.
|
|
37
48
|
|
|
@@ -41,13 +52,19 @@ def udf(*args, **kwargs):
|
|
|
41
52
|
... return x + 1
|
|
42
53
|
"""
|
|
43
54
|
if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):
|
|
44
|
-
|
|
45
55
|
# Decorator invoked without parentheses: @pxt.udf
|
|
46
56
|
# Simply call make_function with defaults.
|
|
47
57
|
return make_function(decorated_fn=args[0])
|
|
48
58
|
|
|
49
|
-
|
|
59
|
+
elif len(args) == 1 and isinstance(args[0], catalog.Table):
|
|
60
|
+
# pxt.udf() called explicitly on a Table
|
|
61
|
+
return_value = kwargs.pop('return_value', None)
|
|
62
|
+
description = kwargs.pop('description', None)
|
|
63
|
+
if len(kwargs) > 0:
|
|
64
|
+
raise excs.Error(f'Invalid udf kwargs: {", ".join(kwargs.keys())}')
|
|
65
|
+
return from_table(args[0], return_value, description)
|
|
50
66
|
|
|
67
|
+
else:
|
|
51
68
|
# Decorator schema invoked with parentheses: @pxt.udf(**kwargs)
|
|
52
69
|
# Create a decorator for the specified schema.
|
|
53
70
|
batch_size = kwargs.pop('batch_size', None)
|
|
@@ -71,7 +88,7 @@ def udf(*args, **kwargs):
|
|
|
71
88
|
is_property=is_property,
|
|
72
89
|
resource_pool=resource_pool,
|
|
73
90
|
type_substitutions=type_substitutions,
|
|
74
|
-
force_stored=force_stored
|
|
91
|
+
force_stored=force_stored,
|
|
75
92
|
)
|
|
76
93
|
|
|
77
94
|
return decorator
|
|
@@ -88,7 +105,7 @@ def make_function(
|
|
|
88
105
|
resource_pool: Optional[str] = None,
|
|
89
106
|
type_substitutions: Optional[Sequence[dict]] = None,
|
|
90
107
|
function_name: Optional[str] = None,
|
|
91
|
-
force_stored: bool = False
|
|
108
|
+
force_stored: bool = False,
|
|
92
109
|
) -> CallableFunction:
|
|
93
110
|
"""
|
|
94
111
|
Constructs a `CallableFunction` from the specified parameters.
|
|
@@ -129,7 +146,7 @@ def make_function(
|
|
|
129
146
|
raise excs.Error(f'Cannot specify both `is_method` and `is_property` (in function `{function_name}`)')
|
|
130
147
|
if is_property and len(sig.parameters) != 1:
|
|
131
148
|
raise excs.Error(
|
|
132
|
-
f
|
|
149
|
+
f'`is_property=True` expects a UDF with exactly 1 parameter, but `{function_name}` has {len(sig.parameters)}'
|
|
133
150
|
)
|
|
134
151
|
if (is_method or is_property) and function_path is None:
|
|
135
152
|
raise excs.Error('Stored functions cannot be declared using `is_method` or `is_property`')
|
|
@@ -164,7 +181,7 @@ def make_function(
|
|
|
164
181
|
self_name=function_name,
|
|
165
182
|
batch_size=batch_size,
|
|
166
183
|
is_method=is_method,
|
|
167
|
-
is_property=is_property
|
|
184
|
+
is_property=is_property,
|
|
168
185
|
)
|
|
169
186
|
if resource_pool is not None:
|
|
170
187
|
result.resource_pool(lambda: resource_pool)
|
|
@@ -177,12 +194,15 @@ def make_function(
|
|
|
177
194
|
|
|
178
195
|
return result
|
|
179
196
|
|
|
197
|
+
|
|
180
198
|
@overload
|
|
181
199
|
def expr_udf(py_fn: Callable) -> ExprTemplateFunction: ...
|
|
182
200
|
|
|
201
|
+
|
|
183
202
|
@overload
|
|
184
203
|
def expr_udf(*, param_types: Optional[list[ts.ColumnType]] = None) -> Callable[[Callable], ExprTemplateFunction]: ...
|
|
185
204
|
|
|
205
|
+
|
|
186
206
|
def expr_udf(*args: Any, **kwargs: Any) -> Any:
|
|
187
207
|
def make_expr_template(py_fn: Callable, param_types: Optional[list[ts.ColumnType]]) -> ExprTemplateFunction:
|
|
188
208
|
if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
|
|
@@ -197,6 +217,7 @@ def expr_udf(*args: Any, **kwargs: Any) -> Any:
|
|
|
197
217
|
# construct Signature from the function signature
|
|
198
218
|
sig = Signature.create(py_fn=py_fn, param_types=param_types, return_type=ts.InvalidType())
|
|
199
219
|
import pixeltable.exprs as exprs
|
|
220
|
+
|
|
200
221
|
var_exprs = [exprs.Variable(param.name, param.col_type) for param in sig.parameters.values()]
|
|
201
222
|
# call the function with the parameter expressions to construct an Expr with parameters
|
|
202
223
|
expr = py_fn(*var_exprs)
|
|
@@ -212,3 +233,77 @@ def expr_udf(*args: Any, **kwargs: Any) -> Any:
|
|
|
212
233
|
else:
|
|
213
234
|
assert len(args) == 0 and len(kwargs) == 1 and 'param_types' in kwargs
|
|
214
235
|
return lambda py_fn: make_expr_template(py_fn, kwargs['param_types'])
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def from_table(
|
|
239
|
+
tbl: catalog.Table, return_value: Optional['exprs.Expr'], description: Optional[str]
|
|
240
|
+
) -> ExprTemplateFunction:
|
|
241
|
+
"""
|
|
242
|
+
Constructs an `ExprTemplateFunction` from a `Table`.
|
|
243
|
+
|
|
244
|
+
The constructed function will have one parameter for each data column in the table, which is optional (with
|
|
245
|
+
default None) if and only if its column type is nullable. The output of the function is a dict of the form
|
|
246
|
+
{
|
|
247
|
+
'data_col_1': Variable('data_col_1', col_type_1),
|
|
248
|
+
'data_col_2': Variable('data_col_2', col_type_2),
|
|
249
|
+
...,
|
|
250
|
+
'computed_col_1': computed_expr_1,
|
|
251
|
+
'computed_col_2': computed_expr_2,
|
|
252
|
+
...
|
|
253
|
+
}
|
|
254
|
+
where the computed expressions correspond to fully substituted expressions for the computed columns of the
|
|
255
|
+
table. In the substitution, ColumnRefs of data columns are replaced by Variable expressions, and ColumnRefs of
|
|
256
|
+
computed columns are replaced by the (previously constructed) expressions for those columns.
|
|
257
|
+
|
|
258
|
+
If an optional `return_value` is specified, then it is used as the return value of the function in place of
|
|
259
|
+
the default dict. The same substitutions will be applied to the `return_value` expression.
|
|
260
|
+
"""
|
|
261
|
+
from pixeltable import exprs
|
|
262
|
+
|
|
263
|
+
ancestors = [tbl] + tbl._bases
|
|
264
|
+
ancestors.reverse() # We must traverse the ancestors in order from base to derived
|
|
265
|
+
|
|
266
|
+
subst: dict[exprs.Expr, exprs.Expr] = {}
|
|
267
|
+
result_dict: dict[str, exprs.Expr] = {}
|
|
268
|
+
params: list[Parameter] = []
|
|
269
|
+
|
|
270
|
+
for t in ancestors:
|
|
271
|
+
for name, col in t._tbl_version.cols_by_name.items():
|
|
272
|
+
assert name not in result_dict, f'Column name is not unique: {name}'
|
|
273
|
+
if col.is_computed:
|
|
274
|
+
# Computed column. Apply any existing substitutions and add the new expression to the subst dict.
|
|
275
|
+
new_expr = col.value_expr.copy()
|
|
276
|
+
new_expr.substitute(subst)
|
|
277
|
+
subst[t[name]] = new_expr # Substitute new_expr for ColumnRefs to this column
|
|
278
|
+
result_dict[name] = new_expr
|
|
279
|
+
else:
|
|
280
|
+
# Data column. Include it as a parameter and add a variable expression as the subst dict.
|
|
281
|
+
var = exprs.Variable(name, col.col_type)
|
|
282
|
+
subst[t[name]] = var # Substitute var for ColumnRefs to this column
|
|
283
|
+
result_dict[name] = var
|
|
284
|
+
# Since this is a data column, it becomes a UDF parameter.
|
|
285
|
+
# If the column is nullable, then the parameter will have a default value of None.
|
|
286
|
+
default_value = None if col.col_type.nullable else inspect.Parameter.empty
|
|
287
|
+
param = Parameter(name, col.col_type, inspect._ParameterKind.POSITIONAL_OR_KEYWORD, default_value)
|
|
288
|
+
params.append(param)
|
|
289
|
+
|
|
290
|
+
if return_value is None:
|
|
291
|
+
return_value = exprs.InlineDict(result_dict)
|
|
292
|
+
else:
|
|
293
|
+
return_value = exprs.Expr.from_object(return_value)
|
|
294
|
+
return_value = return_value.copy().substitute(subst)
|
|
295
|
+
|
|
296
|
+
if description is None:
|
|
297
|
+
# Default description is the table comment
|
|
298
|
+
description = tbl._comment
|
|
299
|
+
if len(description) == 0:
|
|
300
|
+
description = f"UDF for table '{tbl._name}'"
|
|
301
|
+
|
|
302
|
+
# TODO: Use column comments as parameter descriptions, when we have them
|
|
303
|
+
argstring = '\n'.join(f' {param.name}: of type `{param.col_type}`' for param in params)
|
|
304
|
+
docstring = f'{description}\n\nArgs:\n{argstring}'
|
|
305
|
+
|
|
306
|
+
template = ExprTemplate(return_value, Signature(return_value.col_type, params))
|
|
307
|
+
fn = ExprTemplateFunction([template], name=tbl._name)
|
|
308
|
+
fn.__doc__ = docstring
|
|
309
|
+
return fn
|
pixeltable/functions/__init__.py
CHANGED
|
@@ -1,7 +1,26 @@
|
|
|
1
1
|
from pixeltable.utils.code import local_public_names
|
|
2
2
|
|
|
3
|
-
from . import (
|
|
4
|
-
|
|
3
|
+
from . import (
|
|
4
|
+
anthropic,
|
|
5
|
+
audio,
|
|
6
|
+
fireworks,
|
|
7
|
+
gemini,
|
|
8
|
+
huggingface,
|
|
9
|
+
image,
|
|
10
|
+
json,
|
|
11
|
+
llama_cpp,
|
|
12
|
+
math,
|
|
13
|
+
mistralai,
|
|
14
|
+
ollama,
|
|
15
|
+
openai,
|
|
16
|
+
replicate,
|
|
17
|
+
string,
|
|
18
|
+
timestamp,
|
|
19
|
+
together,
|
|
20
|
+
video,
|
|
21
|
+
vision,
|
|
22
|
+
whisper,
|
|
23
|
+
)
|
|
5
24
|
from .globals import count, max, mean, min, sum
|
|
6
25
|
|
|
7
26
|
__all__ = local_public_names(__name__, exclude=['globals']) + local_public_names(globals.__name__)
|
|
@@ -8,7 +8,7 @@ the [Working with Anthropic](https://pixeltable.readme.io/docs/working-with-anth
|
|
|
8
8
|
import datetime
|
|
9
9
|
import json
|
|
10
10
|
import logging
|
|
11
|
-
from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union, cast
|
|
11
|
+
from typing import TYPE_CHECKING, Any, Iterable, Optional, TypeVar, Union, cast
|
|
12
12
|
|
|
13
13
|
import httpx
|
|
14
14
|
|
|
@@ -22,13 +22,16 @@ if TYPE_CHECKING:
|
|
|
22
22
|
|
|
23
23
|
_logger = logging.getLogger('pixeltable')
|
|
24
24
|
|
|
25
|
+
|
|
25
26
|
@env.register_client('anthropic')
|
|
26
27
|
def _(api_key: str) -> 'anthropic.AsyncAnthropic':
|
|
27
28
|
import anthropic
|
|
29
|
+
|
|
28
30
|
return anthropic.AsyncAnthropic(
|
|
29
31
|
api_key=api_key,
|
|
30
32
|
# recommended to increase limits for async client to avoid connection errors
|
|
31
|
-
http_client
|
|
33
|
+
http_client=httpx.AsyncClient(limits=httpx.Limits(max_keepalive_connections=100, max_connections=500)),
|
|
34
|
+
)
|
|
32
35
|
|
|
33
36
|
|
|
34
37
|
def _anthropic_client() -> 'anthropic.AsyncAnthropic':
|
|
@@ -36,7 +39,6 @@ def _anthropic_client() -> 'anthropic.AsyncAnthropic':
|
|
|
36
39
|
|
|
37
40
|
|
|
38
41
|
class AnthropicRateLimitsInfo(env.RateLimitsInfo):
|
|
39
|
-
|
|
40
42
|
def __init__(self):
|
|
41
43
|
super().__init__(self._get_request_resources)
|
|
42
44
|
|
|
@@ -80,6 +82,7 @@ async def messages(
|
|
|
80
82
|
tools: Optional[list[dict]] = None,
|
|
81
83
|
top_k: Optional[int] = None,
|
|
82
84
|
top_p: Optional[float] = None,
|
|
85
|
+
timeout: Optional[float] = None,
|
|
83
86
|
) -> dict:
|
|
84
87
|
"""
|
|
85
88
|
Create a Message.
|
|
@@ -87,6 +90,10 @@ async def messages(
|
|
|
87
90
|
Equivalent to the Anthropic `messages` API endpoint.
|
|
88
91
|
For additional details, see: <https://docs.anthropic.com/en/api/messages>
|
|
89
92
|
|
|
93
|
+
Request throttling:
|
|
94
|
+
Uses the rate limit-related headers returned by the API to throttle requests adaptively, based on available
|
|
95
|
+
request and token capacity. No configuration is necessary.
|
|
96
|
+
|
|
90
97
|
__Requirements:__
|
|
91
98
|
|
|
92
99
|
- `pip install anthropic`
|
|
@@ -105,7 +112,7 @@ async def messages(
|
|
|
105
112
|
to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
|
|
106
113
|
|
|
107
114
|
>>> msgs = [{'role': 'user', 'content': tbl.prompt}]
|
|
108
|
-
... tbl
|
|
115
|
+
... tbl.add_computed_column(response= messages(msgs, model='claude-3-haiku-20240307'))
|
|
109
116
|
"""
|
|
110
117
|
|
|
111
118
|
# it doesn't look like count_tokens() actually exists in the current version of the library
|
|
@@ -158,7 +165,7 @@ async def messages(
|
|
|
158
165
|
tool_choice=_opt(cast(Any, tool_choice_)),
|
|
159
166
|
top_k=_opt(top_k),
|
|
160
167
|
top_p=_opt(top_p),
|
|
161
|
-
timeout=
|
|
168
|
+
timeout=_opt(timeout),
|
|
162
169
|
)
|
|
163
170
|
|
|
164
171
|
requests_limit_str = result.headers.get('anthropic-ratelimit-requests-limit')
|
|
@@ -186,7 +193,8 @@ async def messages(
|
|
|
186
193
|
rate_limits_info.record(
|
|
187
194
|
requests=(requests_limit, requests_remaining, requests_reset),
|
|
188
195
|
input_tokens=(input_tokens_limit, input_tokens_remaining, input_tokens_reset),
|
|
189
|
-
output_tokens=(output_tokens_limit, output_tokens_remaining, output_tokens_reset)
|
|
196
|
+
output_tokens=(output_tokens_limit, output_tokens_remaining, output_tokens_reset),
|
|
197
|
+
)
|
|
190
198
|
|
|
191
199
|
result_dict = json.loads(result.text)
|
|
192
200
|
return result_dict
|
|
@@ -206,12 +214,7 @@ def invoke_tools(tools: Tools, response: exprs.Expr) -> exprs.InlineDict:
|
|
|
206
214
|
def _anthropic_response_to_pxt_tool_calls(response: dict) -> Optional[dict]:
|
|
207
215
|
anthropic_tool_calls = [r for r in response['content'] if r['type'] == 'tool_use']
|
|
208
216
|
if len(anthropic_tool_calls) > 0:
|
|
209
|
-
return {
|
|
210
|
-
tool_call['name']: {
|
|
211
|
-
'args': tool_call['input']
|
|
212
|
-
}
|
|
213
|
-
for tool_call in anthropic_tool_calls
|
|
214
|
-
}
|
|
217
|
+
return {tool_call['name']: {'args': tool_call['input']} for tool_call in anthropic_tool_calls}
|
|
215
218
|
return None
|
|
216
219
|
|
|
217
220
|
|
|
@@ -220,6 +223,7 @@ _T = TypeVar('_T')
|
|
|
220
223
|
|
|
221
224
|
def _opt(arg: _T) -> Union[_T, 'anthropic.NotGiven']:
|
|
222
225
|
import anthropic
|
|
226
|
+
|
|
223
227
|
return arg if arg is not None else anthropic.NOT_GIVEN
|
|
224
228
|
|
|
225
229
|
|
|
@@ -5,7 +5,7 @@ first `pip install fireworks-ai` and configure your Fireworks AI credentials, as
|
|
|
5
5
|
the [Working with Fireworks](https://pixeltable.readme.io/docs/working-with-fireworks) tutorial.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
from typing import
|
|
8
|
+
from typing import TYPE_CHECKING, Optional
|
|
9
9
|
|
|
10
10
|
import pixeltable as pxt
|
|
11
11
|
from pixeltable import env
|
|
@@ -26,8 +26,8 @@ def _fireworks_client() -> 'fireworks.client.Fireworks':
|
|
|
26
26
|
return env.Env.get().get_client('fireworks')
|
|
27
27
|
|
|
28
28
|
|
|
29
|
-
@pxt.udf
|
|
30
|
-
def chat_completions(
|
|
29
|
+
@pxt.udf(resource_pool='request-rate:fireworks')
|
|
30
|
+
async def chat_completions(
|
|
31
31
|
messages: list[dict[str, str]],
|
|
32
32
|
*,
|
|
33
33
|
model: str,
|
|
@@ -35,6 +35,7 @@ def chat_completions(
|
|
|
35
35
|
top_k: Optional[int] = None,
|
|
36
36
|
top_p: Optional[float] = None,
|
|
37
37
|
temperature: Optional[float] = None,
|
|
38
|
+
request_timeout: Optional[int] = None,
|
|
38
39
|
) -> dict:
|
|
39
40
|
"""
|
|
40
41
|
Creates a model response for the given chat conversation.
|
|
@@ -42,6 +43,10 @@ def chat_completions(
|
|
|
42
43
|
Equivalent to the Fireworks AI `chat/completions` API endpoint.
|
|
43
44
|
For additional details, see: [https://docs.fireworks.ai/api-reference/post-chatcompletions](https://docs.fireworks.ai/api-reference/post-chatcompletions)
|
|
44
45
|
|
|
46
|
+
Request throttling:
|
|
47
|
+
Applies the rate limit set in the config (section `fireworks`, key `rate_limit`). If no rate
|
|
48
|
+
limit is configured, uses a default of 600 RPM.
|
|
49
|
+
|
|
45
50
|
__Requirements:__
|
|
46
51
|
|
|
47
52
|
- `pip install fireworks-ai`
|
|
@@ -60,11 +65,64 @@ def chat_completions(
|
|
|
60
65
|
to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
|
|
61
66
|
|
|
62
67
|
>>> messages = [{'role': 'user', 'content': tbl.prompt}]
|
|
63
|
-
... tbl
|
|
68
|
+
... tbl.add_computed_column(response=chat_completions(messages, model='accounts/fireworks/models/mixtral-8x22b-instruct'))
|
|
64
69
|
"""
|
|
65
70
|
kwargs = {'max_tokens': max_tokens, 'top_k': top_k, 'top_p': top_p, 'temperature': temperature}
|
|
66
71
|
kwargs_not_none = {k: v for k, v in kwargs.items() if v is not None}
|
|
67
|
-
|
|
72
|
+
|
|
73
|
+
# for debugging purposes:
|
|
74
|
+
# res_sync = _fireworks_client().chat.completions.create(model=model, messages=messages, **kwargs_not_none)
|
|
75
|
+
# res_sync_dict = res_sync.dict()
|
|
76
|
+
|
|
77
|
+
if request_timeout is None:
|
|
78
|
+
request_timeout = env.Env.get().config.get_int_value('timeout', section='fireworks') or 600
|
|
79
|
+
# TODO: this timeout doesn't really work, I think it only applies to returning the stream, but not to the timing
|
|
80
|
+
# of the chunks; addressing this would require a timeout for the task running this udf
|
|
81
|
+
stream = _fireworks_client().chat.completions.acreate(
|
|
82
|
+
model=model, messages=messages, request_timeout=request_timeout, **kwargs_not_none
|
|
83
|
+
)
|
|
84
|
+
chunks = []
|
|
85
|
+
async for chunk in stream:
|
|
86
|
+
chunks.append(chunk)
|
|
87
|
+
|
|
88
|
+
res = {
|
|
89
|
+
'id': chunks[0].id,
|
|
90
|
+
'object': 'chat.completion',
|
|
91
|
+
'created': chunks[0].created,
|
|
92
|
+
'model': chunks[0].model,
|
|
93
|
+
'choices': [
|
|
94
|
+
{
|
|
95
|
+
'index': 0,
|
|
96
|
+
'message': {
|
|
97
|
+
'role': None,
|
|
98
|
+
'content': '',
|
|
99
|
+
'tool_calls': None,
|
|
100
|
+
'tool_call_id': None,
|
|
101
|
+
'function': None,
|
|
102
|
+
'name': None,
|
|
103
|
+
},
|
|
104
|
+
'finish_reason': None,
|
|
105
|
+
'logprobs': None,
|
|
106
|
+
'raw_output': None,
|
|
107
|
+
}
|
|
108
|
+
],
|
|
109
|
+
'usage': {},
|
|
110
|
+
}
|
|
111
|
+
for chunk in chunks:
|
|
112
|
+
d = chunk.dict()
|
|
113
|
+
if 'usage' in d and d['usage'] is not None:
|
|
114
|
+
res['usage'] = d['usage']
|
|
115
|
+
if chunk.choices[0].finish_reason is not None:
|
|
116
|
+
res['choices'][0]['finish_reason'] = chunk.choices[0].finish_reason
|
|
117
|
+
if chunk.choices[0].delta.role is not None:
|
|
118
|
+
res['choices'][0]['message']['role'] = chunk.choices[0].delta.role
|
|
119
|
+
if chunk.choices[0].delta.content is not None:
|
|
120
|
+
res['choices'][0]['message']['content'] += chunk.choices[0].delta.content
|
|
121
|
+
if chunk.choices[0].delta.tool_calls is not None:
|
|
122
|
+
res['choices'][0]['message']['tool_calls'] = chunk.choices[0].delta.tool_calls
|
|
123
|
+
if chunk.choices[0].delta.function is not None:
|
|
124
|
+
res['choices'][0]['message']['function'] = chunk.choices[0].delta.function
|
|
125
|
+
return res
|
|
68
126
|
|
|
69
127
|
|
|
70
128
|
__all__ = local_public_names(__name__)
|
pixeltable/functions/gemini.py
CHANGED
|
@@ -14,6 +14,7 @@ from pixeltable import env
|
|
|
14
14
|
@env.register_client('gemini')
|
|
15
15
|
def _(api_key: str) -> None:
|
|
16
16
|
import google.generativeai as genai
|
|
17
|
+
|
|
17
18
|
genai.configure(api_key=api_key)
|
|
18
19
|
|
|
19
20
|
|
|
@@ -21,8 +22,8 @@ def _ensure_loaded() -> None:
|
|
|
21
22
|
env.Env.get().get_client('gemini')
|
|
22
23
|
|
|
23
24
|
|
|
24
|
-
@pxt.udf
|
|
25
|
-
def generate_content(
|
|
25
|
+
@pxt.udf(resource_pool='request-rate:gemini')
|
|
26
|
+
async def generate_content(
|
|
26
27
|
contents: str,
|
|
27
28
|
*,
|
|
28
29
|
model_name: str,
|
|
@@ -41,6 +42,10 @@ def generate_content(
|
|
|
41
42
|
Generate content from the specified model. For additional details, see:
|
|
42
43
|
<https://ai.google.dev/gemini-api/docs>
|
|
43
44
|
|
|
45
|
+
Request throttling:
|
|
46
|
+
Applies the rate limit set in the config (section `gemini`, key `rate_limit`). If no rate
|
|
47
|
+
limit is configured, uses a default of 600 RPM.
|
|
48
|
+
|
|
44
49
|
__Requirements:__
|
|
45
50
|
|
|
46
51
|
- `pip install google-generativeai`
|
|
@@ -77,5 +82,10 @@ def generate_content(
|
|
|
77
82
|
presence_penalty=presence_penalty,
|
|
78
83
|
frequency_penalty=frequency_penalty,
|
|
79
84
|
)
|
|
80
|
-
response = model.
|
|
85
|
+
response = await model.generate_content_async(contents, generation_config=gc)
|
|
81
86
|
return response.to_dict()
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@generate_content.resource_pool
|
|
90
|
+
def _(model_name: str) -> str:
|
|
91
|
+
return f'request-rate:gemini:{model_name}'
|
pixeltable/functions/globals.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import builtins
|
|
2
|
-
from typing import _GenericAlias # type: ignore[attr-defined]
|
|
3
|
-
from typing import Optional, Union
|
|
4
2
|
import typing
|
|
5
3
|
|
|
4
|
+
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
5
|
+
from typing import Optional, Union
|
|
6
|
+
|
|
6
7
|
import sqlalchemy as sql
|
|
7
8
|
|
|
8
9
|
import pixeltable.func as func
|
|
@@ -23,6 +24,7 @@ T = typing.TypeVar('T')
|
|
|
23
24
|
@func.uda(allows_window=True, type_substitutions=({T: Optional[int]}, {T: Optional[float]})) # type: ignore[misc]
|
|
24
25
|
class sum(func.Aggregator, typing.Generic[T]):
|
|
25
26
|
"""Sums the selected integers or floats."""
|
|
27
|
+
|
|
26
28
|
def __init__(self):
|
|
27
29
|
self.sum: T = None
|
|
28
30
|
|
|
@@ -52,8 +54,18 @@ def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
|
|
|
52
54
|
# TODO: should we have an "Any" type that can be used here?
|
|
53
55
|
type_substitutions=tuple(
|
|
54
56
|
{T: Optional[t]} # type: ignore[misc]
|
|
55
|
-
for t in (
|
|
56
|
-
|
|
57
|
+
for t in (
|
|
58
|
+
ts.String,
|
|
59
|
+
ts.Int,
|
|
60
|
+
ts.Float,
|
|
61
|
+
ts.Bool,
|
|
62
|
+
ts.Timestamp,
|
|
63
|
+
ts.Json,
|
|
64
|
+
ts.Image,
|
|
65
|
+
ts.Video,
|
|
66
|
+
ts.Audio,
|
|
67
|
+
ts.Document,
|
|
68
|
+
)
|
|
57
69
|
),
|
|
58
70
|
)
|
|
59
71
|
class count(func.Aggregator, typing.Generic[T]):
|
|
@@ -75,7 +87,7 @@ def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
|
|
|
75
87
|
|
|
76
88
|
@func.uda(
|
|
77
89
|
allows_window=True,
|
|
78
|
-
type_substitutions=tuple({T: Optional[t]} for t in (str, int, float, bool, ts.Timestamp)) # type: ignore[misc]
|
|
90
|
+
type_substitutions=tuple({T: Optional[t]} for t in (str, int, float, bool, ts.Timestamp)), # type: ignore[misc]
|
|
79
91
|
)
|
|
80
92
|
class min(func.Aggregator, typing.Generic[T]):
|
|
81
93
|
def __init__(self):
|
|
@@ -105,7 +117,7 @@ def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
|
|
|
105
117
|
|
|
106
118
|
@func.uda(
|
|
107
119
|
allows_window=True,
|
|
108
|
-
type_substitutions=tuple({T: Optional[t]} for t in (str, int, float, bool, ts.Timestamp)) # type: ignore[misc]
|
|
120
|
+
type_substitutions=tuple({T: Optional[t]} for t in (str, int, float, bool, ts.Timestamp)), # type: ignore[misc]
|
|
109
121
|
)
|
|
110
122
|
class max(func.Aggregator, typing.Generic[T]):
|
|
111
123
|
def __init__(self):
|