pixeltable 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -0
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +9 -2
- pixeltable/catalog/column.py +1 -1
- pixeltable/catalog/dir.py +1 -1
- pixeltable/catalog/table.py +3 -1
- pixeltable/catalog/table_version.py +12 -2
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/catalog/view.py +64 -20
- pixeltable/dataframe.py +11 -6
- pixeltable/env.py +12 -0
- pixeltable/exec/expr_eval/evaluators.py +4 -2
- pixeltable/exec/expr_eval/expr_eval_node.py +4 -1
- pixeltable/exprs/comparison.py +8 -4
- pixeltable/exprs/data_row.py +9 -7
- pixeltable/exprs/expr.py +2 -2
- pixeltable/exprs/function_call.py +155 -313
- pixeltable/exprs/json_mapper.py +25 -8
- pixeltable/exprs/json_path.py +6 -5
- pixeltable/exprs/object_ref.py +16 -5
- pixeltable/exprs/row_builder.py +10 -3
- pixeltable/func/aggregate_function.py +29 -15
- pixeltable/func/callable_function.py +11 -8
- pixeltable/func/expr_template_function.py +3 -9
- pixeltable/func/function.py +148 -74
- pixeltable/func/signature.py +65 -30
- pixeltable/func/tools.py +26 -26
- pixeltable/func/udf.py +1 -1
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +9 -3
- pixeltable/functions/deepseek.py +121 -0
- pixeltable/functions/image.py +7 -7
- pixeltable/functions/openai.py +30 -13
- pixeltable/functions/video.py +14 -7
- pixeltable/globals.py +14 -3
- pixeltable/index/embedding_index.py +4 -13
- pixeltable/io/globals.py +88 -77
- pixeltable/io/hf_datasets.py +34 -34
- pixeltable/io/pandas.py +75 -76
- pixeltable/io/parquet.py +19 -27
- pixeltable/io/utils.py +115 -0
- pixeltable/iterators/audio.py +2 -1
- pixeltable/iterators/video.py +1 -1
- pixeltable/metadata/__init__.py +2 -1
- pixeltable/metadata/converters/convert_15.py +18 -8
- pixeltable/metadata/converters/convert_27.py +31 -0
- pixeltable/metadata/converters/convert_28.py +15 -0
- pixeltable/metadata/converters/convert_29.py +111 -0
- pixeltable/metadata/converters/util.py +12 -1
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +8 -0
- pixeltable/share/__init__.py +1 -0
- pixeltable/share/packager.py +41 -13
- pixeltable/share/publish.py +97 -0
- pixeltable/type_system.py +40 -14
- pixeltable/utils/__init__.py +41 -0
- pixeltable/utils/arrow.py +40 -7
- pixeltable/utils/formatter.py +1 -1
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/METADATA +34 -49
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/RECORD +63 -57
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/WHEEL +1 -1
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/entry_points.txt +0 -0
pixeltable/func/signature.py
CHANGED
|
@@ -1,16 +1,18 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import dataclasses
|
|
4
|
-
import enum
|
|
5
4
|
import inspect
|
|
6
5
|
import json
|
|
7
6
|
import logging
|
|
8
7
|
import typing
|
|
9
|
-
from typing import Any, Callable, Optional
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional
|
|
10
9
|
|
|
11
10
|
import pixeltable.exceptions as excs
|
|
12
11
|
import pixeltable.type_system as ts
|
|
13
12
|
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from pixeltable import exprs
|
|
15
|
+
|
|
14
16
|
_logger = logging.getLogger('pixeltable')
|
|
15
17
|
|
|
16
18
|
|
|
@@ -21,25 +23,24 @@ class Parameter:
|
|
|
21
23
|
kind: inspect._ParameterKind
|
|
22
24
|
# for some reason, this needs to precede is_batched in the dataclass definition,
|
|
23
25
|
# otherwise Python complains that an argument with a default is followed by an argument without a default
|
|
24
|
-
default:
|
|
26
|
+
default: Optional['exprs.Literal'] = None # default value for the parameter
|
|
25
27
|
is_batched: bool = False # True if the parameter is a batched parameter (eg, Batch[dict])
|
|
26
28
|
|
|
27
29
|
def __post_init__(self) -> None:
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
raise excs.Error(f'Default value for parameter {self.name}: {str(e)}')
|
|
30
|
+
from pixeltable import exprs
|
|
31
|
+
|
|
32
|
+
if self.default is not None:
|
|
33
|
+
if self.col_type is None:
|
|
34
|
+
raise excs.Error(f'Cannot have a default value for variable parameter {self.name!r}')
|
|
35
|
+
if not isinstance(self.default, exprs.Literal):
|
|
36
|
+
raise excs.Error(f'Default value for parameter {self.name!r} is not a constant')
|
|
37
|
+
if not self.col_type.is_supertype_of(self.default.col_type):
|
|
38
|
+
raise excs.Error(
|
|
39
|
+
f'Default value for parameter {self.name!r} is not of type {self.col_type!r}: {self.default}'
|
|
40
|
+
)
|
|
40
41
|
|
|
41
42
|
def has_default(self) -> bool:
|
|
42
|
-
return self.default is not
|
|
43
|
+
return self.default is not None
|
|
43
44
|
|
|
44
45
|
def as_dict(self) -> dict[str, Any]:
|
|
45
46
|
return {
|
|
@@ -47,17 +48,15 @@ class Parameter:
|
|
|
47
48
|
'col_type': self.col_type.as_dict() if self.col_type is not None else None,
|
|
48
49
|
'kind': self.kind.name,
|
|
49
50
|
'is_batched': self.is_batched,
|
|
50
|
-
'
|
|
51
|
-
'default': self.default if self.has_default() else None,
|
|
51
|
+
'default': None if self.default is None else self.default.as_dict(),
|
|
52
52
|
}
|
|
53
53
|
|
|
54
54
|
@classmethod
|
|
55
55
|
def from_dict(cls, d: dict[str, Any]) -> Parameter:
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
else
|
|
60
|
-
default = inspect.Parameter.empty
|
|
56
|
+
from pixeltable import exprs
|
|
57
|
+
|
|
58
|
+
assert d['default'] is None or isinstance(d['default'], dict), d
|
|
59
|
+
default = None if d['default'] is None else exprs.Literal.from_dict(d['default'])
|
|
61
60
|
return cls(
|
|
62
61
|
name=d['name'],
|
|
63
62
|
col_type=ts.ColumnType.from_dict(d['col_type']) if d['col_type'] is not None else None,
|
|
@@ -67,7 +66,8 @@ class Parameter:
|
|
|
67
66
|
)
|
|
68
67
|
|
|
69
68
|
def to_py_param(self) -> inspect.Parameter:
|
|
70
|
-
|
|
69
|
+
py_default = self.default.val if self.default is not None else inspect.Parameter.empty
|
|
70
|
+
return inspect.Parameter(self.name, self.kind, default=py_default)
|
|
71
71
|
|
|
72
72
|
|
|
73
73
|
T = typing.TypeVar('T')
|
|
@@ -147,6 +147,37 @@ class Signature:
|
|
|
147
147
|
|
|
148
148
|
return True
|
|
149
149
|
|
|
150
|
+
def validate_args(self, bound_args: dict[str, Optional['exprs.Expr']], context: str = '') -> None:
|
|
151
|
+
if context != '':
|
|
152
|
+
context = f' ({context})'
|
|
153
|
+
|
|
154
|
+
for param_name, arg in bound_args.items():
|
|
155
|
+
assert param_name in self.parameters
|
|
156
|
+
param = self.parameters[param_name]
|
|
157
|
+
is_var_param = param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
|
|
158
|
+
if is_var_param:
|
|
159
|
+
continue
|
|
160
|
+
assert param.col_type is not None
|
|
161
|
+
|
|
162
|
+
if arg is None:
|
|
163
|
+
raise excs.Error(f'Parameter {param_name!r}{context}: invalid argument')
|
|
164
|
+
|
|
165
|
+
# Check that the argument is consistent with the expected parameter type, with the allowance that
|
|
166
|
+
# non-nullable parameters can still accept nullable arguments (since in that event, FunctionCall.eval()
|
|
167
|
+
# detects the Nones and skips evaluation).
|
|
168
|
+
if not (
|
|
169
|
+
param.col_type.is_supertype_of(arg.col_type, ignore_nullable=True)
|
|
170
|
+
# TODO: this is a hack to allow JSON columns to be passed to functions that accept scalar
|
|
171
|
+
# types. It's necessary to avoid littering notebooks with `apply(str)` calls or equivalent.
|
|
172
|
+
# (Previously, this wasn't necessary because `is_supertype_of()` was improperly implemented.)
|
|
173
|
+
# We need to think through the right way to handle this scenario.
|
|
174
|
+
or (arg.col_type.is_json_type() and param.col_type.is_scalar_type())
|
|
175
|
+
):
|
|
176
|
+
raise excs.Error(
|
|
177
|
+
f'Parameter {param_name!r}{context}: argument type {arg.col_type} does not'
|
|
178
|
+
f' match parameter type {param.col_type}'
|
|
179
|
+
)
|
|
180
|
+
|
|
150
181
|
def __eq__(self, other: object) -> bool:
|
|
151
182
|
if not isinstance(other, Signature):
|
|
152
183
|
return False
|
|
@@ -199,6 +230,8 @@ class Signature:
|
|
|
199
230
|
type_substitutions: Optional[dict] = None,
|
|
200
231
|
is_cls_method: bool = False,
|
|
201
232
|
) -> list[Parameter]:
|
|
233
|
+
from pixeltable import exprs
|
|
234
|
+
|
|
202
235
|
assert (py_fn is None) != (py_params is None)
|
|
203
236
|
if py_fn is not None:
|
|
204
237
|
sig = inspect.signature(py_fn)
|
|
@@ -212,7 +245,7 @@ class Signature:
|
|
|
212
245
|
if is_cls_method and idx == 0:
|
|
213
246
|
continue # skip 'self' or 'cls' parameter
|
|
214
247
|
if param.name in cls.SPECIAL_PARAM_NAMES:
|
|
215
|
-
raise excs.Error(f
|
|
248
|
+
raise excs.Error(f'{param.name!r} is a reserved parameter name')
|
|
216
249
|
if param.kind == inspect.Parameter.VAR_POSITIONAL or param.kind == inspect.Parameter.VAR_KEYWORD:
|
|
217
250
|
parameters.append(Parameter(param.name, col_type=None, kind=param.kind))
|
|
218
251
|
continue
|
|
@@ -220,7 +253,7 @@ class Signature:
|
|
|
220
253
|
# check non-var parameters for name collisions and default value compatibility
|
|
221
254
|
if param_types is not None:
|
|
222
255
|
if idx >= len(param_types):
|
|
223
|
-
raise excs.Error(f'Missing type for parameter {param.name}')
|
|
256
|
+
raise excs.Error(f'Missing type for parameter {param.name!r}')
|
|
224
257
|
param_type = param_types[idx]
|
|
225
258
|
is_batched = False
|
|
226
259
|
else:
|
|
@@ -231,12 +264,14 @@ class Signature:
|
|
|
231
264
|
py_type = param.annotation
|
|
232
265
|
param_type, is_batched = cls._infer_type(py_type)
|
|
233
266
|
if param_type is None:
|
|
234
|
-
raise excs.Error(f'Cannot infer pixeltable type for parameter {param.name}')
|
|
267
|
+
raise excs.Error(f'Cannot infer pixeltable type for parameter {param.name!r}')
|
|
268
|
+
|
|
269
|
+
default = None if param.default is inspect.Parameter.empty else exprs.Expr.from_object(param.default)
|
|
270
|
+
if not (default is None or isinstance(default, exprs.Literal)):
|
|
271
|
+
raise excs.Error(f'Default value for parameter {param.name!r} must be a constant')
|
|
235
272
|
|
|
236
273
|
parameters.append(
|
|
237
|
-
Parameter(
|
|
238
|
-
param.name, col_type=param_type, kind=param.kind, is_batched=is_batched, default=param.default
|
|
239
|
-
)
|
|
274
|
+
Parameter(param.name, col_type=param_type, kind=param.kind, is_batched=is_batched, default=default)
|
|
240
275
|
)
|
|
241
276
|
|
|
242
277
|
return parameters
|
pixeltable/func/tools.py
CHANGED
|
@@ -48,22 +48,27 @@ class Tool(pydantic.BaseModel):
|
|
|
48
48
|
'additionalProperties': False, # TODO Handle kwargs?
|
|
49
49
|
}
|
|
50
50
|
|
|
51
|
-
# `tool_calls` must be in standardized tool invocation format:
|
|
52
|
-
# {tool_name: {'args': {name1: value1, name2: value2, ...}}, ...}
|
|
53
|
-
def invoke(self, tool_calls: 'exprs.Expr') -> 'exprs.
|
|
54
|
-
|
|
55
|
-
return self.fn(**kwargs)
|
|
51
|
+
# The output of `tool_calls` must be a dict in standardized tool invocation format:
|
|
52
|
+
# {tool_name: [{'args': {name1: value1, name2: value2, ...}}, ...], ...}
|
|
53
|
+
def invoke(self, tool_calls: 'exprs.Expr') -> 'exprs.Expr':
|
|
54
|
+
from pixeltable import exprs
|
|
56
55
|
|
|
57
|
-
def __extract_tool_arg(self, param: Parameter, tool_calls: 'exprs.Expr') -> 'exprs.Expr':
|
|
58
56
|
func_name = self.name or self.fn.name
|
|
57
|
+
return exprs.JsonMapper(tool_calls[func_name]['*'], self.__invoke_kwargs(exprs.RELATIVE_PATH_ROOT.args))
|
|
58
|
+
|
|
59
|
+
def __invoke_kwargs(self, kwargs: 'exprs.Expr') -> 'exprs.FunctionCall':
|
|
60
|
+
kwargs = {param.name: self.__extract_tool_arg(param, kwargs) for param in self.parameters.values()}
|
|
61
|
+
return self.fn(**kwargs)
|
|
62
|
+
|
|
63
|
+
def __extract_tool_arg(self, param: Parameter, kwargs: 'exprs.Expr') -> 'exprs.FunctionCall':
|
|
59
64
|
if param.col_type.is_string_type():
|
|
60
|
-
return _extract_str_tool_arg(
|
|
65
|
+
return _extract_str_tool_arg(kwargs, param_name=param.name)
|
|
61
66
|
if param.col_type.is_int_type():
|
|
62
|
-
return _extract_int_tool_arg(
|
|
67
|
+
return _extract_int_tool_arg(kwargs, param_name=param.name)
|
|
63
68
|
if param.col_type.is_float_type():
|
|
64
|
-
return _extract_float_tool_arg(
|
|
69
|
+
return _extract_float_tool_arg(kwargs, param_name=param.name)
|
|
65
70
|
if param.col_type.is_bool_type():
|
|
66
|
-
return _extract_bool_tool_arg(
|
|
71
|
+
return _extract_bool_tool_arg(kwargs, param_name=param.name)
|
|
67
72
|
assert False
|
|
68
73
|
|
|
69
74
|
|
|
@@ -113,34 +118,29 @@ class Tools(pydantic.BaseModel):
|
|
|
113
118
|
|
|
114
119
|
|
|
115
120
|
@udf
|
|
116
|
-
def _extract_str_tool_arg(
|
|
117
|
-
return _extract_arg(str,
|
|
121
|
+
def _extract_str_tool_arg(kwargs: dict[str, Any], param_name: str) -> Optional[str]:
|
|
122
|
+
return _extract_arg(str, kwargs, param_name)
|
|
118
123
|
|
|
119
124
|
|
|
120
125
|
@udf
|
|
121
|
-
def _extract_int_tool_arg(
|
|
122
|
-
return _extract_arg(int,
|
|
126
|
+
def _extract_int_tool_arg(kwargs: dict[str, Any], param_name: str) -> Optional[int]:
|
|
127
|
+
return _extract_arg(int, kwargs, param_name)
|
|
123
128
|
|
|
124
129
|
|
|
125
130
|
@udf
|
|
126
|
-
def _extract_float_tool_arg(
|
|
127
|
-
return _extract_arg(float,
|
|
131
|
+
def _extract_float_tool_arg(kwargs: dict[str, Any], param_name: str) -> Optional[float]:
|
|
132
|
+
return _extract_arg(float, kwargs, param_name)
|
|
128
133
|
|
|
129
134
|
|
|
130
135
|
@udf
|
|
131
|
-
def _extract_bool_tool_arg(
|
|
132
|
-
return _extract_arg(bool,
|
|
136
|
+
def _extract_bool_tool_arg(kwargs: dict[str, Any], param_name: str) -> Optional[bool]:
|
|
137
|
+
return _extract_arg(bool, kwargs, param_name)
|
|
133
138
|
|
|
134
139
|
|
|
135
140
|
T = TypeVar('T')
|
|
136
141
|
|
|
137
142
|
|
|
138
|
-
def _extract_arg(
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
if func_name in tool_calls:
|
|
142
|
-
arguments = tool_calls[func_name]['args']
|
|
143
|
-
if param_name in arguments:
|
|
144
|
-
return eval_fn(arguments[param_name])
|
|
145
|
-
return None
|
|
143
|
+
def _extract_arg(eval_fn: Callable[[Any], T], kwargs: dict[str, Any], param_name: str) -> Optional[T]:
|
|
144
|
+
if param_name in kwargs:
|
|
145
|
+
return eval_fn(kwargs[param_name])
|
|
146
146
|
return None
|
pixeltable/func/udf.py
CHANGED
|
@@ -283,7 +283,7 @@ def from_table(
|
|
|
283
283
|
result_dict[name] = var
|
|
284
284
|
# Since this is a data column, it becomes a UDF parameter.
|
|
285
285
|
# If the column is nullable, then the parameter will have a default value of None.
|
|
286
|
-
default_value = None if col.col_type.nullable else
|
|
286
|
+
default_value = exprs.Literal(None) if col.col_type.nullable else None
|
|
287
287
|
param = Parameter(name, col.col_type, inspect._ParameterKind.POSITIONAL_OR_KEYWORD, default_value)
|
|
288
288
|
params.append(param)
|
|
289
289
|
|
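pixeltable/functions/__init__.py
CHANGED
|
[hunk for this file (+1 -0) is missing from this extract]
pixeltable/functions/anthropic.py
CHANGED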
|
@@ -213,9 +213,15 @@ def invoke_tools(tools: Tools, response: exprs.Expr) -> exprs.InlineDict:
|
|
|
213
213
|
@pxt.udf
|
|
214
214
|
def _anthropic_response_to_pxt_tool_calls(response: dict) -> Optional[dict]:
|
|
215
215
|
anthropic_tool_calls = [r for r in response['content'] if r['type'] == 'tool_use']
|
|
216
|
-
if len(anthropic_tool_calls)
|
|
217
|
-
return
|
|
218
|
-
|
|
216
|
+
if len(anthropic_tool_calls) == 0:
|
|
217
|
+
return None
|
|
218
|
+
pxt_tool_calls: dict[str, list[dict[str, Any]]] = {}
|
|
219
|
+
for tool_call in anthropic_tool_calls:
|
|
220
|
+
tool_name = tool_call['name']
|
|
221
|
+
if tool_name not in pxt_tool_calls:
|
|
222
|
+
pxt_tool_calls[tool_name] = []
|
|
223
|
+
pxt_tool_calls[tool_name].append({'args': tool_call['input']})
|
|
224
|
+
return pxt_tool_calls
|
|
219
225
|
|
|
220
226
|
|
|
221
227
|
_T = TypeVar('_T')
|
|
pixeltable/functions/deepseek.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import TYPE_CHECKING, Any, Optional, Union, cast
|
|
3
|
+
|
|
4
|
+
import httpx
|
|
5
|
+
|
|
6
|
+
import pixeltable as pxt
|
|
7
|
+
from pixeltable import env
|
|
8
|
+
from pixeltable.utils.code import local_public_names
|
|
9
|
+
|
|
10
|
+
from .openai import _opt
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
import openai
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@env.register_client('deepseek')
|
|
17
|
+
def _(api_key: str) -> 'openai.AsyncOpenAI':
|
|
18
|
+
import openai
|
|
19
|
+
|
|
20
|
+
return openai.AsyncOpenAI(
|
|
21
|
+
api_key=api_key,
|
|
22
|
+
base_url='https://api.deepseek.com',
|
|
23
|
+
http_client=httpx.AsyncClient(limits=httpx.Limits(max_keepalive_connections=100, max_connections=500)),
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _deepseek_client() -> 'openai.AsyncOpenAI':
|
|
28
|
+
return env.Env.get().get_client('deepseek')
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@pxt.udf
|
|
32
|
+
async def chat_completions(
|
|
33
|
+
messages: list,
|
|
34
|
+
*,
|
|
35
|
+
model: str,
|
|
36
|
+
frequency_penalty: Optional[float] = None,
|
|
37
|
+
logprobs: Optional[bool] = None,
|
|
38
|
+
top_logprobs: Optional[int] = None,
|
|
39
|
+
max_tokens: Optional[int] = None,
|
|
40
|
+
presence_penalty: Optional[float] = None,
|
|
41
|
+
response_format: Optional[dict] = None,
|
|
42
|
+
stop: Optional[list[str]] = None,
|
|
43
|
+
temperature: Optional[float] = None,
|
|
44
|
+
tools: Optional[list[dict]] = None,
|
|
45
|
+
tool_choice: Optional[dict] = None,
|
|
46
|
+
top_p: Optional[float] = None,
|
|
47
|
+
) -> dict:
|
|
48
|
+
"""
|
|
49
|
+
Creates a model response for the given chat conversation.
|
|
50
|
+
|
|
51
|
+
Equivalent to the Deepseek `chat/completions` API endpoint.
|
|
52
|
+
For additional details, see: <https://api-docs.deepseek.com/api/create-chat-completion>
|
|
53
|
+
|
|
54
|
+
Deepseek uses the OpenAI SDK, so you will need to install the `openai` package to use this UDF.
|
|
55
|
+
|
|
56
|
+
__Requirements:__
|
|
57
|
+
|
|
58
|
+
- `pip install openai`
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
messages: A list of messages to use for chat completion, as described in the Deepseek API documentation.
|
|
62
|
+
model: The model to use for chat completion.
|
|
63
|
+
|
|
64
|
+
For details on the other parameters, see: <https://api-docs.deepseek.com/api/create-chat-completion>
|
|
65
|
+
|
|
66
|
+
Returns:
|
|
67
|
+
A dictionary containing the response and other metadata.
|
|
68
|
+
|
|
69
|
+
Examples:
|
|
70
|
+
Add a computed column that applies the model `deepseek-chat` to an existing Pixeltable column `tbl.prompt`
|
|
71
|
+
of the table `tbl`:
|
|
72
|
+
|
|
73
|
+
>>> messages = [
|
|
74
|
+
{'role': 'system', 'content': 'You are a helpful assistant.'},
|
|
75
|
+
{'role': 'user', 'content': tbl.prompt}
|
|
76
|
+
]
|
|
77
|
+
tbl.add_computed_column(response=chat_completions(messages, model='deepseek-chat'))
|
|
78
|
+
"""
|
|
79
|
+
if tools is not None:
|
|
80
|
+
tools = [{'type': 'function', 'function': tool} for tool in tools]
|
|
81
|
+
|
|
82
|
+
tool_choice_: Union[str, dict, None] = None
|
|
83
|
+
if tool_choice is not None:
|
|
84
|
+
if tool_choice['auto']:
|
|
85
|
+
tool_choice_ = 'auto'
|
|
86
|
+
elif tool_choice['required']:
|
|
87
|
+
tool_choice_ = 'required'
|
|
88
|
+
else:
|
|
89
|
+
assert tool_choice['tool'] is not None
|
|
90
|
+
tool_choice_ = {'type': 'function', 'function': {'name': tool_choice['tool']}}
|
|
91
|
+
|
|
92
|
+
extra_body: Optional[dict[str, Any]] = None
|
|
93
|
+
if tool_choice is not None and not tool_choice['parallel_tool_calls']:
|
|
94
|
+
extra_body = {'parallel_tool_calls': False}
|
|
95
|
+
|
|
96
|
+
# cast(Any, ...): avoid mypy errors
|
|
97
|
+
result = await _deepseek_client().chat.completions.with_raw_response.create(
|
|
98
|
+
messages=messages,
|
|
99
|
+
model=model,
|
|
100
|
+
frequency_penalty=_opt(frequency_penalty),
|
|
101
|
+
logprobs=_opt(logprobs),
|
|
102
|
+
top_logprobs=_opt(top_logprobs),
|
|
103
|
+
max_tokens=_opt(max_tokens),
|
|
104
|
+
presence_penalty=_opt(presence_penalty),
|
|
105
|
+
response_format=_opt(cast(Any, response_format)),
|
|
106
|
+
stop=_opt(stop),
|
|
107
|
+
temperature=_opt(temperature),
|
|
108
|
+
tools=_opt(cast(Any, tools)),
|
|
109
|
+
tool_choice=_opt(cast(Any, tool_choice_)),
|
|
110
|
+
top_p=_opt(top_p),
|
|
111
|
+
extra_body=extra_body,
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
return json.loads(result.text)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
__all__ = local_public_names(__name__)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def __dir__():
|
|
121
|
+
return __all__
|
pixeltable/functions/image.py
CHANGED
|
@@ -131,6 +131,13 @@ def getchannel(self: PIL.Image.Image, channel: int) -> PIL.Image.Image:
|
|
|
131
131
|
pass
|
|
132
132
|
|
|
133
133
|
|
|
134
|
+
@getchannel.conditional_return_type
|
|
135
|
+
def _(self: Expr) -> pxt.ColumnType:
|
|
136
|
+
input_type = self.col_type
|
|
137
|
+
assert isinstance(input_type, pxt.ImageType)
|
|
138
|
+
return pxt.ImageType(size=input_type.size, mode='L', nullable=input_type.nullable)
|
|
139
|
+
|
|
140
|
+
|
|
134
141
|
@pxt.udf(is_method=True)
|
|
135
142
|
def get_metadata(self: PIL.Image.Image) -> dict:
|
|
136
143
|
"""
|
|
@@ -146,13 +153,6 @@ def get_metadata(self: PIL.Image.Image) -> dict:
|
|
|
146
153
|
}
|
|
147
154
|
|
|
148
155
|
|
|
149
|
-
@getchannel.conditional_return_type
|
|
150
|
-
def _(self: Expr) -> pxt.ColumnType:
|
|
151
|
-
input_type = self.col_type
|
|
152
|
-
assert isinstance(input_type, pxt.ImageType)
|
|
153
|
-
return pxt.ImageType(size=input_type.size, mode='L', nullable=input_type.nullable)
|
|
154
|
-
|
|
155
|
-
|
|
156
156
|
# Image.point()
|
|
157
157
|
@pxt.udf(is_method=True)
|
|
158
158
|
def point(self: PIL.Image.Image, lut: list[int], mode: Optional[str] = None) -> PIL.Image.Image:
|
pixeltable/functions/openai.py
CHANGED
|
@@ -325,10 +325,30 @@ async def translations(
|
|
|
325
325
|
|
|
326
326
|
|
|
327
327
|
def _default_max_tokens(model: str) -> int:
|
|
328
|
-
if
|
|
328
|
+
if (
|
|
329
|
+
_is_model_family(model, 'gpt-4o-realtime')
|
|
330
|
+
or _is_model_family(model, 'gpt-4o-mini-realtime')
|
|
331
|
+
or _is_model_family(model, 'gpt-4-turbo')
|
|
332
|
+
or _is_model_family(model, 'gpt-3.5-turbo')
|
|
333
|
+
):
|
|
334
|
+
return 4096
|
|
335
|
+
if _is_model_family(model, 'gpt-4'):
|
|
336
|
+
return 8192 # All other gpt-4 models (will not match on gpt-4o models)
|
|
337
|
+
if _is_model_family(model, 'gpt-4o') or _is_model_family(model, 'gpt-4.5-preview'):
|
|
338
|
+
return 16384 # All other gpt-4o / gpt-4.5 models
|
|
339
|
+
if _is_model_family(model, 'o1-preview'):
|
|
340
|
+
return 32768
|
|
341
|
+
if _is_model_family(model, 'o1-mini'):
|
|
329
342
|
return 65536
|
|
330
|
-
|
|
331
|
-
return
|
|
343
|
+
if _is_model_family(model, 'o1') or _is_model_family(model, 'o3'):
|
|
344
|
+
return 100000 # All other o1 / o3 models
|
|
345
|
+
return 100000 # global default
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def _is_model_family(model: str, family: str) -> bool:
|
|
349
|
+
# `model.startswith(family)` would be a simpler match, but increases the risk of false positives.
|
|
350
|
+
# We use a slightly more complicated criterion to make things a little less error prone.
|
|
351
|
+
return model == family or model.startswith(f'{family}-')
|
|
332
352
|
|
|
333
353
|
|
|
334
354
|
def _chat_completions_get_request_resources(
|
|
@@ -427,9 +447,6 @@ async def chat_completions(
|
|
|
427
447
|
resource_pool, lambda: OpenAIRateLimitsInfo(_chat_completions_get_request_resources)
|
|
428
448
|
)
|
|
429
449
|
|
|
430
|
-
if max_completion_tokens is None and max_tokens is None:
|
|
431
|
-
max_completion_tokens = _default_max_tokens(model)
|
|
432
|
-
|
|
433
450
|
# cast(Any, ...): avoid mypy errors
|
|
434
451
|
result = await _openai_client().chat.completions.with_raw_response.create(
|
|
435
452
|
messages=messages,
|
|
@@ -555,9 +572,6 @@ async def vision(
|
|
|
555
572
|
resource_pool, lambda: OpenAIRateLimitsInfo(_vision_get_request_resources)
|
|
556
573
|
)
|
|
557
574
|
|
|
558
|
-
if max_completion_tokens is None and max_tokens is None:
|
|
559
|
-
max_completion_tokens = _default_max_tokens(model)
|
|
560
|
-
|
|
561
575
|
result = await _openai_client().chat.completions.with_raw_response.create(
|
|
562
576
|
messages=messages, # type: ignore
|
|
563
577
|
model=model,
|
|
@@ -802,10 +816,13 @@ def _openai_response_to_pxt_tool_calls(response: dict) -> Optional[dict]:
|
|
|
802
816
|
if 'tool_calls' not in response['choices'][0]['message'] or response['choices'][0]['message']['tool_calls'] is None:
|
|
803
817
|
return None
|
|
804
818
|
openai_tool_calls = response['choices'][0]['message']['tool_calls']
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
819
|
+
pxt_tool_calls: dict[str, list[dict[str, Any]]] = {}
|
|
820
|
+
for tool_call in openai_tool_calls:
|
|
821
|
+
tool_name = tool_call['function']['name']
|
|
822
|
+
if tool_name not in pxt_tool_calls:
|
|
823
|
+
pxt_tool_calls[tool_name] = []
|
|
824
|
+
pxt_tool_calls[tool_name].append({'args': json.loads(tool_call['function']['arguments'])})
|
|
825
|
+
return pxt_tool_calls
|
|
809
826
|
|
|
810
827
|
|
|
811
828
|
_T = TypeVar('_T')
|
pixeltable/functions/video.py
CHANGED
|
@@ -14,9 +14,9 @@ t.select(pxt_video.extract_audio(t.video_col)).collect()
|
|
|
14
14
|
import tempfile
|
|
15
15
|
import uuid
|
|
16
16
|
from pathlib import Path
|
|
17
|
-
from typing import Optional
|
|
17
|
+
from typing import Any, Optional
|
|
18
18
|
|
|
19
|
-
import av
|
|
19
|
+
import av
|
|
20
20
|
import numpy as np
|
|
21
21
|
import PIL.Image
|
|
22
22
|
|
|
@@ -53,10 +53,14 @@ class make_video(pxt.Aggregator):
|
|
|
53
53
|
Aggregator that creates a video from a sequence of images.
|
|
54
54
|
"""
|
|
55
55
|
|
|
56
|
+
container: Optional[av.container.OutputContainer]
|
|
57
|
+
stream: Optional[av.video.stream.VideoStream]
|
|
58
|
+
fps: int
|
|
59
|
+
|
|
56
60
|
def __init__(self, fps: int = 25):
|
|
57
61
|
"""follows https://pyav.org/docs/develop/cookbook/numpy.html#generating-video"""
|
|
58
|
-
self.container
|
|
59
|
-
self.stream
|
|
62
|
+
self.container = None
|
|
63
|
+
self.stream = None
|
|
60
64
|
self.fps = fps
|
|
61
65
|
|
|
62
66
|
def update(self, frame: PIL.Image.Image) -> None:
|
|
@@ -107,9 +111,10 @@ def extract_audio(
|
|
|
107
111
|
|
|
108
112
|
with av.open(output_filename, 'w', format=format) as output_container:
|
|
109
113
|
output_stream = output_container.add_stream(codec or default_codec)
|
|
114
|
+
assert isinstance(output_stream, av.audio.stream.AudioStream)
|
|
110
115
|
for packet in container.demux(audio_stream):
|
|
111
116
|
for frame in packet.decode():
|
|
112
|
-
output_container.mux(output_stream.encode(frame))
|
|
117
|
+
output_container.mux(output_stream.encode(frame)) # type: ignore[arg-type]
|
|
113
118
|
|
|
114
119
|
return output_filename
|
|
115
120
|
|
|
@@ -141,7 +146,7 @@ def __get_stream_metadata(stream: av.stream.Stream) -> dict:
|
|
|
141
146
|
return {'type': stream.type} # Currently unsupported
|
|
142
147
|
|
|
143
148
|
codec_context = stream.codec_context
|
|
144
|
-
codec_context_md = {
|
|
149
|
+
codec_context_md: dict[str, Any] = {
|
|
145
150
|
'name': codec_context.name,
|
|
146
151
|
'codec_tag': codec_context.codec_tag.encode('unicode-escape').decode('utf-8'),
|
|
147
152
|
'profile': codec_context.profile,
|
|
@@ -160,9 +165,11 @@ def __get_stream_metadata(stream: av.stream.Stream) -> dict:
|
|
|
160
165
|
|
|
161
166
|
if stream.type == 'audio':
|
|
162
167
|
# Additional metadata for audio
|
|
163
|
-
|
|
168
|
+
channels = getattr(stream.codec_context, 'channels', None)
|
|
169
|
+
codec_context_md['channels'] = int(channels) if channels is not None else None
|
|
164
170
|
else:
|
|
165
171
|
assert stream.type == 'video'
|
|
172
|
+
assert isinstance(stream, av.video.stream.VideoStream)
|
|
166
173
|
# Additional metadata for video
|
|
167
174
|
codec_context_md['pix_fmt'] = getattr(stream.codec_context, 'pix_fmt', None)
|
|
168
175
|
metadata.update(
|
pixeltable/globals.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
import logging
|
|
3
|
+
import urllib.parse
|
|
3
4
|
from typing import Any, Iterable, Literal, Optional, Union
|
|
4
5
|
from uuid import UUID
|
|
5
6
|
|
|
@@ -10,7 +11,7 @@ from sqlalchemy.util.preloaded import orm
|
|
|
10
11
|
|
|
11
12
|
import pixeltable.exceptions as excs
|
|
12
13
|
import pixeltable.exprs as exprs
|
|
13
|
-
from pixeltable import DataFrame, catalog, func
|
|
14
|
+
from pixeltable import DataFrame, catalog, func, share
|
|
14
15
|
from pixeltable.catalog import Catalog
|
|
15
16
|
from pixeltable.dataframe import DataFrameResultSet
|
|
16
17
|
from pixeltable.env import Env
|
|
@@ -279,14 +280,16 @@ def create_view(
|
|
|
279
280
|
... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 100), if_exists='replace_force')
|
|
280
281
|
"""
|
|
281
282
|
where: Optional[exprs.Expr] = None
|
|
283
|
+
select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]] = None
|
|
282
284
|
if isinstance(base, catalog.Table):
|
|
283
285
|
tbl_version_path = base._tbl_version_path
|
|
284
286
|
elif isinstance(base, DataFrame):
|
|
285
|
-
base._validate_mutable('create_view')
|
|
287
|
+
base._validate_mutable('create_view', allow_select=True)
|
|
286
288
|
if len(base._from_clause.tbls) > 1:
|
|
287
289
|
raise excs.Error('Cannot create a view of a join')
|
|
288
290
|
tbl_version_path = base._from_clause.tbls[0]
|
|
289
291
|
where = base.where_clause
|
|
292
|
+
select_list = base.select_list
|
|
290
293
|
else:
|
|
291
294
|
raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
|
|
292
295
|
assert isinstance(base, catalog.Table) or isinstance(base, DataFrame)
|
|
@@ -322,6 +325,7 @@ def create_view(
|
|
|
322
325
|
dir._id,
|
|
323
326
|
path.name,
|
|
324
327
|
base=tbl_version_path,
|
|
328
|
+
select_list=select_list,
|
|
325
329
|
additional_columns=additional_columns,
|
|
326
330
|
predicate=where,
|
|
327
331
|
is_snapshot=is_snapshot,
|
|
@@ -630,7 +634,7 @@ def create_dir(
|
|
|
630
634
|
parent = cat.paths[path.parent]
|
|
631
635
|
assert parent is not None
|
|
632
636
|
with orm.Session(Env.get().engine, future=True) as session:
|
|
633
|
-
dir_md = schema.DirMd(name=path.name)
|
|
637
|
+
dir_md = schema.DirMd(name=path.name, user=None, additional_md={})
|
|
634
638
|
dir_record = schema.Dir(parent_id=parent._id, md=dataclasses.asdict(dir_md))
|
|
635
639
|
session.add(dir_record)
|
|
636
640
|
session.flush()
|
|
@@ -723,6 +727,13 @@ def drop_dir(path_str: str, force: bool = False, if_not_exists: Literal['error',
|
|
|
723
727
|
_logger.info(f'Removed directory `{path_str}`.')
|
|
724
728
|
|
|
725
729
|
|
|
730
|
+
def publish_snapshot(dest_uri: str, table: catalog.Table) -> None:
|
|
731
|
+
parsed_uri = urllib.parse.urlparse(dest_uri)
|
|
732
|
+
if parsed_uri.scheme != 'pxt':
|
|
733
|
+
raise excs.Error(f'Invalid Pixeltable URI (does not start with pxt://): {dest_uri}')
|
|
734
|
+
share.publish_snapshot(dest_uri, table)
|
|
735
|
+
|
|
736
|
+
|
|
726
737
|
def list_dirs(path_str: str = '', recursive: bool = True) -> list[str]:
|
|
727
738
|
"""List the directories in a directory.
|
|
728
739
|
|