pixeltable 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -0
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +9 -2
- pixeltable/catalog/column.py +1 -1
- pixeltable/catalog/dir.py +1 -1
- pixeltable/catalog/table.py +3 -1
- pixeltable/catalog/table_version.py +12 -2
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/catalog/view.py +64 -20
- pixeltable/dataframe.py +11 -6
- pixeltable/env.py +12 -0
- pixeltable/exec/expr_eval/evaluators.py +4 -2
- pixeltable/exec/expr_eval/expr_eval_node.py +4 -1
- pixeltable/exprs/comparison.py +8 -4
- pixeltable/exprs/data_row.py +9 -7
- pixeltable/exprs/expr.py +2 -2
- pixeltable/exprs/function_call.py +155 -313
- pixeltable/exprs/json_mapper.py +25 -8
- pixeltable/exprs/json_path.py +6 -5
- pixeltable/exprs/object_ref.py +16 -5
- pixeltable/exprs/row_builder.py +10 -3
- pixeltable/func/aggregate_function.py +29 -15
- pixeltable/func/callable_function.py +11 -8
- pixeltable/func/expr_template_function.py +3 -9
- pixeltable/func/function.py +148 -74
- pixeltable/func/signature.py +65 -30
- pixeltable/func/tools.py +26 -26
- pixeltable/func/udf.py +1 -1
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +9 -3
- pixeltable/functions/deepseek.py +121 -0
- pixeltable/functions/image.py +7 -7
- pixeltable/functions/openai.py +30 -13
- pixeltable/functions/video.py +14 -7
- pixeltable/globals.py +14 -3
- pixeltable/index/embedding_index.py +4 -13
- pixeltable/io/globals.py +88 -77
- pixeltable/io/hf_datasets.py +34 -34
- pixeltable/io/pandas.py +75 -76
- pixeltable/io/parquet.py +19 -27
- pixeltable/io/utils.py +115 -0
- pixeltable/iterators/audio.py +2 -1
- pixeltable/iterators/video.py +1 -1
- pixeltable/metadata/__init__.py +2 -1
- pixeltable/metadata/converters/convert_15.py +18 -8
- pixeltable/metadata/converters/convert_27.py +31 -0
- pixeltable/metadata/converters/convert_28.py +15 -0
- pixeltable/metadata/converters/convert_29.py +111 -0
- pixeltable/metadata/converters/util.py +12 -1
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +8 -0
- pixeltable/share/__init__.py +1 -0
- pixeltable/share/packager.py +41 -13
- pixeltable/share/publish.py +97 -0
- pixeltable/type_system.py +40 -14
- pixeltable/utils/__init__.py +41 -0
- pixeltable/utils/arrow.py +40 -7
- pixeltable/utils/formatter.py +1 -1
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/METADATA +34 -49
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/RECORD +63 -57
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/WHEEL +1 -1
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/entry_points.txt +0 -0
pixeltable/exprs/json_mapper.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Optional
|
|
3
|
+
from typing import TYPE_CHECKING, Optional
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -11,6 +11,9 @@ from .expr import _GLOBAL_SCOPE, Expr, ExprScope
|
|
|
11
11
|
from .row_builder import RowBuilder
|
|
12
12
|
from .sql_element_cache import SqlElementCache
|
|
13
13
|
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from .object_ref import ObjectRef
|
|
16
|
+
|
|
14
17
|
|
|
15
18
|
class JsonMapper(Expr):
|
|
16
19
|
"""
|
|
@@ -19,6 +22,10 @@ class JsonMapper(Expr):
|
|
|
19
22
|
is populated by JsonMapper.eval(). The JsonMapper effectively creates a new scope for its target expr.
|
|
20
23
|
"""
|
|
21
24
|
|
|
25
|
+
target_expr_scope: ExprScope
|
|
26
|
+
parent_mapper: Optional[JsonMapper]
|
|
27
|
+
target_expr_eval_ctx: Optional[RowBuilder.EvalCtx]
|
|
28
|
+
|
|
22
29
|
def __init__(self, src_expr: Expr, target_expr: Expr):
|
|
23
30
|
# TODO: type spec should be list[target_expr.col_type]
|
|
24
31
|
super().__init__(ts.JsonType())
|
|
@@ -29,12 +36,18 @@ class JsonMapper(Expr):
|
|
|
29
36
|
|
|
30
37
|
from .object_ref import ObjectRef
|
|
31
38
|
|
|
32
|
-
|
|
33
|
-
self.
|
|
34
|
-
self.
|
|
35
|
-
|
|
39
|
+
self.components = [src_expr, target_expr]
|
|
40
|
+
self.parent_mapper = None
|
|
41
|
+
self.target_expr_eval_ctx = None
|
|
42
|
+
|
|
43
|
+
# Intentionally create the id now, before adding the scope anchor; this ensures that JsonMappers will
|
|
44
|
+
# be recognized as equal so long as they have the same src_expr and target_expr.
|
|
45
|
+
# TODO: Might this cause problems after certain substitutions?
|
|
36
46
|
self.id = self._create_id()
|
|
37
47
|
|
|
48
|
+
scope_anchor = ObjectRef(self.target_expr_scope, self)
|
|
49
|
+
self.components.append(scope_anchor)
|
|
50
|
+
|
|
38
51
|
def bind_rel_paths(self, mapper: Optional[JsonMapper] = None) -> None:
|
|
39
52
|
self._src_expr.bind_rel_paths(mapper)
|
|
40
53
|
self._target_expr.bind_rel_paths(self)
|
|
@@ -84,8 +97,12 @@ class JsonMapper(Expr):
|
|
|
84
97
|
return self.components[1]
|
|
85
98
|
|
|
86
99
|
@property
|
|
87
|
-
def scope_anchor(self) ->
|
|
88
|
-
|
|
100
|
+
def scope_anchor(self) -> 'ObjectRef':
|
|
101
|
+
from .object_ref import ObjectRef
|
|
102
|
+
|
|
103
|
+
result = self.components[2]
|
|
104
|
+
assert isinstance(result, ObjectRef)
|
|
105
|
+
return result
|
|
89
106
|
|
|
90
107
|
def _equals(self, _: JsonMapper) -> bool:
|
|
91
108
|
return True
|
|
@@ -107,7 +124,7 @@ class JsonMapper(Expr):
|
|
|
107
124
|
for i, val in enumerate(src):
|
|
108
125
|
data_row[self.scope_anchor.slot_idx] = val
|
|
109
126
|
# stored target_expr
|
|
110
|
-
row_builder.eval(data_row, self.target_expr_eval_ctx)
|
|
127
|
+
row_builder.eval(data_row, self.target_expr_eval_ctx, force_eval=self._target_expr.scope())
|
|
111
128
|
result[i] = data_row[self._target_expr.slot_idx]
|
|
112
129
|
data_row[self.slot_idx] = result
|
|
113
130
|
|
pixeltable/exprs/json_path.py
CHANGED
|
@@ -43,11 +43,11 @@ class JsonPath(Expr):
|
|
|
43
43
|
self.id = self._create_id()
|
|
44
44
|
|
|
45
45
|
def __repr__(self) -> str:
|
|
46
|
-
# else
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
)
|
|
46
|
+
# else 'R': the anchor is RELATIVE_PATH_ROOT
|
|
47
|
+
anchor_str = str(self._anchor) if self._anchor is not None else 'R'
|
|
48
|
+
if len(self.path_elements) == 0:
|
|
49
|
+
return anchor_str
|
|
50
|
+
return f'{anchor_str}{"." if isinstance(self.path_elements[0], str) else ""}{self._json_path()}'
|
|
51
51
|
|
|
52
52
|
def _as_dict(self) -> dict:
|
|
53
53
|
path_elements = [[el.start, el.stop, el.step] if isinstance(el, slice) else el for el in self.path_elements]
|
|
@@ -158,6 +158,7 @@ class JsonPath(Expr):
|
|
|
158
158
|
return ''.join(result)
|
|
159
159
|
|
|
160
160
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
161
|
+
assert self._anchor is not None, self
|
|
161
162
|
val = data_row[self._anchor.slot_idx]
|
|
162
163
|
if self.compiled_path is not None:
|
|
163
164
|
val = self.compiled_path.search(val)
|
pixeltable/exprs/object_ref.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Optional
|
|
3
|
+
from typing import Any, Optional
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -26,14 +26,22 @@ class ObjectRef(Expr):
|
|
|
26
26
|
self.owner = owner
|
|
27
27
|
self.id = self._create_id()
|
|
28
28
|
|
|
29
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
30
|
+
# We have no components, so we can't rely on the default behavior here (otherwise, all ObjectRef
|
|
31
|
+
# instances will be conflated into a single slot).
|
|
32
|
+
return [('addr', id(self))]
|
|
33
|
+
|
|
34
|
+
def substitute(self, subs: dict[Expr, Expr]) -> Expr:
|
|
35
|
+
# Just return self; we need to avoid creating a new id after doing the substitution, because otherwise
|
|
36
|
+
# we'll wind up in a situation where the scope_anchor of the enclosing JsonMapper is different from the
|
|
37
|
+
# nested ObjectRefs inside its target_expr (and therefore occupies a different slot_idx).
|
|
38
|
+
return self
|
|
39
|
+
|
|
29
40
|
def scope(self) -> ExprScope:
|
|
30
41
|
return self._scope
|
|
31
42
|
|
|
32
|
-
def __str__(self) -> str:
|
|
33
|
-
assert False
|
|
34
|
-
|
|
35
43
|
def _equals(self, other: ObjectRef) -> bool:
|
|
36
|
-
return self.
|
|
44
|
+
return self.id == other.id
|
|
37
45
|
|
|
38
46
|
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
39
47
|
return None
|
|
@@ -41,3 +49,6 @@ class ObjectRef(Expr):
|
|
|
41
49
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
42
50
|
# this will be called, but the value has already been materialized elsewhere
|
|
43
51
|
pass
|
|
52
|
+
|
|
53
|
+
def __repr__(self) -> str:
|
|
54
|
+
return f'ObjectRef({self.owner}, {self.id}, {self.owner.id})'
|
pixeltable/exprs/row_builder.py
CHANGED
|
@@ -17,7 +17,7 @@ from pixeltable.env import Env
|
|
|
17
17
|
from pixeltable.utils.media_store import MediaStore
|
|
18
18
|
|
|
19
19
|
from .data_row import DataRow
|
|
20
|
-
from .expr import Expr
|
|
20
|
+
from .expr import Expr, ExprScope
|
|
21
21
|
from .expr_set import ExprSet
|
|
22
22
|
|
|
23
23
|
|
|
@@ -299,6 +299,7 @@ class RowBuilder:
|
|
|
299
299
|
# this is input and therefore doesn't depend on other exprs
|
|
300
300
|
continue
|
|
301
301
|
for d in expr.dependencies():
|
|
302
|
+
assert d.slot_idx is not None, f'{expr}, {d}'
|
|
302
303
|
if d.slot_idx in excluded_slot_idxs:
|
|
303
304
|
continue
|
|
304
305
|
dependencies[expr.slot_idx].add(d.slot_idx)
|
|
@@ -376,7 +377,12 @@ class RowBuilder:
|
|
|
376
377
|
data_row.set_exc(slot_idx, exc)
|
|
377
378
|
|
|
378
379
|
def eval(
|
|
379
|
-
self,
|
|
380
|
+
self,
|
|
381
|
+
data_row: DataRow,
|
|
382
|
+
ctx: EvalCtx,
|
|
383
|
+
profile: Optional[ExecProfile] = None,
|
|
384
|
+
ignore_errors: bool = False,
|
|
385
|
+
force_eval: Optional[ExprScope] = None,
|
|
380
386
|
) -> None:
|
|
381
387
|
"""
|
|
382
388
|
Populates the slots in data_row given in ctx.
|
|
@@ -384,10 +390,11 @@ class RowBuilder:
|
|
|
384
390
|
and omits any of that expr's dependents's eval().
|
|
385
391
|
profile: if present, populated with execution time of each expr.eval() call; indexed by expr.slot_idx
|
|
386
392
|
ignore_errors: if False, raises ExprEvalError if any expr.eval() raises an exception
|
|
393
|
+
force_eval: forces exprs in the specified scope to be reevaluated, even if they already have a value
|
|
387
394
|
"""
|
|
388
395
|
for expr in ctx.exprs:
|
|
389
396
|
assert expr.slot_idx >= 0
|
|
390
|
-
if data_row.has_val[expr.slot_idx] or data_row.has_exc(expr.slot_idx):
|
|
397
|
+
if expr.scope() != force_eval and (data_row.has_val[expr.slot_idx] or data_row.has_exc(expr.slot_idx)):
|
|
391
398
|
continue
|
|
392
399
|
try:
|
|
393
400
|
start_time = time.perf_counter()
|
|
@@ -12,7 +12,7 @@ from .globals import validate_symbol_path
|
|
|
12
12
|
from .signature import Parameter, Signature
|
|
13
13
|
|
|
14
14
|
if TYPE_CHECKING:
|
|
15
|
-
import
|
|
15
|
+
from pixeltable import exprs
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class Aggregator(abc.ABC):
|
|
@@ -80,6 +80,8 @@ class AggregateFunction(Function):
|
|
|
80
80
|
"""Inspects the Aggregator class to infer the corresponding function signature. Returns the
|
|
81
81
|
inferred signature along with the list of init_param_names (for downstream error handling).
|
|
82
82
|
"""
|
|
83
|
+
from pixeltable import exprs
|
|
84
|
+
|
|
83
85
|
# infer type parameters; set return_type=InvalidType() because it has no meaning here
|
|
84
86
|
init_sig = Signature.create(
|
|
85
87
|
py_fn=cls.__init__, return_type=ts.InvalidType(), is_cls_method=True, type_substitutions=type_substitutions
|
|
@@ -102,14 +104,24 @@ class AggregateFunction(Function):
|
|
|
102
104
|
py_update_params = list(inspect.signature(cls.update).parameters.values())[1:] # leave out self
|
|
103
105
|
assert len(py_update_params) == len(update_types)
|
|
104
106
|
update_params = [
|
|
105
|
-
Parameter(
|
|
107
|
+
Parameter(
|
|
108
|
+
p.name,
|
|
109
|
+
col_type=update_types[i],
|
|
110
|
+
kind=p.kind,
|
|
111
|
+
default=exprs.Expr.from_object(p.default), # type: ignore[arg-type]
|
|
112
|
+
)
|
|
106
113
|
for i, p in enumerate(py_update_params)
|
|
107
114
|
]
|
|
108
115
|
# starting at 1: leave out self
|
|
109
116
|
py_init_params = list(inspect.signature(cls.__init__).parameters.values())[1:]
|
|
110
117
|
assert len(py_init_params) == len(init_types)
|
|
111
118
|
init_params = [
|
|
112
|
-
Parameter(
|
|
119
|
+
Parameter(
|
|
120
|
+
p.name,
|
|
121
|
+
col_type=init_types[i],
|
|
122
|
+
kind=inspect.Parameter.KEYWORD_ONLY,
|
|
123
|
+
default=exprs.Expr.from_object(p.default), # type: ignore[arg-type]
|
|
124
|
+
)
|
|
113
125
|
for i, p in enumerate(py_init_params)
|
|
114
126
|
]
|
|
115
127
|
duplicate_params = set(p.name for p in init_params) & set(p.name for p in update_params)
|
|
@@ -157,7 +169,7 @@ class AggregateFunction(Function):
|
|
|
157
169
|
res += '\n\n' + inspect.getdoc(self.agg_classes[0].update)
|
|
158
170
|
return res
|
|
159
171
|
|
|
160
|
-
def __call__(self, *args:
|
|
172
|
+
def __call__(self, *args: Any, **kwargs: Any) -> 'exprs.FunctionCall':
|
|
161
173
|
from pixeltable import exprs
|
|
162
174
|
|
|
163
175
|
# perform semantic analysis of special parameters 'order_by' and 'group_by'
|
|
@@ -194,29 +206,31 @@ class AggregateFunction(Function):
|
|
|
194
206
|
)
|
|
195
207
|
group_by_clause = kwargs.pop(self.GROUP_BY_PARAM)
|
|
196
208
|
|
|
209
|
+
args = [exprs.Expr.from_object(arg) for arg in args]
|
|
210
|
+
kwargs = {k: exprs.Expr.from_object(v) for k, v in kwargs.items()}
|
|
211
|
+
|
|
197
212
|
resolved_fn, bound_args = self._bind_to_matching_signature(args, kwargs)
|
|
198
|
-
return_type = resolved_fn.call_return_type(
|
|
213
|
+
return_type = resolved_fn.call_return_type(bound_args)
|
|
214
|
+
|
|
199
215
|
return exprs.FunctionCall(
|
|
200
216
|
resolved_fn,
|
|
201
|
-
|
|
217
|
+
args,
|
|
218
|
+
kwargs,
|
|
202
219
|
return_type,
|
|
203
220
|
order_by_clause=[order_by_clause] if order_by_clause is not None else [],
|
|
204
221
|
group_by_clause=[group_by_clause] if group_by_clause is not None else [],
|
|
205
222
|
)
|
|
206
223
|
|
|
207
|
-
def validate_call(self, bound_args: dict[str,
|
|
208
|
-
# check that init parameters are not Exprs
|
|
209
|
-
# TODO: do this in the planner (check that init parameters are either constants or only refer to grouping exprs)
|
|
224
|
+
def validate_call(self, bound_args: dict[str, 'exprs.Expr']) -> None:
|
|
210
225
|
from pixeltable import exprs
|
|
211
226
|
|
|
212
|
-
|
|
227
|
+
super().validate_call(bound_args)
|
|
213
228
|
|
|
229
|
+
# check that init parameters are not Exprs
|
|
230
|
+
# TODO: do this in the planner (check that init parameters are either constants or only refer to grouping exprs)
|
|
214
231
|
for param_name in self.init_param_names[0]:
|
|
215
|
-
if param_name in bound_args and isinstance(bound_args[param_name], exprs.
|
|
216
|
-
raise excs.Error(
|
|
217
|
-
f'{self.display_name}(): init() parameter {param_name} needs to be a constant, not a Pixeltable '
|
|
218
|
-
f'expression'
|
|
219
|
-
)
|
|
232
|
+
if param_name in bound_args and not isinstance(bound_args[param_name], exprs.Literal):
|
|
233
|
+
raise excs.Error(f'{self.display_name}(): init() parameter {param_name!r} must be a constant value')
|
|
220
234
|
|
|
221
235
|
def __repr__(self) -> str:
|
|
222
236
|
return f'<Pixeltable Aggregator {self.name}>'
|
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import inspect
|
|
5
|
-
from typing import Any, Callable, Optional, Sequence
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
8
|
import cloudpickle # type: ignore[import-untyped]
|
|
@@ -12,6 +12,9 @@ import pixeltable.exceptions as excs
|
|
|
12
12
|
from .function import Function
|
|
13
13
|
from .signature import Signature
|
|
14
14
|
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from pixeltable import exprs
|
|
17
|
+
|
|
15
18
|
|
|
16
19
|
class CallableFunction(Function):
|
|
17
20
|
"""Pixeltable Function backed by a Python Callable.
|
|
@@ -192,18 +195,18 @@ class CallableFunction(Function):
|
|
|
192
195
|
batch_size = md['batch_size']
|
|
193
196
|
return CallableFunction([sig], [py_fn], self_name=name, batch_size=batch_size)
|
|
194
197
|
|
|
195
|
-
def validate_call(self, bound_args: dict[str,
|
|
198
|
+
def validate_call(self, bound_args: dict[str, 'exprs.Expr']) -> None:
|
|
196
199
|
from pixeltable import exprs
|
|
197
200
|
|
|
198
|
-
|
|
201
|
+
super().validate_call(bound_args)
|
|
199
202
|
if self.is_batched:
|
|
200
203
|
signature = self.signatures[0]
|
|
201
204
|
for param in signature.constant_parameters:
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
)
|
|
205
|
+
# Check that constant parameters map to constant arguments. It's ok for the argument to be a Variable,
|
|
206
|
+
# since in that case the FunctionCall is part of an unresolved template; the check will be done again
|
|
207
|
+
# when the template is fully resolved.
|
|
208
|
+
if param.name in bound_args and not isinstance(bound_args[param.name], (exprs.Literal, exprs.Variable)):
|
|
209
|
+
raise ValueError(f'{self.display_name}(): parameter {param.name} must be a constant value')
|
|
207
210
|
|
|
208
211
|
def __repr__(self) -> str:
|
|
209
212
|
return f'<Pixeltable UDF {self.name}>'
|
|
@@ -32,17 +32,11 @@ class ExprTemplate:
|
|
|
32
32
|
assert var.name in self.param_exprs, f"Variable '{var.name}' in expression is not a parameter"
|
|
33
33
|
|
|
34
34
|
# verify default values
|
|
35
|
-
self.defaults: dict[str, exprs.Literal] = {}
|
|
35
|
+
self.defaults: dict[str, exprs.Literal] = {}
|
|
36
36
|
for param in self.signature.parameters.values():
|
|
37
|
-
if param.default is
|
|
37
|
+
if param.default is None:
|
|
38
38
|
continue
|
|
39
|
-
|
|
40
|
-
try:
|
|
41
|
-
literal_default = exprs.Literal(param.default, col_type=param_expr.col_type)
|
|
42
|
-
self.defaults[param.name] = literal_default
|
|
43
|
-
except TypeError as e:
|
|
44
|
-
msg = str(e)
|
|
45
|
-
raise excs.Error(f"Default value for parameter '{param.name}': {msg[0].lower() + msg[1:]}")
|
|
39
|
+
self.defaults[param.name] = param.default
|
|
46
40
|
|
|
47
41
|
|
|
48
42
|
class ExprTemplateFunction(Function):
|
pixeltable/func/function.py
CHANGED
|
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import importlib
|
|
4
4
|
import inspect
|
|
5
|
+
import typing
|
|
5
6
|
from abc import ABC, abstractmethod
|
|
6
7
|
from copy import copy
|
|
7
8
|
from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence, cast
|
|
@@ -17,6 +18,8 @@ from .globals import resolve_symbol
|
|
|
17
18
|
from .signature import Signature
|
|
18
19
|
|
|
19
20
|
if TYPE_CHECKING:
|
|
21
|
+
from pixeltable import exprs
|
|
22
|
+
|
|
20
23
|
from .expr_template_function import ExprTemplate, ExprTemplateFunction
|
|
21
24
|
|
|
22
25
|
|
|
@@ -152,9 +155,22 @@ class Function(ABC):
|
|
|
152
155
|
def __call__(self, *args: Any, **kwargs: Any) -> 'pxt.exprs.FunctionCall':
|
|
153
156
|
from pixeltable import exprs
|
|
154
157
|
|
|
158
|
+
args = [exprs.Expr.from_object(arg) for arg in args]
|
|
159
|
+
kwargs = {k: exprs.Expr.from_object(v) for k, v in kwargs.items()}
|
|
160
|
+
|
|
161
|
+
for i, expr in enumerate(args):
|
|
162
|
+
if expr is None:
|
|
163
|
+
raise excs.Error(f'Argument {i + 1} in call to {self.self_path!r} is not a valid Pixeltable expression')
|
|
164
|
+
for param_name, expr in kwargs.items():
|
|
165
|
+
if expr is None:
|
|
166
|
+
raise excs.Error(
|
|
167
|
+
f'Argument {param_name!r} in call to {self.self_path!r} is not a valid Pixeltable expression'
|
|
168
|
+
)
|
|
169
|
+
|
|
155
170
|
resolved_fn, bound_args = self._bind_to_matching_signature(args, kwargs)
|
|
156
|
-
return_type = resolved_fn.call_return_type(
|
|
157
|
-
|
|
171
|
+
return_type = resolved_fn.call_return_type(bound_args)
|
|
172
|
+
|
|
173
|
+
return exprs.FunctionCall(resolved_fn, args, kwargs, return_type)
|
|
158
174
|
|
|
159
175
|
def _bind_to_matching_signature(self, args: Sequence[Any], kwargs: dict[str, Any]) -> tuple[Self, dict[str, Any]]:
|
|
160
176
|
result: int = -1
|
|
@@ -185,49 +201,115 @@ class Function(ABC):
|
|
|
185
201
|
|
|
186
202
|
signature = self.signatures[signature_idx]
|
|
187
203
|
bound_args = signature.py_signature.bind(*args, **kwargs).arguments
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
return
|
|
204
|
+
normalized_args = {k: exprs.Expr.from_object(v) for k, v in bound_args.items()}
|
|
205
|
+
self._resolved_fns[signature_idx].validate_call(normalized_args)
|
|
206
|
+
return normalized_args
|
|
191
207
|
|
|
192
|
-
def validate_call(self, bound_args: dict[str,
|
|
208
|
+
def validate_call(self, bound_args: dict[str, Optional['exprs.Expr']]) -> None:
|
|
193
209
|
"""Override this to do custom validation of the arguments"""
|
|
194
210
|
assert not self.is_polymorphic
|
|
211
|
+
self.signature.validate_args(bound_args, context=f'in function {self.name!r}')
|
|
195
212
|
|
|
196
|
-
def
|
|
197
|
-
"""Return the kwargs to pass to callable, given kwargs passed to this function"""
|
|
198
|
-
bound_args = self.signature.py_signature.bind(**kwargs).arguments
|
|
199
|
-
# add defaults to bound_args, if not already present
|
|
200
|
-
bound_args.update(
|
|
201
|
-
{
|
|
202
|
-
name: param.default
|
|
203
|
-
for name, param in self.signature.parameters.items()
|
|
204
|
-
if name not in bound_args and param.has_default()
|
|
205
|
-
}
|
|
206
|
-
)
|
|
207
|
-
result: dict[str, Any] = {}
|
|
208
|
-
sig = inspect.signature(callable)
|
|
209
|
-
for param in sig.parameters.values():
|
|
210
|
-
if param.name in bound_args:
|
|
211
|
-
result[param.name] = bound_args[param.name]
|
|
212
|
-
return result
|
|
213
|
-
|
|
214
|
-
def call_resource_pool(self, kwargs: dict[str, Any]) -> str:
|
|
213
|
+
def call_resource_pool(self, bound_args: dict[str, 'exprs.Expr']) -> str:
|
|
215
214
|
"""Return the resource pool to use for calling this function with the given arguments"""
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
215
|
+
rp_kwargs = self._assemble_callable_args(self._resource_pool, bound_args)
|
|
216
|
+
if rp_kwargs is None:
|
|
217
|
+
# TODO: What to do in this case? An example where this can happen is if model_id is not a constant
|
|
218
|
+
# in a call to one of the OpenAI endpoints.
|
|
219
|
+
raise excs.Error('Could not determine resource pool')
|
|
220
|
+
return self._resource_pool(**rp_kwargs)
|
|
221
|
+
|
|
222
|
+
def call_return_type(self, bound_args: dict[str, 'exprs.Expr']) -> ts.ColumnType:
|
|
220
223
|
"""Return the type of the value returned by calling this function with the given arguments"""
|
|
221
|
-
assert not self.is_polymorphic
|
|
222
224
|
if self._conditional_return_type is None:
|
|
223
|
-
return
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
225
|
+
# No conditional return type specified; use the default return type
|
|
226
|
+
return_type = self.signature.return_type
|
|
227
|
+
else:
|
|
228
|
+
crt_kwargs = self._assemble_callable_args(self._conditional_return_type, bound_args)
|
|
229
|
+
if crt_kwargs is None:
|
|
230
|
+
# A conditional return type is specified, but one of its arguments is not a constant.
|
|
231
|
+
# Use the default return type
|
|
232
|
+
return_type = self.signature.return_type
|
|
233
|
+
else:
|
|
234
|
+
# A conditional return type is specified and all its arguments are constants; use the specific
|
|
235
|
+
# call return type
|
|
236
|
+
return_type = self._conditional_return_type(**crt_kwargs)
|
|
237
|
+
|
|
238
|
+
if return_type.nullable:
|
|
239
|
+
return return_type
|
|
240
|
+
|
|
241
|
+
# If `return_type` is non-nullable, but the function call has a nullable input to any of its non-nullable
|
|
242
|
+
# parameters, then we need to make it nullable. This is because Pixeltable defaults a function output to
|
|
243
|
+
# `None` when any of its non-nullable inputs are `None`.
|
|
244
|
+
for arg_name, arg in bound_args.items():
|
|
245
|
+
param = self.signature.parameters[arg_name]
|
|
246
|
+
if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
|
|
247
|
+
continue
|
|
248
|
+
if arg.col_type.nullable and not param.col_type.nullable:
|
|
249
|
+
return_type = return_type.copy(nullable=True)
|
|
250
|
+
break
|
|
251
|
+
|
|
252
|
+
return return_type
|
|
253
|
+
|
|
254
|
+
def _assemble_callable_args(
|
|
255
|
+
self, callable: Callable, bound_args: dict[str, 'exprs.Expr']
|
|
256
|
+
) -> Optional[dict[str, Any]]:
|
|
257
|
+
"""
|
|
258
|
+
Return the kwargs to pass to callable, given bound_args passed to this function.
|
|
259
|
+
|
|
260
|
+
This is used by `conditional_return_type` and `get_resource_pool` to determine call-specific characteristics
|
|
261
|
+
of this function.
|
|
262
|
+
|
|
263
|
+
In both cases, the specified `Callable` takes a subset of the parameters of this Function, which may
|
|
264
|
+
be typed as either `Expr`s or Python values. Any parameters typed as Python values expect to see constants
|
|
265
|
+
(Literals); if the corresponding entries in `bound_args` are not constants, then the return value is None.
|
|
266
|
+
"""
|
|
267
|
+
from pixeltable import exprs
|
|
268
|
+
|
|
269
|
+
assert not self.is_polymorphic
|
|
270
|
+
|
|
271
|
+
callable_signature = inspect.signature(callable)
|
|
272
|
+
callable_type_hints = typing.get_type_hints(callable)
|
|
273
|
+
callable_args: dict[str, Any] = {}
|
|
274
|
+
|
|
275
|
+
for param in callable_signature.parameters.values():
|
|
276
|
+
assert param.name in self.signature.parameters
|
|
277
|
+
|
|
278
|
+
arg: exprs.Expr
|
|
228
279
|
if param.name in bound_args:
|
|
229
|
-
|
|
230
|
-
|
|
280
|
+
arg = bound_args[param.name]
|
|
281
|
+
elif self.signature.parameters[param.name].has_default():
|
|
282
|
+
arg = self.signature.parameters[param.name].default
|
|
283
|
+
else:
|
|
284
|
+
# This parameter is missing from bound_args and has no default value, so return None.
|
|
285
|
+
return None
|
|
286
|
+
assert isinstance(arg, exprs.Expr)
|
|
287
|
+
|
|
288
|
+
expects_expr: Optional[type[exprs.Expr]] = None
|
|
289
|
+
type_hint = callable_type_hints.get(param.name)
|
|
290
|
+
if typing.get_origin(type_hint) is not None:
|
|
291
|
+
type_hint = typing.get_origin(type_hint) # Remove type subscript if one exists
|
|
292
|
+
if isinstance(type_hint, type) and issubclass(type_hint, exprs.Expr):
|
|
293
|
+
# The callable expects an Expr for this parameter. We allow for the case where the
|
|
294
|
+
# callable requests a specific subtype of Expr.
|
|
295
|
+
expects_expr = type_hint
|
|
296
|
+
|
|
297
|
+
if expects_expr is not None:
|
|
298
|
+
# The callable is expecting `param.name` to be an Expr. Validate that it's of the appropriate type;
|
|
299
|
+
# otherwise return None.
|
|
300
|
+
if isinstance(arg, expects_expr):
|
|
301
|
+
callable_args[param.name] = arg
|
|
302
|
+
else:
|
|
303
|
+
return None
|
|
304
|
+
else:
|
|
305
|
+
# The callable is expecting `param.name` to be a constant Python value. Unpack a Literal if we find
|
|
306
|
+
# one; otherwise return None.
|
|
307
|
+
if isinstance(arg, exprs.Literal):
|
|
308
|
+
callable_args[param.name] = arg.val
|
|
309
|
+
else:
|
|
310
|
+
return None
|
|
311
|
+
|
|
312
|
+
return callable_args
|
|
231
313
|
|
|
232
314
|
def conditional_return_type(self, fn: Callable[..., ts.ColumnType]) -> Callable[..., ts.ColumnType]:
|
|
233
315
|
"""Instance decorator for specifying a conditional return type for this function"""
|
|
@@ -280,18 +362,38 @@ class Function(ABC):
|
|
|
280
362
|
raise excs.Error(f'Unknown parameter: {k}')
|
|
281
363
|
param = self.signature.parameters[k]
|
|
282
364
|
expr = exprs.Expr.from_object(v)
|
|
365
|
+
if not isinstance(expr, exprs.Literal):
|
|
366
|
+
raise excs.Error(f'Expected a constant value for parameter {k!r} in call to .using()')
|
|
283
367
|
if not param.col_type.is_supertype_of(expr.col_type):
|
|
284
|
-
raise excs.Error(f'Expected type `{param.col_type}` for parameter
|
|
285
|
-
bindings[k] =
|
|
368
|
+
raise excs.Error(f'Expected type `{param.col_type}` for parameter {k!r}; got `{expr.col_type}`')
|
|
369
|
+
bindings[k] = expr
|
|
286
370
|
|
|
287
371
|
residual_params = [p for p in self.signature.parameters.values() if p.name not in bindings]
|
|
288
372
|
|
|
289
|
-
# Bind each remaining parameter to a like-named variable
|
|
290
|
-
for
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
373
|
+
# Bind each remaining parameter to a like-named variable.
|
|
374
|
+
# Also construct the call arguments for the template function call. Variables become args when possible;
|
|
375
|
+
# otherwise, they are passed as kwargs.
|
|
376
|
+
template_args: list[exprs.Expr] = []
|
|
377
|
+
template_kwargs: dict[str, exprs.Expr] = {}
|
|
378
|
+
args_ok = True
|
|
379
|
+
for name, param in self.signature.parameters.items():
|
|
380
|
+
if name in bindings:
|
|
381
|
+
template_kwargs[name] = bindings[name]
|
|
382
|
+
args_ok = False
|
|
383
|
+
else:
|
|
384
|
+
var = exprs.Variable(name, param.col_type)
|
|
385
|
+
bindings[name] = var
|
|
386
|
+
if args_ok and param.kind in (
|
|
387
|
+
inspect.Parameter.POSITIONAL_ONLY,
|
|
388
|
+
inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
|
389
|
+
):
|
|
390
|
+
template_args.append(var)
|
|
391
|
+
else:
|
|
392
|
+
template_kwargs[name] = var
|
|
393
|
+
args_ok = False
|
|
394
|
+
|
|
395
|
+
return_type = self.call_return_type(bindings)
|
|
396
|
+
call = exprs.FunctionCall(self, template_args, template_kwargs, return_type)
|
|
295
397
|
|
|
296
398
|
# Construct the (n-k)-ary signature of the new function. We use `call.col_type` for this, rather than
|
|
297
399
|
# `self.signature.return_type`, because the return type of the new function may be specialized via a
|
|
@@ -370,35 +472,7 @@ class Function(ABC):
|
|
|
370
472
|
assert 'signature' in d and d['signature'] is not None
|
|
371
473
|
instance = resolve_symbol(d['path'])
|
|
372
474
|
assert isinstance(instance, Function)
|
|
373
|
-
|
|
374
|
-
# Load the signature from the DB and check that it is still valid (i.e., is still consistent with a signature
|
|
375
|
-
# in the code).
|
|
376
|
-
signature = Signature.from_dict(d['signature'])
|
|
377
|
-
idx = instance.__find_matching_overload(signature)
|
|
378
|
-
if idx is None:
|
|
379
|
-
# No match; generate an informative error message.
|
|
380
|
-
signature_note_str = 'any of its signatures' if instance.is_polymorphic else 'its signature as'
|
|
381
|
-
instance_signature_str = (
|
|
382
|
-
f'{len(instance.signatures)} signatures' if instance.is_polymorphic else str(instance.signature)
|
|
383
|
-
)
|
|
384
|
-
# TODO: Handle this more gracefully (instead of failing the DB load, allow the DB load to succeed, but
|
|
385
|
-
# mark any enclosing FunctionCall as unusable). It's the same issue as dealing with a renamed UDF or
|
|
386
|
-
# FunctionCall return type mismatch.
|
|
387
|
-
raise excs.Error(
|
|
388
|
-
f'The signature stored in the database for the UDF `{instance.self_path}` no longer matches '
|
|
389
|
-
f'{signature_note_str} as currently defined in the code.\nThis probably means that the code for '
|
|
390
|
-
f'`{instance.self_path}` has changed in a backward-incompatible way.\n'
|
|
391
|
-
f'Signature in database: {signature}\n'
|
|
392
|
-
f'Signature in code: {instance_signature_str}'
|
|
393
|
-
)
|
|
394
|
-
# We found a match; specialize to the appropriate overload resolution (non-polymorphic form) and return that.
|
|
395
|
-
return instance._resolved_fns[idx]
|
|
396
|
-
|
|
397
|
-
def __find_matching_overload(self, sig: Signature) -> Optional[int]:
|
|
398
|
-
for idx, overload_sig in enumerate(self.signatures):
|
|
399
|
-
if sig.is_consistent_with(overload_sig):
|
|
400
|
-
return idx
|
|
401
|
-
return None
|
|
475
|
+
return instance
|
|
402
476
|
|
|
403
477
|
def to_store(self) -> tuple[dict, bytes]:
|
|
404
478
|
"""
|