pixeltable 0.3.4__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -0
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +9 -2
- pixeltable/catalog/column.py +1 -1
- pixeltable/catalog/dir.py +1 -1
- pixeltable/catalog/table.py +1 -1
- pixeltable/catalog/table_version.py +12 -2
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/catalog/view.py +64 -20
- pixeltable/dataframe.py +10 -5
- pixeltable/env.py +12 -0
- pixeltable/exec/expr_eval/evaluators.py +4 -2
- pixeltable/exec/expr_eval/expr_eval_node.py +4 -1
- pixeltable/exprs/comparison.py +8 -4
- pixeltable/exprs/data_row.py +5 -3
- pixeltable/exprs/expr.py +2 -2
- pixeltable/exprs/function_call.py +155 -313
- pixeltable/func/aggregate_function.py +29 -15
- pixeltable/func/callable_function.py +11 -8
- pixeltable/func/expr_template_function.py +3 -9
- pixeltable/func/function.py +148 -74
- pixeltable/func/signature.py +65 -30
- pixeltable/func/udf.py +1 -1
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/deepseek.py +121 -0
- pixeltable/functions/image.py +7 -7
- pixeltable/functions/openai.py +23 -9
- pixeltable/functions/video.py +14 -7
- pixeltable/globals.py +14 -3
- pixeltable/index/embedding_index.py +4 -13
- pixeltable/io/globals.py +88 -77
- pixeltable/io/hf_datasets.py +34 -34
- pixeltable/io/pandas.py +75 -76
- pixeltable/io/parquet.py +19 -27
- pixeltable/io/utils.py +115 -0
- pixeltable/iterators/audio.py +2 -1
- pixeltable/iterators/video.py +1 -1
- pixeltable/metadata/__init__.py +2 -1
- pixeltable/metadata/converters/convert_15.py +18 -8
- pixeltable/metadata/converters/convert_27.py +31 -0
- pixeltable/metadata/converters/convert_28.py +15 -0
- pixeltable/metadata/converters/convert_29.py +111 -0
- pixeltable/metadata/converters/util.py +12 -1
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +8 -0
- pixeltable/share/__init__.py +1 -0
- pixeltable/share/packager.py +41 -13
- pixeltable/share/publish.py +97 -0
- pixeltable/type_system.py +40 -14
- pixeltable/utils/__init__.py +41 -0
- pixeltable/utils/arrow.py +40 -7
- pixeltable/utils/formatter.py +1 -1
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.5.dist-info}/METADATA +34 -49
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.5.dist-info}/RECORD +57 -51
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.5.dist-info}/WHEEL +1 -1
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.5.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.5.dist-info}/entry_points.txt +0 -0
|
@@ -12,7 +12,7 @@ from .globals import validate_symbol_path
|
|
|
12
12
|
from .signature import Parameter, Signature
|
|
13
13
|
|
|
14
14
|
if TYPE_CHECKING:
|
|
15
|
-
import
|
|
15
|
+
from pixeltable import exprs
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class Aggregator(abc.ABC):
|
|
@@ -80,6 +80,8 @@ class AggregateFunction(Function):
|
|
|
80
80
|
"""Inspects the Aggregator class to infer the corresponding function signature. Returns the
|
|
81
81
|
inferred signature along with the list of init_param_names (for downstream error handling).
|
|
82
82
|
"""
|
|
83
|
+
from pixeltable import exprs
|
|
84
|
+
|
|
83
85
|
# infer type parameters; set return_type=InvalidType() because it has no meaning here
|
|
84
86
|
init_sig = Signature.create(
|
|
85
87
|
py_fn=cls.__init__, return_type=ts.InvalidType(), is_cls_method=True, type_substitutions=type_substitutions
|
|
@@ -102,14 +104,24 @@ class AggregateFunction(Function):
|
|
|
102
104
|
py_update_params = list(inspect.signature(cls.update).parameters.values())[1:] # leave out self
|
|
103
105
|
assert len(py_update_params) == len(update_types)
|
|
104
106
|
update_params = [
|
|
105
|
-
Parameter(
|
|
107
|
+
Parameter(
|
|
108
|
+
p.name,
|
|
109
|
+
col_type=update_types[i],
|
|
110
|
+
kind=p.kind,
|
|
111
|
+
default=exprs.Expr.from_object(p.default), # type: ignore[arg-type]
|
|
112
|
+
)
|
|
106
113
|
for i, p in enumerate(py_update_params)
|
|
107
114
|
]
|
|
108
115
|
# starting at 1: leave out self
|
|
109
116
|
py_init_params = list(inspect.signature(cls.__init__).parameters.values())[1:]
|
|
110
117
|
assert len(py_init_params) == len(init_types)
|
|
111
118
|
init_params = [
|
|
112
|
-
Parameter(
|
|
119
|
+
Parameter(
|
|
120
|
+
p.name,
|
|
121
|
+
col_type=init_types[i],
|
|
122
|
+
kind=inspect.Parameter.KEYWORD_ONLY,
|
|
123
|
+
default=exprs.Expr.from_object(p.default), # type: ignore[arg-type]
|
|
124
|
+
)
|
|
113
125
|
for i, p in enumerate(py_init_params)
|
|
114
126
|
]
|
|
115
127
|
duplicate_params = set(p.name for p in init_params) & set(p.name for p in update_params)
|
|
@@ -157,7 +169,7 @@ class AggregateFunction(Function):
|
|
|
157
169
|
res += '\n\n' + inspect.getdoc(self.agg_classes[0].update)
|
|
158
170
|
return res
|
|
159
171
|
|
|
160
|
-
def __call__(self, *args:
|
|
172
|
+
def __call__(self, *args: Any, **kwargs: Any) -> 'exprs.FunctionCall':
|
|
161
173
|
from pixeltable import exprs
|
|
162
174
|
|
|
163
175
|
# perform semantic analysis of special parameters 'order_by' and 'group_by'
|
|
@@ -194,29 +206,31 @@ class AggregateFunction(Function):
|
|
|
194
206
|
)
|
|
195
207
|
group_by_clause = kwargs.pop(self.GROUP_BY_PARAM)
|
|
196
208
|
|
|
209
|
+
args = [exprs.Expr.from_object(arg) for arg in args]
|
|
210
|
+
kwargs = {k: exprs.Expr.from_object(v) for k, v in kwargs.items()}
|
|
211
|
+
|
|
197
212
|
resolved_fn, bound_args = self._bind_to_matching_signature(args, kwargs)
|
|
198
|
-
return_type = resolved_fn.call_return_type(
|
|
213
|
+
return_type = resolved_fn.call_return_type(bound_args)
|
|
214
|
+
|
|
199
215
|
return exprs.FunctionCall(
|
|
200
216
|
resolved_fn,
|
|
201
|
-
|
|
217
|
+
args,
|
|
218
|
+
kwargs,
|
|
202
219
|
return_type,
|
|
203
220
|
order_by_clause=[order_by_clause] if order_by_clause is not None else [],
|
|
204
221
|
group_by_clause=[group_by_clause] if group_by_clause is not None else [],
|
|
205
222
|
)
|
|
206
223
|
|
|
207
|
-
def validate_call(self, bound_args: dict[str,
|
|
208
|
-
# check that init parameters are not Exprs
|
|
209
|
-
# TODO: do this in the planner (check that init parameters are either constants or only refer to grouping exprs)
|
|
224
|
+
def validate_call(self, bound_args: dict[str, 'exprs.Expr']) -> None:
|
|
210
225
|
from pixeltable import exprs
|
|
211
226
|
|
|
212
|
-
|
|
227
|
+
super().validate_call(bound_args)
|
|
213
228
|
|
|
229
|
+
# check that init parameters are not Exprs
|
|
230
|
+
# TODO: do this in the planner (check that init parameters are either constants or only refer to grouping exprs)
|
|
214
231
|
for param_name in self.init_param_names[0]:
|
|
215
|
-
if param_name in bound_args and isinstance(bound_args[param_name], exprs.
|
|
216
|
-
raise excs.Error(
|
|
217
|
-
f'{self.display_name}(): init() parameter {param_name} needs to be a constant, not a Pixeltable '
|
|
218
|
-
f'expression'
|
|
219
|
-
)
|
|
232
|
+
if param_name in bound_args and not isinstance(bound_args[param_name], exprs.Literal):
|
|
233
|
+
raise excs.Error(f'{self.display_name}(): init() parameter {param_name!r} must be a constant value')
|
|
220
234
|
|
|
221
235
|
def __repr__(self) -> str:
|
|
222
236
|
return f'<Pixeltable Aggregator {self.name}>'
|
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import inspect
|
|
5
|
-
from typing import Any, Callable, Optional, Sequence
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
8
|
import cloudpickle # type: ignore[import-untyped]
|
|
@@ -12,6 +12,9 @@ import pixeltable.exceptions as excs
|
|
|
12
12
|
from .function import Function
|
|
13
13
|
from .signature import Signature
|
|
14
14
|
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from pixeltable import exprs
|
|
17
|
+
|
|
15
18
|
|
|
16
19
|
class CallableFunction(Function):
|
|
17
20
|
"""Pixeltable Function backed by a Python Callable.
|
|
@@ -192,18 +195,18 @@ class CallableFunction(Function):
|
|
|
192
195
|
batch_size = md['batch_size']
|
|
193
196
|
return CallableFunction([sig], [py_fn], self_name=name, batch_size=batch_size)
|
|
194
197
|
|
|
195
|
-
def validate_call(self, bound_args: dict[str,
|
|
198
|
+
def validate_call(self, bound_args: dict[str, 'exprs.Expr']) -> None:
|
|
196
199
|
from pixeltable import exprs
|
|
197
200
|
|
|
198
|
-
|
|
201
|
+
super().validate_call(bound_args)
|
|
199
202
|
if self.is_batched:
|
|
200
203
|
signature = self.signatures[0]
|
|
201
204
|
for param in signature.constant_parameters:
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
)
|
|
205
|
+
# Check that constant parameters map to constant arguments. It's ok for the argument to be a Variable,
|
|
206
|
+
# since in that case the FunctionCall is part of an unresolved template; the check will be done again
|
|
207
|
+
# when the template is fully resolved.
|
|
208
|
+
if param.name in bound_args and not isinstance(bound_args[param.name], (exprs.Literal, exprs.Variable)):
|
|
209
|
+
raise ValueError(f'{self.display_name}(): parameter {param.name} must be a constant value')
|
|
207
210
|
|
|
208
211
|
def __repr__(self) -> str:
|
|
209
212
|
return f'<Pixeltable UDF {self.name}>'
|
|
@@ -32,17 +32,11 @@ class ExprTemplate:
|
|
|
32
32
|
assert var.name in self.param_exprs, f"Variable '{var.name}' in expression is not a parameter"
|
|
33
33
|
|
|
34
34
|
# verify default values
|
|
35
|
-
self.defaults: dict[str, exprs.Literal] = {}
|
|
35
|
+
self.defaults: dict[str, exprs.Literal] = {}
|
|
36
36
|
for param in self.signature.parameters.values():
|
|
37
|
-
if param.default is
|
|
37
|
+
if param.default is None:
|
|
38
38
|
continue
|
|
39
|
-
|
|
40
|
-
try:
|
|
41
|
-
literal_default = exprs.Literal(param.default, col_type=param_expr.col_type)
|
|
42
|
-
self.defaults[param.name] = literal_default
|
|
43
|
-
except TypeError as e:
|
|
44
|
-
msg = str(e)
|
|
45
|
-
raise excs.Error(f"Default value for parameter '{param.name}': {msg[0].lower() + msg[1:]}")
|
|
39
|
+
self.defaults[param.name] = param.default
|
|
46
40
|
|
|
47
41
|
|
|
48
42
|
class ExprTemplateFunction(Function):
|
pixeltable/func/function.py
CHANGED
|
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import importlib
|
|
4
4
|
import inspect
|
|
5
|
+
import typing
|
|
5
6
|
from abc import ABC, abstractmethod
|
|
6
7
|
from copy import copy
|
|
7
8
|
from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence, cast
|
|
@@ -17,6 +18,8 @@ from .globals import resolve_symbol
|
|
|
17
18
|
from .signature import Signature
|
|
18
19
|
|
|
19
20
|
if TYPE_CHECKING:
|
|
21
|
+
from pixeltable import exprs
|
|
22
|
+
|
|
20
23
|
from .expr_template_function import ExprTemplate, ExprTemplateFunction
|
|
21
24
|
|
|
22
25
|
|
|
@@ -152,9 +155,22 @@ class Function(ABC):
|
|
|
152
155
|
def __call__(self, *args: Any, **kwargs: Any) -> 'pxt.exprs.FunctionCall':
|
|
153
156
|
from pixeltable import exprs
|
|
154
157
|
|
|
158
|
+
args = [exprs.Expr.from_object(arg) for arg in args]
|
|
159
|
+
kwargs = {k: exprs.Expr.from_object(v) for k, v in kwargs.items()}
|
|
160
|
+
|
|
161
|
+
for i, expr in enumerate(args):
|
|
162
|
+
if expr is None:
|
|
163
|
+
raise excs.Error(f'Argument {i + 1} in call to {self.self_path!r} is not a valid Pixeltable expression')
|
|
164
|
+
for param_name, expr in kwargs.items():
|
|
165
|
+
if expr is None:
|
|
166
|
+
raise excs.Error(
|
|
167
|
+
f'Argument {param_name!r} in call to {self.self_path!r} is not a valid Pixeltable expression'
|
|
168
|
+
)
|
|
169
|
+
|
|
155
170
|
resolved_fn, bound_args = self._bind_to_matching_signature(args, kwargs)
|
|
156
|
-
return_type = resolved_fn.call_return_type(
|
|
157
|
-
|
|
171
|
+
return_type = resolved_fn.call_return_type(bound_args)
|
|
172
|
+
|
|
173
|
+
return exprs.FunctionCall(resolved_fn, args, kwargs, return_type)
|
|
158
174
|
|
|
159
175
|
def _bind_to_matching_signature(self, args: Sequence[Any], kwargs: dict[str, Any]) -> tuple[Self, dict[str, Any]]:
|
|
160
176
|
result: int = -1
|
|
@@ -185,49 +201,115 @@ class Function(ABC):
|
|
|
185
201
|
|
|
186
202
|
signature = self.signatures[signature_idx]
|
|
187
203
|
bound_args = signature.py_signature.bind(*args, **kwargs).arguments
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
return
|
|
204
|
+
normalized_args = {k: exprs.Expr.from_object(v) for k, v in bound_args.items()}
|
|
205
|
+
self._resolved_fns[signature_idx].validate_call(normalized_args)
|
|
206
|
+
return normalized_args
|
|
191
207
|
|
|
192
|
-
def validate_call(self, bound_args: dict[str,
|
|
208
|
+
def validate_call(self, bound_args: dict[str, Optional['exprs.Expr']]) -> None:
|
|
193
209
|
"""Override this to do custom validation of the arguments"""
|
|
194
210
|
assert not self.is_polymorphic
|
|
211
|
+
self.signature.validate_args(bound_args, context=f'in function {self.name!r}')
|
|
195
212
|
|
|
196
|
-
def
|
|
197
|
-
"""Return the kwargs to pass to callable, given kwargs passed to this function"""
|
|
198
|
-
bound_args = self.signature.py_signature.bind(**kwargs).arguments
|
|
199
|
-
# add defaults to bound_args, if not already present
|
|
200
|
-
bound_args.update(
|
|
201
|
-
{
|
|
202
|
-
name: param.default
|
|
203
|
-
for name, param in self.signature.parameters.items()
|
|
204
|
-
if name not in bound_args and param.has_default()
|
|
205
|
-
}
|
|
206
|
-
)
|
|
207
|
-
result: dict[str, Any] = {}
|
|
208
|
-
sig = inspect.signature(callable)
|
|
209
|
-
for param in sig.parameters.values():
|
|
210
|
-
if param.name in bound_args:
|
|
211
|
-
result[param.name] = bound_args[param.name]
|
|
212
|
-
return result
|
|
213
|
-
|
|
214
|
-
def call_resource_pool(self, kwargs: dict[str, Any]) -> str:
|
|
213
|
+
def call_resource_pool(self, bound_args: dict[str, 'exprs.Expr']) -> str:
|
|
215
214
|
"""Return the resource pool to use for calling this function with the given arguments"""
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
215
|
+
rp_kwargs = self._assemble_callable_args(self._resource_pool, bound_args)
|
|
216
|
+
if rp_kwargs is None:
|
|
217
|
+
# TODO: What to do in this case? An example where this can happen is if model_id is not a constant
|
|
218
|
+
# in a call to one of the OpenAI endpoints.
|
|
219
|
+
raise excs.Error('Could not determine resource pool')
|
|
220
|
+
return self._resource_pool(**rp_kwargs)
|
|
221
|
+
|
|
222
|
+
def call_return_type(self, bound_args: dict[str, 'exprs.Expr']) -> ts.ColumnType:
|
|
220
223
|
"""Return the type of the value returned by calling this function with the given arguments"""
|
|
221
|
-
assert not self.is_polymorphic
|
|
222
224
|
if self._conditional_return_type is None:
|
|
223
|
-
return
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
225
|
+
# No conditional return type specified; use the default return type
|
|
226
|
+
return_type = self.signature.return_type
|
|
227
|
+
else:
|
|
228
|
+
crt_kwargs = self._assemble_callable_args(self._conditional_return_type, bound_args)
|
|
229
|
+
if crt_kwargs is None:
|
|
230
|
+
# A conditional return type is specified, but one of its arguments is not a constant.
|
|
231
|
+
# Use the default return type
|
|
232
|
+
return_type = self.signature.return_type
|
|
233
|
+
else:
|
|
234
|
+
# A conditional return type is specified and all its arguments are constants; use the specific
|
|
235
|
+
# call return type
|
|
236
|
+
return_type = self._conditional_return_type(**crt_kwargs)
|
|
237
|
+
|
|
238
|
+
if return_type.nullable:
|
|
239
|
+
return return_type
|
|
240
|
+
|
|
241
|
+
# If `return_type` is non-nullable, but the function call has a nullable input to any of its non-nullable
|
|
242
|
+
# parameters, then we need to make it nullable. This is because Pixeltable defaults a function output to
|
|
243
|
+
# `None` when any of its non-nullable inputs are `None`.
|
|
244
|
+
for arg_name, arg in bound_args.items():
|
|
245
|
+
param = self.signature.parameters[arg_name]
|
|
246
|
+
if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
|
|
247
|
+
continue
|
|
248
|
+
if arg.col_type.nullable and not param.col_type.nullable:
|
|
249
|
+
return_type = return_type.copy(nullable=True)
|
|
250
|
+
break
|
|
251
|
+
|
|
252
|
+
return return_type
|
|
253
|
+
|
|
254
|
+
def _assemble_callable_args(
|
|
255
|
+
self, callable: Callable, bound_args: dict[str, 'exprs.Expr']
|
|
256
|
+
) -> Optional[dict[str, Any]]:
|
|
257
|
+
"""
|
|
258
|
+
Return the kwargs to pass to callable, given bound_args passed to this function.
|
|
259
|
+
|
|
260
|
+
This is used by `conditional_return_type` and `get_resource_pool` to determine call-specific characteristics
|
|
261
|
+
of this function.
|
|
262
|
+
|
|
263
|
+
In both cases, the specified `Callable` takes a subset of the parameters of this Function, which may
|
|
264
|
+
be typed as either `Expr`s or Python values. Any parameters typed as Python values expect to see constants
|
|
265
|
+
(Literals); if the corresponding entries in `bound_args` are not constants, then the return value is None.
|
|
266
|
+
"""
|
|
267
|
+
from pixeltable import exprs
|
|
268
|
+
|
|
269
|
+
assert not self.is_polymorphic
|
|
270
|
+
|
|
271
|
+
callable_signature = inspect.signature(callable)
|
|
272
|
+
callable_type_hints = typing.get_type_hints(callable)
|
|
273
|
+
callable_args: dict[str, Any] = {}
|
|
274
|
+
|
|
275
|
+
for param in callable_signature.parameters.values():
|
|
276
|
+
assert param.name in self.signature.parameters
|
|
277
|
+
|
|
278
|
+
arg: exprs.Expr
|
|
228
279
|
if param.name in bound_args:
|
|
229
|
-
|
|
230
|
-
|
|
280
|
+
arg = bound_args[param.name]
|
|
281
|
+
elif self.signature.parameters[param.name].has_default():
|
|
282
|
+
arg = self.signature.parameters[param.name].default
|
|
283
|
+
else:
|
|
284
|
+
# This parameter is missing from bound_args and has no default value, so return None.
|
|
285
|
+
return None
|
|
286
|
+
assert isinstance(arg, exprs.Expr)
|
|
287
|
+
|
|
288
|
+
expects_expr: Optional[type[exprs.Expr]] = None
|
|
289
|
+
type_hint = callable_type_hints.get(param.name)
|
|
290
|
+
if typing.get_origin(type_hint) is not None:
|
|
291
|
+
type_hint = typing.get_origin(type_hint) # Remove type subscript if one exists
|
|
292
|
+
if isinstance(type_hint, type) and issubclass(type_hint, exprs.Expr):
|
|
293
|
+
# The callable expects an Expr for this parameter. We allow for the case where the
|
|
294
|
+
# callable requests a specific subtype of Expr.
|
|
295
|
+
expects_expr = type_hint
|
|
296
|
+
|
|
297
|
+
if expects_expr is not None:
|
|
298
|
+
# The callable is expecting `param.name` to be an Expr. Validate that it's of the appropriate type;
|
|
299
|
+
# otherwise return None.
|
|
300
|
+
if isinstance(arg, expects_expr):
|
|
301
|
+
callable_args[param.name] = arg
|
|
302
|
+
else:
|
|
303
|
+
return None
|
|
304
|
+
else:
|
|
305
|
+
# The callable is expecting `param.name` to be a constant Python value. Unpack a Literal if we find
|
|
306
|
+
# one; otherwise return None.
|
|
307
|
+
if isinstance(arg, exprs.Literal):
|
|
308
|
+
callable_args[param.name] = arg.val
|
|
309
|
+
else:
|
|
310
|
+
return None
|
|
311
|
+
|
|
312
|
+
return callable_args
|
|
231
313
|
|
|
232
314
|
def conditional_return_type(self, fn: Callable[..., ts.ColumnType]) -> Callable[..., ts.ColumnType]:
|
|
233
315
|
"""Instance decorator for specifying a conditional return type for this function"""
|
|
@@ -280,18 +362,38 @@ class Function(ABC):
|
|
|
280
362
|
raise excs.Error(f'Unknown parameter: {k}')
|
|
281
363
|
param = self.signature.parameters[k]
|
|
282
364
|
expr = exprs.Expr.from_object(v)
|
|
365
|
+
if not isinstance(expr, exprs.Literal):
|
|
366
|
+
raise excs.Error(f'Expected a constant value for parameter {k!r} in call to .using()')
|
|
283
367
|
if not param.col_type.is_supertype_of(expr.col_type):
|
|
284
|
-
raise excs.Error(f'Expected type `{param.col_type}` for parameter
|
|
285
|
-
bindings[k] =
|
|
368
|
+
raise excs.Error(f'Expected type `{param.col_type}` for parameter {k!r}; got `{expr.col_type}`')
|
|
369
|
+
bindings[k] = expr
|
|
286
370
|
|
|
287
371
|
residual_params = [p for p in self.signature.parameters.values() if p.name not in bindings]
|
|
288
372
|
|
|
289
|
-
# Bind each remaining parameter to a like-named variable
|
|
290
|
-
for
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
373
|
+
# Bind each remaining parameter to a like-named variable.
|
|
374
|
+
# Also construct the call arguments for the template function call. Variables become args when possible;
|
|
375
|
+
# otherwise, they are passed as kwargs.
|
|
376
|
+
template_args: list[exprs.Expr] = []
|
|
377
|
+
template_kwargs: dict[str, exprs.Expr] = {}
|
|
378
|
+
args_ok = True
|
|
379
|
+
for name, param in self.signature.parameters.items():
|
|
380
|
+
if name in bindings:
|
|
381
|
+
template_kwargs[name] = bindings[name]
|
|
382
|
+
args_ok = False
|
|
383
|
+
else:
|
|
384
|
+
var = exprs.Variable(name, param.col_type)
|
|
385
|
+
bindings[name] = var
|
|
386
|
+
if args_ok and param.kind in (
|
|
387
|
+
inspect.Parameter.POSITIONAL_ONLY,
|
|
388
|
+
inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
|
389
|
+
):
|
|
390
|
+
template_args.append(var)
|
|
391
|
+
else:
|
|
392
|
+
template_kwargs[name] = var
|
|
393
|
+
args_ok = False
|
|
394
|
+
|
|
395
|
+
return_type = self.call_return_type(bindings)
|
|
396
|
+
call = exprs.FunctionCall(self, template_args, template_kwargs, return_type)
|
|
295
397
|
|
|
296
398
|
# Construct the (n-k)-ary signature of the new function. We use `call.col_type` for this, rather than
|
|
297
399
|
# `self.signature.return_type`, because the return type of the new function may be specialized via a
|
|
@@ -370,35 +472,7 @@ class Function(ABC):
|
|
|
370
472
|
assert 'signature' in d and d['signature'] is not None
|
|
371
473
|
instance = resolve_symbol(d['path'])
|
|
372
474
|
assert isinstance(instance, Function)
|
|
373
|
-
|
|
374
|
-
# Load the signature from the DB and check that it is still valid (i.e., is still consistent with a signature
|
|
375
|
-
# in the code).
|
|
376
|
-
signature = Signature.from_dict(d['signature'])
|
|
377
|
-
idx = instance.__find_matching_overload(signature)
|
|
378
|
-
if idx is None:
|
|
379
|
-
# No match; generate an informative error message.
|
|
380
|
-
signature_note_str = 'any of its signatures' if instance.is_polymorphic else 'its signature as'
|
|
381
|
-
instance_signature_str = (
|
|
382
|
-
f'{len(instance.signatures)} signatures' if instance.is_polymorphic else str(instance.signature)
|
|
383
|
-
)
|
|
384
|
-
# TODO: Handle this more gracefully (instead of failing the DB load, allow the DB load to succeed, but
|
|
385
|
-
# mark any enclosing FunctionCall as unusable). It's the same issue as dealing with a renamed UDF or
|
|
386
|
-
# FunctionCall return type mismatch.
|
|
387
|
-
raise excs.Error(
|
|
388
|
-
f'The signature stored in the database for the UDF `{instance.self_path}` no longer matches '
|
|
389
|
-
f'{signature_note_str} as currently defined in the code.\nThis probably means that the code for '
|
|
390
|
-
f'`{instance.self_path}` has changed in a backward-incompatible way.\n'
|
|
391
|
-
f'Signature in database: {signature}\n'
|
|
392
|
-
f'Signature in code: {instance_signature_str}'
|
|
393
|
-
)
|
|
394
|
-
# We found a match; specialize to the appropriate overload resolution (non-polymorphic form) and return that.
|
|
395
|
-
return instance._resolved_fns[idx]
|
|
396
|
-
|
|
397
|
-
def __find_matching_overload(self, sig: Signature) -> Optional[int]:
|
|
398
|
-
for idx, overload_sig in enumerate(self.signatures):
|
|
399
|
-
if sig.is_consistent_with(overload_sig):
|
|
400
|
-
return idx
|
|
401
|
-
return None
|
|
475
|
+
return instance
|
|
402
476
|
|
|
403
477
|
def to_store(self) -> tuple[dict, bytes]:
|
|
404
478
|
"""
|
pixeltable/func/signature.py
CHANGED
|
@@ -1,16 +1,18 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import dataclasses
|
|
4
|
-
import enum
|
|
5
4
|
import inspect
|
|
6
5
|
import json
|
|
7
6
|
import logging
|
|
8
7
|
import typing
|
|
9
|
-
from typing import Any, Callable, Optional
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional
|
|
10
9
|
|
|
11
10
|
import pixeltable.exceptions as excs
|
|
12
11
|
import pixeltable.type_system as ts
|
|
13
12
|
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from pixeltable import exprs
|
|
15
|
+
|
|
14
16
|
_logger = logging.getLogger('pixeltable')
|
|
15
17
|
|
|
16
18
|
|
|
@@ -21,25 +23,24 @@ class Parameter:
|
|
|
21
23
|
kind: inspect._ParameterKind
|
|
22
24
|
# for some reason, this needs to precede is_batched in the dataclass definition,
|
|
23
25
|
# otherwise Python complains that an argument with a default is followed by an argument without a default
|
|
24
|
-
default:
|
|
26
|
+
default: Optional['exprs.Literal'] = None # default value for the parameter
|
|
25
27
|
is_batched: bool = False # True if the parameter is a batched parameter (eg, Batch[dict])
|
|
26
28
|
|
|
27
29
|
def __post_init__(self) -> None:
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
raise excs.Error(f'Default value for parameter {self.name}: {str(e)}')
|
|
30
|
+
from pixeltable import exprs
|
|
31
|
+
|
|
32
|
+
if self.default is not None:
|
|
33
|
+
if self.col_type is None:
|
|
34
|
+
raise excs.Error(f'Cannot have a default value for variable parameter {self.name!r}')
|
|
35
|
+
if not isinstance(self.default, exprs.Literal):
|
|
36
|
+
raise excs.Error(f'Default value for parameter {self.name!r} is not a constant')
|
|
37
|
+
if not self.col_type.is_supertype_of(self.default.col_type):
|
|
38
|
+
raise excs.Error(
|
|
39
|
+
f'Default value for parameter {self.name!r} is not of type {self.col_type!r}: {self.default}'
|
|
40
|
+
)
|
|
40
41
|
|
|
41
42
|
def has_default(self) -> bool:
|
|
42
|
-
return self.default is not
|
|
43
|
+
return self.default is not None
|
|
43
44
|
|
|
44
45
|
def as_dict(self) -> dict[str, Any]:
|
|
45
46
|
return {
|
|
@@ -47,17 +48,15 @@ class Parameter:
|
|
|
47
48
|
'col_type': self.col_type.as_dict() if self.col_type is not None else None,
|
|
48
49
|
'kind': self.kind.name,
|
|
49
50
|
'is_batched': self.is_batched,
|
|
50
|
-
'
|
|
51
|
-
'default': self.default if self.has_default() else None,
|
|
51
|
+
'default': None if self.default is None else self.default.as_dict(),
|
|
52
52
|
}
|
|
53
53
|
|
|
54
54
|
@classmethod
|
|
55
55
|
def from_dict(cls, d: dict[str, Any]) -> Parameter:
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
else
|
|
60
|
-
default = inspect.Parameter.empty
|
|
56
|
+
from pixeltable import exprs
|
|
57
|
+
|
|
58
|
+
assert d['default'] is None or isinstance(d['default'], dict), d
|
|
59
|
+
default = None if d['default'] is None else exprs.Literal.from_dict(d['default'])
|
|
61
60
|
return cls(
|
|
62
61
|
name=d['name'],
|
|
63
62
|
col_type=ts.ColumnType.from_dict(d['col_type']) if d['col_type'] is not None else None,
|
|
@@ -67,7 +66,8 @@ class Parameter:
|
|
|
67
66
|
)
|
|
68
67
|
|
|
69
68
|
def to_py_param(self) -> inspect.Parameter:
|
|
70
|
-
|
|
69
|
+
py_default = self.default.val if self.default is not None else inspect.Parameter.empty
|
|
70
|
+
return inspect.Parameter(self.name, self.kind, default=py_default)
|
|
71
71
|
|
|
72
72
|
|
|
73
73
|
T = typing.TypeVar('T')
|
|
@@ -147,6 +147,37 @@ class Signature:
|
|
|
147
147
|
|
|
148
148
|
return True
|
|
149
149
|
|
|
150
|
+
def validate_args(self, bound_args: dict[str, Optional['exprs.Expr']], context: str = '') -> None:
|
|
151
|
+
if context != '':
|
|
152
|
+
context = f' ({context})'
|
|
153
|
+
|
|
154
|
+
for param_name, arg in bound_args.items():
|
|
155
|
+
assert param_name in self.parameters
|
|
156
|
+
param = self.parameters[param_name]
|
|
157
|
+
is_var_param = param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
|
|
158
|
+
if is_var_param:
|
|
159
|
+
continue
|
|
160
|
+
assert param.col_type is not None
|
|
161
|
+
|
|
162
|
+
if arg is None:
|
|
163
|
+
raise excs.Error(f'Parameter {param_name!r}{context}: invalid argument')
|
|
164
|
+
|
|
165
|
+
# Check that the argument is consistent with the expected parameter type, with the allowance that
|
|
166
|
+
# non-nullable parameters can still accept nullable arguments (since in that event, FunctionCall.eval()
|
|
167
|
+
# detects the Nones and skips evaluation).
|
|
168
|
+
if not (
|
|
169
|
+
param.col_type.is_supertype_of(arg.col_type, ignore_nullable=True)
|
|
170
|
+
# TODO: this is a hack to allow JSON columns to be passed to functions that accept scalar
|
|
171
|
+
# types. It's necessary to avoid littering notebooks with `apply(str)` calls or equivalent.
|
|
172
|
+
# (Previously, this wasn't necessary because `is_supertype_of()` was improperly implemented.)
|
|
173
|
+
# We need to think through the right way to handle this scenario.
|
|
174
|
+
or (arg.col_type.is_json_type() and param.col_type.is_scalar_type())
|
|
175
|
+
):
|
|
176
|
+
raise excs.Error(
|
|
177
|
+
f'Parameter {param_name!r}{context}: argument type {arg.col_type} does not'
|
|
178
|
+
f' match parameter type {param.col_type}'
|
|
179
|
+
)
|
|
180
|
+
|
|
150
181
|
def __eq__(self, other: object) -> bool:
|
|
151
182
|
if not isinstance(other, Signature):
|
|
152
183
|
return False
|
|
@@ -199,6 +230,8 @@ class Signature:
|
|
|
199
230
|
type_substitutions: Optional[dict] = None,
|
|
200
231
|
is_cls_method: bool = False,
|
|
201
232
|
) -> list[Parameter]:
|
|
233
|
+
from pixeltable import exprs
|
|
234
|
+
|
|
202
235
|
assert (py_fn is None) != (py_params is None)
|
|
203
236
|
if py_fn is not None:
|
|
204
237
|
sig = inspect.signature(py_fn)
|
|
@@ -212,7 +245,7 @@ class Signature:
|
|
|
212
245
|
if is_cls_method and idx == 0:
|
|
213
246
|
continue # skip 'self' or 'cls' parameter
|
|
214
247
|
if param.name in cls.SPECIAL_PARAM_NAMES:
|
|
215
|
-
raise excs.Error(f
|
|
248
|
+
raise excs.Error(f'{param.name!r} is a reserved parameter name')
|
|
216
249
|
if param.kind == inspect.Parameter.VAR_POSITIONAL or param.kind == inspect.Parameter.VAR_KEYWORD:
|
|
217
250
|
parameters.append(Parameter(param.name, col_type=None, kind=param.kind))
|
|
218
251
|
continue
|
|
@@ -220,7 +253,7 @@ class Signature:
|
|
|
220
253
|
# check non-var parameters for name collisions and default value compatibility
|
|
221
254
|
if param_types is not None:
|
|
222
255
|
if idx >= len(param_types):
|
|
223
|
-
raise excs.Error(f'Missing type for parameter {param.name}')
|
|
256
|
+
raise excs.Error(f'Missing type for parameter {param.name!r}')
|
|
224
257
|
param_type = param_types[idx]
|
|
225
258
|
is_batched = False
|
|
226
259
|
else:
|
|
@@ -231,12 +264,14 @@ class Signature:
|
|
|
231
264
|
py_type = param.annotation
|
|
232
265
|
param_type, is_batched = cls._infer_type(py_type)
|
|
233
266
|
if param_type is None:
|
|
234
|
-
raise excs.Error(f'Cannot infer pixeltable type for parameter {param.name}')
|
|
267
|
+
raise excs.Error(f'Cannot infer pixeltable type for parameter {param.name!r}')
|
|
268
|
+
|
|
269
|
+
default = None if param.default is inspect.Parameter.empty else exprs.Expr.from_object(param.default)
|
|
270
|
+
if not (default is None or isinstance(default, exprs.Literal)):
|
|
271
|
+
raise excs.Error(f'Default value for parameter {param.name!r} must be a constant')
|
|
235
272
|
|
|
236
273
|
parameters.append(
|
|
237
|
-
Parameter(
|
|
238
|
-
param.name, col_type=param_type, kind=param.kind, is_batched=is_batched, default=param.default
|
|
239
|
-
)
|
|
274
|
+
Parameter(param.name, col_type=param_type, kind=param.kind, is_batched=is_batched, default=default)
|
|
240
275
|
)
|
|
241
276
|
|
|
242
277
|
return parameters
|
pixeltable/func/udf.py
CHANGED
|
@@ -283,7 +283,7 @@ def from_table(
|
|
|
283
283
|
result_dict[name] = var
|
|
284
284
|
# Since this is a data column, it becomes a UDF parameter.
|
|
285
285
|
# If the column is nullable, then the parameter will have a default value of None.
|
|
286
|
-
default_value = None if col.col_type.nullable else
|
|
286
|
+
default_value = exprs.Literal(None) if col.col_type.nullable else None
|
|
287
287
|
param = Parameter(name, col.col_type, inspect._ParameterKind.POSITIONAL_OR_KEYWORD, default_value)
|
|
288
288
|
params.append(param)
|
|
289
289
|
|