pixeltable 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (60) hide show
  1. pixeltable/__init__.py +1 -0
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +9 -2
  4. pixeltable/catalog/column.py +1 -1
  5. pixeltable/catalog/dir.py +1 -1
  6. pixeltable/catalog/table.py +1 -1
  7. pixeltable/catalog/table_version.py +12 -2
  8. pixeltable/catalog/table_version_path.py +2 -2
  9. pixeltable/catalog/view.py +64 -20
  10. pixeltable/dataframe.py +14 -14
  11. pixeltable/env.py +20 -3
  12. pixeltable/exec/component_iteration_node.py +1 -2
  13. pixeltable/exec/expr_eval/evaluators.py +4 -2
  14. pixeltable/exec/expr_eval/expr_eval_node.py +4 -1
  15. pixeltable/exprs/comparison.py +8 -4
  16. pixeltable/exprs/data_row.py +5 -3
  17. pixeltable/exprs/expr.py +9 -2
  18. pixeltable/exprs/function_call.py +155 -313
  19. pixeltable/func/aggregate_function.py +29 -15
  20. pixeltable/func/callable_function.py +11 -8
  21. pixeltable/func/expr_template_function.py +3 -9
  22. pixeltable/func/function.py +148 -74
  23. pixeltable/func/signature.py +65 -30
  24. pixeltable/func/udf.py +1 -1
  25. pixeltable/functions/__init__.py +1 -0
  26. pixeltable/functions/deepseek.py +121 -0
  27. pixeltable/functions/image.py +7 -7
  28. pixeltable/functions/openai.py +49 -10
  29. pixeltable/functions/video.py +14 -7
  30. pixeltable/globals.py +14 -3
  31. pixeltable/index/embedding_index.py +4 -13
  32. pixeltable/io/globals.py +88 -77
  33. pixeltable/io/hf_datasets.py +34 -34
  34. pixeltable/io/pandas.py +75 -87
  35. pixeltable/io/parquet.py +19 -27
  36. pixeltable/io/utils.py +115 -0
  37. pixeltable/iterators/audio.py +2 -1
  38. pixeltable/iterators/video.py +1 -1
  39. pixeltable/metadata/__init__.py +2 -1
  40. pixeltable/metadata/converters/convert_15.py +18 -8
  41. pixeltable/metadata/converters/convert_27.py +31 -0
  42. pixeltable/metadata/converters/convert_28.py +15 -0
  43. pixeltable/metadata/converters/convert_29.py +111 -0
  44. pixeltable/metadata/converters/util.py +12 -1
  45. pixeltable/metadata/notes.py +3 -0
  46. pixeltable/metadata/schema.py +8 -0
  47. pixeltable/share/__init__.py +1 -0
  48. pixeltable/share/packager.py +246 -0
  49. pixeltable/share/publish.py +97 -0
  50. pixeltable/type_system.py +87 -42
  51. pixeltable/utils/__init__.py +41 -0
  52. pixeltable/utils/arrow.py +45 -12
  53. pixeltable/utils/formatter.py +1 -1
  54. pixeltable/utils/iceberg.py +14 -0
  55. pixeltable/utils/media_store.py +1 -1
  56. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/METADATA +37 -50
  57. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/RECORD +60 -51
  58. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/WHEEL +1 -1
  59. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/LICENSE +0 -0
  60. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/entry_points.txt +0 -0
@@ -12,7 +12,7 @@ from .globals import validate_symbol_path
12
12
  from .signature import Parameter, Signature
13
13
 
14
14
  if TYPE_CHECKING:
15
- import pixeltable
15
+ from pixeltable import exprs
16
16
 
17
17
 
18
18
  class Aggregator(abc.ABC):
@@ -80,6 +80,8 @@ class AggregateFunction(Function):
80
80
  """Inspects the Aggregator class to infer the corresponding function signature. Returns the
81
81
  inferred signature along with the list of init_param_names (for downstream error handling).
82
82
  """
83
+ from pixeltable import exprs
84
+
83
85
  # infer type parameters; set return_type=InvalidType() because it has no meaning here
84
86
  init_sig = Signature.create(
85
87
  py_fn=cls.__init__, return_type=ts.InvalidType(), is_cls_method=True, type_substitutions=type_substitutions
@@ -102,14 +104,24 @@ class AggregateFunction(Function):
102
104
  py_update_params = list(inspect.signature(cls.update).parameters.values())[1:] # leave out self
103
105
  assert len(py_update_params) == len(update_types)
104
106
  update_params = [
105
- Parameter(p.name, col_type=update_types[i], kind=p.kind, default=p.default)
107
+ Parameter(
108
+ p.name,
109
+ col_type=update_types[i],
110
+ kind=p.kind,
111
+ default=exprs.Expr.from_object(p.default), # type: ignore[arg-type]
112
+ )
106
113
  for i, p in enumerate(py_update_params)
107
114
  ]
108
115
  # starting at 1: leave out self
109
116
  py_init_params = list(inspect.signature(cls.__init__).parameters.values())[1:]
110
117
  assert len(py_init_params) == len(init_types)
111
118
  init_params = [
112
- Parameter(p.name, col_type=init_types[i], kind=inspect.Parameter.KEYWORD_ONLY, default=p.default)
119
+ Parameter(
120
+ p.name,
121
+ col_type=init_types[i],
122
+ kind=inspect.Parameter.KEYWORD_ONLY,
123
+ default=exprs.Expr.from_object(p.default), # type: ignore[arg-type]
124
+ )
113
125
  for i, p in enumerate(py_init_params)
114
126
  ]
115
127
  duplicate_params = set(p.name for p in init_params) & set(p.name for p in update_params)
@@ -157,7 +169,7 @@ class AggregateFunction(Function):
157
169
  res += '\n\n' + inspect.getdoc(self.agg_classes[0].update)
158
170
  return res
159
171
 
160
- def __call__(self, *args: object, **kwargs: object) -> 'pixeltable.exprs.FunctionCall':
172
+ def __call__(self, *args: Any, **kwargs: Any) -> 'exprs.FunctionCall':
161
173
  from pixeltable import exprs
162
174
 
163
175
  # perform semantic analysis of special parameters 'order_by' and 'group_by'
@@ -194,29 +206,31 @@ class AggregateFunction(Function):
194
206
  )
195
207
  group_by_clause = kwargs.pop(self.GROUP_BY_PARAM)
196
208
 
209
+ args = [exprs.Expr.from_object(arg) for arg in args]
210
+ kwargs = {k: exprs.Expr.from_object(v) for k, v in kwargs.items()}
211
+
197
212
  resolved_fn, bound_args = self._bind_to_matching_signature(args, kwargs)
198
- return_type = resolved_fn.call_return_type(args, kwargs)
213
+ return_type = resolved_fn.call_return_type(bound_args)
214
+
199
215
  return exprs.FunctionCall(
200
216
  resolved_fn,
201
- bound_args,
217
+ args,
218
+ kwargs,
202
219
  return_type,
203
220
  order_by_clause=[order_by_clause] if order_by_clause is not None else [],
204
221
  group_by_clause=[group_by_clause] if group_by_clause is not None else [],
205
222
  )
206
223
 
207
- def validate_call(self, bound_args: dict[str, Any]) -> None:
208
- # check that init parameters are not Exprs
209
- # TODO: do this in the planner (check that init parameters are either constants or only refer to grouping exprs)
224
+ def validate_call(self, bound_args: dict[str, 'exprs.Expr']) -> None:
210
225
  from pixeltable import exprs
211
226
 
212
- assert not self.is_polymorphic
227
+ super().validate_call(bound_args)
213
228
 
229
+ # check that init parameters are not Exprs
230
+ # TODO: do this in the planner (check that init parameters are either constants or only refer to grouping exprs)
214
231
  for param_name in self.init_param_names[0]:
215
- if param_name in bound_args and isinstance(bound_args[param_name], exprs.Expr):
216
- raise excs.Error(
217
- f'{self.display_name}(): init() parameter {param_name} needs to be a constant, not a Pixeltable '
218
- f'expression'
219
- )
232
+ if param_name in bound_args and not isinstance(bound_args[param_name], exprs.Literal):
233
+ raise excs.Error(f'{self.display_name}(): init() parameter {param_name!r} must be a constant value')
220
234
 
221
235
  def __repr__(self) -> str:
222
236
  return f'<Pixeltable Aggregator {self.name}>'
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import asyncio
4
4
  import inspect
5
- from typing import Any, Callable, Optional, Sequence
5
+ from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence
6
6
  from uuid import UUID
7
7
 
8
8
  import cloudpickle # type: ignore[import-untyped]
@@ -12,6 +12,9 @@ import pixeltable.exceptions as excs
12
12
  from .function import Function
13
13
  from .signature import Signature
14
14
 
15
+ if TYPE_CHECKING:
16
+ from pixeltable import exprs
17
+
15
18
 
16
19
  class CallableFunction(Function):
17
20
  """Pixeltable Function backed by a Python Callable.
@@ -192,18 +195,18 @@ class CallableFunction(Function):
192
195
  batch_size = md['batch_size']
193
196
  return CallableFunction([sig], [py_fn], self_name=name, batch_size=batch_size)
194
197
 
195
- def validate_call(self, bound_args: dict[str, Any]) -> None:
198
+ def validate_call(self, bound_args: dict[str, 'exprs.Expr']) -> None:
196
199
  from pixeltable import exprs
197
200
 
198
- assert not self.is_polymorphic
201
+ super().validate_call(bound_args)
199
202
  if self.is_batched:
200
203
  signature = self.signatures[0]
201
204
  for param in signature.constant_parameters:
202
- if param.name in bound_args and isinstance(bound_args[param.name], exprs.Expr):
203
- raise ValueError(
204
- f'{self.display_name}(): '
205
- f'parameter {param.name} must be a constant value, not a Pixeltable expression'
206
- )
205
+ # Check that constant parameters map to constant arguments. It's ok for the argument to be a Variable,
206
+ # since in that case the FunctionCall is part of an unresolved template; the check will be done again
207
+ # when the template is fully resolved.
208
+ if param.name in bound_args and not isinstance(bound_args[param.name], (exprs.Literal, exprs.Variable)):
209
+ raise ValueError(f'{self.display_name}(): parameter {param.name} must be a constant value')
207
210
 
208
211
  def __repr__(self) -> str:
209
212
  return f'<Pixeltable UDF {self.name}>'
@@ -32,17 +32,11 @@ class ExprTemplate:
32
32
  assert var.name in self.param_exprs, f"Variable '{var.name}' in expression is not a parameter"
33
33
 
34
34
  # verify default values
35
- self.defaults: dict[str, exprs.Literal] = {} # key: param name, value: default value converted to a Literal
35
+ self.defaults: dict[str, exprs.Literal] = {}
36
36
  for param in self.signature.parameters.values():
37
- if param.default is inspect.Parameter.empty:
37
+ if param.default is None:
38
38
  continue
39
- param_expr = self.param_exprs[param.name]
40
- try:
41
- literal_default = exprs.Literal(param.default, col_type=param_expr.col_type)
42
- self.defaults[param.name] = literal_default
43
- except TypeError as e:
44
- msg = str(e)
45
- raise excs.Error(f"Default value for parameter '{param.name}': {msg[0].lower() + msg[1:]}")
39
+ self.defaults[param.name] = param.default
46
40
 
47
41
 
48
42
  class ExprTemplateFunction(Function):
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import importlib
4
4
  import inspect
5
+ import typing
5
6
  from abc import ABC, abstractmethod
6
7
  from copy import copy
7
8
  from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence, cast
@@ -17,6 +18,8 @@ from .globals import resolve_symbol
17
18
  from .signature import Signature
18
19
 
19
20
  if TYPE_CHECKING:
21
+ from pixeltable import exprs
22
+
20
23
  from .expr_template_function import ExprTemplate, ExprTemplateFunction
21
24
 
22
25
 
@@ -152,9 +155,22 @@ class Function(ABC):
152
155
  def __call__(self, *args: Any, **kwargs: Any) -> 'pxt.exprs.FunctionCall':
153
156
  from pixeltable import exprs
154
157
 
158
+ args = [exprs.Expr.from_object(arg) for arg in args]
159
+ kwargs = {k: exprs.Expr.from_object(v) for k, v in kwargs.items()}
160
+
161
+ for i, expr in enumerate(args):
162
+ if expr is None:
163
+ raise excs.Error(f'Argument {i + 1} in call to {self.self_path!r} is not a valid Pixeltable expression')
164
+ for param_name, expr in kwargs.items():
165
+ if expr is None:
166
+ raise excs.Error(
167
+ f'Argument {param_name!r} in call to {self.self_path!r} is not a valid Pixeltable expression'
168
+ )
169
+
155
170
  resolved_fn, bound_args = self._bind_to_matching_signature(args, kwargs)
156
- return_type = resolved_fn.call_return_type(args, kwargs)
157
- return exprs.FunctionCall(resolved_fn, bound_args, return_type)
171
+ return_type = resolved_fn.call_return_type(bound_args)
172
+
173
+ return exprs.FunctionCall(resolved_fn, args, kwargs, return_type)
158
174
 
159
175
  def _bind_to_matching_signature(self, args: Sequence[Any], kwargs: dict[str, Any]) -> tuple[Self, dict[str, Any]]:
160
176
  result: int = -1
@@ -185,49 +201,115 @@ class Function(ABC):
185
201
 
186
202
  signature = self.signatures[signature_idx]
187
203
  bound_args = signature.py_signature.bind(*args, **kwargs).arguments
188
- self._resolved_fns[signature_idx].validate_call(bound_args)
189
- exprs.FunctionCall.normalize_args(self.name, signature, bound_args)
190
- return bound_args
204
+ normalized_args = {k: exprs.Expr.from_object(v) for k, v in bound_args.items()}
205
+ self._resolved_fns[signature_idx].validate_call(normalized_args)
206
+ return normalized_args
191
207
 
192
- def validate_call(self, bound_args: dict[str, Any]) -> None:
208
+ def validate_call(self, bound_args: dict[str, Optional['exprs.Expr']]) -> None:
193
209
  """Override this to do custom validation of the arguments"""
194
210
  assert not self.is_polymorphic
211
+ self.signature.validate_args(bound_args, context=f'in function {self.name!r}')
195
212
 
196
- def _get_callable_args(self, callable: Callable, kwargs: dict[str, Any]) -> dict[str, Any]:
197
- """Return the kwargs to pass to callable, given kwargs passed to this function"""
198
- bound_args = self.signature.py_signature.bind(**kwargs).arguments
199
- # add defaults to bound_args, if not already present
200
- bound_args.update(
201
- {
202
- name: param.default
203
- for name, param in self.signature.parameters.items()
204
- if name not in bound_args and param.has_default()
205
- }
206
- )
207
- result: dict[str, Any] = {}
208
- sig = inspect.signature(callable)
209
- for param in sig.parameters.values():
210
- if param.name in bound_args:
211
- result[param.name] = bound_args[param.name]
212
- return result
213
-
214
- def call_resource_pool(self, kwargs: dict[str, Any]) -> str:
213
+ def call_resource_pool(self, bound_args: dict[str, 'exprs.Expr']) -> str:
215
214
  """Return the resource pool to use for calling this function with the given arguments"""
216
- kw_args = self._get_callable_args(self._resource_pool, kwargs)
217
- return self._resource_pool(**kw_args)
218
-
219
- def call_return_type(self, args: Sequence[Any], kwargs: dict[str, Any]) -> ts.ColumnType:
215
+ rp_kwargs = self._assemble_callable_args(self._resource_pool, bound_args)
216
+ if rp_kwargs is None:
217
+ # TODO: What to do in this case? An example where this can happen is if model_id is not a constant
218
+ # in a call to one of the OpenAI endpoints.
219
+ raise excs.Error('Could not determine resource pool')
220
+ return self._resource_pool(**rp_kwargs)
221
+
222
+ def call_return_type(self, bound_args: dict[str, 'exprs.Expr']) -> ts.ColumnType:
220
223
  """Return the type of the value returned by calling this function with the given arguments"""
221
- assert not self.is_polymorphic
222
224
  if self._conditional_return_type is None:
223
- return self.signature.return_type
224
- bound_args = self.signature.py_signature.bind(*args, **kwargs).arguments
225
- kw_args: dict[str, Any] = {}
226
- sig = inspect.signature(self._conditional_return_type)
227
- for param in sig.parameters.values():
225
+ # No conditional return type specified; use the default return type
226
+ return_type = self.signature.return_type
227
+ else:
228
+ crt_kwargs = self._assemble_callable_args(self._conditional_return_type, bound_args)
229
+ if crt_kwargs is None:
230
+ # A conditional return type is specified, but one of its arguments is not a constant.
231
+ # Use the default return type
232
+ return_type = self.signature.return_type
233
+ else:
234
+ # A conditional return type is specified and all its arguments are constants; use the specific
235
+ # call return type
236
+ return_type = self._conditional_return_type(**crt_kwargs)
237
+
238
+ if return_type.nullable:
239
+ return return_type
240
+
241
+ # If `return_type` is non-nullable, but the function call has a nullable input to any of its non-nullable
242
+ # parameters, then we need to make it nullable. This is because Pixeltable defaults a function output to
243
+ # `None` when any of its non-nullable inputs are `None`.
244
+ for arg_name, arg in bound_args.items():
245
+ param = self.signature.parameters[arg_name]
246
+ if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
247
+ continue
248
+ if arg.col_type.nullable and not param.col_type.nullable:
249
+ return_type = return_type.copy(nullable=True)
250
+ break
251
+
252
+ return return_type
253
+
254
+ def _assemble_callable_args(
255
+ self, callable: Callable, bound_args: dict[str, 'exprs.Expr']
256
+ ) -> Optional[dict[str, Any]]:
257
+ """
258
+ Return the kwargs to pass to callable, given bound_args passed to this function.
259
+
260
+ This is used by `conditional_return_type` and `get_resource_pool` to determine call-specific characteristics
261
+ of this function.
262
+
263
+ In both cases, the specified `Callable` takes a subset of the parameters of this Function, which may
264
+ be typed as either `Expr`s or Python values. Any parameters typed as Python values expect to see constants
265
+ (Literals); if the corresponding entries in `bound_args` are not constants, then the return value is None.
266
+ """
267
+ from pixeltable import exprs
268
+
269
+ assert not self.is_polymorphic
270
+
271
+ callable_signature = inspect.signature(callable)
272
+ callable_type_hints = typing.get_type_hints(callable)
273
+ callable_args: dict[str, Any] = {}
274
+
275
+ for param in callable_signature.parameters.values():
276
+ assert param.name in self.signature.parameters
277
+
278
+ arg: exprs.Expr
228
279
  if param.name in bound_args:
229
- kw_args[param.name] = bound_args[param.name]
230
- return self._conditional_return_type(**kw_args)
280
+ arg = bound_args[param.name]
281
+ elif self.signature.parameters[param.name].has_default():
282
+ arg = self.signature.parameters[param.name].default
283
+ else:
284
+ # This parameter is missing from bound_args and has no default value, so return None.
285
+ return None
286
+ assert isinstance(arg, exprs.Expr)
287
+
288
+ expects_expr: Optional[type[exprs.Expr]] = None
289
+ type_hint = callable_type_hints.get(param.name)
290
+ if typing.get_origin(type_hint) is not None:
291
+ type_hint = typing.get_origin(type_hint) # Remove type subscript if one exists
292
+ if isinstance(type_hint, type) and issubclass(type_hint, exprs.Expr):
293
+ # The callable expects an Expr for this parameter. We allow for the case where the
294
+ # callable requests a specific subtype of Expr.
295
+ expects_expr = type_hint
296
+
297
+ if expects_expr is not None:
298
+ # The callable is expecting `param.name` to be an Expr. Validate that it's of the appropriate type;
299
+ # otherwise return None.
300
+ if isinstance(arg, expects_expr):
301
+ callable_args[param.name] = arg
302
+ else:
303
+ return None
304
+ else:
305
+ # The callable is expecting `param.name` to be a constant Python value. Unpack a Literal if we find
306
+ # one; otherwise return None.
307
+ if isinstance(arg, exprs.Literal):
308
+ callable_args[param.name] = arg.val
309
+ else:
310
+ return None
311
+
312
+ return callable_args
231
313
 
232
314
  def conditional_return_type(self, fn: Callable[..., ts.ColumnType]) -> Callable[..., ts.ColumnType]:
233
315
  """Instance decorator for specifying a conditional return type for this function"""
@@ -280,18 +362,38 @@ class Function(ABC):
280
362
  raise excs.Error(f'Unknown parameter: {k}')
281
363
  param = self.signature.parameters[k]
282
364
  expr = exprs.Expr.from_object(v)
365
+ if not isinstance(expr, exprs.Literal):
366
+ raise excs.Error(f'Expected a constant value for parameter {k!r} in call to .using()')
283
367
  if not param.col_type.is_supertype_of(expr.col_type):
284
- raise excs.Error(f'Expected type `{param.col_type}` for parameter `{k}`; got `{expr.col_type}`')
285
- bindings[k] = v # Use the original value, not the Expr (The Expr is only for validation)
368
+ raise excs.Error(f'Expected type `{param.col_type}` for parameter {k!r}; got `{expr.col_type}`')
369
+ bindings[k] = expr
286
370
 
287
371
  residual_params = [p for p in self.signature.parameters.values() if p.name not in bindings]
288
372
 
289
- # Bind each remaining parameter to a like-named variable
290
- for param in residual_params:
291
- bindings[param.name] = exprs.Variable(param.name, param.col_type)
292
-
293
- return_type = self.call_return_type([], bindings)
294
- call = exprs.FunctionCall(self, bindings, return_type)
373
+ # Bind each remaining parameter to a like-named variable.
374
+ # Also construct the call arguments for the template function call. Variables become args when possible;
375
+ # otherwise, they are passed as kwargs.
376
+ template_args: list[exprs.Expr] = []
377
+ template_kwargs: dict[str, exprs.Expr] = {}
378
+ args_ok = True
379
+ for name, param in self.signature.parameters.items():
380
+ if name in bindings:
381
+ template_kwargs[name] = bindings[name]
382
+ args_ok = False
383
+ else:
384
+ var = exprs.Variable(name, param.col_type)
385
+ bindings[name] = var
386
+ if args_ok and param.kind in (
387
+ inspect.Parameter.POSITIONAL_ONLY,
388
+ inspect.Parameter.POSITIONAL_OR_KEYWORD,
389
+ ):
390
+ template_args.append(var)
391
+ else:
392
+ template_kwargs[name] = var
393
+ args_ok = False
394
+
395
+ return_type = self.call_return_type(bindings)
396
+ call = exprs.FunctionCall(self, template_args, template_kwargs, return_type)
295
397
 
296
398
  # Construct the (n-k)-ary signature of the new function. We use `call.col_type` for this, rather than
297
399
  # `self.signature.return_type`, because the return type of the new function may be specialized via a
@@ -370,35 +472,7 @@ class Function(ABC):
370
472
  assert 'signature' in d and d['signature'] is not None
371
473
  instance = resolve_symbol(d['path'])
372
474
  assert isinstance(instance, Function)
373
-
374
- # Load the signature from the DB and check that it is still valid (i.e., is still consistent with a signature
375
- # in the code).
376
- signature = Signature.from_dict(d['signature'])
377
- idx = instance.__find_matching_overload(signature)
378
- if idx is None:
379
- # No match; generate an informative error message.
380
- signature_note_str = 'any of its signatures' if instance.is_polymorphic else 'its signature as'
381
- instance_signature_str = (
382
- f'{len(instance.signatures)} signatures' if instance.is_polymorphic else str(instance.signature)
383
- )
384
- # TODO: Handle this more gracefully (instead of failing the DB load, allow the DB load to succeed, but
385
- # mark any enclosing FunctionCall as unusable). It's the same issue as dealing with a renamed UDF or
386
- # FunctionCall return type mismatch.
387
- raise excs.Error(
388
- f'The signature stored in the database for the UDF `{instance.self_path}` no longer matches '
389
- f'{signature_note_str} as currently defined in the code.\nThis probably means that the code for '
390
- f'`{instance.self_path}` has changed in a backward-incompatible way.\n'
391
- f'Signature in database: {signature}\n'
392
- f'Signature in code: {instance_signature_str}'
393
- )
394
- # We found a match; specialize to the appropriate overload resolution (non-polymorphic form) and return that.
395
- return instance._resolved_fns[idx]
396
-
397
- def __find_matching_overload(self, sig: Signature) -> Optional[int]:
398
- for idx, overload_sig in enumerate(self.signatures):
399
- if sig.is_consistent_with(overload_sig):
400
- return idx
401
- return None
475
+ return instance
402
476
 
403
477
  def to_store(self) -> tuple[dict, bytes]:
404
478
  """
@@ -1,16 +1,18 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import dataclasses
4
- import enum
5
4
  import inspect
6
5
  import json
7
6
  import logging
8
7
  import typing
9
- from typing import Any, Callable, Optional, Union
8
+ from typing import TYPE_CHECKING, Any, Callable, Optional
10
9
 
11
10
  import pixeltable.exceptions as excs
12
11
  import pixeltable.type_system as ts
13
12
 
13
+ if TYPE_CHECKING:
14
+ from pixeltable import exprs
15
+
14
16
  _logger = logging.getLogger('pixeltable')
15
17
 
16
18
 
@@ -21,25 +23,24 @@ class Parameter:
21
23
  kind: inspect._ParameterKind
22
24
  # for some reason, this needs to precede is_batched in the dataclass definition,
23
25
  # otherwise Python complains that an argument with a default is followed by an argument without a default
24
- default: Any = inspect.Parameter.empty # default value for the parameter
26
+ default: Optional['exprs.Literal'] = None # default value for the parameter
25
27
  is_batched: bool = False # True if the parameter is a batched parameter (eg, Batch[dict])
26
28
 
27
29
  def __post_init__(self) -> None:
28
- # make sure that default is json-serializable and of the correct type
29
- if self.default is inspect.Parameter.empty or self.default is None:
30
- return
31
- try:
32
- _ = json.dumps(self.default)
33
- except TypeError:
34
- raise excs.Error(f'Default value for parameter {self.name} is not JSON-serializable: {str(self.default)}')
35
- if self.col_type is not None:
36
- try:
37
- self.col_type.validate_literal(self.default)
38
- except TypeError as e:
39
- raise excs.Error(f'Default value for parameter {self.name}: {str(e)}')
30
+ from pixeltable import exprs
31
+
32
+ if self.default is not None:
33
+ if self.col_type is None:
34
+ raise excs.Error(f'Cannot have a default value for variable parameter {self.name!r}')
35
+ if not isinstance(self.default, exprs.Literal):
36
+ raise excs.Error(f'Default value for parameter {self.name!r} is not a constant')
37
+ if not self.col_type.is_supertype_of(self.default.col_type):
38
+ raise excs.Error(
39
+ f'Default value for parameter {self.name!r} is not of type {self.col_type!r}: {self.default}'
40
+ )
40
41
 
41
42
  def has_default(self) -> bool:
42
- return self.default is not inspect.Parameter.empty
43
+ return self.default is not None
43
44
 
44
45
  def as_dict(self) -> dict[str, Any]:
45
46
  return {
@@ -47,17 +48,15 @@ class Parameter:
47
48
  'col_type': self.col_type.as_dict() if self.col_type is not None else None,
48
49
  'kind': self.kind.name,
49
50
  'is_batched': self.is_batched,
50
- 'has_default': self.has_default(),
51
- 'default': self.default if self.has_default() else None,
51
+ 'default': None if self.default is None else self.default.as_dict(),
52
52
  }
53
53
 
54
54
  @classmethod
55
55
  def from_dict(cls, d: dict[str, Any]) -> Parameter:
56
- has_default = d['has_default']
57
- if has_default:
58
- default = d['default']
59
- else:
60
- default = inspect.Parameter.empty
56
+ from pixeltable import exprs
57
+
58
+ assert d['default'] is None or isinstance(d['default'], dict), d
59
+ default = None if d['default'] is None else exprs.Literal.from_dict(d['default'])
61
60
  return cls(
62
61
  name=d['name'],
63
62
  col_type=ts.ColumnType.from_dict(d['col_type']) if d['col_type'] is not None else None,
@@ -67,7 +66,8 @@ class Parameter:
67
66
  )
68
67
 
69
68
  def to_py_param(self) -> inspect.Parameter:
70
- return inspect.Parameter(self.name, self.kind, default=self.default)
69
+ py_default = self.default.val if self.default is not None else inspect.Parameter.empty
70
+ return inspect.Parameter(self.name, self.kind, default=py_default)
71
71
 
72
72
 
73
73
  T = typing.TypeVar('T')
@@ -147,6 +147,37 @@ class Signature:
147
147
 
148
148
  return True
149
149
 
150
+ def validate_args(self, bound_args: dict[str, Optional['exprs.Expr']], context: str = '') -> None:
151
+ if context != '':
152
+ context = f' ({context})'
153
+
154
+ for param_name, arg in bound_args.items():
155
+ assert param_name in self.parameters
156
+ param = self.parameters[param_name]
157
+ is_var_param = param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
158
+ if is_var_param:
159
+ continue
160
+ assert param.col_type is not None
161
+
162
+ if arg is None:
163
+ raise excs.Error(f'Parameter {param_name!r}{context}: invalid argument')
164
+
165
+ # Check that the argument is consistent with the expected parameter type, with the allowance that
166
+ # non-nullable parameters can still accept nullable arguments (since in that event, FunctionCall.eval()
167
+ # detects the Nones and skips evaluation).
168
+ if not (
169
+ param.col_type.is_supertype_of(arg.col_type, ignore_nullable=True)
170
+ # TODO: this is a hack to allow JSON columns to be passed to functions that accept scalar
171
+ # types. It's necessary to avoid littering notebooks with `apply(str)` calls or equivalent.
172
+ # (Previously, this wasn't necessary because `is_supertype_of()` was improperly implemented.)
173
+ # We need to think through the right way to handle this scenario.
174
+ or (arg.col_type.is_json_type() and param.col_type.is_scalar_type())
175
+ ):
176
+ raise excs.Error(
177
+ f'Parameter {param_name!r}{context}: argument type {arg.col_type} does not'
178
+ f' match parameter type {param.col_type}'
179
+ )
180
+
150
181
  def __eq__(self, other: object) -> bool:
151
182
  if not isinstance(other, Signature):
152
183
  return False
@@ -199,6 +230,8 @@ class Signature:
199
230
  type_substitutions: Optional[dict] = None,
200
231
  is_cls_method: bool = False,
201
232
  ) -> list[Parameter]:
233
+ from pixeltable import exprs
234
+
202
235
  assert (py_fn is None) != (py_params is None)
203
236
  if py_fn is not None:
204
237
  sig = inspect.signature(py_fn)
@@ -212,7 +245,7 @@ class Signature:
212
245
  if is_cls_method and idx == 0:
213
246
  continue # skip 'self' or 'cls' parameter
214
247
  if param.name in cls.SPECIAL_PARAM_NAMES:
215
- raise excs.Error(f"'{param.name}' is a reserved parameter name")
248
+ raise excs.Error(f'{param.name!r} is a reserved parameter name')
216
249
  if param.kind == inspect.Parameter.VAR_POSITIONAL or param.kind == inspect.Parameter.VAR_KEYWORD:
217
250
  parameters.append(Parameter(param.name, col_type=None, kind=param.kind))
218
251
  continue
@@ -220,7 +253,7 @@ class Signature:
220
253
  # check non-var parameters for name collisions and default value compatibility
221
254
  if param_types is not None:
222
255
  if idx >= len(param_types):
223
- raise excs.Error(f'Missing type for parameter {param.name}')
256
+ raise excs.Error(f'Missing type for parameter {param.name!r}')
224
257
  param_type = param_types[idx]
225
258
  is_batched = False
226
259
  else:
@@ -231,12 +264,14 @@ class Signature:
231
264
  py_type = param.annotation
232
265
  param_type, is_batched = cls._infer_type(py_type)
233
266
  if param_type is None:
234
- raise excs.Error(f'Cannot infer pixeltable type for parameter {param.name}')
267
+ raise excs.Error(f'Cannot infer pixeltable type for parameter {param.name!r}')
268
+
269
+ default = None if param.default is inspect.Parameter.empty else exprs.Expr.from_object(param.default)
270
+ if not (default is None or isinstance(default, exprs.Literal)):
271
+ raise excs.Error(f'Default value for parameter {param.name!r} must be a constant')
235
272
 
236
273
  parameters.append(
237
- Parameter(
238
- param.name, col_type=param_type, kind=param.kind, is_batched=is_batched, default=param.default
239
- )
274
+ Parameter(param.name, col_type=param_type, kind=param.kind, is_batched=is_batched, default=default)
240
275
  )
241
276
 
242
277
  return parameters
pixeltable/func/udf.py CHANGED
@@ -283,7 +283,7 @@ def from_table(
283
283
  result_dict[name] = var
284
284
  # Since this is a data column, it becomes a UDF parameter.
285
285
  # If the column is nullable, then the parameter will have a default value of None.
286
- default_value = None if col.col_type.nullable else inspect.Parameter.empty
286
+ default_value = exprs.Literal(None) if col.col_type.nullable else None
287
287
  param = Parameter(name, col.col_type, inspect._ParameterKind.POSITIONAL_OR_KEYWORD, default_value)
288
288
  params.append(param)
289
289
 
@@ -3,6 +3,7 @@ from pixeltable.utils.code import local_public_names
3
3
  from . import (
4
4
  anthropic,
5
5
  audio,
6
+ deepseek,
6
7
  fireworks,
7
8
  gemini,
8
9
  huggingface,