pixeltable-0.3.4-py3-none-any.whl → pixeltable-0.3.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (63)
  1. pixeltable/__init__.py +1 -0
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +9 -2
  4. pixeltable/catalog/column.py +1 -1
  5. pixeltable/catalog/dir.py +1 -1
  6. pixeltable/catalog/table.py +3 -1
  7. pixeltable/catalog/table_version.py +12 -2
  8. pixeltable/catalog/table_version_path.py +2 -2
  9. pixeltable/catalog/view.py +64 -20
  10. pixeltable/dataframe.py +11 -6
  11. pixeltable/env.py +12 -0
  12. pixeltable/exec/expr_eval/evaluators.py +4 -2
  13. pixeltable/exec/expr_eval/expr_eval_node.py +4 -1
  14. pixeltable/exprs/comparison.py +8 -4
  15. pixeltable/exprs/data_row.py +9 -7
  16. pixeltable/exprs/expr.py +2 -2
  17. pixeltable/exprs/function_call.py +155 -313
  18. pixeltable/exprs/json_mapper.py +25 -8
  19. pixeltable/exprs/json_path.py +6 -5
  20. pixeltable/exprs/object_ref.py +16 -5
  21. pixeltable/exprs/row_builder.py +10 -3
  22. pixeltable/func/aggregate_function.py +29 -15
  23. pixeltable/func/callable_function.py +11 -8
  24. pixeltable/func/expr_template_function.py +3 -9
  25. pixeltable/func/function.py +148 -74
  26. pixeltable/func/signature.py +65 -30
  27. pixeltable/func/tools.py +26 -26
  28. pixeltable/func/udf.py +1 -1
  29. pixeltable/functions/__init__.py +1 -0
  30. pixeltable/functions/anthropic.py +9 -3
  31. pixeltable/functions/deepseek.py +121 -0
  32. pixeltable/functions/image.py +7 -7
  33. pixeltable/functions/openai.py +30 -13
  34. pixeltable/functions/video.py +14 -7
  35. pixeltable/globals.py +14 -3
  36. pixeltable/index/embedding_index.py +4 -13
  37. pixeltable/io/globals.py +88 -77
  38. pixeltable/io/hf_datasets.py +34 -34
  39. pixeltable/io/pandas.py +75 -76
  40. pixeltable/io/parquet.py +19 -27
  41. pixeltable/io/utils.py +115 -0
  42. pixeltable/iterators/audio.py +2 -1
  43. pixeltable/iterators/video.py +1 -1
  44. pixeltable/metadata/__init__.py +2 -1
  45. pixeltable/metadata/converters/convert_15.py +18 -8
  46. pixeltable/metadata/converters/convert_27.py +31 -0
  47. pixeltable/metadata/converters/convert_28.py +15 -0
  48. pixeltable/metadata/converters/convert_29.py +111 -0
  49. pixeltable/metadata/converters/util.py +12 -1
  50. pixeltable/metadata/notes.py +3 -0
  51. pixeltable/metadata/schema.py +8 -0
  52. pixeltable/share/__init__.py +1 -0
  53. pixeltable/share/packager.py +41 -13
  54. pixeltable/share/publish.py +97 -0
  55. pixeltable/type_system.py +40 -14
  56. pixeltable/utils/__init__.py +41 -0
  57. pixeltable/utils/arrow.py +40 -7
  58. pixeltable/utils/formatter.py +1 -1
  59. {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/METADATA +34 -49
  60. {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/RECORD +63 -57
  61. {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/WHEEL +1 -1
  62. {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/LICENSE +0 -0
  63. {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Optional
3
+ from typing import TYPE_CHECKING, Optional
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
@@ -11,6 +11,9 @@ from .expr import _GLOBAL_SCOPE, Expr, ExprScope
11
11
  from .row_builder import RowBuilder
12
12
  from .sql_element_cache import SqlElementCache
13
13
 
14
+ if TYPE_CHECKING:
15
+ from .object_ref import ObjectRef
16
+
14
17
 
15
18
  class JsonMapper(Expr):
16
19
  """
@@ -19,6 +22,10 @@ class JsonMapper(Expr):
19
22
  is populated by JsonMapper.eval(). The JsonMapper effectively creates a new scope for its target expr.
20
23
  """
21
24
 
25
+ target_expr_scope: ExprScope
26
+ parent_mapper: Optional[JsonMapper]
27
+ target_expr_eval_ctx: Optional[RowBuilder.EvalCtx]
28
+
22
29
  def __init__(self, src_expr: Expr, target_expr: Expr):
23
30
  # TODO: type spec should be list[target_expr.col_type]
24
31
  super().__init__(ts.JsonType())
@@ -29,12 +36,18 @@ class JsonMapper(Expr):
29
36
 
30
37
  from .object_ref import ObjectRef
31
38
 
32
- scope_anchor = ObjectRef(self.target_expr_scope, self)
33
- self.components = [src_expr, target_expr, scope_anchor]
34
- self.parent_mapper: Optional[JsonMapper] = None
35
- self.target_expr_eval_ctx: Optional[RowBuilder.EvalCtx] = None
39
+ self.components = [src_expr, target_expr]
40
+ self.parent_mapper = None
41
+ self.target_expr_eval_ctx = None
42
+
43
+ # Intentionally create the id now, before adding the scope anchor; this ensures that JsonMappers will
44
+ # be recognized as equal so long as they have the same src_expr and target_expr.
45
+ # TODO: Might this cause problems after certain substitutions?
36
46
  self.id = self._create_id()
37
47
 
48
+ scope_anchor = ObjectRef(self.target_expr_scope, self)
49
+ self.components.append(scope_anchor)
50
+
38
51
  def bind_rel_paths(self, mapper: Optional[JsonMapper] = None) -> None:
39
52
  self._src_expr.bind_rel_paths(mapper)
40
53
  self._target_expr.bind_rel_paths(self)
@@ -84,8 +97,12 @@ class JsonMapper(Expr):
84
97
  return self.components[1]
85
98
 
86
99
  @property
87
- def scope_anchor(self) -> Expr:
88
- return self.components[2]
100
+ def scope_anchor(self) -> 'ObjectRef':
101
+ from .object_ref import ObjectRef
102
+
103
+ result = self.components[2]
104
+ assert isinstance(result, ObjectRef)
105
+ return result
89
106
 
90
107
  def _equals(self, _: JsonMapper) -> bool:
91
108
  return True
@@ -107,7 +124,7 @@ class JsonMapper(Expr):
107
124
  for i, val in enumerate(src):
108
125
  data_row[self.scope_anchor.slot_idx] = val
109
126
  # stored target_expr
110
- row_builder.eval(data_row, self.target_expr_eval_ctx)
127
+ row_builder.eval(data_row, self.target_expr_eval_ctx, force_eval=self._target_expr.scope())
111
128
  result[i] = data_row[self._target_expr.slot_idx]
112
129
  data_row[self.slot_idx] = result
113
130
 
@@ -43,11 +43,11 @@ class JsonPath(Expr):
43
43
  self.id = self._create_id()
44
44
 
45
45
  def __repr__(self) -> str:
46
- # else "R": the anchor is RELATIVE_PATH_ROOT
47
- return (
48
- f'{str(self._anchor) if self._anchor is not None else "R"}'
49
- f'{"." if isinstance(self.path_elements[0], str) else ""}{self._json_path()}'
50
- )
46
+ # else 'R': the anchor is RELATIVE_PATH_ROOT
47
+ anchor_str = str(self._anchor) if self._anchor is not None else 'R'
48
+ if len(self.path_elements) == 0:
49
+ return anchor_str
50
+ return f'{anchor_str}{"." if isinstance(self.path_elements[0], str) else ""}{self._json_path()}'
51
51
 
52
52
  def _as_dict(self) -> dict:
53
53
  path_elements = [[el.start, el.stop, el.step] if isinstance(el, slice) else el for el in self.path_elements]
@@ -158,6 +158,7 @@ class JsonPath(Expr):
158
158
  return ''.join(result)
159
159
 
160
160
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
161
+ assert self._anchor is not None, self
161
162
  val = data_row[self._anchor.slot_idx]
162
163
  if self.compiled_path is not None:
163
164
  val = self.compiled_path.search(val)
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Optional
3
+ from typing import Any, Optional
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
@@ -26,14 +26,22 @@ class ObjectRef(Expr):
26
26
  self.owner = owner
27
27
  self.id = self._create_id()
28
28
 
29
+ def _id_attrs(self) -> list[tuple[str, Any]]:
30
+ # We have no components, so we can't rely on the default behavior here (otherwise, all ObjectRef
31
+ # instances will be conflated into a single slot).
32
+ return [('addr', id(self))]
33
+
34
+ def substitute(self, subs: dict[Expr, Expr]) -> Expr:
35
+ # Just return self; we need to avoid creating a new id after doing the substitution, because otherwise
36
+ # we'll wind up in a situation where the scope_anchor of the enclosing JsonMapper is different from the
37
+ # nested ObjectRefs inside its target_expr (and therefore occupies a different slot_idx).
38
+ return self
39
+
29
40
  def scope(self) -> ExprScope:
30
41
  return self._scope
31
42
 
32
- def __str__(self) -> str:
33
- assert False
34
-
35
43
  def _equals(self, other: ObjectRef) -> bool:
36
- return self.owner is other.owner
44
+ return self.id == other.id
37
45
 
38
46
  def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
39
47
  return None
@@ -41,3 +49,6 @@ class ObjectRef(Expr):
41
49
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
42
50
  # this will be called, but the value has already been materialized elsewhere
43
51
  pass
52
+
53
+ def __repr__(self) -> str:
54
+ return f'ObjectRef({self.owner}, {self.id}, {self.owner.id})'
@@ -17,7 +17,7 @@ from pixeltable.env import Env
17
17
  from pixeltable.utils.media_store import MediaStore
18
18
 
19
19
  from .data_row import DataRow
20
- from .expr import Expr
20
+ from .expr import Expr, ExprScope
21
21
  from .expr_set import ExprSet
22
22
 
23
23
 
@@ -299,6 +299,7 @@ class RowBuilder:
299
299
  # this is input and therefore doesn't depend on other exprs
300
300
  continue
301
301
  for d in expr.dependencies():
302
+ assert d.slot_idx is not None, f'{expr}, {d}'
302
303
  if d.slot_idx in excluded_slot_idxs:
303
304
  continue
304
305
  dependencies[expr.slot_idx].add(d.slot_idx)
@@ -376,7 +377,12 @@ class RowBuilder:
376
377
  data_row.set_exc(slot_idx, exc)
377
378
 
378
379
  def eval(
379
- self, data_row: DataRow, ctx: EvalCtx, profile: Optional[ExecProfile] = None, ignore_errors: bool = False
380
+ self,
381
+ data_row: DataRow,
382
+ ctx: EvalCtx,
383
+ profile: Optional[ExecProfile] = None,
384
+ ignore_errors: bool = False,
385
+ force_eval: Optional[ExprScope] = None,
380
386
  ) -> None:
381
387
  """
382
388
  Populates the slots in data_row given in ctx.
@@ -384,10 +390,11 @@ class RowBuilder:
384
390
  and omits any of that expr's dependents's eval().
385
391
  profile: if present, populated with execution time of each expr.eval() call; indexed by expr.slot_idx
386
392
  ignore_errors: if False, raises ExprEvalError if any expr.eval() raises an exception
393
+ force_eval: forces exprs in the specified scope to be reevaluated, even if they already have a value
387
394
  """
388
395
  for expr in ctx.exprs:
389
396
  assert expr.slot_idx >= 0
390
- if data_row.has_val[expr.slot_idx] or data_row.has_exc(expr.slot_idx):
397
+ if expr.scope() != force_eval and (data_row.has_val[expr.slot_idx] or data_row.has_exc(expr.slot_idx)):
391
398
  continue
392
399
  try:
393
400
  start_time = time.perf_counter()
@@ -12,7 +12,7 @@ from .globals import validate_symbol_path
12
12
  from .signature import Parameter, Signature
13
13
 
14
14
  if TYPE_CHECKING:
15
- import pixeltable
15
+ from pixeltable import exprs
16
16
 
17
17
 
18
18
  class Aggregator(abc.ABC):
@@ -80,6 +80,8 @@ class AggregateFunction(Function):
80
80
  """Inspects the Aggregator class to infer the corresponding function signature. Returns the
81
81
  inferred signature along with the list of init_param_names (for downstream error handling).
82
82
  """
83
+ from pixeltable import exprs
84
+
83
85
  # infer type parameters; set return_type=InvalidType() because it has no meaning here
84
86
  init_sig = Signature.create(
85
87
  py_fn=cls.__init__, return_type=ts.InvalidType(), is_cls_method=True, type_substitutions=type_substitutions
@@ -102,14 +104,24 @@ class AggregateFunction(Function):
102
104
  py_update_params = list(inspect.signature(cls.update).parameters.values())[1:] # leave out self
103
105
  assert len(py_update_params) == len(update_types)
104
106
  update_params = [
105
- Parameter(p.name, col_type=update_types[i], kind=p.kind, default=p.default)
107
+ Parameter(
108
+ p.name,
109
+ col_type=update_types[i],
110
+ kind=p.kind,
111
+ default=exprs.Expr.from_object(p.default), # type: ignore[arg-type]
112
+ )
106
113
  for i, p in enumerate(py_update_params)
107
114
  ]
108
115
  # starting at 1: leave out self
109
116
  py_init_params = list(inspect.signature(cls.__init__).parameters.values())[1:]
110
117
  assert len(py_init_params) == len(init_types)
111
118
  init_params = [
112
- Parameter(p.name, col_type=init_types[i], kind=inspect.Parameter.KEYWORD_ONLY, default=p.default)
119
+ Parameter(
120
+ p.name,
121
+ col_type=init_types[i],
122
+ kind=inspect.Parameter.KEYWORD_ONLY,
123
+ default=exprs.Expr.from_object(p.default), # type: ignore[arg-type]
124
+ )
113
125
  for i, p in enumerate(py_init_params)
114
126
  ]
115
127
  duplicate_params = set(p.name for p in init_params) & set(p.name for p in update_params)
@@ -157,7 +169,7 @@ class AggregateFunction(Function):
157
169
  res += '\n\n' + inspect.getdoc(self.agg_classes[0].update)
158
170
  return res
159
171
 
160
- def __call__(self, *args: object, **kwargs: object) -> 'pixeltable.exprs.FunctionCall':
172
+ def __call__(self, *args: Any, **kwargs: Any) -> 'exprs.FunctionCall':
161
173
  from pixeltable import exprs
162
174
 
163
175
  # perform semantic analysis of special parameters 'order_by' and 'group_by'
@@ -194,29 +206,31 @@ class AggregateFunction(Function):
194
206
  )
195
207
  group_by_clause = kwargs.pop(self.GROUP_BY_PARAM)
196
208
 
209
+ args = [exprs.Expr.from_object(arg) for arg in args]
210
+ kwargs = {k: exprs.Expr.from_object(v) for k, v in kwargs.items()}
211
+
197
212
  resolved_fn, bound_args = self._bind_to_matching_signature(args, kwargs)
198
- return_type = resolved_fn.call_return_type(args, kwargs)
213
+ return_type = resolved_fn.call_return_type(bound_args)
214
+
199
215
  return exprs.FunctionCall(
200
216
  resolved_fn,
201
- bound_args,
217
+ args,
218
+ kwargs,
202
219
  return_type,
203
220
  order_by_clause=[order_by_clause] if order_by_clause is not None else [],
204
221
  group_by_clause=[group_by_clause] if group_by_clause is not None else [],
205
222
  )
206
223
 
207
- def validate_call(self, bound_args: dict[str, Any]) -> None:
208
- # check that init parameters are not Exprs
209
- # TODO: do this in the planner (check that init parameters are either constants or only refer to grouping exprs)
224
+ def validate_call(self, bound_args: dict[str, 'exprs.Expr']) -> None:
210
225
  from pixeltable import exprs
211
226
 
212
- assert not self.is_polymorphic
227
+ super().validate_call(bound_args)
213
228
 
229
+ # check that init parameters are not Exprs
230
+ # TODO: do this in the planner (check that init parameters are either constants or only refer to grouping exprs)
214
231
  for param_name in self.init_param_names[0]:
215
- if param_name in bound_args and isinstance(bound_args[param_name], exprs.Expr):
216
- raise excs.Error(
217
- f'{self.display_name}(): init() parameter {param_name} needs to be a constant, not a Pixeltable '
218
- f'expression'
219
- )
232
+ if param_name in bound_args and not isinstance(bound_args[param_name], exprs.Literal):
233
+ raise excs.Error(f'{self.display_name}(): init() parameter {param_name!r} must be a constant value')
220
234
 
221
235
  def __repr__(self) -> str:
222
236
  return f'<Pixeltable Aggregator {self.name}>'
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import asyncio
4
4
  import inspect
5
- from typing import Any, Callable, Optional, Sequence
5
+ from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence
6
6
  from uuid import UUID
7
7
 
8
8
  import cloudpickle # type: ignore[import-untyped]
@@ -12,6 +12,9 @@ import pixeltable.exceptions as excs
12
12
  from .function import Function
13
13
  from .signature import Signature
14
14
 
15
+ if TYPE_CHECKING:
16
+ from pixeltable import exprs
17
+
15
18
 
16
19
  class CallableFunction(Function):
17
20
  """Pixeltable Function backed by a Python Callable.
@@ -192,18 +195,18 @@ class CallableFunction(Function):
192
195
  batch_size = md['batch_size']
193
196
  return CallableFunction([sig], [py_fn], self_name=name, batch_size=batch_size)
194
197
 
195
- def validate_call(self, bound_args: dict[str, Any]) -> None:
198
+ def validate_call(self, bound_args: dict[str, 'exprs.Expr']) -> None:
196
199
  from pixeltable import exprs
197
200
 
198
- assert not self.is_polymorphic
201
+ super().validate_call(bound_args)
199
202
  if self.is_batched:
200
203
  signature = self.signatures[0]
201
204
  for param in signature.constant_parameters:
202
- if param.name in bound_args and isinstance(bound_args[param.name], exprs.Expr):
203
- raise ValueError(
204
- f'{self.display_name}(): '
205
- f'parameter {param.name} must be a constant value, not a Pixeltable expression'
206
- )
205
+ # Check that constant parameters map to constant arguments. It's ok for the argument to be a Variable,
206
+ # since in that case the FunctionCall is part of an unresolved template; the check will be done again
207
+ # when the template is fully resolved.
208
+ if param.name in bound_args and not isinstance(bound_args[param.name], (exprs.Literal, exprs.Variable)):
209
+ raise ValueError(f'{self.display_name}(): parameter {param.name} must be a constant value')
207
210
 
208
211
  def __repr__(self) -> str:
209
212
  return f'<Pixeltable UDF {self.name}>'
@@ -32,17 +32,11 @@ class ExprTemplate:
32
32
  assert var.name in self.param_exprs, f"Variable '{var.name}' in expression is not a parameter"
33
33
 
34
34
  # verify default values
35
- self.defaults: dict[str, exprs.Literal] = {} # key: param name, value: default value converted to a Literal
35
+ self.defaults: dict[str, exprs.Literal] = {}
36
36
  for param in self.signature.parameters.values():
37
- if param.default is inspect.Parameter.empty:
37
+ if param.default is None:
38
38
  continue
39
- param_expr = self.param_exprs[param.name]
40
- try:
41
- literal_default = exprs.Literal(param.default, col_type=param_expr.col_type)
42
- self.defaults[param.name] = literal_default
43
- except TypeError as e:
44
- msg = str(e)
45
- raise excs.Error(f"Default value for parameter '{param.name}': {msg[0].lower() + msg[1:]}")
39
+ self.defaults[param.name] = param.default
46
40
 
47
41
 
48
42
  class ExprTemplateFunction(Function):
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import importlib
4
4
  import inspect
5
+ import typing
5
6
  from abc import ABC, abstractmethod
6
7
  from copy import copy
7
8
  from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence, cast
@@ -17,6 +18,8 @@ from .globals import resolve_symbol
17
18
  from .signature import Signature
18
19
 
19
20
  if TYPE_CHECKING:
21
+ from pixeltable import exprs
22
+
20
23
  from .expr_template_function import ExprTemplate, ExprTemplateFunction
21
24
 
22
25
 
@@ -152,9 +155,22 @@ class Function(ABC):
152
155
  def __call__(self, *args: Any, **kwargs: Any) -> 'pxt.exprs.FunctionCall':
153
156
  from pixeltable import exprs
154
157
 
158
+ args = [exprs.Expr.from_object(arg) for arg in args]
159
+ kwargs = {k: exprs.Expr.from_object(v) for k, v in kwargs.items()}
160
+
161
+ for i, expr in enumerate(args):
162
+ if expr is None:
163
+ raise excs.Error(f'Argument {i + 1} in call to {self.self_path!r} is not a valid Pixeltable expression')
164
+ for param_name, expr in kwargs.items():
165
+ if expr is None:
166
+ raise excs.Error(
167
+ f'Argument {param_name!r} in call to {self.self_path!r} is not a valid Pixeltable expression'
168
+ )
169
+
155
170
  resolved_fn, bound_args = self._bind_to_matching_signature(args, kwargs)
156
- return_type = resolved_fn.call_return_type(args, kwargs)
157
- return exprs.FunctionCall(resolved_fn, bound_args, return_type)
171
+ return_type = resolved_fn.call_return_type(bound_args)
172
+
173
+ return exprs.FunctionCall(resolved_fn, args, kwargs, return_type)
158
174
 
159
175
  def _bind_to_matching_signature(self, args: Sequence[Any], kwargs: dict[str, Any]) -> tuple[Self, dict[str, Any]]:
160
176
  result: int = -1
@@ -185,49 +201,115 @@ class Function(ABC):
185
201
 
186
202
  signature = self.signatures[signature_idx]
187
203
  bound_args = signature.py_signature.bind(*args, **kwargs).arguments
188
- self._resolved_fns[signature_idx].validate_call(bound_args)
189
- exprs.FunctionCall.normalize_args(self.name, signature, bound_args)
190
- return bound_args
204
+ normalized_args = {k: exprs.Expr.from_object(v) for k, v in bound_args.items()}
205
+ self._resolved_fns[signature_idx].validate_call(normalized_args)
206
+ return normalized_args
191
207
 
192
- def validate_call(self, bound_args: dict[str, Any]) -> None:
208
+ def validate_call(self, bound_args: dict[str, Optional['exprs.Expr']]) -> None:
193
209
  """Override this to do custom validation of the arguments"""
194
210
  assert not self.is_polymorphic
211
+ self.signature.validate_args(bound_args, context=f'in function {self.name!r}')
195
212
 
196
- def _get_callable_args(self, callable: Callable, kwargs: dict[str, Any]) -> dict[str, Any]:
197
- """Return the kwargs to pass to callable, given kwargs passed to this function"""
198
- bound_args = self.signature.py_signature.bind(**kwargs).arguments
199
- # add defaults to bound_args, if not already present
200
- bound_args.update(
201
- {
202
- name: param.default
203
- for name, param in self.signature.parameters.items()
204
- if name not in bound_args and param.has_default()
205
- }
206
- )
207
- result: dict[str, Any] = {}
208
- sig = inspect.signature(callable)
209
- for param in sig.parameters.values():
210
- if param.name in bound_args:
211
- result[param.name] = bound_args[param.name]
212
- return result
213
-
214
- def call_resource_pool(self, kwargs: dict[str, Any]) -> str:
213
+ def call_resource_pool(self, bound_args: dict[str, 'exprs.Expr']) -> str:
215
214
  """Return the resource pool to use for calling this function with the given arguments"""
216
- kw_args = self._get_callable_args(self._resource_pool, kwargs)
217
- return self._resource_pool(**kw_args)
218
-
219
- def call_return_type(self, args: Sequence[Any], kwargs: dict[str, Any]) -> ts.ColumnType:
215
+ rp_kwargs = self._assemble_callable_args(self._resource_pool, bound_args)
216
+ if rp_kwargs is None:
217
+ # TODO: What to do in this case? An example where this can happen is if model_id is not a constant
218
+ # in a call to one of the OpenAI endpoints.
219
+ raise excs.Error('Could not determine resource pool')
220
+ return self._resource_pool(**rp_kwargs)
221
+
222
+ def call_return_type(self, bound_args: dict[str, 'exprs.Expr']) -> ts.ColumnType:
220
223
  """Return the type of the value returned by calling this function with the given arguments"""
221
- assert not self.is_polymorphic
222
224
  if self._conditional_return_type is None:
223
- return self.signature.return_type
224
- bound_args = self.signature.py_signature.bind(*args, **kwargs).arguments
225
- kw_args: dict[str, Any] = {}
226
- sig = inspect.signature(self._conditional_return_type)
227
- for param in sig.parameters.values():
225
+ # No conditional return type specified; use the default return type
226
+ return_type = self.signature.return_type
227
+ else:
228
+ crt_kwargs = self._assemble_callable_args(self._conditional_return_type, bound_args)
229
+ if crt_kwargs is None:
230
+ # A conditional return type is specified, but one of its arguments is not a constant.
231
+ # Use the default return type
232
+ return_type = self.signature.return_type
233
+ else:
234
+ # A conditional return type is specified and all its arguments are constants; use the specific
235
+ # call return type
236
+ return_type = self._conditional_return_type(**crt_kwargs)
237
+
238
+ if return_type.nullable:
239
+ return return_type
240
+
241
+ # If `return_type` is non-nullable, but the function call has a nullable input to any of its non-nullable
242
+ # parameters, then we need to make it nullable. This is because Pixeltable defaults a function output to
243
+ # `None` when any of its non-nullable inputs are `None`.
244
+ for arg_name, arg in bound_args.items():
245
+ param = self.signature.parameters[arg_name]
246
+ if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
247
+ continue
248
+ if arg.col_type.nullable and not param.col_type.nullable:
249
+ return_type = return_type.copy(nullable=True)
250
+ break
251
+
252
+ return return_type
253
+
254
+ def _assemble_callable_args(
255
+ self, callable: Callable, bound_args: dict[str, 'exprs.Expr']
256
+ ) -> Optional[dict[str, Any]]:
257
+ """
258
+ Return the kwargs to pass to callable, given bound_args passed to this function.
259
+
260
+ This is used by `conditional_return_type` and `get_resource_pool` to determine call-specific characteristics
261
+ of this function.
262
+
263
+ In both cases, the specified `Callable` takes a subset of the parameters of this Function, which may
264
+ be typed as either `Expr`s or Python values. Any parameters typed as Python values expect to see constants
265
+ (Literals); if the corresponding entries in `bound_args` are not constants, then the return value is None.
266
+ """
267
+ from pixeltable import exprs
268
+
269
+ assert not self.is_polymorphic
270
+
271
+ callable_signature = inspect.signature(callable)
272
+ callable_type_hints = typing.get_type_hints(callable)
273
+ callable_args: dict[str, Any] = {}
274
+
275
+ for param in callable_signature.parameters.values():
276
+ assert param.name in self.signature.parameters
277
+
278
+ arg: exprs.Expr
228
279
  if param.name in bound_args:
229
- kw_args[param.name] = bound_args[param.name]
230
- return self._conditional_return_type(**kw_args)
280
+ arg = bound_args[param.name]
281
+ elif self.signature.parameters[param.name].has_default():
282
+ arg = self.signature.parameters[param.name].default
283
+ else:
284
+ # This parameter is missing from bound_args and has no default value, so return None.
285
+ return None
286
+ assert isinstance(arg, exprs.Expr)
287
+
288
+ expects_expr: Optional[type[exprs.Expr]] = None
289
+ type_hint = callable_type_hints.get(param.name)
290
+ if typing.get_origin(type_hint) is not None:
291
+ type_hint = typing.get_origin(type_hint) # Remove type subscript if one exists
292
+ if isinstance(type_hint, type) and issubclass(type_hint, exprs.Expr):
293
+ # The callable expects an Expr for this parameter. We allow for the case where the
294
+ # callable requests a specific subtype of Expr.
295
+ expects_expr = type_hint
296
+
297
+ if expects_expr is not None:
298
+ # The callable is expecting `param.name` to be an Expr. Validate that it's of the appropriate type;
299
+ # otherwise return None.
300
+ if isinstance(arg, expects_expr):
301
+ callable_args[param.name] = arg
302
+ else:
303
+ return None
304
+ else:
305
+ # The callable is expecting `param.name` to be a constant Python value. Unpack a Literal if we find
306
+ # one; otherwise return None.
307
+ if isinstance(arg, exprs.Literal):
308
+ callable_args[param.name] = arg.val
309
+ else:
310
+ return None
311
+
312
+ return callable_args
231
313
 
232
314
  def conditional_return_type(self, fn: Callable[..., ts.ColumnType]) -> Callable[..., ts.ColumnType]:
233
315
  """Instance decorator for specifying a conditional return type for this function"""
@@ -280,18 +362,38 @@ class Function(ABC):
280
362
  raise excs.Error(f'Unknown parameter: {k}')
281
363
  param = self.signature.parameters[k]
282
364
  expr = exprs.Expr.from_object(v)
365
+ if not isinstance(expr, exprs.Literal):
366
+ raise excs.Error(f'Expected a constant value for parameter {k!r} in call to .using()')
283
367
  if not param.col_type.is_supertype_of(expr.col_type):
284
- raise excs.Error(f'Expected type `{param.col_type}` for parameter `{k}`; got `{expr.col_type}`')
285
- bindings[k] = v # Use the original value, not the Expr (The Expr is only for validation)
368
+ raise excs.Error(f'Expected type `{param.col_type}` for parameter {k!r}; got `{expr.col_type}`')
369
+ bindings[k] = expr
286
370
 
287
371
  residual_params = [p for p in self.signature.parameters.values() if p.name not in bindings]
288
372
 
289
- # Bind each remaining parameter to a like-named variable
290
- for param in residual_params:
291
- bindings[param.name] = exprs.Variable(param.name, param.col_type)
292
-
293
- return_type = self.call_return_type([], bindings)
294
- call = exprs.FunctionCall(self, bindings, return_type)
373
+ # Bind each remaining parameter to a like-named variable.
374
+ # Also construct the call arguments for the template function call. Variables become args when possible;
375
+ # otherwise, they are passed as kwargs.
376
+ template_args: list[exprs.Expr] = []
377
+ template_kwargs: dict[str, exprs.Expr] = {}
378
+ args_ok = True
379
+ for name, param in self.signature.parameters.items():
380
+ if name in bindings:
381
+ template_kwargs[name] = bindings[name]
382
+ args_ok = False
383
+ else:
384
+ var = exprs.Variable(name, param.col_type)
385
+ bindings[name] = var
386
+ if args_ok and param.kind in (
387
+ inspect.Parameter.POSITIONAL_ONLY,
388
+ inspect.Parameter.POSITIONAL_OR_KEYWORD,
389
+ ):
390
+ template_args.append(var)
391
+ else:
392
+ template_kwargs[name] = var
393
+ args_ok = False
394
+
395
+ return_type = self.call_return_type(bindings)
396
+ call = exprs.FunctionCall(self, template_args, template_kwargs, return_type)
295
397
 
296
398
  # Construct the (n-k)-ary signature of the new function. We use `call.col_type` for this, rather than
297
399
  # `self.signature.return_type`, because the return type of the new function may be specialized via a
@@ -370,35 +472,7 @@ class Function(ABC):
370
472
  assert 'signature' in d and d['signature'] is not None
371
473
  instance = resolve_symbol(d['path'])
372
474
  assert isinstance(instance, Function)
373
-
374
- # Load the signature from the DB and check that it is still valid (i.e., is still consistent with a signature
375
- # in the code).
376
- signature = Signature.from_dict(d['signature'])
377
- idx = instance.__find_matching_overload(signature)
378
- if idx is None:
379
- # No match; generate an informative error message.
380
- signature_note_str = 'any of its signatures' if instance.is_polymorphic else 'its signature as'
381
- instance_signature_str = (
382
- f'{len(instance.signatures)} signatures' if instance.is_polymorphic else str(instance.signature)
383
- )
384
- # TODO: Handle this more gracefully (instead of failing the DB load, allow the DB load to succeed, but
385
- # mark any enclosing FunctionCall as unusable). It's the same issue as dealing with a renamed UDF or
386
- # FunctionCall return type mismatch.
387
- raise excs.Error(
388
- f'The signature stored in the database for the UDF `{instance.self_path}` no longer matches '
389
- f'{signature_note_str} as currently defined in the code.\nThis probably means that the code for '
390
- f'`{instance.self_path}` has changed in a backward-incompatible way.\n'
391
- f'Signature in database: {signature}\n'
392
- f'Signature in code: {instance_signature_str}'
393
- )
394
- # We found a match; specialize to the appropriate overload resolution (non-polymorphic form) and return that.
395
- return instance._resolved_fns[idx]
396
-
397
- def __find_matching_overload(self, sig: Signature) -> Optional[int]:
398
- for idx, overload_sig in enumerate(self.signatures):
399
- if sig.is_consistent_with(overload_sig):
400
- return idx
401
- return None
475
+ return instance
402
476
 
403
477
  def to_store(self) -> tuple[dict, bytes]:
404
478
  """