pixeltable 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (60)
  1. pixeltable/__init__.py +1 -0
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +9 -2
  4. pixeltable/catalog/column.py +1 -1
  5. pixeltable/catalog/dir.py +1 -1
  6. pixeltable/catalog/table.py +1 -1
  7. pixeltable/catalog/table_version.py +12 -2
  8. pixeltable/catalog/table_version_path.py +2 -2
  9. pixeltable/catalog/view.py +64 -20
  10. pixeltable/dataframe.py +14 -14
  11. pixeltable/env.py +20 -3
  12. pixeltable/exec/component_iteration_node.py +1 -2
  13. pixeltable/exec/expr_eval/evaluators.py +4 -2
  14. pixeltable/exec/expr_eval/expr_eval_node.py +4 -1
  15. pixeltable/exprs/comparison.py +8 -4
  16. pixeltable/exprs/data_row.py +5 -3
  17. pixeltable/exprs/expr.py +9 -2
  18. pixeltable/exprs/function_call.py +155 -313
  19. pixeltable/func/aggregate_function.py +29 -15
  20. pixeltable/func/callable_function.py +11 -8
  21. pixeltable/func/expr_template_function.py +3 -9
  22. pixeltable/func/function.py +148 -74
  23. pixeltable/func/signature.py +65 -30
  24. pixeltable/func/udf.py +1 -1
  25. pixeltable/functions/__init__.py +1 -0
  26. pixeltable/functions/deepseek.py +121 -0
  27. pixeltable/functions/image.py +7 -7
  28. pixeltable/functions/openai.py +49 -10
  29. pixeltable/functions/video.py +14 -7
  30. pixeltable/globals.py +14 -3
  31. pixeltable/index/embedding_index.py +4 -13
  32. pixeltable/io/globals.py +88 -77
  33. pixeltable/io/hf_datasets.py +34 -34
  34. pixeltable/io/pandas.py +75 -87
  35. pixeltable/io/parquet.py +19 -27
  36. pixeltable/io/utils.py +115 -0
  37. pixeltable/iterators/audio.py +2 -1
  38. pixeltable/iterators/video.py +1 -1
  39. pixeltable/metadata/__init__.py +2 -1
  40. pixeltable/metadata/converters/convert_15.py +18 -8
  41. pixeltable/metadata/converters/convert_27.py +31 -0
  42. pixeltable/metadata/converters/convert_28.py +15 -0
  43. pixeltable/metadata/converters/convert_29.py +111 -0
  44. pixeltable/metadata/converters/util.py +12 -1
  45. pixeltable/metadata/notes.py +3 -0
  46. pixeltable/metadata/schema.py +8 -0
  47. pixeltable/share/__init__.py +1 -0
  48. pixeltable/share/packager.py +246 -0
  49. pixeltable/share/publish.py +97 -0
  50. pixeltable/type_system.py +87 -42
  51. pixeltable/utils/__init__.py +41 -0
  52. pixeltable/utils/arrow.py +45 -12
  53. pixeltable/utils/formatter.py +1 -1
  54. pixeltable/utils/iceberg.py +14 -0
  55. pixeltable/utils/media_store.py +1 -1
  56. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/METADATA +37 -50
  57. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/RECORD +60 -51
  58. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/WHEEL +1 -1
  59. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/LICENSE +0 -0
  60. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/entry_points.txt +0 -0
pixeltable/exprs/function_call.py
@@ -1,9 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import inspect
4
- import json
5
4
  import sys
6
- from typing import Any, Optional, Sequence
5
+ from typing import Any, Optional, Sequence, Union
7
6
 
8
7
  import sqlalchemy as sql
9
8
 
@@ -14,7 +13,6 @@ import pixeltable.type_system as ts
14
13
 
15
14
  from .data_row import DataRow
16
15
  from .expr import Expr
17
- from .inline_expr import InlineDict, InlineList
18
16
  from .literal import Literal
19
17
  from .row_builder import RowBuilder
20
18
  from .rowid_ref import RowidRef
@@ -27,135 +25,86 @@ class FunctionCall(Expr):
27
25
  agg_init_args: dict[str, Any]
28
26
  resource_pool: Optional[str]
29
27
 
30
- # tuple[Optional[int], Optional[Any]]:
31
- # - for Exprs: (index into components, None)
32
- # - otherwise: (None, val)
33
- args: list[tuple[Optional[int], Optional[Any]]]
34
- kwargs: dict[str, tuple[Optional[int], Optional[Any]]]
28
+ # These collections hold the component indices corresponding to the args and kwargs
29
+ # that were passed to the FunctionCall. They're 1:1 with the original call pattern.
30
+ arg_idxs: list[int]
31
+ kwarg_idxs: dict[str, int]
35
32
 
36
- # maps each parameter name to tuple representing the value it has in the call:
37
- # - argument's index in components, if an argument is given in the call
38
- # - default value, if no argument given in the call
39
- # (in essence, this combines init()'s bound_args and default values)
40
- _param_values: dict[str, tuple[Optional[int], Optional[Any]]]
33
+ # A "bound" version of the FunctionCall arguments, mapping each specified parameter name
34
+ # to one of three types of bindings:
35
+ # - a component index, if the parameter is a non-variadic parameter
36
+ # - a list of component indices, if the parameter is a variadic positional parameter
37
+ # - a dict mapping keyword names to component indices, if the parameter is a variadic keyword parameter
38
+ bound_idxs: dict[str, Union[int, list[int], dict[str, int]]]
41
39
 
42
- arg_types: list[ts.ColumnType]
43
- kwarg_types: dict[str, ts.ColumnType]
44
40
  return_type: ts.ColumnType
45
41
  group_by_start_idx: int
46
42
  group_by_stop_idx: int
47
43
  fn_expr_idx: int
48
44
  order_by_start_idx: int
49
- constant_args: set[str]
50
45
  aggregator: Optional[Any]
51
46
  current_partition_vals: Optional[list[Any]]
52
47
 
53
48
  def __init__(
54
49
  self,
55
50
  fn: func.Function,
56
- bound_args: dict[str, Any],
51
+ args: list[Expr],
52
+ kwargs: dict[str, Expr],
57
53
  return_type: ts.ColumnType,
58
54
  order_by_clause: Optional[list[Any]] = None,
59
55
  group_by_clause: Optional[list[Any]] = None,
60
56
  is_method_call: bool = False,
61
57
  ):
58
+ assert not fn.is_polymorphic
59
+ assert all(isinstance(arg, Expr) for arg in args)
60
+ assert all(isinstance(arg, Expr) for arg in kwargs.values())
61
+
62
62
  if order_by_clause is None:
63
63
  order_by_clause = []
64
64
  if group_by_clause is None:
65
65
  group_by_clause = []
66
66
 
67
- assert not fn.is_polymorphic
67
+ super().__init__(return_type)
68
68
 
69
69
  self.fn = fn
70
- self.is_method_call = is_method_call
71
- # self.normalize_args(fn.name, signature, bound_args)
72
- self.resource_pool = fn.call_resource_pool(bound_args)
73
- signature = fn.signature
74
-
75
- # If `return_type` is non-nullable, but the function call has a nullable input to any of its non-nullable
76
- # parameters, then we need to make it nullable. This is because Pixeltable defaults a function output to
77
- # `None` when any of its non-nullable inputs are `None`.
78
- for arg_name, arg in bound_args.items():
79
- param = signature.parameters[arg_name]
80
- if (
81
- param.col_type is not None
82
- and not param.col_type.nullable
83
- and isinstance(arg, Expr)
84
- and arg.col_type.nullable
85
- ):
86
- return_type = return_type.copy(nullable=True)
87
- break
88
-
89
70
  self.return_type = return_type
71
+ self.is_method_call = is_method_call
90
72
 
91
- super().__init__(return_type)
73
+ # Build the components list from the specified args and kwargs, and note the component_idx of each argument.
74
+ self.components.extend(arg.copy() for arg in args)
75
+ self.arg_idxs = list(range(len(self.components)))
76
+ self.components.extend(arg.copy() for arg in kwargs.values())
77
+ self.kwarg_idxs = {name: i + len(args) for i, name in enumerate(kwargs.keys())}
78
+
79
+ # Now generate bound_idxs for the args and kwargs indices.
80
+ # This is guaranteed to work, because at this point the call has already been validated.
81
+ # These will be used later to dereference specific parameter values.
82
+ bindings = fn.signature.py_signature.bind(*self.arg_idxs, **self.kwarg_idxs)
83
+ self.bound_idxs = bindings.arguments
84
+
85
+ # Separately generate bound_args for purposes of determining the resource pool.
86
+ bindings = fn.signature.py_signature.bind(*args, **kwargs)
87
+ bound_args = bindings.arguments
88
+ self.resource_pool = fn.call_resource_pool(bound_args)
92
89
 
93
90
  self.agg_init_args = {}
94
91
  if self.is_agg_fn_call:
95
- # we separate out the init args for the aggregator
92
+ # We separate out the init args for the aggregator. Unpack Literals in init args.
96
93
  assert isinstance(fn, func.AggregateFunction)
97
- self.agg_init_args = {
98
- arg_name: arg for arg_name, arg in bound_args.items() if arg_name in fn.init_param_names[0]
99
- }
100
- bound_args = {
101
- arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names[0]
102
- }
103
-
104
- # construct components, args, kwargs
105
- self.args = []
106
- self.kwargs = {}
107
- self._param_values = {}
108
-
109
- # we record the types of non-variable parameters for runtime type checks
110
- self.arg_types = []
111
- self.kwarg_types = {}
112
-
113
- # the prefix of parameters that are bound can be passed by position
114
- processed_args: set[str] = set()
115
- for py_param in signature.py_signature.parameters.values():
116
- if py_param.name not in bound_args or py_param.kind == inspect.Parameter.KEYWORD_ONLY:
117
- break
118
- arg = bound_args[py_param.name]
119
- if isinstance(arg, Expr):
120
- self.args.append((len(self.components), None))
121
- self._param_values[py_param.name] = (len(self.components), None)
122
- self.components.append(arg.copy())
123
- else:
124
- self.args.append((None, arg))
125
- self._param_values[py_param.name] = (None, arg)
126
- if py_param.kind != inspect.Parameter.VAR_POSITIONAL and py_param.kind != inspect.Parameter.VAR_KEYWORD:
127
- self.arg_types.append(signature.parameters[py_param.name].col_type)
128
- processed_args.add(py_param.name)
129
-
130
- # the remaining args are passed as keywords
131
- for param_name in bound_args.keys():
132
- if param_name not in processed_args:
133
- arg = bound_args[param_name]
134
- if isinstance(arg, Expr):
135
- self.kwargs[param_name] = (len(self.components), None)
136
- self._param_values[param_name] = (len(self.components), None)
137
- self.components.append(arg.copy())
138
- else:
139
- self.kwargs[param_name] = (None, arg)
140
- self._param_values[param_name] = (None, arg)
141
- if signature.py_signature.parameters[param_name].kind != inspect.Parameter.VAR_KEYWORD:
142
- self.kwarg_types[param_name] = signature.parameters[param_name].col_type
143
-
144
- # fill in default values for parameters that don't have explicit arguments
145
- for param in fn.signature.parameters.values():
146
- if param.name not in self._param_values:
147
- self._param_values[param.name] = (
148
- (None, None) if param.default is inspect.Parameter.empty else (None, param.default)
149
- )
94
+ for arg_name, arg in bound_args.items():
95
+ if arg_name in fn.init_param_names[0]:
96
+ assert isinstance(arg, Literal) # This was checked during validate_call
97
+ self.agg_init_args[arg_name] = arg.val
150
98
 
151
99
  # window function state:
152
100
  # self.components[self.group_by_start_idx:self.group_by_stop_idx] contains group_by exprs
153
101
  self.group_by_start_idx, self.group_by_stop_idx = 0, 0
154
102
  if len(group_by_clause) > 0:
155
103
  if isinstance(group_by_clause[0], catalog.Table):
104
+ assert len(group_by_clause) == 1
156
105
  group_by_exprs = self._create_rowid_refs(group_by_clause[0])
157
106
  else:
158
- assert isinstance(group_by_clause[0], Expr)
107
+ assert all(isinstance(expr, Expr) for expr in group_by_clause)
159
108
  group_by_exprs = group_by_clause
160
109
  # record grouping exprs in self.components, we need to evaluate them to get partition vals
161
110
  self.group_by_start_idx = len(self.components)
@@ -164,9 +113,9 @@ class FunctionCall(Expr):
164
113
 
165
114
  if isinstance(self.fn, func.ExprTemplateFunction):
166
115
  # we instantiate the template to create an Expr that can be evaluated and record that as a component
167
- fn_expr = self.fn.instantiate([], bound_args)
116
+ fn_expr = self.fn.instantiate(args, kwargs)
117
+ self.fn_expr_idx = len(self.components)
168
118
  self.components.append(fn_expr)
169
- self.fn_expr_idx = len(self.components) - 1
170
119
  else:
171
120
  self.fn_expr_idx = sys.maxsize
172
121
 
@@ -180,7 +129,6 @@ class FunctionCall(Expr):
180
129
  self.order_by_start_idx = len(self.components)
181
130
  self.components.extend(order_by_clause)
182
131
 
183
- self.constant_args = {param_name for param_name, arg in bound_args.items() if not isinstance(arg, Expr)}
184
132
  # execution state for aggregate functions
185
133
  self.aggregator = None
186
134
  self.current_partition_vals = None
@@ -194,84 +142,13 @@ class FunctionCall(Expr):
194
142
  def default_column_name(self) -> Optional[str]:
195
143
  return self.fn.name
196
144
 
197
- @classmethod
198
- def normalize_args(cls, fn_name: str, signature: func.Signature, bound_args: dict[str, Any]) -> None:
199
- """Converts args to Exprs where appropriate and checks that they are compatible with signature.
200
-
201
- Updates bound_args in place, where necessary.
202
- """
203
- for param_name, arg in bound_args.items():
204
- param = signature.parameters[param_name]
205
- is_var_param = param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
206
-
207
- if isinstance(arg, dict):
208
- try:
209
- arg = InlineDict(arg)
210
- bound_args[param_name] = arg
211
- continue
212
- except excs.Error:
213
- # this didn't work, but it might be a literal
214
- pass
215
-
216
- if isinstance(arg, list) or isinstance(arg, tuple):
217
- try:
218
- arg = InlineList(arg)
219
- bound_args[param_name] = arg
220
- continue
221
- except excs.Error:
222
- # this didn't work, but it might be a literal
223
- pass
224
-
225
- if not isinstance(arg, Expr):
226
- if arg is not None:
227
- try:
228
- param_type = param.col_type
229
- bound_args[param_name] = param_type.create_literal(arg)
230
- except TypeError as e:
231
- msg = str(e)
232
- raise excs.Error(f'Argument for parameter {param_name!r}: {msg[0].lower() + msg[1:]}')
233
- continue
234
-
235
- # these checks break the db migration test, because InlineArray isn't serialized correctly (it looses
236
- # the type information)
237
- # if is_var_param:
238
- # if param.kind == inspect.Parameter.VAR_POSITIONAL:
239
- # if not isinstance(arg, InlineArray) or not arg.col_type.is_json_type():
240
- # pass
241
- # assert isinstance(arg, InlineArray), type(arg)
242
- # assert arg.col_type.is_json_type()
243
- # if param.kind == inspect.Parameter.VAR_KEYWORD:
244
- # if not isinstance(arg, InlineDict):
245
- # pass
246
- # assert isinstance(arg, InlineDict), type(arg)
247
- if is_var_param:
248
- pass
249
- else:
250
- assert param.col_type is not None
251
- # Check that the argument is consistent with the expected parameter type, with the allowance that
252
- # non-nullable parameters can still accept nullable arguments (since function calls with Nones
253
- # assigned to non-nullable parameters will always return None)
254
- if not (
255
- param.col_type.is_supertype_of(arg.col_type, ignore_nullable=True)
256
- # TODO: this is a hack to allow JSON columns to be passed to functions that accept scalar
257
- # types. It's necessary to avoid littering notebooks with `apply(str)` calls or equivalent.
258
- # (Previously, this wasn't necessary because `is_supertype_of()` was improperly implemented.)
259
- # We need to think through the right way to handle this scenario.
260
- or (arg.col_type.is_json_type() and param.col_type.is_scalar_type())
261
- ):
262
- raise excs.Error(
263
- f'Parameter {param_name} (in function {fn_name}): argument type {arg.col_type} does not match parameter type '
264
- f'{param.col_type}'
265
- )
266
-
267
145
  def _equals(self, other: FunctionCall) -> bool:
268
146
  if self.fn != other.fn:
269
147
  return False
270
- if len(self.args) != len(other.args):
148
+ if self.arg_idxs != other.arg_idxs:
149
+ return False
150
+ if self.kwarg_idxs != other.kwarg_idxs:
271
151
  return False
272
- for i in range(len(self.args)):
273
- if self.args[i] != other.args[i]:
274
- return False
275
152
  if self.group_by_start_idx != other.group_by_start_idx:
276
153
  return False
277
154
  if self.group_by_stop_idx != other.group_by_stop_idx:
@@ -283,11 +160,12 @@ class FunctionCall(Expr):
283
160
  def _id_attrs(self) -> list[tuple[str, Any]]:
284
161
  return super()._id_attrs() + [
285
162
  ('fn', id(self.fn)), # use the function pointer, not the fqn, which isn't set for lambdas
286
- ('args', self.args),
287
- ('kwargs', self.kwargs),
163
+ ('args', self.arg_idxs),
164
+ ('kwargs', self.kwarg_idxs),
288
165
  ('group_by_start_idx', self.group_by_start_idx),
289
166
  ('group_by_stop_idx', self.group_by_stop_idx),
290
- ('order_by_start_idx', self.order_by_start_idx),
167
+ ('fn_expr_idx', self.fn_expr_idx),
168
+ ('order_by_idx', self.order_by_start_idx),
291
169
  ]
292
170
 
293
171
  def __repr__(self) -> str:
@@ -301,16 +179,8 @@ class FunctionCall(Expr):
301
179
  return f'{fn_name}({self._print_args()})'
302
180
 
303
181
  def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
304
- def print_arg(arg: Any) -> str:
305
- return repr(arg) if isinstance(arg, str) else str(arg)
306
-
307
- arg_strs = [print_arg(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]]
308
- arg_strs.extend(
309
- [
310
- f'{param_name}={print_arg(arg) if idx is None else str(self.components[idx])}'
311
- for param_name, (idx, arg) in self.kwargs.items()
312
- ]
313
- )
182
+ arg_strs = [str(self.components[idx]) for idx in self.arg_idxs[start_idx:]]
183
+ arg_strs.extend([f'{param_name}={str(self.components[idx])}' for param_name, idx in self.kwarg_idxs.items()])
314
184
  if len(self.order_by) > 0:
315
185
  assert isinstance(self.fn, func.AggregateFunction)
316
186
  if self.fn.requires_order_by:
@@ -367,29 +237,21 @@ class FunctionCall(Expr):
367
237
  return None
368
238
 
369
239
  # try to construct args and kwargs to call self.fn._to_sql()
240
+ args: list[sql.ColumnElement] = []
241
+ for component_idx in self.arg_idxs:
242
+ arg_element = sql_elements.get(self.components[component_idx])
243
+ if arg_element is None:
244
+ return None
245
+ args.append(arg_element)
246
+
370
247
  kwargs: dict[str, sql.ColumnElement] = {}
371
- for param_name, (component_idx, arg) in self.kwargs.items():
372
- param = self.fn.signature.parameters[param_name]
373
- assert param.kind != inspect.Parameter.VAR_POSITIONAL and param.kind != inspect.Parameter.VAR_KEYWORD
374
- if component_idx is None:
375
- kwargs[param_name] = sql.literal(arg)
376
- else:
377
- arg_element = sql_elements.get(self.components[component_idx])
378
- if arg_element is None:
379
- return None
380
- kwargs[param_name] = arg_element
248
+ for param_name, component_idx in self.kwarg_idxs.items():
249
+ arg_element = sql_elements.get(self.components[component_idx])
250
+ if arg_element is None:
251
+ return None
252
+ kwargs[param_name] = arg_element
381
253
 
382
- args: list[sql.ColumnElement] = []
383
- for _, (component_idx, arg) in enumerate(self.args):
384
- if component_idx is None:
385
- args.append(sql.literal(arg))
386
- else:
387
- arg_element = sql_elements.get(self.components[component_idx])
388
- if arg_element is None:
389
- return None
390
- args.append(arg_element)
391
- result = self.fn._to_sql(*args, **kwargs)
392
- return result
254
+ return self.fn._to_sql(*args, **kwargs)
393
255
 
394
256
  def reset_agg(self) -> None:
395
257
  """
@@ -409,35 +271,32 @@ class FunctionCall(Expr):
409
271
 
410
272
  def make_args(self, data_row: DataRow) -> Optional[tuple[list[Any], dict[str, Any]]]:
411
273
  """Return args and kwargs, constructed for data_row; returns None if any non-nullable arg is None."""
274
+ args: list[Any] = []
275
+ parameters_by_pos = self.fn.signature.parameters_by_pos
276
+ for idx in self.arg_idxs:
277
+ val = data_row[self.components[idx].slot_idx]
278
+ if (
279
+ val is None
280
+ and parameters_by_pos[idx].kind
281
+ in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
282
+ and not parameters_by_pos[idx].col_type.nullable
283
+ ):
284
+ return None
285
+ args.append(val)
286
+
412
287
  kwargs: dict[str, Any] = {}
413
- for param_name, (component_idx, arg) in self.kwargs.items():
414
- val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
415
- param = self.fn.signature.parameters[param_name]
416
- if param.kind == inspect.Parameter.VAR_KEYWORD:
417
- # expand **kwargs parameter
418
- kwargs.update(val)
419
- else:
420
- assert param.kind != inspect.Parameter.VAR_POSITIONAL
421
- if not param.col_type.nullable and val is None:
422
- return None
423
- kwargs[param_name] = val
288
+ parameters = self.fn.signature.parameters
289
+ for param_name, idx in self.kwarg_idxs.items():
290
+ val = data_row[self.components[idx].slot_idx]
291
+ if (
292
+ val is None
293
+ and parameters[param_name].kind
294
+ in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
295
+ and not parameters[param_name].col_type.nullable
296
+ ):
297
+ return None
298
+ kwargs[param_name] = val
424
299
 
425
- args: list[Any] = []
426
- for param_idx, (component_idx, arg) in enumerate(self.args):
427
- val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
428
- param = self.fn.signature.parameters_by_pos[param_idx]
429
- if param.kind == inspect.Parameter.VAR_POSITIONAL:
430
- # expand *args parameter
431
- assert isinstance(val, list)
432
- args.extend(val)
433
- elif param.kind == inspect.Parameter.VAR_KEYWORD:
434
- # expand **kwargs parameter
435
- assert isinstance(val, dict)
436
- kwargs.update(val)
437
- else:
438
- if not param.col_type.nullable and val is None:
439
- return None
440
- args.append(val)
441
300
  return args, kwargs
442
301
 
443
302
  def get_param_values(self, param_names: Sequence[str], data_rows: list[DataRow]) -> list[dict[str, Any]]:
@@ -445,17 +304,25 @@ class FunctionCall(Expr):
445
304
  Returns a list of dicts mapping each param name to its value when this FunctionCall is evaluated against
446
305
  data_rows
447
306
  """
448
- assert all(name in self._param_values for name in param_names), f'{param_names}, {self._param_values.keys()}'
307
+ assert all(name in self.fn.signature.parameters for name in param_names), f'{param_names}, {self.fn.signature}'
449
308
  result: list[dict[str, Any]] = []
450
309
  for row in data_rows:
451
310
  d: dict[str, Any] = {}
452
311
  for param_name in param_names:
453
- component_idx, default_val = self._param_values[param_name]
454
- if component_idx is None:
455
- d[param_name] = default_val
312
+ val = self.bound_idxs.get(param_name)
313
+ if isinstance(val, int):
314
+ d[param_name] = row[self.components[val].slot_idx]
315
+ elif isinstance(val, list):
316
+ # var_positional
317
+ d[param_name] = [row[self.components[idx].slot_idx] for idx in val]
318
+ elif isinstance(val, dict):
319
+ # var_keyword
320
+ d[param_name] = {k: row[self.components[idx].slot_idx] for k, idx in val.items()}
456
321
  else:
457
- slot_idx = self.components[component_idx].slot_idx
458
- d[param_name] = row[slot_idx]
322
+ assert val is None
323
+ default = self.fn.signature.parameters[param_name].default
324
+ assert default is not None
325
+ d[param_name] = default.val
459
326
  result.append(d)
460
327
  return result
461
328
 
@@ -501,53 +368,59 @@ class FunctionCall(Expr):
501
368
  data_row[self.slot_idx] = self.fn.exec(args, kwargs)
502
369
 
503
370
  def _as_dict(self) -> dict:
504
- result = {
371
+ return {
505
372
  'fn': self.fn.as_dict(),
506
- 'args': self.args,
507
- 'kwargs': self.kwargs,
508
373
  'return_type': self.return_type.as_dict(),
374
+ 'arg_idxs': self.arg_idxs,
375
+ 'kwarg_idxs': self.kwarg_idxs,
509
376
  'group_by_start_idx': self.group_by_start_idx,
510
377
  'group_by_stop_idx': self.group_by_stop_idx,
511
378
  'order_by_start_idx': self.order_by_start_idx,
379
+ 'is_method_call': self.is_method_call,
512
380
  **super()._as_dict(),
513
381
  }
514
- return result
515
382
 
516
383
  @classmethod
517
384
  def _from_dict(cls, d: dict, components: list[Expr]) -> FunctionCall:
518
- assert 'fn' in d
519
- assert 'args' in d
520
- assert 'kwargs' in d
521
-
522
385
  fn = func.Function.from_dict(d['fn'])
523
- assert not fn.is_polymorphic
524
386
  return_type = ts.ColumnType.from_dict(d['return_type']) if 'return_type' in d else None
525
- group_by_exprs = components[d['group_by_start_idx'] : d['group_by_stop_idx']]
526
- order_by_exprs = components[d['order_by_start_idx'] :]
527
-
528
- args = [expr if idx is None else components[idx] for idx, expr in d['args']]
529
- kwargs = {
530
- param_name: (expr if idx is None else components[idx]) for param_name, (idx, expr) in d['kwargs'].items()
531
- }
532
-
533
- # `Function.from_dict()` does signature matching, so it is safe to assume that `args` and `kwargs` are
534
- # consistent with its signature.
535
-
536
- # Reassemble bound_args. Note that args and kwargs represent "already bound arguments": they are not bindable
537
- # in the Python sense, because variable args (such as *args and **kwargs) have already been condensed.
538
- param_names = list(fn.signature.parameters.keys())
539
- bound_args = {param_names[i]: arg for i, arg in enumerate(args)}
540
- bound_args.update(kwargs.items())
541
-
542
- # TODO: In order to properly invoke call_return_type, we need to ensure that any InlineLists or InlineDicts
543
- # in bound_args are unpacked into Python lists/dicts. There is an open task to ensure this is true in general;
544
- # for now, as a hack, we do the unpacking here for the specific case of an InlineList of Literals (the only
545
- # case where this is necessary to support existing conditional_return_type implementations). Once the general
546
- # pattern is implemented, we can remove this hack.
547
- unpacked_bound_args = {param_name: cls.__unpack_bound_arg(arg) for param_name, arg in bound_args.items()}
387
+ arg_idxs: list[int] = d['arg_idxs']
388
+ kwarg_idxs: dict[str, int] = d['kwarg_idxs']
389
+ group_by_start_idx: int = d['group_by_start_idx']
390
+ group_by_stop_idx: int = d['group_by_stop_idx']
391
+ order_by_start_idx: int = d['order_by_start_idx']
392
+ is_method_call: bool = d['is_method_call']
393
+
394
+ args = [components[idx] for idx in arg_idxs]
395
+ kwargs = {name: components[idx] for name, idx in kwarg_idxs.items()}
396
+ group_by_exprs = components[group_by_start_idx:group_by_stop_idx]
397
+ order_by_exprs = components[order_by_start_idx:]
398
+
399
+ # Now re-bind args and kwargs using the version of `fn` that is currently represented in code. This ensures
400
+ # that we get a valid binding even if the signatures of `fn` have changed since the FunctionCall was
401
+ # serialized.
402
+
403
+ resolved_fn: func.Function
404
+ bound_args: dict[str, Expr]
405
+
406
+ try:
407
+ resolved_fn, bound_args = fn._bind_to_matching_signature(args, kwargs)
408
+ except (TypeError, excs.Error):
409
+ # TODO: Handle this more gracefully (instead of failing the DB load, allow the DB load to succeed, but
410
+ # mark any enclosing FunctionCall as unusable). It's the same issue as dealing with a renamed UDF or
411
+ # FunctionCall return type mismatch.
412
+ signature_note_str = 'any of its signatures' if fn.is_polymorphic else 'its signature'
413
+ instance_signature_str = f'{len(fn.signatures)} signatures' if fn.is_polymorphic else str(fn.signature)
414
+ raise excs.Error(
415
+ f'The signature stored in the database for the UDF `{fn.self_path}` no longer matches '
416
+ f'{signature_note_str} as currently defined in the code.\nThis probably means that the code for '
417
+ f'`{fn.self_path}` has changed in a backward-incompatible way.\n'
418
+ f'Signature in database: {fn}\n'
419
+ f'Signature as currently defined in code: {instance_signature_str}'
420
+ )
548
421
 
549
422
  # Evaluate the call_return_type as defined in the current codebase.
550
- call_return_type = fn.call_return_type([], unpacked_bound_args)
423
+ call_return_type = resolved_fn.call_return_type(bound_args)
551
424
 
552
425
  if return_type is None:
553
426
  # Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious way to
@@ -559,55 +432,24 @@ class FunctionCall(Expr):
559
432
  # There is a return_type stored in metadata (schema version >= 25).
560
433
  # Check that the stored return_type of the UDF call matches the column type of the FunctionCall, and
561
434
  # fail-fast if it doesn't (otherwise we risk getting downstream database errors).
562
- # TODO: Handle this more gracefully (instead of failing the DB load, allow the DB load to succeed, but
563
- # mark this FunctionCall as unusable). It's the same issue as dealing with a renamed UDF or Function
564
- # signature mismatch.
435
+ # TODO: Handle this more gracefully (as noted above).
565
436
  if not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
566
437
  raise excs.Error(
567
438
  f'The return type stored in the database for a UDF call to `{fn.self_path}` no longer matches the '
568
439
  f'return type of the UDF as currently defined in the code.\nThis probably means that the code for '
569
440
  f'`{fn.self_path}` has changed in a backward-incompatible way.\n'
570
441
  f'Return type in database: `{return_type}`\n'
571
- f'Return type as currently defined: `{call_return_type}`'
442
+ f'Return type as currently defined in code: `{call_return_type}`'
572
443
  )
573
444
 
574
- fn_call = cls(fn, bound_args, return_type, group_by_clause=group_by_exprs, order_by_clause=order_by_exprs)
575
- return fn_call
576
-
577
- @classmethod
578
- def __find_matching_signature(cls, fn: func.Function, args: list[Any], kwargs: dict[str, Any]) -> Optional[int]:
579
- for idx, sig in enumerate(fn.signatures):
580
- if cls.__signature_matches(sig, args, kwargs):
581
- return idx
582
- return None
583
-
584
- @classmethod
585
- def __signature_matches(cls, sig: func.Signature, args: list[Any], kwargs: dict[str, Any]) -> bool:
586
- unbound_parameters = set(sig.parameters.keys())
587
- for i, arg in enumerate(args):
588
- if i >= len(sig.parameters_by_pos):
589
- return False
590
- param = sig.parameters_by_pos[i]
591
- arg_type = arg.col_type if isinstance(arg, Expr) else ts.ColumnType.infer_literal_type(arg)
592
- if param.col_type is not None and not param.col_type.is_supertype_of(arg_type, ignore_nullable=True):
593
- return False
594
- unbound_parameters.remove(param.name)
595
- for param_name, arg in kwargs.items():
596
- if param_name not in unbound_parameters:
597
- return False
598
- param = sig.parameters[param_name]
599
- arg_type = arg.col_type if isinstance(arg, Expr) else ts.ColumnType.infer_literal_type(arg)
600
- if param.col_type is not None and not param.col_type.is_supertype_of(arg_type, ignore_nullable=True):
601
- return False
602
- unbound_parameters.remove(param_name)
603
- for param_name in unbound_parameters:
604
- param = sig.parameters[param_name]
605
- if not param.has_default:
606
- return False
607
- return True
445
+ fn_call = cls(
446
+ resolved_fn,
447
+ args,
448
+ kwargs,
449
+ return_type,
450
+ group_by_clause=group_by_exprs,
451
+ order_by_clause=order_by_exprs,
452
+ is_method_call=is_method_call,
453
+ )
608
454
 
609
- @classmethod
610
- def __unpack_bound_arg(cls, arg: Any) -> Any:
611
- if isinstance(arg, InlineList) and all(isinstance(el, Literal) for el in arg.components):
612
- return [el.val for el in arg.components]
613
- return arg
455
+ return fn_call