pixeltable 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -0
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +9 -2
- pixeltable/catalog/column.py +1 -1
- pixeltable/catalog/dir.py +1 -1
- pixeltable/catalog/table.py +1 -1
- pixeltable/catalog/table_version.py +12 -2
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/catalog/view.py +64 -20
- pixeltable/dataframe.py +14 -14
- pixeltable/env.py +20 -3
- pixeltable/exec/component_iteration_node.py +1 -2
- pixeltable/exec/expr_eval/evaluators.py +4 -2
- pixeltable/exec/expr_eval/expr_eval_node.py +4 -1
- pixeltable/exprs/comparison.py +8 -4
- pixeltable/exprs/data_row.py +5 -3
- pixeltable/exprs/expr.py +9 -2
- pixeltable/exprs/function_call.py +155 -313
- pixeltable/func/aggregate_function.py +29 -15
- pixeltable/func/callable_function.py +11 -8
- pixeltable/func/expr_template_function.py +3 -9
- pixeltable/func/function.py +148 -74
- pixeltable/func/signature.py +65 -30
- pixeltable/func/udf.py +1 -1
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/deepseek.py +121 -0
- pixeltable/functions/image.py +7 -7
- pixeltable/functions/openai.py +49 -10
- pixeltable/functions/video.py +14 -7
- pixeltable/globals.py +14 -3
- pixeltable/index/embedding_index.py +4 -13
- pixeltable/io/globals.py +88 -77
- pixeltable/io/hf_datasets.py +34 -34
- pixeltable/io/pandas.py +75 -87
- pixeltable/io/parquet.py +19 -27
- pixeltable/io/utils.py +115 -0
- pixeltable/iterators/audio.py +2 -1
- pixeltable/iterators/video.py +1 -1
- pixeltable/metadata/__init__.py +2 -1
- pixeltable/metadata/converters/convert_15.py +18 -8
- pixeltable/metadata/converters/convert_27.py +31 -0
- pixeltable/metadata/converters/convert_28.py +15 -0
- pixeltable/metadata/converters/convert_29.py +111 -0
- pixeltable/metadata/converters/util.py +12 -1
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +8 -0
- pixeltable/share/__init__.py +1 -0
- pixeltable/share/packager.py +246 -0
- pixeltable/share/publish.py +97 -0
- pixeltable/type_system.py +87 -42
- pixeltable/utils/__init__.py +41 -0
- pixeltable/utils/arrow.py +45 -12
- pixeltable/utils/formatter.py +1 -1
- pixeltable/utils/iceberg.py +14 -0
- pixeltable/utils/media_store.py +1 -1
- {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/METADATA +37 -50
- {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/RECORD +60 -51
- {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/WHEEL +1 -1
- {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/entry_points.txt +0 -0
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
|
-
import json
|
|
5
4
|
import sys
|
|
6
|
-
from typing import Any, Optional, Sequence
|
|
5
|
+
from typing import Any, Optional, Sequence, Union
|
|
7
6
|
|
|
8
7
|
import sqlalchemy as sql
|
|
9
8
|
|
|
@@ -14,7 +13,6 @@ import pixeltable.type_system as ts
|
|
|
14
13
|
|
|
15
14
|
from .data_row import DataRow
|
|
16
15
|
from .expr import Expr
|
|
17
|
-
from .inline_expr import InlineDict, InlineList
|
|
18
16
|
from .literal import Literal
|
|
19
17
|
from .row_builder import RowBuilder
|
|
20
18
|
from .rowid_ref import RowidRef
|
|
@@ -27,135 +25,86 @@ class FunctionCall(Expr):
|
|
|
27
25
|
agg_init_args: dict[str, Any]
|
|
28
26
|
resource_pool: Optional[str]
|
|
29
27
|
|
|
30
|
-
#
|
|
31
|
-
#
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
kwargs: dict[str, tuple[Optional[int], Optional[Any]]]
|
|
28
|
+
# These collections hold the component indices corresponding to the args and kwargs
|
|
29
|
+
# that were passed to the FunctionCall. They're 1:1 with the original call pattern.
|
|
30
|
+
arg_idxs: list[int]
|
|
31
|
+
kwarg_idxs: dict[str, int]
|
|
35
32
|
|
|
36
|
-
#
|
|
37
|
-
#
|
|
38
|
-
# -
|
|
39
|
-
#
|
|
40
|
-
|
|
33
|
+
# A "bound" version of the FunctionCall arguments, mapping each specified parameter name
|
|
34
|
+
# to one of three types of bindings:
|
|
35
|
+
# - a component index, if the parameter is a non-variadic parameter
|
|
36
|
+
# - a list of component indices, if the parameter is a variadic positional parameter
|
|
37
|
+
# - a dict mapping keyword names to component indices, if the parameter is a variadic keyword parameter
|
|
38
|
+
bound_idxs: dict[str, Union[int, list[int], dict[str, int]]]
|
|
41
39
|
|
|
42
|
-
arg_types: list[ts.ColumnType]
|
|
43
|
-
kwarg_types: dict[str, ts.ColumnType]
|
|
44
40
|
return_type: ts.ColumnType
|
|
45
41
|
group_by_start_idx: int
|
|
46
42
|
group_by_stop_idx: int
|
|
47
43
|
fn_expr_idx: int
|
|
48
44
|
order_by_start_idx: int
|
|
49
|
-
constant_args: set[str]
|
|
50
45
|
aggregator: Optional[Any]
|
|
51
46
|
current_partition_vals: Optional[list[Any]]
|
|
52
47
|
|
|
53
48
|
def __init__(
|
|
54
49
|
self,
|
|
55
50
|
fn: func.Function,
|
|
56
|
-
|
|
51
|
+
args: list[Expr],
|
|
52
|
+
kwargs: dict[str, Expr],
|
|
57
53
|
return_type: ts.ColumnType,
|
|
58
54
|
order_by_clause: Optional[list[Any]] = None,
|
|
59
55
|
group_by_clause: Optional[list[Any]] = None,
|
|
60
56
|
is_method_call: bool = False,
|
|
61
57
|
):
|
|
58
|
+
assert not fn.is_polymorphic
|
|
59
|
+
assert all(isinstance(arg, Expr) for arg in args)
|
|
60
|
+
assert all(isinstance(arg, Expr) for arg in kwargs.values())
|
|
61
|
+
|
|
62
62
|
if order_by_clause is None:
|
|
63
63
|
order_by_clause = []
|
|
64
64
|
if group_by_clause is None:
|
|
65
65
|
group_by_clause = []
|
|
66
66
|
|
|
67
|
-
|
|
67
|
+
super().__init__(return_type)
|
|
68
68
|
|
|
69
69
|
self.fn = fn
|
|
70
|
-
self.is_method_call = is_method_call
|
|
71
|
-
# self.normalize_args(fn.name, signature, bound_args)
|
|
72
|
-
self.resource_pool = fn.call_resource_pool(bound_args)
|
|
73
|
-
signature = fn.signature
|
|
74
|
-
|
|
75
|
-
# If `return_type` is non-nullable, but the function call has a nullable input to any of its non-nullable
|
|
76
|
-
# parameters, then we need to make it nullable. This is because Pixeltable defaults a function output to
|
|
77
|
-
# `None` when any of its non-nullable inputs are `None`.
|
|
78
|
-
for arg_name, arg in bound_args.items():
|
|
79
|
-
param = signature.parameters[arg_name]
|
|
80
|
-
if (
|
|
81
|
-
param.col_type is not None
|
|
82
|
-
and not param.col_type.nullable
|
|
83
|
-
and isinstance(arg, Expr)
|
|
84
|
-
and arg.col_type.nullable
|
|
85
|
-
):
|
|
86
|
-
return_type = return_type.copy(nullable=True)
|
|
87
|
-
break
|
|
88
|
-
|
|
89
70
|
self.return_type = return_type
|
|
71
|
+
self.is_method_call = is_method_call
|
|
90
72
|
|
|
91
|
-
|
|
73
|
+
# Build the components list from the specified args and kwargs, and note the component_idx of each argument.
|
|
74
|
+
self.components.extend(arg.copy() for arg in args)
|
|
75
|
+
self.arg_idxs = list(range(len(self.components)))
|
|
76
|
+
self.components.extend(arg.copy() for arg in kwargs.values())
|
|
77
|
+
self.kwarg_idxs = {name: i + len(args) for i, name in enumerate(kwargs.keys())}
|
|
78
|
+
|
|
79
|
+
# Now generate bound_idxs for the args and kwargs indices.
|
|
80
|
+
# This is guaranteed to work, because at this point the call has already been validated.
|
|
81
|
+
# These will be used later to dereference specific parameter values.
|
|
82
|
+
bindings = fn.signature.py_signature.bind(*self.arg_idxs, **self.kwarg_idxs)
|
|
83
|
+
self.bound_idxs = bindings.arguments
|
|
84
|
+
|
|
85
|
+
# Separately generate bound_args for purposes of determining the resource pool.
|
|
86
|
+
bindings = fn.signature.py_signature.bind(*args, **kwargs)
|
|
87
|
+
bound_args = bindings.arguments
|
|
88
|
+
self.resource_pool = fn.call_resource_pool(bound_args)
|
|
92
89
|
|
|
93
90
|
self.agg_init_args = {}
|
|
94
91
|
if self.is_agg_fn_call:
|
|
95
|
-
#
|
|
92
|
+
# We separate out the init args for the aggregator. Unpack Literals in init args.
|
|
96
93
|
assert isinstance(fn, func.AggregateFunction)
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names[0]
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
# construct components, args, kwargs
|
|
105
|
-
self.args = []
|
|
106
|
-
self.kwargs = {}
|
|
107
|
-
self._param_values = {}
|
|
108
|
-
|
|
109
|
-
# we record the types of non-variable parameters for runtime type checks
|
|
110
|
-
self.arg_types = []
|
|
111
|
-
self.kwarg_types = {}
|
|
112
|
-
|
|
113
|
-
# the prefix of parameters that are bound can be passed by position
|
|
114
|
-
processed_args: set[str] = set()
|
|
115
|
-
for py_param in signature.py_signature.parameters.values():
|
|
116
|
-
if py_param.name not in bound_args or py_param.kind == inspect.Parameter.KEYWORD_ONLY:
|
|
117
|
-
break
|
|
118
|
-
arg = bound_args[py_param.name]
|
|
119
|
-
if isinstance(arg, Expr):
|
|
120
|
-
self.args.append((len(self.components), None))
|
|
121
|
-
self._param_values[py_param.name] = (len(self.components), None)
|
|
122
|
-
self.components.append(arg.copy())
|
|
123
|
-
else:
|
|
124
|
-
self.args.append((None, arg))
|
|
125
|
-
self._param_values[py_param.name] = (None, arg)
|
|
126
|
-
if py_param.kind != inspect.Parameter.VAR_POSITIONAL and py_param.kind != inspect.Parameter.VAR_KEYWORD:
|
|
127
|
-
self.arg_types.append(signature.parameters[py_param.name].col_type)
|
|
128
|
-
processed_args.add(py_param.name)
|
|
129
|
-
|
|
130
|
-
# the remaining args are passed as keywords
|
|
131
|
-
for param_name in bound_args.keys():
|
|
132
|
-
if param_name not in processed_args:
|
|
133
|
-
arg = bound_args[param_name]
|
|
134
|
-
if isinstance(arg, Expr):
|
|
135
|
-
self.kwargs[param_name] = (len(self.components), None)
|
|
136
|
-
self._param_values[param_name] = (len(self.components), None)
|
|
137
|
-
self.components.append(arg.copy())
|
|
138
|
-
else:
|
|
139
|
-
self.kwargs[param_name] = (None, arg)
|
|
140
|
-
self._param_values[param_name] = (None, arg)
|
|
141
|
-
if signature.py_signature.parameters[param_name].kind != inspect.Parameter.VAR_KEYWORD:
|
|
142
|
-
self.kwarg_types[param_name] = signature.parameters[param_name].col_type
|
|
143
|
-
|
|
144
|
-
# fill in default values for parameters that don't have explicit arguments
|
|
145
|
-
for param in fn.signature.parameters.values():
|
|
146
|
-
if param.name not in self._param_values:
|
|
147
|
-
self._param_values[param.name] = (
|
|
148
|
-
(None, None) if param.default is inspect.Parameter.empty else (None, param.default)
|
|
149
|
-
)
|
|
94
|
+
for arg_name, arg in bound_args.items():
|
|
95
|
+
if arg_name in fn.init_param_names[0]:
|
|
96
|
+
assert isinstance(arg, Literal) # This was checked during validate_call
|
|
97
|
+
self.agg_init_args[arg_name] = arg.val
|
|
150
98
|
|
|
151
99
|
# window function state:
|
|
152
100
|
# self.components[self.group_by_start_idx:self.group_by_stop_idx] contains group_by exprs
|
|
153
101
|
self.group_by_start_idx, self.group_by_stop_idx = 0, 0
|
|
154
102
|
if len(group_by_clause) > 0:
|
|
155
103
|
if isinstance(group_by_clause[0], catalog.Table):
|
|
104
|
+
assert len(group_by_clause) == 1
|
|
156
105
|
group_by_exprs = self._create_rowid_refs(group_by_clause[0])
|
|
157
106
|
else:
|
|
158
|
-
assert isinstance(
|
|
107
|
+
assert all(isinstance(expr, Expr) for expr in group_by_clause)
|
|
159
108
|
group_by_exprs = group_by_clause
|
|
160
109
|
# record grouping exprs in self.components, we need to evaluate them to get partition vals
|
|
161
110
|
self.group_by_start_idx = len(self.components)
|
|
@@ -164,9 +113,9 @@ class FunctionCall(Expr):
|
|
|
164
113
|
|
|
165
114
|
if isinstance(self.fn, func.ExprTemplateFunction):
|
|
166
115
|
# we instantiate the template to create an Expr that can be evaluated and record that as a component
|
|
167
|
-
fn_expr = self.fn.instantiate(
|
|
116
|
+
fn_expr = self.fn.instantiate(args, kwargs)
|
|
117
|
+
self.fn_expr_idx = len(self.components)
|
|
168
118
|
self.components.append(fn_expr)
|
|
169
|
-
self.fn_expr_idx = len(self.components) - 1
|
|
170
119
|
else:
|
|
171
120
|
self.fn_expr_idx = sys.maxsize
|
|
172
121
|
|
|
@@ -180,7 +129,6 @@ class FunctionCall(Expr):
|
|
|
180
129
|
self.order_by_start_idx = len(self.components)
|
|
181
130
|
self.components.extend(order_by_clause)
|
|
182
131
|
|
|
183
|
-
self.constant_args = {param_name for param_name, arg in bound_args.items() if not isinstance(arg, Expr)}
|
|
184
132
|
# execution state for aggregate functions
|
|
185
133
|
self.aggregator = None
|
|
186
134
|
self.current_partition_vals = None
|
|
@@ -194,84 +142,13 @@ class FunctionCall(Expr):
|
|
|
194
142
|
def default_column_name(self) -> Optional[str]:
|
|
195
143
|
return self.fn.name
|
|
196
144
|
|
|
197
|
-
@classmethod
|
|
198
|
-
def normalize_args(cls, fn_name: str, signature: func.Signature, bound_args: dict[str, Any]) -> None:
|
|
199
|
-
"""Converts args to Exprs where appropriate and checks that they are compatible with signature.
|
|
200
|
-
|
|
201
|
-
Updates bound_args in place, where necessary.
|
|
202
|
-
"""
|
|
203
|
-
for param_name, arg in bound_args.items():
|
|
204
|
-
param = signature.parameters[param_name]
|
|
205
|
-
is_var_param = param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
|
|
206
|
-
|
|
207
|
-
if isinstance(arg, dict):
|
|
208
|
-
try:
|
|
209
|
-
arg = InlineDict(arg)
|
|
210
|
-
bound_args[param_name] = arg
|
|
211
|
-
continue
|
|
212
|
-
except excs.Error:
|
|
213
|
-
# this didn't work, but it might be a literal
|
|
214
|
-
pass
|
|
215
|
-
|
|
216
|
-
if isinstance(arg, list) or isinstance(arg, tuple):
|
|
217
|
-
try:
|
|
218
|
-
arg = InlineList(arg)
|
|
219
|
-
bound_args[param_name] = arg
|
|
220
|
-
continue
|
|
221
|
-
except excs.Error:
|
|
222
|
-
# this didn't work, but it might be a literal
|
|
223
|
-
pass
|
|
224
|
-
|
|
225
|
-
if not isinstance(arg, Expr):
|
|
226
|
-
if arg is not None:
|
|
227
|
-
try:
|
|
228
|
-
param_type = param.col_type
|
|
229
|
-
bound_args[param_name] = param_type.create_literal(arg)
|
|
230
|
-
except TypeError as e:
|
|
231
|
-
msg = str(e)
|
|
232
|
-
raise excs.Error(f'Argument for parameter {param_name!r}: {msg[0].lower() + msg[1:]}')
|
|
233
|
-
continue
|
|
234
|
-
|
|
235
|
-
# these checks break the db migration test, because InlineArray isn't serialized correctly (it looses
|
|
236
|
-
# the type information)
|
|
237
|
-
# if is_var_param:
|
|
238
|
-
# if param.kind == inspect.Parameter.VAR_POSITIONAL:
|
|
239
|
-
# if not isinstance(arg, InlineArray) or not arg.col_type.is_json_type():
|
|
240
|
-
# pass
|
|
241
|
-
# assert isinstance(arg, InlineArray), type(arg)
|
|
242
|
-
# assert arg.col_type.is_json_type()
|
|
243
|
-
# if param.kind == inspect.Parameter.VAR_KEYWORD:
|
|
244
|
-
# if not isinstance(arg, InlineDict):
|
|
245
|
-
# pass
|
|
246
|
-
# assert isinstance(arg, InlineDict), type(arg)
|
|
247
|
-
if is_var_param:
|
|
248
|
-
pass
|
|
249
|
-
else:
|
|
250
|
-
assert param.col_type is not None
|
|
251
|
-
# Check that the argument is consistent with the expected parameter type, with the allowance that
|
|
252
|
-
# non-nullable parameters can still accept nullable arguments (since function calls with Nones
|
|
253
|
-
# assigned to non-nullable parameters will always return None)
|
|
254
|
-
if not (
|
|
255
|
-
param.col_type.is_supertype_of(arg.col_type, ignore_nullable=True)
|
|
256
|
-
# TODO: this is a hack to allow JSON columns to be passed to functions that accept scalar
|
|
257
|
-
# types. It's necessary to avoid littering notebooks with `apply(str)` calls or equivalent.
|
|
258
|
-
# (Previously, this wasn't necessary because `is_supertype_of()` was improperly implemented.)
|
|
259
|
-
# We need to think through the right way to handle this scenario.
|
|
260
|
-
or (arg.col_type.is_json_type() and param.col_type.is_scalar_type())
|
|
261
|
-
):
|
|
262
|
-
raise excs.Error(
|
|
263
|
-
f'Parameter {param_name} (in function {fn_name}): argument type {arg.col_type} does not match parameter type '
|
|
264
|
-
f'{param.col_type}'
|
|
265
|
-
)
|
|
266
|
-
|
|
267
145
|
def _equals(self, other: FunctionCall) -> bool:
|
|
268
146
|
if self.fn != other.fn:
|
|
269
147
|
return False
|
|
270
|
-
if
|
|
148
|
+
if self.arg_idxs != other.arg_idxs:
|
|
149
|
+
return False
|
|
150
|
+
if self.kwarg_idxs != other.kwarg_idxs:
|
|
271
151
|
return False
|
|
272
|
-
for i in range(len(self.args)):
|
|
273
|
-
if self.args[i] != other.args[i]:
|
|
274
|
-
return False
|
|
275
152
|
if self.group_by_start_idx != other.group_by_start_idx:
|
|
276
153
|
return False
|
|
277
154
|
if self.group_by_stop_idx != other.group_by_stop_idx:
|
|
@@ -283,11 +160,12 @@ class FunctionCall(Expr):
|
|
|
283
160
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
284
161
|
return super()._id_attrs() + [
|
|
285
162
|
('fn', id(self.fn)), # use the function pointer, not the fqn, which isn't set for lambdas
|
|
286
|
-
('args', self.
|
|
287
|
-
('kwargs', self.
|
|
163
|
+
('args', self.arg_idxs),
|
|
164
|
+
('kwargs', self.kwarg_idxs),
|
|
288
165
|
('group_by_start_idx', self.group_by_start_idx),
|
|
289
166
|
('group_by_stop_idx', self.group_by_stop_idx),
|
|
290
|
-
('
|
|
167
|
+
('fn_expr_idx', self.fn_expr_idx),
|
|
168
|
+
('order_by_idx', self.order_by_start_idx),
|
|
291
169
|
]
|
|
292
170
|
|
|
293
171
|
def __repr__(self) -> str:
|
|
@@ -301,16 +179,8 @@ class FunctionCall(Expr):
|
|
|
301
179
|
return f'{fn_name}({self._print_args()})'
|
|
302
180
|
|
|
303
181
|
def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
arg_strs = [print_arg(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]]
|
|
308
|
-
arg_strs.extend(
|
|
309
|
-
[
|
|
310
|
-
f'{param_name}={print_arg(arg) if idx is None else str(self.components[idx])}'
|
|
311
|
-
for param_name, (idx, arg) in self.kwargs.items()
|
|
312
|
-
]
|
|
313
|
-
)
|
|
182
|
+
arg_strs = [str(self.components[idx]) for idx in self.arg_idxs[start_idx:]]
|
|
183
|
+
arg_strs.extend([f'{param_name}={str(self.components[idx])}' for param_name, idx in self.kwarg_idxs.items()])
|
|
314
184
|
if len(self.order_by) > 0:
|
|
315
185
|
assert isinstance(self.fn, func.AggregateFunction)
|
|
316
186
|
if self.fn.requires_order_by:
|
|
@@ -367,29 +237,21 @@ class FunctionCall(Expr):
|
|
|
367
237
|
return None
|
|
368
238
|
|
|
369
239
|
# try to construct args and kwargs to call self.fn._to_sql()
|
|
240
|
+
args: list[sql.ColumnElement] = []
|
|
241
|
+
for component_idx in self.arg_idxs:
|
|
242
|
+
arg_element = sql_elements.get(self.components[component_idx])
|
|
243
|
+
if arg_element is None:
|
|
244
|
+
return None
|
|
245
|
+
args.append(arg_element)
|
|
246
|
+
|
|
370
247
|
kwargs: dict[str, sql.ColumnElement] = {}
|
|
371
|
-
for param_name,
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
else:
|
|
377
|
-
arg_element = sql_elements.get(self.components[component_idx])
|
|
378
|
-
if arg_element is None:
|
|
379
|
-
return None
|
|
380
|
-
kwargs[param_name] = arg_element
|
|
248
|
+
for param_name, component_idx in self.kwarg_idxs.items():
|
|
249
|
+
arg_element = sql_elements.get(self.components[component_idx])
|
|
250
|
+
if arg_element is None:
|
|
251
|
+
return None
|
|
252
|
+
kwargs[param_name] = arg_element
|
|
381
253
|
|
|
382
|
-
|
|
383
|
-
for _, (component_idx, arg) in enumerate(self.args):
|
|
384
|
-
if component_idx is None:
|
|
385
|
-
args.append(sql.literal(arg))
|
|
386
|
-
else:
|
|
387
|
-
arg_element = sql_elements.get(self.components[component_idx])
|
|
388
|
-
if arg_element is None:
|
|
389
|
-
return None
|
|
390
|
-
args.append(arg_element)
|
|
391
|
-
result = self.fn._to_sql(*args, **kwargs)
|
|
392
|
-
return result
|
|
254
|
+
return self.fn._to_sql(*args, **kwargs)
|
|
393
255
|
|
|
394
256
|
def reset_agg(self) -> None:
|
|
395
257
|
"""
|
|
@@ -409,35 +271,32 @@ class FunctionCall(Expr):
|
|
|
409
271
|
|
|
410
272
|
def make_args(self, data_row: DataRow) -> Optional[tuple[list[Any], dict[str, Any]]]:
|
|
411
273
|
"""Return args and kwargs, constructed for data_row; returns None if any non-nullable arg is None."""
|
|
274
|
+
args: list[Any] = []
|
|
275
|
+
parameters_by_pos = self.fn.signature.parameters_by_pos
|
|
276
|
+
for idx in self.arg_idxs:
|
|
277
|
+
val = data_row[self.components[idx].slot_idx]
|
|
278
|
+
if (
|
|
279
|
+
val is None
|
|
280
|
+
and parameters_by_pos[idx].kind
|
|
281
|
+
in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
|
|
282
|
+
and not parameters_by_pos[idx].col_type.nullable
|
|
283
|
+
):
|
|
284
|
+
return None
|
|
285
|
+
args.append(val)
|
|
286
|
+
|
|
412
287
|
kwargs: dict[str, Any] = {}
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
if
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
288
|
+
parameters = self.fn.signature.parameters
|
|
289
|
+
for param_name, idx in self.kwarg_idxs.items():
|
|
290
|
+
val = data_row[self.components[idx].slot_idx]
|
|
291
|
+
if (
|
|
292
|
+
val is None
|
|
293
|
+
and parameters[param_name].kind
|
|
294
|
+
in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
|
|
295
|
+
and not parameters[param_name].col_type.nullable
|
|
296
|
+
):
|
|
297
|
+
return None
|
|
298
|
+
kwargs[param_name] = val
|
|
424
299
|
|
|
425
|
-
args: list[Any] = []
|
|
426
|
-
for param_idx, (component_idx, arg) in enumerate(self.args):
|
|
427
|
-
val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
|
|
428
|
-
param = self.fn.signature.parameters_by_pos[param_idx]
|
|
429
|
-
if param.kind == inspect.Parameter.VAR_POSITIONAL:
|
|
430
|
-
# expand *args parameter
|
|
431
|
-
assert isinstance(val, list)
|
|
432
|
-
args.extend(val)
|
|
433
|
-
elif param.kind == inspect.Parameter.VAR_KEYWORD:
|
|
434
|
-
# expand **kwargs parameter
|
|
435
|
-
assert isinstance(val, dict)
|
|
436
|
-
kwargs.update(val)
|
|
437
|
-
else:
|
|
438
|
-
if not param.col_type.nullable and val is None:
|
|
439
|
-
return None
|
|
440
|
-
args.append(val)
|
|
441
300
|
return args, kwargs
|
|
442
301
|
|
|
443
302
|
def get_param_values(self, param_names: Sequence[str], data_rows: list[DataRow]) -> list[dict[str, Any]]:
|
|
@@ -445,17 +304,25 @@ class FunctionCall(Expr):
|
|
|
445
304
|
Returns a list of dicts mapping each param name to its value when this FunctionCall is evaluated against
|
|
446
305
|
data_rows
|
|
447
306
|
"""
|
|
448
|
-
assert all(name in self.
|
|
307
|
+
assert all(name in self.fn.signature.parameters for name in param_names), f'{param_names}, {self.fn.signature}'
|
|
449
308
|
result: list[dict[str, Any]] = []
|
|
450
309
|
for row in data_rows:
|
|
451
310
|
d: dict[str, Any] = {}
|
|
452
311
|
for param_name in param_names:
|
|
453
|
-
|
|
454
|
-
if
|
|
455
|
-
d[param_name] =
|
|
312
|
+
val = self.bound_idxs.get(param_name)
|
|
313
|
+
if isinstance(val, int):
|
|
314
|
+
d[param_name] = row[self.components[val].slot_idx]
|
|
315
|
+
elif isinstance(val, list):
|
|
316
|
+
# var_positional
|
|
317
|
+
d[param_name] = [row[self.components[idx].slot_idx] for idx in val]
|
|
318
|
+
elif isinstance(val, dict):
|
|
319
|
+
# var_keyword
|
|
320
|
+
d[param_name] = {k: row[self.components[idx].slot_idx] for k, idx in val.items()}
|
|
456
321
|
else:
|
|
457
|
-
|
|
458
|
-
|
|
322
|
+
assert val is None
|
|
323
|
+
default = self.fn.signature.parameters[param_name].default
|
|
324
|
+
assert default is not None
|
|
325
|
+
d[param_name] = default.val
|
|
459
326
|
result.append(d)
|
|
460
327
|
return result
|
|
461
328
|
|
|
@@ -501,53 +368,59 @@ class FunctionCall(Expr):
|
|
|
501
368
|
data_row[self.slot_idx] = self.fn.exec(args, kwargs)
|
|
502
369
|
|
|
503
370
|
def _as_dict(self) -> dict:
|
|
504
|
-
|
|
371
|
+
return {
|
|
505
372
|
'fn': self.fn.as_dict(),
|
|
506
|
-
'args': self.args,
|
|
507
|
-
'kwargs': self.kwargs,
|
|
508
373
|
'return_type': self.return_type.as_dict(),
|
|
374
|
+
'arg_idxs': self.arg_idxs,
|
|
375
|
+
'kwarg_idxs': self.kwarg_idxs,
|
|
509
376
|
'group_by_start_idx': self.group_by_start_idx,
|
|
510
377
|
'group_by_stop_idx': self.group_by_stop_idx,
|
|
511
378
|
'order_by_start_idx': self.order_by_start_idx,
|
|
379
|
+
'is_method_call': self.is_method_call,
|
|
512
380
|
**super()._as_dict(),
|
|
513
381
|
}
|
|
514
|
-
return result
|
|
515
382
|
|
|
516
383
|
@classmethod
|
|
517
384
|
def _from_dict(cls, d: dict, components: list[Expr]) -> FunctionCall:
|
|
518
|
-
assert 'fn' in d
|
|
519
|
-
assert 'args' in d
|
|
520
|
-
assert 'kwargs' in d
|
|
521
|
-
|
|
522
385
|
fn = func.Function.from_dict(d['fn'])
|
|
523
|
-
assert not fn.is_polymorphic
|
|
524
386
|
return_type = ts.ColumnType.from_dict(d['return_type']) if 'return_type' in d else None
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
#
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
387
|
+
arg_idxs: list[int] = d['arg_idxs']
|
|
388
|
+
kwarg_idxs: dict[str, int] = d['kwarg_idxs']
|
|
389
|
+
group_by_start_idx: int = d['group_by_start_idx']
|
|
390
|
+
group_by_stop_idx: int = d['group_by_stop_idx']
|
|
391
|
+
order_by_start_idx: int = d['order_by_start_idx']
|
|
392
|
+
is_method_call: bool = d['is_method_call']
|
|
393
|
+
|
|
394
|
+
args = [components[idx] for idx in arg_idxs]
|
|
395
|
+
kwargs = {name: components[idx] for name, idx in kwarg_idxs.items()}
|
|
396
|
+
group_by_exprs = components[group_by_start_idx:group_by_stop_idx]
|
|
397
|
+
order_by_exprs = components[order_by_start_idx:]
|
|
398
|
+
|
|
399
|
+
# Now re-bind args and kwargs using the version of `fn` that is currently represented in code. This ensures
|
|
400
|
+
# that we get a valid binding even if the signatures of `fn` have changed since the FunctionCall was
|
|
401
|
+
# serialized.
|
|
402
|
+
|
|
403
|
+
resolved_fn: func.Function
|
|
404
|
+
bound_args: dict[str, Expr]
|
|
405
|
+
|
|
406
|
+
try:
|
|
407
|
+
resolved_fn, bound_args = fn._bind_to_matching_signature(args, kwargs)
|
|
408
|
+
except (TypeError, excs.Error):
|
|
409
|
+
# TODO: Handle this more gracefully (instead of failing the DB load, allow the DB load to succeed, but
|
|
410
|
+
# mark any enclosing FunctionCall as unusable). It's the same issue as dealing with a renamed UDF or
|
|
411
|
+
# FunctionCall return type mismatch.
|
|
412
|
+
signature_note_str = 'any of its signatures' if fn.is_polymorphic else 'its signature'
|
|
413
|
+
instance_signature_str = f'{len(fn.signatures)} signatures' if fn.is_polymorphic else str(fn.signature)
|
|
414
|
+
raise excs.Error(
|
|
415
|
+
f'The signature stored in the database for the UDF `{fn.self_path}` no longer matches '
|
|
416
|
+
f'{signature_note_str} as currently defined in the code.\nThis probably means that the code for '
|
|
417
|
+
f'`{fn.self_path}` has changed in a backward-incompatible way.\n'
|
|
418
|
+
f'Signature in database: {fn}\n'
|
|
419
|
+
f'Signature as currently defined in code: {instance_signature_str}'
|
|
420
|
+
)
|
|
548
421
|
|
|
549
422
|
# Evaluate the call_return_type as defined in the current codebase.
|
|
550
|
-
call_return_type =
|
|
423
|
+
call_return_type = resolved_fn.call_return_type(bound_args)
|
|
551
424
|
|
|
552
425
|
if return_type is None:
|
|
553
426
|
# Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious way to
|
|
@@ -559,55 +432,24 @@ class FunctionCall(Expr):
|
|
|
559
432
|
# There is a return_type stored in metadata (schema version >= 25).
|
|
560
433
|
# Check that the stored return_type of the UDF call matches the column type of the FunctionCall, and
|
|
561
434
|
# fail-fast if it doesn't (otherwise we risk getting downstream database errors).
|
|
562
|
-
# TODO: Handle this more gracefully (
|
|
563
|
-
# mark this FunctionCall as unusable). It's the same issue as dealing with a renamed UDF or Function
|
|
564
|
-
# signature mismatch.
|
|
435
|
+
# TODO: Handle this more gracefully (as noted above).
|
|
565
436
|
if not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
|
|
566
437
|
raise excs.Error(
|
|
567
438
|
f'The return type stored in the database for a UDF call to `{fn.self_path}` no longer matches the '
|
|
568
439
|
f'return type of the UDF as currently defined in the code.\nThis probably means that the code for '
|
|
569
440
|
f'`{fn.self_path}` has changed in a backward-incompatible way.\n'
|
|
570
441
|
f'Return type in database: `{return_type}`\n'
|
|
571
|
-
f'Return type as currently defined: `{call_return_type}`'
|
|
442
|
+
f'Return type as currently defined in code: `{call_return_type}`'
|
|
572
443
|
)
|
|
573
444
|
|
|
574
|
-
fn_call = cls(
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
@classmethod
|
|
585
|
-
def __signature_matches(cls, sig: func.Signature, args: list[Any], kwargs: dict[str, Any]) -> bool:
|
|
586
|
-
unbound_parameters = set(sig.parameters.keys())
|
|
587
|
-
for i, arg in enumerate(args):
|
|
588
|
-
if i >= len(sig.parameters_by_pos):
|
|
589
|
-
return False
|
|
590
|
-
param = sig.parameters_by_pos[i]
|
|
591
|
-
arg_type = arg.col_type if isinstance(arg, Expr) else ts.ColumnType.infer_literal_type(arg)
|
|
592
|
-
if param.col_type is not None and not param.col_type.is_supertype_of(arg_type, ignore_nullable=True):
|
|
593
|
-
return False
|
|
594
|
-
unbound_parameters.remove(param.name)
|
|
595
|
-
for param_name, arg in kwargs.items():
|
|
596
|
-
if param_name not in unbound_parameters:
|
|
597
|
-
return False
|
|
598
|
-
param = sig.parameters[param_name]
|
|
599
|
-
arg_type = arg.col_type if isinstance(arg, Expr) else ts.ColumnType.infer_literal_type(arg)
|
|
600
|
-
if param.col_type is not None and not param.col_type.is_supertype_of(arg_type, ignore_nullable=True):
|
|
601
|
-
return False
|
|
602
|
-
unbound_parameters.remove(param_name)
|
|
603
|
-
for param_name in unbound_parameters:
|
|
604
|
-
param = sig.parameters[param_name]
|
|
605
|
-
if not param.has_default:
|
|
606
|
-
return False
|
|
607
|
-
return True
|
|
445
|
+
fn_call = cls(
|
|
446
|
+
resolved_fn,
|
|
447
|
+
args,
|
|
448
|
+
kwargs,
|
|
449
|
+
return_type,
|
|
450
|
+
group_by_clause=group_by_exprs,
|
|
451
|
+
order_by_clause=order_by_exprs,
|
|
452
|
+
is_method_call=is_method_call,
|
|
453
|
+
)
|
|
608
454
|
|
|
609
|
-
|
|
610
|
-
def __unpack_bound_arg(cls, arg: Any) -> Any:
|
|
611
|
-
if isinstance(arg, InlineList) and all(isinstance(el, Literal) for el in arg.components):
|
|
612
|
-
return [el.val for el in arg.components]
|
|
613
|
-
return arg
|
|
455
|
+
return fn_call
|