pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,126 +1,94 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
|
-
import
|
|
4
|
+
import logging
|
|
5
5
|
import sys
|
|
6
|
-
from
|
|
6
|
+
from textwrap import dedent
|
|
7
|
+
from typing import Any, Sequence
|
|
7
8
|
|
|
8
9
|
import sqlalchemy as sql
|
|
9
10
|
|
|
10
|
-
import
|
|
11
|
-
import pixeltable.exceptions as excs
|
|
12
|
-
import pixeltable.func as func
|
|
13
|
-
import pixeltable.type_system as ts
|
|
11
|
+
from pixeltable import catalog, exceptions as excs, func, type_system as ts
|
|
14
12
|
|
|
15
13
|
from .data_row import DataRow
|
|
16
14
|
from .expr import Expr
|
|
17
|
-
from .
|
|
15
|
+
from .literal import Literal
|
|
18
16
|
from .row_builder import RowBuilder
|
|
19
17
|
from .rowid_ref import RowidRef
|
|
20
18
|
from .sql_element_cache import SqlElementCache
|
|
21
19
|
|
|
20
|
+
_logger = logging.getLogger('pixeltable')
|
|
22
21
|
|
|
23
|
-
class FunctionCall(Expr):
|
|
24
22
|
|
|
23
|
+
class FunctionCall(Expr):
|
|
25
24
|
fn: func.Function
|
|
26
25
|
is_method_call: bool
|
|
27
26
|
agg_init_args: dict[str, Any]
|
|
27
|
+
resource_pool: str | None
|
|
28
|
+
|
|
29
|
+
# These collections hold the component indices corresponding to the args and kwargs
|
|
30
|
+
# that were passed to the FunctionCall. They're 1:1 with the original call pattern.
|
|
31
|
+
arg_idxs: list[int]
|
|
32
|
+
kwarg_idxs: dict[str, int]
|
|
28
33
|
|
|
29
|
-
#
|
|
30
|
-
#
|
|
31
|
-
# -
|
|
32
|
-
|
|
33
|
-
|
|
34
|
+
# A "bound" version of the FunctionCall arguments, mapping each specified parameter name
|
|
35
|
+
# to one of three types of bindings:
|
|
36
|
+
# - a component index, if the parameter is a non-variadic parameter
|
|
37
|
+
# - a list of component indices, if the parameter is a variadic positional parameter
|
|
38
|
+
# - a dict mapping keyword names to component indices, if the parameter is a variadic keyword parameter
|
|
39
|
+
bound_idxs: dict[str, int | list[int] | dict[str, int]]
|
|
34
40
|
|
|
35
|
-
|
|
36
|
-
kwarg_types: dict[str, ts.ColumnType]
|
|
41
|
+
return_type: ts.ColumnType
|
|
37
42
|
group_by_start_idx: int
|
|
38
43
|
group_by_stop_idx: int
|
|
39
44
|
fn_expr_idx: int
|
|
40
45
|
order_by_start_idx: int
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
46
|
+
aggregator: Any | None
|
|
47
|
+
current_partition_vals: list[Any] | None
|
|
48
|
+
|
|
49
|
+
_validation_error: str | None
|
|
44
50
|
|
|
45
51
|
def __init__(
|
|
46
|
-
|
|
47
|
-
|
|
52
|
+
self,
|
|
53
|
+
fn: func.Function,
|
|
54
|
+
args: list[Expr],
|
|
55
|
+
kwargs: dict[str, Expr],
|
|
56
|
+
return_type: ts.ColumnType,
|
|
57
|
+
order_by_clause: list[Any] | None = None,
|
|
58
|
+
group_by_clause: list[Any] | None = None,
|
|
59
|
+
is_method_call: bool = False,
|
|
60
|
+
validation_error: str | None = None,
|
|
61
|
+
):
|
|
62
|
+
assert not fn.is_polymorphic
|
|
63
|
+
assert all(isinstance(arg, Expr) for arg in args)
|
|
64
|
+
assert all(isinstance(arg, Expr) for arg in kwargs.values())
|
|
65
|
+
|
|
48
66
|
if order_by_clause is None:
|
|
49
67
|
order_by_clause = []
|
|
50
68
|
if group_by_clause is None:
|
|
51
69
|
group_by_clause = []
|
|
52
|
-
signature = fn.signature
|
|
53
|
-
return_type = fn.call_return_type(bound_args)
|
|
54
|
-
self.fn = fn
|
|
55
|
-
self.is_method_call = is_method_call
|
|
56
|
-
self.normalize_args(fn.name, signature, bound_args)
|
|
57
|
-
|
|
58
|
-
# If `return_type` is non-nullable, but the function call has a nullable input to any of its non-nullable
|
|
59
|
-
# parameters, then we need to make it nullable. This is because Pixeltable defaults a function output to
|
|
60
|
-
# `None` when any of its non-nullable inputs are `None`.
|
|
61
|
-
for arg_name, arg in bound_args.items():
|
|
62
|
-
param = signature.parameters[arg_name]
|
|
63
|
-
if (
|
|
64
|
-
param.col_type is not None and not param.col_type.nullable
|
|
65
|
-
and isinstance(arg, Expr) and arg.col_type.nullable
|
|
66
|
-
):
|
|
67
|
-
return_type = return_type.copy(nullable=True)
|
|
68
|
-
break
|
|
69
70
|
|
|
70
71
|
super().__init__(return_type)
|
|
71
72
|
|
|
72
|
-
self.
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
# construct components, args, kwargs
|
|
82
|
-
self.args = []
|
|
83
|
-
self.kwargs = {}
|
|
84
|
-
|
|
85
|
-
# we record the types of non-variable parameters for runtime type checks
|
|
86
|
-
self.arg_types = []
|
|
87
|
-
self.kwarg_types = {}
|
|
88
|
-
|
|
89
|
-
# the prefix of parameters that are bound can be passed by position
|
|
90
|
-
processed_args: set[str] = set()
|
|
91
|
-
for py_param in fn.signature.py_signature.parameters.values():
|
|
92
|
-
if py_param.name not in bound_args or py_param.kind == inspect.Parameter.KEYWORD_ONLY:
|
|
93
|
-
break
|
|
94
|
-
arg = bound_args[py_param.name]
|
|
95
|
-
if isinstance(arg, Expr):
|
|
96
|
-
self.args.append((len(self.components), None))
|
|
97
|
-
self.components.append(arg.copy())
|
|
98
|
-
else:
|
|
99
|
-
self.args.append((None, arg))
|
|
100
|
-
if py_param.kind != inspect.Parameter.VAR_POSITIONAL and py_param.kind != inspect.Parameter.VAR_KEYWORD:
|
|
101
|
-
self.arg_types.append(signature.parameters[py_param.name].col_type)
|
|
102
|
-
processed_args.add(py_param.name)
|
|
103
|
-
|
|
104
|
-
# the remaining args are passed as keywords
|
|
105
|
-
for param_name in bound_args.keys():
|
|
106
|
-
if param_name not in processed_args:
|
|
107
|
-
arg = bound_args[param_name]
|
|
108
|
-
if isinstance(arg, Expr):
|
|
109
|
-
self.kwargs[param_name] = (len(self.components), None)
|
|
110
|
-
self.components.append(arg.copy())
|
|
111
|
-
else:
|
|
112
|
-
self.kwargs[param_name] = (None, arg)
|
|
113
|
-
if fn.signature.py_signature.parameters[param_name].kind != inspect.Parameter.VAR_KEYWORD:
|
|
114
|
-
self.kwarg_types[param_name] = signature.parameters[param_name].col_type
|
|
73
|
+
self.fn = fn
|
|
74
|
+
self.return_type = return_type
|
|
75
|
+
self.is_method_call = is_method_call
|
|
76
|
+
|
|
77
|
+
# Build the components list from the specified args and kwargs, and note the component_idx of each argument.
|
|
78
|
+
self.components.extend(arg.copy() for arg in args)
|
|
79
|
+
self.arg_idxs = list(range(len(self.components)))
|
|
80
|
+
self.components.extend(arg.copy() for arg in kwargs.values())
|
|
81
|
+
self.kwarg_idxs = {name: i + len(args) for i, name in enumerate(kwargs.keys())}
|
|
115
82
|
|
|
116
83
|
# window function state:
|
|
117
84
|
# self.components[self.group_by_start_idx:self.group_by_stop_idx] contains group_by exprs
|
|
118
85
|
self.group_by_start_idx, self.group_by_stop_idx = 0, 0
|
|
119
86
|
if len(group_by_clause) > 0:
|
|
120
87
|
if isinstance(group_by_clause[0], catalog.Table):
|
|
88
|
+
assert len(group_by_clause) == 1
|
|
121
89
|
group_by_exprs = self._create_rowid_refs(group_by_clause[0])
|
|
122
90
|
else:
|
|
123
|
-
assert isinstance(
|
|
91
|
+
assert all(isinstance(expr, Expr) for expr in group_by_clause)
|
|
124
92
|
group_by_exprs = group_by_clause
|
|
125
93
|
# record grouping exprs in self.components, we need to evaluate them to get partition vals
|
|
126
94
|
self.group_by_start_idx = len(self.components)
|
|
@@ -129,9 +97,9 @@ class FunctionCall(Expr):
|
|
|
129
97
|
|
|
130
98
|
if isinstance(self.fn, func.ExprTemplateFunction):
|
|
131
99
|
# we instantiate the template to create an Expr that can be evaluated and record that as a component
|
|
132
|
-
fn_expr = self.fn.instantiate(
|
|
100
|
+
fn_expr = self.fn.instantiate(args, kwargs)
|
|
101
|
+
self.fn_expr_idx = len(self.components)
|
|
133
102
|
self.components.append(fn_expr)
|
|
134
|
-
self.fn_expr_idx = len(self.components) - 1
|
|
135
103
|
else:
|
|
136
104
|
self.fn_expr_idx = sys.maxsize
|
|
137
105
|
|
|
@@ -139,12 +107,38 @@ class FunctionCall(Expr):
|
|
|
139
107
|
# (that's done in SQL)
|
|
140
108
|
if len(order_by_clause) > 0 and not isinstance(order_by_clause[0], Expr):
|
|
141
109
|
raise excs.Error(
|
|
142
|
-
f'order_by argument needs to be a Pixeltable expression, but instead is a {type(order_by_clause[0])}'
|
|
143
|
-
|
|
110
|
+
f'order_by argument needs to be a Pixeltable expression, but instead is a {type(order_by_clause[0])}'
|
|
111
|
+
)
|
|
144
112
|
self.order_by_start_idx = len(self.components)
|
|
145
113
|
self.components.extend(order_by_clause)
|
|
146
114
|
|
|
147
|
-
self.
|
|
115
|
+
self._validation_error = validation_error
|
|
116
|
+
|
|
117
|
+
if validation_error is not None:
|
|
118
|
+
self.bound_idxs = {}
|
|
119
|
+
self.resource_pool = None
|
|
120
|
+
return
|
|
121
|
+
|
|
122
|
+
# Now generate bound_idxs for the args and kwargs indices.
|
|
123
|
+
# This is guaranteed to work, because at this point the call has already been validated.
|
|
124
|
+
# These will be used later to dereference specific parameter values.
|
|
125
|
+
bindings = fn.signature.py_signature.bind(*self.arg_idxs, **self.kwarg_idxs)
|
|
126
|
+
self.bound_idxs = bindings.arguments
|
|
127
|
+
|
|
128
|
+
# Separately generate bound_args for purposes of determining the resource pool.
|
|
129
|
+
bindings = fn.signature.py_signature.bind(*args, **kwargs)
|
|
130
|
+
bound_args = bindings.arguments
|
|
131
|
+
self.resource_pool = fn.call_resource_pool(bound_args)
|
|
132
|
+
|
|
133
|
+
self.agg_init_args = {}
|
|
134
|
+
if self.is_agg_fn_call:
|
|
135
|
+
# We separate out the init args for the aggregator. Unpack Literals in init args.
|
|
136
|
+
assert isinstance(fn, func.AggregateFunction)
|
|
137
|
+
for arg_name, arg in bound_args.items():
|
|
138
|
+
if arg_name in fn.init_param_names[0]:
|
|
139
|
+
assert isinstance(arg, Literal) # This was checked during validate_call
|
|
140
|
+
self.agg_init_args[arg_name] = arg.val
|
|
141
|
+
|
|
148
142
|
# execution state for aggregate functions
|
|
149
143
|
self.aggregator = None
|
|
150
144
|
self.current_partition_vals = None
|
|
@@ -153,131 +147,58 @@ class FunctionCall(Expr):
|
|
|
153
147
|
|
|
154
148
|
def _create_rowid_refs(self, tbl: catalog.Table) -> list[Expr]:
|
|
155
149
|
target = tbl._tbl_version_path.tbl_version
|
|
156
|
-
return [RowidRef(target, i) for i in range(target.num_rowid_columns())]
|
|
150
|
+
return [RowidRef(target, i) for i in range(target.get().num_rowid_columns())]
|
|
157
151
|
|
|
158
|
-
def default_column_name(self) ->
|
|
152
|
+
def default_column_name(self) -> str | None:
|
|
159
153
|
return self.fn.name
|
|
160
154
|
|
|
161
|
-
@classmethod
|
|
162
|
-
def normalize_args(cls, fn_name: str, signature: func.Signature, bound_args: dict[str, Any]) -> None:
|
|
163
|
-
"""Converts args to Exprs where appropriate and checks that they are compatible with signature.
|
|
164
|
-
|
|
165
|
-
Updates bound_args in place, where necessary.
|
|
166
|
-
"""
|
|
167
|
-
for param_name, arg in bound_args.items():
|
|
168
|
-
param = signature.parameters[param_name]
|
|
169
|
-
is_var_param = param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
|
|
170
|
-
|
|
171
|
-
if isinstance(arg, dict):
|
|
172
|
-
try:
|
|
173
|
-
arg = InlineDict(arg)
|
|
174
|
-
bound_args[param_name] = arg
|
|
175
|
-
continue
|
|
176
|
-
except excs.Error:
|
|
177
|
-
# this didn't work, but it might be a literal
|
|
178
|
-
pass
|
|
179
|
-
|
|
180
|
-
if isinstance(arg, list) or isinstance(arg, tuple):
|
|
181
|
-
try:
|
|
182
|
-
arg = InlineList(arg)
|
|
183
|
-
bound_args[param_name] = arg
|
|
184
|
-
continue
|
|
185
|
-
except excs.Error:
|
|
186
|
-
# this didn't work, but it might be a literal
|
|
187
|
-
pass
|
|
188
|
-
|
|
189
|
-
if not isinstance(arg, Expr):
|
|
190
|
-
# make sure that non-Expr args are json-serializable and are literals of the correct type
|
|
191
|
-
try:
|
|
192
|
-
_ = json.dumps(arg)
|
|
193
|
-
except TypeError:
|
|
194
|
-
raise excs.Error(f'Argument for parameter {param_name!r} is not json-serializable: {arg} (of type {type(arg)})')
|
|
195
|
-
if arg is not None:
|
|
196
|
-
try:
|
|
197
|
-
param_type = param.col_type
|
|
198
|
-
bound_args[param_name] = param_type.create_literal(arg)
|
|
199
|
-
except TypeError as e:
|
|
200
|
-
msg = str(e)
|
|
201
|
-
raise excs.Error(f'Argument for parameter {param_name!r}: {msg[0].lower() + msg[1:]}')
|
|
202
|
-
continue
|
|
203
|
-
|
|
204
|
-
# these checks break the db migration test, because InlineArray isn't serialized correctly (it looses
|
|
205
|
-
# the type information)
|
|
206
|
-
# if is_var_param:
|
|
207
|
-
# if param.kind == inspect.Parameter.VAR_POSITIONAL:
|
|
208
|
-
# if not isinstance(arg, InlineArray) or not arg.col_type.is_json_type():
|
|
209
|
-
# pass
|
|
210
|
-
# assert isinstance(arg, InlineArray), type(arg)
|
|
211
|
-
# assert arg.col_type.is_json_type()
|
|
212
|
-
# if param.kind == inspect.Parameter.VAR_KEYWORD:
|
|
213
|
-
# if not isinstance(arg, InlineDict):
|
|
214
|
-
# pass
|
|
215
|
-
# assert isinstance(arg, InlineDict), type(arg)
|
|
216
|
-
if is_var_param:
|
|
217
|
-
pass
|
|
218
|
-
else:
|
|
219
|
-
assert param.col_type is not None
|
|
220
|
-
# Check that the argument is consistent with the expected parameter type, with the allowance that
|
|
221
|
-
# non-nullable parameters can still accept nullable arguments (since function calls with Nones
|
|
222
|
-
# assigned to non-nullable parameters will always return None)
|
|
223
|
-
if not (
|
|
224
|
-
param.col_type.is_supertype_of(arg.col_type, ignore_nullable=True)
|
|
225
|
-
# TODO: this is a hack to allow JSON columns to be passed to functions that accept scalar
|
|
226
|
-
# types. It's necessary to avoid littering notebooks with `apply(str)` calls or equivalent.
|
|
227
|
-
# (Previously, this wasn't necessary because `is_supertype_of()` was improperly implemented.)
|
|
228
|
-
# We need to think through the right way to handle this scenario.
|
|
229
|
-
or (arg.col_type.is_json_type() and param.col_type.is_scalar_type())
|
|
230
|
-
):
|
|
231
|
-
raise excs.Error(
|
|
232
|
-
f'Parameter {param_name} (in function {fn_name}): argument type {arg.col_type} does not match parameter type '
|
|
233
|
-
f'{param.col_type}')
|
|
234
|
-
|
|
235
155
|
def _equals(self, other: FunctionCall) -> bool:
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
return False
|
|
245
|
-
if self.group_by_stop_idx != other.group_by_stop_idx:
|
|
246
|
-
return False
|
|
247
|
-
if self.order_by_start_idx != other.order_by_start_idx:
|
|
248
|
-
return False
|
|
249
|
-
return True
|
|
156
|
+
return (
|
|
157
|
+
self.fn == other.fn
|
|
158
|
+
and self.arg_idxs == other.arg_idxs
|
|
159
|
+
and self.kwarg_idxs == other.kwarg_idxs
|
|
160
|
+
and self.group_by_start_idx == other.group_by_start_idx
|
|
161
|
+
and self.group_by_stop_idx == other.group_by_stop_idx
|
|
162
|
+
and self.order_by_start_idx == other.order_by_start_idx
|
|
163
|
+
)
|
|
250
164
|
|
|
251
165
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
252
|
-
return
|
|
166
|
+
return [
|
|
167
|
+
*super()._id_attrs(),
|
|
253
168
|
('fn', id(self.fn)), # use the function pointer, not the fqn, which isn't set for lambdas
|
|
254
|
-
('args', self.
|
|
255
|
-
('kwargs', self.
|
|
169
|
+
('args', self.arg_idxs),
|
|
170
|
+
('kwargs', self.kwarg_idxs),
|
|
256
171
|
('group_by_start_idx', self.group_by_start_idx),
|
|
257
172
|
('group_by_stop_idx', self.group_by_stop_idx),
|
|
258
|
-
('
|
|
173
|
+
('fn_expr_idx', self.fn_expr_idx),
|
|
174
|
+
('order_by_start_idx', self.order_by_start_idx),
|
|
259
175
|
]
|
|
260
176
|
|
|
261
177
|
def __repr__(self) -> str:
|
|
262
178
|
return self.display_str()
|
|
263
179
|
|
|
180
|
+
# def __repr__(self) -> str:
|
|
181
|
+
# return f'FunctionCall(fn={self.fn!r}, args={self.args!r}, kwargs={self.kwargs!r})'
|
|
182
|
+
|
|
183
|
+
@property
|
|
184
|
+
def validation_error(self) -> str | None:
|
|
185
|
+
return self._validation_error or super().validation_error
|
|
186
|
+
|
|
264
187
|
def display_str(self, inline: bool = True) -> str:
|
|
188
|
+
if isinstance(self.fn, func.ExprTemplateFunction) and isinstance(self.fn.template.expr, FunctionCall):
|
|
189
|
+
# If this FunctionCall uses an ExprTemplateFunction with a nested FunctionCall, then resolve the
|
|
190
|
+
# indirection by substitution into the ExprTemplateFunction.
|
|
191
|
+
subst = self.fn.instantiate(self.args, self.kwargs)
|
|
192
|
+
return subst.display_str(inline)
|
|
265
193
|
if self.is_method_call:
|
|
266
194
|
return f'{self.components[0]}.{self.fn.name}({self._print_args(1, inline)})'
|
|
267
195
|
else:
|
|
268
|
-
fn_name = self.fn.display_name
|
|
196
|
+
fn_name = self.fn.display_name or 'anonymous_fn'
|
|
269
197
|
return f'{fn_name}({self._print_args()})'
|
|
270
198
|
|
|
271
199
|
def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
arg_strs = [
|
|
275
|
-
print_arg(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]
|
|
276
|
-
]
|
|
277
|
-
arg_strs.extend([
|
|
278
|
-
f'{param_name}={print_arg(arg) if idx is None else str(self.components[idx])}'
|
|
279
|
-
for param_name, (idx, arg) in self.kwargs.items()
|
|
280
|
-
])
|
|
200
|
+
arg_strs = [str(self.components[idx]) for idx in self.arg_idxs[start_idx:]]
|
|
201
|
+
arg_strs.extend([f'{param_name}={self.components[idx]}' for param_name, idx in self.kwarg_idxs.items()])
|
|
281
202
|
if len(self.order_by) > 0:
|
|
282
203
|
assert isinstance(self.fn, func.AggregateFunction)
|
|
283
204
|
if self.fn.requires_order_by:
|
|
@@ -293,20 +214,28 @@ class FunctionCall(Expr):
|
|
|
293
214
|
def has_group_by(self) -> bool:
|
|
294
215
|
return self.group_by_stop_idx != 0
|
|
295
216
|
|
|
217
|
+
@property
|
|
218
|
+
def is_async(self) -> bool:
|
|
219
|
+
return self.fn.is_async
|
|
220
|
+
|
|
296
221
|
@property
|
|
297
222
|
def group_by(self) -> list[Expr]:
|
|
298
|
-
return self.components[self.group_by_start_idx:self.group_by_stop_idx]
|
|
223
|
+
return self.components[self.group_by_start_idx : self.group_by_stop_idx]
|
|
299
224
|
|
|
300
225
|
@property
|
|
301
226
|
def order_by(self) -> list[Expr]:
|
|
302
|
-
return self.components[self.order_by_start_idx:]
|
|
227
|
+
return self.components[self.order_by_start_idx :]
|
|
303
228
|
|
|
304
229
|
@property
|
|
305
230
|
def is_window_fn_call(self) -> bool:
|
|
306
|
-
return
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
231
|
+
return (
|
|
232
|
+
isinstance(self.fn, func.AggregateFunction)
|
|
233
|
+
and self.fn.allows_window
|
|
234
|
+
and (
|
|
235
|
+
not self.fn.allows_std_agg
|
|
236
|
+
or self.has_group_by()
|
|
237
|
+
or (len(self.order_by) > 0 and not self.fn.requires_order_by)
|
|
238
|
+
)
|
|
310
239
|
)
|
|
311
240
|
|
|
312
241
|
def get_window_sort_exprs(self) -> tuple[list[Expr], list[Expr]]:
|
|
@@ -324,35 +253,29 @@ class FunctionCall(Expr):
|
|
|
324
253
|
assert self.is_agg_fn_call
|
|
325
254
|
return self.order_by
|
|
326
255
|
|
|
327
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
256
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
257
|
+
assert self.is_valid
|
|
258
|
+
|
|
328
259
|
# we currently can't translate aggregate functions with grouping and/or ordering to SQL
|
|
329
260
|
if self.has_group_by() or len(self.order_by) > 0:
|
|
330
261
|
return None
|
|
331
262
|
|
|
332
263
|
# try to construct args and kwargs to call self.fn._to_sql()
|
|
264
|
+
args: list[sql.ColumnElement] = []
|
|
265
|
+
for component_idx in self.arg_idxs:
|
|
266
|
+
arg_element = sql_elements.get(self.components[component_idx])
|
|
267
|
+
if arg_element is None:
|
|
268
|
+
return None
|
|
269
|
+
args.append(arg_element)
|
|
270
|
+
|
|
333
271
|
kwargs: dict[str, sql.ColumnElement] = {}
|
|
334
|
-
for param_name,
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
else:
|
|
340
|
-
arg_element = sql_elements.get(self.components[component_idx])
|
|
341
|
-
if arg_element is None:
|
|
342
|
-
return None
|
|
343
|
-
kwargs[param_name] = arg_element
|
|
272
|
+
for param_name, component_idx in self.kwarg_idxs.items():
|
|
273
|
+
arg_element = sql_elements.get(self.components[component_idx])
|
|
274
|
+
if arg_element is None:
|
|
275
|
+
return None
|
|
276
|
+
kwargs[param_name] = arg_element
|
|
344
277
|
|
|
345
|
-
|
|
346
|
-
for _, (component_idx, arg) in enumerate(self.args):
|
|
347
|
-
if component_idx is None:
|
|
348
|
-
args.append(sql.literal(arg))
|
|
349
|
-
else:
|
|
350
|
-
arg_element = sql_elements.get(self.components[component_idx])
|
|
351
|
-
if arg_element is None:
|
|
352
|
-
return None
|
|
353
|
-
args.append(arg_element)
|
|
354
|
-
result = self.fn._to_sql(*args, **kwargs)
|
|
355
|
-
return result
|
|
278
|
+
return self.fn._to_sql(*args, **kwargs)
|
|
356
279
|
|
|
357
280
|
def reset_agg(self) -> None:
|
|
358
281
|
"""
|
|
@@ -360,46 +283,127 @@ class FunctionCall(Expr):
|
|
|
360
283
|
"""
|
|
361
284
|
assert self.is_agg_fn_call
|
|
362
285
|
assert isinstance(self.fn, func.AggregateFunction)
|
|
363
|
-
self.aggregator = self.fn.
|
|
286
|
+
self.aggregator = self.fn.agg_class(**self.agg_init_args)
|
|
287
|
+
|
|
288
|
+
@property
|
|
289
|
+
def bound_args(self) -> dict[str, Expr]:
|
|
290
|
+
"""
|
|
291
|
+
Reconstructs bound arguments from the components of this FunctionCall.
|
|
292
|
+
"""
|
|
293
|
+
bound_args: dict[str, Expr] = {}
|
|
294
|
+
for name, idx in self.bound_idxs.items():
|
|
295
|
+
if isinstance(idx, int):
|
|
296
|
+
bound_args[name] = self.components[idx]
|
|
297
|
+
elif isinstance(idx, Sequence):
|
|
298
|
+
bound_args[name] = Expr.from_object([self.components[i] for i in idx])
|
|
299
|
+
elif isinstance(idx, dict):
|
|
300
|
+
bound_args[name] = Expr.from_object({k: self.components[i] for k, i in idx.items()})
|
|
301
|
+
else:
|
|
302
|
+
raise AssertionError(f'{name}: {idx} (of type `{type(idx)}`)')
|
|
303
|
+
return bound_args
|
|
304
|
+
|
|
305
|
+
def substitute(self, spec: dict[Expr, Expr]) -> Expr:
|
|
306
|
+
"""
|
|
307
|
+
Substitution of FunctionCall arguments could cause the return value to become more specific, in the case
|
|
308
|
+
where a variable is replaced with a specific value.
|
|
309
|
+
"""
|
|
310
|
+
res = super().substitute(spec)
|
|
311
|
+
assert res is self
|
|
312
|
+
if self.is_valid:
|
|
313
|
+
# If this FunctionCall is valid, re-evaluate the call_return_type of the substituted expression. If the
|
|
314
|
+
# FunctionCall is not valid, it isn't safe to do this. (Really we should be asserting that it *is* valid,
|
|
315
|
+
# but we still need to be able to do substitutions on invalid FunctionCalls, because loading an
|
|
316
|
+
# EmbeddingIndex from the db involves reconstructing the requisite (substituted) FunctionCalls. We could
|
|
317
|
+
# fix this by separately persisting the FunctionCall instances held by EmbeddingIndex to the db. That's
|
|
318
|
+
# probably a good idea, but it's also probably not urgent, since it only affects Functions that have a
|
|
319
|
+
# conditional_return_type implemented.)
|
|
320
|
+
self.return_type = self.fn.call_return_type(self.bound_args)
|
|
321
|
+
self.col_type = self.return_type
|
|
322
|
+
return self
|
|
323
|
+
|
|
324
|
+
@property
|
|
325
|
+
def args(self) -> list[Expr]:
|
|
326
|
+
return [self.components[idx] for idx in self.arg_idxs]
|
|
327
|
+
|
|
328
|
+
@property
|
|
329
|
+
def kwargs(self) -> dict[str, Expr]:
|
|
330
|
+
return {name: self.components[idx] for name, idx in self.kwarg_idxs.items()}
|
|
331
|
+
|
|
332
|
+
@property
|
|
333
|
+
def fn_expr(self) -> Expr | None:
|
|
334
|
+
if self.fn_expr_idx != sys.maxsize:
|
|
335
|
+
return self.components[self.fn_expr_idx]
|
|
336
|
+
return None
|
|
364
337
|
|
|
365
338
|
def update(self, data_row: DataRow) -> None:
|
|
366
339
|
"""
|
|
367
340
|
Update agg state
|
|
368
341
|
"""
|
|
369
342
|
assert self.is_agg_fn_call
|
|
370
|
-
args, kwargs = self.
|
|
343
|
+
args, kwargs = self.make_args(data_row)
|
|
371
344
|
self.aggregator.update(*args, **kwargs)
|
|
372
345
|
|
|
373
|
-
def
|
|
374
|
-
"""Return args and kwargs, constructed for data_row"""
|
|
346
|
+
def make_args(self, data_row: DataRow) -> tuple[list[Any], dict[str, Any]] | None:
|
|
347
|
+
"""Return args and kwargs, constructed for data_row; returns None if any non-nullable arg is None."""
|
|
348
|
+
args: list[Any] = []
|
|
349
|
+
parameters_by_pos = self.fn.signature.parameters_by_pos
|
|
350
|
+
for idx in self.arg_idxs:
|
|
351
|
+
val = data_row[self.components[idx].slot_idx]
|
|
352
|
+
if (
|
|
353
|
+
val is None
|
|
354
|
+
and parameters_by_pos[idx].kind
|
|
355
|
+
in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
|
|
356
|
+
and not parameters_by_pos[idx].col_type.nullable
|
|
357
|
+
):
|
|
358
|
+
return None
|
|
359
|
+
args.append(val)
|
|
360
|
+
|
|
375
361
|
kwargs: dict[str, Any] = {}
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
if
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
362
|
+
parameters = self.fn.signature.parameters
|
|
363
|
+
for param_name, idx in self.kwarg_idxs.items():
|
|
364
|
+
val = data_row[self.components[idx].slot_idx]
|
|
365
|
+
if (
|
|
366
|
+
val is None
|
|
367
|
+
and parameters[param_name].kind
|
|
368
|
+
in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
|
|
369
|
+
and not parameters[param_name].col_type.nullable
|
|
370
|
+
):
|
|
371
|
+
return None
|
|
372
|
+
kwargs[param_name] = val
|
|
385
373
|
|
|
386
|
-
args: list[Any] = []
|
|
387
|
-
for param_idx, (component_idx, arg) in enumerate(self.args):
|
|
388
|
-
val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
|
|
389
|
-
param = self.fn.signature.parameters_by_pos[param_idx]
|
|
390
|
-
if param.kind == inspect.Parameter.VAR_POSITIONAL:
|
|
391
|
-
# expand *args parameter
|
|
392
|
-
assert isinstance(val, list)
|
|
393
|
-
args.extend(val)
|
|
394
|
-
elif param.kind == inspect.Parameter.VAR_KEYWORD:
|
|
395
|
-
# expand **kwargs parameter
|
|
396
|
-
assert isinstance(val, dict)
|
|
397
|
-
kwargs.update(val)
|
|
398
|
-
else:
|
|
399
|
-
args.append(val)
|
|
400
374
|
return args, kwargs
|
|
401
375
|
|
|
376
|
+
def get_param_values(self, param_names: Sequence[str], data_rows: list[DataRow]) -> list[dict[str, Any]]:
|
|
377
|
+
"""
|
|
378
|
+
Returns a list of dicts mapping each param name to its value when this FunctionCall is evaluated against
|
|
379
|
+
data_rows
|
|
380
|
+
"""
|
|
381
|
+
assert self.is_valid
|
|
382
|
+
assert all(name in self.fn.signature.parameters for name in param_names), f'{param_names}, {self.fn.signature}'
|
|
383
|
+
result: list[dict[str, Any]] = []
|
|
384
|
+
for row in data_rows:
|
|
385
|
+
d: dict[str, Any] = {}
|
|
386
|
+
for param_name in param_names:
|
|
387
|
+
val = self.bound_idxs.get(param_name)
|
|
388
|
+
if isinstance(val, int):
|
|
389
|
+
d[param_name] = row[self.components[val].slot_idx]
|
|
390
|
+
elif isinstance(val, list):
|
|
391
|
+
# var_positional
|
|
392
|
+
d[param_name] = [row[self.components[idx].slot_idx] for idx in val]
|
|
393
|
+
elif isinstance(val, dict):
|
|
394
|
+
# var_keyword
|
|
395
|
+
d[param_name] = {k: row[self.components[idx].slot_idx] for k, idx in val.items()}
|
|
396
|
+
else:
|
|
397
|
+
assert val is None
|
|
398
|
+
default = self.fn.signature.parameters[param_name].default
|
|
399
|
+
assert default is not None
|
|
400
|
+
d[param_name] = default.val
|
|
401
|
+
result.append(d)
|
|
402
|
+
return result
|
|
403
|
+
|
|
402
404
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
405
|
+
assert self.is_valid
|
|
406
|
+
|
|
403
407
|
if isinstance(self.fn, func.ExprTemplateFunction):
|
|
404
408
|
# we need to evaluate the template
|
|
405
409
|
# TODO: can we get rid of this extra copy?
|
|
@@ -412,64 +416,159 @@ class FunctionCall(Expr):
|
|
|
412
416
|
data_row[self.slot_idx] = self.aggregator.value()
|
|
413
417
|
return
|
|
414
418
|
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
return
|
|
424
|
-
for param_name, param_type in self.kwarg_types.items():
|
|
425
|
-
if kwargs[param_name] is None and not param_type.nullable:
|
|
426
|
-
# we can't evaluate this function
|
|
427
|
-
data_row[self.slot_idx] = None
|
|
428
|
-
return
|
|
429
|
-
|
|
430
|
-
if isinstance(self.fn, func.CallableFunction) and not self.fn.is_batched:
|
|
431
|
-
# optimization: avoid additional level of indirection we'd get from calling Function.exec()
|
|
432
|
-
data_row[self.slot_idx] = self.fn.py_fn(*args, **kwargs)
|
|
433
|
-
elif self.is_window_fn_call:
|
|
419
|
+
args_kwargs = self.make_args(data_row)
|
|
420
|
+
if args_kwargs is None:
|
|
421
|
+
# we can't evaluate this function
|
|
422
|
+
data_row[self.slot_idx] = None
|
|
423
|
+
return
|
|
424
|
+
args, kwargs = args_kwargs
|
|
425
|
+
|
|
426
|
+
if self.is_window_fn_call:
|
|
434
427
|
assert isinstance(self.fn, func.AggregateFunction)
|
|
428
|
+
agg_cls = self.fn.agg_class
|
|
435
429
|
if self.has_group_by():
|
|
436
430
|
if self.current_partition_vals is None:
|
|
437
431
|
self.current_partition_vals = [None] * len(self.group_by)
|
|
438
432
|
partition_vals = [data_row[e.slot_idx] for e in self.group_by]
|
|
439
433
|
if partition_vals != self.current_partition_vals:
|
|
440
434
|
# new partition
|
|
441
|
-
self.aggregator =
|
|
435
|
+
self.aggregator = agg_cls(**self.agg_init_args)
|
|
442
436
|
self.current_partition_vals = partition_vals
|
|
443
437
|
elif self.aggregator is None:
|
|
444
|
-
self.aggregator =
|
|
438
|
+
self.aggregator = agg_cls(**self.agg_init_args)
|
|
445
439
|
self.aggregator.update(*args)
|
|
446
440
|
data_row[self.slot_idx] = self.aggregator.value()
|
|
447
441
|
else:
|
|
448
|
-
data_row[self.slot_idx] = self.fn.exec(
|
|
442
|
+
data_row[self.slot_idx] = self.fn.exec(args, kwargs)
|
|
449
443
|
|
|
450
444
|
def _as_dict(self) -> dict:
|
|
451
|
-
|
|
452
|
-
'fn': self.fn.as_dict(),
|
|
453
|
-
'
|
|
445
|
+
return {
|
|
446
|
+
'fn': self.fn.as_dict(),
|
|
447
|
+
'return_type': self.return_type.as_dict(),
|
|
448
|
+
'arg_idxs': self.arg_idxs,
|
|
449
|
+
'kwarg_idxs': self.kwarg_idxs,
|
|
450
|
+
'group_by_start_idx': self.group_by_start_idx,
|
|
451
|
+
'group_by_stop_idx': self.group_by_stop_idx,
|
|
454
452
|
'order_by_start_idx': self.order_by_start_idx,
|
|
455
|
-
|
|
453
|
+
'is_method_call': self.is_method_call,
|
|
454
|
+
**super()._as_dict(),
|
|
456
455
|
}
|
|
457
|
-
return result
|
|
458
456
|
|
|
459
457
|
@classmethod
|
|
460
458
|
def _from_dict(cls, d: dict, components: list[Expr]) -> FunctionCall:
|
|
461
|
-
assert 'fn' in d
|
|
462
|
-
assert 'args' in d
|
|
463
|
-
assert 'kwargs' in d
|
|
464
|
-
# reassemble bound args
|
|
465
459
|
fn = func.Function.from_dict(d['fn'])
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
460
|
+
return_type = ts.ColumnType.from_dict(d['return_type']) if 'return_type' in d else None
|
|
461
|
+
arg_idxs: list[int] = d['arg_idxs']
|
|
462
|
+
kwarg_idxs: dict[str, int] = d['kwarg_idxs']
|
|
463
|
+
group_by_start_idx: int = d['group_by_start_idx']
|
|
464
|
+
group_by_stop_idx: int = d['group_by_stop_idx']
|
|
465
|
+
order_by_start_idx: int = d['order_by_start_idx']
|
|
466
|
+
is_method_call: bool = d['is_method_call']
|
|
467
|
+
|
|
468
|
+
args = [components[idx] for idx in arg_idxs]
|
|
469
|
+
kwargs = {name: components[idx] for name, idx in kwarg_idxs.items()}
|
|
470
|
+
group_by_exprs = components[group_by_start_idx:group_by_stop_idx]
|
|
471
|
+
order_by_exprs = components[order_by_start_idx:]
|
|
472
|
+
|
|
473
|
+
validation_error: str | None = None
|
|
474
|
+
|
|
475
|
+
if isinstance(fn, func.InvalidFunction):
|
|
476
|
+
validation_error = (
|
|
477
|
+
dedent(
|
|
478
|
+
f"""
|
|
479
|
+
The UDF '{fn.self_path}' cannot be located, because
|
|
480
|
+
{{error_msg}}
|
|
481
|
+
"""
|
|
482
|
+
)
|
|
483
|
+
.strip()
|
|
484
|
+
.format(error_msg=fn.error_msg)
|
|
485
|
+
)
|
|
486
|
+
return cls(fn, args, kwargs, return_type, is_method_call=is_method_call, validation_error=validation_error)
|
|
487
|
+
|
|
488
|
+
# Now re-bind args and kwargs using the version of `fn` that is currently represented in code. This ensures
|
|
489
|
+
# that we get a valid binding even if the signatures of `fn` have changed since the FunctionCall was
|
|
490
|
+
# serialized.
|
|
491
|
+
|
|
492
|
+
resolved_fn: func.Function = fn
|
|
493
|
+
|
|
494
|
+
try:
|
|
495
|
+
# Bind args and kwargs to the function signature in the current codebase.
|
|
496
|
+
resolved_fn, bound_args = fn._bind_to_matching_signature(args, kwargs)
|
|
497
|
+
except (TypeError, excs.Error):
|
|
498
|
+
signature_note_str = 'any of its signatures' if fn.is_polymorphic else 'its signature'
|
|
499
|
+
args_str = [f'pxt.{arg.col_type}' for arg in args]
|
|
500
|
+
args_str.extend(f'{name}: pxt.{arg.col_type}' for name, arg in kwargs.items())
|
|
501
|
+
call_signature_str = f'({", ".join(args_str)}) -> pxt.{return_type}'
|
|
502
|
+
fn_signature_str = f'{len(fn.signatures)} signatures' if fn.is_polymorphic else str(fn.signature)
|
|
503
|
+
validation_error = dedent(
|
|
504
|
+
f"""
|
|
505
|
+
The signature stored in the database for a UDF call to {fn.self_path!r} no longer
|
|
506
|
+
matches {signature_note_str} as currently defined in the code. This probably means that the
|
|
507
|
+
code for {fn.self_path!r} has changed in a backward-incompatible way.
|
|
508
|
+
Signature of UDF call in the database: {call_signature_str}
|
|
509
|
+
Signature of UDF as currently defined in code: {fn_signature_str}
|
|
510
|
+
"""
|
|
511
|
+
).strip()
|
|
512
|
+
else:
|
|
513
|
+
# Evaluate the call_return_type as defined in the current codebase.
|
|
514
|
+
call_return_type: ts.ColumnType | None = None
|
|
515
|
+
|
|
516
|
+
if isinstance(resolved_fn, func.ExprTemplateFunction) and not resolved_fn.template.expr.is_valid:
|
|
517
|
+
# The FunctionCall is based on an ExprTemplateFunction, but the template expression is not valid
|
|
518
|
+
# (because it in turn contains an invalid FunctionCall). In this case, inherit the validation error
|
|
519
|
+
# from the template expression.
|
|
520
|
+
validation_error = resolved_fn.template.expr.validation_error
|
|
521
|
+
else:
|
|
522
|
+
try:
|
|
523
|
+
call_return_type = resolved_fn.call_return_type(bound_args)
|
|
524
|
+
except ImportError as exc:
|
|
525
|
+
validation_error = dedent(
|
|
526
|
+
f"""
|
|
527
|
+
A UDF call to {fn.self_path!r} could not be fully resolved, because a module required
|
|
528
|
+
by the UDF could not be imported:
|
|
529
|
+
{exc}
|
|
530
|
+
"""
|
|
531
|
+
)
|
|
532
|
+
|
|
533
|
+
assert (call_return_type is None) != (validation_error is None)
|
|
534
|
+
|
|
535
|
+
if call_return_type is None and return_type is None:
|
|
536
|
+
# Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious
|
|
537
|
+
# way to infer it during DB migration, so we might encounter a stored return_type of None. If the
|
|
538
|
+
# resolution of call_return_type also fails, then we're out of luck; we have no choice but to
|
|
539
|
+
# fail-fast.
|
|
540
|
+
raise excs.Error(validation_error)
|
|
541
|
+
|
|
542
|
+
if call_return_type is not None:
|
|
543
|
+
# call_return_type resolution succeeded.
|
|
544
|
+
if return_type is None:
|
|
545
|
+
# Schema versions prior to 25 did not store the return_type in metadata (as mentioned above), so
|
|
546
|
+
# fall back on the call_return_type.
|
|
547
|
+
return_type = call_return_type
|
|
548
|
+
elif not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
|
|
549
|
+
# There is a return_type stored in metadata (schema version >= 25),
|
|
550
|
+
# and the stored return_type of the UDF call doesn't match the column type of the FunctionCall.
|
|
551
|
+
validation_error = dedent(
|
|
552
|
+
f"""
|
|
553
|
+
The return type stored in the database for a UDF call to {fn.self_path!r} no longer
|
|
554
|
+
matches its return type as currently defined in the code. This probably means that the
|
|
555
|
+
code for {fn.self_path!r} has changed in a backward-incompatible way.
|
|
556
|
+
Return type of UDF call in the database: {return_type}
|
|
557
|
+
Return type of UDF as currently defined in code: {call_return_type}
|
|
558
|
+
"""
|
|
559
|
+
).strip()
|
|
560
|
+
|
|
561
|
+
assert return_type is not None # Guaranteed by the above logic.
|
|
562
|
+
|
|
472
563
|
fn_call = cls(
|
|
473
|
-
|
|
474
|
-
|
|
564
|
+
resolved_fn,
|
|
565
|
+
args,
|
|
566
|
+
kwargs,
|
|
567
|
+
return_type,
|
|
568
|
+
group_by_clause=group_by_exprs,
|
|
569
|
+
order_by_clause=order_by_exprs,
|
|
570
|
+
is_method_call=is_method_call,
|
|
571
|
+
validation_error=validation_error,
|
|
572
|
+
)
|
|
573
|
+
|
|
475
574
|
return fn_call
|