pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,126 +1,94 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import inspect
4
- import json
4
+ import logging
5
5
  import sys
6
- from typing import Any, Optional
6
+ from textwrap import dedent
7
+ from typing import Any, Sequence
7
8
 
8
9
  import sqlalchemy as sql
9
10
 
10
- import pixeltable.catalog as catalog
11
- import pixeltable.exceptions as excs
12
- import pixeltable.func as func
13
- import pixeltable.type_system as ts
11
+ from pixeltable import catalog, exceptions as excs, func, type_system as ts
14
12
 
15
13
  from .data_row import DataRow
16
14
  from .expr import Expr
17
- from .inline_expr import InlineDict, InlineList
15
+ from .literal import Literal
18
16
  from .row_builder import RowBuilder
19
17
  from .rowid_ref import RowidRef
20
18
  from .sql_element_cache import SqlElementCache
21
19
 
20
+ _logger = logging.getLogger('pixeltable')
22
21
 
23
- class FunctionCall(Expr):
24
22
 
23
+ class FunctionCall(Expr):
25
24
  fn: func.Function
26
25
  is_method_call: bool
27
26
  agg_init_args: dict[str, Any]
27
+ resource_pool: str | None
28
+
29
+ # These collections hold the component indices corresponding to the args and kwargs
30
+ # that were passed to the FunctionCall. They're 1:1 with the original call pattern.
31
+ arg_idxs: list[int]
32
+ kwarg_idxs: dict[str, int]
28
33
 
29
- # tuple[Optional[int], Optional[Any]]:
30
- # - for Exprs: (index into components, None)
31
- # - otherwise: (None, val)
32
- args: list[tuple[Optional[int], Optional[Any]]]
33
- kwargs: dict[str, tuple[Optional[int], Optional[Any]]]
34
+ # A "bound" version of the FunctionCall arguments, mapping each specified parameter name
35
+ # to one of three types of bindings:
36
+ # - a component index, if the parameter is a non-variadic parameter
37
+ # - a list of component indices, if the parameter is a variadic positional parameter
38
+ # - a dict mapping keyword names to component indices, if the parameter is a variadic keyword parameter
39
+ bound_idxs: dict[str, int | list[int] | dict[str, int]]
34
40
 
35
- arg_types: list[ts.ColumnType]
36
- kwarg_types: dict[str, ts.ColumnType]
41
+ return_type: ts.ColumnType
37
42
  group_by_start_idx: int
38
43
  group_by_stop_idx: int
39
44
  fn_expr_idx: int
40
45
  order_by_start_idx: int
41
- constant_args: set[str]
42
- aggregator: Optional[Any]
43
- current_partition_vals: Optional[list[Any]]
46
+ aggregator: Any | None
47
+ current_partition_vals: list[Any] | None
48
+
49
+ _validation_error: str | None
44
50
 
45
51
  def __init__(
46
- self, fn: func.Function, bound_args: dict[str, Any], order_by_clause: Optional[list[Any]] = None,
47
- group_by_clause: Optional[list[Any]] = None, is_method_call: bool = False):
52
+ self,
53
+ fn: func.Function,
54
+ args: list[Expr],
55
+ kwargs: dict[str, Expr],
56
+ return_type: ts.ColumnType,
57
+ order_by_clause: list[Any] | None = None,
58
+ group_by_clause: list[Any] | None = None,
59
+ is_method_call: bool = False,
60
+ validation_error: str | None = None,
61
+ ):
62
+ assert not fn.is_polymorphic
63
+ assert all(isinstance(arg, Expr) for arg in args)
64
+ assert all(isinstance(arg, Expr) for arg in kwargs.values())
65
+
48
66
  if order_by_clause is None:
49
67
  order_by_clause = []
50
68
  if group_by_clause is None:
51
69
  group_by_clause = []
52
- signature = fn.signature
53
- return_type = fn.call_return_type(bound_args)
54
- self.fn = fn
55
- self.is_method_call = is_method_call
56
- self.normalize_args(fn.name, signature, bound_args)
57
-
58
- # If `return_type` is non-nullable, but the function call has a nullable input to any of its non-nullable
59
- # parameters, then we need to make it nullable. This is because Pixeltable defaults a function output to
60
- # `None` when any of its non-nullable inputs are `None`.
61
- for arg_name, arg in bound_args.items():
62
- param = signature.parameters[arg_name]
63
- if (
64
- param.col_type is not None and not param.col_type.nullable
65
- and isinstance(arg, Expr) and arg.col_type.nullable
66
- ):
67
- return_type = return_type.copy(nullable=True)
68
- break
69
70
 
70
71
  super().__init__(return_type)
71
72
 
72
- self.agg_init_args = {}
73
- if self.is_agg_fn_call:
74
- # we separate out the init args for the aggregator
75
- assert isinstance(fn, func.AggregateFunction)
76
- self.agg_init_args = {
77
- arg_name: arg for arg_name, arg in bound_args.items() if arg_name in fn.init_param_names
78
- }
79
- bound_args = {arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names}
80
-
81
- # construct components, args, kwargs
82
- self.args = []
83
- self.kwargs = {}
84
-
85
- # we record the types of non-variable parameters for runtime type checks
86
- self.arg_types = []
87
- self.kwarg_types = {}
88
-
89
- # the prefix of parameters that are bound can be passed by position
90
- processed_args: set[str] = set()
91
- for py_param in fn.signature.py_signature.parameters.values():
92
- if py_param.name not in bound_args or py_param.kind == inspect.Parameter.KEYWORD_ONLY:
93
- break
94
- arg = bound_args[py_param.name]
95
- if isinstance(arg, Expr):
96
- self.args.append((len(self.components), None))
97
- self.components.append(arg.copy())
98
- else:
99
- self.args.append((None, arg))
100
- if py_param.kind != inspect.Parameter.VAR_POSITIONAL and py_param.kind != inspect.Parameter.VAR_KEYWORD:
101
- self.arg_types.append(signature.parameters[py_param.name].col_type)
102
- processed_args.add(py_param.name)
103
-
104
- # the remaining args are passed as keywords
105
- for param_name in bound_args.keys():
106
- if param_name not in processed_args:
107
- arg = bound_args[param_name]
108
- if isinstance(arg, Expr):
109
- self.kwargs[param_name] = (len(self.components), None)
110
- self.components.append(arg.copy())
111
- else:
112
- self.kwargs[param_name] = (None, arg)
113
- if fn.signature.py_signature.parameters[param_name].kind != inspect.Parameter.VAR_KEYWORD:
114
- self.kwarg_types[param_name] = signature.parameters[param_name].col_type
73
+ self.fn = fn
74
+ self.return_type = return_type
75
+ self.is_method_call = is_method_call
76
+
77
+ # Build the components list from the specified args and kwargs, and note the component_idx of each argument.
78
+ self.components.extend(arg.copy() for arg in args)
79
+ self.arg_idxs = list(range(len(self.components)))
80
+ self.components.extend(arg.copy() for arg in kwargs.values())
81
+ self.kwarg_idxs = {name: i + len(args) for i, name in enumerate(kwargs.keys())}
115
82
 
116
83
  # window function state:
117
84
  # self.components[self.group_by_start_idx:self.group_by_stop_idx] contains group_by exprs
118
85
  self.group_by_start_idx, self.group_by_stop_idx = 0, 0
119
86
  if len(group_by_clause) > 0:
120
87
  if isinstance(group_by_clause[0], catalog.Table):
88
+ assert len(group_by_clause) == 1
121
89
  group_by_exprs = self._create_rowid_refs(group_by_clause[0])
122
90
  else:
123
- assert isinstance(group_by_clause[0], Expr)
91
+ assert all(isinstance(expr, Expr) for expr in group_by_clause)
124
92
  group_by_exprs = group_by_clause
125
93
  # record grouping exprs in self.components, we need to evaluate them to get partition vals
126
94
  self.group_by_start_idx = len(self.components)
@@ -129,9 +97,9 @@ class FunctionCall(Expr):
129
97
 
130
98
  if isinstance(self.fn, func.ExprTemplateFunction):
131
99
  # we instantiate the template to create an Expr that can be evaluated and record that as a component
132
- fn_expr = self.fn.instantiate(**bound_args)
100
+ fn_expr = self.fn.instantiate(args, kwargs)
101
+ self.fn_expr_idx = len(self.components)
133
102
  self.components.append(fn_expr)
134
- self.fn_expr_idx = len(self.components) - 1
135
103
  else:
136
104
  self.fn_expr_idx = sys.maxsize
137
105
 
@@ -139,12 +107,38 @@ class FunctionCall(Expr):
139
107
  # (that's done in SQL)
140
108
  if len(order_by_clause) > 0 and not isinstance(order_by_clause[0], Expr):
141
109
  raise excs.Error(
142
- f'order_by argument needs to be a Pixeltable expression, but instead is a {type(order_by_clause[0])}')
143
- # don't add components after this, everything after order_by_start_idx is part of the order_by clause
110
+ f'order_by argument needs to be a Pixeltable expression, but instead is a {type(order_by_clause[0])}'
111
+ )
144
112
  self.order_by_start_idx = len(self.components)
145
113
  self.components.extend(order_by_clause)
146
114
 
147
- self.constant_args = {param_name for param_name, arg in bound_args.items() if not isinstance(arg, Expr)}
115
+ self._validation_error = validation_error
116
+
117
+ if validation_error is not None:
118
+ self.bound_idxs = {}
119
+ self.resource_pool = None
120
+ return
121
+
122
+ # Now generate bound_idxs for the args and kwargs indices.
123
+ # This is guaranteed to work, because at this point the call has already been validated.
124
+ # These will be used later to dereference specific parameter values.
125
+ bindings = fn.signature.py_signature.bind(*self.arg_idxs, **self.kwarg_idxs)
126
+ self.bound_idxs = bindings.arguments
127
+
128
+ # Separately generate bound_args for purposes of determining the resource pool.
129
+ bindings = fn.signature.py_signature.bind(*args, **kwargs)
130
+ bound_args = bindings.arguments
131
+ self.resource_pool = fn.call_resource_pool(bound_args)
132
+
133
+ self.agg_init_args = {}
134
+ if self.is_agg_fn_call:
135
+ # We separate out the init args for the aggregator. Unpack Literals in init args.
136
+ assert isinstance(fn, func.AggregateFunction)
137
+ for arg_name, arg in bound_args.items():
138
+ if arg_name in fn.init_param_names[0]:
139
+ assert isinstance(arg, Literal) # This was checked during validate_call
140
+ self.agg_init_args[arg_name] = arg.val
141
+
148
142
  # execution state for aggregate functions
149
143
  self.aggregator = None
150
144
  self.current_partition_vals = None
@@ -153,131 +147,58 @@ class FunctionCall(Expr):
153
147
 
154
148
  def _create_rowid_refs(self, tbl: catalog.Table) -> list[Expr]:
155
149
  target = tbl._tbl_version_path.tbl_version
156
- return [RowidRef(target, i) for i in range(target.num_rowid_columns())]
150
+ return [RowidRef(target, i) for i in range(target.get().num_rowid_columns())]
157
151
 
158
- def default_column_name(self) -> Optional[str]:
152
+ def default_column_name(self) -> str | None:
159
153
  return self.fn.name
160
154
 
161
- @classmethod
162
- def normalize_args(cls, fn_name: str, signature: func.Signature, bound_args: dict[str, Any]) -> None:
163
- """Converts args to Exprs where appropriate and checks that they are compatible with signature.
164
-
165
- Updates bound_args in place, where necessary.
166
- """
167
- for param_name, arg in bound_args.items():
168
- param = signature.parameters[param_name]
169
- is_var_param = param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
170
-
171
- if isinstance(arg, dict):
172
- try:
173
- arg = InlineDict(arg)
174
- bound_args[param_name] = arg
175
- continue
176
- except excs.Error:
177
- # this didn't work, but it might be a literal
178
- pass
179
-
180
- if isinstance(arg, list) or isinstance(arg, tuple):
181
- try:
182
- arg = InlineList(arg)
183
- bound_args[param_name] = arg
184
- continue
185
- except excs.Error:
186
- # this didn't work, but it might be a literal
187
- pass
188
-
189
- if not isinstance(arg, Expr):
190
- # make sure that non-Expr args are json-serializable and are literals of the correct type
191
- try:
192
- _ = json.dumps(arg)
193
- except TypeError:
194
- raise excs.Error(f'Argument for parameter {param_name!r} is not json-serializable: {arg} (of type {type(arg)})')
195
- if arg is not None:
196
- try:
197
- param_type = param.col_type
198
- bound_args[param_name] = param_type.create_literal(arg)
199
- except TypeError as e:
200
- msg = str(e)
201
- raise excs.Error(f'Argument for parameter {param_name!r}: {msg[0].lower() + msg[1:]}')
202
- continue
203
-
204
- # these checks break the db migration test, because InlineArray isn't serialized correctly (it loses
205
- # the type information)
206
- # if is_var_param:
207
- # if param.kind == inspect.Parameter.VAR_POSITIONAL:
208
- # if not isinstance(arg, InlineArray) or not arg.col_type.is_json_type():
209
- # pass
210
- # assert isinstance(arg, InlineArray), type(arg)
211
- # assert arg.col_type.is_json_type()
212
- # if param.kind == inspect.Parameter.VAR_KEYWORD:
213
- # if not isinstance(arg, InlineDict):
214
- # pass
215
- # assert isinstance(arg, InlineDict), type(arg)
216
- if is_var_param:
217
- pass
218
- else:
219
- assert param.col_type is not None
220
- # Check that the argument is consistent with the expected parameter type, with the allowance that
221
- # non-nullable parameters can still accept nullable arguments (since function calls with Nones
222
- # assigned to non-nullable parameters will always return None)
223
- if not (
224
- param.col_type.is_supertype_of(arg.col_type, ignore_nullable=True)
225
- # TODO: this is a hack to allow JSON columns to be passed to functions that accept scalar
226
- # types. It's necessary to avoid littering notebooks with `apply(str)` calls or equivalent.
227
- # (Previously, this wasn't necessary because `is_supertype_of()` was improperly implemented.)
228
- # We need to think through the right way to handle this scenario.
229
- or (arg.col_type.is_json_type() and param.col_type.is_scalar_type())
230
- ):
231
- raise excs.Error(
232
- f'Parameter {param_name} (in function {fn_name}): argument type {arg.col_type} does not match parameter type '
233
- f'{param.col_type}')
234
-
235
155
  def _equals(self, other: FunctionCall) -> bool:
236
- if self.fn != other.fn:
237
- return False
238
- if len(self.args) != len(other.args):
239
- return False
240
- for i in range(len(self.args)):
241
- if self.args[i] != other.args[i]:
242
- return False
243
- if self.group_by_start_idx != other.group_by_start_idx:
244
- return False
245
- if self.group_by_stop_idx != other.group_by_stop_idx:
246
- return False
247
- if self.order_by_start_idx != other.order_by_start_idx:
248
- return False
249
- return True
156
+ return (
157
+ self.fn == other.fn
158
+ and self.arg_idxs == other.arg_idxs
159
+ and self.kwarg_idxs == other.kwarg_idxs
160
+ and self.group_by_start_idx == other.group_by_start_idx
161
+ and self.group_by_stop_idx == other.group_by_stop_idx
162
+ and self.order_by_start_idx == other.order_by_start_idx
163
+ )
250
164
 
251
165
  def _id_attrs(self) -> list[tuple[str, Any]]:
252
- return super()._id_attrs() + [
166
+ return [
167
+ *super()._id_attrs(),
253
168
  ('fn', id(self.fn)), # use the function pointer, not the fqn, which isn't set for lambdas
254
- ('args', self.args),
255
- ('kwargs', self.kwargs),
169
+ ('args', self.arg_idxs),
170
+ ('kwargs', self.kwarg_idxs),
256
171
  ('group_by_start_idx', self.group_by_start_idx),
257
172
  ('group_by_stop_idx', self.group_by_stop_idx),
258
- ('order_by_start_idx', self.order_by_start_idx)
173
+ ('fn_expr_idx', self.fn_expr_idx),
174
+ ('order_by_start_idx', self.order_by_start_idx),
259
175
  ]
260
176
 
261
177
  def __repr__(self) -> str:
262
178
  return self.display_str()
263
179
 
180
+ # def __repr__(self) -> str:
181
+ # return f'FunctionCall(fn={self.fn!r}, args={self.args!r}, kwargs={self.kwargs!r})'
182
+
183
+ @property
184
+ def validation_error(self) -> str | None:
185
+ return self._validation_error or super().validation_error
186
+
264
187
  def display_str(self, inline: bool = True) -> str:
188
+ if isinstance(self.fn, func.ExprTemplateFunction) and isinstance(self.fn.template.expr, FunctionCall):
189
+ # If this FunctionCall uses an ExprTemplateFunction with a nested FunctionCall, then resolve the
190
+ # indirection by substitution into the ExprTemplateFunction.
191
+ subst = self.fn.instantiate(self.args, self.kwargs)
192
+ return subst.display_str(inline)
265
193
  if self.is_method_call:
266
194
  return f'{self.components[0]}.{self.fn.name}({self._print_args(1, inline)})'
267
195
  else:
268
- fn_name = self.fn.display_name if self.fn.display_name != '' else 'anonymous_fn'
196
+ fn_name = self.fn.display_name or 'anonymous_fn'
269
197
  return f'{fn_name}({self._print_args()})'
270
198
 
271
199
  def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
272
- def print_arg(arg: Any) -> str:
273
- return repr(arg) if isinstance(arg, str) else str(arg)
274
- arg_strs = [
275
- print_arg(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]
276
- ]
277
- arg_strs.extend([
278
- f'{param_name}={print_arg(arg) if idx is None else str(self.components[idx])}'
279
- for param_name, (idx, arg) in self.kwargs.items()
280
- ])
200
+ arg_strs = [str(self.components[idx]) for idx in self.arg_idxs[start_idx:]]
201
+ arg_strs.extend([f'{param_name}={self.components[idx]}' for param_name, idx in self.kwarg_idxs.items()])
281
202
  if len(self.order_by) > 0:
282
203
  assert isinstance(self.fn, func.AggregateFunction)
283
204
  if self.fn.requires_order_by:
@@ -293,20 +214,28 @@ class FunctionCall(Expr):
293
214
  def has_group_by(self) -> bool:
294
215
  return self.group_by_stop_idx != 0
295
216
 
217
+ @property
218
+ def is_async(self) -> bool:
219
+ return self.fn.is_async
220
+
296
221
  @property
297
222
  def group_by(self) -> list[Expr]:
298
- return self.components[self.group_by_start_idx:self.group_by_stop_idx]
223
+ return self.components[self.group_by_start_idx : self.group_by_stop_idx]
299
224
 
300
225
  @property
301
226
  def order_by(self) -> list[Expr]:
302
- return self.components[self.order_by_start_idx:]
227
+ return self.components[self.order_by_start_idx :]
303
228
 
304
229
  @property
305
230
  def is_window_fn_call(self) -> bool:
306
- return isinstance(self.fn, func.AggregateFunction) and self.fn.allows_window and (
307
- not self.fn.allows_std_agg
308
- or self.has_group_by()
309
- or (len(self.order_by) > 0 and not self.fn.requires_order_by)
231
+ return (
232
+ isinstance(self.fn, func.AggregateFunction)
233
+ and self.fn.allows_window
234
+ and (
235
+ not self.fn.allows_std_agg
236
+ or self.has_group_by()
237
+ or (len(self.order_by) > 0 and not self.fn.requires_order_by)
238
+ )
310
239
  )
311
240
 
312
241
  def get_window_sort_exprs(self) -> tuple[list[Expr], list[Expr]]:
@@ -324,35 +253,29 @@ class FunctionCall(Expr):
324
253
  assert self.is_agg_fn_call
325
254
  return self.order_by
326
255
 
327
- def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
256
+ def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
257
+ assert self.is_valid
258
+
328
259
  # we currently can't translate aggregate functions with grouping and/or ordering to SQL
329
260
  if self.has_group_by() or len(self.order_by) > 0:
330
261
  return None
331
262
 
332
263
  # try to construct args and kwargs to call self.fn._to_sql()
264
+ args: list[sql.ColumnElement] = []
265
+ for component_idx in self.arg_idxs:
266
+ arg_element = sql_elements.get(self.components[component_idx])
267
+ if arg_element is None:
268
+ return None
269
+ args.append(arg_element)
270
+
333
271
  kwargs: dict[str, sql.ColumnElement] = {}
334
- for param_name, (component_idx, arg) in self.kwargs.items():
335
- param = self.fn.signature.parameters[param_name]
336
- assert param.kind != inspect.Parameter.VAR_POSITIONAL and param.kind != inspect.Parameter.VAR_KEYWORD
337
- if component_idx is None:
338
- kwargs[param_name] = sql.literal(arg)
339
- else:
340
- arg_element = sql_elements.get(self.components[component_idx])
341
- if arg_element is None:
342
- return None
343
- kwargs[param_name] = arg_element
272
+ for param_name, component_idx in self.kwarg_idxs.items():
273
+ arg_element = sql_elements.get(self.components[component_idx])
274
+ if arg_element is None:
275
+ return None
276
+ kwargs[param_name] = arg_element
344
277
 
345
- args: list[sql.ColumnElement] = []
346
- for _, (component_idx, arg) in enumerate(self.args):
347
- if component_idx is None:
348
- args.append(sql.literal(arg))
349
- else:
350
- arg_element = sql_elements.get(self.components[component_idx])
351
- if arg_element is None:
352
- return None
353
- args.append(arg_element)
354
- result = self.fn._to_sql(*args, **kwargs)
355
- return result
278
+ return self.fn._to_sql(*args, **kwargs)
356
279
 
357
280
  def reset_agg(self) -> None:
358
281
  """
@@ -360,46 +283,127 @@ class FunctionCall(Expr):
360
283
  """
361
284
  assert self.is_agg_fn_call
362
285
  assert isinstance(self.fn, func.AggregateFunction)
363
- self.aggregator = self.fn.agg_cls(**self.agg_init_args)
286
+ self.aggregator = self.fn.agg_class(**self.agg_init_args)
287
+
288
+ @property
289
+ def bound_args(self) -> dict[str, Expr]:
290
+ """
291
+ Reconstructs bound arguments from the components of this FunctionCall.
292
+ """
293
+ bound_args: dict[str, Expr] = {}
294
+ for name, idx in self.bound_idxs.items():
295
+ if isinstance(idx, int):
296
+ bound_args[name] = self.components[idx]
297
+ elif isinstance(idx, Sequence):
298
+ bound_args[name] = Expr.from_object([self.components[i] for i in idx])
299
+ elif isinstance(idx, dict):
300
+ bound_args[name] = Expr.from_object({k: self.components[i] for k, i in idx.items()})
301
+ else:
302
+ raise AssertionError(f'{name}: {idx} (of type `{type(idx)}`)')
303
+ return bound_args
304
+
305
+ def substitute(self, spec: dict[Expr, Expr]) -> Expr:
306
+ """
307
+ Substitution of FunctionCall arguments could cause the return value to become more specific, in the case
308
+ where a variable is replaced with a specific value.
309
+ """
310
+ res = super().substitute(spec)
311
+ assert res is self
312
+ if self.is_valid:
313
+ # If this FunctionCall is valid, re-evaluate the call_return_type of the substituted expression. If the
314
+ # FunctionCall is not valid, it isn't safe to do this. (Really we should be asserting that it *is* valid,
315
+ # but we still need to be able to do substitutions on invalid FunctionCalls, because loading an
316
+ # EmbeddingIndex from the db involves reconstructing the requisite (substituted) FunctionCalls. We could
317
+ # fix this by separately persisting the FunctionCall instances held by EmbeddingIndex to the db. That's
318
+ # probably a good idea, but it's also probably not urgent, since it only affects Functions that have a
319
+ # conditional_return_type implemented.)
320
+ self.return_type = self.fn.call_return_type(self.bound_args)
321
+ self.col_type = self.return_type
322
+ return self
323
+
324
+ @property
325
+ def args(self) -> list[Expr]:
326
+ return [self.components[idx] for idx in self.arg_idxs]
327
+
328
+ @property
329
+ def kwargs(self) -> dict[str, Expr]:
330
+ return {name: self.components[idx] for name, idx in self.kwarg_idxs.items()}
331
+
332
+ @property
333
+ def fn_expr(self) -> Expr | None:
334
+ if self.fn_expr_idx != sys.maxsize:
335
+ return self.components[self.fn_expr_idx]
336
+ return None
364
337
 
365
338
  def update(self, data_row: DataRow) -> None:
366
339
  """
367
340
  Update agg state
368
341
  """
369
342
  assert self.is_agg_fn_call
370
- args, kwargs = self._make_args(data_row)
343
+ args, kwargs = self.make_args(data_row)
371
344
  self.aggregator.update(*args, **kwargs)
372
345
 
373
- def _make_args(self, data_row: DataRow) -> tuple[list[Any], dict[str, Any]]:
374
- """Return args and kwargs, constructed for data_row"""
346
+ def make_args(self, data_row: DataRow) -> tuple[list[Any], dict[str, Any]] | None:
347
+ """Return args and kwargs, constructed for data_row; returns None if any non-nullable arg is None."""
348
+ args: list[Any] = []
349
+ parameters_by_pos = self.fn.signature.parameters_by_pos
350
+ for idx in self.arg_idxs:
351
+ val = data_row[self.components[idx].slot_idx]
352
+ if (
353
+ val is None
354
+ and parameters_by_pos[idx].kind
355
+ in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
356
+ and not parameters_by_pos[idx].col_type.nullable
357
+ ):
358
+ return None
359
+ args.append(val)
360
+
375
361
  kwargs: dict[str, Any] = {}
376
- for param_name, (component_idx, arg) in self.kwargs.items():
377
- val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
378
- param = self.fn.signature.parameters[param_name]
379
- if param.kind == inspect.Parameter.VAR_KEYWORD:
380
- # expand **kwargs parameter
381
- kwargs.update(val)
382
- else:
383
- assert param.kind != inspect.Parameter.VAR_POSITIONAL
384
- kwargs[param_name] = val
362
+ parameters = self.fn.signature.parameters
363
+ for param_name, idx in self.kwarg_idxs.items():
364
+ val = data_row[self.components[idx].slot_idx]
365
+ if (
366
+ val is None
367
+ and parameters[param_name].kind
368
+ in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
369
+ and not parameters[param_name].col_type.nullable
370
+ ):
371
+ return None
372
+ kwargs[param_name] = val
385
373
 
386
- args: list[Any] = []
387
- for param_idx, (component_idx, arg) in enumerate(self.args):
388
- val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
389
- param = self.fn.signature.parameters_by_pos[param_idx]
390
- if param.kind == inspect.Parameter.VAR_POSITIONAL:
391
- # expand *args parameter
392
- assert isinstance(val, list)
393
- args.extend(val)
394
- elif param.kind == inspect.Parameter.VAR_KEYWORD:
395
- # expand **kwargs parameter
396
- assert isinstance(val, dict)
397
- kwargs.update(val)
398
- else:
399
- args.append(val)
400
374
  return args, kwargs
401
375
 
376
+ def get_param_values(self, param_names: Sequence[str], data_rows: list[DataRow]) -> list[dict[str, Any]]:
377
+ """
378
+ Returns a list of dicts mapping each param name to its value when this FunctionCall is evaluated against
379
+ data_rows
380
+ """
381
+ assert self.is_valid
382
+ assert all(name in self.fn.signature.parameters for name in param_names), f'{param_names}, {self.fn.signature}'
383
+ result: list[dict[str, Any]] = []
384
+ for row in data_rows:
385
+ d: dict[str, Any] = {}
386
+ for param_name in param_names:
387
+ val = self.bound_idxs.get(param_name)
388
+ if isinstance(val, int):
389
+ d[param_name] = row[self.components[val].slot_idx]
390
+ elif isinstance(val, list):
391
+ # var_positional
392
+ d[param_name] = [row[self.components[idx].slot_idx] for idx in val]
393
+ elif isinstance(val, dict):
394
+ # var_keyword
395
+ d[param_name] = {k: row[self.components[idx].slot_idx] for k, idx in val.items()}
396
+ else:
397
+ assert val is None
398
+ default = self.fn.signature.parameters[param_name].default
399
+ assert default is not None
400
+ d[param_name] = default.val
401
+ result.append(d)
402
+ return result
403
+
402
404
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
405
+ assert self.is_valid
406
+
403
407
  if isinstance(self.fn, func.ExprTemplateFunction):
404
408
  # we need to evaluate the template
405
409
  # TODO: can we get rid of this extra copy?
@@ -412,64 +416,159 @@ class FunctionCall(Expr):
412
416
  data_row[self.slot_idx] = self.aggregator.value()
413
417
  return
414
418
 
415
- args, kwargs = self._make_args(data_row)
416
- signature = self.fn.signature
417
- if signature.parameters is not None:
418
- # check for nulls
419
- for i in range(len(self.arg_types)):
420
- if args[i] is None and not self.arg_types[i].nullable:
421
- # we can't evaluate this function
422
- data_row[self.slot_idx] = None
423
- return
424
- for param_name, param_type in self.kwarg_types.items():
425
- if kwargs[param_name] is None and not param_type.nullable:
426
- # we can't evaluate this function
427
- data_row[self.slot_idx] = None
428
- return
429
-
430
- if isinstance(self.fn, func.CallableFunction) and not self.fn.is_batched:
431
- # optimization: avoid additional level of indirection we'd get from calling Function.exec()
432
- data_row[self.slot_idx] = self.fn.py_fn(*args, **kwargs)
433
- elif self.is_window_fn_call:
419
+ args_kwargs = self.make_args(data_row)
420
+ if args_kwargs is None:
421
+ # we can't evaluate this function
422
+ data_row[self.slot_idx] = None
423
+ return
424
+ args, kwargs = args_kwargs
425
+
426
+ if self.is_window_fn_call:
434
427
  assert isinstance(self.fn, func.AggregateFunction)
428
+ agg_cls = self.fn.agg_class
435
429
  if self.has_group_by():
436
430
  if self.current_partition_vals is None:
437
431
  self.current_partition_vals = [None] * len(self.group_by)
438
432
  partition_vals = [data_row[e.slot_idx] for e in self.group_by]
439
433
  if partition_vals != self.current_partition_vals:
440
434
  # new partition
441
- self.aggregator = self.fn.agg_cls(**self.agg_init_args)
435
+ self.aggregator = agg_cls(**self.agg_init_args)
442
436
  self.current_partition_vals = partition_vals
443
437
  elif self.aggregator is None:
444
- self.aggregator = self.fn.agg_cls(**self.agg_init_args)
438
+ self.aggregator = agg_cls(**self.agg_init_args)
445
439
  self.aggregator.update(*args)
446
440
  data_row[self.slot_idx] = self.aggregator.value()
447
441
  else:
448
- data_row[self.slot_idx] = self.fn.exec(*args, **kwargs)
442
+ data_row[self.slot_idx] = self.fn.exec(args, kwargs)
449
443
 
450
444
  def _as_dict(self) -> dict:
451
- result = {
452
- 'fn': self.fn.as_dict(), 'args': self.args, 'kwargs': self.kwargs,
453
- 'group_by_start_idx': self.group_by_start_idx, 'group_by_stop_idx': self.group_by_stop_idx,
445
+ return {
446
+ 'fn': self.fn.as_dict(),
447
+ 'return_type': self.return_type.as_dict(),
448
+ 'arg_idxs': self.arg_idxs,
449
+ 'kwarg_idxs': self.kwarg_idxs,
450
+ 'group_by_start_idx': self.group_by_start_idx,
451
+ 'group_by_stop_idx': self.group_by_stop_idx,
454
452
  'order_by_start_idx': self.order_by_start_idx,
455
- **super()._as_dict()
453
+ 'is_method_call': self.is_method_call,
454
+ **super()._as_dict(),
456
455
  }
457
- return result
458
456
 
459
457
  @classmethod
460
458
  def _from_dict(cls, d: dict, components: list[Expr]) -> FunctionCall:
461
- assert 'fn' in d
462
- assert 'args' in d
463
- assert 'kwargs' in d
464
- # reassemble bound args
465
459
  fn = func.Function.from_dict(d['fn'])
466
- param_names = list(fn.signature.parameters.keys())
467
- bound_args = {param_names[i]: arg if idx is None else components[idx] for i, (idx, arg) in enumerate(d['args'])}
468
- bound_args.update(
469
- {param_name: val if idx is None else components[idx] for param_name, (idx, val) in d['kwargs'].items()})
470
- group_by_exprs = components[d['group_by_start_idx']:d['group_by_stop_idx']]
471
- order_by_exprs = components[d['order_by_start_idx']:]
460
+ return_type = ts.ColumnType.from_dict(d['return_type']) if 'return_type' in d else None
461
+ arg_idxs: list[int] = d['arg_idxs']
462
+ kwarg_idxs: dict[str, int] = d['kwarg_idxs']
463
+ group_by_start_idx: int = d['group_by_start_idx']
464
+ group_by_stop_idx: int = d['group_by_stop_idx']
465
+ order_by_start_idx: int = d['order_by_start_idx']
466
+ is_method_call: bool = d['is_method_call']
467
+
468
+ args = [components[idx] for idx in arg_idxs]
469
+ kwargs = {name: components[idx] for name, idx in kwarg_idxs.items()}
470
+ group_by_exprs = components[group_by_start_idx:group_by_stop_idx]
471
+ order_by_exprs = components[order_by_start_idx:]
472
+
473
+ validation_error: str | None = None
474
+
475
+ if isinstance(fn, func.InvalidFunction):
476
+ validation_error = (
477
+ dedent(
478
+ f"""
479
+ The UDF '{fn.self_path}' cannot be located, because
480
+ {{error_msg}}
481
+ """
482
+ )
483
+ .strip()
484
+ .format(error_msg=fn.error_msg)
485
+ )
486
+ return cls(fn, args, kwargs, return_type, is_method_call=is_method_call, validation_error=validation_error)
487
+
488
+ # Now re-bind args and kwargs using the version of `fn` that is currently represented in code. This ensures
489
+ # that we get a valid binding even if the signatures of `fn` have changed since the FunctionCall was
490
+ # serialized.
491
+
492
+ resolved_fn: func.Function = fn
493
+
494
+ try:
495
+ # Bind args and kwargs to the function signature in the current codebase.
496
+ resolved_fn, bound_args = fn._bind_to_matching_signature(args, kwargs)
497
+ except (TypeError, excs.Error):
498
+ signature_note_str = 'any of its signatures' if fn.is_polymorphic else 'its signature'
499
+ args_str = [f'pxt.{arg.col_type}' for arg in args]
500
+ args_str.extend(f'{name}: pxt.{arg.col_type}' for name, arg in kwargs.items())
501
+ call_signature_str = f'({", ".join(args_str)}) -> pxt.{return_type}'
502
+ fn_signature_str = f'{len(fn.signatures)} signatures' if fn.is_polymorphic else str(fn.signature)
503
+ validation_error = dedent(
504
+ f"""
505
+ The signature stored in the database for a UDF call to {fn.self_path!r} no longer
506
+ matches {signature_note_str} as currently defined in the code. This probably means that the
507
+ code for {fn.self_path!r} has changed in a backward-incompatible way.
508
+ Signature of UDF call in the database: {call_signature_str}
509
+ Signature of UDF as currently defined in code: {fn_signature_str}
510
+ """
511
+ ).strip()
512
+ else:
513
+ # Evaluate the call_return_type as defined in the current codebase.
514
+ call_return_type: ts.ColumnType | None = None
515
+
516
+ if isinstance(resolved_fn, func.ExprTemplateFunction) and not resolved_fn.template.expr.is_valid:
517
+ # The FunctionCall is based on an ExprTemplateFunction, but the template expression is not valid
518
+ # (because it in turn contains an invalid FunctionCall). In this case, inherit the validation error
519
+ # from the template expression.
520
+ validation_error = resolved_fn.template.expr.validation_error
521
+ else:
522
+ try:
523
+ call_return_type = resolved_fn.call_return_type(bound_args)
524
+ except ImportError as exc:
525
+ validation_error = dedent(
526
+ f"""
527
+ A UDF call to {fn.self_path!r} could not be fully resolved, because a module required
528
+ by the UDF could not be imported:
529
+ {exc}
530
+ """
531
+ )
532
+
533
+ assert (call_return_type is None) != (validation_error is None)
534
+
535
+ if call_return_type is None and return_type is None:
536
+ # Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious
537
+ # way to infer it during DB migration, so we might encounter a stored return_type of None. If the
538
+ # resolution of call_return_type also fails, then we're out of luck; we have no choice but to
539
+ # fail-fast.
540
+ raise excs.Error(validation_error)
541
+
542
+ if call_return_type is not None:
543
+ # call_return_type resolution succeeded.
544
+ if return_type is None:
545
+ # Schema versions prior to 25 did not store the return_type in metadata (as mentioned above), so
546
+ # fall back on the call_return_type.
547
+ return_type = call_return_type
548
+ elif not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
549
+ # There is a return_type stored in metadata (schema version >= 25),
550
+ # and the stored return_type of the UDF call doesn't match the column type of the FunctionCall.
551
+ validation_error = dedent(
552
+ f"""
553
+ The return type stored in the database for a UDF call to {fn.self_path!r} no longer
554
+ matches its return type as currently defined in the code. This probably means that the
555
+ code for {fn.self_path!r} has changed in a backward-incompatible way.
556
+ Return type of UDF call in the database: {return_type}
557
+ Return type of UDF as currently defined in code: {call_return_type}
558
+ """
559
+ ).strip()
560
+
561
+ assert return_type is not None # Guaranteed by the above logic.
562
+
472
563
  fn_call = cls(
473
- func.Function.from_dict(d['fn']), bound_args, group_by_clause=group_by_exprs,
474
- order_by_clause=order_by_exprs)
564
+ resolved_fn,
565
+ args,
566
+ kwargs,
567
+ return_type,
568
+ group_by_clause=group_by_exprs,
569
+ order_by_clause=order_by_exprs,
570
+ is_method_call=is_method_call,
571
+ validation_error=validation_error,
572
+ )
573
+
475
574
  return fn_call