pixeltable 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +5 -3
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -0
- pixeltable/catalog/catalog.py +335 -128
- pixeltable/catalog/column.py +22 -5
- pixeltable/catalog/dir.py +19 -6
- pixeltable/catalog/insertable_table.py +34 -37
- pixeltable/catalog/named_function.py +0 -4
- pixeltable/catalog/schema_object.py +28 -42
- pixeltable/catalog/table.py +193 -158
- pixeltable/catalog/table_version.py +191 -232
- pixeltable/catalog/table_version_handle.py +50 -0
- pixeltable/catalog/table_version_path.py +49 -33
- pixeltable/catalog/view.py +56 -96
- pixeltable/config.py +103 -0
- pixeltable/dataframe.py +89 -89
- pixeltable/env.py +98 -168
- pixeltable/exec/aggregation_node.py +5 -4
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/component_iteration_node.py +13 -9
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +0 -4
- pixeltable/exec/exec_node.py +3 -2
- pixeltable/exec/expr_eval/schedulers.py +2 -1
- pixeltable/exec/in_memory_data_node.py +9 -4
- pixeltable/exec/row_update_node.py +1 -2
- pixeltable/exec/sql_node.py +20 -16
- pixeltable/exprs/__init__.py +2 -0
- pixeltable/exprs/arithmetic_expr.py +7 -11
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +12 -13
- pixeltable/exprs/comparison.py +3 -6
- pixeltable/exprs/compound_predicate.py +4 -4
- pixeltable/exprs/expr.py +31 -22
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +1 -1
- pixeltable/exprs/function_call.py +110 -80
- pixeltable/exprs/globals.py +3 -3
- pixeltable/exprs/in_predicate.py +1 -1
- pixeltable/exprs/inline_expr.py +3 -3
- pixeltable/exprs/is_null.py +1 -1
- pixeltable/exprs/json_mapper.py +2 -2
- pixeltable/exprs/json_path.py +17 -10
- pixeltable/exprs/literal.py +1 -1
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/row_builder.py +8 -17
- pixeltable/exprs/rowid_ref.py +21 -10
- pixeltable/exprs/similarity_expr.py +5 -5
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/type_cast.py +2 -3
- pixeltable/exprs/variable.py +2 -2
- pixeltable/ext/__init__.py +2 -0
- pixeltable/ext/functions/__init__.py +2 -0
- pixeltable/ext/functions/yolox.py +3 -3
- pixeltable/func/__init__.py +3 -1
- pixeltable/func/aggregate_function.py +9 -9
- pixeltable/func/callable_function.py +3 -4
- pixeltable/func/expr_template_function.py +6 -16
- pixeltable/func/function.py +48 -14
- pixeltable/func/function_registry.py +1 -3
- pixeltable/func/query_template_function.py +5 -12
- pixeltable/func/signature.py +23 -22
- pixeltable/func/tools.py +3 -3
- pixeltable/func/udf.py +6 -4
- pixeltable/functions/__init__.py +2 -0
- pixeltable/functions/fireworks.py +7 -4
- pixeltable/functions/globals.py +4 -5
- pixeltable/functions/huggingface.py +1 -5
- pixeltable/functions/image.py +17 -7
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +4 -4
- pixeltable/functions/openai.py +19 -19
- pixeltable/functions/string.py +23 -30
- pixeltable/functions/timestamp.py +11 -6
- pixeltable/functions/together.py +14 -12
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +5 -4
- pixeltable/functions/vision.py +6 -9
- pixeltable/functions/whisper.py +3 -3
- pixeltable/globals.py +246 -260
- pixeltable/index/__init__.py +2 -0
- pixeltable/index/base.py +1 -1
- pixeltable/index/btree.py +3 -1
- pixeltable/index/embedding_index.py +11 -5
- pixeltable/io/external_store.py +11 -12
- pixeltable/io/label_studio.py +4 -3
- pixeltable/io/parquet.py +57 -56
- pixeltable/iterators/__init__.py +4 -2
- pixeltable/iterators/audio.py +11 -11
- pixeltable/iterators/document.py +10 -10
- pixeltable/iterators/string.py +1 -2
- pixeltable/iterators/video.py +14 -15
- pixeltable/metadata/__init__.py +9 -5
- pixeltable/metadata/converters/convert_10.py +0 -1
- pixeltable/metadata/converters/convert_15.py +0 -2
- pixeltable/metadata/converters/convert_23.py +0 -2
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_27.py +0 -2
- pixeltable/metadata/converters/convert_28.py +0 -2
- pixeltable/metadata/converters/convert_29.py +7 -8
- pixeltable/metadata/converters/util.py +7 -7
- pixeltable/metadata/schema.py +27 -19
- pixeltable/plan.py +68 -40
- pixeltable/share/__init__.py +2 -0
- pixeltable/share/packager.py +15 -12
- pixeltable/share/publish.py +3 -5
- pixeltable/store.py +37 -38
- pixeltable/type_system.py +41 -28
- pixeltable/utils/coco.py +4 -4
- pixeltable/utils/console_output.py +1 -3
- pixeltable/utils/description_helper.py +1 -1
- pixeltable/utils/documents.py +3 -3
- pixeltable/utils/filecache.py +20 -9
- pixeltable/utils/formatter.py +2 -3
- pixeltable/utils/media_store.py +1 -1
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +4 -4
- pixeltable/utils/transactional_directory.py +2 -1
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/METADATA +1 -1
- pixeltable-0.3.8.dist-info/RECORD +174 -0
- pixeltable-0.3.6.dist-info/RECORD +0 -172
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/entry_points.txt +0 -0
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
|
+
import logging
|
|
4
5
|
import sys
|
|
6
|
+
from textwrap import dedent
|
|
5
7
|
from typing import Any, Optional, Sequence, Union
|
|
6
8
|
|
|
7
9
|
import sqlalchemy as sql
|
|
8
10
|
|
|
9
|
-
import
|
|
10
|
-
import pixeltable.exceptions as excs
|
|
11
|
-
import pixeltable.func as func
|
|
12
|
-
import pixeltable.type_system as ts
|
|
11
|
+
from pixeltable import catalog, exceptions as excs, func, type_system as ts
|
|
13
12
|
|
|
14
13
|
from .data_row import DataRow
|
|
15
14
|
from .expr import Expr
|
|
@@ -18,6 +17,8 @@ from .row_builder import RowBuilder
|
|
|
18
17
|
from .rowid_ref import RowidRef
|
|
19
18
|
from .sql_element_cache import SqlElementCache
|
|
20
19
|
|
|
20
|
+
_logger = logging.getLogger('pixeltable')
|
|
21
|
+
|
|
21
22
|
|
|
22
23
|
class FunctionCall(Expr):
|
|
23
24
|
fn: func.Function
|
|
@@ -45,6 +46,8 @@ class FunctionCall(Expr):
|
|
|
45
46
|
aggregator: Optional[Any]
|
|
46
47
|
current_partition_vals: Optional[list[Any]]
|
|
47
48
|
|
|
49
|
+
_validation_error: Optional[str]
|
|
50
|
+
|
|
48
51
|
def __init__(
|
|
49
52
|
self,
|
|
50
53
|
fn: func.Function,
|
|
@@ -54,6 +57,7 @@ class FunctionCall(Expr):
|
|
|
54
57
|
order_by_clause: Optional[list[Any]] = None,
|
|
55
58
|
group_by_clause: Optional[list[Any]] = None,
|
|
56
59
|
is_method_call: bool = False,
|
|
60
|
+
validation_error: Optional[str] = None,
|
|
57
61
|
):
|
|
58
62
|
assert not fn.is_polymorphic
|
|
59
63
|
assert all(isinstance(arg, Expr) for arg in args)
|
|
@@ -76,26 +80,6 @@ class FunctionCall(Expr):
|
|
|
76
80
|
self.components.extend(arg.copy() for arg in kwargs.values())
|
|
77
81
|
self.kwarg_idxs = {name: i + len(args) for i, name in enumerate(kwargs.keys())}
|
|
78
82
|
|
|
79
|
-
# Now generate bound_idxs for the args and kwargs indices.
|
|
80
|
-
# This is guaranteed to work, because at this point the call has already been validated.
|
|
81
|
-
# These will be used later to dereference specific parameter values.
|
|
82
|
-
bindings = fn.signature.py_signature.bind(*self.arg_idxs, **self.kwarg_idxs)
|
|
83
|
-
self.bound_idxs = bindings.arguments
|
|
84
|
-
|
|
85
|
-
# Separately generate bound_args for purposes of determining the resource pool.
|
|
86
|
-
bindings = fn.signature.py_signature.bind(*args, **kwargs)
|
|
87
|
-
bound_args = bindings.arguments
|
|
88
|
-
self.resource_pool = fn.call_resource_pool(bound_args)
|
|
89
|
-
|
|
90
|
-
self.agg_init_args = {}
|
|
91
|
-
if self.is_agg_fn_call:
|
|
92
|
-
# We separate out the init args for the aggregator. Unpack Literals in init args.
|
|
93
|
-
assert isinstance(fn, func.AggregateFunction)
|
|
94
|
-
for arg_name, arg in bound_args.items():
|
|
95
|
-
if arg_name in fn.init_param_names[0]:
|
|
96
|
-
assert isinstance(arg, Literal) # This was checked during validate_call
|
|
97
|
-
self.agg_init_args[arg_name] = arg.val
|
|
98
|
-
|
|
99
83
|
# window function state:
|
|
100
84
|
# self.components[self.group_by_start_idx:self.group_by_stop_idx] contains group_by exprs
|
|
101
85
|
self.group_by_start_idx, self.group_by_stop_idx = 0, 0
|
|
@@ -125,10 +109,35 @@ class FunctionCall(Expr):
|
|
|
125
109
|
raise excs.Error(
|
|
126
110
|
f'order_by argument needs to be a Pixeltable expression, but instead is a {type(order_by_clause[0])}'
|
|
127
111
|
)
|
|
128
|
-
# don't add components after this, everthing after order_by_start_idx is part of the order_by clause
|
|
129
112
|
self.order_by_start_idx = len(self.components)
|
|
130
113
|
self.components.extend(order_by_clause)
|
|
131
114
|
|
|
115
|
+
self._validation_error = validation_error
|
|
116
|
+
|
|
117
|
+
if validation_error is not None:
|
|
118
|
+
self.resource_pool = None
|
|
119
|
+
return
|
|
120
|
+
|
|
121
|
+
# Now generate bound_idxs for the args and kwargs indices.
|
|
122
|
+
# This is guaranteed to work, because at this point the call has already been validated.
|
|
123
|
+
# These will be used later to dereference specific parameter values.
|
|
124
|
+
bindings = fn.signature.py_signature.bind(*self.arg_idxs, **self.kwarg_idxs)
|
|
125
|
+
self.bound_idxs = bindings.arguments
|
|
126
|
+
|
|
127
|
+
# Separately generate bound_args for purposes of determining the resource pool.
|
|
128
|
+
bindings = fn.signature.py_signature.bind(*args, **kwargs)
|
|
129
|
+
bound_args = bindings.arguments
|
|
130
|
+
self.resource_pool = fn.call_resource_pool(bound_args)
|
|
131
|
+
|
|
132
|
+
self.agg_init_args = {}
|
|
133
|
+
if self.is_agg_fn_call:
|
|
134
|
+
# We separate out the init args for the aggregator. Unpack Literals in init args.
|
|
135
|
+
assert isinstance(fn, func.AggregateFunction)
|
|
136
|
+
for arg_name, arg in bound_args.items():
|
|
137
|
+
if arg_name in fn.init_param_names[0]:
|
|
138
|
+
assert isinstance(arg, Literal) # This was checked during validate_call
|
|
139
|
+
self.agg_init_args[arg_name] = arg.val
|
|
140
|
+
|
|
132
141
|
# execution state for aggregate functions
|
|
133
142
|
self.aggregator = None
|
|
134
143
|
self.current_partition_vals = None
|
|
@@ -137,50 +146,50 @@ class FunctionCall(Expr):
|
|
|
137
146
|
|
|
138
147
|
def _create_rowid_refs(self, tbl: catalog.Table) -> list[Expr]:
|
|
139
148
|
target = tbl._tbl_version_path.tbl_version
|
|
140
|
-
return [RowidRef(target, i) for i in range(target.num_rowid_columns())]
|
|
149
|
+
return [RowidRef(target, i) for i in range(target.get().num_rowid_columns())]
|
|
141
150
|
|
|
142
151
|
def default_column_name(self) -> Optional[str]:
|
|
143
152
|
return self.fn.name
|
|
144
153
|
|
|
145
154
|
def _equals(self, other: FunctionCall) -> bool:
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
if self.group_by_stop_idx != other.group_by_stop_idx:
|
|
155
|
-
return False
|
|
156
|
-
if self.order_by_start_idx != other.order_by_start_idx:
|
|
157
|
-
return False
|
|
158
|
-
return True
|
|
155
|
+
return (
|
|
156
|
+
self.fn == other.fn
|
|
157
|
+
and self.arg_idxs == other.arg_idxs
|
|
158
|
+
and self.kwarg_idxs == other.kwarg_idxs
|
|
159
|
+
and self.group_by_start_idx == other.group_by_start_idx
|
|
160
|
+
and self.group_by_stop_idx == other.group_by_stop_idx
|
|
161
|
+
and self.order_by_start_idx == other.order_by_start_idx
|
|
162
|
+
)
|
|
159
163
|
|
|
160
164
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
161
|
-
return
|
|
165
|
+
return [
|
|
166
|
+
*super()._id_attrs(),
|
|
162
167
|
('fn', id(self.fn)), # use the function pointer, not the fqn, which isn't set for lambdas
|
|
163
168
|
('args', self.arg_idxs),
|
|
164
169
|
('kwargs', self.kwarg_idxs),
|
|
165
170
|
('group_by_start_idx', self.group_by_start_idx),
|
|
166
171
|
('group_by_stop_idx', self.group_by_stop_idx),
|
|
167
172
|
('fn_expr_idx', self.fn_expr_idx),
|
|
168
|
-
('
|
|
173
|
+
('order_by_start_idx', self.order_by_start_idx),
|
|
169
174
|
]
|
|
170
175
|
|
|
171
176
|
def __repr__(self) -> str:
|
|
172
177
|
return self.display_str()
|
|
173
178
|
|
|
179
|
+
@property
|
|
180
|
+
def validation_error(self) -> Optional[str]:
|
|
181
|
+
return self._validation_error or super().validation_error
|
|
182
|
+
|
|
174
183
|
def display_str(self, inline: bool = True) -> str:
|
|
175
184
|
if self.is_method_call:
|
|
176
185
|
return f'{self.components[0]}.{self.fn.name}({self._print_args(1, inline)})'
|
|
177
186
|
else:
|
|
178
|
-
fn_name = self.fn.display_name
|
|
187
|
+
fn_name = self.fn.display_name or 'anonymous_fn'
|
|
179
188
|
return f'{fn_name}({self._print_args()})'
|
|
180
189
|
|
|
181
190
|
def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
|
|
182
191
|
arg_strs = [str(self.components[idx]) for idx in self.arg_idxs[start_idx:]]
|
|
183
|
-
arg_strs.extend([f'{param_name}={
|
|
192
|
+
arg_strs.extend([f'{param_name}={self.components[idx]}' for param_name, idx in self.kwarg_idxs.items()])
|
|
184
193
|
if len(self.order_by) > 0:
|
|
185
194
|
assert isinstance(self.fn, func.AggregateFunction)
|
|
186
195
|
if self.fn.requires_order_by:
|
|
@@ -232,6 +241,8 @@ class FunctionCall(Expr):
|
|
|
232
241
|
return self.order_by
|
|
233
242
|
|
|
234
243
|
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
244
|
+
assert self.is_valid
|
|
245
|
+
|
|
235
246
|
# we currently can't translate aggregate functions with grouping and/or ordering to SQL
|
|
236
247
|
if self.has_group_by() or len(self.order_by) > 0:
|
|
237
248
|
return None
|
|
@@ -278,7 +289,7 @@ class FunctionCall(Expr):
|
|
|
278
289
|
if (
|
|
279
290
|
val is None
|
|
280
291
|
and parameters_by_pos[idx].kind
|
|
281
|
-
in
|
|
292
|
+
in {inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD}
|
|
282
293
|
and not parameters_by_pos[idx].col_type.nullable
|
|
283
294
|
):
|
|
284
295
|
return None
|
|
@@ -291,7 +302,7 @@ class FunctionCall(Expr):
|
|
|
291
302
|
if (
|
|
292
303
|
val is None
|
|
293
304
|
and parameters[param_name].kind
|
|
294
|
-
in
|
|
305
|
+
in {inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD}
|
|
295
306
|
and not parameters[param_name].col_type.nullable
|
|
296
307
|
):
|
|
297
308
|
return None
|
|
@@ -304,6 +315,7 @@ class FunctionCall(Expr):
|
|
|
304
315
|
Returns a list of dicts mapping each param name to its value when this FunctionCall is evaluated against
|
|
305
316
|
data_rows
|
|
306
317
|
"""
|
|
318
|
+
assert self.is_valid
|
|
307
319
|
assert all(name in self.fn.signature.parameters for name in param_names), f'{param_names}, {self.fn.signature}'
|
|
308
320
|
result: list[dict[str, Any]] = []
|
|
309
321
|
for row in data_rows:
|
|
@@ -327,6 +339,8 @@ class FunctionCall(Expr):
|
|
|
327
339
|
return result
|
|
328
340
|
|
|
329
341
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
342
|
+
assert self.is_valid
|
|
343
|
+
|
|
330
344
|
if isinstance(self.fn, func.ExprTemplateFunction):
|
|
331
345
|
# we need to evaluate the template
|
|
332
346
|
# TODO: can we get rid of this extra copy?
|
|
@@ -396,51 +410,66 @@ class FunctionCall(Expr):
|
|
|
396
410
|
group_by_exprs = components[group_by_start_idx:group_by_stop_idx]
|
|
397
411
|
order_by_exprs = components[order_by_start_idx:]
|
|
398
412
|
|
|
413
|
+
validation_error: Optional[str] = None
|
|
414
|
+
|
|
415
|
+
if isinstance(fn, func.InvalidFunction):
|
|
416
|
+
validation_error = (
|
|
417
|
+
dedent(
|
|
418
|
+
f"""
|
|
419
|
+
The UDF '{fn.self_path}' cannot be located, because
|
|
420
|
+
{{errormsg}}
|
|
421
|
+
"""
|
|
422
|
+
)
|
|
423
|
+
.strip()
|
|
424
|
+
.format(errormsg=fn.errormsg)
|
|
425
|
+
)
|
|
426
|
+
return cls(fn, args, kwargs, return_type, is_method_call=is_method_call, validation_error=validation_error)
|
|
427
|
+
|
|
399
428
|
# Now re-bind args and kwargs using the version of `fn` that is currently represented in code. This ensures
|
|
400
429
|
# that we get a valid binding even if the signatures of `fn` have changed since the FunctionCall was
|
|
401
430
|
# serialized.
|
|
402
431
|
|
|
403
|
-
resolved_fn: func.Function
|
|
404
|
-
bound_args: dict[str, Expr]
|
|
432
|
+
resolved_fn: func.Function = fn
|
|
405
433
|
|
|
406
434
|
try:
|
|
435
|
+
# Bind args and kwargs to the function signature in the current codebase.
|
|
407
436
|
resolved_fn, bound_args = fn._bind_to_matching_signature(args, kwargs)
|
|
408
437
|
except (TypeError, excs.Error):
|
|
409
|
-
# TODO: Handle this more gracefully (instead of failing the DB load, allow the DB load to succeed, but
|
|
410
|
-
# mark any enclosing FunctionCall as unusable). It's the same issue as dealing with a renamed UDF or
|
|
411
|
-
# FunctionCall return type mismatch.
|
|
412
438
|
signature_note_str = 'any of its signatures' if fn.is_polymorphic else 'its signature'
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
f
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
# Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious way to
|
|
427
|
-
# infer it during DB migration, so we might encounter a stored return_type of None. In that case, we use
|
|
428
|
-
# the call_return_type that we just inferred (which matches the deserialization behavior prior to
|
|
429
|
-
# version 25).
|
|
430
|
-
return_type = call_return_type
|
|
439
|
+
args_str = [str(arg.col_type) for arg in args]
|
|
440
|
+
args_str.extend(f'{name}: {arg.col_type}' for name, arg in kwargs.items())
|
|
441
|
+
call_signature_str = f'({", ".join(args_str)}) -> {return_type}'
|
|
442
|
+
fn_signature_str = f'{len(fn.signatures)} signatures' if fn.is_polymorphic else str(fn.signature)
|
|
443
|
+
validation_error = dedent(
|
|
444
|
+
f"""
|
|
445
|
+
The signature stored in the database for a UDF call to {fn.self_path!r} no longer
|
|
446
|
+
matches {signature_note_str} as currently defined in the code. This probably means that the
|
|
447
|
+
code for {fn.self_path!r} has changed in a backward-incompatible way.
|
|
448
|
+
Signature of UDF call in the database: {call_signature_str}
|
|
449
|
+
Signature of UDF as currently defined in code: {fn_signature_str}
|
|
450
|
+
"""
|
|
451
|
+
).strip()
|
|
431
452
|
else:
|
|
432
|
-
#
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
453
|
+
# Evaluate the call_return_type as defined in the current codebase.
|
|
454
|
+
call_return_type = resolved_fn.call_return_type(bound_args)
|
|
455
|
+
if return_type is None:
|
|
456
|
+
# Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious way to
|
|
457
|
+
# infer it during DB migration, so we might encounter a stored return_type of None. In that case, we use
|
|
458
|
+
# the call_return_type that we just inferred (which matches the deserialization behavior prior to
|
|
459
|
+
# version 25).
|
|
460
|
+
return_type = call_return_type
|
|
461
|
+
elif not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
|
|
462
|
+
# There is a return_type stored in metadata (schema version >= 25),
|
|
463
|
+
# and the stored return_type of the UDF call doesn't match the column type of the FunctionCall.
|
|
464
|
+
validation_error = dedent(
|
|
465
|
+
f"""
|
|
466
|
+
The return type stored in the database for a UDF call to {fn.self_path!r} no longer
|
|
467
|
+
matches its return type as currently defined in the code. This probably means that the
|
|
468
|
+
code for {fn.self_path!r} has changed in a backward-incompatible way.
|
|
469
|
+
Return type of UDF call in the database: {return_type}
|
|
470
|
+
Return type of UDF as currently defined in code: {call_return_type}
|
|
471
|
+
"""
|
|
472
|
+
).strip()
|
|
444
473
|
|
|
445
474
|
fn_call = cls(
|
|
446
475
|
resolved_fn,
|
|
@@ -450,6 +479,7 @@ class FunctionCall(Expr):
|
|
|
450
479
|
group_by_clause=group_by_exprs,
|
|
451
480
|
order_by_clause=order_by_exprs,
|
|
452
481
|
is_method_call=is_method_call,
|
|
482
|
+
validation_error=validation_error,
|
|
453
483
|
)
|
|
454
484
|
|
|
455
485
|
return fn_call
|
pixeltable/exprs/globals.py
CHANGED
|
@@ -36,7 +36,7 @@ class ComparisonOperator(enum.Enum):
|
|
|
36
36
|
return '>'
|
|
37
37
|
if self == self.GE:
|
|
38
38
|
return '>='
|
|
39
|
-
|
|
39
|
+
raise AssertionError()
|
|
40
40
|
|
|
41
41
|
def reverse(self) -> ComparisonOperator:
|
|
42
42
|
if self == self.LT:
|
|
@@ -62,7 +62,7 @@ class LogicalOperator(enum.Enum):
|
|
|
62
62
|
return '|'
|
|
63
63
|
if self == self.NOT:
|
|
64
64
|
return '~'
|
|
65
|
-
|
|
65
|
+
raise AssertionError()
|
|
66
66
|
|
|
67
67
|
|
|
68
68
|
class ArithmeticOperator(enum.Enum):
|
|
@@ -86,4 +86,4 @@ class ArithmeticOperator(enum.Enum):
|
|
|
86
86
|
return '%'
|
|
87
87
|
if self == self.FLOORDIV:
|
|
88
88
|
return '//'
|
|
89
|
-
|
|
89
|
+
raise AssertionError()
|
pixeltable/exprs/in_predicate.py
CHANGED
|
@@ -71,7 +71,7 @@ class InPredicate(Expr):
|
|
|
71
71
|
return self.value_list == other.value_list
|
|
72
72
|
|
|
73
73
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
74
|
-
return super()._id_attrs()
|
|
74
|
+
return [*super()._id_attrs(), ('value_list', self.value_list)]
|
|
75
75
|
|
|
76
76
|
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
77
77
|
lhs_sql_exprs = sql_elements.get(self.components[0])
|
pixeltable/exprs/inline_expr.py
CHANGED
|
@@ -131,7 +131,7 @@ class InlineList(Expr):
|
|
|
131
131
|
def as_literal(self) -> Optional[Literal]:
|
|
132
132
|
if not all(isinstance(comp, Literal) for comp in self.components):
|
|
133
133
|
return None
|
|
134
|
-
return Literal(
|
|
134
|
+
return Literal([c.as_literal().val for c in self.components], self.col_type)
|
|
135
135
|
|
|
136
136
|
|
|
137
137
|
class InlineDict(Expr):
|
|
@@ -166,7 +166,7 @@ class InlineDict(Expr):
|
|
|
166
166
|
self.id = self._create_id()
|
|
167
167
|
|
|
168
168
|
def __repr__(self) -> str:
|
|
169
|
-
item_strs =
|
|
169
|
+
item_strs = [f"'{key}': {expr}" for key, expr in zip(self.keys, self.components)]
|
|
170
170
|
return '{' + ', '.join(item_strs) + '}'
|
|
171
171
|
|
|
172
172
|
def _equals(self, other: InlineDict) -> bool:
|
|
@@ -174,7 +174,7 @@ class InlineDict(Expr):
|
|
|
174
174
|
return self.keys == other.keys
|
|
175
175
|
|
|
176
176
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
177
|
-
return super()._id_attrs()
|
|
177
|
+
return [*super()._id_attrs(), ('keys', self.keys)]
|
|
178
178
|
|
|
179
179
|
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
180
180
|
return None
|
pixeltable/exprs/is_null.py
CHANGED
pixeltable/exprs/json_mapper.py
CHANGED
|
@@ -81,12 +81,12 @@ class JsonMapper(Expr):
|
|
|
81
81
|
"""
|
|
82
82
|
We override equals() because we need to avoid comparing our scope anchor.
|
|
83
83
|
"""
|
|
84
|
-
if type(self)
|
|
84
|
+
if type(self) is not type(other):
|
|
85
85
|
return False
|
|
86
86
|
return self._src_expr.equals(other._src_expr) and self._target_expr.equals(other._target_expr)
|
|
87
87
|
|
|
88
88
|
def __repr__(self) -> str:
|
|
89
|
-
return f'{
|
|
89
|
+
return f'{self._src_expr} >> {self._target_expr}'
|
|
90
90
|
|
|
91
91
|
@property
|
|
92
92
|
def _src_expr(self) -> Expr:
|
pixeltable/exprs/json_path.py
CHANGED
|
@@ -6,14 +6,13 @@ import jmespath
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
8
|
import pixeltable as pxt
|
|
9
|
-
import
|
|
10
|
-
import pixeltable.exceptions as excs
|
|
11
|
-
import pixeltable.type_system as ts
|
|
9
|
+
from pixeltable import catalog, exceptions as excs, type_system as ts
|
|
12
10
|
|
|
13
11
|
from .data_row import DataRow
|
|
14
12
|
from .expr import Expr
|
|
15
13
|
from .globals import print_slice
|
|
16
14
|
from .json_mapper import JsonMapper
|
|
15
|
+
from .object_ref import ObjectRef
|
|
17
16
|
from .row_builder import RowBuilder
|
|
18
17
|
from .sql_element_cache import SqlElementCache
|
|
19
18
|
|
|
@@ -50,8 +49,16 @@ class JsonPath(Expr):
|
|
|
50
49
|
return f'{anchor_str}{"." if isinstance(self.path_elements[0], str) else ""}{self._json_path()}'
|
|
51
50
|
|
|
52
51
|
def _as_dict(self) -> dict:
|
|
52
|
+
assert len(self.components) <= 1
|
|
53
|
+
components_dict: dict[str, Any]
|
|
54
|
+
if len(self.components) == 0 or isinstance(self.components[0], ObjectRef):
|
|
55
|
+
# If the anchor is an ObjectRef, it means this JsonPath is a bound relative path. We store it as a relative
|
|
56
|
+
# path, *not* a bound path (which has no meaning in the dict).
|
|
57
|
+
components_dict = {}
|
|
58
|
+
else:
|
|
59
|
+
components_dict = super()._as_dict()
|
|
53
60
|
path_elements = [[el.start, el.stop, el.step] if isinstance(el, slice) else el for el in self.path_elements]
|
|
54
|
-
return {'path_elements': path_elements, 'scope_idx': self.scope_idx, **
|
|
61
|
+
return {'path_elements': path_elements, 'scope_idx': self.scope_idx, **components_dict}
|
|
55
62
|
|
|
56
63
|
@classmethod
|
|
57
64
|
def _from_dict(cls, d: dict, components: list[Expr]) -> JsonPath:
|
|
@@ -84,18 +91,18 @@ class JsonPath(Expr):
|
|
|
84
91
|
Construct a relative path that references an ancestor of the immediately enclosing JsonMapper.
|
|
85
92
|
"""
|
|
86
93
|
if not self.is_relative_path():
|
|
87
|
-
raise excs.Error(
|
|
94
|
+
raise excs.Error('() for an absolute path is invalid')
|
|
88
95
|
if len(args) != 1 or not isinstance(args[0], int) or args[0] >= 0:
|
|
89
|
-
raise excs.Error(
|
|
96
|
+
raise excs.Error('R() requires a negative index')
|
|
90
97
|
return JsonPath(None, [], args[0])
|
|
91
98
|
|
|
92
99
|
def __getattr__(self, name: str) -> 'JsonPath':
|
|
93
100
|
assert isinstance(name, str)
|
|
94
|
-
return JsonPath(self._anchor, self.path_elements
|
|
101
|
+
return JsonPath(self._anchor, [*self.path_elements, name])
|
|
95
102
|
|
|
96
103
|
def __getitem__(self, index: object) -> 'JsonPath':
|
|
97
104
|
if isinstance(index, (int, slice, str)):
|
|
98
|
-
return JsonPath(self._anchor, self.path_elements
|
|
105
|
+
return JsonPath(self._anchor, [*self.path_elements, index])
|
|
99
106
|
raise excs.Error(f'Invalid json list index: {index}')
|
|
100
107
|
|
|
101
108
|
def __rshift__(self, other: object) -> 'JsonMapper':
|
|
@@ -120,7 +127,7 @@ class JsonPath(Expr):
|
|
|
120
127
|
|
|
121
128
|
clean_name = ''.join(map(cleanup_char, ret_name))
|
|
122
129
|
clean_name = clean_name.lstrip('_') # remove leading underscore
|
|
123
|
-
if clean_name
|
|
130
|
+
if not clean_name: # Replace '' with None
|
|
124
131
|
clean_name = None
|
|
125
132
|
|
|
126
133
|
assert clean_name is None or catalog.is_valid_identifier(clean_name)
|
|
@@ -130,7 +137,7 @@ class JsonPath(Expr):
|
|
|
130
137
|
return self.path_elements == other.path_elements
|
|
131
138
|
|
|
132
139
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
133
|
-
return super()._id_attrs()
|
|
140
|
+
return [*super()._id_attrs(), ('path_elements', self.path_elements)]
|
|
134
141
|
|
|
135
142
|
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
136
143
|
"""
|
pixeltable/exprs/literal.py
CHANGED
|
@@ -62,7 +62,7 @@ class Literal(Expr):
|
|
|
62
62
|
return self.val == other.val
|
|
63
63
|
|
|
64
64
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
65
|
-
return super()._id_attrs()
|
|
65
|
+
return [*super()._id_attrs(), ('val', self.val)]
|
|
66
66
|
|
|
67
67
|
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
68
68
|
# Return a sql object so that constants can participate in SQL expressions
|
pixeltable/exprs/method_ref.py
CHANGED
|
@@ -53,13 +53,13 @@ class MethodRef(Expr):
|
|
|
53
53
|
return self.base_expr.id == other.base_expr.id and self.method_name == other.method_name
|
|
54
54
|
|
|
55
55
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
56
|
-
return super()._id_attrs()
|
|
56
|
+
return [*super()._id_attrs(), ('method_name', self.method_name)]
|
|
57
57
|
|
|
58
58
|
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
59
59
|
return None
|
|
60
60
|
|
|
61
61
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
62
|
-
|
|
62
|
+
raise AssertionError('MethodRef cannot be evaluated directly')
|
|
63
63
|
|
|
64
64
|
def __repr__(self) -> str:
|
|
65
65
|
return f'{self.base_expr}.{self.method_name}'
|
pixeltable/exprs/row_builder.py
CHANGED
|
@@ -7,12 +7,8 @@ from typing import Any, Iterable, Optional, Sequence
|
|
|
7
7
|
from uuid import UUID
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
|
-
import sqlalchemy as sql
|
|
11
10
|
|
|
12
|
-
import
|
|
13
|
-
import pixeltable.exceptions as excs
|
|
14
|
-
import pixeltable.func as func
|
|
15
|
-
import pixeltable.utils as utils
|
|
11
|
+
from pixeltable import catalog, exceptions as excs, utils
|
|
16
12
|
from pixeltable.env import Env
|
|
17
13
|
from pixeltable.utils.media_store import MediaStore
|
|
18
14
|
|
|
@@ -174,11 +170,13 @@ class RowBuilder:
|
|
|
174
170
|
|
|
175
171
|
def refs_unstored_iter_col(col_ref: ColumnRef) -> bool:
|
|
176
172
|
tbl = col_ref.col.tbl
|
|
177
|
-
return
|
|
173
|
+
return (
|
|
174
|
+
tbl.get().is_component_view and tbl.get().is_iterator_column(col_ref.col) and not col_ref.col.is_stored
|
|
175
|
+
)
|
|
178
176
|
|
|
179
177
|
unstored_iter_col_refs = [col_ref for col_ref in col_refs if refs_unstored_iter_col(col_ref)]
|
|
180
178
|
component_views = [col_ref.col.tbl for col_ref in unstored_iter_col_refs]
|
|
181
|
-
unstored_iter_args = {view.id: view.iterator_args.copy() for view in component_views}
|
|
179
|
+
unstored_iter_args = {view.id: view.get().iterator_args.copy() for view in component_views}
|
|
182
180
|
self.unstored_iter_args = {
|
|
183
181
|
id: self._record_unique_expr(arg, recursive=True) for id, arg in unstored_iter_args.items()
|
|
184
182
|
}
|
|
@@ -236,13 +234,6 @@ class RowBuilder:
|
|
|
236
234
|
"""Return ColumnSlotIdx for output columns"""
|
|
237
235
|
return self.table_columns
|
|
238
236
|
|
|
239
|
-
def set_conn(self, conn: sql.engine.Connection) -> None:
|
|
240
|
-
from .function_call import FunctionCall
|
|
241
|
-
|
|
242
|
-
for expr in self.unique_exprs:
|
|
243
|
-
if isinstance(expr, FunctionCall) and isinstance(expr.fn, func.QueryTemplateFunction):
|
|
244
|
-
expr.fn.set_conn(conn)
|
|
245
|
-
|
|
246
237
|
@property
|
|
247
238
|
def num_materialized(self) -> int:
|
|
248
239
|
return self.next_slot_idx
|
|
@@ -373,8 +364,8 @@ class RowBuilder:
|
|
|
373
364
|
def set_exc(self, data_row: DataRow, slot_idx: int, exc: Exception) -> None:
|
|
374
365
|
"""Record an exception in data_row and propagate it to dependents"""
|
|
375
366
|
data_row.set_exc(slot_idx, exc)
|
|
376
|
-
for
|
|
377
|
-
data_row.set_exc(
|
|
367
|
+
for idx in self._exc_dependents[slot_idx]:
|
|
368
|
+
data_row.set_exc(idx, exc)
|
|
378
369
|
|
|
379
370
|
def eval(
|
|
380
371
|
self,
|
|
@@ -432,7 +423,7 @@ class RowBuilder:
|
|
|
432
423
|
else:
|
|
433
424
|
if col.col_type.is_image_type() and data_row.file_urls[slot_idx] is None:
|
|
434
425
|
# we have yet to store this image
|
|
435
|
-
filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.version))
|
|
426
|
+
filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.get().version))
|
|
436
427
|
data_row.flush_img(slot_idx, filepath)
|
|
437
428
|
val = data_row.get_stored_val(slot_idx, col.sa_col.type)
|
|
438
429
|
table_row[col.store_name()] = val
|
pixeltable/exprs/rowid_ref.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any, Optional
|
|
3
|
+
from typing import Any, Optional, cast
|
|
4
4
|
from uuid import UUID
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
|
-
import
|
|
9
|
-
import pixeltable.type_system as ts
|
|
8
|
+
from pixeltable import catalog, type_system as ts
|
|
10
9
|
|
|
11
10
|
from .data_row import DataRow
|
|
12
11
|
from .expr import Expr
|
|
@@ -23,9 +22,15 @@ class RowidRef(Expr):
|
|
|
23
22
|
(with and without a TableVersion).
|
|
24
23
|
"""
|
|
25
24
|
|
|
25
|
+
tbl: Optional[catalog.TableVersionHandle]
|
|
26
|
+
normalized_base: Optional[catalog.TableVersionHandle]
|
|
27
|
+
tbl_id: UUID
|
|
28
|
+
normalized_base_id: UUID
|
|
29
|
+
rowid_component_idx: int
|
|
30
|
+
|
|
26
31
|
def __init__(
|
|
27
32
|
self,
|
|
28
|
-
tbl: catalog.
|
|
33
|
+
tbl: catalog.TableVersionHandle,
|
|
29
34
|
idx: int,
|
|
30
35
|
tbl_id: Optional[UUID] = None,
|
|
31
36
|
normalized_base_id: Optional[UUID] = None,
|
|
@@ -37,8 +42,8 @@ class RowidRef(Expr):
|
|
|
37
42
|
# (which has the same values as all its descendent views)
|
|
38
43
|
normalized_base = tbl
|
|
39
44
|
# don't try to reference tbl.store_tbl here
|
|
40
|
-
while normalized_base.base is not None and normalized_base.base.num_rowid_columns() > idx:
|
|
41
|
-
normalized_base = normalized_base.base
|
|
45
|
+
while normalized_base.get().base is not None and normalized_base.get().base.get().num_rowid_columns() > idx:
|
|
46
|
+
normalized_base = normalized_base.get().base
|
|
42
47
|
self.normalized_base = normalized_base
|
|
43
48
|
else:
|
|
44
49
|
self.normalized_base = None
|
|
@@ -59,15 +64,21 @@ class RowidRef(Expr):
|
|
|
59
64
|
)
|
|
60
65
|
|
|
61
66
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
62
|
-
return
|
|
67
|
+
return [
|
|
68
|
+
*super()._id_attrs(),
|
|
63
69
|
('normalized_base_id', self.normalized_base_id),
|
|
64
70
|
('idx', self.rowid_component_idx),
|
|
65
71
|
]
|
|
66
72
|
|
|
67
73
|
def __repr__(self) -> str:
|
|
68
74
|
# check if this is the pos column of a component view
|
|
69
|
-
|
|
70
|
-
|
|
75
|
+
from pixeltable import store
|
|
76
|
+
|
|
77
|
+
tbl = self.tbl.get() if self.tbl is not None else catalog.Catalog.get().get_tbl_version(self.tbl_id, None)
|
|
78
|
+
if (
|
|
79
|
+
tbl.is_component_view
|
|
80
|
+
and self.rowid_component_idx == cast(store.StoreComponentView, tbl.store_tbl).pos_col_idx
|
|
81
|
+
):
|
|
71
82
|
return catalog.globals._POS_COLUMN_NAME
|
|
72
83
|
return ''
|
|
73
84
|
|
|
@@ -85,7 +96,7 @@ class RowidRef(Expr):
|
|
|
85
96
|
self.tbl_id = self.tbl.id
|
|
86
97
|
|
|
87
98
|
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
88
|
-
tbl = self.tbl if self.tbl is not None else catalog.Catalog.get().
|
|
99
|
+
tbl = self.tbl.get() if self.tbl is not None else catalog.Catalog.get().get_tbl_version(self.tbl_id, None)
|
|
89
100
|
rowid_cols = tbl.store_tbl.rowid_columns()
|
|
90
101
|
return rowid_cols[self.rowid_component_idx]
|
|
91
102
|
|