pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +42 -8
- pixeltable/{dataframe.py → _query.py} +470 -206
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -4
- pixeltable/catalog/catalog.py +1785 -432
- pixeltable/catalog/column.py +190 -113
- pixeltable/catalog/dir.py +2 -4
- pixeltable/catalog/globals.py +19 -46
- pixeltable/catalog/insertable_table.py +191 -98
- pixeltable/catalog/path.py +63 -23
- pixeltable/catalog/schema_object.py +11 -15
- pixeltable/catalog/table.py +843 -436
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +978 -657
- pixeltable/catalog/table_version_handle.py +72 -16
- pixeltable/catalog/table_version_path.py +112 -43
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +134 -90
- pixeltable/config.py +134 -22
- pixeltable/env.py +471 -157
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +4 -1
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +11 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +106 -56
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +19 -19
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +351 -84
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +36 -23
- pixeltable/exprs/column_ref.py +213 -89
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +164 -54
- pixeltable/exprs/expr.py +70 -44
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +100 -40
- pixeltable/exprs/globals.py +2 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +18 -32
- pixeltable/exprs/is_null.py +7 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +27 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +167 -67
- pixeltable/exprs/rowid_ref.py +25 -10
- pixeltable/exprs/similarity_expr.py +58 -40
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +17 -11
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +29 -27
- pixeltable/func/signature.py +46 -19
- pixeltable/func/tools.py +31 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +16 -0
- pixeltable/functions/anthropic.py +123 -77
- pixeltable/functions/audio.py +147 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +7 -4
- pixeltable/functions/deepseek.py +35 -43
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +11 -20
- pixeltable/functions/gemini.py +195 -39
- pixeltable/functions/globals.py +142 -14
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1056 -24
- pixeltable/functions/image.py +115 -57
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +28 -13
- pixeltable/functions/math.py +67 -5
- pixeltable/functions/mistralai.py +18 -55
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +20 -13
- pixeltable/functions/openai.py +240 -226
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +4 -4
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +239 -69
- pixeltable/functions/timestamp.py +16 -16
- pixeltable/functions/together.py +24 -84
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1515 -107
- pixeltable/functions/vision.py +8 -8
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +16 -8
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +362 -115
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +28 -22
- pixeltable/index/embedding_index.py +100 -118
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +8 -7
- pixeltable/io/external_store.py +56 -105
- pixeltable/io/fiftyone.py +13 -13
- pixeltable/io/globals.py +31 -30
- pixeltable/io/hf_datasets.py +61 -16
- pixeltable/io/label_studio.py +74 -70
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +21 -12
- pixeltable/io/parquet.py +25 -105
- pixeltable/io/table_data_conduit.py +250 -123
- pixeltable/io/utils.py +4 -4
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +26 -25
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +112 -78
- pixeltable/iterators/image.py +12 -15
- pixeltable/iterators/string.py +11 -4
- pixeltable/iterators/video.py +523 -120
- pixeltable/metadata/__init__.py +14 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_30.py +34 -21
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +20 -31
- pixeltable/metadata/notes.py +9 -0
- pixeltable/metadata/schema.py +140 -53
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +382 -115
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +547 -83
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +257 -59
- pixeltable/store.py +311 -194
- pixeltable/type_system.py +373 -211
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +131 -17
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +6 -6
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +32 -6
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +7 -18
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +86 -48
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +26 -0
- pixeltable/utils/system.py +30 -0
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -40
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable-0.3.14.dist-info/METADATA +0 -434
- pixeltable-0.3.14.dist-info/RECORD +0 -186
- pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
|
@@ -4,7 +4,7 @@ import inspect
|
|
|
4
4
|
import logging
|
|
5
5
|
import sys
|
|
6
6
|
from textwrap import dedent
|
|
7
|
-
from typing import Any,
|
|
7
|
+
from typing import Any, Sequence
|
|
8
8
|
|
|
9
9
|
import sqlalchemy as sql
|
|
10
10
|
|
|
@@ -24,7 +24,7 @@ class FunctionCall(Expr):
|
|
|
24
24
|
fn: func.Function
|
|
25
25
|
is_method_call: bool
|
|
26
26
|
agg_init_args: dict[str, Any]
|
|
27
|
-
resource_pool:
|
|
27
|
+
resource_pool: str | None
|
|
28
28
|
|
|
29
29
|
# These collections hold the component indices corresponding to the args and kwargs
|
|
30
30
|
# that were passed to the FunctionCall. They're 1:1 with the original call pattern.
|
|
@@ -36,17 +36,17 @@ class FunctionCall(Expr):
|
|
|
36
36
|
# - a component index, if the parameter is a non-variadic parameter
|
|
37
37
|
# - a list of component indices, if the parameter is a variadic positional parameter
|
|
38
38
|
# - a dict mapping keyword names to component indices, if the parameter is a variadic keyword parameter
|
|
39
|
-
bound_idxs: dict[str,
|
|
39
|
+
bound_idxs: dict[str, int | list[int] | dict[str, int]]
|
|
40
40
|
|
|
41
41
|
return_type: ts.ColumnType
|
|
42
42
|
group_by_start_idx: int
|
|
43
43
|
group_by_stop_idx: int
|
|
44
44
|
fn_expr_idx: int
|
|
45
45
|
order_by_start_idx: int
|
|
46
|
-
aggregator:
|
|
47
|
-
current_partition_vals:
|
|
46
|
+
aggregator: Any | None
|
|
47
|
+
current_partition_vals: list[Any] | None
|
|
48
48
|
|
|
49
|
-
_validation_error:
|
|
49
|
+
_validation_error: str | None
|
|
50
50
|
|
|
51
51
|
def __init__(
|
|
52
52
|
self,
|
|
@@ -54,10 +54,10 @@ class FunctionCall(Expr):
|
|
|
54
54
|
args: list[Expr],
|
|
55
55
|
kwargs: dict[str, Expr],
|
|
56
56
|
return_type: ts.ColumnType,
|
|
57
|
-
order_by_clause:
|
|
58
|
-
group_by_clause:
|
|
57
|
+
order_by_clause: list[Any] | None = None,
|
|
58
|
+
group_by_clause: list[Any] | None = None,
|
|
59
59
|
is_method_call: bool = False,
|
|
60
|
-
validation_error:
|
|
60
|
+
validation_error: str | None = None,
|
|
61
61
|
):
|
|
62
62
|
assert not fn.is_polymorphic
|
|
63
63
|
assert all(isinstance(arg, Expr) for arg in args)
|
|
@@ -115,6 +115,7 @@ class FunctionCall(Expr):
|
|
|
115
115
|
self._validation_error = validation_error
|
|
116
116
|
|
|
117
117
|
if validation_error is not None:
|
|
118
|
+
self.bound_idxs = {}
|
|
118
119
|
self.resource_pool = None
|
|
119
120
|
return
|
|
120
121
|
|
|
@@ -148,7 +149,7 @@ class FunctionCall(Expr):
|
|
|
148
149
|
target = tbl._tbl_version_path.tbl_version
|
|
149
150
|
return [RowidRef(target, i) for i in range(target.get().num_rowid_columns())]
|
|
150
151
|
|
|
151
|
-
def default_column_name(self) ->
|
|
152
|
+
def default_column_name(self) -> str | None:
|
|
152
153
|
return self.fn.name
|
|
153
154
|
|
|
154
155
|
def _equals(self, other: FunctionCall) -> bool:
|
|
@@ -176,11 +177,19 @@ class FunctionCall(Expr):
|
|
|
176
177
|
def __repr__(self) -> str:
|
|
177
178
|
return self.display_str()
|
|
178
179
|
|
|
180
|
+
# def __repr__(self) -> str:
|
|
181
|
+
# return f'FunctionCall(fn={self.fn!r}, args={self.args!r}, kwargs={self.kwargs!r})'
|
|
182
|
+
|
|
179
183
|
@property
|
|
180
|
-
def validation_error(self) ->
|
|
184
|
+
def validation_error(self) -> str | None:
|
|
181
185
|
return self._validation_error or super().validation_error
|
|
182
186
|
|
|
183
187
|
def display_str(self, inline: bool = True) -> str:
|
|
188
|
+
if isinstance(self.fn, func.ExprTemplateFunction) and isinstance(self.fn.template.expr, FunctionCall):
|
|
189
|
+
# If this FunctionCall uses an ExprTemplateFunction with a nested FunctionCall, then resolve the
|
|
190
|
+
# indirection by substitution into the ExprTemplateFunction.
|
|
191
|
+
subst = self.fn.instantiate(self.args, self.kwargs)
|
|
192
|
+
return subst.display_str(inline)
|
|
184
193
|
if self.is_method_call:
|
|
185
194
|
return f'{self.components[0]}.{self.fn.name}({self._print_args(1, inline)})'
|
|
186
195
|
else:
|
|
@@ -244,7 +253,7 @@ class FunctionCall(Expr):
|
|
|
244
253
|
assert self.is_agg_fn_call
|
|
245
254
|
return self.order_by
|
|
246
255
|
|
|
247
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
256
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
248
257
|
assert self.is_valid
|
|
249
258
|
|
|
250
259
|
# we currently can't translate aggregate functions with grouping and/or ordering to SQL
|
|
@@ -300,10 +309,32 @@ class FunctionCall(Expr):
|
|
|
300
309
|
"""
|
|
301
310
|
res = super().substitute(spec)
|
|
302
311
|
assert res is self
|
|
303
|
-
|
|
304
|
-
|
|
312
|
+
if self.is_valid:
|
|
313
|
+
# If this FunctionCall is valid, re-evaluate the call_return_type of the substituted expression. If the
|
|
314
|
+
# FunctionCall is not valid, it isn't safe to do this. (Really we should be asserting that it *is* valid,
|
|
315
|
+
# but we still need to be able to do substitutions on invalid FunctionCalls, because loading an
|
|
316
|
+
# EmbeddingIndex from the db involves reconstructing the requisite (substituted) FunctionCalls. We could
|
|
317
|
+
# fix this by separately persisting the FunctionCall instances held by EmbeddingIndex to the db. That's
|
|
318
|
+
# probably a good idea, but it's also probably not urgent, since it only affects Functions that have a
|
|
319
|
+
# conditional_return_type implemented.)
|
|
320
|
+
self.return_type = self.fn.call_return_type(self.bound_args)
|
|
321
|
+
self.col_type = self.return_type
|
|
305
322
|
return self
|
|
306
323
|
|
|
324
|
+
@property
|
|
325
|
+
def args(self) -> list[Expr]:
|
|
326
|
+
return [self.components[idx] for idx in self.arg_idxs]
|
|
327
|
+
|
|
328
|
+
@property
|
|
329
|
+
def kwargs(self) -> dict[str, Expr]:
|
|
330
|
+
return {name: self.components[idx] for name, idx in self.kwarg_idxs.items()}
|
|
331
|
+
|
|
332
|
+
@property
|
|
333
|
+
def fn_expr(self) -> Expr | None:
|
|
334
|
+
if self.fn_expr_idx != sys.maxsize:
|
|
335
|
+
return self.components[self.fn_expr_idx]
|
|
336
|
+
return None
|
|
337
|
+
|
|
307
338
|
def update(self, data_row: DataRow) -> None:
|
|
308
339
|
"""
|
|
309
340
|
Update agg state
|
|
@@ -312,7 +343,7 @@ class FunctionCall(Expr):
|
|
|
312
343
|
args, kwargs = self.make_args(data_row)
|
|
313
344
|
self.aggregator.update(*args, **kwargs)
|
|
314
345
|
|
|
315
|
-
def make_args(self, data_row: DataRow) ->
|
|
346
|
+
def make_args(self, data_row: DataRow) -> tuple[list[Any], dict[str, Any]] | None:
|
|
316
347
|
"""Return args and kwargs, constructed for data_row; returns None if any non-nullable arg is None."""
|
|
317
348
|
args: list[Any] = []
|
|
318
349
|
parameters_by_pos = self.fn.signature.parameters_by_pos
|
|
@@ -439,18 +470,18 @@ class FunctionCall(Expr):
|
|
|
439
470
|
group_by_exprs = components[group_by_start_idx:group_by_stop_idx]
|
|
440
471
|
order_by_exprs = components[order_by_start_idx:]
|
|
441
472
|
|
|
442
|
-
validation_error:
|
|
473
|
+
validation_error: str | None = None
|
|
443
474
|
|
|
444
475
|
if isinstance(fn, func.InvalidFunction):
|
|
445
476
|
validation_error = (
|
|
446
477
|
dedent(
|
|
447
478
|
f"""
|
|
448
479
|
The UDF '{fn.self_path}' cannot be located, because
|
|
449
|
-
{{
|
|
480
|
+
{{error_msg}}
|
|
450
481
|
"""
|
|
451
482
|
)
|
|
452
483
|
.strip()
|
|
453
|
-
.format(
|
|
484
|
+
.format(error_msg=fn.error_msg)
|
|
454
485
|
)
|
|
455
486
|
return cls(fn, args, kwargs, return_type, is_method_call=is_method_call, validation_error=validation_error)
|
|
456
487
|
|
|
@@ -465,9 +496,9 @@ class FunctionCall(Expr):
|
|
|
465
496
|
resolved_fn, bound_args = fn._bind_to_matching_signature(args, kwargs)
|
|
466
497
|
except (TypeError, excs.Error):
|
|
467
498
|
signature_note_str = 'any of its signatures' if fn.is_polymorphic else 'its signature'
|
|
468
|
-
args_str = [
|
|
469
|
-
args_str.extend(f'{name}: {arg.col_type}' for name, arg in kwargs.items())
|
|
470
|
-
call_signature_str = f'({", ".join(args_str)}) -> {return_type}'
|
|
499
|
+
args_str = [f'pxt.{arg.col_type}' for arg in args]
|
|
500
|
+
args_str.extend(f'{name}: pxt.{arg.col_type}' for name, arg in kwargs.items())
|
|
501
|
+
call_signature_str = f'({", ".join(args_str)}) -> pxt.{return_type}'
|
|
471
502
|
fn_signature_str = f'{len(fn.signatures)} signatures' if fn.is_polymorphic else str(fn.signature)
|
|
472
503
|
validation_error = dedent(
|
|
473
504
|
f"""
|
|
@@ -480,25 +511,54 @@ class FunctionCall(Expr):
|
|
|
480
511
|
).strip()
|
|
481
512
|
else:
|
|
482
513
|
# Evaluate the call_return_type as defined in the current codebase.
|
|
483
|
-
call_return_type =
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
#
|
|
487
|
-
#
|
|
488
|
-
#
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
514
|
+
call_return_type: ts.ColumnType | None = None
|
|
515
|
+
|
|
516
|
+
if isinstance(resolved_fn, func.ExprTemplateFunction) and not resolved_fn.template.expr.is_valid:
|
|
517
|
+
# The FunctionCall is based on an ExprTemplateFunction, but the template expression is not valid
|
|
518
|
+
# (because it in turn contains an invalid FunctionCall). In this case, inherit the validation error
|
|
519
|
+
# from the template expression.
|
|
520
|
+
validation_error = resolved_fn.template.expr.validation_error
|
|
521
|
+
else:
|
|
522
|
+
try:
|
|
523
|
+
call_return_type = resolved_fn.call_return_type(bound_args)
|
|
524
|
+
except ImportError as exc:
|
|
525
|
+
validation_error = dedent(
|
|
526
|
+
f"""
|
|
527
|
+
A UDF call to {fn.self_path!r} could not be fully resolved, because a module required
|
|
528
|
+
by the UDF could not be imported:
|
|
529
|
+
{exc}
|
|
530
|
+
"""
|
|
531
|
+
)
|
|
532
|
+
|
|
533
|
+
assert (call_return_type is None) != (validation_error is None)
|
|
534
|
+
|
|
535
|
+
if call_return_type is None and return_type is None:
|
|
536
|
+
# Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious
|
|
537
|
+
# way to infer it during DB migration, so we might encounter a stored return_type of None. If the
|
|
538
|
+
# resolution of call_return_type also fails, then we're out of luck; we have no choice but to
|
|
539
|
+
# fail-fast.
|
|
540
|
+
raise excs.Error(validation_error)
|
|
541
|
+
|
|
542
|
+
if call_return_type is not None:
|
|
543
|
+
# call_return_type resolution succeeded.
|
|
544
|
+
if return_type is None:
|
|
545
|
+
# Schema versions prior to 25 did not store the return_type in metadata (as mentioned above), so
|
|
546
|
+
# fall back on the call_return_type.
|
|
547
|
+
return_type = call_return_type
|
|
548
|
+
elif not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
|
|
549
|
+
# There is a return_type stored in metadata (schema version >= 25),
|
|
550
|
+
# and the stored return_type of the UDF call doesn't match the column type of the FunctionCall.
|
|
551
|
+
validation_error = dedent(
|
|
552
|
+
f"""
|
|
553
|
+
The return type stored in the database for a UDF call to {fn.self_path!r} no longer
|
|
554
|
+
matches its return type as currently defined in the code. This probably means that the
|
|
555
|
+
code for {fn.self_path!r} has changed in a backward-incompatible way.
|
|
556
|
+
Return type of UDF call in the database: {return_type}
|
|
557
|
+
Return type of UDF as currently defined in code: {call_return_type}
|
|
558
|
+
"""
|
|
559
|
+
).strip()
|
|
560
|
+
|
|
561
|
+
assert return_type is not None # Guaranteed by the above logic.
|
|
502
562
|
|
|
503
563
|
fn_call = cls(
|
|
504
564
|
resolved_fn,
|
pixeltable/exprs/globals.py
CHANGED
|
@@ -2,10 +2,10 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import datetime
|
|
4
4
|
import enum
|
|
5
|
-
|
|
5
|
+
import uuid
|
|
6
6
|
|
|
7
7
|
# Python types corresponding to our literal types
|
|
8
|
-
LiteralPythonTypes =
|
|
8
|
+
LiteralPythonTypes = str | int | float | bool | datetime.datetime | datetime.date | uuid.UUID
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def print_slice(s: slice) -> str:
|
pixeltable/exprs/in_predicate.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any, Iterable
|
|
3
|
+
from typing import Any, Iterable
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -16,13 +16,13 @@ from .sql_element_cache import SqlElementCache
|
|
|
16
16
|
class InPredicate(Expr):
|
|
17
17
|
"""Predicate corresponding to the SQL IN operator."""
|
|
18
18
|
|
|
19
|
-
def __init__(self, lhs: Expr, value_set_literal:
|
|
19
|
+
def __init__(self, lhs: Expr, value_set_literal: Iterable | None = None, value_set_expr: Expr | None = None):
|
|
20
20
|
assert (value_set_literal is None) != (value_set_expr is None)
|
|
21
21
|
if not lhs.col_type.is_scalar_type():
|
|
22
22
|
raise excs.Error(f'isin(): only supported for scalar types, not {lhs.col_type}')
|
|
23
23
|
super().__init__(ts.BoolType())
|
|
24
24
|
|
|
25
|
-
self.value_list:
|
|
25
|
+
self.value_list: list | None = None # only contains values of the correct type
|
|
26
26
|
if value_set_expr is not None:
|
|
27
27
|
if not value_set_expr.col_type.is_json_type():
|
|
28
28
|
raise excs.Error(
|
|
@@ -73,7 +73,7 @@ class InPredicate(Expr):
|
|
|
73
73
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
74
74
|
return [*super()._id_attrs(), ('value_list', self.value_list)]
|
|
75
75
|
|
|
76
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
76
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
77
77
|
lhs_sql_exprs = sql_elements.get(self.components[0])
|
|
78
78
|
if lhs_sql_exprs is None or self.value_list is None:
|
|
79
79
|
return None
|
pixeltable/exprs/inline_expr.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any, Iterable
|
|
3
|
+
from typing import Any, Iterable
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
import sqlalchemy as sql
|
|
@@ -30,9 +30,9 @@ class InlineArray(Expr):
|
|
|
30
30
|
else:
|
|
31
31
|
exprs.append(Literal(el))
|
|
32
32
|
|
|
33
|
-
inferred_element_type:
|
|
33
|
+
inferred_element_type: ts.ColumnType | None = ts.InvalidType()
|
|
34
34
|
for i, expr in enumerate(exprs):
|
|
35
|
-
supertype = inferred_element_type.supertype(expr.col_type)
|
|
35
|
+
supertype = inferred_element_type.supertype(expr.col_type, for_inference=True)
|
|
36
36
|
if supertype is None:
|
|
37
37
|
raise excs.Error(
|
|
38
38
|
f'Could not infer element type of array: element of type `{expr.col_type}` at index {i} '
|
|
@@ -44,9 +44,12 @@ class InlineArray(Expr):
|
|
|
44
44
|
col_type = ts.ArrayType((len(exprs),), inferred_element_type)
|
|
45
45
|
elif inferred_element_type.is_array_type():
|
|
46
46
|
assert isinstance(inferred_element_type, ts.ArrayType)
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
47
|
+
dtype = inferred_element_type.dtype
|
|
48
|
+
shape = inferred_element_type.shape
|
|
49
|
+
if shape is not None and dtype is not None:
|
|
50
|
+
col_type = ts.ArrayType(shape=(len(exprs), *shape), dtype=dtype)
|
|
51
|
+
else:
|
|
52
|
+
col_type = ts.ArrayType(shape=None, dtype=dtype)
|
|
50
53
|
else:
|
|
51
54
|
raise excs.Error(f'Element type is not a valid dtype for an array: {inferred_element_type}')
|
|
52
55
|
|
|
@@ -61,7 +64,7 @@ class InlineArray(Expr):
|
|
|
61
64
|
def _equals(self, _: InlineArray) -> bool:
|
|
62
65
|
return True # Always true if components match
|
|
63
66
|
|
|
64
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
67
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
65
68
|
return None
|
|
66
69
|
|
|
67
70
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
@@ -81,12 +84,12 @@ class InlineArray(Expr):
|
|
|
81
84
|
# loaded and their types are known.
|
|
82
85
|
return InlineList(components) # type: ignore[return-value]
|
|
83
86
|
|
|
84
|
-
def as_literal(self) ->
|
|
87
|
+
def as_literal(self) -> Literal | None:
|
|
85
88
|
assert isinstance(self.col_type, ts.ArrayType)
|
|
86
89
|
if not all(isinstance(comp, Literal) for comp in self.components):
|
|
87
90
|
return None
|
|
88
91
|
return Literal(
|
|
89
|
-
np.array([c.as_literal().val for c in self.components], dtype=self.col_type.
|
|
92
|
+
np.array([c.as_literal().val for c in self.components], dtype=self.col_type.dtype), self.col_type
|
|
90
93
|
)
|
|
91
94
|
|
|
92
95
|
|
|
@@ -98,13 +101,7 @@ class InlineList(Expr):
|
|
|
98
101
|
def __init__(self, elements: Iterable):
|
|
99
102
|
exprs = [Expr.from_object(el) for el in elements]
|
|
100
103
|
|
|
101
|
-
|
|
102
|
-
'type': 'array',
|
|
103
|
-
'prefixItems': [expr.col_type.to_json_schema() for expr in exprs],
|
|
104
|
-
'items': False, # No additional items (fixed length)
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
super().__init__(ts.JsonType(json_schema))
|
|
104
|
+
super().__init__(ts.JsonType())
|
|
108
105
|
self.components.extend(exprs)
|
|
109
106
|
self.id = self._create_id()
|
|
110
107
|
|
|
@@ -115,7 +112,7 @@ class InlineList(Expr):
|
|
|
115
112
|
def _equals(self, _: InlineList) -> bool:
|
|
116
113
|
return True # Always true if components match
|
|
117
114
|
|
|
118
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
115
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
119
116
|
return None
|
|
120
117
|
|
|
121
118
|
def eval(self, data_row: DataRow, _: RowBuilder) -> None:
|
|
@@ -128,7 +125,7 @@ class InlineList(Expr):
|
|
|
128
125
|
def _from_dict(cls, _: dict, components: list[Expr]) -> InlineList:
|
|
129
126
|
return cls(components)
|
|
130
127
|
|
|
131
|
-
def as_literal(self) ->
|
|
128
|
+
def as_literal(self) -> Literal | None:
|
|
132
129
|
if not all(isinstance(comp, Literal) for comp in self.components):
|
|
133
130
|
return None
|
|
134
131
|
return Literal([c.as_literal().val for c in self.components], self.col_type)
|
|
@@ -150,18 +147,7 @@ class InlineDict(Expr):
|
|
|
150
147
|
self.keys.append(key)
|
|
151
148
|
exprs.append(Expr.from_object(val))
|
|
152
149
|
|
|
153
|
-
|
|
154
|
-
try:
|
|
155
|
-
json_schema = {
|
|
156
|
-
'type': 'object',
|
|
157
|
-
'properties': {key: expr.col_type.to_json_schema() for key, expr in zip(self.keys, exprs)},
|
|
158
|
-
}
|
|
159
|
-
except excs.Error:
|
|
160
|
-
# InlineDicts are used to store iterator arguments, which are not required to be valid JSON types,
|
|
161
|
-
# so we can't always construct a valid schema.
|
|
162
|
-
json_schema = None
|
|
163
|
-
|
|
164
|
-
super().__init__(ts.JsonType(json_schema))
|
|
150
|
+
super().__init__(ts.JsonType())
|
|
165
151
|
self.components.extend(exprs)
|
|
166
152
|
self.id = self._create_id()
|
|
167
153
|
|
|
@@ -176,7 +162,7 @@ class InlineDict(Expr):
|
|
|
176
162
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
177
163
|
return [*super()._id_attrs(), ('keys', self.keys)]
|
|
178
164
|
|
|
179
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
165
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
180
166
|
return None
|
|
181
167
|
|
|
182
168
|
def eval(self, data_row: DataRow, _: RowBuilder) -> None:
|
|
@@ -208,7 +194,7 @@ class InlineDict(Expr):
|
|
|
208
194
|
arg = dict(zip(d['keys'], components))
|
|
209
195
|
return InlineDict(arg)
|
|
210
196
|
|
|
211
|
-
def as_literal(self) ->
|
|
197
|
+
def as_literal(self) -> Literal | None:
|
|
212
198
|
if not all(isinstance(comp, Literal) for comp in self.components):
|
|
213
199
|
return None
|
|
214
200
|
return Literal(dict(zip(self.keys, (c.as_literal().val for c in self.components))), self.col_type)
|
pixeltable/exprs/is_null.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Optional
|
|
4
|
-
|
|
5
3
|
import sqlalchemy as sql
|
|
6
4
|
|
|
7
5
|
import pixeltable.type_system as ts
|
|
8
6
|
|
|
7
|
+
from .column_ref import ColumnRef
|
|
9
8
|
from .data_row import DataRow
|
|
10
9
|
from .expr import Expr
|
|
11
10
|
from .row_builder import RowBuilder
|
|
@@ -24,7 +23,12 @@ class IsNull(Expr):
|
|
|
24
23
|
def _equals(self, other: IsNull) -> bool:
|
|
25
24
|
return True
|
|
26
25
|
|
|
27
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
26
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
27
|
+
c = self.components[0]
|
|
28
|
+
if isinstance(c, ColumnRef) and c.col.stores_external_array():
|
|
29
|
+
# we also need to check CellMd.file_urls for null
|
|
30
|
+
e = sql.and_(c.col.sa_cellmd_col['file_urls'] == None, c.col.sa_col == None)
|
|
31
|
+
return e
|
|
28
32
|
e = sql_elements.get(self.components[0])
|
|
29
33
|
if e is None:
|
|
30
34
|
return None
|
pixeltable/exprs/json_mapper.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import TYPE_CHECKING
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -29,10 +29,10 @@ class JsonMapper(Expr):
|
|
|
29
29
|
"""
|
|
30
30
|
|
|
31
31
|
target_expr_scope: ExprScope
|
|
32
|
-
parent_mapper:
|
|
33
|
-
target_expr_eval_ctx:
|
|
32
|
+
parent_mapper: JsonMapper | None
|
|
33
|
+
target_expr_eval_ctx: RowBuilder.EvalCtx | None
|
|
34
34
|
|
|
35
|
-
def __init__(self, src_expr:
|
|
35
|
+
def __init__(self, src_expr: Expr | None, target_expr: Expr | None):
|
|
36
36
|
# TODO: type spec should be list[target_expr.col_type]
|
|
37
37
|
super().__init__(ts.JsonType())
|
|
38
38
|
|
|
@@ -54,7 +54,7 @@ class JsonMapper(Expr):
|
|
|
54
54
|
def _equals(self, _: JsonMapper) -> bool:
|
|
55
55
|
return True
|
|
56
56
|
|
|
57
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
57
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
58
58
|
return None
|
|
59
59
|
|
|
60
60
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
@@ -92,8 +92,8 @@ class JsonMapperDispatch(Expr):
|
|
|
92
92
|
"""
|
|
93
93
|
|
|
94
94
|
target_expr_scope: ExprScope
|
|
95
|
-
parent_mapper:
|
|
96
|
-
target_expr_eval_ctx:
|
|
95
|
+
parent_mapper: JsonMapperDispatch | None
|
|
96
|
+
target_expr_eval_ctx: RowBuilder.EvalCtx | None
|
|
97
97
|
|
|
98
98
|
def __init__(self, src_expr: Expr, target_expr: Expr):
|
|
99
99
|
super().__init__(ts.InvalidType())
|
|
@@ -116,7 +116,7 @@ class JsonMapperDispatch(Expr):
|
|
|
116
116
|
scope_anchor = ObjectRef(self.target_expr_scope, self)
|
|
117
117
|
self.components.append(scope_anchor)
|
|
118
118
|
|
|
119
|
-
def _bind_rel_paths(self, mapper:
|
|
119
|
+
def _bind_rel_paths(self, mapper: JsonMapperDispatch | None = None) -> None:
|
|
120
120
|
self.src_expr._bind_rel_paths(mapper)
|
|
121
121
|
self.target_expr._bind_rel_paths(self)
|
|
122
122
|
self.parent_mapper = mapper
|
pixeltable/exprs/json_path.py
CHANGED
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
import io
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
4
6
|
|
|
5
7
|
import jmespath
|
|
6
8
|
import sqlalchemy as sql
|
|
7
9
|
|
|
8
10
|
from pixeltable import catalog, exceptions as excs, type_system as ts
|
|
9
11
|
|
|
12
|
+
from .column_ref import ColumnRef
|
|
10
13
|
from .data_row import DataRow
|
|
11
14
|
from .expr import Expr
|
|
12
15
|
from .globals import print_slice
|
|
@@ -17,29 +20,41 @@ from .sql_element_cache import SqlElementCache
|
|
|
17
20
|
|
|
18
21
|
|
|
19
22
|
class JsonPath(Expr):
|
|
23
|
+
"""
|
|
24
|
+
anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
|
|
25
|
+
scope_idx: for relative paths, index of referenced JsonMapper
|
|
26
|
+
(0: indicates the immediately preceding JsonMapper, -1: the parent of the immediately preceding mapper, ...)
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
path_elements: list[str | int | slice]
|
|
30
|
+
compiled_path: jmespath.parser.ParsedResult | None
|
|
31
|
+
scope_idx: int
|
|
32
|
+
file_handles: dict[Path, io.BufferedReader] # key: file path
|
|
33
|
+
|
|
20
34
|
def __init__(
|
|
21
|
-
self, anchor:
|
|
35
|
+
self, anchor: Expr | None, path_elements: list[str | int | slice] | None = None, scope_idx: int = 0
|
|
22
36
|
) -> None:
|
|
23
|
-
"""
|
|
24
|
-
anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
|
|
25
|
-
scope_idx: for relative paths, index of referenced JsonMapper
|
|
26
|
-
(0: indicates the immediately preceding JsonMapper, -1: the parent of the immediately preceding mapper, ...)
|
|
27
|
-
"""
|
|
28
37
|
if path_elements is None:
|
|
29
38
|
path_elements = []
|
|
30
39
|
super().__init__(ts.JsonType(nullable=True)) # JsonPath expressions are always nullable
|
|
31
40
|
if anchor is not None:
|
|
32
41
|
self.components = [anchor]
|
|
33
|
-
self.path_elements
|
|
42
|
+
self.path_elements = path_elements
|
|
34
43
|
self.compiled_path = jmespath.compile(self._json_path()) if len(path_elements) > 0 else None
|
|
35
44
|
self.scope_idx = scope_idx
|
|
36
45
|
# NOTE: the _create_id() result will change if set_anchor() gets called;
|
|
37
46
|
# this is not a problem, because _create_id() shouldn't be called after init()
|
|
38
47
|
self.id = self._create_id()
|
|
48
|
+
self.file_handles = {}
|
|
49
|
+
|
|
50
|
+
def release(self) -> None:
|
|
51
|
+
for fh in self.file_handles.values():
|
|
52
|
+
fh.close()
|
|
53
|
+
self.file_handles.clear()
|
|
39
54
|
|
|
40
55
|
def __repr__(self) -> str:
|
|
41
56
|
# else 'R': the anchor is RELATIVE_PATH_ROOT
|
|
42
|
-
anchor_str = str(self.
|
|
57
|
+
anchor_str = str(self.anchor) if self.anchor is not None else 'R'
|
|
43
58
|
if len(self.path_elements) == 0:
|
|
44
59
|
return anchor_str
|
|
45
60
|
return f'{anchor_str}{"." if isinstance(self.path_elements[0], str) else ""}{self._json_path()}'
|
|
@@ -66,7 +81,7 @@ class JsonPath(Expr):
|
|
|
66
81
|
return cls(anchor, path_elements, d['scope_idx'])
|
|
67
82
|
|
|
68
83
|
@property
|
|
69
|
-
def
|
|
84
|
+
def anchor(self) -> Expr | None:
|
|
70
85
|
return None if len(self.components) == 0 else self.components[0]
|
|
71
86
|
|
|
72
87
|
def set_anchor(self, anchor: Expr) -> None:
|
|
@@ -74,17 +89,17 @@ class JsonPath(Expr):
|
|
|
74
89
|
self.components = [anchor]
|
|
75
90
|
|
|
76
91
|
def is_relative_path(self) -> bool:
|
|
77
|
-
return self.
|
|
92
|
+
return self.anchor is None
|
|
78
93
|
|
|
79
94
|
def _has_relative_path(self) -> bool:
|
|
80
95
|
return self.is_relative_path() or super()._has_relative_path()
|
|
81
96
|
|
|
82
|
-
def _bind_rel_paths(self, mapper:
|
|
97
|
+
def _bind_rel_paths(self, mapper: 'JsonMapperDispatch' | None = None) -> None:
|
|
83
98
|
if self.is_relative_path():
|
|
84
99
|
# TODO: take scope_idx into account
|
|
85
100
|
self.set_anchor(mapper.scope_anchor)
|
|
86
101
|
else:
|
|
87
|
-
self.
|
|
102
|
+
self.anchor._bind_rel_paths(mapper)
|
|
88
103
|
|
|
89
104
|
def __call__(self, *args: object, **kwargs: object) -> 'JsonPath':
|
|
90
105
|
"""
|
|
@@ -98,15 +113,15 @@ class JsonPath(Expr):
|
|
|
98
113
|
|
|
99
114
|
def __getattr__(self, name: str) -> 'JsonPath':
|
|
100
115
|
assert isinstance(name, str)
|
|
101
|
-
return JsonPath(self.
|
|
116
|
+
return JsonPath(self.anchor, [*self.path_elements, name])
|
|
102
117
|
|
|
103
118
|
def __getitem__(self, index: object) -> 'JsonPath':
|
|
104
119
|
if isinstance(index, (int, slice, str)):
|
|
105
|
-
return JsonPath(self.
|
|
120
|
+
return JsonPath(self.anchor, [*self.path_elements, index])
|
|
106
121
|
raise excs.Error(f'Invalid json list index: {index}')
|
|
107
122
|
|
|
108
|
-
def default_column_name(self) ->
|
|
109
|
-
anchor_name = self.
|
|
123
|
+
def default_column_name(self) -> str | None:
|
|
124
|
+
anchor_name = self.anchor.default_column_name() if self.anchor is not None else ''
|
|
110
125
|
ret_name = f'{anchor_name}.{self._json_path()}'
|
|
111
126
|
|
|
112
127
|
def cleanup_char(s: str) -> str:
|
|
@@ -133,7 +148,7 @@ class JsonPath(Expr):
|
|
|
133
148
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
134
149
|
return [*super()._id_attrs(), ('path_elements', self.path_elements)]
|
|
135
150
|
|
|
136
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
151
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
137
152
|
"""
|
|
138
153
|
Postgres appears to have a bug: jsonb_path_query('{a: [{b: 0}, {b: 1}]}', '$.a.b') returns
|
|
139
154
|
*two* rows (each containing col val 0), not a single row with [0, 0].
|
|
@@ -158,12 +173,31 @@ class JsonPath(Expr):
|
|
|
158
173
|
result.append(f'[{print_slice(element)}]')
|
|
159
174
|
return ''.join(result)
|
|
160
175
|
|
|
161
|
-
def eval(self,
|
|
162
|
-
assert self.
|
|
163
|
-
val =
|
|
176
|
+
def eval(self, row: DataRow, row_builder: RowBuilder) -> None:
|
|
177
|
+
assert self.anchor is not None, self
|
|
178
|
+
val = row[self.anchor.slot_idx]
|
|
164
179
|
if self.compiled_path is not None:
|
|
165
180
|
val = self.compiled_path.search(val)
|
|
166
|
-
|
|
181
|
+
row[self.slot_idx] = val
|
|
182
|
+
if val is None or self.anchor is None or not isinstance(self.anchor, ColumnRef):
|
|
183
|
+
return
|
|
184
|
+
|
|
185
|
+
# the origin of val is a json-typed column, which might store inlined objects
|
|
186
|
+
if self.anchor.slot_idx not in row.slot_md:
|
|
187
|
+
# we can infer that there aren't any inlined objects because our execution plan doesn't include
|
|
188
|
+
# materializing the cellmd (eg, insert plans)
|
|
189
|
+
# TODO: have the planner pass that fact into ExprEvalNode explicitly to streamline this path a bit more
|
|
190
|
+
return
|
|
191
|
+
|
|
192
|
+
# defer import until it's needed
|
|
193
|
+
from pixeltable.exec.cell_reconstruction_node import json_has_inlined_objs, reconstruct_json
|
|
194
|
+
|
|
195
|
+
cell_md = row.slot_md[self.anchor.slot_idx]
|
|
196
|
+
if cell_md is None or cell_md.file_urls is None or not json_has_inlined_objs(val):
|
|
197
|
+
# val doesn't contain inlined objects
|
|
198
|
+
return
|
|
199
|
+
|
|
200
|
+
row.vals[self.slot_idx] = reconstruct_json(val, cell_md.file_urls, self.file_handles)
|
|
167
201
|
|
|
168
202
|
|
|
169
203
|
RELATIVE_PATH_ROOT = JsonPath(None)
|