pixeltable: pixeltable-0.4.0rc3-py3-none-any.whl → pixeltable-0.4.20-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +23 -5
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -3
- pixeltable/catalog/catalog.py +1318 -404
- pixeltable/catalog/column.py +186 -115
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +11 -43
- pixeltable/catalog/insertable_table.py +167 -79
- pixeltable/catalog/path.py +61 -23
- pixeltable/catalog/schema_object.py +9 -10
- pixeltable/catalog/table.py +626 -308
- pixeltable/catalog/table_metadata.py +101 -0
- pixeltable/catalog/table_version.py +713 -569
- pixeltable/catalog/table_version_handle.py +37 -6
- pixeltable/catalog/table_version_path.py +42 -29
- pixeltable/catalog/tbl_ops.py +50 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +108 -94
- pixeltable/config.py +128 -22
- pixeltable/dataframe.py +188 -100
- pixeltable/env.py +407 -136
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +3 -0
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +7 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +190 -30
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/in_memory_data_node.py +18 -18
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +206 -101
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +34 -30
- pixeltable/exprs/column_ref.py +92 -96
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +152 -55
- pixeltable/exprs/expr.py +62 -43
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +75 -37
- pixeltable/exprs/globals.py +1 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +10 -27
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +127 -53
- pixeltable/exprs/rowid_ref.py +8 -12
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +10 -10
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +20 -18
- pixeltable/func/signature.py +43 -16
- pixeltable/func/tools.py +23 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +6 -0
- pixeltable/functions/anthropic.py +93 -33
- pixeltable/functions/audio.py +114 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +1 -1
- pixeltable/functions/deepseek.py +20 -9
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +28 -11
- pixeltable/functions/globals.py +13 -13
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1046 -23
- pixeltable/functions/image.py +9 -18
- pixeltable/functions/llama_cpp.py +23 -8
- pixeltable/functions/math.py +3 -4
- pixeltable/functions/mistralai.py +4 -15
- pixeltable/functions/ollama.py +16 -9
- pixeltable/functions/openai.py +104 -82
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +2 -2
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +13 -14
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/video.py +1388 -106
- pixeltable/functions/vision.py +7 -7
- pixeltable/functions/whisper.py +15 -7
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +332 -105
- pixeltable/index/base.py +13 -22
- pixeltable/index/btree.py +23 -22
- pixeltable/index/embedding_index.py +32 -44
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +7 -6
- pixeltable/io/external_store.py +49 -77
- pixeltable/io/fiftyone.py +11 -11
- pixeltable/io/globals.py +29 -28
- pixeltable/io/hf_datasets.py +17 -9
- pixeltable/io/label_studio.py +70 -66
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +12 -11
- pixeltable/io/parquet.py +13 -93
- pixeltable/io/table_data_conduit.py +71 -47
- pixeltable/io/utils.py +3 -3
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +21 -11
- pixeltable/iterators/document.py +116 -55
- pixeltable/iterators/image.py +5 -2
- pixeltable/iterators/video.py +293 -13
- pixeltable/metadata/__init__.py +4 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/util.py +13 -12
- pixeltable/metadata/notes.py +4 -0
- pixeltable/metadata/schema.py +79 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +274 -223
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +259 -129
- pixeltable/share/protocol/__init__.py +34 -0
- pixeltable/share/protocol/common.py +170 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +109 -0
- pixeltable/share/publish.py +213 -57
- pixeltable/store.py +238 -175
- pixeltable/type_system.py +104 -63
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +108 -13
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +31 -5
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +283 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +88 -0
- pixeltable/utils/local_store.py +316 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +528 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +392 -0
- pixeltable-0.4.20.dist-info/METADATA +587 -0
- pixeltable-0.4.20.dist-info/RECORD +218 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
- pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable/utils/sample.py +0 -25
- pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
- pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
- pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
pixeltable/exprs/expr_set.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Generic, Iterable, Iterator,
|
|
3
|
+
from typing import Generic, Iterable, Iterator, TypeVar
|
|
4
4
|
|
|
5
5
|
from .expr import Expr
|
|
6
6
|
|
|
@@ -9,26 +9,33 @@ T = TypeVar('T', bound='Expr')
|
|
|
9
9
|
|
|
10
10
|
class ExprSet(Generic[T]):
|
|
11
11
|
"""
|
|
12
|
-
|
|
12
|
+
An ordered set that also supports indexed lookup (by slot_idx and Expr.id). Exprs are uniquely identified by
|
|
13
|
+
Expr.id.
|
|
13
14
|
"""
|
|
14
15
|
|
|
15
16
|
exprs: dict[int, T] # key: Expr.id
|
|
17
|
+
expr_offsets: dict[int, int] # key: Expr.id, value: offset into self.exprs.keys()
|
|
16
18
|
exprs_by_idx: dict[int, T] # key: slot_idx
|
|
17
19
|
|
|
18
|
-
def __init__(self, elements:
|
|
20
|
+
def __init__(self, elements: Iterable[T] | None = None):
|
|
19
21
|
self.exprs = {}
|
|
22
|
+
self.expr_offsets = {}
|
|
20
23
|
self.exprs_by_idx = {}
|
|
21
24
|
if elements is not None:
|
|
22
25
|
for e in elements:
|
|
23
26
|
self.add(e)
|
|
24
27
|
|
|
25
|
-
def add(self, expr: T) ->
|
|
26
|
-
|
|
27
|
-
|
|
28
|
+
def add(self, expr: T) -> int:
|
|
29
|
+
"""Returns offset corresponding to iteration order"""
|
|
30
|
+
offset = self.expr_offsets.get(expr.id)
|
|
31
|
+
if offset is not None:
|
|
32
|
+
return offset
|
|
33
|
+
offset = len(self.exprs)
|
|
28
34
|
self.exprs[expr.id] = expr
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
35
|
+
self.expr_offsets[expr.id] = offset
|
|
36
|
+
if expr.slot_idx is not None:
|
|
37
|
+
self.exprs_by_idx[expr.slot_idx] = expr
|
|
38
|
+
return offset
|
|
32
39
|
|
|
33
40
|
def update(self, *others: Iterable[T]) -> None:
|
|
34
41
|
for other in others:
|
|
@@ -44,7 +51,7 @@ class ExprSet(Generic[T]):
|
|
|
44
51
|
def __iter__(self) -> Iterator[T]:
|
|
45
52
|
return iter(self.exprs.values())
|
|
46
53
|
|
|
47
|
-
def __getitem__(self, index: object) ->
|
|
54
|
+
def __getitem__(self, index: object) -> T | None:
|
|
48
55
|
"""Indexed lookup by slot_idx or Expr.id."""
|
|
49
56
|
assert isinstance(index, (int, Expr))
|
|
50
57
|
if isinstance(index, int):
|
|
pixeltable/exprs/function_call.py
CHANGED
|
@@ -4,7 +4,7 @@ import inspect
|
|
|
4
4
|
import logging
|
|
5
5
|
import sys
|
|
6
6
|
from textwrap import dedent
|
|
7
|
-
from typing import Any,
|
|
7
|
+
from typing import Any, Sequence
|
|
8
8
|
|
|
9
9
|
import sqlalchemy as sql
|
|
10
10
|
|
|
@@ -24,7 +24,7 @@ class FunctionCall(Expr):
|
|
|
24
24
|
fn: func.Function
|
|
25
25
|
is_method_call: bool
|
|
26
26
|
agg_init_args: dict[str, Any]
|
|
27
|
-
resource_pool:
|
|
27
|
+
resource_pool: str | None
|
|
28
28
|
|
|
29
29
|
# These collections hold the component indices corresponding to the args and kwargs
|
|
30
30
|
# that were passed to the FunctionCall. They're 1:1 with the original call pattern.
|
|
@@ -36,17 +36,17 @@ class FunctionCall(Expr):
|
|
|
36
36
|
# - a component index, if the parameter is a non-variadic parameter
|
|
37
37
|
# - a list of component indices, if the parameter is a variadic positional parameter
|
|
38
38
|
# - a dict mapping keyword names to component indices, if the parameter is a variadic keyword parameter
|
|
39
|
-
bound_idxs: dict[str,
|
|
39
|
+
bound_idxs: dict[str, int | list[int] | dict[str, int]]
|
|
40
40
|
|
|
41
41
|
return_type: ts.ColumnType
|
|
42
42
|
group_by_start_idx: int
|
|
43
43
|
group_by_stop_idx: int
|
|
44
44
|
fn_expr_idx: int
|
|
45
45
|
order_by_start_idx: int
|
|
46
|
-
aggregator:
|
|
47
|
-
current_partition_vals:
|
|
46
|
+
aggregator: Any | None
|
|
47
|
+
current_partition_vals: list[Any] | None
|
|
48
48
|
|
|
49
|
-
_validation_error:
|
|
49
|
+
_validation_error: str | None
|
|
50
50
|
|
|
51
51
|
def __init__(
|
|
52
52
|
self,
|
|
@@ -54,10 +54,10 @@ class FunctionCall(Expr):
|
|
|
54
54
|
args: list[Expr],
|
|
55
55
|
kwargs: dict[str, Expr],
|
|
56
56
|
return_type: ts.ColumnType,
|
|
57
|
-
order_by_clause:
|
|
58
|
-
group_by_clause:
|
|
57
|
+
order_by_clause: list[Any] | None = None,
|
|
58
|
+
group_by_clause: list[Any] | None = None,
|
|
59
59
|
is_method_call: bool = False,
|
|
60
|
-
validation_error:
|
|
60
|
+
validation_error: str | None = None,
|
|
61
61
|
):
|
|
62
62
|
assert not fn.is_polymorphic
|
|
63
63
|
assert all(isinstance(arg, Expr) for arg in args)
|
|
@@ -115,6 +115,7 @@ class FunctionCall(Expr):
|
|
|
115
115
|
self._validation_error = validation_error
|
|
116
116
|
|
|
117
117
|
if validation_error is not None:
|
|
118
|
+
self.bound_idxs = {}
|
|
118
119
|
self.resource_pool = None
|
|
119
120
|
return
|
|
120
121
|
|
|
@@ -148,7 +149,7 @@ class FunctionCall(Expr):
|
|
|
148
149
|
target = tbl._tbl_version_path.tbl_version
|
|
149
150
|
return [RowidRef(target, i) for i in range(target.get().num_rowid_columns())]
|
|
150
151
|
|
|
151
|
-
def default_column_name(self) ->
|
|
152
|
+
def default_column_name(self) -> str | None:
|
|
152
153
|
return self.fn.name
|
|
153
154
|
|
|
154
155
|
def _equals(self, other: FunctionCall) -> bool:
|
|
@@ -177,7 +178,7 @@ class FunctionCall(Expr):
|
|
|
177
178
|
return self.display_str()
|
|
178
179
|
|
|
179
180
|
@property
|
|
180
|
-
def validation_error(self) ->
|
|
181
|
+
def validation_error(self) -> str | None:
|
|
181
182
|
return self._validation_error or super().validation_error
|
|
182
183
|
|
|
183
184
|
def display_str(self, inline: bool = True) -> str:
|
|
@@ -244,7 +245,7 @@ class FunctionCall(Expr):
|
|
|
244
245
|
assert self.is_agg_fn_call
|
|
245
246
|
return self.order_by
|
|
246
247
|
|
|
247
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
248
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
248
249
|
assert self.is_valid
|
|
249
250
|
|
|
250
251
|
# we currently can't translate aggregate functions with grouping and/or ordering to SQL
|
|
@@ -300,8 +301,16 @@ class FunctionCall(Expr):
|
|
|
300
301
|
"""
|
|
301
302
|
res = super().substitute(spec)
|
|
302
303
|
assert res is self
|
|
303
|
-
|
|
304
|
-
|
|
304
|
+
if self.is_valid:
|
|
305
|
+
# If this FunctionCall is valid, re-evaluate the call_return_type of the substituted expression. If the
|
|
306
|
+
# FunctionCall is not valid, it isn't safe to do this. (Really we should be asserting that it *is* valid,
|
|
307
|
+
# but we still need to be able to do substitutions on invalid FunctionCalls, because loading an
|
|
308
|
+
# EmbeddingIndex from the db involves reconstructing the requisite (substituted) FunctionCalls. We could
|
|
309
|
+
# fix this by separately persisting the FunctionCall instances held by EmbeddingIndex to the db. That's
|
|
310
|
+
# probably a good idea, but it's also probably not urgent, since it only affects Functions that have a
|
|
311
|
+
# conditional_return_type implemented.)
|
|
312
|
+
self.return_type = self.fn.call_return_type(self.bound_args)
|
|
313
|
+
self.col_type = self.return_type
|
|
305
314
|
return self
|
|
306
315
|
|
|
307
316
|
def update(self, data_row: DataRow) -> None:
|
|
@@ -312,7 +321,7 @@ class FunctionCall(Expr):
|
|
|
312
321
|
args, kwargs = self.make_args(data_row)
|
|
313
322
|
self.aggregator.update(*args, **kwargs)
|
|
314
323
|
|
|
315
|
-
def make_args(self, data_row: DataRow) ->
|
|
324
|
+
def make_args(self, data_row: DataRow) -> tuple[list[Any], dict[str, Any]] | None:
|
|
316
325
|
"""Return args and kwargs, constructed for data_row; returns None if any non-nullable arg is None."""
|
|
317
326
|
args: list[Any] = []
|
|
318
327
|
parameters_by_pos = self.fn.signature.parameters_by_pos
|
|
@@ -439,18 +448,18 @@ class FunctionCall(Expr):
|
|
|
439
448
|
group_by_exprs = components[group_by_start_idx:group_by_stop_idx]
|
|
440
449
|
order_by_exprs = components[order_by_start_idx:]
|
|
441
450
|
|
|
442
|
-
validation_error:
|
|
451
|
+
validation_error: str | None = None
|
|
443
452
|
|
|
444
453
|
if isinstance(fn, func.InvalidFunction):
|
|
445
454
|
validation_error = (
|
|
446
455
|
dedent(
|
|
447
456
|
f"""
|
|
448
457
|
The UDF '{fn.self_path}' cannot be located, because
|
|
449
|
-
{{
|
|
458
|
+
{{error_msg}}
|
|
450
459
|
"""
|
|
451
460
|
)
|
|
452
461
|
.strip()
|
|
453
|
-
.format(
|
|
462
|
+
.format(error_msg=fn.error_msg)
|
|
454
463
|
)
|
|
455
464
|
return cls(fn, args, kwargs, return_type, is_method_call=is_method_call, validation_error=validation_error)
|
|
456
465
|
|
|
@@ -480,25 +489,54 @@ class FunctionCall(Expr):
|
|
|
480
489
|
).strip()
|
|
481
490
|
else:
|
|
482
491
|
# Evaluate the call_return_type as defined in the current codebase.
|
|
483
|
-
call_return_type =
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
#
|
|
487
|
-
#
|
|
488
|
-
#
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
492
|
+
call_return_type: ts.ColumnType | None = None
|
|
493
|
+
|
|
494
|
+
if isinstance(resolved_fn, func.ExprTemplateFunction) and not resolved_fn.template.expr.is_valid:
|
|
495
|
+
# The FunctionCall is based on an ExprTemplateFunction, but the template expression is not valid
|
|
496
|
+
# (because it in turn contains an invalid FunctionCall). In this case, inherit the validation error
|
|
497
|
+
# from the template expression.
|
|
498
|
+
validation_error = resolved_fn.template.expr.validation_error
|
|
499
|
+
else:
|
|
500
|
+
try:
|
|
501
|
+
call_return_type = resolved_fn.call_return_type(bound_args)
|
|
502
|
+
except ImportError as exc:
|
|
503
|
+
validation_error = dedent(
|
|
504
|
+
f"""
|
|
505
|
+
A UDF call to {fn.self_path!r} could not be fully resolved, because a module required
|
|
506
|
+
by the UDF could not be imported:
|
|
507
|
+
{exc}
|
|
508
|
+
"""
|
|
509
|
+
)
|
|
510
|
+
|
|
511
|
+
assert (call_return_type is None) != (validation_error is None)
|
|
512
|
+
|
|
513
|
+
if call_return_type is None and return_type is None:
|
|
514
|
+
# Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious
|
|
515
|
+
# way to infer it during DB migration, so we might encounter a stored return_type of None. If the
|
|
516
|
+
# resolution of call_return_type also fails, then we're out of luck; we have no choice but to
|
|
517
|
+
# fail-fast.
|
|
518
|
+
raise excs.Error(validation_error)
|
|
519
|
+
|
|
520
|
+
if call_return_type is not None:
|
|
521
|
+
# call_return_type resolution succeeded.
|
|
522
|
+
if return_type is None:
|
|
523
|
+
# Schema versions prior to 25 did not store the return_type in metadata (as mentioned above), so
|
|
524
|
+
# fall back on the call_return_type.
|
|
525
|
+
return_type = call_return_type
|
|
526
|
+
elif not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
|
|
527
|
+
# There is a return_type stored in metadata (schema version >= 25),
|
|
528
|
+
# and the stored return_type of the UDF call doesn't match the column type of the FunctionCall.
|
|
529
|
+
validation_error = dedent(
|
|
530
|
+
f"""
|
|
531
|
+
The return type stored in the database for a UDF call to {fn.self_path!r} no longer
|
|
532
|
+
matches its return type as currently defined in the code. This probably means that the
|
|
533
|
+
code for {fn.self_path!r} has changed in a backward-incompatible way.
|
|
534
|
+
Return type of UDF call in the database: {return_type}
|
|
535
|
+
Return type of UDF as currently defined in code: {call_return_type}
|
|
536
|
+
"""
|
|
537
|
+
).strip()
|
|
538
|
+
|
|
539
|
+
assert return_type is not None # Guaranteed by the above logic.
|
|
502
540
|
|
|
503
541
|
fn_call = cls(
|
|
504
542
|
resolved_fn,
|
pixeltable/exprs/globals.py
CHANGED
|
@@ -2,10 +2,9 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import datetime
|
|
4
4
|
import enum
|
|
5
|
-
from typing import Union
|
|
6
5
|
|
|
7
6
|
# Python types corresponding to our literal types
|
|
8
|
-
LiteralPythonTypes =
|
|
7
|
+
LiteralPythonTypes = str | int | float | bool | datetime.datetime | datetime.date
|
|
9
8
|
|
|
10
9
|
|
|
11
10
|
def print_slice(s: slice) -> str:
|
pixeltable/exprs/in_predicate.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any, Iterable
|
|
3
|
+
from typing import Any, Iterable
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -16,13 +16,13 @@ from .sql_element_cache import SqlElementCache
|
|
|
16
16
|
class InPredicate(Expr):
|
|
17
17
|
"""Predicate corresponding to the SQL IN operator."""
|
|
18
18
|
|
|
19
|
-
def __init__(self, lhs: Expr, value_set_literal:
|
|
19
|
+
def __init__(self, lhs: Expr, value_set_literal: Iterable | None = None, value_set_expr: Expr | None = None):
|
|
20
20
|
assert (value_set_literal is None) != (value_set_expr is None)
|
|
21
21
|
if not lhs.col_type.is_scalar_type():
|
|
22
22
|
raise excs.Error(f'isin(): only supported for scalar types, not {lhs.col_type}')
|
|
23
23
|
super().__init__(ts.BoolType())
|
|
24
24
|
|
|
25
|
-
self.value_list:
|
|
25
|
+
self.value_list: list | None = None # only contains values of the correct type
|
|
26
26
|
if value_set_expr is not None:
|
|
27
27
|
if not value_set_expr.col_type.is_json_type():
|
|
28
28
|
raise excs.Error(
|
|
@@ -73,7 +73,7 @@ class InPredicate(Expr):
|
|
|
73
73
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
74
74
|
return [*super()._id_attrs(), ('value_list', self.value_list)]
|
|
75
75
|
|
|
76
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
76
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
77
77
|
lhs_sql_exprs = sql_elements.get(self.components[0])
|
|
78
78
|
if lhs_sql_exprs is None or self.value_list is None:
|
|
79
79
|
return None
|
pixeltable/exprs/inline_expr.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any, Iterable
|
|
3
|
+
from typing import Any, Iterable
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
import sqlalchemy as sql
|
|
@@ -30,7 +30,7 @@ class InlineArray(Expr):
|
|
|
30
30
|
else:
|
|
31
31
|
exprs.append(Literal(el))
|
|
32
32
|
|
|
33
|
-
inferred_element_type:
|
|
33
|
+
inferred_element_type: ts.ColumnType | None = ts.InvalidType()
|
|
34
34
|
for i, expr in enumerate(exprs):
|
|
35
35
|
supertype = inferred_element_type.supertype(expr.col_type)
|
|
36
36
|
if supertype is None:
|
|
@@ -61,7 +61,7 @@ class InlineArray(Expr):
|
|
|
61
61
|
def _equals(self, _: InlineArray) -> bool:
|
|
62
62
|
return True # Always true if components match
|
|
63
63
|
|
|
64
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
64
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
65
65
|
return None
|
|
66
66
|
|
|
67
67
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
@@ -81,7 +81,7 @@ class InlineArray(Expr):
|
|
|
81
81
|
# loaded and their types are known.
|
|
82
82
|
return InlineList(components) # type: ignore[return-value]
|
|
83
83
|
|
|
84
|
-
def as_literal(self) ->
|
|
84
|
+
def as_literal(self) -> Literal | None:
|
|
85
85
|
assert isinstance(self.col_type, ts.ArrayType)
|
|
86
86
|
if not all(isinstance(comp, Literal) for comp in self.components):
|
|
87
87
|
return None
|
|
@@ -98,13 +98,7 @@ class InlineList(Expr):
|
|
|
98
98
|
def __init__(self, elements: Iterable):
|
|
99
99
|
exprs = [Expr.from_object(el) for el in elements]
|
|
100
100
|
|
|
101
|
-
|
|
102
|
-
'type': 'array',
|
|
103
|
-
'prefixItems': [expr.col_type.to_json_schema() for expr in exprs],
|
|
104
|
-
'items': False, # No additional items (fixed length)
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
super().__init__(ts.JsonType(json_schema))
|
|
101
|
+
super().__init__(ts.JsonType())
|
|
108
102
|
self.components.extend(exprs)
|
|
109
103
|
self.id = self._create_id()
|
|
110
104
|
|
|
@@ -115,7 +109,7 @@ class InlineList(Expr):
|
|
|
115
109
|
def _equals(self, _: InlineList) -> bool:
|
|
116
110
|
return True # Always true if components match
|
|
117
111
|
|
|
118
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
112
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
119
113
|
return None
|
|
120
114
|
|
|
121
115
|
def eval(self, data_row: DataRow, _: RowBuilder) -> None:
|
|
@@ -128,7 +122,7 @@ class InlineList(Expr):
|
|
|
128
122
|
def _from_dict(cls, _: dict, components: list[Expr]) -> InlineList:
|
|
129
123
|
return cls(components)
|
|
130
124
|
|
|
131
|
-
def as_literal(self) ->
|
|
125
|
+
def as_literal(self) -> Literal | None:
|
|
132
126
|
if not all(isinstance(comp, Literal) for comp in self.components):
|
|
133
127
|
return None
|
|
134
128
|
return Literal([c.as_literal().val for c in self.components], self.col_type)
|
|
@@ -150,18 +144,7 @@ class InlineDict(Expr):
|
|
|
150
144
|
self.keys.append(key)
|
|
151
145
|
exprs.append(Expr.from_object(val))
|
|
152
146
|
|
|
153
|
-
|
|
154
|
-
try:
|
|
155
|
-
json_schema = {
|
|
156
|
-
'type': 'object',
|
|
157
|
-
'properties': {key: expr.col_type.to_json_schema() for key, expr in zip(self.keys, exprs)},
|
|
158
|
-
}
|
|
159
|
-
except excs.Error:
|
|
160
|
-
# InlineDicts are used to store iterator arguments, which are not required to be valid JSON types,
|
|
161
|
-
# so we can't always construct a valid schema.
|
|
162
|
-
json_schema = None
|
|
163
|
-
|
|
164
|
-
super().__init__(ts.JsonType(json_schema))
|
|
147
|
+
super().__init__(ts.JsonType())
|
|
165
148
|
self.components.extend(exprs)
|
|
166
149
|
self.id = self._create_id()
|
|
167
150
|
|
|
@@ -176,7 +159,7 @@ class InlineDict(Expr):
|
|
|
176
159
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
177
160
|
return [*super()._id_attrs(), ('keys', self.keys)]
|
|
178
161
|
|
|
179
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
162
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
180
163
|
return None
|
|
181
164
|
|
|
182
165
|
def eval(self, data_row: DataRow, _: RowBuilder) -> None:
|
|
@@ -208,7 +191,7 @@ class InlineDict(Expr):
|
|
|
208
191
|
arg = dict(zip(d['keys'], components))
|
|
209
192
|
return InlineDict(arg)
|
|
210
193
|
|
|
211
|
-
def as_literal(self) ->
|
|
194
|
+
def as_literal(self) -> Literal | None:
|
|
212
195
|
if not all(isinstance(comp, Literal) for comp in self.components):
|
|
213
196
|
return None
|
|
214
197
|
return Literal(dict(zip(self.keys, (c.as_literal().val for c in self.components))), self.col_type)
|
pixeltable/exprs/is_null.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Optional
|
|
4
|
-
|
|
5
3
|
import sqlalchemy as sql
|
|
6
4
|
|
|
7
5
|
import pixeltable.type_system as ts
|
|
@@ -24,7 +22,7 @@ class IsNull(Expr):
|
|
|
24
22
|
def _equals(self, other: IsNull) -> bool:
|
|
25
23
|
return True
|
|
26
24
|
|
|
27
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
25
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
28
26
|
e = sql_elements.get(self.components[0])
|
|
29
27
|
if e is None:
|
|
30
28
|
return None
|
pixeltable/exprs/json_mapper.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import TYPE_CHECKING
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -29,10 +29,10 @@ class JsonMapper(Expr):
|
|
|
29
29
|
"""
|
|
30
30
|
|
|
31
31
|
target_expr_scope: ExprScope
|
|
32
|
-
parent_mapper:
|
|
33
|
-
target_expr_eval_ctx:
|
|
32
|
+
parent_mapper: JsonMapper | None
|
|
33
|
+
target_expr_eval_ctx: RowBuilder.EvalCtx | None
|
|
34
34
|
|
|
35
|
-
def __init__(self, src_expr:
|
|
35
|
+
def __init__(self, src_expr: Expr | None, target_expr: Expr | None):
|
|
36
36
|
# TODO: type spec should be list[target_expr.col_type]
|
|
37
37
|
super().__init__(ts.JsonType())
|
|
38
38
|
|
|
@@ -54,7 +54,7 @@ class JsonMapper(Expr):
|
|
|
54
54
|
def _equals(self, _: JsonMapper) -> bool:
|
|
55
55
|
return True
|
|
56
56
|
|
|
57
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
57
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
58
58
|
return None
|
|
59
59
|
|
|
60
60
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
@@ -92,8 +92,8 @@ class JsonMapperDispatch(Expr):
|
|
|
92
92
|
"""
|
|
93
93
|
|
|
94
94
|
target_expr_scope: ExprScope
|
|
95
|
-
parent_mapper:
|
|
96
|
-
target_expr_eval_ctx:
|
|
95
|
+
parent_mapper: JsonMapperDispatch | None
|
|
96
|
+
target_expr_eval_ctx: RowBuilder.EvalCtx | None
|
|
97
97
|
|
|
98
98
|
def __init__(self, src_expr: Expr, target_expr: Expr):
|
|
99
99
|
super().__init__(ts.InvalidType())
|
|
@@ -116,7 +116,7 @@ class JsonMapperDispatch(Expr):
|
|
|
116
116
|
scope_anchor = ObjectRef(self.target_expr_scope, self)
|
|
117
117
|
self.components.append(scope_anchor)
|
|
118
118
|
|
|
119
|
-
def _bind_rel_paths(self, mapper:
|
|
119
|
+
def _bind_rel_paths(self, mapper: JsonMapperDispatch | None = None) -> None:
|
|
120
120
|
self.src_expr._bind_rel_paths(mapper)
|
|
121
121
|
self.target_expr._bind_rel_paths(self)
|
|
122
122
|
self.parent_mapper = mapper
|
pixeltable/exprs/json_path.py
CHANGED
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
import io
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
4
6
|
|
|
5
7
|
import jmespath
|
|
6
8
|
import sqlalchemy as sql
|
|
7
9
|
|
|
8
10
|
from pixeltable import catalog, exceptions as excs, type_system as ts
|
|
9
11
|
|
|
12
|
+
from .column_ref import ColumnRef
|
|
10
13
|
from .data_row import DataRow
|
|
11
14
|
from .expr import Expr
|
|
12
15
|
from .globals import print_slice
|
|
@@ -17,29 +20,41 @@ from .sql_element_cache import SqlElementCache
|
|
|
17
20
|
|
|
18
21
|
|
|
19
22
|
class JsonPath(Expr):
|
|
23
|
+
"""
|
|
24
|
+
anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
|
|
25
|
+
scope_idx: for relative paths, index of referenced JsonMapper
|
|
26
|
+
(0: indicates the immediately preceding JsonMapper, -1: the parent of the immediately preceding mapper, ...)
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
path_elements: list[str | int | slice]
|
|
30
|
+
compiled_path: jmespath.parser.ParsedResult | None
|
|
31
|
+
scope_idx: int
|
|
32
|
+
file_handles: dict[Path, io.BufferedReader] # key: file path
|
|
33
|
+
|
|
20
34
|
def __init__(
|
|
21
|
-
self, anchor:
|
|
35
|
+
self, anchor: Expr | None, path_elements: list[str | int | slice] | None = None, scope_idx: int = 0
|
|
22
36
|
) -> None:
|
|
23
|
-
"""
|
|
24
|
-
anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
|
|
25
|
-
scope_idx: for relative paths, index of referenced JsonMapper
|
|
26
|
-
(0: indicates the immediately preceding JsonMapper, -1: the parent of the immediately preceding mapper, ...)
|
|
27
|
-
"""
|
|
28
37
|
if path_elements is None:
|
|
29
38
|
path_elements = []
|
|
30
39
|
super().__init__(ts.JsonType(nullable=True)) # JsonPath expressions are always nullable
|
|
31
40
|
if anchor is not None:
|
|
32
41
|
self.components = [anchor]
|
|
33
|
-
self.path_elements
|
|
42
|
+
self.path_elements = path_elements
|
|
34
43
|
self.compiled_path = jmespath.compile(self._json_path()) if len(path_elements) > 0 else None
|
|
35
44
|
self.scope_idx = scope_idx
|
|
36
45
|
# NOTE: the _create_id() result will change if set_anchor() gets called;
|
|
37
46
|
# this is not a problem, because _create_id() shouldn't be called after init()
|
|
38
47
|
self.id = self._create_id()
|
|
48
|
+
self.file_handles = {}
|
|
49
|
+
|
|
50
|
+
def release(self) -> None:
|
|
51
|
+
for fh in self.file_handles.values():
|
|
52
|
+
fh.close()
|
|
53
|
+
self.file_handles.clear()
|
|
39
54
|
|
|
40
55
|
def __repr__(self) -> str:
|
|
41
56
|
# else 'R': the anchor is RELATIVE_PATH_ROOT
|
|
42
|
-
anchor_str = str(self.
|
|
57
|
+
anchor_str = str(self.anchor) if self.anchor is not None else 'R'
|
|
43
58
|
if len(self.path_elements) == 0:
|
|
44
59
|
return anchor_str
|
|
45
60
|
return f'{anchor_str}{"." if isinstance(self.path_elements[0], str) else ""}{self._json_path()}'
|
|
@@ -66,7 +81,7 @@ class JsonPath(Expr):
|
|
|
66
81
|
return cls(anchor, path_elements, d['scope_idx'])
|
|
67
82
|
|
|
68
83
|
@property
|
|
69
|
-
def
|
|
84
|
+
def anchor(self) -> Expr | None:
|
|
70
85
|
return None if len(self.components) == 0 else self.components[0]
|
|
71
86
|
|
|
72
87
|
def set_anchor(self, anchor: Expr) -> None:
|
|
@@ -74,17 +89,17 @@ class JsonPath(Expr):
|
|
|
74
89
|
self.components = [anchor]
|
|
75
90
|
|
|
76
91
|
def is_relative_path(self) -> bool:
|
|
77
|
-
return self.
|
|
92
|
+
return self.anchor is None
|
|
78
93
|
|
|
79
94
|
def _has_relative_path(self) -> bool:
|
|
80
95
|
return self.is_relative_path() or super()._has_relative_path()
|
|
81
96
|
|
|
82
|
-
def _bind_rel_paths(self, mapper:
|
|
97
|
+
def _bind_rel_paths(self, mapper: 'JsonMapperDispatch' | None = None) -> None:
|
|
83
98
|
if self.is_relative_path():
|
|
84
99
|
# TODO: take scope_idx into account
|
|
85
100
|
self.set_anchor(mapper.scope_anchor)
|
|
86
101
|
else:
|
|
87
|
-
self.
|
|
102
|
+
self.anchor._bind_rel_paths(mapper)
|
|
88
103
|
|
|
89
104
|
def __call__(self, *args: object, **kwargs: object) -> 'JsonPath':
|
|
90
105
|
"""
|
|
@@ -98,15 +113,15 @@ class JsonPath(Expr):
|
|
|
98
113
|
|
|
99
114
|
def __getattr__(self, name: str) -> 'JsonPath':
|
|
100
115
|
assert isinstance(name, str)
|
|
101
|
-
return JsonPath(self.
|
|
116
|
+
return JsonPath(self.anchor, [*self.path_elements, name])
|
|
102
117
|
|
|
103
118
|
def __getitem__(self, index: object) -> 'JsonPath':
|
|
104
119
|
if isinstance(index, (int, slice, str)):
|
|
105
|
-
return JsonPath(self.
|
|
120
|
+
return JsonPath(self.anchor, [*self.path_elements, index])
|
|
106
121
|
raise excs.Error(f'Invalid json list index: {index}')
|
|
107
122
|
|
|
108
|
-
def default_column_name(self) ->
|
|
109
|
-
anchor_name = self.
|
|
123
|
+
def default_column_name(self) -> str | None:
|
|
124
|
+
anchor_name = self.anchor.default_column_name() if self.anchor is not None else ''
|
|
110
125
|
ret_name = f'{anchor_name}.{self._json_path()}'
|
|
111
126
|
|
|
112
127
|
def cleanup_char(s: str) -> str:
|
|
@@ -133,7 +148,7 @@ class JsonPath(Expr):
|
|
|
133
148
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
134
149
|
return [*super()._id_attrs(), ('path_elements', self.path_elements)]
|
|
135
150
|
|
|
136
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
151
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
137
152
|
"""
|
|
138
153
|
Postgres appears to have a bug: jsonb_path_query('{a: [{b: 0}, {b: 1}]}', '$.a.b') returns
|
|
139
154
|
*two* rows (each containing col val 0), not a single row with [0, 0].
|
|
@@ -158,12 +173,31 @@ class JsonPath(Expr):
|
|
|
158
173
|
result.append(f'[{print_slice(element)}]')
|
|
159
174
|
return ''.join(result)
|
|
160
175
|
|
|
161
|
-
def eval(self,
|
|
162
|
-
assert self.
|
|
163
|
-
val =
|
|
176
|
+
def eval(self, row: DataRow, row_builder: RowBuilder) -> None:
|
|
177
|
+
assert self.anchor is not None, self
|
|
178
|
+
val = row[self.anchor.slot_idx]
|
|
164
179
|
if self.compiled_path is not None:
|
|
165
180
|
val = self.compiled_path.search(val)
|
|
166
|
-
|
|
181
|
+
row[self.slot_idx] = val
|
|
182
|
+
if val is None or self.anchor is None or not isinstance(self.anchor, ColumnRef):
|
|
183
|
+
return
|
|
184
|
+
|
|
185
|
+
# the origin of val is a json-typed column, which might stored inlined objects
|
|
186
|
+
if self.anchor.slot_idx not in row.slot_md:
|
|
187
|
+
# we can infer that there aren't any inlined objects because our execution plan doesn't include
|
|
188
|
+
# materializing the cellmd (eg, insert plans)
|
|
189
|
+
# TODO: have the planner pass that fact into ExprEvalNode explicitly to streamline this path a bit more
|
|
190
|
+
return
|
|
191
|
+
|
|
192
|
+
# defer import until it's needed
|
|
193
|
+
from pixeltable.exec.cell_reconstruction_node import json_has_inlined_objs, reconstruct_json
|
|
194
|
+
|
|
195
|
+
cell_md = row.slot_md[self.anchor.slot_idx]
|
|
196
|
+
if cell_md is None or cell_md.file_urls is None or not json_has_inlined_objs(val):
|
|
197
|
+
# val doesn't contain inlined objects
|
|
198
|
+
return
|
|
199
|
+
|
|
200
|
+
row.vals[self.slot_idx] = reconstruct_json(val, cell_md.file_urls, self.file_handles)
|
|
167
201
|
|
|
168
202
|
|
|
169
203
|
RELATIVE_PATH_ROOT = JsonPath(None)
|