pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/exprs/globals.py
CHANGED
|
@@ -2,10 +2,11 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import datetime
|
|
4
4
|
import enum
|
|
5
|
-
|
|
5
|
+
import uuid
|
|
6
6
|
|
|
7
7
|
# Python types corresponding to our literal types
|
|
8
|
-
LiteralPythonTypes =
|
|
8
|
+
LiteralPythonTypes = str | int | float | bool | datetime.datetime | datetime.date | uuid.UUID
|
|
9
|
+
|
|
9
10
|
|
|
10
11
|
def print_slice(s: slice) -> str:
|
|
11
12
|
start_str = f'{str(s.start) if s.start is not None else ""}'
|
|
@@ -35,7 +36,7 @@ class ComparisonOperator(enum.Enum):
|
|
|
35
36
|
return '>'
|
|
36
37
|
if self == self.GE:
|
|
37
38
|
return '>='
|
|
38
|
-
|
|
39
|
+
raise AssertionError()
|
|
39
40
|
|
|
40
41
|
def reverse(self) -> ComparisonOperator:
|
|
41
42
|
if self == self.LT:
|
|
@@ -61,7 +62,7 @@ class LogicalOperator(enum.Enum):
|
|
|
61
62
|
return '|'
|
|
62
63
|
if self == self.NOT:
|
|
63
64
|
return '~'
|
|
64
|
-
|
|
65
|
+
raise AssertionError()
|
|
65
66
|
|
|
66
67
|
|
|
67
68
|
class ArithmeticOperator(enum.Enum):
|
|
@@ -85,4 +86,16 @@ class ArithmeticOperator(enum.Enum):
|
|
|
85
86
|
return '%'
|
|
86
87
|
if self == self.FLOORDIV:
|
|
87
88
|
return '//'
|
|
88
|
-
|
|
89
|
+
raise AssertionError()
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class StringOperator(enum.Enum):
|
|
93
|
+
CONCAT = 0
|
|
94
|
+
REPEAT = 1
|
|
95
|
+
|
|
96
|
+
def __str__(self) -> str:
|
|
97
|
+
if self == self.CONCAT:
|
|
98
|
+
return '+'
|
|
99
|
+
if self == self.REPEAT:
|
|
100
|
+
return '*'
|
|
101
|
+
raise AssertionError()
|
pixeltable/exprs/in_predicate.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any, Iterable
|
|
3
|
+
from typing import Any, Iterable
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -16,17 +16,18 @@ from .sql_element_cache import SqlElementCache
|
|
|
16
16
|
class InPredicate(Expr):
|
|
17
17
|
"""Predicate corresponding to the SQL IN operator."""
|
|
18
18
|
|
|
19
|
-
def __init__(self, lhs: Expr, value_set_literal:
|
|
19
|
+
def __init__(self, lhs: Expr, value_set_literal: Iterable | None = None, value_set_expr: Expr | None = None):
|
|
20
20
|
assert (value_set_literal is None) != (value_set_expr is None)
|
|
21
21
|
if not lhs.col_type.is_scalar_type():
|
|
22
22
|
raise excs.Error(f'isin(): only supported for scalar types, not {lhs.col_type}')
|
|
23
23
|
super().__init__(ts.BoolType())
|
|
24
24
|
|
|
25
|
-
self.value_list:
|
|
25
|
+
self.value_list: list | None = None # only contains values of the correct type
|
|
26
26
|
if value_set_expr is not None:
|
|
27
27
|
if not value_set_expr.col_type.is_json_type():
|
|
28
28
|
raise excs.Error(
|
|
29
|
-
f'isin(): argument must have a JSON type, but {value_set_expr} has type {value_set_expr.col_type}'
|
|
29
|
+
f'isin(): argument must have a JSON type, but {value_set_expr} has type {value_set_expr.col_type}'
|
|
30
|
+
)
|
|
30
31
|
self.components = [lhs.copy(), value_set_expr.copy()]
|
|
31
32
|
else:
|
|
32
33
|
assert value_set_literal is not None
|
|
@@ -70,9 +71,9 @@ class InPredicate(Expr):
|
|
|
70
71
|
return self.value_list == other.value_list
|
|
71
72
|
|
|
72
73
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
73
|
-
return super()._id_attrs()
|
|
74
|
+
return [*super()._id_attrs(), ('value_list', self.value_list)]
|
|
74
75
|
|
|
75
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
76
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
76
77
|
lhs_sql_exprs = sql_elements.get(self.components[0])
|
|
77
78
|
if lhs_sql_exprs is None or self.value_list is None:
|
|
78
79
|
return None
|
|
@@ -95,4 +96,3 @@ class InPredicate(Expr):
|
|
|
95
96
|
assert 'value_list' in d
|
|
96
97
|
assert len(components) <= 2
|
|
97
98
|
return cls(components[0], d['value_list'], components[1] if len(components) == 2 else None)
|
|
98
|
-
|
pixeltable/exprs/inline_expr.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import
|
|
4
|
-
from typing import Any, Iterable, Optional
|
|
3
|
+
from typing import Any, Iterable
|
|
5
4
|
|
|
6
5
|
import numpy as np
|
|
7
6
|
import sqlalchemy as sql
|
|
@@ -26,14 +25,14 @@ class InlineArray(Expr):
|
|
|
26
25
|
for el in elements:
|
|
27
26
|
if isinstance(el, Expr):
|
|
28
27
|
exprs.append(el)
|
|
29
|
-
elif isinstance(el, list
|
|
30
|
-
exprs.append(
|
|
28
|
+
elif isinstance(el, (list, tuple)):
|
|
29
|
+
exprs.append(Expr.from_array(el))
|
|
31
30
|
else:
|
|
32
31
|
exprs.append(Literal(el))
|
|
33
32
|
|
|
34
|
-
inferred_element_type:
|
|
33
|
+
inferred_element_type: ts.ColumnType | None = ts.InvalidType()
|
|
35
34
|
for i, expr in enumerate(exprs):
|
|
36
|
-
supertype = inferred_element_type.supertype(expr.col_type)
|
|
35
|
+
supertype = inferred_element_type.supertype(expr.col_type, for_inference=True)
|
|
37
36
|
if supertype is None:
|
|
38
37
|
raise excs.Error(
|
|
39
38
|
f'Could not infer element type of array: element of type `{expr.col_type}` at index {i} '
|
|
@@ -45,10 +44,12 @@ class InlineArray(Expr):
|
|
|
45
44
|
col_type = ts.ArrayType((len(exprs),), inferred_element_type)
|
|
46
45
|
elif inferred_element_type.is_array_type():
|
|
47
46
|
assert isinstance(inferred_element_type, ts.ArrayType)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
47
|
+
dtype = inferred_element_type.dtype
|
|
48
|
+
shape = inferred_element_type.shape
|
|
49
|
+
if shape is not None and dtype is not None:
|
|
50
|
+
col_type = ts.ArrayType(shape=(len(exprs), *shape), dtype=dtype)
|
|
51
|
+
else:
|
|
52
|
+
col_type = ts.ArrayType(shape=None, dtype=dtype)
|
|
52
53
|
else:
|
|
53
54
|
raise excs.Error(f'Element type is not a valid dtype for an array: {inferred_element_type}')
|
|
54
55
|
|
|
@@ -63,7 +64,7 @@ class InlineArray(Expr):
|
|
|
63
64
|
def _equals(self, _: InlineArray) -> bool:
|
|
64
65
|
return True # Always true if components match
|
|
65
66
|
|
|
66
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
67
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
67
68
|
return None
|
|
68
69
|
|
|
69
70
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
@@ -83,6 +84,14 @@ class InlineArray(Expr):
|
|
|
83
84
|
# loaded and their types are known.
|
|
84
85
|
return InlineList(components) # type: ignore[return-value]
|
|
85
86
|
|
|
87
|
+
def as_literal(self) -> Literal | None:
|
|
88
|
+
assert isinstance(self.col_type, ts.ArrayType)
|
|
89
|
+
if not all(isinstance(comp, Literal) for comp in self.components):
|
|
90
|
+
return None
|
|
91
|
+
return Literal(
|
|
92
|
+
np.array([c.as_literal().val for c in self.components], dtype=self.col_type.dtype), self.col_type
|
|
93
|
+
)
|
|
94
|
+
|
|
86
95
|
|
|
87
96
|
class InlineList(Expr):
|
|
88
97
|
"""
|
|
@@ -90,16 +99,7 @@ class InlineList(Expr):
|
|
|
90
99
|
"""
|
|
91
100
|
|
|
92
101
|
def __init__(self, elements: Iterable):
|
|
93
|
-
exprs = []
|
|
94
|
-
for el in elements:
|
|
95
|
-
if isinstance(el, Expr):
|
|
96
|
-
exprs.append(el)
|
|
97
|
-
elif isinstance(el, list) or isinstance(el, tuple):
|
|
98
|
-
exprs.append(InlineList(el))
|
|
99
|
-
elif isinstance(el, dict):
|
|
100
|
-
exprs.append(InlineDict(el))
|
|
101
|
-
else:
|
|
102
|
-
exprs.append(Literal(el))
|
|
102
|
+
exprs = [Expr.from_object(el) for el in elements]
|
|
103
103
|
|
|
104
104
|
super().__init__(ts.JsonType())
|
|
105
105
|
self.components.extend(exprs)
|
|
@@ -112,7 +112,7 @@ class InlineList(Expr):
|
|
|
112
112
|
def _equals(self, _: InlineList) -> bool:
|
|
113
113
|
return True # Always true if components match
|
|
114
114
|
|
|
115
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
115
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
116
116
|
return None
|
|
117
117
|
|
|
118
118
|
def eval(self, data_row: DataRow, _: RowBuilder) -> None:
|
|
@@ -125,6 +125,11 @@ class InlineList(Expr):
|
|
|
125
125
|
def _from_dict(cls, _: dict, components: list[Expr]) -> InlineList:
|
|
126
126
|
return cls(components)
|
|
127
127
|
|
|
128
|
+
def as_literal(self) -> Literal | None:
|
|
129
|
+
if not all(isinstance(comp, Literal) for comp in self.components):
|
|
130
|
+
return None
|
|
131
|
+
return Literal([c.as_literal().val for c in self.components], self.col_type)
|
|
132
|
+
|
|
128
133
|
|
|
129
134
|
class InlineDict(Expr):
|
|
130
135
|
"""
|
|
@@ -140,21 +145,14 @@ class InlineDict(Expr):
|
|
|
140
145
|
if not isinstance(key, str):
|
|
141
146
|
raise excs.Error(f'Dictionary requires string keys; {key} has type {type(key)}')
|
|
142
147
|
self.keys.append(key)
|
|
143
|
-
|
|
144
|
-
exprs.append(val)
|
|
145
|
-
elif isinstance(val, dict):
|
|
146
|
-
exprs.append(InlineDict(val))
|
|
147
|
-
elif isinstance(val, list) or isinstance(val, tuple):
|
|
148
|
-
exprs.append(InlineList(val))
|
|
149
|
-
else:
|
|
150
|
-
exprs.append(Literal(val))
|
|
148
|
+
exprs.append(Expr.from_object(val))
|
|
151
149
|
|
|
152
150
|
super().__init__(ts.JsonType())
|
|
153
151
|
self.components.extend(exprs)
|
|
154
152
|
self.id = self._create_id()
|
|
155
153
|
|
|
156
154
|
def __repr__(self) -> str:
|
|
157
|
-
item_strs =
|
|
155
|
+
item_strs = [f"'{key}': {expr}" for key, expr in zip(self.keys, self.components)]
|
|
158
156
|
return '{' + ', '.join(item_strs) + '}'
|
|
159
157
|
|
|
160
158
|
def _equals(self, other: InlineDict) -> bool:
|
|
@@ -162,17 +160,14 @@ class InlineDict(Expr):
|
|
|
162
160
|
return self.keys == other.keys
|
|
163
161
|
|
|
164
162
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
165
|
-
return super()._id_attrs()
|
|
163
|
+
return [*super()._id_attrs(), ('keys', self.keys)]
|
|
166
164
|
|
|
167
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
165
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
168
166
|
return None
|
|
169
167
|
|
|
170
168
|
def eval(self, data_row: DataRow, _: RowBuilder) -> None:
|
|
171
169
|
assert len(self.keys) == len(self.components)
|
|
172
|
-
data_row[self.slot_idx] = {
|
|
173
|
-
key: data_row[expr.slot_idx]
|
|
174
|
-
for key, expr in zip(self.keys, self.components)
|
|
175
|
-
}
|
|
170
|
+
data_row[self.slot_idx] = {key: data_row[expr.slot_idx] for key, expr in zip(self.keys, self.components)}
|
|
176
171
|
|
|
177
172
|
def to_kwargs(self) -> dict[str, Any]:
|
|
178
173
|
"""Deconstructs this expression into a dictionary by recursively unwrapping all Literals,
|
|
@@ -198,3 +193,8 @@ class InlineDict(Expr):
|
|
|
198
193
|
assert len(d['keys']) == len(components)
|
|
199
194
|
arg = dict(zip(d['keys'], components))
|
|
200
195
|
return InlineDict(arg)
|
|
196
|
+
|
|
197
|
+
def as_literal(self) -> Literal | None:
|
|
198
|
+
if not all(isinstance(comp, Literal) for comp in self.components):
|
|
199
|
+
return None
|
|
200
|
+
return Literal(dict(zip(self.keys, (c.as_literal().val for c in self.components))), self.col_type)
|
pixeltable/exprs/is_null.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Optional
|
|
4
|
-
|
|
5
3
|
import sqlalchemy as sql
|
|
6
4
|
|
|
7
5
|
import pixeltable.type_system as ts
|
|
8
6
|
|
|
7
|
+
from .column_ref import ColumnRef
|
|
9
8
|
from .data_row import DataRow
|
|
10
9
|
from .expr import Expr
|
|
11
10
|
from .row_builder import RowBuilder
|
|
@@ -19,12 +18,17 @@ class IsNull(Expr):
|
|
|
19
18
|
self.id = self._create_id()
|
|
20
19
|
|
|
21
20
|
def __repr__(self) -> str:
|
|
22
|
-
return f'{
|
|
21
|
+
return f'{self.components[0]} == None'
|
|
23
22
|
|
|
24
23
|
def _equals(self, other: IsNull) -> bool:
|
|
25
24
|
return True
|
|
26
25
|
|
|
27
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
26
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
27
|
+
c = self.components[0]
|
|
28
|
+
if isinstance(c, ColumnRef) and c.col.stores_external_array():
|
|
29
|
+
# we also need to check CellMd.file_urls for null
|
|
30
|
+
e = sql.and_(c.col.sa_cellmd_col['file_urls'] == None, c.col.sa_col == None)
|
|
31
|
+
return e
|
|
28
32
|
e = sql_elements.get(self.components[0])
|
|
29
33
|
if e is None:
|
|
30
34
|
return None
|
pixeltable/exprs/json_mapper.py
CHANGED
|
@@ -1,51 +1,143 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
7
7
|
import pixeltable.type_system as ts
|
|
8
|
+
|
|
8
9
|
from .data_row import DataRow
|
|
9
|
-
from .expr import Expr, ExprScope
|
|
10
|
+
from .expr import _GLOBAL_SCOPE, Expr, ExprScope
|
|
10
11
|
from .row_builder import RowBuilder
|
|
11
12
|
from .sql_element_cache import SqlElementCache
|
|
12
13
|
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from .object_ref import ObjectRef
|
|
16
|
+
|
|
13
17
|
|
|
14
18
|
class JsonMapper(Expr):
|
|
15
19
|
"""
|
|
16
20
|
JsonMapper transforms the list output of a JsonPath by applying a target expr to every element of the list.
|
|
17
21
|
The target expr would typically contain relative JsonPaths, which are bound to an ObjectRef, which in turn
|
|
18
22
|
is populated by JsonMapper.eval(). The JsonMapper effectively creates a new scope for its target expr.
|
|
23
|
+
|
|
24
|
+
JsonMapper is executed in two phases:
|
|
25
|
+
- the first phase is handled by Expr subclass JsonMapperDispatch, which constructs one nested DataRow per source
|
|
26
|
+
list element and evaluates the target expr within that (the nested DataRows are stored as a NestedRowList in the
|
|
27
|
+
slot of JsonMapperDispatch)
|
|
28
|
+
- JsonMapper.eval() collects the slot values of the target expr into its result list
|
|
19
29
|
"""
|
|
20
|
-
|
|
30
|
+
|
|
31
|
+
target_expr_scope: ExprScope
|
|
32
|
+
parent_mapper: JsonMapper | None
|
|
33
|
+
target_expr_eval_ctx: RowBuilder.EvalCtx | None
|
|
34
|
+
|
|
35
|
+
def __init__(self, src_expr: Expr | None, target_expr: Expr | None):
|
|
21
36
|
# TODO: type spec should be list[target_expr.col_type]
|
|
22
37
|
super().__init__(ts.JsonType())
|
|
23
38
|
|
|
39
|
+
dispatch = JsonMapperDispatch(src_expr, target_expr)
|
|
40
|
+
self.components.append(dispatch)
|
|
41
|
+
self.id = self._create_id()
|
|
42
|
+
|
|
43
|
+
def __repr__(self) -> str:
|
|
44
|
+
return f'map({self._src_expr}, lambda R: {self._target_expr})'
|
|
45
|
+
|
|
46
|
+
@property
|
|
47
|
+
def _src_expr(self) -> Expr:
|
|
48
|
+
return self.components[0].src_expr
|
|
49
|
+
|
|
50
|
+
@property
|
|
51
|
+
def _target_expr(self) -> Expr:
|
|
52
|
+
return self.components[0].target_expr
|
|
53
|
+
|
|
54
|
+
def _equals(self, _: JsonMapper) -> bool:
|
|
55
|
+
return True
|
|
56
|
+
|
|
57
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
58
|
+
return None
|
|
59
|
+
|
|
60
|
+
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
61
|
+
from ..exec.expr_eval.evaluators import NestedRowList
|
|
62
|
+
|
|
63
|
+
dispatch_slot_idx = self.components[0].slot_idx
|
|
64
|
+
nested_rows = data_row.vals[dispatch_slot_idx]
|
|
65
|
+
if nested_rows is None:
|
|
66
|
+
data_row[self.slot_idx] = None
|
|
67
|
+
return
|
|
68
|
+
assert isinstance(nested_rows, NestedRowList)
|
|
69
|
+
# TODO: get the materialized slot idx, instead of relying on the fact that the target_expr is always at the end
|
|
70
|
+
data_row[self.slot_idx] = [row.vals[-1] for row in nested_rows.rows]
|
|
71
|
+
|
|
72
|
+
def _as_dict(self) -> dict:
|
|
73
|
+
"""
|
|
74
|
+
We only serialize src and target exprs, everything else is re-created at runtime.
|
|
75
|
+
"""
|
|
76
|
+
return {'components': [self._src_expr.as_dict(), self._target_expr.as_dict()]}
|
|
77
|
+
|
|
78
|
+
@classmethod
|
|
79
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapper:
|
|
80
|
+
assert len(components) == 2
|
|
81
|
+
src_expr, target_expr = components[0], components[1]
|
|
82
|
+
return cls(src_expr, target_expr)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class JsonMapperDispatch(Expr):
|
|
86
|
+
"""
|
|
87
|
+
An operational Expr (ie, it doesn't represent any syntactic element) that is used by JsonMapper to materialize
|
|
88
|
+
its input DataRows. It has the same dependencies as the originating JsonMapper.
|
|
89
|
+
|
|
90
|
+
- The execution (= row dispatch) is handled by an expr_eval.Evaluator (JsonMapperDispatcher).
|
|
91
|
+
- It stores a NestedRowList instance in its slot.
|
|
92
|
+
"""
|
|
93
|
+
|
|
94
|
+
target_expr_scope: ExprScope
|
|
95
|
+
parent_mapper: JsonMapperDispatch | None
|
|
96
|
+
target_expr_eval_ctx: RowBuilder.EvalCtx | None
|
|
97
|
+
|
|
98
|
+
def __init__(self, src_expr: Expr, target_expr: Expr):
|
|
99
|
+
super().__init__(ts.InvalidType())
|
|
100
|
+
|
|
24
101
|
# we're creating a new scope, but we don't know yet whether this is nested within another JsonMapper;
|
|
25
102
|
# this gets resolved in bind_rel_paths(); for now we assume we're in the global scope
|
|
26
103
|
self.target_expr_scope = ExprScope(_GLOBAL_SCOPE)
|
|
27
104
|
|
|
28
105
|
from .object_ref import ObjectRef
|
|
29
|
-
|
|
30
|
-
self.components = [src_expr, target_expr
|
|
31
|
-
self.parent_mapper
|
|
32
|
-
self.target_expr_eval_ctx
|
|
106
|
+
|
|
107
|
+
self.components = [src_expr, target_expr]
|
|
108
|
+
self.parent_mapper = None
|
|
109
|
+
self.target_expr_eval_ctx = None
|
|
110
|
+
|
|
111
|
+
# Intentionally create the id now, before adding the scope anchor; this ensures that JsonMapperDispatch
|
|
112
|
+
# instances will be recognized as equal so long as they have the same src_expr and target_expr.
|
|
113
|
+
# TODO: Might this cause problems after certain substitutions?
|
|
33
114
|
self.id = self._create_id()
|
|
34
115
|
|
|
35
|
-
|
|
36
|
-
self.
|
|
37
|
-
|
|
116
|
+
scope_anchor = ObjectRef(self.target_expr_scope, self)
|
|
117
|
+
self.components.append(scope_anchor)
|
|
118
|
+
|
|
119
|
+
def _bind_rel_paths(self, mapper: JsonMapperDispatch | None = None) -> None:
|
|
120
|
+
self.src_expr._bind_rel_paths(mapper)
|
|
121
|
+
self.target_expr._bind_rel_paths(self)
|
|
38
122
|
self.parent_mapper = mapper
|
|
39
123
|
parent_scope = _GLOBAL_SCOPE if mapper is None else mapper.target_expr_scope
|
|
40
124
|
self.target_expr_scope.parent = parent_scope
|
|
41
125
|
|
|
126
|
+
def equals(self, other: Expr) -> bool:
|
|
127
|
+
"""
|
|
128
|
+
We override equals() because we need to avoid comparing our scope anchor.
|
|
129
|
+
"""
|
|
130
|
+
if type(self) is not type(other):
|
|
131
|
+
return False
|
|
132
|
+
return self.src_expr.equals(other.src_expr) and self.target_expr.equals(other.target_expr)
|
|
133
|
+
|
|
42
134
|
def scope(self) -> ExprScope:
|
|
43
135
|
# need to ignore target_expr
|
|
44
|
-
return self.
|
|
136
|
+
return self.src_expr.scope()
|
|
45
137
|
|
|
46
138
|
def dependencies(self) -> list[Expr]:
|
|
47
|
-
result = [self.
|
|
48
|
-
result.extend(self._target_dependencies(self.
|
|
139
|
+
result = [self.src_expr]
|
|
140
|
+
result.extend(self._target_dependencies(self.target_expr))
|
|
49
141
|
return result
|
|
50
142
|
|
|
51
143
|
def _target_dependencies(self, e: Expr) -> list[Expr]:
|
|
@@ -61,61 +153,35 @@ class JsonMapper(Expr):
|
|
|
61
153
|
result.extend(self._target_dependencies(c))
|
|
62
154
|
return result
|
|
63
155
|
|
|
64
|
-
def equals(self, other: Expr) -> bool:
|
|
65
|
-
"""
|
|
66
|
-
We override equals() because we need to avoid comparing our scope anchor.
|
|
67
|
-
"""
|
|
68
|
-
if type(self) != type(other):
|
|
69
|
-
return False
|
|
70
|
-
return self._src_expr.equals(other._src_expr) and self._target_expr.equals(other._target_expr)
|
|
71
|
-
|
|
72
|
-
def __repr__(self) -> str:
|
|
73
|
-
return f'{str(self._src_expr)} >> {str(self._target_expr)}'
|
|
74
|
-
|
|
75
156
|
@property
|
|
76
|
-
def
|
|
157
|
+
def src_expr(self) -> Expr:
|
|
77
158
|
return self.components[0]
|
|
78
159
|
|
|
79
160
|
@property
|
|
80
|
-
def
|
|
161
|
+
def target_expr(self) -> Expr:
|
|
81
162
|
return self.components[1]
|
|
82
163
|
|
|
83
164
|
@property
|
|
84
|
-
def scope_anchor(self) ->
|
|
85
|
-
|
|
165
|
+
def scope_anchor(self) -> 'ObjectRef':
|
|
166
|
+
from .object_ref import ObjectRef
|
|
86
167
|
|
|
87
|
-
|
|
88
|
-
|
|
168
|
+
result = self.components[2]
|
|
169
|
+
assert isinstance(result, ObjectRef)
|
|
170
|
+
return result
|
|
89
171
|
|
|
90
|
-
def
|
|
91
|
-
return
|
|
172
|
+
def __repr__(self) -> str:
|
|
173
|
+
return 'JsonMapperDispatch()'
|
|
92
174
|
|
|
93
175
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
94
|
-
#
|
|
95
|
-
|
|
96
|
-
if not isinstance(src, list):
|
|
97
|
-
# invalid/non-list src path
|
|
98
|
-
data_row[self.slot_idx] = None
|
|
99
|
-
return
|
|
100
|
-
|
|
101
|
-
result = [None] * len(src)
|
|
102
|
-
if self.target_expr_eval_ctx is None:
|
|
103
|
-
self.target_expr_eval_ctx = row_builder.create_eval_ctx([self._target_expr])
|
|
104
|
-
for i, val in enumerate(src):
|
|
105
|
-
data_row[self.scope_anchor.slot_idx] = val
|
|
106
|
-
# stored target_expr
|
|
107
|
-
row_builder.eval(data_row, self.target_expr_eval_ctx)
|
|
108
|
-
result[i] = data_row[self._target_expr.slot_idx]
|
|
109
|
-
data_row[self.slot_idx] = result
|
|
176
|
+
# eval is handled by JsonMapperDispatcher
|
|
177
|
+
raise AssertionError('this should never be called')
|
|
110
178
|
|
|
111
179
|
def _as_dict(self) -> dict:
|
|
112
180
|
"""
|
|
113
|
-
|
|
181
|
+
JsonMapperDispatch instances are only created by the JsonMapper c'tor and never need to be serialized.
|
|
114
182
|
"""
|
|
115
|
-
|
|
183
|
+
raise AssertionError('this should never be called')
|
|
116
184
|
|
|
117
185
|
@classmethod
|
|
118
|
-
def _from_dict(cls, d: dict, components: list[Expr]) ->
|
|
119
|
-
|
|
120
|
-
return cls(components[0], components[1])
|
|
121
|
-
|
|
186
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapperDispatch:
|
|
187
|
+
raise AssertionError('this should never be called')
|