pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/exprs/json_path.py
CHANGED
|
@@ -1,59 +1,75 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
import io
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
4
6
|
|
|
5
7
|
import jmespath
|
|
6
8
|
import sqlalchemy as sql
|
|
7
9
|
|
|
8
|
-
|
|
9
|
-
import pixeltable.catalog as catalog
|
|
10
|
-
import pixeltable.exceptions as excs
|
|
11
|
-
import pixeltable.type_system as ts
|
|
10
|
+
from pixeltable import catalog, exceptions as excs, type_system as ts
|
|
12
11
|
|
|
12
|
+
from .column_ref import ColumnRef
|
|
13
13
|
from .data_row import DataRow
|
|
14
14
|
from .expr import Expr
|
|
15
15
|
from .globals import print_slice
|
|
16
|
-
from .json_mapper import
|
|
16
|
+
from .json_mapper import JsonMapperDispatch
|
|
17
|
+
from .object_ref import ObjectRef
|
|
17
18
|
from .row_builder import RowBuilder
|
|
18
19
|
from .sql_element_cache import SqlElementCache
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
class JsonPath(Expr):
|
|
23
|
+
"""
|
|
24
|
+
anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
|
|
25
|
+
scope_idx: for relative paths, index of referenced JsonMapper
|
|
26
|
+
(0: indicates the immediately preceding JsonMapper, -1: the parent of the immediately preceding mapper, ...)
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
path_elements: list[str | int | slice]
|
|
30
|
+
compiled_path: jmespath.parser.ParsedResult | None
|
|
31
|
+
scope_idx: int
|
|
32
|
+
file_handles: dict[Path, io.BufferedReader] # key: file path
|
|
33
|
+
|
|
22
34
|
def __init__(
|
|
23
|
-
self,
|
|
24
|
-
anchor: Optional['pxt.exprs.Expr'],
|
|
25
|
-
path_elements: Optional[list[Union[str, int, slice]]] = None,
|
|
26
|
-
scope_idx: int = 0
|
|
35
|
+
self, anchor: Expr | None, path_elements: list[str | int | slice] | None = None, scope_idx: int = 0
|
|
27
36
|
) -> None:
|
|
28
|
-
"""
|
|
29
|
-
anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
|
|
30
|
-
scope_idx: for relative paths, index of referenced JsonMapper
|
|
31
|
-
(0: indicates the immediately preceding JsonMapper, -1: the parent of the immediately preceding mapper, ...)
|
|
32
|
-
"""
|
|
33
37
|
if path_elements is None:
|
|
34
38
|
path_elements = []
|
|
35
39
|
super().__init__(ts.JsonType(nullable=True)) # JsonPath expressions are always nullable
|
|
36
40
|
if anchor is not None:
|
|
37
41
|
self.components = [anchor]
|
|
38
|
-
self.path_elements
|
|
42
|
+
self.path_elements = path_elements
|
|
39
43
|
self.compiled_path = jmespath.compile(self._json_path()) if len(path_elements) > 0 else None
|
|
40
44
|
self.scope_idx = scope_idx
|
|
41
45
|
# NOTE: the _create_id() result will change if set_anchor() gets called;
|
|
42
46
|
# this is not a problem, because _create_id() shouldn't be called after init()
|
|
43
47
|
self.id = self._create_id()
|
|
48
|
+
self.file_handles = {}
|
|
49
|
+
|
|
50
|
+
def release(self) -> None:
|
|
51
|
+
for fh in self.file_handles.values():
|
|
52
|
+
fh.close()
|
|
53
|
+
self.file_handles.clear()
|
|
44
54
|
|
|
45
55
|
def __repr__(self) -> str:
|
|
46
|
-
# else
|
|
47
|
-
|
|
48
|
-
|
|
56
|
+
# else 'R': the anchor is RELATIVE_PATH_ROOT
|
|
57
|
+
anchor_str = str(self.anchor) if self.anchor is not None else 'R'
|
|
58
|
+
if len(self.path_elements) == 0:
|
|
59
|
+
return anchor_str
|
|
60
|
+
return f'{anchor_str}{"." if isinstance(self.path_elements[0], str) else ""}{self._json_path()}'
|
|
49
61
|
|
|
50
62
|
def _as_dict(self) -> dict:
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
63
|
+
assert len(self.components) <= 1
|
|
64
|
+
components_dict: dict[str, Any]
|
|
65
|
+
if len(self.components) == 0 or isinstance(self.components[0], ObjectRef):
|
|
66
|
+
# If the anchor is an ObjectRef, it means this JsonPath is a bound relative path. We store it as a relative
|
|
67
|
+
# path, *not* a bound path (which has no meaning in the dict).
|
|
68
|
+
components_dict = {}
|
|
69
|
+
else:
|
|
70
|
+
components_dict = super()._as_dict()
|
|
71
|
+
path_elements = [[el.start, el.stop, el.step] if isinstance(el, slice) else el for el in self.path_elements]
|
|
72
|
+
return {'path_elements': path_elements, 'scope_idx': self.scope_idx, **components_dict}
|
|
57
73
|
|
|
58
74
|
@classmethod
|
|
59
75
|
def _from_dict(cls, d: dict, components: list[Expr]) -> JsonPath:
|
|
@@ -61,15 +77,11 @@ class JsonPath(Expr):
|
|
|
61
77
|
assert 'scope_idx' in d
|
|
62
78
|
assert len(components) <= 1
|
|
63
79
|
anchor = components[0] if len(components) == 1 else None
|
|
64
|
-
path_elements = [
|
|
65
|
-
slice(el[0], el[1], el[2]) if isinstance(el, list)
|
|
66
|
-
else el
|
|
67
|
-
for el in d['path_elements']
|
|
68
|
-
]
|
|
80
|
+
path_elements = [slice(el[0], el[1], el[2]) if isinstance(el, list) else el for el in d['path_elements']]
|
|
69
81
|
return cls(anchor, path_elements, d['scope_idx'])
|
|
70
82
|
|
|
71
83
|
@property
|
|
72
|
-
def
|
|
84
|
+
def anchor(self) -> Expr | None:
|
|
73
85
|
return None if len(self.components) == 0 else self.components[0]
|
|
74
86
|
|
|
75
87
|
def set_anchor(self, anchor: Expr) -> None:
|
|
@@ -77,44 +89,42 @@ class JsonPath(Expr):
|
|
|
77
89
|
self.components = [anchor]
|
|
78
90
|
|
|
79
91
|
def is_relative_path(self) -> bool:
|
|
80
|
-
return self.
|
|
92
|
+
return self.anchor is None
|
|
81
93
|
|
|
82
|
-
def
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
self.
|
|
94
|
+
def _has_relative_path(self) -> bool:
|
|
95
|
+
return self.is_relative_path() or super()._has_relative_path()
|
|
96
|
+
|
|
97
|
+
def _bind_rel_paths(self, mapper: 'JsonMapperDispatch' | None = None) -> None:
|
|
98
|
+
if self.is_relative_path():
|
|
99
|
+
# TODO: take scope_idx into account
|
|
100
|
+
self.set_anchor(mapper.scope_anchor)
|
|
101
|
+
else:
|
|
102
|
+
self.anchor._bind_rel_paths(mapper)
|
|
87
103
|
|
|
88
104
|
def __call__(self, *args: object, **kwargs: object) -> 'JsonPath':
|
|
89
105
|
"""
|
|
90
106
|
Construct a relative path that references an ancestor of the immediately enclosing JsonMapper.
|
|
91
107
|
"""
|
|
92
108
|
if not self.is_relative_path():
|
|
93
|
-
raise excs.Error(
|
|
109
|
+
raise excs.Error('() for an absolute path is invalid')
|
|
94
110
|
if len(args) != 1 or not isinstance(args[0], int) or args[0] >= 0:
|
|
95
|
-
raise excs.Error(
|
|
111
|
+
raise excs.Error('R() requires a negative index')
|
|
96
112
|
return JsonPath(None, [], args[0])
|
|
97
113
|
|
|
98
114
|
def __getattr__(self, name: str) -> 'JsonPath':
|
|
99
115
|
assert isinstance(name, str)
|
|
100
|
-
return JsonPath(self.
|
|
116
|
+
return JsonPath(self.anchor, [*self.path_elements, name])
|
|
101
117
|
|
|
102
118
|
def __getitem__(self, index: object) -> 'JsonPath':
|
|
103
119
|
if isinstance(index, (int, slice, str)):
|
|
104
|
-
return JsonPath(self.
|
|
120
|
+
return JsonPath(self.anchor, [*self.path_elements, index])
|
|
105
121
|
raise excs.Error(f'Invalid json list index: {index}')
|
|
106
122
|
|
|
107
|
-
def
|
|
108
|
-
|
|
109
|
-
if rhs_expr is None:
|
|
110
|
-
raise excs.Error(f'>> requires an expression on the right-hand side, found {type(other)}')
|
|
111
|
-
return JsonMapper(self, rhs_expr)
|
|
112
|
-
|
|
113
|
-
def default_column_name(self) -> Optional[str]:
|
|
114
|
-
anchor_name = self._anchor.default_column_name() if self._anchor is not None else ''
|
|
123
|
+
def default_column_name(self) -> str | None:
|
|
124
|
+
anchor_name = self.anchor.default_column_name() if self.anchor is not None else ''
|
|
115
125
|
ret_name = f'{anchor_name}.{self._json_path()}'
|
|
116
126
|
|
|
117
|
-
def cleanup_char(s
|
|
127
|
+
def cleanup_char(s: str) -> str:
|
|
118
128
|
if s == '.':
|
|
119
129
|
return '_'
|
|
120
130
|
elif s == '*':
|
|
@@ -125,8 +135,8 @@ class JsonPath(Expr):
|
|
|
125
135
|
return ''
|
|
126
136
|
|
|
127
137
|
clean_name = ''.join(map(cleanup_char, ret_name))
|
|
128
|
-
clean_name = clean_name.lstrip('_')
|
|
129
|
-
if clean_name
|
|
138
|
+
clean_name = clean_name.lstrip('_') # remove leading underscore
|
|
139
|
+
if not clean_name: # Replace '' with None
|
|
130
140
|
clean_name = None
|
|
131
141
|
|
|
132
142
|
assert clean_name is None or catalog.is_valid_identifier(clean_name)
|
|
@@ -136,17 +146,17 @@ class JsonPath(Expr):
|
|
|
136
146
|
return self.path_elements == other.path_elements
|
|
137
147
|
|
|
138
148
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
139
|
-
return super()._id_attrs()
|
|
149
|
+
return [*super()._id_attrs(), ('path_elements', self.path_elements)]
|
|
140
150
|
|
|
141
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
151
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
142
152
|
"""
|
|
143
153
|
Postgres appears to have a bug: jsonb_path_query('{a: [{b: 0}, {b: 1}]}', '$.a.b') returns
|
|
144
154
|
*two* rows (each containing col val 0), not a single row with [0, 0].
|
|
145
155
|
We need to use a workaround: retrieve the entire dict, then use jmespath to extract the path correctly.
|
|
146
156
|
"""
|
|
147
|
-
#path_str = '$.' + '.'.join(self.path_elements)
|
|
148
|
-
#assert isinstance(self._anchor(), ColumnRef)
|
|
149
|
-
#return sql.func.jsonb_path_query(self._anchor().col.sa_col, path_str)
|
|
157
|
+
# path_str = '$.' + '.'.join(self.path_elements)
|
|
158
|
+
# assert isinstance(self._anchor(), ColumnRef)
|
|
159
|
+
# return sql.func.jsonb_path_query(self._anchor().col.sa_col, path_str)
|
|
150
160
|
return None
|
|
151
161
|
|
|
152
162
|
def _json_path(self) -> str:
|
|
@@ -163,11 +173,31 @@ class JsonPath(Expr):
|
|
|
163
173
|
result.append(f'[{print_slice(element)}]')
|
|
164
174
|
return ''.join(result)
|
|
165
175
|
|
|
166
|
-
def eval(self,
|
|
167
|
-
|
|
176
|
+
def eval(self, row: DataRow, row_builder: RowBuilder) -> None:
|
|
177
|
+
assert self.anchor is not None, self
|
|
178
|
+
val = row[self.anchor.slot_idx]
|
|
168
179
|
if self.compiled_path is not None:
|
|
169
180
|
val = self.compiled_path.search(val)
|
|
170
|
-
|
|
181
|
+
row[self.slot_idx] = val
|
|
182
|
+
if val is None or self.anchor is None or not isinstance(self.anchor, ColumnRef):
|
|
183
|
+
return
|
|
184
|
+
|
|
185
|
+
# the origin of val is a json-typed column, which might store inlined objects
|
|
186
|
+
if self.anchor.slot_idx not in row.slot_md:
|
|
187
|
+
# we can infer that there aren't any inlined objects because our execution plan doesn't include
|
|
188
|
+
# materializing the cellmd (eg, insert plans)
|
|
189
|
+
# TODO: have the planner pass that fact into ExprEvalNode explicitly to streamline this path a bit more
|
|
190
|
+
return
|
|
191
|
+
|
|
192
|
+
# defer import until it's needed
|
|
193
|
+
from pixeltable.exec.cell_reconstruction_node import json_has_inlined_objs, reconstruct_json
|
|
194
|
+
|
|
195
|
+
cell_md = row.slot_md[self.anchor.slot_idx]
|
|
196
|
+
if cell_md is None or cell_md.file_urls is None or not json_has_inlined_objs(val):
|
|
197
|
+
# val doesn't contain inlined objects
|
|
198
|
+
return
|
|
199
|
+
|
|
200
|
+
row.vals[self.slot_idx] = reconstruct_json(val, cell_md.file_urls, self.file_handles)
|
|
171
201
|
|
|
172
202
|
|
|
173
203
|
RELATIVE_PATH_ROOT = JsonPath(None)
|
pixeltable/exprs/literal.py
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import base64
|
|
3
4
|
import datetime
|
|
4
|
-
|
|
5
|
+
import uuid
|
|
6
|
+
from typing import Any
|
|
5
7
|
|
|
8
|
+
import numpy as np
|
|
6
9
|
import sqlalchemy as sql
|
|
7
10
|
|
|
8
11
|
import pixeltable.type_system as ts
|
|
@@ -15,7 +18,9 @@ from .sql_element_cache import SqlElementCache
|
|
|
15
18
|
|
|
16
19
|
|
|
17
20
|
class Literal(Expr):
|
|
18
|
-
|
|
21
|
+
val: Any
|
|
22
|
+
|
|
23
|
+
def __init__(self, val: Any, col_type: ts.ColumnType | None = None):
|
|
19
24
|
if col_type is not None:
|
|
20
25
|
val = col_type.create_literal(val)
|
|
21
26
|
else:
|
|
@@ -33,10 +38,13 @@ class Literal(Expr):
|
|
|
33
38
|
val = val.replace(tzinfo=default_tz)
|
|
34
39
|
# Now convert to UTC
|
|
35
40
|
val = val.astimezone(datetime.timezone.utc)
|
|
41
|
+
if isinstance(val, tuple):
|
|
42
|
+
# Tuples are stored as a list
|
|
43
|
+
val = list(val)
|
|
36
44
|
self.val = val
|
|
37
45
|
self.id = self._create_id()
|
|
38
46
|
|
|
39
|
-
def default_column_name(self) ->
|
|
47
|
+
def default_column_name(self) -> str | None:
|
|
40
48
|
return 'Literal'
|
|
41
49
|
|
|
42
50
|
def __str__(self) -> str:
|
|
@@ -46,6 +54,15 @@ class Literal(Expr):
|
|
|
46
54
|
assert isinstance(self.val, datetime.datetime)
|
|
47
55
|
default_tz = Env.get().default_time_zone
|
|
48
56
|
return f"'{self.val.astimezone(default_tz).isoformat()}'"
|
|
57
|
+
if self.col_type.is_date_type():
|
|
58
|
+
assert isinstance(self.val, datetime.date)
|
|
59
|
+
return f"'{self.val.isoformat()}'"
|
|
60
|
+
if self.col_type.is_uuid_type():
|
|
61
|
+
assert isinstance(self.val, uuid.UUID)
|
|
62
|
+
return f"'{self.val}'"
|
|
63
|
+
if self.col_type.is_array_type():
|
|
64
|
+
assert isinstance(self.val, np.ndarray)
|
|
65
|
+
return str(self.val.tolist())
|
|
49
66
|
return str(self.val)
|
|
50
67
|
|
|
51
68
|
def __repr__(self) -> str:
|
|
@@ -55,19 +72,18 @@ class Literal(Expr):
|
|
|
55
72
|
return self.val == other.val
|
|
56
73
|
|
|
57
74
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
58
|
-
return super()._id_attrs()
|
|
75
|
+
return [*super()._id_attrs(), ('val', self.val)]
|
|
59
76
|
|
|
60
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
61
|
-
#
|
|
62
|
-
|
|
63
|
-
return sql.sql.expression.literal(self.val)
|
|
77
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
78
|
+
# Return a sql object so that constants can participate in SQL expressions
|
|
79
|
+
return sql.sql.expression.literal(self.val, type_=self.col_type.to_sa_type())
|
|
64
80
|
|
|
65
81
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
66
82
|
# this will be called, even though sql_expr() does not return None
|
|
67
83
|
data_row[self.slot_idx] = self.val
|
|
68
84
|
|
|
69
85
|
def _as_dict(self) -> dict:
|
|
70
|
-
# For some types, we need to
|
|
86
|
+
# For some types, we need to explicitly record their type, because JSON does not know
|
|
71
87
|
# how to interpret them unambiguously
|
|
72
88
|
if self.col_type.is_timestamp_type():
|
|
73
89
|
assert isinstance(self.val, datetime.datetime)
|
|
@@ -76,18 +92,47 @@ class Literal(Expr):
|
|
|
76
92
|
# stored as UTC in the database)
|
|
77
93
|
encoded_val = self.val.isoformat()
|
|
78
94
|
return {'val': encoded_val, 'val_t': self.col_type._type.name, **super()._as_dict()}
|
|
95
|
+
elif self.col_type.is_date_type():
|
|
96
|
+
assert isinstance(self.val, datetime.date)
|
|
97
|
+
encoded_val = self.val.isoformat()
|
|
98
|
+
return {'val': encoded_val, 'val_t': self.col_type._type.name, **super()._as_dict()}
|
|
99
|
+
elif self.col_type.is_uuid_type():
|
|
100
|
+
assert isinstance(self.val, uuid.UUID)
|
|
101
|
+
encoded_val = str(self.val)
|
|
102
|
+
return {'val': encoded_val, 'val_t': self.col_type._type.name, **super()._as_dict()}
|
|
103
|
+
elif self.col_type.is_binary_type():
|
|
104
|
+
assert isinstance(self.val, bytes)
|
|
105
|
+
encoded_val = base64.b64encode(self.val).decode('utf-8')
|
|
106
|
+
return {'val': encoded_val, 'val_t': self.col_type._type.name, **super()._as_dict()}
|
|
107
|
+
elif self.col_type.is_array_type():
|
|
108
|
+
assert isinstance(self.val, np.ndarray)
|
|
109
|
+
return {'val': self.val.tolist(), 'val_t': self.col_type._type.name, **super()._as_dict()}
|
|
79
110
|
else:
|
|
80
111
|
return {'val': self.val, **super()._as_dict()}
|
|
81
112
|
|
|
113
|
+
def as_literal(self) -> Literal | None:
|
|
114
|
+
return self
|
|
115
|
+
|
|
82
116
|
@classmethod
|
|
83
117
|
def _from_dict(cls, d: dict, components: list[Expr]) -> Literal:
|
|
84
118
|
assert 'val' in d
|
|
85
119
|
if 'val_t' in d:
|
|
86
120
|
val_t = d['val_t']
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
121
|
+
if val_t == ts.ColumnType.Type.DATE.name:
|
|
122
|
+
dt = datetime.date.fromisoformat(d['val'])
|
|
123
|
+
return cls(dt)
|
|
124
|
+
elif val_t == ts.ColumnType.Type.TIMESTAMP.name:
|
|
125
|
+
dt = datetime.datetime.fromisoformat(d['val'])
|
|
126
|
+
assert dt.tzinfo == datetime.timezone.utc # Must be UTC in the database
|
|
127
|
+
return cls(dt)
|
|
128
|
+
elif val_t == ts.ColumnType.Type.UUID.name:
|
|
129
|
+
uuid_val = uuid.UUID(d['val'])
|
|
130
|
+
return cls(uuid_val)
|
|
131
|
+
elif val_t == ts.ColumnType.Type.BINARY.name:
|
|
132
|
+
assert isinstance(d['val'], str)
|
|
133
|
+
bytes_val = base64.b64decode(d['val'].encode('utf-8'))
|
|
134
|
+
return cls(bytes_val)
|
|
135
|
+
elif val_t == ts.ColumnType.Type.ARRAY.name:
|
|
136
|
+
arrays = np.array(d['val'])
|
|
137
|
+
return cls(arrays)
|
|
138
|
+
return cls(d['val'])
|
pixeltable/exprs/method_ref.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -19,10 +19,11 @@ class MethodRef(Expr):
|
|
|
19
19
|
When a `MethodRef` is called, it returns a `FunctionCall` with the base expression as the first argument.
|
|
20
20
|
The effective arity of a `MethodRef` is one less than the arity of the underlying `Function`.
|
|
21
21
|
"""
|
|
22
|
+
|
|
22
23
|
# TODO: Should this even be an `Expr`? It can't actually be evaluated directly (it has to be first
|
|
23
24
|
# converted to a `FunctionCall` by binding any remaining parameters).
|
|
24
25
|
|
|
25
|
-
def __init__(self, base_expr: Expr, method_name: str):
|
|
26
|
+
def __init__(self, base_expr: Expr, method_name: str) -> None:
|
|
26
27
|
super().__init__(ts.InvalidType()) # The `MethodRef` is untyped until it is called.
|
|
27
28
|
self.base_expr = base_expr
|
|
28
29
|
self.method_name = method_name
|
|
@@ -42,7 +43,7 @@ class MethodRef(Expr):
|
|
|
42
43
|
assert len(components) == 1
|
|
43
44
|
return cls(components[0], d['method_name'])
|
|
44
45
|
|
|
45
|
-
def __call__(self, *args, **kwargs) -> FunctionCall:
|
|
46
|
+
def __call__(self, *args: Any, **kwargs: Any) -> FunctionCall:
|
|
46
47
|
result = self.fn(*[self.base_expr, *args], **kwargs)
|
|
47
48
|
assert isinstance(result, FunctionCall)
|
|
48
49
|
result.is_method_call = True
|
|
@@ -52,13 +53,13 @@ class MethodRef(Expr):
|
|
|
52
53
|
return self.base_expr.id == other.base_expr.id and self.method_name == other.method_name
|
|
53
54
|
|
|
54
55
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
55
|
-
return super()._id_attrs()
|
|
56
|
+
return [*super()._id_attrs(), ('method_name', self.method_name)]
|
|
56
57
|
|
|
57
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
58
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
58
59
|
return None
|
|
59
60
|
|
|
60
61
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
61
|
-
|
|
62
|
+
raise AssertionError('MethodRef cannot be evaluated directly')
|
|
62
63
|
|
|
63
64
|
def __repr__(self) -> str:
|
|
64
65
|
return f'{self.base_expr}.{self.method_name}'
|
pixeltable/exprs/object_ref.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Any
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -8,7 +8,7 @@ import pixeltable.type_system as ts
|
|
|
8
8
|
|
|
9
9
|
from .data_row import DataRow
|
|
10
10
|
from .expr import Expr, ExprScope
|
|
11
|
-
from .json_mapper import
|
|
11
|
+
from .json_mapper import JsonMapperDispatch
|
|
12
12
|
from .row_builder import RowBuilder
|
|
13
13
|
from .sql_element_cache import SqlElementCache
|
|
14
14
|
|
|
@@ -18,26 +18,37 @@ class ObjectRef(Expr):
|
|
|
18
18
|
Reference to an intermediate result, such as the "scope variable" produced by a JsonMapper.
|
|
19
19
|
The object is generated/materialized elsewhere and establishes a new scope.
|
|
20
20
|
"""
|
|
21
|
-
|
|
21
|
+
|
|
22
|
+
def __init__(self, scope: ExprScope, owner: JsonMapperDispatch):
|
|
22
23
|
# TODO: do we need an Unknown type after all?
|
|
23
24
|
super().__init__(ts.JsonType()) # JsonType: this could be anything
|
|
24
25
|
self._scope = scope
|
|
25
26
|
self.owner = owner
|
|
26
27
|
self.id = self._create_id()
|
|
27
28
|
|
|
29
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
30
|
+
# We have no components, so we can't rely on the default behavior here (otherwise, all ObjectRef
|
|
31
|
+
# instances will be conflated into a single slot).
|
|
32
|
+
return [('addr', id(self))]
|
|
33
|
+
|
|
34
|
+
def substitute(self, subs: dict[Expr, Expr]) -> Expr:
|
|
35
|
+
# Just return self; we need to avoid creating a new id after doing the substitution, because otherwise
|
|
36
|
+
# we'll wind up in a situation where the scope_anchor of the enclosing JsonMapper is different from the
|
|
37
|
+
# nested ObjectRefs inside its target_expr (and therefore occupies a different slot_idx).
|
|
38
|
+
return self
|
|
39
|
+
|
|
28
40
|
def scope(self) -> ExprScope:
|
|
29
41
|
return self._scope
|
|
30
42
|
|
|
31
|
-
def __str__(self) -> str:
|
|
32
|
-
assert False
|
|
33
|
-
|
|
34
43
|
def _equals(self, other: ObjectRef) -> bool:
|
|
35
|
-
return self.
|
|
44
|
+
return self.id == other.id
|
|
36
45
|
|
|
37
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
46
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
38
47
|
return None
|
|
39
48
|
|
|
40
49
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
41
50
|
# this will be called, but the value has already been materialized elsewhere
|
|
42
51
|
pass
|
|
43
52
|
|
|
53
|
+
def __repr__(self) -> str:
|
|
54
|
+
return f'ObjectRef({self.owner}, {self.id}, {self.owner.id})'
|