pixeltable 0.2.21__py3-none-any.whl → 0.2.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/column.py +37 -11
- pixeltable/catalog/globals.py +18 -0
- pixeltable/catalog/insertable_table.py +6 -4
- pixeltable/catalog/table.py +19 -3
- pixeltable/catalog/table_version.py +34 -14
- pixeltable/catalog/view.py +16 -17
- pixeltable/dataframe.py +7 -8
- pixeltable/env.py +5 -0
- pixeltable/exec/__init__.py +0 -1
- pixeltable/exec/aggregation_node.py +6 -3
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/data_row_batch.py +2 -19
- pixeltable/exec/exec_node.py +2 -1
- pixeltable/exec/expr_eval_node.py +17 -10
- pixeltable/exec/in_memory_data_node.py +6 -3
- pixeltable/exec/sql_node.py +24 -25
- pixeltable/exprs/arithmetic_expr.py +3 -1
- pixeltable/exprs/array_slice.py +7 -7
- pixeltable/exprs/column_property_ref.py +37 -10
- pixeltable/exprs/column_ref.py +93 -14
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +8 -7
- pixeltable/exprs/data_row.py +27 -18
- pixeltable/exprs/expr.py +53 -52
- pixeltable/exprs/expr_set.py +5 -0
- pixeltable/exprs/function_call.py +32 -16
- pixeltable/exprs/globals.py +4 -1
- pixeltable/exprs/in_predicate.py +8 -7
- pixeltable/exprs/inline_expr.py +4 -4
- pixeltable/exprs/is_null.py +4 -4
- pixeltable/exprs/json_mapper.py +11 -12
- pixeltable/exprs/json_path.py +5 -10
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +5 -4
- pixeltable/exprs/object_ref.py +2 -1
- pixeltable/exprs/row_builder.py +88 -36
- pixeltable/exprs/rowid_ref.py +12 -11
- pixeltable/exprs/similarity_expr.py +12 -7
- pixeltable/exprs/sql_element_cache.py +7 -5
- pixeltable/exprs/type_cast.py +8 -6
- pixeltable/exprs/variable.py +5 -4
- pixeltable/func/aggregate_function.py +1 -1
- pixeltable/func/function.py +11 -10
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/globals.py +5 -7
- pixeltable/functions/huggingface.py +19 -20
- pixeltable/functions/llama_cpp.py +106 -0
- pixeltable/functions/ollama.py +147 -0
- pixeltable/functions/replicate.py +72 -0
- pixeltable/functions/string.py +9 -0
- pixeltable/globals.py +12 -20
- pixeltable/index/btree.py +16 -3
- pixeltable/index/embedding_index.py +4 -4
- pixeltable/io/__init__.py +1 -2
- pixeltable/io/fiftyone.py +178 -0
- pixeltable/io/globals.py +96 -2
- pixeltable/iterators/base.py +3 -2
- pixeltable/iterators/document.py +1 -1
- pixeltable/iterators/video.py +120 -63
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_21.py +34 -0
- pixeltable/metadata/converters/util.py +45 -4
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +8 -0
- pixeltable/plan.py +16 -14
- pixeltable/py.typed +0 -0
- pixeltable/store.py +7 -2
- pixeltable/tool/create_test_video.py +1 -1
- pixeltable/tool/embed_udf.py +1 -1
- pixeltable/tool/mypy_plugin.py +28 -5
- pixeltable/type_system.py +17 -1
- pixeltable/utils/documents.py +15 -1
- pixeltable/utils/formatter.py +9 -10
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.22.dist-info}/METADATA +46 -10
- pixeltable-0.2.22.dist-info/RECORD +153 -0
- pixeltable/exec/media_validation_node.py +0 -43
- pixeltable-0.2.21.dist-info/RECORD +0 -148
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.22.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.22.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.22.dist-info}/entry_points.txt +0 -0
pixeltable/exprs/inline_expr.py
CHANGED
|
@@ -73,7 +73,7 @@ class InlineArray(Expr):
|
|
|
73
73
|
return super()._as_dict()
|
|
74
74
|
|
|
75
75
|
@classmethod
|
|
76
|
-
def _from_dict(cls, _: dict, components: list[Expr]) ->
|
|
76
|
+
def _from_dict(cls, _: dict, components: list[Expr]) -> InlineArray:
|
|
77
77
|
try:
|
|
78
78
|
return cls(components)
|
|
79
79
|
except excs.Error:
|
|
@@ -81,7 +81,7 @@ class InlineArray(Expr):
|
|
|
81
81
|
# This is because in schema versions <= 19, `InlineArray` was serialized incorrectly, and
|
|
82
82
|
# there is no way to determine the correct expression type until the subexpressions are
|
|
83
83
|
# loaded and their types are known.
|
|
84
|
-
return InlineList(components)
|
|
84
|
+
return InlineList(components) # type: ignore[return-value]
|
|
85
85
|
|
|
86
86
|
|
|
87
87
|
class InlineList(Expr):
|
|
@@ -122,7 +122,7 @@ class InlineList(Expr):
|
|
|
122
122
|
return super()._as_dict()
|
|
123
123
|
|
|
124
124
|
@classmethod
|
|
125
|
-
def _from_dict(cls, _: dict, components: list[Expr]) ->
|
|
125
|
+
def _from_dict(cls, _: dict, components: list[Expr]) -> InlineList:
|
|
126
126
|
return cls(components)
|
|
127
127
|
|
|
128
128
|
|
|
@@ -193,7 +193,7 @@ class InlineDict(Expr):
|
|
|
193
193
|
return {'keys': self.keys, **super()._as_dict()}
|
|
194
194
|
|
|
195
195
|
@classmethod
|
|
196
|
-
def _from_dict(cls, d: dict, components: list[Expr]) ->
|
|
196
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> InlineDict:
|
|
197
197
|
assert 'keys' in d
|
|
198
198
|
assert len(d['keys']) == len(components)
|
|
199
199
|
arg = dict(zip(d['keys'], components))
|
pixeltable/exprs/is_null.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Optional
|
|
3
|
+
from typing import Optional
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
7
7
|
import pixeltable.type_system as ts
|
|
8
|
+
|
|
8
9
|
from .data_row import DataRow
|
|
9
10
|
from .expr import Expr
|
|
10
11
|
from .row_builder import RowBuilder
|
|
@@ -23,7 +24,7 @@ class IsNull(Expr):
|
|
|
23
24
|
def _equals(self, other: IsNull) -> bool:
|
|
24
25
|
return True
|
|
25
26
|
|
|
26
|
-
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.
|
|
27
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
27
28
|
e = sql_elements.get(self.components[0])
|
|
28
29
|
if e is None:
|
|
29
30
|
return None
|
|
@@ -33,7 +34,6 @@ class IsNull(Expr):
|
|
|
33
34
|
data_row[self.slot_idx] = data_row[self.components[0].slot_idx] is None
|
|
34
35
|
|
|
35
36
|
@classmethod
|
|
36
|
-
def _from_dict(cls, d:
|
|
37
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> IsNull:
|
|
37
38
|
assert len(components) == 1
|
|
38
39
|
return cls(components[0])
|
|
39
|
-
|
pixeltable/exprs/json_mapper.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Optional
|
|
3
|
+
from typing import Optional
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -18,7 +18,7 @@ class JsonMapper(Expr):
|
|
|
18
18
|
is populated by JsonMapper.eval(). The JsonMapper effectively creates a new scope for its target expr.
|
|
19
19
|
"""
|
|
20
20
|
def __init__(self, src_expr: Expr, target_expr: Expr):
|
|
21
|
-
# TODO: type spec should be
|
|
21
|
+
# TODO: type spec should be list[target_expr.col_type]
|
|
22
22
|
super().__init__(ts.JsonType())
|
|
23
23
|
|
|
24
24
|
# we're creating a new scope, but we don't know yet whether this is nested within another JsonMapper;
|
|
@@ -32,7 +32,7 @@ class JsonMapper(Expr):
|
|
|
32
32
|
self.target_expr_eval_ctx: Optional[RowBuilder.EvalCtx] = None
|
|
33
33
|
self.id = self._create_id()
|
|
34
34
|
|
|
35
|
-
def bind_rel_paths(self, mapper: Optional[JsonMapper]) -> None:
|
|
35
|
+
def bind_rel_paths(self, mapper: Optional[JsonMapper] = None) -> None:
|
|
36
36
|
self._src_expr.bind_rel_paths(mapper)
|
|
37
37
|
self._target_expr.bind_rel_paths(self)
|
|
38
38
|
self.parent_mapper = mapper
|
|
@@ -43,12 +43,12 @@ class JsonMapper(Expr):
|
|
|
43
43
|
# need to ignore target_expr
|
|
44
44
|
return self._src_expr.scope()
|
|
45
45
|
|
|
46
|
-
def dependencies(self) ->
|
|
46
|
+
def dependencies(self) -> list[Expr]:
|
|
47
47
|
result = [self._src_expr]
|
|
48
48
|
result.extend(self._target_dependencies(self._target_expr))
|
|
49
49
|
return result
|
|
50
50
|
|
|
51
|
-
def _target_dependencies(self, e: Expr) ->
|
|
51
|
+
def _target_dependencies(self, e: Expr) -> list[Expr]:
|
|
52
52
|
"""
|
|
53
53
|
Return all subexprs of e of which the scope isn't contained in target_expr_scope.
|
|
54
54
|
Those need to be evaluated before us.
|
|
@@ -56,7 +56,7 @@ class JsonMapper(Expr):
|
|
|
56
56
|
expr_scope = e.scope()
|
|
57
57
|
if not expr_scope.is_contained_in(self.target_expr_scope):
|
|
58
58
|
return [e]
|
|
59
|
-
result:
|
|
59
|
+
result: list[Expr] = []
|
|
60
60
|
for c in e.components:
|
|
61
61
|
result.extend(self._target_dependencies(c))
|
|
62
62
|
return result
|
|
@@ -84,10 +84,10 @@ class JsonMapper(Expr):
|
|
|
84
84
|
def scope_anchor(self) -> Expr:
|
|
85
85
|
return self.components[2]
|
|
86
86
|
|
|
87
|
-
def _equals(self,
|
|
87
|
+
def _equals(self, _: JsonMapper) -> bool:
|
|
88
88
|
return True
|
|
89
89
|
|
|
90
|
-
def sql_expr(self, _: SqlElementCache) -> Optional[sql.
|
|
90
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
91
91
|
return None
|
|
92
92
|
|
|
93
93
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
@@ -104,19 +104,18 @@ class JsonMapper(Expr):
|
|
|
104
104
|
for i, val in enumerate(src):
|
|
105
105
|
data_row[self.scope_anchor.slot_idx] = val
|
|
106
106
|
# stored target_expr
|
|
107
|
-
|
|
108
|
-
assert exc_tb is None
|
|
107
|
+
row_builder.eval(data_row, self.target_expr_eval_ctx)
|
|
109
108
|
result[i] = data_row[self._target_expr.slot_idx]
|
|
110
109
|
data_row[self.slot_idx] = result
|
|
111
110
|
|
|
112
|
-
def _as_dict(self) ->
|
|
111
|
+
def _as_dict(self) -> dict:
|
|
113
112
|
"""
|
|
114
113
|
We need to avoid serializing component[2], which is an ObjectRef.
|
|
115
114
|
"""
|
|
116
115
|
return {'components': [c.as_dict() for c in self.components[0:2]]}
|
|
117
116
|
|
|
118
117
|
@classmethod
|
|
119
|
-
def _from_dict(cls, d:
|
|
118
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> JsonMapper:
|
|
120
119
|
assert len(components) == 2
|
|
121
120
|
return cls(components[0], components[1])
|
|
122
121
|
|
pixeltable/exprs/json_path.py
CHANGED
|
@@ -5,28 +5,23 @@ from typing import Any, Optional, Union
|
|
|
5
5
|
import jmespath
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
|
-
import pixeltable
|
|
8
|
+
import pixeltable as pxt
|
|
9
9
|
import pixeltable.catalog as catalog
|
|
10
10
|
import pixeltable.exceptions as excs
|
|
11
11
|
import pixeltable.type_system as ts
|
|
12
|
-
from .data_row import DataRow
|
|
13
|
-
from .expr import Expr
|
|
14
|
-
from .globals import print_slice
|
|
15
|
-
from .json_mapper import JsonMapper
|
|
16
|
-
from .row_builder import RowBuilder
|
|
17
|
-
from .sql_element_cache import SqlElementCache
|
|
18
12
|
|
|
19
13
|
from .data_row import DataRow
|
|
20
14
|
from .expr import Expr
|
|
21
15
|
from .globals import print_slice
|
|
22
16
|
from .json_mapper import JsonMapper
|
|
23
17
|
from .row_builder import RowBuilder
|
|
18
|
+
from .sql_element_cache import SqlElementCache
|
|
24
19
|
|
|
25
20
|
|
|
26
21
|
class JsonPath(Expr):
|
|
27
22
|
def __init__(
|
|
28
23
|
self,
|
|
29
|
-
anchor: Optional['
|
|
24
|
+
anchor: Optional['pxt.exprs.Expr'],
|
|
30
25
|
path_elements: Optional[list[Union[str, int, slice]]] = None,
|
|
31
26
|
scope_idx: int = 0
|
|
32
27
|
) -> None:
|
|
@@ -61,7 +56,7 @@ class JsonPath(Expr):
|
|
|
61
56
|
return {'path_elements': path_elements, 'scope_idx': self.scope_idx, **super()._as_dict()}
|
|
62
57
|
|
|
63
58
|
@classmethod
|
|
64
|
-
def _from_dict(cls, d: dict, components: list[Expr]) ->
|
|
59
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> JsonPath:
|
|
65
60
|
assert 'path_elements' in d
|
|
66
61
|
assert 'scope_idx' in d
|
|
67
62
|
assert len(components) <= 1
|
|
@@ -143,7 +138,7 @@ class JsonPath(Expr):
|
|
|
143
138
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
144
139
|
return super()._id_attrs() + [('path_elements', self.path_elements)]
|
|
145
140
|
|
|
146
|
-
def sql_expr(self, _: SqlElementCache) -> Optional[sql.
|
|
141
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
147
142
|
"""
|
|
148
143
|
Postgres appears to have a bug: jsonb_path_query('{a: [{b: 0}, {b: 1}]}', '$.a.b') returns
|
|
149
144
|
*two* rows (each containing col val 0), not a single row with [0, 0].
|
pixeltable/exprs/literal.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import datetime
|
|
4
|
-
from typing import Any,
|
|
4
|
+
from typing import Any, Optional
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
@@ -54,10 +54,10 @@ class Literal(Expr):
|
|
|
54
54
|
def _equals(self, other: Literal) -> bool:
|
|
55
55
|
return self.val == other.val
|
|
56
56
|
|
|
57
|
-
def _id_attrs(self) ->
|
|
57
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
58
58
|
return super()._id_attrs() + [('val', self.val)]
|
|
59
59
|
|
|
60
|
-
def sql_expr(self, _: SqlElementCache) -> Optional[sql.
|
|
60
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
61
61
|
# we need to return something here so that we can generate a Where clause for predicates
|
|
62
62
|
# that involve literals (like Where c > 0)
|
|
63
63
|
return sql.sql.expression.literal(self.val)
|
|
@@ -66,7 +66,7 @@ class Literal(Expr):
|
|
|
66
66
|
# this will be called, even though sql_expr() does not return None
|
|
67
67
|
data_row[self.slot_idx] = self.val
|
|
68
68
|
|
|
69
|
-
def _as_dict(self) ->
|
|
69
|
+
def _as_dict(self) -> dict:
|
|
70
70
|
# For some types, we need to explictly record their type, because JSON does not know
|
|
71
71
|
# how to interpret them unambiguously
|
|
72
72
|
if self.col_type.is_timestamp_type():
|
|
@@ -80,7 +80,7 @@ class Literal(Expr):
|
|
|
80
80
|
return {'val': self.val, **super()._as_dict()}
|
|
81
81
|
|
|
82
82
|
@classmethod
|
|
83
|
-
def _from_dict(cls, d:
|
|
83
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> Literal:
|
|
84
84
|
assert 'val' in d
|
|
85
85
|
if 'val_t' in d:
|
|
86
86
|
val_t = d['val_t']
|
pixeltable/exprs/method_ref.py
CHANGED
|
@@ -5,6 +5,7 @@ import sqlalchemy as sql
|
|
|
5
5
|
import pixeltable.type_system as ts
|
|
6
6
|
from pixeltable.exprs import Expr, FunctionCall
|
|
7
7
|
from pixeltable.func import FunctionRegistry
|
|
8
|
+
|
|
8
9
|
from .data_row import DataRow
|
|
9
10
|
from .row_builder import RowBuilder
|
|
10
11
|
from .sql_element_cache import SqlElementCache
|
|
@@ -36,10 +37,10 @@ class MethodRef(Expr):
|
|
|
36
37
|
return {'method_name': self.method_name, **super()._as_dict()}
|
|
37
38
|
|
|
38
39
|
@classmethod
|
|
39
|
-
def _from_dict(cls, d: dict, components: list[Expr]) ->
|
|
40
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> 'MethodRef':
|
|
40
41
|
assert 'method_name' in d
|
|
41
42
|
assert len(components) == 1
|
|
42
|
-
return cls(d['method_name']
|
|
43
|
+
return cls(components[0], d['method_name'])
|
|
43
44
|
|
|
44
45
|
def __call__(self, *args, **kwargs) -> FunctionCall:
|
|
45
46
|
result = self.fn(*[self.base_expr, *args], **kwargs)
|
|
@@ -48,12 +49,12 @@ class MethodRef(Expr):
|
|
|
48
49
|
return result
|
|
49
50
|
|
|
50
51
|
def _equals(self, other: 'MethodRef') -> bool:
|
|
51
|
-
return self.base_expr == other.base_expr and self.method_name == other.method_name
|
|
52
|
+
return self.base_expr.id == other.base_expr.id and self.method_name == other.method_name
|
|
52
53
|
|
|
53
54
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
54
55
|
return super()._id_attrs() + [('method_name', self.method_name)]
|
|
55
56
|
|
|
56
|
-
def sql_expr(self, _: SqlElementCache) -> Optional[sql.
|
|
57
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
57
58
|
return None
|
|
58
59
|
|
|
59
60
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
pixeltable/exprs/object_ref.py
CHANGED
|
@@ -5,6 +5,7 @@ from typing import Optional
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
7
7
|
import pixeltable.type_system as ts
|
|
8
|
+
|
|
8
9
|
from .data_row import DataRow
|
|
9
10
|
from .expr import Expr, ExprScope
|
|
10
11
|
from .json_mapper import JsonMapper
|
|
@@ -33,7 +34,7 @@ class ObjectRef(Expr):
|
|
|
33
34
|
def _equals(self, other: ObjectRef) -> bool:
|
|
34
35
|
return self.owner is other.owner
|
|
35
36
|
|
|
36
|
-
def sql_expr(self, _: SqlElementCache) -> Optional[sql.
|
|
37
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
37
38
|
return None
|
|
38
39
|
|
|
39
40
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
pixeltable/exprs/row_builder.py
CHANGED
|
@@ -3,7 +3,8 @@ from __future__ import annotations
|
|
|
3
3
|
import sys
|
|
4
4
|
import time
|
|
5
5
|
from dataclasses import dataclass
|
|
6
|
-
from typing import Any,
|
|
6
|
+
from typing import Any, Iterable, Optional, Sequence
|
|
7
|
+
from uuid import UUID
|
|
7
8
|
|
|
8
9
|
import sqlalchemy as sql
|
|
9
10
|
|
|
@@ -11,7 +12,6 @@ import pixeltable.catalog as catalog
|
|
|
11
12
|
import pixeltable.exceptions as excs
|
|
12
13
|
import pixeltable.func as func
|
|
13
14
|
import pixeltable.utils as utils
|
|
14
|
-
|
|
15
15
|
from .data_row import DataRow
|
|
16
16
|
from .expr import Expr
|
|
17
17
|
from .expr_set import ExprSet
|
|
@@ -48,14 +48,37 @@ class RowBuilder:
|
|
|
48
48
|
For ColumnRefs to unstored iterator columns:
|
|
49
49
|
- in order for them to be executable, we also record the iterator args and pass them to the ColumnRef
|
|
50
50
|
"""
|
|
51
|
+
unique_exprs: ExprSet
|
|
52
|
+
next_slot_idx: int
|
|
53
|
+
input_expr_slot_idxs: set[int]
|
|
54
|
+
|
|
55
|
+
# output exprs: all exprs the caller wants to materialize
|
|
56
|
+
# - explicitly requested output_exprs
|
|
57
|
+
# - values for computed columns
|
|
58
|
+
output_exprs: ExprSet
|
|
59
|
+
|
|
60
|
+
input_exprs: ExprSet
|
|
61
|
+
|
|
62
|
+
table_columns: list[ColumnSlotIdx]
|
|
63
|
+
default_eval_ctx: EvalCtx
|
|
64
|
+
unstored_iter_args: dict[UUID, Expr]
|
|
65
|
+
|
|
66
|
+
# transitive dependents for the purpose of exception propagation: an exception for slot i is propagated to
|
|
67
|
+
# _exc_dependents[i]
|
|
68
|
+
# (list of set of slot_idxs, indexed by slot_idx)
|
|
69
|
+
_exc_dependents: list[set[int]]
|
|
70
|
+
|
|
71
|
+
# records the output_expr that a subexpr belongs to
|
|
72
|
+
# (a subexpr can be shared across multiple output exprs)
|
|
73
|
+
output_expr_ids: list[set[int]]
|
|
51
74
|
|
|
52
75
|
@dataclass
|
|
53
76
|
class EvalCtx:
|
|
54
77
|
"""Context for evaluating a set of target exprs"""
|
|
55
|
-
slot_idxs:
|
|
56
|
-
exprs:
|
|
57
|
-
target_slot_idxs:
|
|
58
|
-
target_exprs:
|
|
78
|
+
slot_idxs: list[int] # slot idxs of exprs needed to evaluate target exprs; does not contain duplicates
|
|
79
|
+
exprs: list[Expr] # exprs corresponding to slot_idxs
|
|
80
|
+
target_slot_idxs: list[int] # slot idxs of target exprs; might contain duplicates
|
|
81
|
+
target_exprs: list[Expr] # exprs corresponding to target_slot_idxs
|
|
59
82
|
|
|
60
83
|
def __init__(
|
|
61
84
|
self, output_exprs: Sequence[Expr], columns: Sequence[catalog.Column], input_exprs: Iterable[Expr]
|
|
@@ -67,35 +90,56 @@ class RowBuilder:
|
|
|
67
90
|
input_exprs: list of Exprs that are excluded from evaluation (because they're already materialized)
|
|
68
91
|
TODO: enforce that output_exprs doesn't overlap with input_exprs?
|
|
69
92
|
"""
|
|
70
|
-
self.unique_exprs = ExprSet() # dependencies precede their dependents
|
|
93
|
+
self.unique_exprs: ExprSet[Expr] = ExprSet() # dependencies precede their dependents
|
|
71
94
|
self.next_slot_idx = 0
|
|
72
95
|
|
|
73
96
|
# record input and output exprs; make copies to avoid reusing execution state
|
|
74
97
|
unique_input_exprs = [self._record_unique_expr(e.copy(), recursive=False) for e in input_exprs]
|
|
75
98
|
self.input_expr_slot_idxs = {e.slot_idx for e in unique_input_exprs}
|
|
76
99
|
|
|
77
|
-
# output exprs: all exprs the caller wants to materialize
|
|
78
|
-
# - explicitly requested output_exprs
|
|
79
|
-
# - values for computed columns
|
|
80
100
|
resolve_cols = set(columns)
|
|
81
101
|
self.output_exprs = ExprSet([
|
|
82
102
|
self._record_unique_expr(e.copy().resolve_computed_cols(resolve_cols=resolve_cols), recursive=True)
|
|
83
103
|
for e in output_exprs
|
|
84
104
|
])
|
|
85
105
|
|
|
86
|
-
#
|
|
106
|
+
# if init(columns):
|
|
107
|
+
# - we are creating table rows and need to record columns for create_table_row()
|
|
108
|
+
# - output_exprs materialize those columns
|
|
109
|
+
# - input_exprs are ColumnRefs of the non-computed columns (ie, what needs to be provided as input)
|
|
110
|
+
# - media validation:
|
|
111
|
+
# * for write-validated columns, we need to create validating ColumnRefs
|
|
112
|
+
# * further references to that column (eg, computed cols) need to resolve to the validating ColumnRef
|
|
87
113
|
from .column_ref import ColumnRef
|
|
88
|
-
self.table_columns:
|
|
114
|
+
self.table_columns: list[ColumnSlotIdx] = []
|
|
115
|
+
self.input_exprs = ExprSet()
|
|
116
|
+
validating_colrefs: dict[Expr, Expr] = {} # key: non-validating colref, value: corresp. validating colref
|
|
89
117
|
for col in columns:
|
|
118
|
+
expr: Expr
|
|
90
119
|
if col.is_computed:
|
|
91
120
|
assert col.value_expr is not None
|
|
92
121
|
# create a copy here so we don't reuse execution state and resolve references to computed columns
|
|
93
122
|
expr = col.value_expr.copy().resolve_computed_cols(resolve_cols=resolve_cols)
|
|
123
|
+
expr = expr.substitute(validating_colrefs)
|
|
94
124
|
expr = self._record_unique_expr(expr, recursive=True)
|
|
95
125
|
else:
|
|
96
126
|
# record a ColumnRef so that references to this column resolve to the same slot idx
|
|
97
|
-
|
|
98
|
-
|
|
127
|
+
perform_validation = (
|
|
128
|
+
None if not col.col_type.is_media_type()
|
|
129
|
+
else col.media_validation == catalog.MediaValidation.ON_WRITE
|
|
130
|
+
)
|
|
131
|
+
expr = ColumnRef(col, perform_validation=perform_validation)
|
|
132
|
+
# recursive=True: needed for validating ColumnRef
|
|
133
|
+
expr = self._record_unique_expr(expr, recursive=True)
|
|
134
|
+
|
|
135
|
+
if perform_validation:
|
|
136
|
+
# if expr is a validating ColumnRef, the input is the non-validating ColumnRef
|
|
137
|
+
non_validating_colref = expr.components[0]
|
|
138
|
+
self.input_exprs.add(non_validating_colref)
|
|
139
|
+
validating_colrefs[non_validating_colref] = expr
|
|
140
|
+
else:
|
|
141
|
+
self.input_exprs.add(expr)
|
|
142
|
+
|
|
99
143
|
self.add_table_column(col, expr.slot_idx)
|
|
100
144
|
self.output_exprs.add(expr)
|
|
101
145
|
|
|
@@ -118,8 +162,9 @@ class RowBuilder:
|
|
|
118
162
|
unstored_iter_col_refs = [col_ref for col_ref in col_refs if refs_unstored_iter_col(col_ref)]
|
|
119
163
|
component_views = [col_ref.col.tbl for col_ref in unstored_iter_col_refs]
|
|
120
164
|
unstored_iter_args = {view.id: view.iterator_args.copy() for view in component_views}
|
|
121
|
-
self.unstored_iter_args =
|
|
122
|
-
|
|
165
|
+
self.unstored_iter_args = {
|
|
166
|
+
id: self._record_unique_expr(arg, recursive=True) for id, arg in unstored_iter_args.items()
|
|
167
|
+
}
|
|
123
168
|
|
|
124
169
|
for col_ref in unstored_iter_col_refs:
|
|
125
170
|
iter_arg_ctx = self.create_eval_ctx([unstored_iter_args[col_ref.col.tbl.id]])
|
|
@@ -129,25 +174,28 @@ class RowBuilder:
|
|
|
129
174
|
for i, expr in enumerate(self.unique_exprs):
|
|
130
175
|
assert expr.slot_idx == i
|
|
131
176
|
|
|
132
|
-
#
|
|
133
|
-
|
|
177
|
+
# determine transitive dependencies for the purpose of exception propagation
|
|
178
|
+
# (list of set of slot_idxs, indexed by slot_idx)
|
|
179
|
+
exc_dependencies: list[set[int]] = [set() for _ in range(self.num_materialized)]
|
|
180
|
+
from .column_property_ref import ColumnPropertyRef
|
|
134
181
|
for expr in self.unique_exprs:
|
|
135
182
|
if expr.slot_idx in self.input_expr_slot_idxs:
|
|
136
183
|
# this is input and therefore doesn't depend on other exprs
|
|
137
184
|
continue
|
|
185
|
+
# error properties don't have exceptions themselves
|
|
186
|
+
if isinstance(expr, ColumnPropertyRef) and expr.is_error_prop():
|
|
187
|
+
continue
|
|
138
188
|
for d in expr.dependencies():
|
|
139
|
-
|
|
140
|
-
|
|
189
|
+
exc_dependencies[expr.slot_idx].add(d.slot_idx)
|
|
190
|
+
exc_dependencies[expr.slot_idx].update(exc_dependencies[d.slot_idx])
|
|
141
191
|
|
|
142
|
-
|
|
143
|
-
self.dependents: List[Set[int]] = [set() for _ in range(self.num_materialized)]
|
|
192
|
+
self._exc_dependents = [set() for _ in range(self.num_materialized)]
|
|
144
193
|
for expr in self.unique_exprs:
|
|
145
|
-
|
|
146
|
-
|
|
194
|
+
assert expr.slot_idx is not None
|
|
195
|
+
for d_idx in exc_dependencies[expr.slot_idx]:
|
|
196
|
+
self._exc_dependents[d_idx].add(expr.slot_idx)
|
|
147
197
|
|
|
148
|
-
|
|
149
|
-
# (a subexpr can be shared across multiple output exprs)
|
|
150
|
-
self.output_expr_ids: List[Set[int]] = [set() for _ in range(self.num_materialized)]
|
|
198
|
+
self.output_expr_ids = [set() for _ in range(self.num_materialized)]
|
|
151
199
|
for e in self.output_exprs:
|
|
152
200
|
self._record_output_expr_id(e, e.slot_idx)
|
|
153
201
|
|
|
@@ -155,7 +203,7 @@ class RowBuilder:
|
|
|
155
203
|
"""Record a column that is part of the table row"""
|
|
156
204
|
self.table_columns.append(ColumnSlotIdx(col, slot_idx))
|
|
157
205
|
|
|
158
|
-
def output_slot_idxs(self) ->
|
|
206
|
+
def output_slot_idxs(self) -> list[ColumnSlotIdx]:
|
|
159
207
|
"""Return ColumnSlotIdx for output columns"""
|
|
160
208
|
return self.table_columns
|
|
161
209
|
|
|
@@ -206,9 +254,9 @@ class RowBuilder:
|
|
|
206
254
|
for d in e.dependencies():
|
|
207
255
|
self._record_output_expr_id(d, output_expr_id)
|
|
208
256
|
|
|
209
|
-
def _compute_dependencies(self, target_slot_idxs:
|
|
257
|
+
def _compute_dependencies(self, target_slot_idxs: list[int], excluded_slot_idxs: list[int]) -> list[int]:
|
|
210
258
|
"""Compute exprs needed to materialize the given target slots, excluding 'excluded_slot_idxs'"""
|
|
211
|
-
dependencies = [set() for _ in range(self.num_materialized)] # indexed by slot_idx
|
|
259
|
+
dependencies: list[set[int]] = [set() for _ in range(self.num_materialized)] # indexed by slot_idx
|
|
212
260
|
# doing this front-to-back ensures that we capture transitive dependencies
|
|
213
261
|
max_target_slot_idx = max(target_slot_idxs)
|
|
214
262
|
for expr in self.unique_exprs:
|
|
@@ -237,6 +285,8 @@ class RowBuilder:
|
|
|
237
285
|
for e in expr_list:
|
|
238
286
|
self.__set_slot_idxs_aux(e)
|
|
239
287
|
if remove_duplicates:
|
|
288
|
+
# only allowed if `expr_list` is a mutable list
|
|
289
|
+
assert isinstance(expr_list, list)
|
|
240
290
|
deduped = list(ExprSet(expr_list))
|
|
241
291
|
expr_list[:] = deduped
|
|
242
292
|
|
|
@@ -248,13 +298,14 @@ class RowBuilder:
|
|
|
248
298
|
for c in e.components:
|
|
249
299
|
self.__set_slot_idxs_aux(c)
|
|
250
300
|
|
|
251
|
-
def get_dependencies(self, targets:
|
|
301
|
+
def get_dependencies(self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None) -> list[Expr]:
|
|
252
302
|
"""
|
|
253
303
|
Return list of dependencies needed to evaluate the given target exprs (expressed as slot idxs).
|
|
254
304
|
The exprs given in 'exclude' are excluded.
|
|
255
305
|
Returns:
|
|
256
306
|
list of Exprs from unique_exprs (= with slot_idx set)
|
|
257
307
|
"""
|
|
308
|
+
targets = list(targets)
|
|
258
309
|
if exclude is None:
|
|
259
310
|
exclude = []
|
|
260
311
|
if len(targets) == 0:
|
|
@@ -270,8 +321,9 @@ class RowBuilder:
|
|
|
270
321
|
result_ids.sort()
|
|
271
322
|
return [self.unique_exprs[id] for id in result_ids]
|
|
272
323
|
|
|
273
|
-
def create_eval_ctx(self, targets:
|
|
324
|
+
def create_eval_ctx(self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None) -> EvalCtx:
|
|
274
325
|
"""Return EvalCtx for targets"""
|
|
326
|
+
targets = list(targets)
|
|
275
327
|
if exclude is None:
|
|
276
328
|
exclude = []
|
|
277
329
|
if len(targets) == 0:
|
|
@@ -287,7 +339,7 @@ class RowBuilder:
|
|
|
287
339
|
def set_exc(self, data_row: DataRow, slot_idx: int, exc: Exception) -> None:
|
|
288
340
|
"""Record an exception in data_row and propagate it to dependents"""
|
|
289
341
|
data_row.set_exc(slot_idx, exc)
|
|
290
|
-
for slot_idx in self.
|
|
342
|
+
for slot_idx in self._exc_dependents[slot_idx]:
|
|
291
343
|
data_row.set_exc(slot_idx, exc)
|
|
292
344
|
|
|
293
345
|
def eval(
|
|
@@ -318,14 +370,14 @@ class RowBuilder:
|
|
|
318
370
|
raise excs.ExprEvalError(
|
|
319
371
|
expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0)
|
|
320
372
|
|
|
321
|
-
def create_table_row(self, data_row: DataRow, exc_col_ids:
|
|
373
|
+
def create_table_row(self, data_row: DataRow, exc_col_ids: set[int]) -> tuple[dict[str, Any], int]:
|
|
322
374
|
"""Create a table row from the slots that have an output column assigned
|
|
323
375
|
|
|
324
|
-
Return
|
|
376
|
+
Return tuple[dict that represents a stored row (can be passed to sql.insert()), # of exceptions]
|
|
325
377
|
This excludes system columns.
|
|
326
378
|
"""
|
|
327
379
|
num_excs = 0
|
|
328
|
-
table_row:
|
|
380
|
+
table_row: dict[str, Any] = {}
|
|
329
381
|
for info in self.table_columns:
|
|
330
382
|
col, slot_idx = info.col, info.slot_idx
|
|
331
383
|
if data_row.has_exc(slot_idx):
|
pixeltable/exprs/rowid_ref.py
CHANGED
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from
|
|
2
|
+
|
|
3
|
+
from typing import Any, Optional
|
|
4
4
|
from uuid import UUID
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
import pixeltable.catalog as catalog
|
|
9
|
+
import pixeltable.type_system as ts
|
|
10
|
+
|
|
9
11
|
from .data_row import DataRow
|
|
12
|
+
from .expr import Expr
|
|
10
13
|
from .row_builder import RowBuilder
|
|
11
|
-
|
|
12
|
-
import pixeltable.catalog as catalog
|
|
14
|
+
from .sql_element_cache import SqlElementCache
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
class RowidRef(Expr):
|
|
@@ -49,14 +51,14 @@ class RowidRef(Expr):
|
|
|
49
51
|
return self.normalized_base_id == other.normalized_base_id \
|
|
50
52
|
and self.rowid_component_idx == other.rowid_component_idx
|
|
51
53
|
|
|
52
|
-
def _id_attrs(self) ->
|
|
54
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
53
55
|
return super()._id_attrs() +\
|
|
54
56
|
[('normalized_base_id', self.normalized_base_id), ('idx', self.rowid_component_idx)]
|
|
55
57
|
|
|
56
58
|
def __str__(self) -> str:
|
|
57
59
|
# check if this is the pos column of a component view
|
|
58
60
|
tbl = self.tbl if self.tbl is not None else catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
|
|
59
|
-
if tbl.is_component_view() and self.rowid_component_idx == tbl.store_tbl.pos_col_idx:
|
|
61
|
+
if tbl.is_component_view() and self.rowid_component_idx == tbl.store_tbl.pos_col_idx: # type: ignore[attr-defined]
|
|
60
62
|
return catalog.globals._POS_COLUMN_NAME
|
|
61
63
|
return ''
|
|
62
64
|
|
|
@@ -73,7 +75,7 @@ class RowidRef(Expr):
|
|
|
73
75
|
self.tbl = tbl.tbl_version
|
|
74
76
|
self.tbl_id = self.tbl.id
|
|
75
77
|
|
|
76
|
-
def sql_expr(self, _: SqlElementCache) -> Optional[sql.
|
|
78
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
77
79
|
tbl = self.tbl if self.tbl is not None else catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
|
|
78
80
|
rowid_cols = tbl.store_tbl.rowid_columns()
|
|
79
81
|
return rowid_cols[self.rowid_component_idx]
|
|
@@ -81,7 +83,7 @@ class RowidRef(Expr):
|
|
|
81
83
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
82
84
|
data_row[self.slot_idx] = data_row.pk[self.rowid_component_idx]
|
|
83
85
|
|
|
84
|
-
def _as_dict(self) ->
|
|
86
|
+
def _as_dict(self) -> dict:
|
|
85
87
|
return {
|
|
86
88
|
'tbl_id': str(self.tbl_id),
|
|
87
89
|
'normalized_base_id': str(self.normalized_base_id),
|
|
@@ -89,7 +91,6 @@ class RowidRef(Expr):
|
|
|
89
91
|
}
|
|
90
92
|
|
|
91
93
|
@classmethod
|
|
92
|
-
def _from_dict(cls, d:
|
|
94
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> RowidRef:
|
|
93
95
|
tbl_id, normalized_base_id, idx = UUID(d['tbl_id']), UUID(d['normalized_base_id']), d['idx']
|
|
94
96
|
return cls(tbl=None, idx=idx, tbl_id=tbl_id, normalized_base_id=normalized_base_id)
|
|
95
|
-
|