pixeltable 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/column.py +41 -29
- pixeltable/catalog/globals.py +18 -0
- pixeltable/catalog/insertable_table.py +30 -10
- pixeltable/catalog/table.py +198 -86
- pixeltable/catalog/table_version.py +47 -53
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/catalog/view.py +17 -18
- pixeltable/dataframe.py +27 -36
- pixeltable/env.py +7 -0
- pixeltable/exec/__init__.py +0 -1
- pixeltable/exec/aggregation_node.py +6 -3
- pixeltable/exec/cache_prefetch_node.py +189 -43
- pixeltable/exec/data_row_batch.py +5 -22
- pixeltable/exec/exec_context.py +2 -2
- pixeltable/exec/exec_node.py +3 -2
- pixeltable/exec/expr_eval_node.py +23 -16
- pixeltable/exec/in_memory_data_node.py +6 -3
- pixeltable/exec/sql_node.py +24 -25
- pixeltable/exprs/arithmetic_expr.py +12 -5
- pixeltable/exprs/array_slice.py +7 -7
- pixeltable/exprs/column_property_ref.py +37 -10
- pixeltable/exprs/column_ref.py +97 -14
- pixeltable/exprs/comparison.py +10 -5
- pixeltable/exprs/compound_predicate.py +8 -7
- pixeltable/exprs/data_row.py +27 -18
- pixeltable/exprs/expr.py +53 -52
- pixeltable/exprs/expr_set.py +5 -0
- pixeltable/exprs/function_call.py +32 -16
- pixeltable/exprs/globals.py +4 -1
- pixeltable/exprs/in_predicate.py +8 -7
- pixeltable/exprs/inline_expr.py +4 -4
- pixeltable/exprs/is_null.py +4 -4
- pixeltable/exprs/json_mapper.py +11 -12
- pixeltable/exprs/json_path.py +6 -11
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +5 -4
- pixeltable/exprs/object_ref.py +2 -1
- pixeltable/exprs/row_builder.py +88 -36
- pixeltable/exprs/rowid_ref.py +12 -11
- pixeltable/exprs/similarity_expr.py +12 -7
- pixeltable/exprs/sql_element_cache.py +7 -5
- pixeltable/exprs/type_cast.py +8 -6
- pixeltable/exprs/variable.py +5 -4
- pixeltable/func/aggregate_function.py +9 -9
- pixeltable/func/expr_template_function.py +6 -5
- pixeltable/func/function.py +11 -10
- pixeltable/func/udf.py +6 -11
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/globals.py +5 -7
- pixeltable/functions/huggingface.py +155 -45
- pixeltable/functions/llama_cpp.py +107 -0
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +147 -0
- pixeltable/functions/openai.py +1 -1
- pixeltable/functions/replicate.py +72 -0
- pixeltable/functions/string.py +9 -0
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +5 -2
- pixeltable/globals.py +67 -26
- pixeltable/index/btree.py +16 -3
- pixeltable/index/embedding_index.py +4 -4
- pixeltable/io/__init__.py +1 -2
- pixeltable/io/fiftyone.py +178 -0
- pixeltable/io/globals.py +96 -2
- pixeltable/iterators/base.py +3 -2
- pixeltable/iterators/document.py +1 -1
- pixeltable/iterators/video.py +120 -63
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_21.py +34 -0
- pixeltable/metadata/converters/util.py +45 -4
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +8 -0
- pixeltable/plan.py +17 -15
- pixeltable/py.typed +0 -0
- pixeltable/store.py +7 -2
- pixeltable/tool/create_test_db_dump.py +1 -1
- pixeltable/tool/create_test_video.py +1 -1
- pixeltable/tool/embed_udf.py +1 -1
- pixeltable/tool/mypy_plugin.py +28 -5
- pixeltable/type_system.py +100 -36
- pixeltable/utils/coco.py +5 -5
- pixeltable/utils/documents.py +15 -1
- pixeltable/utils/formatter.py +12 -13
- pixeltable/utils/s3.py +6 -3
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/METADATA +158 -49
- pixeltable-0.2.23.dist-info/RECORD +153 -0
- pixeltable/exec/media_validation_node.py +0 -43
- pixeltable-0.2.21.dist-info/RECORD +0 -148
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/entry_points.txt +0 -0
pixeltable/exprs/json_path.py
CHANGED
|
@@ -5,28 +5,23 @@ from typing import Any, Optional, Union
|
|
|
5
5
|
import jmespath
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
|
-
import pixeltable
|
|
8
|
+
import pixeltable as pxt
|
|
9
9
|
import pixeltable.catalog as catalog
|
|
10
10
|
import pixeltable.exceptions as excs
|
|
11
11
|
import pixeltable.type_system as ts
|
|
12
|
-
from .data_row import DataRow
|
|
13
|
-
from .expr import Expr
|
|
14
|
-
from .globals import print_slice
|
|
15
|
-
from .json_mapper import JsonMapper
|
|
16
|
-
from .row_builder import RowBuilder
|
|
17
|
-
from .sql_element_cache import SqlElementCache
|
|
18
12
|
|
|
19
13
|
from .data_row import DataRow
|
|
20
14
|
from .expr import Expr
|
|
21
15
|
from .globals import print_slice
|
|
22
16
|
from .json_mapper import JsonMapper
|
|
23
17
|
from .row_builder import RowBuilder
|
|
18
|
+
from .sql_element_cache import SqlElementCache
|
|
24
19
|
|
|
25
20
|
|
|
26
21
|
class JsonPath(Expr):
|
|
27
22
|
def __init__(
|
|
28
23
|
self,
|
|
29
|
-
anchor: Optional['
|
|
24
|
+
anchor: Optional['pxt.exprs.Expr'],
|
|
30
25
|
path_elements: Optional[list[Union[str, int, slice]]] = None,
|
|
31
26
|
scope_idx: int = 0
|
|
32
27
|
) -> None:
|
|
@@ -37,7 +32,7 @@ class JsonPath(Expr):
|
|
|
37
32
|
"""
|
|
38
33
|
if path_elements is None:
|
|
39
34
|
path_elements = []
|
|
40
|
-
super().__init__(ts.JsonType())
|
|
35
|
+
super().__init__(ts.JsonType(nullable=True)) # JsonPath expressions are always nullable
|
|
41
36
|
if anchor is not None:
|
|
42
37
|
self.components = [anchor]
|
|
43
38
|
self.path_elements: list[Union[str, int, slice]] = path_elements
|
|
@@ -61,7 +56,7 @@ class JsonPath(Expr):
|
|
|
61
56
|
return {'path_elements': path_elements, 'scope_idx': self.scope_idx, **super()._as_dict()}
|
|
62
57
|
|
|
63
58
|
@classmethod
|
|
64
|
-
def _from_dict(cls, d: dict, components: list[Expr]) ->
|
|
59
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> JsonPath:
|
|
65
60
|
assert 'path_elements' in d
|
|
66
61
|
assert 'scope_idx' in d
|
|
67
62
|
assert len(components) <= 1
|
|
@@ -143,7 +138,7 @@ class JsonPath(Expr):
|
|
|
143
138
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
144
139
|
return super()._id_attrs() + [('path_elements', self.path_elements)]
|
|
145
140
|
|
|
146
|
-
def sql_expr(self, _: SqlElementCache) -> Optional[sql.
|
|
141
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
147
142
|
"""
|
|
148
143
|
Postgres appears to have a bug: jsonb_path_query('{a: [{b: 0}, {b: 1}]}', '$.a.b') returns
|
|
149
144
|
*two* rows (each containing col val 0), not a single row with [0, 0].
|
pixeltable/exprs/literal.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import datetime
|
|
4
|
-
from typing import Any,
|
|
4
|
+
from typing import Any, Optional
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
@@ -54,10 +54,10 @@ class Literal(Expr):
|
|
|
54
54
|
def _equals(self, other: Literal) -> bool:
|
|
55
55
|
return self.val == other.val
|
|
56
56
|
|
|
57
|
-
def _id_attrs(self) ->
|
|
57
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
58
58
|
return super()._id_attrs() + [('val', self.val)]
|
|
59
59
|
|
|
60
|
-
def sql_expr(self, _: SqlElementCache) -> Optional[sql.
|
|
60
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
61
61
|
# we need to return something here so that we can generate a Where clause for predicates
|
|
62
62
|
# that involve literals (like Where c > 0)
|
|
63
63
|
return sql.sql.expression.literal(self.val)
|
|
@@ -66,7 +66,7 @@ class Literal(Expr):
|
|
|
66
66
|
# this will be called, even though sql_expr() does not return None
|
|
67
67
|
data_row[self.slot_idx] = self.val
|
|
68
68
|
|
|
69
|
-
def _as_dict(self) ->
|
|
69
|
+
def _as_dict(self) -> dict:
|
|
70
70
|
# For some types, we need to explictly record their type, because JSON does not know
|
|
71
71
|
# how to interpret them unambiguously
|
|
72
72
|
if self.col_type.is_timestamp_type():
|
|
@@ -80,7 +80,7 @@ class Literal(Expr):
|
|
|
80
80
|
return {'val': self.val, **super()._as_dict()}
|
|
81
81
|
|
|
82
82
|
@classmethod
|
|
83
|
-
def _from_dict(cls, d:
|
|
83
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> Literal:
|
|
84
84
|
assert 'val' in d
|
|
85
85
|
if 'val_t' in d:
|
|
86
86
|
val_t = d['val_t']
|
pixeltable/exprs/method_ref.py
CHANGED
|
@@ -5,6 +5,7 @@ import sqlalchemy as sql
|
|
|
5
5
|
import pixeltable.type_system as ts
|
|
6
6
|
from pixeltable.exprs import Expr, FunctionCall
|
|
7
7
|
from pixeltable.func import FunctionRegistry
|
|
8
|
+
|
|
8
9
|
from .data_row import DataRow
|
|
9
10
|
from .row_builder import RowBuilder
|
|
10
11
|
from .sql_element_cache import SqlElementCache
|
|
@@ -36,10 +37,10 @@ class MethodRef(Expr):
|
|
|
36
37
|
return {'method_name': self.method_name, **super()._as_dict()}
|
|
37
38
|
|
|
38
39
|
@classmethod
|
|
39
|
-
def _from_dict(cls, d: dict, components: list[Expr]) ->
|
|
40
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> 'MethodRef':
|
|
40
41
|
assert 'method_name' in d
|
|
41
42
|
assert len(components) == 1
|
|
42
|
-
return cls(d['method_name']
|
|
43
|
+
return cls(components[0], d['method_name'])
|
|
43
44
|
|
|
44
45
|
def __call__(self, *args, **kwargs) -> FunctionCall:
|
|
45
46
|
result = self.fn(*[self.base_expr, *args], **kwargs)
|
|
@@ -48,12 +49,12 @@ class MethodRef(Expr):
|
|
|
48
49
|
return result
|
|
49
50
|
|
|
50
51
|
def _equals(self, other: 'MethodRef') -> bool:
|
|
51
|
-
return self.base_expr == other.base_expr and self.method_name == other.method_name
|
|
52
|
+
return self.base_expr.id == other.base_expr.id and self.method_name == other.method_name
|
|
52
53
|
|
|
53
54
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
54
55
|
return super()._id_attrs() + [('method_name', self.method_name)]
|
|
55
56
|
|
|
56
|
-
def sql_expr(self, _: SqlElementCache) -> Optional[sql.
|
|
57
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
57
58
|
return None
|
|
58
59
|
|
|
59
60
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
pixeltable/exprs/object_ref.py
CHANGED
|
@@ -5,6 +5,7 @@ from typing import Optional
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
7
7
|
import pixeltable.type_system as ts
|
|
8
|
+
|
|
8
9
|
from .data_row import DataRow
|
|
9
10
|
from .expr import Expr, ExprScope
|
|
10
11
|
from .json_mapper import JsonMapper
|
|
@@ -33,7 +34,7 @@ class ObjectRef(Expr):
|
|
|
33
34
|
def _equals(self, other: ObjectRef) -> bool:
|
|
34
35
|
return self.owner is other.owner
|
|
35
36
|
|
|
36
|
-
def sql_expr(self, _: SqlElementCache) -> Optional[sql.
|
|
37
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
37
38
|
return None
|
|
38
39
|
|
|
39
40
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
pixeltable/exprs/row_builder.py
CHANGED
|
@@ -3,7 +3,8 @@ from __future__ import annotations
|
|
|
3
3
|
import sys
|
|
4
4
|
import time
|
|
5
5
|
from dataclasses import dataclass
|
|
6
|
-
from typing import Any,
|
|
6
|
+
from typing import Any, Iterable, Optional, Sequence
|
|
7
|
+
from uuid import UUID
|
|
7
8
|
|
|
8
9
|
import sqlalchemy as sql
|
|
9
10
|
|
|
@@ -11,7 +12,6 @@ import pixeltable.catalog as catalog
|
|
|
11
12
|
import pixeltable.exceptions as excs
|
|
12
13
|
import pixeltable.func as func
|
|
13
14
|
import pixeltable.utils as utils
|
|
14
|
-
|
|
15
15
|
from .data_row import DataRow
|
|
16
16
|
from .expr import Expr
|
|
17
17
|
from .expr_set import ExprSet
|
|
@@ -48,14 +48,37 @@ class RowBuilder:
|
|
|
48
48
|
For ColumnRefs to unstored iterator columns:
|
|
49
49
|
- in order for them to be executable, we also record the iterator args and pass them to the ColumnRef
|
|
50
50
|
"""
|
|
51
|
+
unique_exprs: ExprSet
|
|
52
|
+
next_slot_idx: int
|
|
53
|
+
input_expr_slot_idxs: set[int]
|
|
54
|
+
|
|
55
|
+
# output exprs: all exprs the caller wants to materialize
|
|
56
|
+
# - explicitly requested output_exprs
|
|
57
|
+
# - values for computed columns
|
|
58
|
+
output_exprs: ExprSet
|
|
59
|
+
|
|
60
|
+
input_exprs: ExprSet
|
|
61
|
+
|
|
62
|
+
table_columns: list[ColumnSlotIdx]
|
|
63
|
+
default_eval_ctx: EvalCtx
|
|
64
|
+
unstored_iter_args: dict[UUID, Expr]
|
|
65
|
+
|
|
66
|
+
# transitive dependents for the purpose of exception propagation: an exception for slot i is propagated to
|
|
67
|
+
# _exc_dependents[i]
|
|
68
|
+
# (list of set of slot_idxs, indexed by slot_idx)
|
|
69
|
+
_exc_dependents: list[set[int]]
|
|
70
|
+
|
|
71
|
+
# records the output_expr that a subexpr belongs to
|
|
72
|
+
# (a subexpr can be shared across multiple output exprs)
|
|
73
|
+
output_expr_ids: list[set[int]]
|
|
51
74
|
|
|
52
75
|
@dataclass
|
|
53
76
|
class EvalCtx:
|
|
54
77
|
"""Context for evaluating a set of target exprs"""
|
|
55
|
-
slot_idxs:
|
|
56
|
-
exprs:
|
|
57
|
-
target_slot_idxs:
|
|
58
|
-
target_exprs:
|
|
78
|
+
slot_idxs: list[int] # slot idxs of exprs needed to evaluate target exprs; does not contain duplicates
|
|
79
|
+
exprs: list[Expr] # exprs corresponding to slot_idxs
|
|
80
|
+
target_slot_idxs: list[int] # slot idxs of target exprs; might contain duplicates
|
|
81
|
+
target_exprs: list[Expr] # exprs corresponding to target_slot_idxs
|
|
59
82
|
|
|
60
83
|
def __init__(
|
|
61
84
|
self, output_exprs: Sequence[Expr], columns: Sequence[catalog.Column], input_exprs: Iterable[Expr]
|
|
@@ -67,35 +90,56 @@ class RowBuilder:
|
|
|
67
90
|
input_exprs: list of Exprs that are excluded from evaluation (because they're already materialized)
|
|
68
91
|
TODO: enforce that output_exprs doesn't overlap with input_exprs?
|
|
69
92
|
"""
|
|
70
|
-
self.unique_exprs = ExprSet() # dependencies precede their dependents
|
|
93
|
+
self.unique_exprs: ExprSet[Expr] = ExprSet() # dependencies precede their dependents
|
|
71
94
|
self.next_slot_idx = 0
|
|
72
95
|
|
|
73
96
|
# record input and output exprs; make copies to avoid reusing execution state
|
|
74
97
|
unique_input_exprs = [self._record_unique_expr(e.copy(), recursive=False) for e in input_exprs]
|
|
75
98
|
self.input_expr_slot_idxs = {e.slot_idx for e in unique_input_exprs}
|
|
76
99
|
|
|
77
|
-
# output exprs: all exprs the caller wants to materialize
|
|
78
|
-
# - explicitly requested output_exprs
|
|
79
|
-
# - values for computed columns
|
|
80
100
|
resolve_cols = set(columns)
|
|
81
101
|
self.output_exprs = ExprSet([
|
|
82
102
|
self._record_unique_expr(e.copy().resolve_computed_cols(resolve_cols=resolve_cols), recursive=True)
|
|
83
103
|
for e in output_exprs
|
|
84
104
|
])
|
|
85
105
|
|
|
86
|
-
#
|
|
106
|
+
# if init(columns):
|
|
107
|
+
# - we are creating table rows and need to record columns for create_table_row()
|
|
108
|
+
# - output_exprs materialize those columns
|
|
109
|
+
# - input_exprs are ColumnRefs of the non-computed columns (ie, what needs to be provided as input)
|
|
110
|
+
# - media validation:
|
|
111
|
+
# * for write-validated columns, we need to create validating ColumnRefs
|
|
112
|
+
# * further references to that column (eg, computed cols) need to resolve to the validating ColumnRef
|
|
87
113
|
from .column_ref import ColumnRef
|
|
88
|
-
self.table_columns:
|
|
114
|
+
self.table_columns: list[ColumnSlotIdx] = []
|
|
115
|
+
self.input_exprs = ExprSet()
|
|
116
|
+
validating_colrefs: dict[Expr, Expr] = {} # key: non-validating colref, value: corresp. validating colref
|
|
89
117
|
for col in columns:
|
|
118
|
+
expr: Expr
|
|
90
119
|
if col.is_computed:
|
|
91
120
|
assert col.value_expr is not None
|
|
92
121
|
# create a copy here so we don't reuse execution state and resolve references to computed columns
|
|
93
122
|
expr = col.value_expr.copy().resolve_computed_cols(resolve_cols=resolve_cols)
|
|
123
|
+
expr = expr.substitute(validating_colrefs)
|
|
94
124
|
expr = self._record_unique_expr(expr, recursive=True)
|
|
95
125
|
else:
|
|
96
126
|
# record a ColumnRef so that references to this column resolve to the same slot idx
|
|
97
|
-
|
|
98
|
-
|
|
127
|
+
perform_validation = (
|
|
128
|
+
None if not col.col_type.is_media_type()
|
|
129
|
+
else col.media_validation == catalog.MediaValidation.ON_WRITE
|
|
130
|
+
)
|
|
131
|
+
expr = ColumnRef(col, perform_validation=perform_validation)
|
|
132
|
+
# recursive=True: needed for validating ColumnRef
|
|
133
|
+
expr = self._record_unique_expr(expr, recursive=True)
|
|
134
|
+
|
|
135
|
+
if perform_validation:
|
|
136
|
+
# if expr is a validating ColumnRef, the input is the non-validating ColumnRef
|
|
137
|
+
non_validating_colref = expr.components[0]
|
|
138
|
+
self.input_exprs.add(non_validating_colref)
|
|
139
|
+
validating_colrefs[non_validating_colref] = expr
|
|
140
|
+
else:
|
|
141
|
+
self.input_exprs.add(expr)
|
|
142
|
+
|
|
99
143
|
self.add_table_column(col, expr.slot_idx)
|
|
100
144
|
self.output_exprs.add(expr)
|
|
101
145
|
|
|
@@ -118,8 +162,9 @@ class RowBuilder:
|
|
|
118
162
|
unstored_iter_col_refs = [col_ref for col_ref in col_refs if refs_unstored_iter_col(col_ref)]
|
|
119
163
|
component_views = [col_ref.col.tbl for col_ref in unstored_iter_col_refs]
|
|
120
164
|
unstored_iter_args = {view.id: view.iterator_args.copy() for view in component_views}
|
|
121
|
-
self.unstored_iter_args =
|
|
122
|
-
|
|
165
|
+
self.unstored_iter_args = {
|
|
166
|
+
id: self._record_unique_expr(arg, recursive=True) for id, arg in unstored_iter_args.items()
|
|
167
|
+
}
|
|
123
168
|
|
|
124
169
|
for col_ref in unstored_iter_col_refs:
|
|
125
170
|
iter_arg_ctx = self.create_eval_ctx([unstored_iter_args[col_ref.col.tbl.id]])
|
|
@@ -129,25 +174,28 @@ class RowBuilder:
|
|
|
129
174
|
for i, expr in enumerate(self.unique_exprs):
|
|
130
175
|
assert expr.slot_idx == i
|
|
131
176
|
|
|
132
|
-
#
|
|
133
|
-
|
|
177
|
+
# determine transitive dependencies for the purpose of exception propagation
|
|
178
|
+
# (list of set of slot_idxs, indexed by slot_idx)
|
|
179
|
+
exc_dependencies: list[set[int]] = [set() for _ in range(self.num_materialized)]
|
|
180
|
+
from .column_property_ref import ColumnPropertyRef
|
|
134
181
|
for expr in self.unique_exprs:
|
|
135
182
|
if expr.slot_idx in self.input_expr_slot_idxs:
|
|
136
183
|
# this is input and therefore doesn't depend on other exprs
|
|
137
184
|
continue
|
|
185
|
+
# error properties don't have exceptions themselves
|
|
186
|
+
if isinstance(expr, ColumnPropertyRef) and expr.is_error_prop():
|
|
187
|
+
continue
|
|
138
188
|
for d in expr.dependencies():
|
|
139
|
-
|
|
140
|
-
|
|
189
|
+
exc_dependencies[expr.slot_idx].add(d.slot_idx)
|
|
190
|
+
exc_dependencies[expr.slot_idx].update(exc_dependencies[d.slot_idx])
|
|
141
191
|
|
|
142
|
-
|
|
143
|
-
self.dependents: List[Set[int]] = [set() for _ in range(self.num_materialized)]
|
|
192
|
+
self._exc_dependents = [set() for _ in range(self.num_materialized)]
|
|
144
193
|
for expr in self.unique_exprs:
|
|
145
|
-
|
|
146
|
-
|
|
194
|
+
assert expr.slot_idx is not None
|
|
195
|
+
for d_idx in exc_dependencies[expr.slot_idx]:
|
|
196
|
+
self._exc_dependents[d_idx].add(expr.slot_idx)
|
|
147
197
|
|
|
148
|
-
|
|
149
|
-
# (a subexpr can be shared across multiple output exprs)
|
|
150
|
-
self.output_expr_ids: List[Set[int]] = [set() for _ in range(self.num_materialized)]
|
|
198
|
+
self.output_expr_ids = [set() for _ in range(self.num_materialized)]
|
|
151
199
|
for e in self.output_exprs:
|
|
152
200
|
self._record_output_expr_id(e, e.slot_idx)
|
|
153
201
|
|
|
@@ -155,7 +203,7 @@ class RowBuilder:
|
|
|
155
203
|
"""Record a column that is part of the table row"""
|
|
156
204
|
self.table_columns.append(ColumnSlotIdx(col, slot_idx))
|
|
157
205
|
|
|
158
|
-
def output_slot_idxs(self) ->
|
|
206
|
+
def output_slot_idxs(self) -> list[ColumnSlotIdx]:
|
|
159
207
|
"""Return ColumnSlotIdx for output columns"""
|
|
160
208
|
return self.table_columns
|
|
161
209
|
|
|
@@ -206,9 +254,9 @@ class RowBuilder:
|
|
|
206
254
|
for d in e.dependencies():
|
|
207
255
|
self._record_output_expr_id(d, output_expr_id)
|
|
208
256
|
|
|
209
|
-
def _compute_dependencies(self, target_slot_idxs:
|
|
257
|
+
def _compute_dependencies(self, target_slot_idxs: list[int], excluded_slot_idxs: list[int]) -> list[int]:
|
|
210
258
|
"""Compute exprs needed to materialize the given target slots, excluding 'excluded_slot_idxs'"""
|
|
211
|
-
dependencies = [set() for _ in range(self.num_materialized)] # indexed by slot_idx
|
|
259
|
+
dependencies: list[set[int]] = [set() for _ in range(self.num_materialized)] # indexed by slot_idx
|
|
212
260
|
# doing this front-to-back ensures that we capture transitive dependencies
|
|
213
261
|
max_target_slot_idx = max(target_slot_idxs)
|
|
214
262
|
for expr in self.unique_exprs:
|
|
@@ -237,6 +285,8 @@ class RowBuilder:
|
|
|
237
285
|
for e in expr_list:
|
|
238
286
|
self.__set_slot_idxs_aux(e)
|
|
239
287
|
if remove_duplicates:
|
|
288
|
+
# only allowed if `expr_list` is a mutable list
|
|
289
|
+
assert isinstance(expr_list, list)
|
|
240
290
|
deduped = list(ExprSet(expr_list))
|
|
241
291
|
expr_list[:] = deduped
|
|
242
292
|
|
|
@@ -248,13 +298,14 @@ class RowBuilder:
|
|
|
248
298
|
for c in e.components:
|
|
249
299
|
self.__set_slot_idxs_aux(c)
|
|
250
300
|
|
|
251
|
-
def get_dependencies(self, targets:
|
|
301
|
+
def get_dependencies(self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None) -> list[Expr]:
|
|
252
302
|
"""
|
|
253
303
|
Return list of dependencies needed to evaluate the given target exprs (expressed as slot idxs).
|
|
254
304
|
The exprs given in 'exclude' are excluded.
|
|
255
305
|
Returns:
|
|
256
306
|
list of Exprs from unique_exprs (= with slot_idx set)
|
|
257
307
|
"""
|
|
308
|
+
targets = list(targets)
|
|
258
309
|
if exclude is None:
|
|
259
310
|
exclude = []
|
|
260
311
|
if len(targets) == 0:
|
|
@@ -270,8 +321,9 @@ class RowBuilder:
|
|
|
270
321
|
result_ids.sort()
|
|
271
322
|
return [self.unique_exprs[id] for id in result_ids]
|
|
272
323
|
|
|
273
|
-
def create_eval_ctx(self, targets:
|
|
324
|
+
def create_eval_ctx(self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None) -> EvalCtx:
|
|
274
325
|
"""Return EvalCtx for targets"""
|
|
326
|
+
targets = list(targets)
|
|
275
327
|
if exclude is None:
|
|
276
328
|
exclude = []
|
|
277
329
|
if len(targets) == 0:
|
|
@@ -287,7 +339,7 @@ class RowBuilder:
|
|
|
287
339
|
def set_exc(self, data_row: DataRow, slot_idx: int, exc: Exception) -> None:
|
|
288
340
|
"""Record an exception in data_row and propagate it to dependents"""
|
|
289
341
|
data_row.set_exc(slot_idx, exc)
|
|
290
|
-
for slot_idx in self.
|
|
342
|
+
for slot_idx in self._exc_dependents[slot_idx]:
|
|
291
343
|
data_row.set_exc(slot_idx, exc)
|
|
292
344
|
|
|
293
345
|
def eval(
|
|
@@ -318,14 +370,14 @@ class RowBuilder:
|
|
|
318
370
|
raise excs.ExprEvalError(
|
|
319
371
|
expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0)
|
|
320
372
|
|
|
321
|
-
def create_table_row(self, data_row: DataRow, exc_col_ids:
|
|
373
|
+
def create_table_row(self, data_row: DataRow, exc_col_ids: set[int]) -> tuple[dict[str, Any], int]:
|
|
322
374
|
"""Create a table row from the slots that have an output column assigned
|
|
323
375
|
|
|
324
|
-
Return
|
|
376
|
+
Return tuple[dict that represents a stored row (can be passed to sql.insert()), # of exceptions]
|
|
325
377
|
This excludes system columns.
|
|
326
378
|
"""
|
|
327
379
|
num_excs = 0
|
|
328
|
-
table_row:
|
|
380
|
+
table_row: dict[str, Any] = {}
|
|
329
381
|
for info in self.table_columns:
|
|
330
382
|
col, slot_idx = info.col, info.slot_idx
|
|
331
383
|
if data_row.has_exc(slot_idx):
|
pixeltable/exprs/rowid_ref.py
CHANGED
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from
|
|
2
|
+
|
|
3
|
+
from typing import Any, Optional
|
|
4
4
|
from uuid import UUID
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
import pixeltable.catalog as catalog
|
|
9
|
+
import pixeltable.type_system as ts
|
|
10
|
+
|
|
9
11
|
from .data_row import DataRow
|
|
12
|
+
from .expr import Expr
|
|
10
13
|
from .row_builder import RowBuilder
|
|
11
|
-
|
|
12
|
-
import pixeltable.catalog as catalog
|
|
14
|
+
from .sql_element_cache import SqlElementCache
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
class RowidRef(Expr):
|
|
@@ -49,14 +51,14 @@ class RowidRef(Expr):
|
|
|
49
51
|
return self.normalized_base_id == other.normalized_base_id \
|
|
50
52
|
and self.rowid_component_idx == other.rowid_component_idx
|
|
51
53
|
|
|
52
|
-
def _id_attrs(self) ->
|
|
54
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
53
55
|
return super()._id_attrs() +\
|
|
54
56
|
[('normalized_base_id', self.normalized_base_id), ('idx', self.rowid_component_idx)]
|
|
55
57
|
|
|
56
58
|
def __str__(self) -> str:
|
|
57
59
|
# check if this is the pos column of a component view
|
|
58
60
|
tbl = self.tbl if self.tbl is not None else catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
|
|
59
|
-
if tbl.is_component_view() and self.rowid_component_idx == tbl.store_tbl.pos_col_idx:
|
|
61
|
+
if tbl.is_component_view() and self.rowid_component_idx == tbl.store_tbl.pos_col_idx: # type: ignore[attr-defined]
|
|
60
62
|
return catalog.globals._POS_COLUMN_NAME
|
|
61
63
|
return ''
|
|
62
64
|
|
|
@@ -73,7 +75,7 @@ class RowidRef(Expr):
|
|
|
73
75
|
self.tbl = tbl.tbl_version
|
|
74
76
|
self.tbl_id = self.tbl.id
|
|
75
77
|
|
|
76
|
-
def sql_expr(self, _: SqlElementCache) -> Optional[sql.
|
|
78
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
77
79
|
tbl = self.tbl if self.tbl is not None else catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
|
|
78
80
|
rowid_cols = tbl.store_tbl.rowid_columns()
|
|
79
81
|
return rowid_cols[self.rowid_component_idx]
|
|
@@ -81,7 +83,7 @@ class RowidRef(Expr):
|
|
|
81
83
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
82
84
|
data_row[self.slot_idx] = data_row.pk[self.rowid_component_idx]
|
|
83
85
|
|
|
84
|
-
def _as_dict(self) ->
|
|
86
|
+
def _as_dict(self) -> dict:
|
|
85
87
|
return {
|
|
86
88
|
'tbl_id': str(self.tbl_id),
|
|
87
89
|
'normalized_base_id': str(self.normalized_base_id),
|
|
@@ -89,7 +91,6 @@ class RowidRef(Expr):
|
|
|
89
91
|
}
|
|
90
92
|
|
|
91
93
|
@classmethod
|
|
92
|
-
def _from_dict(cls, d:
|
|
94
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> RowidRef:
|
|
93
95
|
tbl_id, normalized_base_id, idx = UUID(d['tbl_id']), UUID(d['normalized_base_id']), d['idx']
|
|
94
96
|
return cls(tbl=None, idx=idx, tbl_id=tbl_id, normalized_base_id=normalized_base_id)
|
|
95
|
-
|
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
from typing import
|
|
2
|
-
from .sql_element_cache import SqlElementCache
|
|
1
|
+
from typing import Any, Optional
|
|
3
2
|
|
|
4
3
|
import sqlalchemy as sql
|
|
5
|
-
import PIL.Image
|
|
6
4
|
|
|
7
5
|
import pixeltable.exceptions as excs
|
|
8
6
|
import pixeltable.type_system as ts
|
|
7
|
+
|
|
9
8
|
from .column_ref import ColumnRef
|
|
10
9
|
from .data_row import DataRow
|
|
11
10
|
from .expr import Expr
|
|
12
11
|
from .literal import Literal
|
|
13
12
|
from .row_builder import RowBuilder
|
|
13
|
+
from .sql_element_cache import SqlElementCache
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class SimilarityExpr(Expr):
|
|
@@ -27,7 +27,7 @@ class SimilarityExpr(Expr):
|
|
|
27
27
|
|
|
28
28
|
# determine index to use
|
|
29
29
|
idx_info = col_ref.col.get_idx_info()
|
|
30
|
-
|
|
30
|
+
from pixeltable import index
|
|
31
31
|
embedding_idx_info = {
|
|
32
32
|
info.name: info for info in idx_info.values() if isinstance(info.idx, index.EmbeddingIndex)
|
|
33
33
|
}
|
|
@@ -44,6 +44,7 @@ class SimilarityExpr(Expr):
|
|
|
44
44
|
else:
|
|
45
45
|
self.idx_info = next(iter(embedding_idx_info.values()))
|
|
46
46
|
idx = self.idx_info.idx
|
|
47
|
+
assert isinstance(idx, index.EmbeddingIndex)
|
|
47
48
|
|
|
48
49
|
if item_expr.col_type.is_string_type() and idx.string_embed is None:
|
|
49
50
|
raise excs.Error(
|
|
@@ -57,16 +58,20 @@ class SimilarityExpr(Expr):
|
|
|
57
58
|
def __str__(self) -> str:
|
|
58
59
|
return f'{self.components[0]}.similarity({self.components[1]})'
|
|
59
60
|
|
|
60
|
-
def sql_expr(self, _: SqlElementCache) -> Optional[sql.
|
|
61
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
61
62
|
if not isinstance(self.components[1], Literal):
|
|
62
63
|
raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
|
|
63
64
|
item = self.components[1].val
|
|
65
|
+
from pixeltable import index
|
|
66
|
+
assert isinstance(self.idx_info.idx, index.EmbeddingIndex)
|
|
64
67
|
return self.idx_info.idx.similarity_clause(self.idx_info.val_col, item)
|
|
65
68
|
|
|
66
|
-
def as_order_by_clause(self, is_asc: bool) -> Optional[sql.
|
|
69
|
+
def as_order_by_clause(self, is_asc: bool) -> Optional[sql.ColumnElement]:
|
|
67
70
|
if not isinstance(self.components[1], Literal):
|
|
68
71
|
raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
|
|
69
72
|
item = self.components[1].val
|
|
73
|
+
from pixeltable import index
|
|
74
|
+
assert isinstance(self.idx_info.idx, index.EmbeddingIndex)
|
|
70
75
|
return self.idx_info.idx.order_by_clause(self.idx_info.val_col, item, is_asc)
|
|
71
76
|
|
|
72
77
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
@@ -74,7 +79,7 @@ class SimilarityExpr(Expr):
|
|
|
74
79
|
assert False
|
|
75
80
|
|
|
76
81
|
@classmethod
|
|
77
|
-
def _from_dict(cls, d: dict, components:
|
|
82
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> 'SimilarityExpr':
|
|
78
83
|
assert len(components) == 2
|
|
79
84
|
assert isinstance(components[0], ColumnRef)
|
|
80
85
|
return cls(components[0], components[1])
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Iterable, Union, Optional
|
|
1
|
+
from typing import Iterable, Union, Optional, cast
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -27,8 +27,10 @@ class SqlElementCache:
|
|
|
27
27
|
self.cache[e.id] = el
|
|
28
28
|
return el
|
|
29
29
|
|
|
30
|
-
def contains(self,
|
|
31
|
-
"""Returns True if
|
|
32
|
-
|
|
33
|
-
|
|
30
|
+
def contains(self, item: Expr) -> bool:
|
|
31
|
+
"""Returns True if the cache contains a (non-None) value for the given Expr."""
|
|
32
|
+
return self.get(item) is not None
|
|
33
|
+
|
|
34
|
+
def contains_all(self, items: Iterable[Expr]) -> bool:
|
|
35
|
+
"""Returns True if the cache contains a (non-None) value for every item in the collection of Exprs."""
|
|
34
36
|
return all(self.get(e) is not None for e in items)
|
pixeltable/exprs/type_cast.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Any, Optional
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
5
5
|
import pixeltable.type_system as ts
|
|
6
|
+
|
|
6
7
|
from .expr import DataRow, Expr
|
|
7
8
|
from .row_builder import RowBuilder
|
|
8
9
|
from .sql_element_cache import SqlElementCache
|
|
@@ -15,7 +16,7 @@ class TypeCast(Expr):
|
|
|
15
16
|
"""
|
|
16
17
|
def __init__(self, underlying: Expr, new_type: ts.ColumnType):
|
|
17
18
|
super().__init__(new_type)
|
|
18
|
-
self.components:
|
|
19
|
+
self.components: list[Expr] = [underlying]
|
|
19
20
|
self.id: Optional[int] = self._create_id()
|
|
20
21
|
|
|
21
22
|
@property
|
|
@@ -26,10 +27,10 @@ class TypeCast(Expr):
|
|
|
26
27
|
# `TypeCast` has no properties beyond those captured by `Expr`.
|
|
27
28
|
return True
|
|
28
29
|
|
|
29
|
-
def _id_attrs(self) ->
|
|
30
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
30
31
|
return super()._id_attrs() + [('new_type', self.col_type)]
|
|
31
32
|
|
|
32
|
-
def sql_expr(self, _: SqlElementCache) -> Optional[sql.
|
|
33
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
33
34
|
"""
|
|
34
35
|
sql_expr() is unimplemented for now, in order to sidestep potentially thorny
|
|
35
36
|
questions about consistency of doing type conversions in both Python and Postgres.
|
|
@@ -40,11 +41,12 @@ class TypeCast(Expr):
|
|
|
40
41
|
original_val = data_row[self._underlying.slot_idx]
|
|
41
42
|
data_row[self.slot_idx] = self.col_type.create_literal(original_val)
|
|
42
43
|
|
|
43
|
-
|
|
44
|
+
|
|
45
|
+
def _as_dict(self) -> dict:
|
|
44
46
|
return {'new_type': self.col_type.as_dict(), **super()._as_dict()}
|
|
45
47
|
|
|
46
48
|
@classmethod
|
|
47
|
-
def _from_dict(cls, d:
|
|
49
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> 'TypeCast':
|
|
48
50
|
assert 'new_type' in d
|
|
49
51
|
assert len(components) == 1
|
|
50
52
|
return cls(components[0], ts.ColumnType.from_dict(d['new_type']))
|
pixeltable/exprs/variable.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Any, NoReturn
|
|
4
4
|
|
|
5
5
|
import pixeltable.type_system as ts
|
|
6
|
+
|
|
6
7
|
from .data_row import DataRow
|
|
7
8
|
from .expr import Expr
|
|
8
9
|
from .row_builder import RowBuilder
|
|
@@ -20,7 +21,7 @@ class Variable(Expr):
|
|
|
20
21
|
self.name = name
|
|
21
22
|
self.id = self._create_id()
|
|
22
23
|
|
|
23
|
-
def _id_attrs(self) ->
|
|
24
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
24
25
|
return super()._id_attrs() + [('name', self.name)]
|
|
25
26
|
|
|
26
27
|
def default_column_name(self) -> NoReturn:
|
|
@@ -38,9 +39,9 @@ class Variable(Expr):
|
|
|
38
39
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> NoReturn:
|
|
39
40
|
raise NotImplementedError()
|
|
40
41
|
|
|
41
|
-
def _as_dict(self) ->
|
|
42
|
+
def _as_dict(self) -> dict:
|
|
42
43
|
return {'name': self.name, 'type': self.col_type.as_dict(), **super()._as_dict()}
|
|
43
44
|
|
|
44
45
|
@classmethod
|
|
45
|
-
def _from_dict(cls, d:
|
|
46
|
+
def _from_dict(cls, d: dict, _: list[Expr]) -> Variable:
|
|
46
47
|
return cls(d['name'], ts.ColumnType.from_dict(d['type']))
|