pixeltable 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +7 -19
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +7 -7
- pixeltable/catalog/column.py +37 -11
- pixeltable/catalog/globals.py +21 -0
- pixeltable/catalog/insertable_table.py +6 -4
- pixeltable/catalog/table.py +227 -148
- pixeltable/catalog/table_version.py +66 -28
- pixeltable/catalog/table_version_path.py +0 -8
- pixeltable/catalog/view.py +18 -19
- pixeltable/dataframe.py +16 -32
- pixeltable/env.py +6 -1
- pixeltable/exec/__init__.py +1 -2
- pixeltable/exec/aggregation_node.py +27 -17
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/data_row_batch.py +9 -26
- pixeltable/exec/exec_node.py +36 -7
- pixeltable/exec/expr_eval_node.py +19 -11
- pixeltable/exec/in_memory_data_node.py +14 -11
- pixeltable/exec/sql_node.py +266 -138
- pixeltable/exprs/__init__.py +1 -0
- pixeltable/exprs/arithmetic_expr.py +3 -1
- pixeltable/exprs/array_slice.py +7 -7
- pixeltable/exprs/column_property_ref.py +37 -10
- pixeltable/exprs/column_ref.py +93 -14
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +8 -7
- pixeltable/exprs/data_row.py +56 -36
- pixeltable/exprs/expr.py +65 -63
- pixeltable/exprs/expr_dict.py +55 -0
- pixeltable/exprs/expr_set.py +26 -15
- pixeltable/exprs/function_call.py +53 -24
- pixeltable/exprs/globals.py +4 -1
- pixeltable/exprs/in_predicate.py +8 -7
- pixeltable/exprs/inline_expr.py +4 -4
- pixeltable/exprs/is_null.py +4 -4
- pixeltable/exprs/json_mapper.py +11 -12
- pixeltable/exprs/json_path.py +5 -10
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +5 -4
- pixeltable/exprs/object_ref.py +2 -1
- pixeltable/exprs/row_builder.py +88 -36
- pixeltable/exprs/rowid_ref.py +14 -13
- pixeltable/exprs/similarity_expr.py +12 -7
- pixeltable/exprs/sql_element_cache.py +12 -6
- pixeltable/exprs/type_cast.py +8 -6
- pixeltable/exprs/variable.py +5 -4
- pixeltable/ext/functions/whisperx.py +7 -2
- pixeltable/func/aggregate_function.py +1 -1
- pixeltable/func/callable_function.py +2 -2
- pixeltable/func/function.py +11 -10
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/query_template_function.py +11 -12
- pixeltable/func/signature.py +17 -15
- pixeltable/func/udf.py +0 -4
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/audio.py +4 -6
- pixeltable/functions/globals.py +84 -42
- pixeltable/functions/huggingface.py +31 -34
- pixeltable/functions/image.py +59 -45
- pixeltable/functions/json.py +0 -1
- pixeltable/functions/llama_cpp.py +106 -0
- pixeltable/functions/mistralai.py +2 -2
- pixeltable/functions/ollama.py +147 -0
- pixeltable/functions/openai.py +22 -25
- pixeltable/functions/replicate.py +72 -0
- pixeltable/functions/string.py +59 -50
- pixeltable/functions/timestamp.py +20 -20
- pixeltable/functions/together.py +2 -2
- pixeltable/functions/video.py +11 -20
- pixeltable/functions/whisper.py +2 -20
- pixeltable/globals.py +65 -74
- pixeltable/index/base.py +2 -2
- pixeltable/index/btree.py +20 -7
- pixeltable/index/embedding_index.py +12 -14
- pixeltable/io/__init__.py +1 -2
- pixeltable/io/external_store.py +11 -5
- pixeltable/io/fiftyone.py +178 -0
- pixeltable/io/globals.py +98 -2
- pixeltable/io/hf_datasets.py +1 -1
- pixeltable/io/label_studio.py +6 -6
- pixeltable/io/parquet.py +14 -13
- pixeltable/iterators/base.py +3 -2
- pixeltable/iterators/document.py +10 -8
- pixeltable/iterators/video.py +126 -60
- pixeltable/metadata/__init__.py +4 -3
- pixeltable/metadata/converters/convert_14.py +4 -2
- pixeltable/metadata/converters/convert_15.py +1 -1
- pixeltable/metadata/converters/convert_19.py +1 -0
- pixeltable/metadata/converters/convert_20.py +1 -1
- pixeltable/metadata/converters/convert_21.py +34 -0
- pixeltable/metadata/converters/util.py +54 -12
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +40 -21
- pixeltable/plan.py +149 -165
- pixeltable/py.typed +0 -0
- pixeltable/store.py +57 -37
- pixeltable/tool/create_test_db_dump.py +6 -6
- pixeltable/tool/create_test_video.py +1 -1
- pixeltable/tool/doc_plugins/griffe.py +3 -34
- pixeltable/tool/embed_udf.py +1 -1
- pixeltable/tool/mypy_plugin.py +55 -0
- pixeltable/type_system.py +260 -61
- pixeltable/utils/arrow.py +10 -9
- pixeltable/utils/coco.py +4 -4
- pixeltable/utils/documents.py +16 -2
- pixeltable/utils/filecache.py +9 -9
- pixeltable/utils/formatter.py +10 -11
- pixeltable/utils/http_server.py +2 -5
- pixeltable/utils/media_store.py +6 -6
- pixeltable/utils/pytorch.py +10 -11
- pixeltable/utils/sql.py +2 -1
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/METADATA +50 -13
- pixeltable-0.2.22.dist-info/RECORD +153 -0
- pixeltable/exec/media_validation_node.py +0 -43
- pixeltable/utils/help.py +0 -11
- pixeltable-0.2.20.dist-info/RECORD +0 -147
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/entry_points.txt +0 -0
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import enum
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import Any, Optional
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
8
|
import pixeltable.type_system as ts
|
|
9
|
+
from pixeltable import catalog
|
|
9
10
|
from .column_ref import ColumnRef
|
|
10
11
|
from .data_row import DataRow
|
|
11
12
|
from .expr import Expr
|
|
@@ -33,22 +34,36 @@ class ColumnPropertyRef(Expr):
|
|
|
33
34
|
def default_column_name(self) -> Optional[str]:
|
|
34
35
|
return str(self).replace('.', '_')
|
|
35
36
|
|
|
36
|
-
def _equals(self, other:
|
|
37
|
+
def _equals(self, other: ColumnPropertyRef) -> bool:
|
|
37
38
|
return self.prop == other.prop
|
|
38
39
|
|
|
39
|
-
def _id_attrs(self) ->
|
|
40
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
40
41
|
return super()._id_attrs() + [('prop', self.prop.value)]
|
|
41
42
|
|
|
42
43
|
@property
|
|
43
44
|
def _col_ref(self) -> ColumnRef:
|
|
44
|
-
|
|
45
|
+
col_ref = self.components[0]
|
|
46
|
+
assert isinstance(col_ref, ColumnRef)
|
|
47
|
+
return col_ref
|
|
45
48
|
|
|
46
49
|
def __str__(self) -> str:
|
|
47
50
|
return f'{self._col_ref}.{self.prop.name.lower()}'
|
|
48
51
|
|
|
52
|
+
def is_error_prop(self) -> bool:
|
|
53
|
+
return self.prop == self.Property.ERRORTYPE or self.prop == self.Property.ERRORMSG
|
|
54
|
+
|
|
49
55
|
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
50
56
|
if not self._col_ref.col.is_stored:
|
|
51
57
|
return None
|
|
58
|
+
|
|
59
|
+
# the errortype/-msg properties of a read-validated media column need to be extracted from the DataRow
|
|
60
|
+
if (
|
|
61
|
+
self._col_ref.col.col_type.is_media_type()
|
|
62
|
+
and self._col_ref.col.media_validation == catalog.MediaValidation.ON_READ
|
|
63
|
+
and self.is_error_prop()
|
|
64
|
+
):
|
|
65
|
+
return None
|
|
66
|
+
|
|
52
67
|
if self.prop == self.Property.ERRORTYPE:
|
|
53
68
|
assert self._col_ref.col.sa_errortype_col is not None
|
|
54
69
|
return self._col_ref.col.sa_errortype_col
|
|
@@ -61,18 +76,30 @@ class ColumnPropertyRef(Expr):
|
|
|
61
76
|
return None
|
|
62
77
|
|
|
63
78
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
64
|
-
assert self.prop == self.Property.FILEURL or self.prop == self.Property.LOCALPATH
|
|
65
|
-
assert data_row.has_val[self._col_ref.slot_idx]
|
|
66
79
|
if self.prop == self.Property.FILEURL:
|
|
80
|
+
assert data_row.has_val[self._col_ref.slot_idx]
|
|
67
81
|
data_row[self.slot_idx] = data_row.file_urls[self._col_ref.slot_idx]
|
|
68
|
-
|
|
82
|
+
return
|
|
83
|
+
elif self.prop == self.Property.LOCALPATH:
|
|
84
|
+
assert data_row.has_val[self._col_ref.slot_idx]
|
|
69
85
|
data_row[self.slot_idx] = data_row.file_paths[self._col_ref.slot_idx]
|
|
70
|
-
|
|
71
|
-
|
|
86
|
+
return
|
|
87
|
+
elif self.is_error_prop():
|
|
88
|
+
exc = data_row.get_exc(self._col_ref.slot_idx)
|
|
89
|
+
if exc is None:
|
|
90
|
+
data_row[self.slot_idx] = None
|
|
91
|
+
elif self.prop == self.Property.ERRORTYPE:
|
|
92
|
+
data_row[self.slot_idx] = type(exc).__name__
|
|
93
|
+
else:
|
|
94
|
+
data_row[self.slot_idx] = str(exc)
|
|
95
|
+
else:
|
|
96
|
+
assert False
|
|
97
|
+
|
|
98
|
+
def _as_dict(self) -> dict:
|
|
72
99
|
return {'prop': self.prop.value, **super()._as_dict()}
|
|
73
100
|
|
|
74
101
|
@classmethod
|
|
75
|
-
def _from_dict(cls, d:
|
|
102
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> ColumnPropertyRef:
|
|
76
103
|
assert 'prop' in d
|
|
77
104
|
assert isinstance(components[0], ColumnRef)
|
|
78
105
|
return cls(components[0], cls.Property(d['prop']))
|
pixeltable/exprs/column_ref.py
CHANGED
|
@@ -1,16 +1,18 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
2
|
+
|
|
3
|
+
from typing import Any, Optional, Sequence
|
|
3
4
|
from uuid import UUID
|
|
4
5
|
|
|
5
6
|
import sqlalchemy as sql
|
|
6
7
|
|
|
7
|
-
|
|
8
|
+
import pixeltable.catalog as catalog
|
|
9
|
+
import pixeltable.exceptions as excs
|
|
10
|
+
import pixeltable.iterators as iters
|
|
11
|
+
|
|
8
12
|
from .data_row import DataRow
|
|
13
|
+
from .expr import Expr
|
|
9
14
|
from .row_builder import RowBuilder
|
|
10
15
|
from .sql_element_cache import SqlElementCache
|
|
11
|
-
import pixeltable.iterators as iters
|
|
12
|
-
import pixeltable.exceptions as excs
|
|
13
|
-
import pixeltable.catalog as catalog
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
class ColumnRef(Expr):
|
|
@@ -19,18 +21,31 @@ class ColumnRef(Expr):
|
|
|
19
21
|
When this reference is created in the context of a view, it can also refer to a column of the view base.
|
|
20
22
|
For that reason, a ColumnRef needs to be serialized with the qualifying table id (column ids are only
|
|
21
23
|
unique in the context of a particular table).
|
|
24
|
+
|
|
25
|
+
Media validation:
|
|
26
|
+
- media validation is potentially cpu-intensive, and it's desirable to schedule and parallelize it during
|
|
27
|
+
general expr evaluation
|
|
28
|
+
- media validation on read is done in ColumnRef.eval()
|
|
29
|
+
- a validating ColumnRef cannot be translated to SQL (because the validation is done in Python)
|
|
30
|
+
- in that case, the ColumnRef also instantiates a second non-validating ColumnRef as a component (= dependency)
|
|
31
|
+
- the non-validating ColumnRef is used for SQL translation
|
|
32
|
+
|
|
33
|
+
TODO:
|
|
34
|
+
separate Exprs (like validating ColumnRefs) from the logical expression tree and instead have RowBuilder
|
|
35
|
+
insert them into the EvalCtxs as needed
|
|
22
36
|
"""
|
|
23
37
|
|
|
24
38
|
col: catalog.Column
|
|
25
39
|
is_unstored_iter_col: bool
|
|
26
40
|
iter_arg_ctx: Optional[RowBuilder.EvalCtx]
|
|
27
41
|
base_rowid_len: int
|
|
28
|
-
base_rowid:
|
|
42
|
+
base_rowid: Sequence[Optional[Any]]
|
|
29
43
|
iterator: Optional[iters.ComponentIterator]
|
|
30
44
|
pos_idx: Optional[int]
|
|
31
45
|
id: int
|
|
46
|
+
perform_validation: bool # if True, performs media validation
|
|
32
47
|
|
|
33
|
-
def __init__(self, col: catalog.Column):
|
|
48
|
+
def __init__(self, col: catalog.Column, perform_validation: Optional[bool] = None):
|
|
34
49
|
super().__init__(col.col_type)
|
|
35
50
|
assert col.tbl is not None
|
|
36
51
|
self.col = col
|
|
@@ -43,17 +58,44 @@ class ColumnRef(Expr):
|
|
|
43
58
|
self.iterator = None
|
|
44
59
|
# index of the position column in the view's primary key; don't try to reference tbl.store_tbl here
|
|
45
60
|
self.pos_idx = col.tbl.num_rowid_columns() - 1 if self.is_unstored_iter_col else None
|
|
61
|
+
|
|
62
|
+
self.perform_validation = False
|
|
63
|
+
if col.col_type.is_media_type():
|
|
64
|
+
# we perform media validation if the column is a media type and the validation is set to ON_READ,
|
|
65
|
+
# unless we're told not to
|
|
66
|
+
if perform_validation is not None:
|
|
67
|
+
self.perform_validation = perform_validation
|
|
68
|
+
else:
|
|
69
|
+
self.perform_validation = (
|
|
70
|
+
col.col_type.is_media_type() and col.media_validation == catalog.MediaValidation.ON_READ
|
|
71
|
+
)
|
|
72
|
+
else:
|
|
73
|
+
assert perform_validation is None or not perform_validation
|
|
74
|
+
if self.perform_validation:
|
|
75
|
+
non_validating_col_ref = ColumnRef(col, perform_validation=False)
|
|
76
|
+
self.components = [non_validating_col_ref]
|
|
46
77
|
self.id = self._create_id()
|
|
47
78
|
|
|
48
79
|
def set_iter_arg_ctx(self, iter_arg_ctx: RowBuilder.EvalCtx) -> None:
|
|
49
80
|
self.iter_arg_ctx = iter_arg_ctx
|
|
50
81
|
assert len(self.iter_arg_ctx.target_slot_idxs) == 1 # a single inline dict
|
|
51
82
|
|
|
52
|
-
def _id_attrs(self) -> list[
|
|
53
|
-
return
|
|
83
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
84
|
+
return (
|
|
85
|
+
super()._id_attrs()
|
|
86
|
+
+ [('tbl_id', self.col.tbl.id), ('col_id', self.col.id), ('perform_validation', self.perform_validation)]
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
# override
|
|
90
|
+
def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> ColumnRef:
|
|
91
|
+
target = tbl_versions[self.col.tbl.id]
|
|
92
|
+
assert self.col.id in target.cols_by_id
|
|
93
|
+
col = target.cols_by_id[self.col.id]
|
|
94
|
+
return ColumnRef(col)
|
|
54
95
|
|
|
55
96
|
def __getattr__(self, name: str) -> Expr:
|
|
56
97
|
from .column_property_ref import ColumnPropertyRef
|
|
98
|
+
|
|
57
99
|
# resolve column properties
|
|
58
100
|
if name == ColumnPropertyRef.Property.ERRORTYPE.name.lower() \
|
|
59
101
|
or name == ColumnPropertyRef.Property.ERRORMSG.name.lower():
|
|
@@ -82,7 +124,7 @@ class ColumnRef(Expr):
|
|
|
82
124
|
return str(self)
|
|
83
125
|
|
|
84
126
|
def _equals(self, other: ColumnRef) -> bool:
|
|
85
|
-
return self.col == other.col
|
|
127
|
+
return self.col == other.col and self.perform_validation == other.perform_validation
|
|
86
128
|
|
|
87
129
|
def __str__(self) -> str:
|
|
88
130
|
if self.col.name is None:
|
|
@@ -94,9 +136,38 @@ class ColumnRef(Expr):
|
|
|
94
136
|
return f'ColumnRef({self.col!r})'
|
|
95
137
|
|
|
96
138
|
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
97
|
-
return self.col.sa_col
|
|
139
|
+
return None if self.perform_validation else self.col.sa_col
|
|
98
140
|
|
|
99
141
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
142
|
+
if self.perform_validation:
|
|
143
|
+
# validate media file of our input ColumnRef and if successful, replicate the state of that slot
|
|
144
|
+
# to our slot
|
|
145
|
+
unvalidated_slot_idx = self.components[0].slot_idx
|
|
146
|
+
if data_row.file_paths[unvalidated_slot_idx] is None:
|
|
147
|
+
# no media file to validate, we still need to replicate the value
|
|
148
|
+
assert data_row.file_urls[unvalidated_slot_idx] is None
|
|
149
|
+
val = data_row.vals[unvalidated_slot_idx]
|
|
150
|
+
data_row.vals[self.slot_idx] = val
|
|
151
|
+
data_row.has_val[self.slot_idx] = True
|
|
152
|
+
return
|
|
153
|
+
|
|
154
|
+
try:
|
|
155
|
+
self.col.col_type.validate_media(data_row.file_paths[unvalidated_slot_idx])
|
|
156
|
+
# access the value only after successful validation
|
|
157
|
+
val = data_row[unvalidated_slot_idx]
|
|
158
|
+
data_row.vals[self.slot_idx] = val
|
|
159
|
+
data_row.has_val[self.slot_idx] = True
|
|
160
|
+
# make sure that the validated slot points to the same file as the unvalidated slot
|
|
161
|
+
data_row.file_paths[self.slot_idx] = data_row.file_paths[unvalidated_slot_idx]
|
|
162
|
+
data_row.file_urls[self.slot_idx] = data_row.file_urls[unvalidated_slot_idx]
|
|
163
|
+
return
|
|
164
|
+
except excs.Error as exc:
|
|
165
|
+
# propagate the exception, but ignore it otherwise;
|
|
166
|
+
# media validation errors don't cause exceptions during query execution
|
|
167
|
+
# TODO: allow for different error-handling behavior
|
|
168
|
+
row_builder.set_exc(data_row, self.slot_idx, exc)
|
|
169
|
+
return
|
|
170
|
+
|
|
100
171
|
if not self.is_unstored_iter_col:
|
|
101
172
|
# supply default
|
|
102
173
|
data_row[self.slot_idx] = None
|
|
@@ -115,7 +186,14 @@ class ColumnRef(Expr):
|
|
|
115
186
|
def _as_dict(self) -> dict:
|
|
116
187
|
tbl = self.col.tbl
|
|
117
188
|
version = tbl.version if tbl.is_snapshot else None
|
|
118
|
-
|
|
189
|
+
# we omit self.components, even if this is a validating ColumnRef, because init() will recreate the
|
|
190
|
+
# non-validating component ColumnRef
|
|
191
|
+
return {
|
|
192
|
+
'tbl_id': str(tbl.id),
|
|
193
|
+
'tbl_version': version,
|
|
194
|
+
'col_id': self.col.id,
|
|
195
|
+
'perform_validation': self.perform_validation
|
|
196
|
+
}
|
|
119
197
|
|
|
120
198
|
@classmethod
|
|
121
199
|
def get_column(cls, d: dict) -> catalog.Column:
|
|
@@ -126,6 +204,7 @@ class ColumnRef(Expr):
|
|
|
126
204
|
return col
|
|
127
205
|
|
|
128
206
|
@classmethod
|
|
129
|
-
def _from_dict(cls, d: dict, _: list[Expr]) ->
|
|
207
|
+
def _from_dict(cls, d: dict, _: list[Expr]) -> ColumnRef:
|
|
130
208
|
col = cls.get_column(d)
|
|
131
|
-
|
|
209
|
+
perform_validation = d['perform_validation']
|
|
210
|
+
return cls(col, perform_validation=perform_validation)
|
pixeltable/exprs/comparison.py
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Any, Optional
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
7
7
|
import pixeltable.exceptions as excs
|
|
8
8
|
import pixeltable.index as index
|
|
9
9
|
import pixeltable.type_system as ts
|
|
10
|
+
|
|
10
11
|
from .column_ref import ColumnRef
|
|
11
12
|
from .data_row import DataRow
|
|
12
13
|
from .expr import Expr
|
|
@@ -65,7 +66,7 @@ class Comparison(Expr):
|
|
|
65
66
|
def _op2(self) -> Expr:
|
|
66
67
|
return self.components[1]
|
|
67
68
|
|
|
68
|
-
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.
|
|
69
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
69
70
|
left = sql_elements.get(self._op1)
|
|
70
71
|
if self.is_search_arg_comparison:
|
|
71
72
|
# reference the index value column if there is an index and this is not a snapshot
|
|
@@ -113,11 +114,10 @@ class Comparison(Expr):
|
|
|
113
114
|
elif self.operator == ComparisonOperator.GE:
|
|
114
115
|
data_row[self.slot_idx] = left >= right
|
|
115
116
|
|
|
116
|
-
def _as_dict(self) ->
|
|
117
|
+
def _as_dict(self) -> dict:
|
|
117
118
|
return {'operator': self.operator.value, **super()._as_dict()}
|
|
118
119
|
|
|
119
120
|
@classmethod
|
|
120
|
-
def _from_dict(cls, d:
|
|
121
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> Comparison:
|
|
121
122
|
assert 'operator' in d
|
|
122
123
|
return cls(ComparisonOperator(d['operator']), components[0], components[1])
|
|
123
|
-
|
|
@@ -1,20 +1,21 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import operator
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import Any, Callable, Optional
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
|
+
import pixeltable.type_system as ts
|
|
9
|
+
|
|
8
10
|
from .data_row import DataRow
|
|
9
11
|
from .expr import Expr
|
|
10
12
|
from .globals import LogicalOperator
|
|
11
13
|
from .row_builder import RowBuilder
|
|
12
14
|
from .sql_element_cache import SqlElementCache
|
|
13
|
-
import pixeltable.type_system as ts
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
class CompoundPredicate(Expr):
|
|
17
|
-
def __init__(self, operator: LogicalOperator, operands:
|
|
18
|
+
def __init__(self, operator: LogicalOperator, operands: list[Expr]):
|
|
18
19
|
super().__init__(ts.BoolType())
|
|
19
20
|
self.operator = operator
|
|
20
21
|
# operands are stored in self.components
|
|
@@ -23,7 +24,7 @@ class CompoundPredicate(Expr):
|
|
|
23
24
|
self.components = operands
|
|
24
25
|
else:
|
|
25
26
|
assert len(operands) > 1
|
|
26
|
-
self.operands:
|
|
27
|
+
self.operands: list[Expr] = []
|
|
27
28
|
for operand in operands:
|
|
28
29
|
self._merge_operand(operand)
|
|
29
30
|
|
|
@@ -35,7 +36,7 @@ class CompoundPredicate(Expr):
|
|
|
35
36
|
return f' {self.operator} '.join([f'({e})' for e in self.components])
|
|
36
37
|
|
|
37
38
|
@classmethod
|
|
38
|
-
def make_conjunction(cls, operands:
|
|
39
|
+
def make_conjunction(cls, operands: list[Expr]) -> Optional[Expr]:
|
|
39
40
|
if len(operands) == 0:
|
|
40
41
|
return None
|
|
41
42
|
if len(operands) == 1:
|
|
@@ -89,11 +90,11 @@ class CompoundPredicate(Expr):
|
|
|
89
90
|
val = op_function(val, data_row[op.slot_idx])
|
|
90
91
|
data_row[self.slot_idx] = val
|
|
91
92
|
|
|
92
|
-
def _as_dict(self) ->
|
|
93
|
+
def _as_dict(self) -> dict:
|
|
93
94
|
return {'operator': self.operator.value, **super()._as_dict()}
|
|
94
95
|
|
|
95
96
|
@classmethod
|
|
96
|
-
def _from_dict(cls, d:
|
|
97
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> CompoundPredicate:
|
|
97
98
|
assert 'operator' in d
|
|
98
99
|
return cls(LogicalOperator(d['operator']), components)
|
|
99
100
|
|
pixeltable/exprs/data_row.py
CHANGED
|
@@ -4,13 +4,13 @@ import datetime
|
|
|
4
4
|
import io
|
|
5
5
|
import urllib.parse
|
|
6
6
|
import urllib.request
|
|
7
|
-
from typing import
|
|
7
|
+
from typing import Any, Optional
|
|
8
8
|
|
|
9
|
-
import
|
|
10
|
-
import pgvector.sqlalchemy
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pgvector.sqlalchemy # type: ignore[import-untyped]
|
|
11
11
|
import PIL
|
|
12
12
|
import PIL.Image
|
|
13
|
-
import
|
|
13
|
+
import sqlalchemy as sql
|
|
14
14
|
|
|
15
15
|
from pixeltable import env
|
|
16
16
|
|
|
@@ -33,29 +33,40 @@ class DataRow:
|
|
|
33
33
|
- ImageType: PIL.Image.Image
|
|
34
34
|
- VideoType: local path if available, otherwise url
|
|
35
35
|
"""
|
|
36
|
-
def __init__(self, size: int, img_slot_idxs: List[int], media_slot_idxs: List[int], array_slot_idxs: List[int]):
|
|
37
|
-
self.vals: List[Any] = [None] * size # either cell values or exceptions
|
|
38
|
-
self.has_val = [False] * size
|
|
39
|
-
self.excs: List[Optional[Exception]] = [None] * size
|
|
40
36
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
37
|
+
vals: list[Any]
|
|
38
|
+
has_val: list[bool]
|
|
39
|
+
excs: list[Optional[Exception]]
|
|
40
|
+
|
|
41
|
+
# control structures that are shared across all DataRows in a batch
|
|
42
|
+
img_slot_idxs: list[int]
|
|
43
|
+
media_slot_idxs: list[int]
|
|
44
|
+
array_slot_idxs: list[int]
|
|
45
45
|
|
|
46
|
-
|
|
47
|
-
|
|
46
|
+
# the primary key of a store row is a sequence of ints (the number is different for table vs view)
|
|
47
|
+
pk: Optional[tuple[int, ...]]
|
|
48
48
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
49
|
+
# file_urls:
|
|
50
|
+
# - stored url of file for media in vals[i]
|
|
51
|
+
# - None if vals[i] is not media type
|
|
52
|
+
# - not None if file_paths[i] is not None
|
|
53
|
+
file_urls: list[Optional[str]]
|
|
54
54
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
55
|
+
# file_paths:
|
|
56
|
+
# - local path of media file in vals[i]; points to the file cache if file_urls[i] is remote
|
|
57
|
+
# - None if vals[i] is not a media type or if there is no local file yet for file_urls[i]
|
|
58
|
+
file_paths: list[Optional[str]]
|
|
59
|
+
|
|
60
|
+
def __init__(self, size: int, img_slot_idxs: list[int], media_slot_idxs: list[int], array_slot_idxs: list[int]):
|
|
61
|
+
self.vals = [None] * size
|
|
62
|
+
self.has_val = [False] * size
|
|
63
|
+
self.excs = [None] * size
|
|
64
|
+
self.img_slot_idxs = img_slot_idxs
|
|
65
|
+
self.media_slot_idxs = media_slot_idxs
|
|
66
|
+
self.array_slot_idxs = array_slot_idxs
|
|
67
|
+
self.pk = None
|
|
68
|
+
self.file_urls = [None] * size
|
|
69
|
+
self.file_paths = [None] * size
|
|
59
70
|
|
|
60
71
|
def clear(self) -> None:
|
|
61
72
|
size = len(self.vals)
|
|
@@ -78,27 +89,35 @@ class DataRow:
|
|
|
78
89
|
target.file_urls = self.file_urls.copy()
|
|
79
90
|
target.file_paths = self.file_paths.copy()
|
|
80
91
|
|
|
81
|
-
def set_pk(self, pk:
|
|
92
|
+
def set_pk(self, pk: tuple[int, ...]) -> None:
|
|
82
93
|
self.pk = pk
|
|
83
94
|
|
|
84
|
-
def has_exc(self, slot_idx: int) -> bool:
|
|
85
|
-
|
|
95
|
+
def has_exc(self, slot_idx: Optional[int] = None) -> bool:
|
|
96
|
+
"""
|
|
97
|
+
Returns True if an exception has been set for the given slot index, or for any slot index if slot_idx is None
|
|
98
|
+
"""
|
|
99
|
+
if slot_idx is not None:
|
|
100
|
+
return self.excs[slot_idx] is not None
|
|
101
|
+
return any(exc is not None for exc in self.excs)
|
|
86
102
|
|
|
87
|
-
def get_exc(self, slot_idx: int) -> Exception:
|
|
88
|
-
assert self.has_val[slot_idx] is False
|
|
89
|
-
assert self.excs[slot_idx] is not None
|
|
103
|
+
def get_exc(self, slot_idx: int) -> Optional[Exception]:
|
|
90
104
|
return self.excs[slot_idx]
|
|
91
105
|
|
|
106
|
+
def get_first_exc(self) -> Optional[Exception]:
|
|
107
|
+
for exc in self.excs:
|
|
108
|
+
if exc is not None:
|
|
109
|
+
return exc
|
|
110
|
+
return None
|
|
111
|
+
|
|
92
112
|
def set_exc(self, slot_idx: int, exc: Exception) -> None:
|
|
93
113
|
assert self.excs[slot_idx] is None
|
|
94
114
|
self.excs[slot_idx] = exc
|
|
95
115
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
self.file_urls[slot_idx] = None
|
|
116
|
+
# an exception means the value is None
|
|
117
|
+
self.has_val[slot_idx] = True
|
|
118
|
+
self.vals[slot_idx] = None
|
|
119
|
+
self.file_paths[slot_idx] = None
|
|
120
|
+
self.file_urls[slot_idx] = None
|
|
102
121
|
|
|
103
122
|
def __len__(self) -> int:
|
|
104
123
|
return len(self.vals)
|
|
@@ -113,6 +132,7 @@ class DataRow:
|
|
|
113
132
|
|
|
114
133
|
if self.file_urls[index] is not None and index in self.img_slot_idxs:
|
|
115
134
|
# if we need to load this from a file, it should have been materialized locally
|
|
135
|
+
# TODO this fails if the url was instantiated dynamically using astype()
|
|
116
136
|
assert self.file_paths[index] is not None
|
|
117
137
|
if self.vals[index] is None:
|
|
118
138
|
self.vals[index] = PIL.Image.open(self.file_paths[index])
|
|
@@ -220,7 +240,7 @@ class DataRow:
|
|
|
220
240
|
self.vals[index] = None
|
|
221
241
|
|
|
222
242
|
@property
|
|
223
|
-
def rowid(self) ->
|
|
243
|
+
def rowid(self) -> tuple[int, ...]:
|
|
224
244
|
return self.pk[:-1]
|
|
225
245
|
|
|
226
246
|
@property
|