pixeltable 0.4.17__py3-none-any.whl → 0.4.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/_version.py +1 -0
- pixeltable/catalog/catalog.py +144 -118
- pixeltable/catalog/column.py +104 -115
- pixeltable/catalog/globals.py +1 -2
- pixeltable/catalog/insertable_table.py +44 -49
- pixeltable/catalog/path.py +3 -4
- pixeltable/catalog/schema_object.py +4 -4
- pixeltable/catalog/table.py +139 -124
- pixeltable/catalog/table_metadata.py +6 -6
- pixeltable/catalog/table_version.py +315 -246
- pixeltable/catalog/table_version_handle.py +4 -4
- pixeltable/catalog/table_version_path.py +9 -10
- pixeltable/catalog/tbl_ops.py +9 -3
- pixeltable/catalog/view.py +34 -28
- pixeltable/config.py +14 -10
- pixeltable/dataframe.py +69 -78
- pixeltable/env.py +78 -64
- pixeltable/exec/aggregation_node.py +6 -6
- pixeltable/exec/cache_prefetch_node.py +10 -10
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +5 -5
- pixeltable/exec/expr_eval/evaluators.py +6 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
- pixeltable/exec/expr_eval/globals.py +6 -6
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +11 -11
- pixeltable/exec/in_memory_data_node.py +2 -2
- pixeltable/exec/object_store_save_node.py +14 -17
- pixeltable/exec/sql_node.py +28 -27
- pixeltable/exprs/arithmetic_expr.py +4 -4
- pixeltable/exprs/array_slice.py +2 -2
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +61 -74
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +3 -3
- pixeltable/exprs/data_row.py +12 -12
- pixeltable/exprs/expr.py +41 -31
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +3 -3
- pixeltable/exprs/function_call.py +14 -14
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +8 -8
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +6 -6
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +14 -14
- pixeltable/exprs/rowid_ref.py +8 -8
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +3 -3
- pixeltable/func/function.py +15 -17
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +2 -2
- pixeltable/func/query_template_function.py +16 -16
- pixeltable/func/signature.py +14 -14
- pixeltable/func/tools.py +11 -11
- pixeltable/func/udf.py +16 -18
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +7 -7
- pixeltable/functions/audio.py +76 -0
- pixeltable/functions/bedrock.py +6 -6
- pixeltable/functions/deepseek.py +4 -4
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +6 -6
- pixeltable/functions/globals.py +12 -12
- pixeltable/functions/groq.py +4 -4
- pixeltable/functions/huggingface.py +1033 -6
- pixeltable/functions/image.py +7 -10
- pixeltable/functions/llama_cpp.py +7 -7
- pixeltable/functions/math.py +2 -3
- pixeltable/functions/mistralai.py +3 -3
- pixeltable/functions/ollama.py +9 -9
- pixeltable/functions/openai.py +21 -21
- pixeltable/functions/openrouter.py +7 -7
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +7 -8
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/video.py +36 -31
- pixeltable/functions/vision.py +6 -6
- pixeltable/functions/whisper.py +7 -7
- pixeltable/functions/whisperx.py +16 -16
- pixeltable/globals.py +75 -40
- pixeltable/index/base.py +12 -8
- pixeltable/index/btree.py +19 -22
- pixeltable/index/embedding_index.py +30 -39
- pixeltable/io/datarows.py +3 -3
- pixeltable/io/external_store.py +13 -16
- pixeltable/io/fiftyone.py +5 -5
- pixeltable/io/globals.py +5 -5
- pixeltable/io/hf_datasets.py +4 -4
- pixeltable/io/label_studio.py +12 -12
- pixeltable/io/pandas.py +6 -6
- pixeltable/io/parquet.py +2 -2
- pixeltable/io/table_data_conduit.py +12 -12
- pixeltable/io/utils.py +2 -2
- pixeltable/iterators/audio.py +2 -2
- pixeltable/iterators/document.py +88 -57
- pixeltable/iterators/video.py +66 -37
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_38.py +2 -2
- pixeltable/metadata/converters/convert_39.py +1 -2
- pixeltable/metadata/converters/util.py +11 -13
- pixeltable/metadata/schema.py +22 -21
- pixeltable/metadata/utils.py +2 -6
- pixeltable/mypy/mypy_plugin.py +5 -5
- pixeltable/plan.py +32 -34
- pixeltable/share/packager.py +7 -7
- pixeltable/share/publish.py +3 -3
- pixeltable/store.py +126 -41
- pixeltable/type_system.py +43 -46
- pixeltable/utils/__init__.py +1 -2
- pixeltable/utils/arrow.py +4 -4
- pixeltable/utils/av.py +74 -38
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +1 -2
- pixeltable/utils/dbms.py +15 -19
- pixeltable/utils/description_helper.py +2 -3
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +2 -2
- pixeltable/utils/filecache.py +5 -5
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +9 -9
- pixeltable/utils/local_store.py +17 -17
- pixeltable/utils/object_stores.py +59 -43
- pixeltable/utils/s3_store.py +35 -30
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/METADATA +4 -4
- pixeltable-0.4.19.dist-info/RECORD +213 -0
- pixeltable/__version__.py +0 -3
- pixeltable-0.4.17.dist-info/RECORD +0 -211
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
pixeltable/exprs/column_ref.py
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import
|
|
4
|
-
from typing import Any, Optional, Sequence
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Sequence, cast
|
|
5
4
|
from uuid import UUID
|
|
6
5
|
|
|
7
6
|
import sqlalchemy as sql
|
|
8
7
|
|
|
9
|
-
import pixeltable as
|
|
10
|
-
|
|
8
|
+
import pixeltable.catalog as catalog
|
|
9
|
+
import pixeltable.exceptions as excs
|
|
10
|
+
import pixeltable.iterators as iters
|
|
11
11
|
|
|
12
12
|
from ..utils.description_helper import DescriptionHelper
|
|
13
13
|
from ..utils.filecache import FileCache
|
|
@@ -16,6 +16,9 @@ from .expr import Expr
|
|
|
16
16
|
from .row_builder import RowBuilder
|
|
17
17
|
from .sql_element_cache import SqlElementCache
|
|
18
18
|
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from pixeltable.dataframe import DataFrame, DataFrameResultSet
|
|
21
|
+
|
|
19
22
|
|
|
20
23
|
class ColumnRef(Expr):
|
|
21
24
|
"""A reference to a table column
|
|
@@ -44,36 +47,34 @@ class ColumnRef(Expr):
|
|
|
44
47
|
|
|
45
48
|
col: catalog.Column # TODO: merge with col_handle
|
|
46
49
|
col_handle: catalog.ColumnHandle
|
|
47
|
-
reference_tbl:
|
|
50
|
+
reference_tbl: catalog.TableVersionPath | None
|
|
48
51
|
is_unstored_iter_col: bool
|
|
49
|
-
iter_arg_ctx: Optional[RowBuilder.EvalCtx]
|
|
50
|
-
base_rowid_len: int
|
|
51
|
-
base_rowid: Sequence[Optional[Any]]
|
|
52
|
-
iterator: Optional[iters.ComponentIterator]
|
|
53
|
-
pos_idx: Optional[int]
|
|
54
|
-
id: int
|
|
55
52
|
perform_validation: bool # if True, performs media validation
|
|
53
|
+
iter_arg_ctx: RowBuilder.EvalCtx | None
|
|
54
|
+
base_rowid_len: int # number of rowid columns in the base table
|
|
55
|
+
|
|
56
|
+
# execution state
|
|
57
|
+
base_rowid: Sequence[Any | None]
|
|
58
|
+
iterator: iters.ComponentIterator | None
|
|
59
|
+
pos_idx: int
|
|
56
60
|
|
|
57
61
|
def __init__(
|
|
58
62
|
self,
|
|
59
63
|
col: catalog.Column,
|
|
60
|
-
reference_tbl:
|
|
61
|
-
perform_validation:
|
|
64
|
+
reference_tbl: catalog.TableVersionPath | None = None,
|
|
65
|
+
perform_validation: bool | None = None,
|
|
62
66
|
):
|
|
63
67
|
super().__init__(col.col_type)
|
|
64
|
-
assert col.tbl is not None
|
|
65
68
|
self.col = col
|
|
66
69
|
self.reference_tbl = reference_tbl
|
|
67
|
-
self.col_handle =
|
|
70
|
+
self.col_handle = col.handle
|
|
68
71
|
|
|
69
|
-
self.is_unstored_iter_col = col.
|
|
72
|
+
self.is_unstored_iter_col = col.is_iterator_col and not col.is_stored
|
|
70
73
|
self.iter_arg_ctx = None
|
|
71
|
-
|
|
72
|
-
self.
|
|
73
|
-
self.base_rowid = [None] * self.base_rowid_len
|
|
74
|
+
self.base_rowid_len = 0
|
|
75
|
+
self.base_rowid = []
|
|
74
76
|
self.iterator = None
|
|
75
|
-
|
|
76
|
-
self.pos_idx = col.tbl.num_rowid_columns() - 1 if self.is_unstored_iter_col else None
|
|
77
|
+
self.pos_idx = 0
|
|
77
78
|
|
|
78
79
|
self.perform_validation = False
|
|
79
80
|
if col.col_type.is_media_type():
|
|
@@ -99,14 +100,14 @@ class ColumnRef(Expr):
|
|
|
99
100
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
100
101
|
return [
|
|
101
102
|
*super()._id_attrs(),
|
|
102
|
-
('tbl_id', self.col.
|
|
103
|
+
('tbl_id', self.col.tbl_handle.id),
|
|
103
104
|
('col_id', self.col.id),
|
|
104
105
|
('perform_validation', self.perform_validation),
|
|
105
106
|
]
|
|
106
107
|
|
|
107
108
|
# override
|
|
108
109
|
def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> ColumnRef:
|
|
109
|
-
target = tbl_versions[self.col.
|
|
110
|
+
target = tbl_versions[self.col.tbl_handle.id]
|
|
110
111
|
assert self.col.id in target.cols_by_id
|
|
111
112
|
col = target.cols_by_id[self.col.id]
|
|
112
113
|
return ColumnRef(col, self.reference_tbl)
|
|
@@ -144,33 +145,6 @@ class ColumnRef(Expr):
|
|
|
144
145
|
|
|
145
146
|
return super().__getattr__(name)
|
|
146
147
|
|
|
147
|
-
def find_embedding_index(
|
|
148
|
-
self, idx_name: Optional[str], method_name: str
|
|
149
|
-
) -> dict[str, catalog.TableVersion.IndexInfo]:
|
|
150
|
-
"""Return IndexInfo for a column, with an optional given name"""
|
|
151
|
-
from pixeltable import index
|
|
152
|
-
|
|
153
|
-
# determine index to use
|
|
154
|
-
idx_info_dict = self.col.get_idx_info(self.reference_tbl)
|
|
155
|
-
|
|
156
|
-
embedding_idx_info = {
|
|
157
|
-
info: value for info, value in idx_info_dict.items() if isinstance(value.idx, index.EmbeddingIndex)
|
|
158
|
-
}
|
|
159
|
-
if len(embedding_idx_info) == 0:
|
|
160
|
-
raise excs.Error(f'No indices found for {method_name!r} on column {self.col.name!r}')
|
|
161
|
-
if idx_name is not None and idx_name not in embedding_idx_info:
|
|
162
|
-
raise excs.Error(f'Index {idx_name!r} not found for {method_name!r} on column {self.col.name!r}')
|
|
163
|
-
if len(embedding_idx_info) > 1:
|
|
164
|
-
if idx_name is None:
|
|
165
|
-
raise excs.Error(
|
|
166
|
-
f'Column {self.col.name!r} has multiple indices; use the index name to disambiguate: '
|
|
167
|
-
f'`{method_name}(..., idx=<index_name>)`'
|
|
168
|
-
)
|
|
169
|
-
idx_info = {idx_name: embedding_idx_info[idx_name]}
|
|
170
|
-
else:
|
|
171
|
-
idx_info = embedding_idx_info
|
|
172
|
-
return idx_info
|
|
173
|
-
|
|
174
148
|
def recompute(self, *, cascade: bool = True, errors_only: bool = False) -> catalog.UpdateStatus:
|
|
175
149
|
cat = catalog.Catalog.get()
|
|
176
150
|
# lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
|
|
@@ -185,49 +159,52 @@ class ColumnRef(Expr):
|
|
|
185
159
|
FileCache.get().emit_eviction_warnings()
|
|
186
160
|
return status
|
|
187
161
|
|
|
188
|
-
def similarity(self, item: Any, *, idx:
|
|
162
|
+
def similarity(self, item: Any, *, idx: str | None = None) -> Expr:
|
|
189
163
|
from .similarity_expr import SimilarityExpr
|
|
190
164
|
|
|
191
165
|
return SimilarityExpr(self, item, idx_name=idx)
|
|
192
166
|
|
|
193
|
-
def embedding(self, *, idx:
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
# col.create_sa_cols()
|
|
199
|
-
return ColumnRef(col)
|
|
167
|
+
def embedding(self, *, idx: str | None = None) -> ColumnRef:
|
|
168
|
+
from pixeltable.index import EmbeddingIndex
|
|
169
|
+
|
|
170
|
+
idx_info = self.tbl.get().get_idx(self.col, idx, EmbeddingIndex)
|
|
171
|
+
return ColumnRef(idx_info.val_col)
|
|
200
172
|
|
|
201
|
-
|
|
173
|
+
@property
|
|
174
|
+
def tbl(self) -> catalog.TableVersionHandle:
|
|
175
|
+
return self.reference_tbl.tbl_version if self.reference_tbl is not None else self.col.tbl_handle
|
|
176
|
+
|
|
177
|
+
def default_column_name(self) -> str | None:
|
|
202
178
|
return self.col.name if self.col is not None else None
|
|
203
179
|
|
|
204
180
|
def _equals(self, other: ColumnRef) -> bool:
|
|
205
181
|
return self.col == other.col and self.perform_validation == other.perform_validation
|
|
206
182
|
|
|
207
|
-
def _df(self) -> '
|
|
208
|
-
|
|
183
|
+
def _df(self) -> 'DataFrame':
|
|
184
|
+
import pixeltable.plan as plan
|
|
185
|
+
from pixeltable.dataframe import DataFrame
|
|
209
186
|
|
|
210
187
|
if self.reference_tbl is None:
|
|
211
188
|
# No reference table; use the current version of the table to which the column belongs
|
|
212
|
-
tbl = catalog.Catalog.get().get_table_by_id(self.col.
|
|
189
|
+
tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl_handle.id)
|
|
213
190
|
return tbl.select(self)
|
|
214
191
|
else:
|
|
215
192
|
# Explicit reference table; construct a DataFrame directly from it
|
|
216
|
-
return
|
|
193
|
+
return DataFrame(plan.FromClause([self.reference_tbl])).select(self)
|
|
217
194
|
|
|
218
|
-
def show(self, *args: Any, **kwargs: Any) -> '
|
|
195
|
+
def show(self, *args: Any, **kwargs: Any) -> 'DataFrameResultSet':
|
|
219
196
|
return self._df().show(*args, **kwargs)
|
|
220
197
|
|
|
221
|
-
def head(self, *args: Any, **kwargs: Any) -> '
|
|
198
|
+
def head(self, *args: Any, **kwargs: Any) -> 'DataFrameResultSet':
|
|
222
199
|
return self._df().head(*args, **kwargs)
|
|
223
200
|
|
|
224
|
-
def tail(self, *args: Any, **kwargs: Any) -> '
|
|
201
|
+
def tail(self, *args: Any, **kwargs: Any) -> 'DataFrameResultSet':
|
|
225
202
|
return self._df().tail(*args, **kwargs)
|
|
226
203
|
|
|
227
204
|
def count(self) -> int:
|
|
228
205
|
return self._df().count()
|
|
229
206
|
|
|
230
|
-
def distinct(self) -> '
|
|
207
|
+
def distinct(self) -> 'DataFrame':
|
|
231
208
|
"""Return distinct values in this column."""
|
|
232
209
|
return self._df().distinct()
|
|
233
210
|
|
|
@@ -244,7 +221,7 @@ class ColumnRef(Expr):
|
|
|
244
221
|
return self._descriptors().to_html()
|
|
245
222
|
|
|
246
223
|
def _descriptors(self) -> DescriptionHelper:
|
|
247
|
-
tbl = catalog.Catalog.get().get_table_by_id(self.col.
|
|
224
|
+
tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl_handle.id)
|
|
248
225
|
helper = DescriptionHelper()
|
|
249
226
|
helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path()!r})')
|
|
250
227
|
helper.append(tbl._col_descriptor([self.col.name]))
|
|
@@ -253,7 +230,18 @@ class ColumnRef(Expr):
|
|
|
253
230
|
helper.append(idxs)
|
|
254
231
|
return helper
|
|
255
232
|
|
|
256
|
-
def
|
|
233
|
+
def prepare(self) -> None:
|
|
234
|
+
from pixeltable import store
|
|
235
|
+
|
|
236
|
+
if not self.is_unstored_iter_col:
|
|
237
|
+
return
|
|
238
|
+
col = self.col_handle.get()
|
|
239
|
+
self.base_rowid_len = col.get_tbl().base.get().num_rowid_columns()
|
|
240
|
+
self.base_rowid = [None] * self.base_rowid_len
|
|
241
|
+
assert isinstance(col.get_tbl().store_tbl, store.StoreComponentView)
|
|
242
|
+
self.pos_idx = cast(store.StoreComponentView, col.get_tbl().store_tbl).pos_col_idx
|
|
243
|
+
|
|
244
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
257
245
|
if self.perform_validation:
|
|
258
246
|
return None
|
|
259
247
|
self.col = self.col_handle.get()
|
|
@@ -298,20 +286,19 @@ class ColumnRef(Expr):
|
|
|
298
286
|
if self.base_rowid != data_row.pk[: self.base_rowid_len]:
|
|
299
287
|
row_builder.eval(data_row, self.iter_arg_ctx)
|
|
300
288
|
iterator_args = data_row[self.iter_arg_ctx.target_slot_idxs[0]]
|
|
301
|
-
self.iterator = self.col.
|
|
289
|
+
self.iterator = self.col.get_tbl().iterator_cls(**iterator_args)
|
|
302
290
|
self.base_rowid = data_row.pk[: self.base_rowid_len]
|
|
303
291
|
self.iterator.set_pos(data_row.pk[self.pos_idx])
|
|
304
292
|
res = next(self.iterator)
|
|
305
293
|
data_row[self.slot_idx] = res[self.col.name]
|
|
306
294
|
|
|
307
295
|
def _as_dict(self) -> dict:
|
|
308
|
-
|
|
309
|
-
version = tbl.version if tbl.is_snapshot else None
|
|
296
|
+
tbl_handle = self.col.tbl_handle
|
|
310
297
|
# we omit self.components, even if this is a validating ColumnRef, because init() will recreate the
|
|
311
298
|
# non-validating component ColumnRef
|
|
312
299
|
return {
|
|
313
|
-
'tbl_id': str(
|
|
314
|
-
'tbl_version':
|
|
300
|
+
'tbl_id': str(tbl_handle.id),
|
|
301
|
+
'tbl_version': tbl_handle.effective_version,
|
|
315
302
|
'col_id': self.col.id,
|
|
316
303
|
'reference_tbl': self.reference_tbl.as_dict() if self.reference_tbl is not None else None,
|
|
317
304
|
'perform_validation': self.perform_validation,
|
pixeltable/exprs/comparison.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -69,8 +69,8 @@ class Comparison(Expr):
|
|
|
69
69
|
def _op2(self) -> Expr:
|
|
70
70
|
return self.components[1]
|
|
71
71
|
|
|
72
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
73
|
-
|
|
72
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
73
|
+
import pixeltable.index as index
|
|
74
74
|
|
|
75
75
|
if str(self._op1.col_type.to_sa_type()) != str(self._op2.col_type.to_sa_type()):
|
|
76
76
|
# Comparing columns of different SQL types (e.g., string vs. json); this can only be done in Python
|
|
@@ -81,9 +81,9 @@ class Comparison(Expr):
|
|
|
81
81
|
if self.is_search_arg_comparison:
|
|
82
82
|
# reference the index value column if there is an index and this is not a snapshot
|
|
83
83
|
# (indices don't apply to snapshots)
|
|
84
|
-
tbl = self._op1.col.
|
|
84
|
+
tbl = self._op1.col.get_tbl()
|
|
85
85
|
idx_info = [
|
|
86
|
-
info for info in self._op1.col.
|
|
86
|
+
info for info in tbl.idxs_by_col.get(self._op1.col.qid, []) if isinstance(info.idx, index.BtreeIndex)
|
|
87
87
|
]
|
|
88
88
|
if len(idx_info) > 0 and not tbl.is_snapshot:
|
|
89
89
|
# there shouldn't be multiple B-tree indices on a column
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import operator
|
|
4
|
-
from typing import Any, Callable
|
|
4
|
+
from typing import Any, Callable
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
@@ -61,14 +61,14 @@ class CompoundPredicate(Expr):
|
|
|
61
61
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
62
62
|
return [*super()._id_attrs(), ('operator', self.operator.value)]
|
|
63
63
|
|
|
64
|
-
def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr],
|
|
64
|
+
def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Expr | None]:
|
|
65
65
|
if self.operator in (LogicalOperator.OR, LogicalOperator.NOT):
|
|
66
66
|
return super().split_conjuncts(condition)
|
|
67
67
|
matches = [op for op in self.components if condition(op)]
|
|
68
68
|
non_matches = [op for op in self.components if not condition(op)]
|
|
69
69
|
return (matches, self.make_conjunction(non_matches))
|
|
70
70
|
|
|
71
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
71
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
72
72
|
sql_exprs = [sql_elements.get(op) for op in self.components]
|
|
73
73
|
if any(e is None for e in sql_exprs):
|
|
74
74
|
return None
|
pixeltable/exprs/data_row.py
CHANGED
|
@@ -6,7 +6,7 @@ import io
|
|
|
6
6
|
import urllib.parse
|
|
7
7
|
import urllib.request
|
|
8
8
|
from pathlib import Path
|
|
9
|
-
from typing import Any
|
|
9
|
+
from typing import Any
|
|
10
10
|
|
|
11
11
|
import numpy as np
|
|
12
12
|
import pgvector.sqlalchemy # type: ignore[import-untyped]
|
|
@@ -126,10 +126,10 @@ class DataRow:
|
|
|
126
126
|
_may_have_exc: bool
|
|
127
127
|
|
|
128
128
|
# the primary key of a store row is a sequence of ints (the number is different for table vs view)
|
|
129
|
-
pk:
|
|
129
|
+
pk: tuple[int, ...] | None
|
|
130
130
|
# for nested rows (ie, those produced by JsonMapperDispatcher)
|
|
131
|
-
parent_row:
|
|
132
|
-
parent_slot_idx:
|
|
131
|
+
parent_row: DataRow | None
|
|
132
|
+
parent_slot_idx: int | None
|
|
133
133
|
|
|
134
134
|
# state for table output (insert()/update()); key: column id
|
|
135
135
|
cell_vals: dict[int, Any] # materialized values of output columns, in the format required for the column
|
|
@@ -148,8 +148,8 @@ class DataRow:
|
|
|
148
148
|
media_slot_idxs: list[int],
|
|
149
149
|
array_slot_idxs: list[int],
|
|
150
150
|
json_slot_idxs: list[int],
|
|
151
|
-
parent_row:
|
|
152
|
-
parent_slot_idx:
|
|
151
|
+
parent_row: DataRow | None = None,
|
|
152
|
+
parent_slot_idx: int | None = None,
|
|
153
153
|
):
|
|
154
154
|
self.init(size)
|
|
155
155
|
self.parent_row = parent_row
|
|
@@ -176,7 +176,7 @@ class DataRow:
|
|
|
176
176
|
self.parent_row = None
|
|
177
177
|
self.parent_slot_idx = None
|
|
178
178
|
|
|
179
|
-
def clear(self, slot_idxs:
|
|
179
|
+
def clear(self, slot_idxs: np.ndarray | None = None) -> None:
|
|
180
180
|
if slot_idxs is not None:
|
|
181
181
|
self.has_val[slot_idxs] = False
|
|
182
182
|
self.vals[slot_idxs] = None
|
|
@@ -209,7 +209,7 @@ class DataRow:
|
|
|
209
209
|
def set_pk(self, pk: tuple[int, ...]) -> None:
|
|
210
210
|
self.pk = pk
|
|
211
211
|
|
|
212
|
-
def has_exc(self, slot_idx:
|
|
212
|
+
def has_exc(self, slot_idx: int | None = None) -> bool:
|
|
213
213
|
"""
|
|
214
214
|
Returns True if an exception has been set for the given slot index, or for any slot index if slot_idx is None
|
|
215
215
|
"""
|
|
@@ -220,12 +220,12 @@ class DataRow:
|
|
|
220
220
|
return self.excs[slot_idx] is not None
|
|
221
221
|
return (self.excs != None).any()
|
|
222
222
|
|
|
223
|
-
def get_exc(self, slot_idx: int) ->
|
|
223
|
+
def get_exc(self, slot_idx: int) -> Exception | None:
|
|
224
224
|
exc = self.excs[slot_idx]
|
|
225
225
|
assert exc is None or isinstance(exc, Exception)
|
|
226
226
|
return exc
|
|
227
227
|
|
|
228
|
-
def get_first_exc(self) ->
|
|
228
|
+
def get_first_exc(self) -> Exception | None:
|
|
229
229
|
mask = self.excs != None
|
|
230
230
|
if not mask.any():
|
|
231
231
|
return None
|
|
@@ -260,7 +260,7 @@ class DataRow:
|
|
|
260
260
|
|
|
261
261
|
return self.vals[index]
|
|
262
262
|
|
|
263
|
-
def get_stored_val(self, index: int, sa_col_type:
|
|
263
|
+
def get_stored_val(self, index: int, sa_col_type: sql.types.TypeEngine | None = None) -> Any:
|
|
264
264
|
"""Return the value that gets stored in the db"""
|
|
265
265
|
assert self.excs[index] is None
|
|
266
266
|
if not self.has_val[index]:
|
|
@@ -328,7 +328,7 @@ class DataRow:
|
|
|
328
328
|
self.vals[idx] = val
|
|
329
329
|
self.has_val[idx] = True
|
|
330
330
|
|
|
331
|
-
def prepare_col_val_for_save(self, index: int, col:
|
|
331
|
+
def prepare_col_val_for_save(self, index: int, col: catalog.Column | None = None) -> bool:
|
|
332
332
|
"""
|
|
333
333
|
Prepare to save a column's value into the appropriate store. Discard unneeded values.
|
|
334
334
|
|
pixeltable/exprs/expr.py
CHANGED
|
@@ -7,7 +7,7 @@ import inspect
|
|
|
7
7
|
import json
|
|
8
8
|
import sys
|
|
9
9
|
import typing
|
|
10
|
-
from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator,
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, TypeVar, overload
|
|
11
11
|
from uuid import UUID
|
|
12
12
|
|
|
13
13
|
import numpy as np
|
|
@@ -29,7 +29,7 @@ class ExprScope:
|
|
|
29
29
|
parent is None: outermost scope
|
|
30
30
|
"""
|
|
31
31
|
|
|
32
|
-
def __init__(self, parent:
|
|
32
|
+
def __init__(self, parent: ExprScope | None):
|
|
33
33
|
self.parent = parent
|
|
34
34
|
|
|
35
35
|
def is_contained_in(self, other: ExprScope) -> bool:
|
|
@@ -61,13 +61,13 @@ class Expr(abc.ABC):
|
|
|
61
61
|
# - set by the subclass's __init__()
|
|
62
62
|
# - produced by _create_id()
|
|
63
63
|
# - not expected to survive a serialize()/deserialize() roundtrip
|
|
64
|
-
id:
|
|
64
|
+
id: int | None
|
|
65
65
|
|
|
66
66
|
# index of the expr's value in the data row:
|
|
67
67
|
# - set for all materialized exprs
|
|
68
68
|
# - None: not executable
|
|
69
69
|
# - not set for subexprs that don't need to be materialized because the parent can be materialized via SQL
|
|
70
|
-
slot_idx:
|
|
70
|
+
slot_idx: int | None
|
|
71
71
|
|
|
72
72
|
T = TypeVar('T', bound='Expr')
|
|
73
73
|
|
|
@@ -103,7 +103,7 @@ class Expr(abc.ABC):
|
|
|
103
103
|
assert not has_rel_path, self._expr_tree()
|
|
104
104
|
assert not self._has_relative_path(), self._expr_tree()
|
|
105
105
|
|
|
106
|
-
def _bind_rel_paths(self, mapper:
|
|
106
|
+
def _bind_rel_paths(self, mapper: 'exprs.JsonMapperDispatch' | None = None) -> None:
|
|
107
107
|
for c in self.components:
|
|
108
108
|
c._bind_rel_paths(mapper)
|
|
109
109
|
|
|
@@ -118,7 +118,7 @@ class Expr(abc.ABC):
|
|
|
118
118
|
for c in self.components:
|
|
119
119
|
c._expr_tree_r(indent + 2, buf)
|
|
120
120
|
|
|
121
|
-
def default_column_name(self) ->
|
|
121
|
+
def default_column_name(self) -> str | None:
|
|
122
122
|
"""
|
|
123
123
|
Returns:
|
|
124
124
|
None if this expression lacks a default name,
|
|
@@ -127,7 +127,7 @@ class Expr(abc.ABC):
|
|
|
127
127
|
return None
|
|
128
128
|
|
|
129
129
|
@property
|
|
130
|
-
def validation_error(self) ->
|
|
130
|
+
def validation_error(self) -> str | None:
|
|
131
131
|
"""
|
|
132
132
|
Subclasses can override this to indicate that validation has failed after a catalog load.
|
|
133
133
|
|
|
@@ -205,12 +205,12 @@ class Expr(abc.ABC):
|
|
|
205
205
|
return result
|
|
206
206
|
|
|
207
207
|
@classmethod
|
|
208
|
-
def copy_list(cls, expr_list:
|
|
208
|
+
def copy_list(cls, expr_list: list[Expr] | None) -> list[Expr] | None:
|
|
209
209
|
if expr_list is None:
|
|
210
210
|
return None
|
|
211
211
|
return [e.copy() for e in expr_list]
|
|
212
212
|
|
|
213
|
-
def __deepcopy__(self, memo:
|
|
213
|
+
def __deepcopy__(self, memo: dict[int, Any] | None = None) -> Expr:
|
|
214
214
|
# we don't need to create an actual deep copy because all state other than execution state is read-only
|
|
215
215
|
if memo is None:
|
|
216
216
|
memo = {}
|
|
@@ -241,7 +241,7 @@ class Expr(abc.ABC):
|
|
|
241
241
|
for i in range(len(expr_list)):
|
|
242
242
|
expr_list[i] = expr_list[i].substitute(spec)
|
|
243
243
|
|
|
244
|
-
def resolve_computed_cols(self, resolve_cols:
|
|
244
|
+
def resolve_computed_cols(self, resolve_cols: set[catalog.Column] | None = None) -> Expr:
|
|
245
245
|
"""
|
|
246
246
|
Recursively replace ColRefs to unstored computed columns with their value exprs.
|
|
247
247
|
Also replaces references to stored computed columns in resolve_cols.
|
|
@@ -309,18 +309,18 @@ class Expr(abc.ABC):
|
|
|
309
309
|
|
|
310
310
|
@overload
|
|
311
311
|
def subexprs(
|
|
312
|
-
self, *, filter:
|
|
312
|
+
self, *, filter: Callable[[Expr], bool] | None = None, traverse_matches: bool = True
|
|
313
313
|
) -> Iterator[Expr]: ...
|
|
314
314
|
|
|
315
315
|
@overload
|
|
316
316
|
def subexprs(
|
|
317
|
-
self, expr_class: type[T], filter:
|
|
317
|
+
self, expr_class: type[T], filter: Callable[[Expr], bool] | None = None, traverse_matches: bool = True
|
|
318
318
|
) -> Iterator[T]: ...
|
|
319
319
|
|
|
320
320
|
def subexprs(
|
|
321
321
|
self,
|
|
322
|
-
expr_class:
|
|
323
|
-
filter:
|
|
322
|
+
expr_class: type[T] | None = None,
|
|
323
|
+
filter: Callable[[Expr], bool] | None = None,
|
|
324
324
|
traverse_matches: bool = True,
|
|
325
325
|
) -> Iterator[T]:
|
|
326
326
|
"""
|
|
@@ -339,11 +339,7 @@ class Expr(abc.ABC):
|
|
|
339
339
|
@overload
|
|
340
340
|
@classmethod
|
|
341
341
|
def list_subexprs(
|
|
342
|
-
cls,
|
|
343
|
-
expr_list: Iterable[Expr],
|
|
344
|
-
*,
|
|
345
|
-
filter: Optional[Callable[[Expr], bool]] = None,
|
|
346
|
-
traverse_matches: bool = True,
|
|
342
|
+
cls, expr_list: Iterable[Expr], *, filter: Callable[[Expr], bool] | None = None, traverse_matches: bool = True
|
|
347
343
|
) -> Iterator[Expr]: ...
|
|
348
344
|
|
|
349
345
|
@overload
|
|
@@ -352,7 +348,7 @@ class Expr(abc.ABC):
|
|
|
352
348
|
cls,
|
|
353
349
|
expr_list: Iterable[Expr],
|
|
354
350
|
expr_class: type[T],
|
|
355
|
-
filter:
|
|
351
|
+
filter: Callable[[Expr], bool] | None = None,
|
|
356
352
|
traverse_matches: bool = True,
|
|
357
353
|
) -> Iterator[T]: ...
|
|
358
354
|
|
|
@@ -360,8 +356,8 @@ class Expr(abc.ABC):
|
|
|
360
356
|
def list_subexprs(
|
|
361
357
|
cls,
|
|
362
358
|
expr_list: Iterable[Expr],
|
|
363
|
-
expr_class:
|
|
364
|
-
filter:
|
|
359
|
+
expr_class: type[T] | None = None,
|
|
360
|
+
filter: Callable[[Expr], bool] | None = None,
|
|
365
361
|
traverse_matches: bool = True,
|
|
366
362
|
) -> Iterator[T]:
|
|
367
363
|
"""Produce subexprs for all exprs in list. Can contain duplicates."""
|
|
@@ -377,7 +373,7 @@ class Expr(abc.ABC):
|
|
|
377
373
|
) -> bool:
|
|
378
374
|
return any(e._contains(expr_class, filter) for e in expr_list)
|
|
379
375
|
|
|
380
|
-
def _contains(self, cls:
|
|
376
|
+
def _contains(self, cls: type[Expr] | None = None, filter: Callable[[Expr], bool] | None = None) -> bool:
|
|
381
377
|
"""
|
|
382
378
|
Returns True if any subexpr is an instance of cls and/or matches filter.
|
|
383
379
|
"""
|
|
@@ -396,7 +392,9 @@ class Expr(abc.ABC):
|
|
|
396
392
|
from .column_ref import ColumnRef
|
|
397
393
|
from .rowid_ref import RowidRef
|
|
398
394
|
|
|
399
|
-
return {ref.col.
|
|
395
|
+
return {ref.col.get_tbl().id for ref in self.subexprs(ColumnRef)} | {
|
|
396
|
+
ref.tbl.id for ref in self.subexprs(RowidRef)
|
|
397
|
+
}
|
|
400
398
|
|
|
401
399
|
@classmethod
|
|
402
400
|
def all_tbl_ids(cls, exprs_: Iterable[Expr]) -> set[UUID]:
|
|
@@ -416,14 +414,14 @@ class Expr(abc.ABC):
|
|
|
416
414
|
result.update(cls.get_refd_column_ids(component_dict))
|
|
417
415
|
return result
|
|
418
416
|
|
|
419
|
-
def as_literal(self) ->
|
|
417
|
+
def as_literal(self) -> Expr | None:
|
|
420
418
|
"""
|
|
421
419
|
Return a Literal expression if this expression can be evaluated to a constant value, otherwise return None.
|
|
422
420
|
"""
|
|
423
421
|
return None
|
|
424
422
|
|
|
425
423
|
@classmethod
|
|
426
|
-
def from_array(cls, elements: Iterable) ->
|
|
424
|
+
def from_array(cls, elements: Iterable) -> Expr | None:
|
|
427
425
|
from .inline_expr import InlineArray
|
|
428
426
|
from .literal import Literal
|
|
429
427
|
|
|
@@ -446,7 +444,7 @@ class Expr(abc.ABC):
|
|
|
446
444
|
return self
|
|
447
445
|
|
|
448
446
|
@classmethod
|
|
449
|
-
def from_object(cls, o: object) ->
|
|
447
|
+
def from_object(cls, o: object) -> Expr | None:
|
|
450
448
|
"""
|
|
451
449
|
Try to turn a literal object into an Expr.
|
|
452
450
|
"""
|
|
@@ -476,7 +474,7 @@ class Expr(abc.ABC):
|
|
|
476
474
|
return Literal(o, col_type=obj_type)
|
|
477
475
|
return None
|
|
478
476
|
|
|
479
|
-
def sql_expr(self, sql_elements: 'exprs.SqlElementCache') ->
|
|
477
|
+
def sql_expr(self, sql_elements: 'exprs.SqlElementCache') -> sql.ColumnElement | None:
|
|
480
478
|
"""
|
|
481
479
|
If this expr can be materialized directly in SQL:
|
|
482
480
|
- returns a ColumnElement
|
|
@@ -495,6 +493,18 @@ class Expr(abc.ABC):
|
|
|
495
493
|
"""
|
|
496
494
|
pass
|
|
497
495
|
|
|
496
|
+
def prepare(self) -> None:
|
|
497
|
+
"""
|
|
498
|
+
Create execution state. This is called before the first eval() call.
|
|
499
|
+
"""
|
|
500
|
+
for c in self.components:
|
|
501
|
+
c.prepare()
|
|
502
|
+
|
|
503
|
+
@classmethod
|
|
504
|
+
def prepare_list(cls, expr_list: Iterable[Expr]) -> None:
|
|
505
|
+
for e in expr_list:
|
|
506
|
+
e.prepare()
|
|
507
|
+
|
|
498
508
|
def release(self) -> None:
|
|
499
509
|
"""
|
|
500
510
|
Allow Expr class to tear down execution state. This is called after the last eval() call.
|
|
@@ -503,7 +513,7 @@ class Expr(abc.ABC):
|
|
|
503
513
|
c.release()
|
|
504
514
|
|
|
505
515
|
@classmethod
|
|
506
|
-
def release_list(cls, expr_list:
|
|
516
|
+
def release_list(cls, expr_list: Iterable[Expr]) -> None:
|
|
507
517
|
for e in expr_list:
|
|
508
518
|
e.release()
|
|
509
519
|
|
|
@@ -791,7 +801,7 @@ class Expr(abc.ABC):
|
|
|
791
801
|
|
|
792
802
|
return CompoundPredicate(LogicalOperator.NOT, [self])
|
|
793
803
|
|
|
794
|
-
def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr],
|
|
804
|
+
def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Expr | None]:
|
|
795
805
|
"""
|
|
796
806
|
Returns clauses of a conjunction that meet condition in the first element.
|
|
797
807
|
The second element contains remaining clauses, rolled into a conjunction.
|
|
@@ -802,7 +812,7 @@ class Expr(abc.ABC):
|
|
|
802
812
|
else:
|
|
803
813
|
return [], self
|
|
804
814
|
|
|
805
|
-
def _make_applicator_function(self, fn: Callable, col_type:
|
|
815
|
+
def _make_applicator_function(self, fn: Callable, col_type: ts.ColumnType | None) -> 'func.Function':
|
|
806
816
|
"""
|
|
807
817
|
Creates a unary pixeltable `Function` that encapsulates a python `Callable`. The result type of
|
|
808
818
|
the new `Function` is given by `col_type`, and its parameter type will be `self.col_type`.
|
pixeltable/exprs/expr_dict.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Generic, Iterable, Iterator,
|
|
1
|
+
from typing import Generic, Iterable, Iterator, TypeVar
|
|
2
2
|
|
|
3
3
|
from .expr import Expr
|
|
4
4
|
|
|
@@ -14,7 +14,7 @@ class ExprDict(Generic[T]):
|
|
|
14
14
|
|
|
15
15
|
_data: dict[int, tuple[Expr, T]]
|
|
16
16
|
|
|
17
|
-
def __init__(self, iterable:
|
|
17
|
+
def __init__(self, iterable: Iterable[tuple[Expr, T]] | None = None):
|
|
18
18
|
self._data = {}
|
|
19
19
|
|
|
20
20
|
if iterable is not None:
|
|
@@ -39,7 +39,7 @@ class ExprDict(Generic[T]):
|
|
|
39
39
|
def __contains__(self, key: Expr) -> bool:
|
|
40
40
|
return key.id in self._data
|
|
41
41
|
|
|
42
|
-
def get(self, key: Expr, default:
|
|
42
|
+
def get(self, key: Expr, default: T | None = None) -> T | None:
|
|
43
43
|
item = self._data.get(key.id)
|
|
44
44
|
return item[1] if item is not None else default
|
|
45
45
|
|