pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/exprs/column_ref.py
CHANGED
|
@@ -1,21 +1,29 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
import warnings
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Sequence, cast
|
|
4
5
|
from uuid import UUID
|
|
5
6
|
|
|
7
|
+
import PIL.Image
|
|
6
8
|
import sqlalchemy as sql
|
|
7
9
|
|
|
8
|
-
import pixeltable as pxt
|
|
9
10
|
import pixeltable.catalog as catalog
|
|
10
11
|
import pixeltable.exceptions as excs
|
|
11
12
|
import pixeltable.iterators as iters
|
|
13
|
+
import pixeltable.type_system as ts
|
|
14
|
+
from pixeltable.catalog.table_version import TableVersionKey
|
|
12
15
|
|
|
13
16
|
from ..utils.description_helper import DescriptionHelper
|
|
17
|
+
from ..utils.filecache import FileCache
|
|
14
18
|
from .data_row import DataRow
|
|
15
19
|
from .expr import Expr
|
|
20
|
+
from .literal import Literal
|
|
16
21
|
from .row_builder import RowBuilder
|
|
17
22
|
from .sql_element_cache import SqlElementCache
|
|
18
23
|
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from pixeltable._query import Query, ResultSet
|
|
26
|
+
|
|
19
27
|
|
|
20
28
|
class ColumnRef(Expr):
|
|
21
29
|
"""A reference to a table column
|
|
@@ -32,34 +40,48 @@ class ColumnRef(Expr):
|
|
|
32
40
|
- in that case, the ColumnRef also instantiates a second non-validating ColumnRef as a component (= dependency)
|
|
33
41
|
- the non-validating ColumnRef is used for SQL translation
|
|
34
42
|
|
|
43
|
+
A ColumnRef may have an optional reference table, which carries the context of the ColumnRef resolution. Thus
|
|
44
|
+
if `v` is a view of `t` (for example), then `v.my_col` and `t.my_col` refer to the same underlying column, but
|
|
45
|
+
their reference tables will be `v` and `t`, respectively. This is to ensure correct behavior of expressions such
|
|
46
|
+
as `v.my_col.head()`.
|
|
47
|
+
|
|
35
48
|
TODO:
|
|
36
49
|
separate Exprs (like validating ColumnRefs) from the logical expression tree and instead have RowBuilder
|
|
37
50
|
insert them into the EvalCtxs as needed
|
|
38
51
|
"""
|
|
39
52
|
|
|
40
|
-
col: catalog.Column
|
|
53
|
+
col: catalog.Column # TODO: merge with col_handle
|
|
54
|
+
col_handle: catalog.ColumnHandle
|
|
55
|
+
reference_tbl: catalog.TableVersionPath | None
|
|
41
56
|
is_unstored_iter_col: bool
|
|
42
|
-
iter_arg_ctx: Optional[RowBuilder.EvalCtx]
|
|
43
|
-
base_rowid_len: int
|
|
44
|
-
base_rowid: Sequence[Optional[Any]]
|
|
45
|
-
iterator: Optional[iters.ComponentIterator]
|
|
46
|
-
pos_idx: Optional[int]
|
|
47
|
-
id: int
|
|
48
57
|
perform_validation: bool # if True, performs media validation
|
|
49
|
-
|
|
50
|
-
|
|
58
|
+
iter_arg_ctx: RowBuilder.EvalCtx | None
|
|
59
|
+
iter_outputs: list[ColumnRef] | None
|
|
60
|
+
base_rowid_len: int # number of rowid columns in the base table
|
|
61
|
+
|
|
62
|
+
# execution state
|
|
63
|
+
base_rowid: Sequence[Any | None]
|
|
64
|
+
iterator: iters.ComponentIterator | None
|
|
65
|
+
pos_idx: int
|
|
66
|
+
|
|
67
|
+
def __init__(
|
|
68
|
+
self,
|
|
69
|
+
col: catalog.Column,
|
|
70
|
+
reference_tbl: catalog.TableVersionPath | None = None,
|
|
71
|
+
perform_validation: bool | None = None,
|
|
72
|
+
):
|
|
51
73
|
super().__init__(col.col_type)
|
|
52
|
-
assert col.tbl is not None
|
|
53
74
|
self.col = col
|
|
54
|
-
self.
|
|
55
|
-
|
|
75
|
+
self.reference_tbl = reference_tbl
|
|
76
|
+
self.col_handle = col.handle
|
|
77
|
+
|
|
78
|
+
self.is_unstored_iter_col = col.is_iterator_col and not col.is_stored
|
|
56
79
|
self.iter_arg_ctx = None
|
|
57
|
-
|
|
58
|
-
self.base_rowid_len =
|
|
59
|
-
self.base_rowid = [
|
|
80
|
+
self.iter_outputs = None
|
|
81
|
+
self.base_rowid_len = 0
|
|
82
|
+
self.base_rowid = []
|
|
60
83
|
self.iterator = None
|
|
61
|
-
|
|
62
|
-
self.pos_idx = col.tbl.num_rowid_columns() - 1 if self.is_unstored_iter_col else None
|
|
84
|
+
self.pos_idx = 0
|
|
63
85
|
|
|
64
86
|
self.perform_validation = False
|
|
65
87
|
if col.col_type.is_media_type():
|
|
@@ -78,34 +100,50 @@ class ColumnRef(Expr):
|
|
|
78
100
|
self.components = [non_validating_col_ref]
|
|
79
101
|
self.id = self._create_id()
|
|
80
102
|
|
|
81
|
-
def set_iter_arg_ctx(self, iter_arg_ctx: RowBuilder.EvalCtx) -> None:
|
|
103
|
+
def set_iter_arg_ctx(self, iter_arg_ctx: RowBuilder.EvalCtx, iter_outputs: list[ColumnRef]) -> None:
|
|
82
104
|
self.iter_arg_ctx = iter_arg_ctx
|
|
105
|
+
self.iter_outputs = iter_outputs
|
|
106
|
+
# If this is an unstored iterator column, then the iterator outputs may be needed in order to properly set the
|
|
107
|
+
# iterator position. Therefore, we need to add them as components in order to ensure they're marked as
|
|
108
|
+
# eval dependencies.
|
|
109
|
+
self.components.extend(iter_outputs)
|
|
83
110
|
assert len(self.iter_arg_ctx.target_slot_idxs) == 1 # a single inline dict
|
|
84
111
|
|
|
85
112
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
86
|
-
return
|
|
87
|
-
super()._id_attrs()
|
|
88
|
-
|
|
89
|
-
|
|
113
|
+
return [
|
|
114
|
+
*super()._id_attrs(),
|
|
115
|
+
('tbl_id', self.col.tbl_handle.id),
|
|
116
|
+
('col_id', self.col.id),
|
|
117
|
+
('perform_validation', self.perform_validation),
|
|
118
|
+
]
|
|
90
119
|
|
|
91
120
|
# override
|
|
92
121
|
def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> ColumnRef:
|
|
93
|
-
target = tbl_versions[self.col.
|
|
94
|
-
assert self.col.id in target.cols_by_id
|
|
122
|
+
target = tbl_versions[self.col.tbl_handle.id]
|
|
123
|
+
assert self.col.id in target.cols_by_id, f'{target}: {self.col.id} not in {list(target.cols_by_id.keys())}'
|
|
95
124
|
col = target.cols_by_id[self.col.id]
|
|
96
|
-
return ColumnRef(col)
|
|
125
|
+
return ColumnRef(col, self.reference_tbl)
|
|
97
126
|
|
|
98
127
|
def __getattr__(self, name: str) -> Expr:
|
|
99
128
|
from .column_property_ref import ColumnPropertyRef
|
|
100
129
|
|
|
101
130
|
# resolve column properties
|
|
102
|
-
if name == ColumnPropertyRef.Property.
|
|
103
|
-
|
|
104
|
-
|
|
131
|
+
if name == ColumnPropertyRef.Property.CELLMD.name.lower():
|
|
132
|
+
# This is not user accessible, but used internally to store cell metadata
|
|
133
|
+
return super().__getattr__(name)
|
|
134
|
+
|
|
135
|
+
if (
|
|
136
|
+
name == ColumnPropertyRef.Property.ERRORTYPE.name.lower()
|
|
137
|
+
or name == ColumnPropertyRef.Property.ERRORMSG.name.lower()
|
|
138
|
+
):
|
|
139
|
+
is_valid = (self.col.is_computed or self.col.col_type.is_media_type()) and self.col.is_stored
|
|
140
|
+
if not is_valid:
|
|
105
141
|
raise excs.Error(f'{name} only valid for a stored computed or media column: {self}')
|
|
106
142
|
return ColumnPropertyRef(self, ColumnPropertyRef.Property[name.upper()])
|
|
107
|
-
if
|
|
108
|
-
|
|
143
|
+
if (
|
|
144
|
+
name == ColumnPropertyRef.Property.FILEURL.name.lower()
|
|
145
|
+
or name == ColumnPropertyRef.Property.LOCALPATH.name.lower()
|
|
146
|
+
):
|
|
109
147
|
if not self.col.col_type.is_media_type():
|
|
110
148
|
raise excs.Error(f'{name} only valid for image/video/audio/document columns: {self}')
|
|
111
149
|
if self.col.is_computed and not self.col.is_stored:
|
|
@@ -114,35 +152,161 @@ class ColumnRef(Expr):
|
|
|
114
152
|
|
|
115
153
|
if self.col_type.is_json_type():
|
|
116
154
|
from .json_path import JsonPath
|
|
155
|
+
|
|
117
156
|
return JsonPath(self, [name])
|
|
118
157
|
|
|
119
158
|
return super().__getattr__(name)
|
|
120
159
|
|
|
121
|
-
def
|
|
160
|
+
def recompute(self, *, cascade: bool = True, errors_only: bool = False) -> catalog.UpdateStatus:
|
|
161
|
+
cat = catalog.Catalog.get()
|
|
162
|
+
# lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
|
|
163
|
+
with cat.begin_xact(tbl=self.reference_tbl, for_write=True, lock_mutable_tree=True):
|
|
164
|
+
tbl_version = self.col_handle.tbl_version.get()
|
|
165
|
+
if tbl_version.id != self.reference_tbl.tbl_id:
|
|
166
|
+
raise excs.Error('Cannot recompute column of a base.')
|
|
167
|
+
if tbl_version.is_snapshot:
|
|
168
|
+
raise excs.Error('Cannot recompute column of a snapshot.')
|
|
169
|
+
col_name = self.col_handle.get().name
|
|
170
|
+
status = tbl_version.recompute_columns([col_name], errors_only=errors_only, cascade=cascade)
|
|
171
|
+
FileCache.get().emit_eviction_warnings()
|
|
172
|
+
return status
|
|
173
|
+
|
|
174
|
+
def similarity(
|
|
175
|
+
self,
|
|
176
|
+
item: Any = None,
|
|
177
|
+
*,
|
|
178
|
+
string: str | None = None,
|
|
179
|
+
image: PIL.Image.Image | None = None,
|
|
180
|
+
audio: str | None = None,
|
|
181
|
+
video: str | None = None,
|
|
182
|
+
idx: str | None = None,
|
|
183
|
+
) -> Expr:
|
|
122
184
|
from .similarity_expr import SimilarityExpr
|
|
123
|
-
return SimilarityExpr(self, item, idx_name=idx)
|
|
124
185
|
|
|
125
|
-
|
|
126
|
-
|
|
186
|
+
if item is not None:
|
|
187
|
+
warnings.warn(
|
|
188
|
+
'Use of similarity() without specifying an explicit modality is deprecated -- '
|
|
189
|
+
'since version 0.5.7. Please use one of the following instead:\n'
|
|
190
|
+
' .similarity(string=...)\n'
|
|
191
|
+
' .similarity(image=...)\n'
|
|
192
|
+
' .similarity(audio=...)\n'
|
|
193
|
+
' .similarity(video=...)',
|
|
194
|
+
DeprecationWarning,
|
|
195
|
+
stacklevel=2,
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
arg_count = (string is not None) + (image is not None) + (audio is not None) + (video is not None)
|
|
199
|
+
|
|
200
|
+
if item is not None and arg_count != 0:
|
|
201
|
+
raise excs.Error('similarity(): `item` is deprecated and cannot be used together with modality arguments')
|
|
202
|
+
|
|
203
|
+
if arg_count > 1:
|
|
204
|
+
raise excs.Error('similarity(): expected exactly one of string=..., image=..., audio=..., video=...')
|
|
205
|
+
|
|
206
|
+
expr: Expr
|
|
207
|
+
|
|
208
|
+
if item is not None:
|
|
209
|
+
if isinstance(item, Expr): # This can happen when using similarity() with @query
|
|
210
|
+
if not (item.col_type.is_string_type() or item.col_type.is_image_type()):
|
|
211
|
+
raise excs.Error(f'similarity(): expected `String` or `Image`; got `{item.col_type}`')
|
|
212
|
+
expr = item
|
|
213
|
+
else:
|
|
214
|
+
if not isinstance(item, (str, PIL.Image.Image)):
|
|
215
|
+
raise excs.Error(f'similarity(): expected `str` or `PIL.Image.Image`; got `{type(item).__name__}`')
|
|
216
|
+
expr = Expr.from_object(item)
|
|
217
|
+
assert expr.col_type.is_string_type() or expr.col_type.is_image_type()
|
|
218
|
+
|
|
219
|
+
if string is not None:
|
|
220
|
+
if isinstance(string, Expr):
|
|
221
|
+
if not string.col_type.is_string_type():
|
|
222
|
+
raise excs.Error(f'similarity(string=...): expected `String`; got `{expr.col_type}`')
|
|
223
|
+
expr = string
|
|
224
|
+
else:
|
|
225
|
+
if not isinstance(string, str):
|
|
226
|
+
raise excs.Error(f'similarity(string=...): expected `str`; got `{type(string).__name__}`')
|
|
227
|
+
expr = Expr.from_object(string)
|
|
228
|
+
assert expr.col_type.is_string_type()
|
|
229
|
+
|
|
230
|
+
if image is not None:
|
|
231
|
+
if isinstance(image, Expr):
|
|
232
|
+
if not image.col_type.is_image_type():
|
|
233
|
+
raise excs.Error(f'similarity(image=...): expected `Image`; got `{image.col_type}`')
|
|
234
|
+
expr = image
|
|
235
|
+
else:
|
|
236
|
+
if not isinstance(image, PIL.Image.Image):
|
|
237
|
+
raise excs.Error(f'similarity(image=...): expected `PIL.Image.Image`; got `{type(image).__name__}`')
|
|
238
|
+
expr = Expr.from_object(image)
|
|
239
|
+
assert expr.col_type.is_image_type()
|
|
240
|
+
|
|
241
|
+
if audio is not None:
|
|
242
|
+
if isinstance(audio, Expr):
|
|
243
|
+
if not audio.col_type.is_audio_type():
|
|
244
|
+
raise excs.Error(f'similarity(audio=...): expected `Audio`; got `{audio.col_type}`')
|
|
245
|
+
expr = audio
|
|
246
|
+
else:
|
|
247
|
+
if not isinstance(audio, str):
|
|
248
|
+
raise excs.Error(
|
|
249
|
+
f'similarity(audio=...): expected `str` (path to audio file); got `{type(audio).__name__}`'
|
|
250
|
+
)
|
|
251
|
+
expr = Literal(audio, ts.AudioType())
|
|
252
|
+
|
|
253
|
+
if video is not None:
|
|
254
|
+
if isinstance(video, Expr):
|
|
255
|
+
if not video.col_type.is_video_type():
|
|
256
|
+
raise excs.Error(f'similarity(video=...): expected `Video`; got `{video.col_type}`')
|
|
257
|
+
expr = video
|
|
258
|
+
else:
|
|
259
|
+
if not isinstance(video, str):
|
|
260
|
+
raise excs.Error(
|
|
261
|
+
f'similarity(video=...): expected `str` (path to video file); got `{type(video).__name__}`'
|
|
262
|
+
)
|
|
263
|
+
expr = Literal(video, ts.VideoType())
|
|
264
|
+
|
|
265
|
+
return SimilarityExpr(self, expr, idx_name=idx)
|
|
266
|
+
|
|
267
|
+
def embedding(self, *, idx: str | None = None) -> ColumnRef:
|
|
268
|
+
from pixeltable.index import EmbeddingIndex
|
|
269
|
+
|
|
270
|
+
idx_info = self.tbl.get().get_idx(self.col, idx, EmbeddingIndex)
|
|
271
|
+
return ColumnRef(idx_info.val_col)
|
|
272
|
+
|
|
273
|
+
@property
|
|
274
|
+
def tbl(self) -> catalog.TableVersionHandle:
|
|
275
|
+
return self.reference_tbl.tbl_version if self.reference_tbl is not None else self.col.tbl_handle
|
|
276
|
+
|
|
277
|
+
def default_column_name(self) -> str | None:
|
|
278
|
+
return self.col.name if self.col is not None else None
|
|
127
279
|
|
|
128
280
|
def _equals(self, other: ColumnRef) -> bool:
|
|
129
281
|
return self.col == other.col and self.perform_validation == other.perform_validation
|
|
130
282
|
|
|
131
|
-
def
|
|
132
|
-
|
|
133
|
-
|
|
283
|
+
def select(self) -> 'Query':
|
|
284
|
+
import pixeltable.plan as plan
|
|
285
|
+
from pixeltable._query import Query
|
|
286
|
+
|
|
287
|
+
if self.reference_tbl is None:
|
|
288
|
+
# No reference table; use the current version of the table to which the column belongs
|
|
289
|
+
tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl_handle.id)
|
|
290
|
+
return tbl.select(self)
|
|
291
|
+
else:
|
|
292
|
+
# Explicit reference table; construct a Query directly from it
|
|
293
|
+
return Query(plan.FromClause([self.reference_tbl])).select(self)
|
|
134
294
|
|
|
135
|
-
def show(self, *args, **kwargs) -> '
|
|
136
|
-
return self.
|
|
295
|
+
def show(self, *args: Any, **kwargs: Any) -> 'ResultSet':
|
|
296
|
+
return self.select().show(*args, **kwargs)
|
|
137
297
|
|
|
138
|
-
def head(self, *args, **kwargs) -> '
|
|
139
|
-
return self.
|
|
298
|
+
def head(self, *args: Any, **kwargs: Any) -> 'ResultSet':
|
|
299
|
+
return self.select().head(*args, **kwargs)
|
|
140
300
|
|
|
141
|
-
def tail(self, *args, **kwargs) -> '
|
|
142
|
-
return self.
|
|
301
|
+
def tail(self, *args: Any, **kwargs: Any) -> 'ResultSet':
|
|
302
|
+
return self.select().tail(*args, **kwargs)
|
|
143
303
|
|
|
144
304
|
def count(self) -> int:
|
|
145
|
-
return self.
|
|
305
|
+
return self.select().count()
|
|
306
|
+
|
|
307
|
+
def distinct(self) -> 'Query':
|
|
308
|
+
"""Return distinct values in this column."""
|
|
309
|
+
return self.select().distinct()
|
|
146
310
|
|
|
147
311
|
def __str__(self) -> str:
|
|
148
312
|
if self.col.name is None:
|
|
@@ -157,17 +321,32 @@ class ColumnRef(Expr):
|
|
|
157
321
|
return self._descriptors().to_html()
|
|
158
322
|
|
|
159
323
|
def _descriptors(self) -> DescriptionHelper:
|
|
160
|
-
|
|
324
|
+
with catalog.Catalog.get().begin_xact():
|
|
325
|
+
tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl_handle.id)
|
|
161
326
|
helper = DescriptionHelper()
|
|
162
|
-
helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path!r})')
|
|
327
|
+
helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path()!r})')
|
|
163
328
|
helper.append(tbl._col_descriptor([self.col.name]))
|
|
164
329
|
idxs = tbl._index_descriptor([self.col.name])
|
|
165
330
|
if len(idxs) > 0:
|
|
166
331
|
helper.append(idxs)
|
|
167
332
|
return helper
|
|
168
333
|
|
|
169
|
-
def
|
|
170
|
-
|
|
334
|
+
def prepare(self) -> None:
|
|
335
|
+
from pixeltable import store
|
|
336
|
+
|
|
337
|
+
if not self.is_unstored_iter_col:
|
|
338
|
+
return
|
|
339
|
+
col = self.col_handle.get()
|
|
340
|
+
self.base_rowid_len = col.get_tbl().base.get().num_rowid_columns()
|
|
341
|
+
self.base_rowid = [None] * self.base_rowid_len
|
|
342
|
+
assert isinstance(col.get_tbl().store_tbl, store.StoreComponentView)
|
|
343
|
+
self.pos_idx = cast(store.StoreComponentView, col.get_tbl().store_tbl).pos_col_idx
|
|
344
|
+
|
|
345
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
346
|
+
if self.perform_validation:
|
|
347
|
+
return None
|
|
348
|
+
self.col = self.col_handle.get()
|
|
349
|
+
return self.col.sa_col
|
|
171
350
|
|
|
172
351
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
173
352
|
if self.perform_validation:
|
|
@@ -205,31 +384,44 @@ class ColumnRef(Expr):
|
|
|
205
384
|
return
|
|
206
385
|
|
|
207
386
|
# if this is a new base row, we need to instantiate a new iterator
|
|
208
|
-
if self.base_rowid != data_row.pk[:self.base_rowid_len]:
|
|
387
|
+
if self.base_rowid != data_row.pk[: self.base_rowid_len]:
|
|
388
|
+
assert self.iter_arg_ctx is not None
|
|
209
389
|
row_builder.eval(data_row, self.iter_arg_ctx)
|
|
210
390
|
iterator_args = data_row[self.iter_arg_ctx.target_slot_idxs[0]]
|
|
211
|
-
self.iterator = self.col.
|
|
212
|
-
self.base_rowid = data_row.pk[:self.base_rowid_len]
|
|
213
|
-
|
|
391
|
+
self.iterator = self.col.get_tbl().iterator_cls(**iterator_args)
|
|
392
|
+
self.base_rowid = data_row.pk[: self.base_rowid_len]
|
|
393
|
+
stored_outputs = {col_ref.col.name: data_row[col_ref.slot_idx] for col_ref in self.iter_outputs}
|
|
394
|
+
assert all(name is not None for name in stored_outputs)
|
|
395
|
+
self.iterator.set_pos(data_row.pk[self.pos_idx], **stored_outputs)
|
|
214
396
|
res = next(self.iterator)
|
|
215
397
|
data_row[self.slot_idx] = res[self.col.name]
|
|
216
398
|
|
|
217
399
|
def _as_dict(self) -> dict:
|
|
218
|
-
|
|
219
|
-
version = tbl.version if tbl.is_snapshot else None
|
|
400
|
+
tbl_handle = self.col.tbl_handle
|
|
220
401
|
# we omit self.components, even if this is a validating ColumnRef, because init() will recreate the
|
|
221
402
|
# non-validating component ColumnRef
|
|
403
|
+
assert tbl_handle.anchor_tbl_id is None # TODO: support anchor_tbl_id for view-over-replica
|
|
222
404
|
return {
|
|
223
|
-
'tbl_id': str(
|
|
224
|
-
'tbl_version':
|
|
405
|
+
'tbl_id': str(tbl_handle.id),
|
|
406
|
+
'tbl_version': tbl_handle.effective_version,
|
|
225
407
|
'col_id': self.col.id,
|
|
226
|
-
'
|
|
408
|
+
'reference_tbl': self.reference_tbl.as_dict() if self.reference_tbl is not None else None,
|
|
409
|
+
'perform_validation': self.perform_validation,
|
|
227
410
|
}
|
|
228
411
|
|
|
412
|
+
@classmethod
|
|
413
|
+
def get_column_id(cls, d: dict) -> catalog.QColumnId:
|
|
414
|
+
tbl_id, col_id = UUID(d['tbl_id']), d['col_id']
|
|
415
|
+
return catalog.QColumnId(tbl_id, col_id)
|
|
416
|
+
|
|
229
417
|
@classmethod
|
|
230
418
|
def get_column(cls, d: dict) -> catalog.Column:
|
|
231
419
|
tbl_id, version, col_id = UUID(d['tbl_id']), d['tbl_version'], d['col_id']
|
|
232
|
-
|
|
420
|
+
# validate_initialized=False: this gets called as part of TableVersion.init()
|
|
421
|
+
# TODO: When we have views on replicas, we will need to store anchor_tbl_id in metadata as well.
|
|
422
|
+
tbl_version = catalog.Catalog.get().get_tbl_version(
|
|
423
|
+
TableVersionKey(tbl_id, version, None), validate_initialized=False
|
|
424
|
+
)
|
|
233
425
|
# don't use tbl_version.cols_by_id here, this might be a snapshot reference to a column that was then dropped
|
|
234
426
|
col = next(col for col in tbl_version.cols if col.id == col_id)
|
|
235
427
|
return col
|
|
@@ -237,5 +429,6 @@ class ColumnRef(Expr):
|
|
|
237
429
|
@classmethod
|
|
238
430
|
def _from_dict(cls, d: dict, _: list[Expr]) -> ColumnRef:
|
|
239
431
|
col = cls.get_column(d)
|
|
432
|
+
reference_tbl = None if d['reference_tbl'] is None else catalog.TableVersionPath.from_dict(d['reference_tbl'])
|
|
240
433
|
perform_validation = d['perform_validation']
|
|
241
|
-
return cls(col, perform_validation=perform_validation)
|
|
434
|
+
return cls(col, reference_tbl, perform_validation=perform_validation)
|
pixeltable/exprs/comparison.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
7
7
|
import pixeltable.exceptions as excs
|
|
8
|
-
import pixeltable.index as index
|
|
9
8
|
import pixeltable.type_system as ts
|
|
10
9
|
|
|
11
10
|
from .column_ref import ColumnRef
|
|
@@ -22,6 +21,8 @@ class Comparison(Expr):
|
|
|
22
21
|
operator: ComparisonOperator
|
|
23
22
|
|
|
24
23
|
def __init__(self, operator: ComparisonOperator, op1: Expr, op2: Expr):
|
|
24
|
+
from pixeltable import index
|
|
25
|
+
|
|
25
26
|
super().__init__(ts.BoolType())
|
|
26
27
|
self.operator = operator
|
|
27
28
|
|
|
@@ -38,9 +39,11 @@ class Comparison(Expr):
|
|
|
38
39
|
self.is_search_arg_comparison = False
|
|
39
40
|
self.components = [op1, op2]
|
|
40
41
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
42
|
+
if (
|
|
43
|
+
self.is_search_arg_comparison
|
|
44
|
+
and self._op2.col_type.is_string_type()
|
|
45
|
+
and len(self._op2.val) >= index.BtreeIndex.MAX_STRING_LEN
|
|
46
|
+
):
|
|
44
47
|
# we can't use an index for this after all
|
|
45
48
|
raise excs.Error(
|
|
46
49
|
f'String literal too long for comparison against indexed column {self._op1.col.name!r} '
|
|
@@ -56,7 +59,7 @@ class Comparison(Expr):
|
|
|
56
59
|
return self.operator == other.operator
|
|
57
60
|
|
|
58
61
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
59
|
-
return super()._id_attrs()
|
|
62
|
+
return [*super()._id_attrs(), ('operator', self.operator.value)]
|
|
60
63
|
|
|
61
64
|
@property
|
|
62
65
|
def _op1(self) -> Expr:
|
|
@@ -66,7 +69,9 @@ class Comparison(Expr):
|
|
|
66
69
|
def _op2(self) -> Expr:
|
|
67
70
|
return self.components[1]
|
|
68
71
|
|
|
69
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
72
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
73
|
+
import pixeltable.index as index
|
|
74
|
+
|
|
70
75
|
if str(self._op1.col_type.to_sa_type()) != str(self._op2.col_type.to_sa_type()):
|
|
71
76
|
# Comparing columns of different SQL types (e.g., string vs. json); this can only be done in Python
|
|
72
77
|
# TODO(aaron-siegel): We may be able to handle some cases in SQL by casting one side to the other's type
|
|
@@ -76,9 +81,9 @@ class Comparison(Expr):
|
|
|
76
81
|
if self.is_search_arg_comparison:
|
|
77
82
|
# reference the index value column if there is an index and this is not a snapshot
|
|
78
83
|
# (indices don't apply to snapshots)
|
|
79
|
-
tbl = self._op1.col.
|
|
84
|
+
tbl = self._op1.col.get_tbl()
|
|
80
85
|
idx_info = [
|
|
81
|
-
info for info in self._op1.col.
|
|
86
|
+
info for info in tbl.idxs_by_col.get(self._op1.col.qid, []) if isinstance(info.idx, index.BtreeIndex)
|
|
82
87
|
]
|
|
83
88
|
if len(idx_info) > 0 and not tbl.is_snapshot:
|
|
84
89
|
# there shouldn't be multiple B-tree indices on a column
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import operator
|
|
4
|
-
from typing import Any, Callable
|
|
4
|
+
from typing import Any, Callable
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
|
-
import
|
|
8
|
+
from pixeltable import type_system as ts
|
|
9
9
|
|
|
10
10
|
from .data_row import DataRow
|
|
11
11
|
from .expr import Expr
|
|
@@ -36,7 +36,8 @@ class CompoundPredicate(Expr):
|
|
|
36
36
|
return f' {self.operator} '.join([f'({e})' for e in self.components])
|
|
37
37
|
|
|
38
38
|
@classmethod
|
|
39
|
-
def make_conjunction(cls, operands: list[Expr]) ->
|
|
39
|
+
def make_conjunction(cls, operands: list[Expr | None]) -> Expr | None:
|
|
40
|
+
operands = [e for e in operands if e is not None]
|
|
40
41
|
if len(operands) == 0:
|
|
41
42
|
return None
|
|
42
43
|
if len(operands) == 1:
|
|
@@ -58,17 +59,16 @@ class CompoundPredicate(Expr):
|
|
|
58
59
|
return self.operator == other.operator
|
|
59
60
|
|
|
60
61
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
61
|
-
return super()._id_attrs()
|
|
62
|
+
return [*super()._id_attrs(), ('operator', self.operator.value)]
|
|
62
63
|
|
|
63
|
-
def split_conjuncts(
|
|
64
|
-
|
|
65
|
-
if self.operator == LogicalOperator.OR or self.operator == LogicalOperator.NOT:
|
|
64
|
+
def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Expr | None]:
|
|
65
|
+
if self.operator in (LogicalOperator.OR, LogicalOperator.NOT):
|
|
66
66
|
return super().split_conjuncts(condition)
|
|
67
67
|
matches = [op for op in self.components if condition(op)]
|
|
68
68
|
non_matches = [op for op in self.components if not condition(op)]
|
|
69
69
|
return (matches, self.make_conjunction(non_matches))
|
|
70
70
|
|
|
71
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
71
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
72
72
|
sql_exprs = [sql_elements.get(op) for op in self.components]
|
|
73
73
|
if any(e is None for e in sql_exprs):
|
|
74
74
|
return None
|
|
@@ -84,7 +84,7 @@ class CompoundPredicate(Expr):
|
|
|
84
84
|
if self.operator == LogicalOperator.NOT:
|
|
85
85
|
data_row[self.slot_idx] = not data_row[self.components[0].slot_idx]
|
|
86
86
|
else:
|
|
87
|
-
val =
|
|
87
|
+
val = self.operator == LogicalOperator.AND
|
|
88
88
|
op_function = operator.and_ if self.operator == LogicalOperator.AND else operator.or_
|
|
89
89
|
for op in self.components:
|
|
90
90
|
val = op_function(val, data_row[op.slot_idx])
|
|
@@ -97,4 +97,3 @@ class CompoundPredicate(Expr):
|
|
|
97
97
|
def _from_dict(cls, d: dict, components: list[Expr]) -> CompoundPredicate:
|
|
98
98
|
assert 'operator' in d
|
|
99
99
|
return cls(LogicalOperator(d['operator']), components)
|
|
100
|
-
|