pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +42 -8
- pixeltable/{dataframe.py → _query.py} +470 -206
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -4
- pixeltable/catalog/catalog.py +1785 -432
- pixeltable/catalog/column.py +190 -113
- pixeltable/catalog/dir.py +2 -4
- pixeltable/catalog/globals.py +19 -46
- pixeltable/catalog/insertable_table.py +191 -98
- pixeltable/catalog/path.py +63 -23
- pixeltable/catalog/schema_object.py +11 -15
- pixeltable/catalog/table.py +843 -436
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +978 -657
- pixeltable/catalog/table_version_handle.py +72 -16
- pixeltable/catalog/table_version_path.py +112 -43
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +134 -90
- pixeltable/config.py +134 -22
- pixeltable/env.py +471 -157
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +4 -1
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +11 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +106 -56
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +19 -19
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +351 -84
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +36 -23
- pixeltable/exprs/column_ref.py +213 -89
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +164 -54
- pixeltable/exprs/expr.py +70 -44
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +100 -40
- pixeltable/exprs/globals.py +2 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +18 -32
- pixeltable/exprs/is_null.py +7 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +27 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +167 -67
- pixeltable/exprs/rowid_ref.py +25 -10
- pixeltable/exprs/similarity_expr.py +58 -40
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +17 -11
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +29 -27
- pixeltable/func/signature.py +46 -19
- pixeltable/func/tools.py +31 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +16 -0
- pixeltable/functions/anthropic.py +123 -77
- pixeltable/functions/audio.py +147 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +7 -4
- pixeltable/functions/deepseek.py +35 -43
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +11 -20
- pixeltable/functions/gemini.py +195 -39
- pixeltable/functions/globals.py +142 -14
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1056 -24
- pixeltable/functions/image.py +115 -57
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +28 -13
- pixeltable/functions/math.py +67 -5
- pixeltable/functions/mistralai.py +18 -55
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +20 -13
- pixeltable/functions/openai.py +240 -226
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +4 -4
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +239 -69
- pixeltable/functions/timestamp.py +16 -16
- pixeltable/functions/together.py +24 -84
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1515 -107
- pixeltable/functions/vision.py +8 -8
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +16 -8
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +362 -115
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +28 -22
- pixeltable/index/embedding_index.py +100 -118
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +8 -7
- pixeltable/io/external_store.py +56 -105
- pixeltable/io/fiftyone.py +13 -13
- pixeltable/io/globals.py +31 -30
- pixeltable/io/hf_datasets.py +61 -16
- pixeltable/io/label_studio.py +74 -70
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +21 -12
- pixeltable/io/parquet.py +25 -105
- pixeltable/io/table_data_conduit.py +250 -123
- pixeltable/io/utils.py +4 -4
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +26 -25
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +112 -78
- pixeltable/iterators/image.py +12 -15
- pixeltable/iterators/string.py +11 -4
- pixeltable/iterators/video.py +523 -120
- pixeltable/metadata/__init__.py +14 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_30.py +34 -21
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +20 -31
- pixeltable/metadata/notes.py +9 -0
- pixeltable/metadata/schema.py +140 -53
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +382 -115
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +547 -83
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +257 -59
- pixeltable/store.py +311 -194
- pixeltable/type_system.py +373 -211
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +131 -17
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +6 -6
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +32 -6
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +7 -18
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +86 -48
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +26 -0
- pixeltable/utils/system.py +30 -0
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -40
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable-0.3.14.dist-info/METADATA +0 -434
- pixeltable-0.3.14.dist-info/RECORD +0 -186
- pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/exprs/column_ref.py
CHANGED
|
@@ -1,20 +1,29 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import
|
|
4
|
-
from typing import Any,
|
|
3
|
+
import warnings
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Sequence, cast
|
|
5
5
|
from uuid import UUID
|
|
6
6
|
|
|
7
|
+
import PIL.Image
|
|
7
8
|
import sqlalchemy as sql
|
|
8
9
|
|
|
9
|
-
import pixeltable as
|
|
10
|
-
|
|
10
|
+
import pixeltable.catalog as catalog
|
|
11
|
+
import pixeltable.exceptions as excs
|
|
12
|
+
import pixeltable.iterators as iters
|
|
13
|
+
import pixeltable.type_system as ts
|
|
14
|
+
from pixeltable.catalog.table_version import TableVersionKey
|
|
11
15
|
|
|
12
16
|
from ..utils.description_helper import DescriptionHelper
|
|
17
|
+
from ..utils.filecache import FileCache
|
|
13
18
|
from .data_row import DataRow
|
|
14
19
|
from .expr import Expr
|
|
20
|
+
from .literal import Literal
|
|
15
21
|
from .row_builder import RowBuilder
|
|
16
22
|
from .sql_element_cache import SqlElementCache
|
|
17
23
|
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from pixeltable._query import Query, ResultSet
|
|
26
|
+
|
|
18
27
|
|
|
19
28
|
class ColumnRef(Expr):
|
|
20
29
|
"""A reference to a table column
|
|
@@ -41,37 +50,38 @@ class ColumnRef(Expr):
|
|
|
41
50
|
insert them into the EvalCtxs as needed
|
|
42
51
|
"""
|
|
43
52
|
|
|
44
|
-
col: catalog.Column
|
|
45
|
-
|
|
53
|
+
col: catalog.Column # TODO: merge with col_handle
|
|
54
|
+
col_handle: catalog.ColumnHandle
|
|
55
|
+
reference_tbl: catalog.TableVersionPath | None
|
|
46
56
|
is_unstored_iter_col: bool
|
|
47
|
-
iter_arg_ctx: Optional[RowBuilder.EvalCtx]
|
|
48
|
-
base_rowid_len: int
|
|
49
|
-
base_rowid: Sequence[Optional[Any]]
|
|
50
|
-
iterator: Optional[iters.ComponentIterator]
|
|
51
|
-
pos_idx: Optional[int]
|
|
52
|
-
id: int
|
|
53
57
|
perform_validation: bool # if True, performs media validation
|
|
58
|
+
iter_arg_ctx: RowBuilder.EvalCtx | None
|
|
59
|
+
iter_outputs: list[ColumnRef] | None
|
|
60
|
+
base_rowid_len: int # number of rowid columns in the base table
|
|
61
|
+
|
|
62
|
+
# execution state
|
|
63
|
+
base_rowid: Sequence[Any | None]
|
|
64
|
+
iterator: iters.ComponentIterator | None
|
|
65
|
+
pos_idx: int
|
|
54
66
|
|
|
55
67
|
def __init__(
|
|
56
68
|
self,
|
|
57
69
|
col: catalog.Column,
|
|
58
|
-
reference_tbl:
|
|
59
|
-
perform_validation:
|
|
70
|
+
reference_tbl: catalog.TableVersionPath | None = None,
|
|
71
|
+
perform_validation: bool | None = None,
|
|
60
72
|
):
|
|
61
73
|
super().__init__(col.col_type)
|
|
62
|
-
assert col.tbl is not None
|
|
63
74
|
self.col = col
|
|
64
75
|
self.reference_tbl = reference_tbl
|
|
65
|
-
self.
|
|
66
|
-
|
|
67
|
-
|
|
76
|
+
self.col_handle = col.handle
|
|
77
|
+
|
|
78
|
+
self.is_unstored_iter_col = col.is_iterator_col and not col.is_stored
|
|
68
79
|
self.iter_arg_ctx = None
|
|
69
|
-
|
|
70
|
-
self.base_rowid_len =
|
|
71
|
-
self.base_rowid = [
|
|
80
|
+
self.iter_outputs = None
|
|
81
|
+
self.base_rowid_len = 0
|
|
82
|
+
self.base_rowid = []
|
|
72
83
|
self.iterator = None
|
|
73
|
-
|
|
74
|
-
self.pos_idx = col.tbl.get().num_rowid_columns() - 1 if self.is_unstored_iter_col else None
|
|
84
|
+
self.pos_idx = 0
|
|
75
85
|
|
|
76
86
|
self.perform_validation = False
|
|
77
87
|
if col.col_type.is_media_type():
|
|
@@ -90,22 +100,27 @@ class ColumnRef(Expr):
|
|
|
90
100
|
self.components = [non_validating_col_ref]
|
|
91
101
|
self.id = self._create_id()
|
|
92
102
|
|
|
93
|
-
def set_iter_arg_ctx(self, iter_arg_ctx: RowBuilder.EvalCtx) -> None:
|
|
103
|
+
def set_iter_arg_ctx(self, iter_arg_ctx: RowBuilder.EvalCtx, iter_outputs: list[ColumnRef]) -> None:
|
|
94
104
|
self.iter_arg_ctx = iter_arg_ctx
|
|
105
|
+
self.iter_outputs = iter_outputs
|
|
106
|
+
# If this is an unstored iterator column, then the iterator outputs may be needed in order to properly set the
|
|
107
|
+
# iterator position. Therefore, we need to add them as components in order to ensure they're marked as
|
|
108
|
+
# eval dependencies.
|
|
109
|
+
self.components.extend(iter_outputs)
|
|
95
110
|
assert len(self.iter_arg_ctx.target_slot_idxs) == 1 # a single inline dict
|
|
96
111
|
|
|
97
112
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
98
113
|
return [
|
|
99
114
|
*super()._id_attrs(),
|
|
100
|
-
('tbl_id', self.col.
|
|
115
|
+
('tbl_id', self.col.tbl_handle.id),
|
|
101
116
|
('col_id', self.col.id),
|
|
102
117
|
('perform_validation', self.perform_validation),
|
|
103
118
|
]
|
|
104
119
|
|
|
105
120
|
# override
|
|
106
121
|
def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> ColumnRef:
|
|
107
|
-
target = tbl_versions[self.col.
|
|
108
|
-
assert self.col.id in target.cols_by_id
|
|
122
|
+
target = tbl_versions[self.col.tbl_handle.id]
|
|
123
|
+
assert self.col.id in target.cols_by_id, f'{target}: {self.col.id} not in {list(target.cols_by_id.keys())}'
|
|
109
124
|
col = target.cols_by_id[self.col.id]
|
|
110
125
|
return ColumnRef(col, self.reference_tbl)
|
|
111
126
|
|
|
@@ -113,12 +128,16 @@ class ColumnRef(Expr):
|
|
|
113
128
|
from .column_property_ref import ColumnPropertyRef
|
|
114
129
|
|
|
115
130
|
# resolve column properties
|
|
131
|
+
if name == ColumnPropertyRef.Property.CELLMD.name.lower():
|
|
132
|
+
# This is not user accessible, but used internally to store cell metadata
|
|
133
|
+
return super().__getattr__(name)
|
|
134
|
+
|
|
116
135
|
if (
|
|
117
136
|
name == ColumnPropertyRef.Property.ERRORTYPE.name.lower()
|
|
118
137
|
or name == ColumnPropertyRef.Property.ERRORMSG.name.lower()
|
|
119
138
|
):
|
|
120
|
-
|
|
121
|
-
if not
|
|
139
|
+
is_valid = (self.col.is_computed or self.col.col_type.is_media_type()) and self.col.is_stored
|
|
140
|
+
if not is_valid:
|
|
122
141
|
raise excs.Error(f'{name} only valid for a stored computed or media column: {self}')
|
|
123
142
|
return ColumnPropertyRef(self, ColumnPropertyRef.Property[name.upper()])
|
|
124
143
|
if (
|
|
@@ -138,78 +157,156 @@ class ColumnRef(Expr):
|
|
|
138
157
|
|
|
139
158
|
return super().__getattr__(name)
|
|
140
159
|
|
|
141
|
-
def
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
160
|
+
def recompute(self, *, cascade: bool = True, errors_only: bool = False) -> catalog.UpdateStatus:
|
|
161
|
+
cat = catalog.Catalog.get()
|
|
162
|
+
# lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
|
|
163
|
+
with cat.begin_xact(tbl=self.reference_tbl, for_write=True, lock_mutable_tree=True):
|
|
164
|
+
tbl_version = self.col_handle.tbl_version.get()
|
|
165
|
+
if tbl_version.id != self.reference_tbl.tbl_id:
|
|
166
|
+
raise excs.Error('Cannot recompute column of a base.')
|
|
167
|
+
if tbl_version.is_snapshot:
|
|
168
|
+
raise excs.Error('Cannot recompute column of a snapshot.')
|
|
169
|
+
col_name = self.col_handle.get().name
|
|
170
|
+
status = tbl_version.recompute_columns([col_name], errors_only=errors_only, cascade=cascade)
|
|
171
|
+
FileCache.get().emit_eviction_warnings()
|
|
172
|
+
return status
|
|
173
|
+
|
|
174
|
+
def similarity(
|
|
175
|
+
self,
|
|
176
|
+
item: Any = None,
|
|
177
|
+
*,
|
|
178
|
+
string: str | None = None,
|
|
179
|
+
image: PIL.Image.Image | None = None,
|
|
180
|
+
audio: str | None = None,
|
|
181
|
+
video: str | None = None,
|
|
182
|
+
idx: str | None = None,
|
|
183
|
+
) -> Expr:
|
|
184
|
+
from .similarity_expr import SimilarityExpr
|
|
146
185
|
|
|
147
|
-
|
|
148
|
-
|
|
186
|
+
if item is not None:
|
|
187
|
+
warnings.warn(
|
|
188
|
+
'Use of similarity() without specifying an explicit modality is deprecated -- '
|
|
189
|
+
'since version 0.5.7. Please use one of the following instead:\n'
|
|
190
|
+
' .similarity(string=...)\n'
|
|
191
|
+
' .similarity(image=...)\n'
|
|
192
|
+
' .similarity(audio=...)\n'
|
|
193
|
+
' .similarity(video=...)',
|
|
194
|
+
DeprecationWarning,
|
|
195
|
+
stacklevel=2,
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
arg_count = (string is not None) + (image is not None) + (audio is not None) + (video is not None)
|
|
199
|
+
|
|
200
|
+
if item is not None and arg_count != 0:
|
|
201
|
+
raise excs.Error('similarity(): `item` is deprecated and cannot be used together with modality arguments')
|
|
202
|
+
|
|
203
|
+
if arg_count > 1:
|
|
204
|
+
raise excs.Error('similarity(): expected exactly one of string=..., image=..., audio=..., video=...')
|
|
205
|
+
|
|
206
|
+
expr: Expr
|
|
207
|
+
|
|
208
|
+
if item is not None:
|
|
209
|
+
if isinstance(item, Expr): # This can happen when using similarity() with @query
|
|
210
|
+
if not (item.col_type.is_string_type() or item.col_type.is_image_type()):
|
|
211
|
+
raise excs.Error(f'similarity(): expected `String` or `Image`; got `{item.col_type}`')
|
|
212
|
+
expr = item
|
|
213
|
+
else:
|
|
214
|
+
if not isinstance(item, (str, PIL.Image.Image)):
|
|
215
|
+
raise excs.Error(f'similarity(): expected `str` or `PIL.Image.Image`; got `{type(item).__name__}`')
|
|
216
|
+
expr = Expr.from_object(item)
|
|
217
|
+
assert expr.col_type.is_string_type() or expr.col_type.is_image_type()
|
|
218
|
+
|
|
219
|
+
if string is not None:
|
|
220
|
+
if isinstance(string, Expr):
|
|
221
|
+
if not string.col_type.is_string_type():
|
|
222
|
+
raise excs.Error(f'similarity(string=...): expected `String`; got `{expr.col_type}`')
|
|
223
|
+
expr = string
|
|
224
|
+
else:
|
|
225
|
+
if not isinstance(string, str):
|
|
226
|
+
raise excs.Error(f'similarity(string=...): expected `str`; got `{type(string).__name__}`')
|
|
227
|
+
expr = Expr.from_object(string)
|
|
228
|
+
assert expr.col_type.is_string_type()
|
|
229
|
+
|
|
230
|
+
if image is not None:
|
|
231
|
+
if isinstance(image, Expr):
|
|
232
|
+
if not image.col_type.is_image_type():
|
|
233
|
+
raise excs.Error(f'similarity(image=...): expected `Image`; got `{image.col_type}`')
|
|
234
|
+
expr = image
|
|
235
|
+
else:
|
|
236
|
+
if not isinstance(image, PIL.Image.Image):
|
|
237
|
+
raise excs.Error(f'similarity(image=...): expected `PIL.Image.Image`; got `{type(image).__name__}`')
|
|
238
|
+
expr = Expr.from_object(image)
|
|
239
|
+
assert expr.col_type.is_image_type()
|
|
240
|
+
|
|
241
|
+
if audio is not None:
|
|
242
|
+
if isinstance(audio, Expr):
|
|
243
|
+
if not audio.col_type.is_audio_type():
|
|
244
|
+
raise excs.Error(f'similarity(audio=...): expected `Audio`; got `{audio.col_type}`')
|
|
245
|
+
expr = audio
|
|
246
|
+
else:
|
|
247
|
+
if not isinstance(audio, str):
|
|
248
|
+
raise excs.Error(
|
|
249
|
+
f'similarity(audio=...): expected `str` (path to audio file); got `{type(audio).__name__}`'
|
|
250
|
+
)
|
|
251
|
+
expr = Literal(audio, ts.AudioType())
|
|
252
|
+
|
|
253
|
+
if video is not None:
|
|
254
|
+
if isinstance(video, Expr):
|
|
255
|
+
if not video.col_type.is_video_type():
|
|
256
|
+
raise excs.Error(f'similarity(video=...): expected `Video`; got `{video.col_type}`')
|
|
257
|
+
expr = video
|
|
258
|
+
else:
|
|
259
|
+
if not isinstance(video, str):
|
|
260
|
+
raise excs.Error(
|
|
261
|
+
f'similarity(video=...): expected `str` (path to video file); got `{type(video).__name__}`'
|
|
262
|
+
)
|
|
263
|
+
expr = Literal(video, ts.VideoType())
|
|
149
264
|
|
|
150
|
-
|
|
151
|
-
info: value for info, value in idx_info_dict.items() if isinstance(value.idx, index.EmbeddingIndex)
|
|
152
|
-
}
|
|
153
|
-
if len(embedding_idx_info) == 0:
|
|
154
|
-
raise excs.Error(f'No indices found for {method_name!r} on column {self.col.name!r}')
|
|
155
|
-
if idx_name is not None and idx_name not in embedding_idx_info:
|
|
156
|
-
raise excs.Error(f'Index {idx_name!r} not found for {method_name!r} on column {self.col.name!r}')
|
|
157
|
-
if len(embedding_idx_info) > 1:
|
|
158
|
-
if idx_name is None:
|
|
159
|
-
raise excs.Error(
|
|
160
|
-
f'Column {self.col.name!r} has multiple indices; use the index name to disambiguate: '
|
|
161
|
-
f'`{method_name}(..., idx=<index_name>)`'
|
|
162
|
-
)
|
|
163
|
-
idx_info = {idx_name: embedding_idx_info[idx_name]}
|
|
164
|
-
else:
|
|
165
|
-
idx_info = embedding_idx_info
|
|
166
|
-
return idx_info
|
|
265
|
+
return SimilarityExpr(self, expr, idx_name=idx)
|
|
167
266
|
|
|
168
|
-
def
|
|
169
|
-
from .
|
|
267
|
+
def embedding(self, *, idx: str | None = None) -> ColumnRef:
|
|
268
|
+
from pixeltable.index import EmbeddingIndex
|
|
170
269
|
|
|
171
|
-
|
|
270
|
+
idx_info = self.tbl.get().get_idx(self.col, idx, EmbeddingIndex)
|
|
271
|
+
return ColumnRef(idx_info.val_col)
|
|
172
272
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
col = copy.copy(next(iter(idx_info.values())).val_col)
|
|
177
|
-
col.name = f'{self.col.name}_embedding_{idx if idx is not None else ""}'
|
|
178
|
-
col.create_sa_cols()
|
|
179
|
-
return ColumnRef(col)
|
|
273
|
+
@property
|
|
274
|
+
def tbl(self) -> catalog.TableVersionHandle:
|
|
275
|
+
return self.reference_tbl.tbl_version if self.reference_tbl is not None else self.col.tbl_handle
|
|
180
276
|
|
|
181
|
-
def default_column_name(self) ->
|
|
277
|
+
def default_column_name(self) -> str | None:
|
|
182
278
|
return self.col.name if self.col is not None else None
|
|
183
279
|
|
|
184
280
|
def _equals(self, other: ColumnRef) -> bool:
|
|
185
281
|
return self.col == other.col and self.perform_validation == other.perform_validation
|
|
186
282
|
|
|
187
|
-
def
|
|
188
|
-
|
|
283
|
+
def select(self) -> 'Query':
|
|
284
|
+
import pixeltable.plan as plan
|
|
285
|
+
from pixeltable._query import Query
|
|
189
286
|
|
|
190
287
|
if self.reference_tbl is None:
|
|
191
288
|
# No reference table; use the current version of the table to which the column belongs
|
|
192
|
-
tbl = catalog.Catalog.get().get_table_by_id(self.col.
|
|
289
|
+
tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl_handle.id)
|
|
193
290
|
return tbl.select(self)
|
|
194
291
|
else:
|
|
195
|
-
# Explicit reference table; construct a
|
|
196
|
-
return
|
|
292
|
+
# Explicit reference table; construct a Query directly from it
|
|
293
|
+
return Query(plan.FromClause([self.reference_tbl])).select(self)
|
|
197
294
|
|
|
198
|
-
def show(self, *args: Any, **kwargs: Any) -> '
|
|
199
|
-
return self.
|
|
295
|
+
def show(self, *args: Any, **kwargs: Any) -> 'ResultSet':
|
|
296
|
+
return self.select().show(*args, **kwargs)
|
|
200
297
|
|
|
201
|
-
def head(self, *args: Any, **kwargs: Any) -> '
|
|
202
|
-
return self.
|
|
298
|
+
def head(self, *args: Any, **kwargs: Any) -> 'ResultSet':
|
|
299
|
+
return self.select().head(*args, **kwargs)
|
|
203
300
|
|
|
204
|
-
def tail(self, *args: Any, **kwargs: Any) -> '
|
|
205
|
-
return self.
|
|
301
|
+
def tail(self, *args: Any, **kwargs: Any) -> 'ResultSet':
|
|
302
|
+
return self.select().tail(*args, **kwargs)
|
|
206
303
|
|
|
207
304
|
def count(self) -> int:
|
|
208
|
-
return self.
|
|
305
|
+
return self.select().count()
|
|
209
306
|
|
|
210
|
-
def distinct(self) -> '
|
|
307
|
+
def distinct(self) -> 'Query':
|
|
211
308
|
"""Return distinct values in this column."""
|
|
212
|
-
return self.
|
|
309
|
+
return self.select().distinct()
|
|
213
310
|
|
|
214
311
|
def __str__(self) -> str:
|
|
215
312
|
if self.col.name is None:
|
|
@@ -224,17 +321,32 @@ class ColumnRef(Expr):
|
|
|
224
321
|
return self._descriptors().to_html()
|
|
225
322
|
|
|
226
323
|
def _descriptors(self) -> DescriptionHelper:
|
|
227
|
-
|
|
324
|
+
with catalog.Catalog.get().begin_xact():
|
|
325
|
+
tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl_handle.id)
|
|
228
326
|
helper = DescriptionHelper()
|
|
229
|
-
helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path!r})')
|
|
327
|
+
helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path()!r})')
|
|
230
328
|
helper.append(tbl._col_descriptor([self.col.name]))
|
|
231
329
|
idxs = tbl._index_descriptor([self.col.name])
|
|
232
330
|
if len(idxs) > 0:
|
|
233
331
|
helper.append(idxs)
|
|
234
332
|
return helper
|
|
235
333
|
|
|
236
|
-
def
|
|
237
|
-
|
|
334
|
+
def prepare(self) -> None:
|
|
335
|
+
from pixeltable import store
|
|
336
|
+
|
|
337
|
+
if not self.is_unstored_iter_col:
|
|
338
|
+
return
|
|
339
|
+
col = self.col_handle.get()
|
|
340
|
+
self.base_rowid_len = col.get_tbl().base.get().num_rowid_columns()
|
|
341
|
+
self.base_rowid = [None] * self.base_rowid_len
|
|
342
|
+
assert isinstance(col.get_tbl().store_tbl, store.StoreComponentView)
|
|
343
|
+
self.pos_idx = cast(store.StoreComponentView, col.get_tbl().store_tbl).pos_col_idx
|
|
344
|
+
|
|
345
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
346
|
+
if self.perform_validation:
|
|
347
|
+
return None
|
|
348
|
+
self.col = self.col_handle.get()
|
|
349
|
+
return self.col.sa_col
|
|
238
350
|
|
|
239
351
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
240
352
|
if self.perform_validation:
|
|
@@ -273,31 +385,43 @@ class ColumnRef(Expr):
|
|
|
273
385
|
|
|
274
386
|
# if this is a new base row, we need to instantiate a new iterator
|
|
275
387
|
if self.base_rowid != data_row.pk[: self.base_rowid_len]:
|
|
388
|
+
assert self.iter_arg_ctx is not None
|
|
276
389
|
row_builder.eval(data_row, self.iter_arg_ctx)
|
|
277
390
|
iterator_args = data_row[self.iter_arg_ctx.target_slot_idxs[0]]
|
|
278
|
-
self.iterator = self.col.
|
|
391
|
+
self.iterator = self.col.get_tbl().iterator_cls(**iterator_args)
|
|
279
392
|
self.base_rowid = data_row.pk[: self.base_rowid_len]
|
|
280
|
-
|
|
393
|
+
stored_outputs = {col_ref.col.name: data_row[col_ref.slot_idx] for col_ref in self.iter_outputs}
|
|
394
|
+
assert all(name is not None for name in stored_outputs)
|
|
395
|
+
self.iterator.set_pos(data_row.pk[self.pos_idx], **stored_outputs)
|
|
281
396
|
res = next(self.iterator)
|
|
282
397
|
data_row[self.slot_idx] = res[self.col.name]
|
|
283
398
|
|
|
284
399
|
def _as_dict(self) -> dict:
|
|
285
|
-
|
|
286
|
-
tbl_version = tbl.get().version if tbl.get().is_snapshot else None
|
|
400
|
+
tbl_handle = self.col.tbl_handle
|
|
287
401
|
# we omit self.components, even if this is a validating ColumnRef, because init() will recreate the
|
|
288
402
|
# non-validating component ColumnRef
|
|
403
|
+
assert tbl_handle.anchor_tbl_id is None # TODO: support anchor_tbl_id for view-over-replica
|
|
289
404
|
return {
|
|
290
|
-
'tbl_id': str(
|
|
291
|
-
'tbl_version':
|
|
405
|
+
'tbl_id': str(tbl_handle.id),
|
|
406
|
+
'tbl_version': tbl_handle.effective_version,
|
|
292
407
|
'col_id': self.col.id,
|
|
293
408
|
'reference_tbl': self.reference_tbl.as_dict() if self.reference_tbl is not None else None,
|
|
294
409
|
'perform_validation': self.perform_validation,
|
|
295
410
|
}
|
|
296
411
|
|
|
412
|
+
@classmethod
|
|
413
|
+
def get_column_id(cls, d: dict) -> catalog.QColumnId:
|
|
414
|
+
tbl_id, col_id = UUID(d['tbl_id']), d['col_id']
|
|
415
|
+
return catalog.QColumnId(tbl_id, col_id)
|
|
416
|
+
|
|
297
417
|
@classmethod
|
|
298
418
|
def get_column(cls, d: dict) -> catalog.Column:
|
|
299
419
|
tbl_id, version, col_id = UUID(d['tbl_id']), d['tbl_version'], d['col_id']
|
|
300
|
-
|
|
420
|
+
# validate_initialized=False: this gets called as part of TableVersion.init()
|
|
421
|
+
# TODO: When we have views on replicas, we will need to store anchor_tbl_id in metadata as well.
|
|
422
|
+
tbl_version = catalog.Catalog.get().get_tbl_version(
|
|
423
|
+
TableVersionKey(tbl_id, version, None), validate_initialized=False
|
|
424
|
+
)
|
|
301
425
|
# don't use tbl_version.cols_by_id here, this might be a snapshot reference to a column that was then dropped
|
|
302
426
|
col = next(col for col in tbl_version.cols if col.id == col_id)
|
|
303
427
|
return col
|
pixeltable/exprs/comparison.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -69,8 +69,8 @@ class Comparison(Expr):
|
|
|
69
69
|
def _op2(self) -> Expr:
|
|
70
70
|
return self.components[1]
|
|
71
71
|
|
|
72
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
73
|
-
|
|
72
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
73
|
+
import pixeltable.index as index
|
|
74
74
|
|
|
75
75
|
if str(self._op1.col_type.to_sa_type()) != str(self._op2.col_type.to_sa_type()):
|
|
76
76
|
# Comparing columns of different SQL types (e.g., string vs. json); this can only be done in Python
|
|
@@ -81,9 +81,9 @@ class Comparison(Expr):
|
|
|
81
81
|
if self.is_search_arg_comparison:
|
|
82
82
|
# reference the index value column if there is an index and this is not a snapshot
|
|
83
83
|
# (indices don't apply to snapshots)
|
|
84
|
-
tbl = self._op1.col.
|
|
84
|
+
tbl = self._op1.col.get_tbl()
|
|
85
85
|
idx_info = [
|
|
86
|
-
info for info in self._op1.col.
|
|
86
|
+
info for info in tbl.idxs_by_col.get(self._op1.col.qid, []) if isinstance(info.idx, index.BtreeIndex)
|
|
87
87
|
]
|
|
88
88
|
if len(idx_info) > 0 and not tbl.is_snapshot:
|
|
89
89
|
# there shouldn't be multiple B-tree indices on a column
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import operator
|
|
4
|
-
from typing import Any, Callable
|
|
4
|
+
from typing import Any, Callable
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
@@ -36,7 +36,8 @@ class CompoundPredicate(Expr):
|
|
|
36
36
|
return f' {self.operator} '.join([f'({e})' for e in self.components])
|
|
37
37
|
|
|
38
38
|
@classmethod
|
|
39
|
-
def make_conjunction(cls, operands: list[Expr]) ->
|
|
39
|
+
def make_conjunction(cls, operands: list[Expr | None]) -> Expr | None:
|
|
40
|
+
operands = [e for e in operands if e is not None]
|
|
40
41
|
if len(operands) == 0:
|
|
41
42
|
return None
|
|
42
43
|
if len(operands) == 1:
|
|
@@ -60,14 +61,14 @@ class CompoundPredicate(Expr):
|
|
|
60
61
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
61
62
|
return [*super()._id_attrs(), ('operator', self.operator.value)]
|
|
62
63
|
|
|
63
|
-
def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr],
|
|
64
|
+
def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Expr | None]:
|
|
64
65
|
if self.operator in (LogicalOperator.OR, LogicalOperator.NOT):
|
|
65
66
|
return super().split_conjuncts(condition)
|
|
66
67
|
matches = [op for op in self.components if condition(op)]
|
|
67
68
|
non_matches = [op for op in self.components if not condition(op)]
|
|
68
69
|
return (matches, self.make_conjunction(non_matches))
|
|
69
70
|
|
|
70
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
71
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
71
72
|
sql_exprs = [sql_elements.get(op) for op in self.components]
|
|
72
73
|
if any(e is None for e in sql_exprs):
|
|
73
74
|
return None
|