pixeltable 0.3.14__py3-none-any.whl → 0.4.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +292 -105
- pixeltable/catalog/column.py +10 -8
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/insertable_table.py +25 -20
- pixeltable/catalog/schema_object.py +3 -6
- pixeltable/catalog/table.py +245 -189
- pixeltable/catalog/table_version.py +319 -201
- pixeltable/catalog/table_version_handle.py +15 -2
- pixeltable/catalog/table_version_path.py +60 -21
- pixeltable/catalog/view.py +14 -5
- pixeltable/dataframe.py +11 -9
- pixeltable/env.py +2 -4
- pixeltable/exec/in_memory_data_node.py +1 -1
- pixeltable/exec/sql_node.py +20 -11
- pixeltable/exprs/column_property_ref.py +15 -6
- pixeltable/exprs/column_ref.py +32 -11
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/row_builder.py +4 -6
- pixeltable/exprs/rowid_ref.py +8 -0
- pixeltable/exprs/similarity_expr.py +1 -0
- pixeltable/func/query_template_function.py +1 -1
- pixeltable/functions/gemini.py +166 -33
- pixeltable/functions/math.py +63 -0
- pixeltable/functions/string.py +212 -58
- pixeltable/globals.py +7 -4
- pixeltable/index/base.py +5 -0
- pixeltable/index/btree.py +5 -0
- pixeltable/index/embedding_index.py +5 -0
- pixeltable/io/external_store.py +8 -29
- pixeltable/io/label_studio.py +1 -1
- pixeltable/io/parquet.py +4 -4
- pixeltable/io/table_data_conduit.py +0 -31
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_30.py +6 -11
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/util.py +3 -9
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +5 -1
- pixeltable/plan.py +4 -4
- pixeltable/share/packager.py +207 -15
- pixeltable/share/publish.py +2 -2
- pixeltable/store.py +31 -13
- pixeltable/utils/dbms.py +1 -1
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0rc1.dist-info}/METADATA +1 -1
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0rc1.dist-info}/RECORD +50 -49
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0rc1.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0rc1.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0rc1.dist-info}/entry_points.txt +0 -0
|
@@ -41,8 +41,21 @@ class TableVersionHandle:
|
|
|
41
41
|
def get(self) -> TableVersion:
|
|
42
42
|
from .catalog import Catalog
|
|
43
43
|
|
|
44
|
-
|
|
45
|
-
|
|
44
|
+
cat = Catalog.get()
|
|
45
|
+
if self._tbl_version is None or not self._tbl_version.is_validated:
|
|
46
|
+
if self.effective_version is not None and self._tbl_version is not None:
|
|
47
|
+
# this is a snapshot version; we need to make sure we refer to the instance cached
|
|
48
|
+
# in Catalog, in order to avoid mixing sa_tbl instances in the same transaction
|
|
49
|
+
# (which will lead to duplicates in the From clause generated in SqlNode.create_from_clause())
|
|
50
|
+
assert (self.id, self.effective_version) in cat._tbl_versions
|
|
51
|
+
self._tbl_version = cat._tbl_versions[self.id, self.effective_version]
|
|
52
|
+
self._tbl_version.is_validated = True
|
|
53
|
+
else:
|
|
54
|
+
self._tbl_version = Catalog.get().get_tbl_version(self.id, self.effective_version)
|
|
55
|
+
if self.effective_version is None:
|
|
56
|
+
# make sure we don't see a discarded instance of a live TableVersion
|
|
57
|
+
tvs = list(Catalog.get()._tbl_versions.values())
|
|
58
|
+
assert self._tbl_version in tvs
|
|
46
59
|
return self._tbl_version
|
|
47
60
|
|
|
48
61
|
def as_dict(self) -> dict:
|
|
@@ -4,9 +4,11 @@ import logging
|
|
|
4
4
|
from typing import Optional
|
|
5
5
|
from uuid import UUID
|
|
6
6
|
|
|
7
|
+
from pixeltable.env import Env
|
|
7
8
|
from pixeltable.metadata import schema
|
|
8
9
|
|
|
9
10
|
from .column import Column
|
|
11
|
+
from .table_version import TableVersion
|
|
10
12
|
from .table_version_handle import TableVersionHandle
|
|
11
13
|
|
|
12
14
|
_logger = logging.getLogger('pixeltable')
|
|
@@ -22,15 +24,28 @@ class TableVersionPath:
|
|
|
22
24
|
|
|
23
25
|
TableVersionPath contains all metadata needed to execute queries and updates against a particular version of a
|
|
24
26
|
table/view.
|
|
27
|
+
|
|
28
|
+
TableVersionPath supplies metadata needed for query construction (eg, column names), for which it uses a
|
|
29
|
+
cached TableVersion instance.
|
|
30
|
+
- when running inside a transaction, this instance is guaranteed to be validated
|
|
31
|
+
- when running outside a transaction, we use an unvalidated instance in order to avoid repeated validation
|
|
32
|
+
on every metadata-related method call (the instance won't stay validated, because TableVersionHandle.get()
|
|
33
|
+
runs a local transaction, at the end of which the instance is again invalidated)
|
|
34
|
+
- supplying metadata from an unvalidated instance is okay, because it needs to get revalidated anyway when a
|
|
35
|
+
query actually runs (at which point there is a transaction context) - there is no guarantee that in between
|
|
36
|
+
constructing a DataFrame and executing it, the underlying table schema hasn't changed (eg, a concurrent process
|
|
37
|
+
could have dropped a column referenced in the query).
|
|
25
38
|
"""
|
|
26
39
|
|
|
27
40
|
tbl_version: TableVersionHandle
|
|
28
41
|
base: Optional[TableVersionPath]
|
|
42
|
+
_cached_tbl_version: Optional[TableVersion]
|
|
29
43
|
|
|
30
44
|
def __init__(self, tbl_version: TableVersionHandle, base: Optional[TableVersionPath] = None):
|
|
31
45
|
assert tbl_version is not None
|
|
32
46
|
self.tbl_version = tbl_version
|
|
33
47
|
self.base = base
|
|
48
|
+
self._cached_tbl_version = None
|
|
34
49
|
|
|
35
50
|
@classmethod
|
|
36
51
|
def from_md(cls, path: schema.TableVersionPath) -> TableVersionPath:
|
|
@@ -47,17 +62,40 @@ class TableVersionPath:
|
|
|
47
62
|
result.extend(self.base.as_md())
|
|
48
63
|
return result
|
|
49
64
|
|
|
65
|
+
def refresh_cached_md(self) -> None:
|
|
66
|
+
from pixeltable.catalog import Catalog
|
|
67
|
+
|
|
68
|
+
if Env.get().in_xact:
|
|
69
|
+
# when we're running inside a transaction, we need to make sure to supply current metadata;
|
|
70
|
+
# mixing stale metadata with current metadata leads to query construction failures
|
|
71
|
+
# (multiple sqlalchemy Table instances for the same underlying table create corrupted From clauses)
|
|
72
|
+
if self._cached_tbl_version is not None and self._cached_tbl_version.is_validated:
|
|
73
|
+
# nothing to refresh
|
|
74
|
+
return
|
|
75
|
+
elif self._cached_tbl_version is not None:
|
|
76
|
+
return
|
|
77
|
+
|
|
78
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
79
|
+
self._cached_tbl_version = self.tbl_version.get()
|
|
80
|
+
|
|
81
|
+
def clear_cached_md(self) -> None:
|
|
82
|
+
self._cached_tbl_version = None
|
|
83
|
+
if self.base is not None:
|
|
84
|
+
self.base.clear_cached_md()
|
|
85
|
+
|
|
50
86
|
def tbl_id(self) -> UUID:
|
|
51
87
|
"""Return the id of the table/view that this path represents"""
|
|
52
88
|
return self.tbl_version.id
|
|
53
89
|
|
|
54
90
|
def version(self) -> int:
|
|
55
91
|
"""Return the version of the table/view that this path represents"""
|
|
56
|
-
|
|
92
|
+
self.refresh_cached_md()
|
|
93
|
+
return self._cached_tbl_version.version
|
|
57
94
|
|
|
58
95
|
def tbl_name(self) -> str:
|
|
59
96
|
"""Return the name of the table/view that this path represents"""
|
|
60
|
-
|
|
97
|
+
self.refresh_cached_md()
|
|
98
|
+
return self._cached_tbl_version.name
|
|
61
99
|
|
|
62
100
|
def path_len(self) -> int:
|
|
63
101
|
"""Return the length of the path"""
|
|
@@ -65,18 +103,22 @@ class TableVersionPath:
|
|
|
65
103
|
|
|
66
104
|
def is_snapshot(self) -> bool:
|
|
67
105
|
"""Return True if this is a path of snapshot versions"""
|
|
68
|
-
|
|
106
|
+
self.refresh_cached_md()
|
|
107
|
+
if not self._cached_tbl_version.is_snapshot:
|
|
69
108
|
return False
|
|
70
109
|
return self.base.is_snapshot() if self.base is not None else True
|
|
71
110
|
|
|
72
111
|
def is_view(self) -> bool:
|
|
73
|
-
|
|
112
|
+
self.refresh_cached_md()
|
|
113
|
+
return self._cached_tbl_version.is_view
|
|
74
114
|
|
|
75
115
|
def is_component_view(self) -> bool:
|
|
76
|
-
|
|
116
|
+
self.refresh_cached_md()
|
|
117
|
+
return self._cached_tbl_version.is_component_view
|
|
77
118
|
|
|
78
119
|
def is_insertable(self) -> bool:
|
|
79
|
-
|
|
120
|
+
self.refresh_cached_md()
|
|
121
|
+
return self._cached_tbl_version.is_insertable
|
|
80
122
|
|
|
81
123
|
def get_tbl_versions(self) -> list[TableVersionHandle]:
|
|
82
124
|
"""Return all tbl versions"""
|
|
@@ -98,20 +140,14 @@ class TableVersionPath:
|
|
|
98
140
|
return None
|
|
99
141
|
return self.base.find_tbl_version(id)
|
|
100
142
|
|
|
101
|
-
@property
|
|
102
|
-
def ancestor_paths(self) -> list[TableVersionPath]:
|
|
103
|
-
if self.base is None:
|
|
104
|
-
return [self]
|
|
105
|
-
else:
|
|
106
|
-
return [self, *self.base.ancestor_paths]
|
|
107
|
-
|
|
108
143
|
def columns(self) -> list[Column]:
|
|
109
144
|
"""Return all user columns visible in this tbl version path, including columns from bases"""
|
|
110
|
-
|
|
111
|
-
|
|
145
|
+
self.refresh_cached_md()
|
|
146
|
+
result = list(self._cached_tbl_version.cols_by_name.values())
|
|
147
|
+
if self.base is not None and self._cached_tbl_version.include_base_columns:
|
|
112
148
|
base_cols = self.base.columns()
|
|
113
149
|
# we only include base columns that don't conflict with one of our column names
|
|
114
|
-
result.extend(c for c in base_cols if c.name not in self.
|
|
150
|
+
result.extend(c for c in base_cols if c.name not in self._cached_tbl_version.cols_by_name)
|
|
115
151
|
return result
|
|
116
152
|
|
|
117
153
|
def cols_by_name(self) -> dict[str, Column]:
|
|
@@ -126,19 +162,21 @@ class TableVersionPath:
|
|
|
126
162
|
|
|
127
163
|
def get_column(self, name: str, include_bases: Optional[bool] = None) -> Optional[Column]:
|
|
128
164
|
"""Return the column with the given name, or None if not found"""
|
|
129
|
-
|
|
165
|
+
self.refresh_cached_md()
|
|
166
|
+
col = self._cached_tbl_version.cols_by_name.get(name)
|
|
130
167
|
if col is not None:
|
|
131
168
|
return col
|
|
132
|
-
elif self.base is not None and (include_bases or self.
|
|
169
|
+
elif self.base is not None and (include_bases or self._cached_tbl_version.include_base_columns):
|
|
133
170
|
return self.base.get_column(name)
|
|
134
171
|
else:
|
|
135
172
|
return None
|
|
136
173
|
|
|
137
174
|
def get_column_by_id(self, tbl_id: UUID, col_id: int) -> Optional[Column]:
|
|
138
175
|
"""Return the column for the given tbl/col id"""
|
|
176
|
+
self.refresh_cached_md()
|
|
139
177
|
if self.tbl_version.id == tbl_id:
|
|
140
|
-
assert col_id in self.
|
|
141
|
-
return self.
|
|
178
|
+
assert col_id in self._cached_tbl_version.cols_by_id
|
|
179
|
+
return self._cached_tbl_version.cols_by_id[col_id]
|
|
142
180
|
elif self.base is not None:
|
|
143
181
|
return self.base.get_column_by_id(tbl_id, col_id)
|
|
144
182
|
else:
|
|
@@ -146,11 +184,12 @@ class TableVersionPath:
|
|
|
146
184
|
|
|
147
185
|
def has_column(self, col: Column, include_bases: bool = True) -> bool:
|
|
148
186
|
"""Return True if this table has the given column."""
|
|
187
|
+
self.refresh_cached_md()
|
|
149
188
|
assert col.tbl is not None
|
|
150
189
|
if (
|
|
151
190
|
col.tbl.id == self.tbl_version.id
|
|
152
191
|
and col.tbl.effective_version == self.tbl_version.effective_version
|
|
153
|
-
and col.id in self.
|
|
192
|
+
and col.id in self._cached_tbl_version.cols_by_id
|
|
154
193
|
):
|
|
155
194
|
# the column is visible in this table version
|
|
156
195
|
return True
|
pixeltable/catalog/view.py
CHANGED
|
@@ -204,8 +204,17 @@ class View(Table):
|
|
|
204
204
|
|
|
205
205
|
from pixeltable.plan import Planner
|
|
206
206
|
|
|
207
|
-
|
|
208
|
-
|
|
207
|
+
try:
|
|
208
|
+
plan, _ = Planner.create_view_load_plan(view._tbl_version_path)
|
|
209
|
+
num_rows, num_excs, _ = tbl_version.store_tbl.insert_rows(plan, v_min=tbl_version.version)
|
|
210
|
+
except:
|
|
211
|
+
# we need to remove the orphaned TableVersion instance
|
|
212
|
+
del catalog.Catalog.get()._tbl_versions[tbl_version.id, tbl_version.effective_version]
|
|
213
|
+
base_tbl_version = base.tbl_version.get()
|
|
214
|
+
if tbl_version.effective_version is None and not base_tbl_version.is_snapshot:
|
|
215
|
+
# also remove tbl_version from the base
|
|
216
|
+
base_tbl_version.mutable_views.remove(TableVersionHandle.create(tbl_version))
|
|
217
|
+
raise
|
|
209
218
|
Env.get().console_logger.info(f'Created view `{name}` with {num_rows} rows, {num_excs} exceptions.')
|
|
210
219
|
|
|
211
220
|
session.commit()
|
|
@@ -285,13 +294,13 @@ class View(Table):
|
|
|
285
294
|
|
|
286
295
|
def _table_descriptor(self) -> str:
|
|
287
296
|
display_name = 'Snapshot' if self._snapshot_only else 'View'
|
|
288
|
-
result = [f'{display_name} {self._path!r}']
|
|
297
|
+
result = [f'{display_name} {self._path()!r}']
|
|
289
298
|
bases_descrs: list[str] = []
|
|
290
299
|
for base, effective_version in zip(self._base_tables, self._effective_base_versions):
|
|
291
300
|
if effective_version is None:
|
|
292
|
-
bases_descrs.append(f'{base._path!r}')
|
|
301
|
+
bases_descrs.append(f'{base._path()!r}')
|
|
293
302
|
else:
|
|
294
|
-
base_descr = f'{base._path}:{effective_version}'
|
|
303
|
+
base_descr = f'{base._path()}:{effective_version}'
|
|
295
304
|
bases_descrs.append(f'{base_descr!r}')
|
|
296
305
|
result.append(f' (of {", ".join(bases_descrs)})')
|
|
297
306
|
|
pixeltable/dataframe.py
CHANGED
|
@@ -14,7 +14,7 @@ import pandas as pd
|
|
|
14
14
|
import sqlalchemy as sql
|
|
15
15
|
|
|
16
16
|
from pixeltable import catalog, exceptions as excs, exec, exprs, plan, type_system as ts
|
|
17
|
-
from pixeltable.catalog import is_valid_identifier
|
|
17
|
+
from pixeltable.catalog import Catalog, is_valid_identifier
|
|
18
18
|
from pixeltable.catalog.globals import UpdateStatus
|
|
19
19
|
from pixeltable.env import Env
|
|
20
20
|
from pixeltable.type_system import ColumnType
|
|
@@ -431,7 +431,7 @@ class DataFrame:
|
|
|
431
431
|
raise excs.Error(msg) from e
|
|
432
432
|
|
|
433
433
|
def _output_row_iterator(self) -> Iterator[list]:
|
|
434
|
-
with
|
|
434
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
435
435
|
try:
|
|
436
436
|
for data_row in self._exec():
|
|
437
437
|
yield [data_row[e.slot_idx] for e in self._select_list_exprs]
|
|
@@ -463,8 +463,8 @@ class DataFrame:
|
|
|
463
463
|
|
|
464
464
|
from pixeltable.plan import Planner
|
|
465
465
|
|
|
466
|
-
|
|
467
|
-
|
|
466
|
+
with Catalog.get().begin_xact(for_write=False) as conn:
|
|
467
|
+
stmt = Planner.create_count_stmt(self._first_tbl, self.where_clause)
|
|
468
468
|
result: int = conn.execute(stmt).scalar_one()
|
|
469
469
|
assert isinstance(result, int)
|
|
470
470
|
return result
|
|
@@ -988,7 +988,8 @@ class DataFrame:
|
|
|
988
988
|
>>> df = person.where(t.year == 2014).update({'age': 30})
|
|
989
989
|
"""
|
|
990
990
|
self._validate_mutable('update', False)
|
|
991
|
-
|
|
991
|
+
tbl_id = self._first_tbl.tbl_id()
|
|
992
|
+
with Catalog.get().begin_xact(tbl_id=tbl_id, for_write=True):
|
|
992
993
|
return self._first_tbl.tbl_version.get().update(value_spec, where=self.where_clause, cascade=cascade)
|
|
993
994
|
|
|
994
995
|
def delete(self) -> UpdateStatus:
|
|
@@ -1011,7 +1012,8 @@ class DataFrame:
|
|
|
1011
1012
|
self._validate_mutable('delete', False)
|
|
1012
1013
|
if not self._first_tbl.is_insertable():
|
|
1013
1014
|
raise excs.Error('Cannot delete from view')
|
|
1014
|
-
|
|
1015
|
+
tbl_id = self._first_tbl.tbl_id()
|
|
1016
|
+
with Catalog.get().begin_xact(tbl_id=tbl_id, for_write=True):
|
|
1015
1017
|
return self._first_tbl.tbl_version.get().delete(where=self.where_clause)
|
|
1016
1018
|
|
|
1017
1019
|
def _validate_mutable(self, op_name: str, allow_select: bool) -> None:
|
|
@@ -1059,7 +1061,7 @@ class DataFrame:
|
|
|
1059
1061
|
@classmethod
|
|
1060
1062
|
def from_dict(cls, d: dict[str, Any]) -> 'DataFrame':
|
|
1061
1063
|
# we need to wrap the construction with a transaction, because it might need to load metadata
|
|
1062
|
-
with
|
|
1064
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
1063
1065
|
tbls = [catalog.TableVersionPath.from_dict(tbl_dict) for tbl_dict in d['from_clause']['tbls']]
|
|
1064
1066
|
join_clauses = [plan.JoinClause(**clause_dict) for clause_dict in d['from_clause']['join_clauses']]
|
|
1065
1067
|
from_clause = plan.FromClause(tbls=tbls, join_clauses=join_clauses)
|
|
@@ -1129,7 +1131,7 @@ class DataFrame:
|
|
|
1129
1131
|
assert data_file_path.is_file()
|
|
1130
1132
|
return data_file_path
|
|
1131
1133
|
else:
|
|
1132
|
-
with
|
|
1134
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
1133
1135
|
return write_coco_dataset(self, dest_path)
|
|
1134
1136
|
|
|
1135
1137
|
def to_pytorch_dataset(self, image_format: str = 'pt') -> 'torch.utils.data.IterableDataset':
|
|
@@ -1174,7 +1176,7 @@ class DataFrame:
|
|
|
1174
1176
|
if dest_path.exists(): # fast path: use cache
|
|
1175
1177
|
assert dest_path.is_dir()
|
|
1176
1178
|
else:
|
|
1177
|
-
with
|
|
1179
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
1178
1180
|
export_parquet(self, dest_path, inline_images=True)
|
|
1179
1181
|
|
|
1180
1182
|
return PixeltablePytorchDataset(path=dest_path, image_format=image_format)
|
pixeltable/env.py
CHANGED
|
@@ -191,6 +191,7 @@ class Env:
|
|
|
191
191
|
assert self._dbms is not None
|
|
192
192
|
return self._dbms
|
|
193
193
|
|
|
194
|
+
@property
|
|
194
195
|
def in_xact(self) -> bool:
|
|
195
196
|
return self._current_conn is not None
|
|
196
197
|
|
|
@@ -201,20 +202,17 @@ class Env:
|
|
|
201
202
|
|
|
202
203
|
@contextmanager
|
|
203
204
|
def begin_xact(self) -> Iterator[sql.Connection]:
|
|
204
|
-
"""
|
|
205
|
+
"""Call Catalog.begin_xact() instead, unless there is a specific reason to call this directly."""
|
|
205
206
|
if self._current_conn is None:
|
|
206
207
|
assert self._current_session is None
|
|
207
208
|
try:
|
|
208
209
|
with self.engine.begin() as conn, sql.orm.Session(conn) as session:
|
|
209
|
-
# TODO: remove print() once we're done with debugging the concurrent update behavior
|
|
210
|
-
# print(f'{datetime.datetime.now()}: start xact')
|
|
211
210
|
self._current_conn = conn
|
|
212
211
|
self._current_session = session
|
|
213
212
|
yield conn
|
|
214
213
|
finally:
|
|
215
214
|
self._current_session = None
|
|
216
215
|
self._current_conn = None
|
|
217
|
-
# print(f'{datetime.datetime.now()}: end xact')
|
|
218
216
|
else:
|
|
219
217
|
assert self._current_session is not None
|
|
220
218
|
yield self._current_conn
|
|
@@ -38,7 +38,7 @@ class InMemoryDataNode(ExecNode):
|
|
|
38
38
|
# we materialize the input slots
|
|
39
39
|
output_exprs = list(row_builder.input_exprs)
|
|
40
40
|
super().__init__(row_builder, output_exprs, [], None)
|
|
41
|
-
assert tbl.get().is_insertable
|
|
41
|
+
assert tbl.get().is_insertable
|
|
42
42
|
self.tbl = tbl
|
|
43
43
|
self.input_rows = rows
|
|
44
44
|
self.start_row_id = start_row_id
|
pixeltable/exec/sql_node.py
CHANGED
|
@@ -134,6 +134,11 @@ class SqlNode(ExecNode):
|
|
|
134
134
|
self.where_clause_element = None
|
|
135
135
|
self.order_by_clause = []
|
|
136
136
|
|
|
137
|
+
if self.tbl is not None:
|
|
138
|
+
tv = self.tbl.tbl_version._tbl_version
|
|
139
|
+
if tv is not None:
|
|
140
|
+
assert tv.is_validated
|
|
141
|
+
|
|
137
142
|
def _create_stmt(self) -> sql.Select:
|
|
138
143
|
"""Create Select from local state"""
|
|
139
144
|
|
|
@@ -141,6 +146,7 @@ class SqlNode(ExecNode):
|
|
|
141
146
|
sql_select_list = [self.sql_elements.get(e) for e in self.select_list]
|
|
142
147
|
if self.set_pk:
|
|
143
148
|
assert self.tbl is not None
|
|
149
|
+
assert self.tbl.tbl_version.get().is_validated
|
|
144
150
|
sql_select_list += self.tbl.tbl_version.get().store_tbl.pk_columns()
|
|
145
151
|
stmt = sql.select(*sql_select_list)
|
|
146
152
|
|
|
@@ -220,26 +226,29 @@ class SqlNode(ExecNode):
|
|
|
220
226
|
joined_tbls.append(t)
|
|
221
227
|
|
|
222
228
|
first = True
|
|
223
|
-
|
|
229
|
+
prev_tv: Optional[catalog.TableVersion] = None
|
|
224
230
|
for t in joined_tbls[::-1]:
|
|
231
|
+
tv = t.get()
|
|
232
|
+
# _logger.debug(f'create_from_clause: tbl_id={tv.id} {id(tv.store_tbl.sa_tbl)}')
|
|
225
233
|
if first:
|
|
226
|
-
stmt = stmt.select_from(
|
|
234
|
+
stmt = stmt.select_from(tv.store_tbl.sa_tbl)
|
|
227
235
|
first = False
|
|
228
236
|
else:
|
|
229
|
-
# join
|
|
230
|
-
prev_tbl_rowid_cols =
|
|
231
|
-
tbl_rowid_cols =
|
|
237
|
+
# join tv to prev_tv on prev_tv's rowid cols
|
|
238
|
+
prev_tbl_rowid_cols = prev_tv.store_tbl.rowid_columns()
|
|
239
|
+
tbl_rowid_cols = tv.store_tbl.rowid_columns()
|
|
232
240
|
rowid_clauses = [
|
|
233
241
|
c1 == c2 for c1, c2 in zip(prev_tbl_rowid_cols, tbl_rowid_cols[: len(prev_tbl_rowid_cols)])
|
|
234
242
|
]
|
|
235
|
-
stmt = stmt.join(
|
|
243
|
+
stmt = stmt.join(tv.store_tbl.sa_tbl, sql.and_(*rowid_clauses))
|
|
244
|
+
|
|
236
245
|
if t.id in exact_version_only:
|
|
237
|
-
stmt = stmt.where(
|
|
246
|
+
stmt = stmt.where(tv.store_tbl.v_min_col == tv.version)
|
|
238
247
|
else:
|
|
239
|
-
stmt = stmt.where(
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
248
|
+
stmt = stmt.where(tv.store_tbl.sa_tbl.c.v_min <= tv.version)
|
|
249
|
+
stmt = stmt.where(tv.store_tbl.sa_tbl.c.v_max > tv.version)
|
|
250
|
+
prev_tv = tv
|
|
251
|
+
|
|
243
252
|
return stmt
|
|
244
253
|
|
|
245
254
|
def set_where(self, where_clause: exprs.Expr) -> None:
|
|
@@ -58,20 +58,29 @@ class ColumnPropertyRef(Expr):
|
|
|
58
58
|
if not self._col_ref.col.is_stored:
|
|
59
59
|
return None
|
|
60
60
|
|
|
61
|
+
# we need to reestablish that we have the correct Column instance, there could have been a metadata
|
|
62
|
+
# reload since init()
|
|
63
|
+
# TODO: add an explicit prepare phase (ie, Expr.prepare()) that gives every subclass instance a chance to
|
|
64
|
+
# perform runtime checks and update state
|
|
65
|
+
tv = self._col_ref.tbl_version.get()
|
|
66
|
+
assert tv.is_validated
|
|
67
|
+
col = tv.cols_by_id[self._col_ref.col_id]
|
|
68
|
+
# TODO: check for column being dropped
|
|
69
|
+
|
|
61
70
|
# the errortype/-msg properties of a read-validated media column need to be extracted from the DataRow
|
|
62
71
|
if (
|
|
63
|
-
|
|
64
|
-
and
|
|
72
|
+
col.col_type.is_media_type()
|
|
73
|
+
and col.media_validation == catalog.MediaValidation.ON_READ
|
|
65
74
|
and self.is_error_prop()
|
|
66
75
|
):
|
|
67
76
|
return None
|
|
68
77
|
|
|
69
78
|
if self.prop == self.Property.ERRORTYPE:
|
|
70
|
-
assert
|
|
71
|
-
return
|
|
79
|
+
assert col.sa_errortype_col is not None
|
|
80
|
+
return col.sa_errortype_col
|
|
72
81
|
if self.prop == self.Property.ERRORMSG:
|
|
73
|
-
assert
|
|
74
|
-
return
|
|
82
|
+
assert col.sa_errormsg_col is not None
|
|
83
|
+
return col.sa_errormsg_col
|
|
75
84
|
if self.prop == self.Property.FILEURL:
|
|
76
85
|
# the file url is stored as the column value
|
|
77
86
|
return sql_elements.get(self._col_ref)
|
pixeltable/exprs/column_ref.py
CHANGED
|
@@ -52,6 +52,10 @@ class ColumnRef(Expr):
|
|
|
52
52
|
id: int
|
|
53
53
|
perform_validation: bool # if True, performs media validation
|
|
54
54
|
|
|
55
|
+
# needed by sql_expr() to re-resolve Column instance after a metadata reload
|
|
56
|
+
tbl_version: catalog.TableVersionHandle
|
|
57
|
+
col_id: int
|
|
58
|
+
|
|
55
59
|
def __init__(
|
|
56
60
|
self,
|
|
57
61
|
col: catalog.Column,
|
|
@@ -62,16 +66,17 @@ class ColumnRef(Expr):
|
|
|
62
66
|
assert col.tbl is not None
|
|
63
67
|
self.col = col
|
|
64
68
|
self.reference_tbl = reference_tbl
|
|
65
|
-
self.
|
|
66
|
-
|
|
67
|
-
|
|
69
|
+
self.tbl_version = catalog.TableVersionHandle(col.tbl.id, col.tbl.effective_version)
|
|
70
|
+
self.col_id = col.id
|
|
71
|
+
|
|
72
|
+
self.is_unstored_iter_col = col.tbl.is_component_view and col.tbl.is_iterator_column(col) and not col.is_stored
|
|
68
73
|
self.iter_arg_ctx = None
|
|
69
74
|
# number of rowid columns in the base table
|
|
70
|
-
self.base_rowid_len = col.tbl.
|
|
75
|
+
self.base_rowid_len = col.tbl.base.get().num_rowid_columns() if self.is_unstored_iter_col else 0
|
|
71
76
|
self.base_rowid = [None] * self.base_rowid_len
|
|
72
77
|
self.iterator = None
|
|
73
78
|
# index of the position column in the view's primary key; don't try to reference tbl.store_tbl here
|
|
74
|
-
self.pos_idx = col.tbl.
|
|
79
|
+
self.pos_idx = col.tbl.num_rowid_columns() - 1 if self.is_unstored_iter_col else None
|
|
75
80
|
|
|
76
81
|
self.perform_validation = False
|
|
77
82
|
if col.col_type.is_media_type():
|
|
@@ -175,7 +180,7 @@ class ColumnRef(Expr):
|
|
|
175
180
|
assert len(idx_info) == 1
|
|
176
181
|
col = copy.copy(next(iter(idx_info.values())).val_col)
|
|
177
182
|
col.name = f'{self.col.name}_embedding_{idx if idx is not None else ""}'
|
|
178
|
-
col.create_sa_cols()
|
|
183
|
+
# col.create_sa_cols()
|
|
179
184
|
return ColumnRef(col)
|
|
180
185
|
|
|
181
186
|
def default_column_name(self) -> Optional[str]:
|
|
@@ -226,7 +231,7 @@ class ColumnRef(Expr):
|
|
|
226
231
|
def _descriptors(self) -> DescriptionHelper:
|
|
227
232
|
tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl.id)
|
|
228
233
|
helper = DescriptionHelper()
|
|
229
|
-
helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path!r})')
|
|
234
|
+
helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path()!r})')
|
|
230
235
|
helper.append(tbl._col_descriptor([self.col.name]))
|
|
231
236
|
idxs = tbl._index_descriptor([self.col.name])
|
|
232
237
|
if len(idxs) > 0:
|
|
@@ -234,7 +239,23 @@ class ColumnRef(Expr):
|
|
|
234
239
|
return helper
|
|
235
240
|
|
|
236
241
|
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
237
|
-
return None if self.perform_validation else self.col.sa_col
|
|
242
|
+
# return None if self.perform_validation else self.col.sa_col
|
|
243
|
+
if self.perform_validation:
|
|
244
|
+
return None
|
|
245
|
+
# we need to reestablish that we have the correct Column instance, there could have been a metadata
|
|
246
|
+
# reload since init()
|
|
247
|
+
# TODO: add an explicit prepare phase (ie, Expr.prepare()) that gives every subclass instance a chance to
|
|
248
|
+
# perform runtime checks and update state
|
|
249
|
+
tv = self.tbl_version.get()
|
|
250
|
+
assert tv.is_validated
|
|
251
|
+
self.col = tv.cols_by_id[self.col_id]
|
|
252
|
+
assert self.col.tbl is tv
|
|
253
|
+
# TODO: check for column being dropped
|
|
254
|
+
# print(
|
|
255
|
+
# f'ColumnRef.sql_expr: tbl={tv.id}:{tv.effective_version} sa_tbl={id(self.col.tbl.store_tbl.sa_tbl):x} '
|
|
256
|
+
# f'tv={id(tv):x}'
|
|
257
|
+
# )
|
|
258
|
+
return self.col.sa_col
|
|
238
259
|
|
|
239
260
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
240
261
|
if self.perform_validation:
|
|
@@ -275,7 +296,7 @@ class ColumnRef(Expr):
|
|
|
275
296
|
if self.base_rowid != data_row.pk[: self.base_rowid_len]:
|
|
276
297
|
row_builder.eval(data_row, self.iter_arg_ctx)
|
|
277
298
|
iterator_args = data_row[self.iter_arg_ctx.target_slot_idxs[0]]
|
|
278
|
-
self.iterator = self.col.tbl.
|
|
299
|
+
self.iterator = self.col.tbl.iterator_cls(**iterator_args)
|
|
279
300
|
self.base_rowid = data_row.pk[: self.base_rowid_len]
|
|
280
301
|
self.iterator.set_pos(data_row.pk[self.pos_idx])
|
|
281
302
|
res = next(self.iterator)
|
|
@@ -283,12 +304,12 @@ class ColumnRef(Expr):
|
|
|
283
304
|
|
|
284
305
|
def _as_dict(self) -> dict:
|
|
285
306
|
tbl = self.col.tbl
|
|
286
|
-
|
|
307
|
+
version = tbl.version if tbl.is_snapshot else None
|
|
287
308
|
# we omit self.components, even if this is a validating ColumnRef, because init() will recreate the
|
|
288
309
|
# non-validating component ColumnRef
|
|
289
310
|
return {
|
|
290
311
|
'tbl_id': str(tbl.id),
|
|
291
|
-
'tbl_version':
|
|
312
|
+
'tbl_version': version,
|
|
292
313
|
'col_id': self.col.id,
|
|
293
314
|
'reference_tbl': self.reference_tbl.as_dict() if self.reference_tbl is not None else None,
|
|
294
315
|
'perform_validation': self.perform_validation,
|
pixeltable/exprs/comparison.py
CHANGED
|
@@ -81,7 +81,7 @@ class Comparison(Expr):
|
|
|
81
81
|
if self.is_search_arg_comparison:
|
|
82
82
|
# reference the index value column if there is an index and this is not a snapshot
|
|
83
83
|
# (indices don't apply to snapshots)
|
|
84
|
-
tbl = self._op1.col.tbl
|
|
84
|
+
tbl = self._op1.col.tbl
|
|
85
85
|
idx_info = [
|
|
86
86
|
info for info in self._op1.col.get_idx_info().values() if isinstance(info.idx, index.BtreeIndex)
|
|
87
87
|
]
|
pixeltable/exprs/row_builder.py
CHANGED
|
@@ -172,13 +172,11 @@ class RowBuilder:
|
|
|
172
172
|
|
|
173
173
|
def refs_unstored_iter_col(col_ref: ColumnRef) -> bool:
|
|
174
174
|
tbl = col_ref.col.tbl
|
|
175
|
-
return (
|
|
176
|
-
tbl.get().is_component_view and tbl.get().is_iterator_column(col_ref.col) and not col_ref.col.is_stored
|
|
177
|
-
)
|
|
175
|
+
return tbl.is_component_view and tbl.is_iterator_column(col_ref.col) and not col_ref.col.is_stored
|
|
178
176
|
|
|
179
177
|
unstored_iter_col_refs = [col_ref for col_ref in col_refs if refs_unstored_iter_col(col_ref)]
|
|
180
178
|
component_views = [col_ref.col.tbl for col_ref in unstored_iter_col_refs]
|
|
181
|
-
unstored_iter_args = {view.id: view.
|
|
179
|
+
unstored_iter_args = {view.id: view.iterator_args.copy() for view in component_views}
|
|
182
180
|
self.unstored_iter_args = {
|
|
183
181
|
id: self._record_unique_expr(arg, recursive=True) for id, arg in unstored_iter_args.items()
|
|
184
182
|
}
|
|
@@ -450,9 +448,9 @@ class RowBuilder:
|
|
|
450
448
|
else:
|
|
451
449
|
if col.col_type.is_image_type() and data_row.file_urls[slot_idx] is None:
|
|
452
450
|
# we have yet to store this image
|
|
453
|
-
filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.
|
|
451
|
+
filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.version))
|
|
454
452
|
data_row.flush_img(slot_idx, filepath)
|
|
455
|
-
val = data_row.get_stored_val(slot_idx, col.
|
|
453
|
+
val = data_row.get_stored_val(slot_idx, col.get_sa_col_type())
|
|
456
454
|
table_row[col.store_name()] = val
|
|
457
455
|
# we unfortunately need to set these, even if there are no errors
|
|
458
456
|
table_row[col.errortype_store_name()] = None
|
pixeltable/exprs/rowid_ref.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import logging
|
|
3
4
|
from typing import Any, Optional, cast
|
|
4
5
|
from uuid import UUID
|
|
5
6
|
|
|
@@ -12,6 +13,8 @@ from .expr import Expr
|
|
|
12
13
|
from .row_builder import RowBuilder
|
|
13
14
|
from .sql_element_cache import SqlElementCache
|
|
14
15
|
|
|
16
|
+
_logger = logging.getLogger('pixeltable')
|
|
17
|
+
|
|
15
18
|
|
|
16
19
|
class RowidRef(Expr):
|
|
17
20
|
"""A reference to a part of a table rowid
|
|
@@ -97,10 +100,15 @@ class RowidRef(Expr):
|
|
|
97
100
|
|
|
98
101
|
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
99
102
|
tbl = self.tbl.get() if self.tbl is not None else catalog.Catalog.get().get_tbl_version(self.tbl_id, None)
|
|
103
|
+
assert tbl.is_validated
|
|
100
104
|
rowid_cols = tbl.store_tbl.rowid_columns()
|
|
101
105
|
assert self.rowid_component_idx <= len(rowid_cols), (
|
|
102
106
|
f'{self.rowid_component_idx} not consistent with {rowid_cols}'
|
|
103
107
|
)
|
|
108
|
+
# _logger.debug(
|
|
109
|
+
# f'RowidRef.sql_expr: tbl={tbl.id}{tbl.effective_version} sa_tbl={id(tbl.store_tbl.sa_tbl):x} '
|
|
110
|
+
# f'tv={id(tbl):x}'
|
|
111
|
+
# )
|
|
104
112
|
return rowid_cols[self.rowid_component_idx]
|
|
105
113
|
|
|
106
114
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
@@ -54,6 +54,7 @@ class SimilarityExpr(Expr):
|
|
|
54
54
|
return 'similarity'
|
|
55
55
|
|
|
56
56
|
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
57
|
+
# TODO: validate that the index still exists
|
|
57
58
|
if not isinstance(self.components[1], Literal):
|
|
58
59
|
raise excs.Error('similarity(): requires a string or a PIL.Image.Image object, not an expression')
|
|
59
60
|
item = self.components[1].val
|
|
@@ -162,7 +162,7 @@ def retrieval_udf(
|
|
|
162
162
|
else:
|
|
163
163
|
for param in parameters:
|
|
164
164
|
if isinstance(param, str) and param not in table.columns:
|
|
165
|
-
raise excs.Error(f'The specified parameter {param!r} is not a column of the table {table._path!r}')
|
|
165
|
+
raise excs.Error(f'The specified parameter {param!r} is not a column of the table {table._path()!r}')
|
|
166
166
|
col_refs = [table[param] if isinstance(param, str) else param for param in parameters]
|
|
167
167
|
|
|
168
168
|
if len(col_refs) == 0:
|