pixeltable 0.3.15__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +9 -1
- pixeltable/catalog/catalog.py +559 -134
- pixeltable/catalog/column.py +36 -32
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +12 -0
- pixeltable/catalog/insertable_table.py +30 -25
- pixeltable/catalog/schema_object.py +9 -6
- pixeltable/catalog/table.py +334 -267
- pixeltable/catalog/table_version.py +358 -241
- pixeltable/catalog/table_version_handle.py +18 -2
- pixeltable/catalog/table_version_path.py +86 -16
- pixeltable/catalog/view.py +47 -23
- pixeltable/dataframe.py +198 -19
- pixeltable/env.py +6 -4
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +1 -1
- pixeltable/exec/exec_node.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +4 -1
- pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
- pixeltable/exec/in_memory_data_node.py +1 -1
- pixeltable/exec/sql_node.py +188 -22
- pixeltable/exprs/column_property_ref.py +16 -6
- pixeltable/exprs/column_ref.py +33 -11
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/data_row.py +5 -3
- pixeltable/exprs/expr.py +11 -4
- pixeltable/exprs/literal.py +2 -0
- pixeltable/exprs/row_builder.py +4 -6
- pixeltable/exprs/rowid_ref.py +8 -0
- pixeltable/exprs/similarity_expr.py +1 -0
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +5 -3
- pixeltable/func/tools.py +12 -2
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +19 -45
- pixeltable/functions/deepseek.py +19 -38
- pixeltable/functions/fireworks.py +9 -18
- pixeltable/functions/gemini.py +2 -3
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/llama_cpp.py +6 -6
- pixeltable/functions/mistralai.py +16 -53
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +82 -165
- pixeltable/functions/string.py +212 -58
- pixeltable/functions/together.py +22 -80
- pixeltable/globals.py +10 -4
- pixeltable/index/base.py +5 -0
- pixeltable/index/btree.py +5 -0
- pixeltable/index/embedding_index.py +5 -0
- pixeltable/io/external_store.py +10 -31
- pixeltable/io/label_studio.py +5 -5
- pixeltable/io/parquet.py +2 -2
- pixeltable/io/table_data_conduit.py +1 -32
- pixeltable/metadata/__init__.py +11 -2
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_30.py +6 -11
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/util.py +3 -9
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +13 -1
- pixeltable/plan.py +135 -12
- pixeltable/share/packager.py +138 -14
- pixeltable/share/publish.py +2 -2
- pixeltable/store.py +19 -13
- pixeltable/type_system.py +30 -0
- pixeltable/utils/dbms.py +1 -1
- pixeltable/utils/formatter.py +64 -42
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/METADATA +2 -1
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/RECORD +78 -73
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import copy
|
|
3
4
|
import dataclasses
|
|
4
5
|
import importlib
|
|
5
6
|
import logging
|
|
@@ -22,6 +23,10 @@ from pixeltable.utils.exception_handler import run_cleanup_on_exception
|
|
|
22
23
|
from pixeltable.utils.filecache import FileCache
|
|
23
24
|
from pixeltable.utils.media_store import MediaStore
|
|
24
25
|
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
from pixeltable.plan import SampleClause
|
|
28
|
+
|
|
29
|
+
|
|
25
30
|
from ..func.globals import resolve_symbol
|
|
26
31
|
from .column import Column
|
|
27
32
|
from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, UpdateStatus, is_valid_identifier
|
|
@@ -50,43 +55,46 @@ class TableVersion:
|
|
|
50
55
|
|
|
51
56
|
Instances of TableVersion should not be stored as member variables (ie, used across transaction boundaries).
|
|
52
57
|
Use a TableVersionHandle instead.
|
|
58
|
+
|
|
59
|
+
Only TableVersion and Catalog interact directly with stored metadata. Everything else needs to go through these
|
|
60
|
+
two classes.
|
|
53
61
|
"""
|
|
54
62
|
|
|
55
63
|
id: UUID
|
|
56
|
-
|
|
57
|
-
|
|
64
|
+
|
|
65
|
+
# record metadata stored in catalog
|
|
66
|
+
_tbl_md: schema.TableMd
|
|
67
|
+
_schema_version_md: schema.TableSchemaVersionMd
|
|
68
|
+
|
|
58
69
|
effective_version: Optional[int]
|
|
59
|
-
is_replica: bool
|
|
60
|
-
version: int
|
|
61
|
-
comment: str
|
|
62
|
-
media_validation: MediaValidation
|
|
63
|
-
num_retained_versions: int
|
|
64
|
-
schema_version: int
|
|
65
|
-
view_md: Optional[schema.ViewMd]
|
|
66
70
|
path: Optional[pxt.catalog.TableVersionPath] # only set for live tables; needed to resolve computed cols
|
|
67
71
|
base: Optional[TableVersionHandle] # only set for views
|
|
68
|
-
next_col_id: int
|
|
69
|
-
next_idx_id: int
|
|
70
|
-
next_rowid: int
|
|
71
72
|
predicate: Optional[exprs.Expr]
|
|
72
|
-
|
|
73
|
+
sample_clause: Optional['SampleClause']
|
|
74
|
+
|
|
73
75
|
iterator_cls: Optional[type[ComponentIterator]]
|
|
74
76
|
iterator_args: Optional[exprs.InlineDict]
|
|
75
77
|
num_iterator_cols: int
|
|
76
78
|
|
|
79
|
+
# target for data operation propagation (only set for non-snapshots, and only records non-snapshot views)
|
|
80
|
+
mutable_views: set[TableVersionHandle]
|
|
81
|
+
|
|
77
82
|
# contains complete history of columns, incl dropped ones
|
|
78
83
|
cols: list[Column]
|
|
79
84
|
# contains only user-facing (named) columns visible in this version
|
|
80
85
|
cols_by_name: dict[str, Column]
|
|
81
86
|
# contains only columns visible in this version, both system and user
|
|
82
87
|
cols_by_id: dict[int, Column]
|
|
83
|
-
# needed for _create_tbl_md()
|
|
84
|
-
idx_md: dict[int, schema.IndexMd]
|
|
85
88
|
# contains only actively maintained indices
|
|
86
89
|
idxs_by_name: dict[str, TableVersion.IndexInfo]
|
|
87
90
|
|
|
88
91
|
external_stores: dict[str, pxt.io.ExternalStore]
|
|
89
|
-
store_tbl: 'store.StoreBase'
|
|
92
|
+
store_tbl: Optional['store.StoreBase']
|
|
93
|
+
|
|
94
|
+
# used by Catalog to invalidate cached instances at the end of a transaction;
|
|
95
|
+
# True if this instance reflects the state of stored metadata in the context of this transaction and
|
|
96
|
+
# it is the instance cached in Catalog
|
|
97
|
+
is_validated: bool
|
|
90
98
|
|
|
91
99
|
@dataclasses.dataclass
|
|
92
100
|
class IndexInfo:
|
|
@@ -106,21 +114,15 @@ class TableVersion:
|
|
|
106
114
|
mutable_views: list[TableVersionHandle],
|
|
107
115
|
base_path: Optional[pxt.catalog.TableVersionPath] = None,
|
|
108
116
|
base: Optional[TableVersionHandle] = None,
|
|
109
|
-
# base_store_tbl: Optional['store.StoreBase'] = None,
|
|
110
117
|
):
|
|
118
|
+
self.is_validated = True # a freshly constructed instance is always valid
|
|
111
119
|
self.id = id
|
|
112
|
-
self.
|
|
113
|
-
self.
|
|
120
|
+
self._tbl_md = copy.deepcopy(tbl_md)
|
|
121
|
+
self._schema_version_md = copy.deepcopy(schema_version_md)
|
|
114
122
|
self.effective_version = effective_version
|
|
115
|
-
self.version = tbl_md.current_version if effective_version is None else effective_version
|
|
116
|
-
self.is_replica = tbl_md.is_replica
|
|
117
|
-
self.comment = schema_version_md.comment
|
|
118
|
-
self.num_retained_versions = schema_version_md.num_retained_versions
|
|
119
|
-
self.schema_version = schema_version_md.schema_version
|
|
120
|
-
self.view_md = tbl_md.view_md # save this as-is, it's needed for _create_md()
|
|
121
|
-
self.media_validation = MediaValidation[schema_version_md.media_validation.upper()]
|
|
122
123
|
assert not (self.is_view and base is None)
|
|
123
124
|
self.base = base
|
|
125
|
+
self.store_tbl = None
|
|
124
126
|
|
|
125
127
|
# mutable tables need their TableVersionPath for expr eval during updates
|
|
126
128
|
from .table_version_handle import TableVersionHandle
|
|
@@ -134,22 +136,14 @@ class TableVersion:
|
|
|
134
136
|
assert base_path is not None
|
|
135
137
|
self.path = TableVersionPath(self_handle, base=base_path)
|
|
136
138
|
|
|
137
|
-
if self.is_snapshot:
|
|
138
|
-
self.next_col_id = -1
|
|
139
|
-
self.next_idx_id = -1 # TODO: can snapshots have separate indices?
|
|
140
|
-
self.next_rowid = -1
|
|
141
|
-
else:
|
|
142
|
-
assert tbl_md.current_version == self.version
|
|
143
|
-
self.next_col_id = tbl_md.next_col_id
|
|
144
|
-
self.next_idx_id = tbl_md.next_idx_id
|
|
145
|
-
self.next_rowid = tbl_md.next_row_id
|
|
146
|
-
|
|
147
139
|
# view-specific initialization
|
|
148
140
|
from pixeltable import exprs
|
|
141
|
+
from pixeltable.plan import SampleClause
|
|
149
142
|
|
|
150
143
|
predicate_dict = None if self.view_md is None or self.view_md.predicate is None else self.view_md.predicate
|
|
151
144
|
self.predicate = exprs.Expr.from_dict(predicate_dict) if predicate_dict is not None else None
|
|
152
|
-
self.
|
|
145
|
+
sample_dict = None if self.view_md is None or self.view_md.sample_clause is None else self.view_md.sample_clause
|
|
146
|
+
self.sample_clause = SampleClause.from_dict(sample_dict) if sample_dict is not None else None
|
|
153
147
|
|
|
154
148
|
# component view-specific initialization
|
|
155
149
|
self.iterator_cls = None
|
|
@@ -164,23 +158,15 @@ class TableVersion:
|
|
|
164
158
|
self.num_iterator_cols = len(output_schema)
|
|
165
159
|
assert tbl_md.view_md.iterator_args is not None
|
|
166
160
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
cat.add_tbl_version(self)
|
|
161
|
+
self.mutable_views = set(mutable_views)
|
|
162
|
+
assert self.is_mutable or len(self.mutable_views) == 0
|
|
170
163
|
|
|
171
|
-
# init schema after we determined whether we're a component view, and before we create the store table
|
|
172
164
|
self.cols = []
|
|
173
165
|
self.cols_by_name = {}
|
|
174
166
|
self.cols_by_id = {}
|
|
175
|
-
self.idx_md = tbl_md.index_md
|
|
176
167
|
self.idxs_by_name = {}
|
|
177
168
|
self.external_stores = {}
|
|
178
169
|
|
|
179
|
-
self._init_schema(tbl_md, schema_version_md)
|
|
180
|
-
|
|
181
|
-
# Init external stores (this needs to happen after the schema is created)
|
|
182
|
-
self._init_external_stores(tbl_md)
|
|
183
|
-
|
|
184
170
|
def __hash__(self) -> int:
|
|
185
171
|
return hash(self.id)
|
|
186
172
|
|
|
@@ -188,19 +174,7 @@ class TableVersion:
|
|
|
188
174
|
"""Create a snapshot copy of this TableVersion"""
|
|
189
175
|
assert not self.is_snapshot
|
|
190
176
|
base = self.path.base.tbl_version if self.is_view else None
|
|
191
|
-
return TableVersion(
|
|
192
|
-
self.id,
|
|
193
|
-
self._create_tbl_md(),
|
|
194
|
-
self.version,
|
|
195
|
-
self._create_schema_version_md(preceding_schema_version=0), # preceding_schema_version: dummy value
|
|
196
|
-
mutable_views=[],
|
|
197
|
-
base=base,
|
|
198
|
-
)
|
|
199
|
-
|
|
200
|
-
def create_handle(self) -> TableVersionHandle:
|
|
201
|
-
from .table_version_handle import TableVersionHandle
|
|
202
|
-
|
|
203
|
-
return TableVersionHandle(self.id, self.effective_version, tbl_version=self)
|
|
177
|
+
return TableVersion(self.id, self.tbl_md, self.version, self.schema_version_md, mutable_views=[], base=base)
|
|
204
178
|
|
|
205
179
|
@property
|
|
206
180
|
def versioned_name(self) -> str:
|
|
@@ -248,6 +222,7 @@ class TableVersion:
|
|
|
248
222
|
next_col_id=len(cols),
|
|
249
223
|
next_idx_id=0,
|
|
250
224
|
next_row_id=0,
|
|
225
|
+
view_sn=0,
|
|
251
226
|
column_md=column_md,
|
|
252
227
|
index_md={},
|
|
253
228
|
external_stores=[],
|
|
@@ -292,7 +267,13 @@ class TableVersion:
|
|
|
292
267
|
|
|
293
268
|
# if this is purely a snapshot (it doesn't require any additional storage for columns and it doesn't have a
|
|
294
269
|
# predicate to apply at runtime), we don't create a physical table and simply use the base's table version path
|
|
295
|
-
if
|
|
270
|
+
if (
|
|
271
|
+
view_md is not None
|
|
272
|
+
and view_md.is_snapshot
|
|
273
|
+
and view_md.predicate is None
|
|
274
|
+
and view_md.sample_clause is None
|
|
275
|
+
and len(cols) == 0
|
|
276
|
+
):
|
|
296
277
|
session.add(tbl_record)
|
|
297
278
|
session.add(tbl_version_record)
|
|
298
279
|
session.add(schema_version_record)
|
|
@@ -306,8 +287,19 @@ class TableVersion:
|
|
|
306
287
|
tbl_version = cls(
|
|
307
288
|
tbl_record.id, table_md, effective_version, schema_version_md, [], base_path=base_path, base=base
|
|
308
289
|
)
|
|
309
|
-
|
|
290
|
+
# TODO: break this up, so that Catalog.create_table() registers tbl_version
|
|
291
|
+
cat = pxt.catalog.Catalog.get()
|
|
292
|
+
cat._tbl_versions[tbl_record.id, effective_version] = tbl_version
|
|
293
|
+
tbl_version.init()
|
|
310
294
|
tbl_version.store_tbl.create()
|
|
295
|
+
is_mutable = not is_snapshot and not table_md.is_replica
|
|
296
|
+
if base is not None and base.get().is_mutable and is_mutable:
|
|
297
|
+
from .table_version_handle import TableVersionHandle
|
|
298
|
+
|
|
299
|
+
handle = TableVersionHandle(tbl_version.id, effective_version)
|
|
300
|
+
assert handle not in base.get().mutable_views
|
|
301
|
+
base.get().mutable_views.add(handle)
|
|
302
|
+
|
|
311
303
|
if view_md is None or not view_md.is_snapshot:
|
|
312
304
|
# add default indices, after creating the store table
|
|
313
305
|
for col in tbl_version.cols_by_name.values():
|
|
@@ -315,7 +307,7 @@ class TableVersion:
|
|
|
315
307
|
assert status is None or status.num_excs == 0
|
|
316
308
|
|
|
317
309
|
# we re-create the tbl_record here, now that we have new index metadata
|
|
318
|
-
tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_version.
|
|
310
|
+
tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_version.tbl_md))
|
|
319
311
|
session.add(tbl_record)
|
|
320
312
|
session.add(tbl_version_record)
|
|
321
313
|
session.add(schema_version_record)
|
|
@@ -331,40 +323,70 @@ class TableVersion:
|
|
|
331
323
|
tbl_version = cls(
|
|
332
324
|
tbl_id, md.tbl_md, md.version_md.version, md.schema_version_md, [], base_path=base_path, base=base
|
|
333
325
|
)
|
|
326
|
+
cat = pxt.catalog.Catalog.get()
|
|
327
|
+
cat._tbl_versions[tbl_version.id, tbl_version.effective_version] = tbl_version
|
|
328
|
+
tbl_version.init()
|
|
334
329
|
tbl_version.store_tbl.create()
|
|
335
330
|
tbl_version.store_tbl.ensure_columns_exist(col for col in tbl_version.cols if col.is_stored)
|
|
336
331
|
return tbl_version
|
|
337
332
|
|
|
338
333
|
def drop(self) -> None:
|
|
339
|
-
|
|
334
|
+
if self.is_view and self.is_mutable:
|
|
335
|
+
# update mutable_views
|
|
336
|
+
# TODO: invalidate base to force reload
|
|
337
|
+
from .table_version_handle import TableVersionHandle
|
|
340
338
|
|
|
341
|
-
|
|
339
|
+
assert self.base is not None
|
|
340
|
+
if self.base.get().is_mutable:
|
|
341
|
+
self.base.get().mutable_views.remove(TableVersionHandle.create(self))
|
|
342
|
+
|
|
343
|
+
# cat = Catalog.get()
|
|
342
344
|
# delete this table and all associated data
|
|
343
345
|
MediaStore.delete(self.id)
|
|
344
346
|
FileCache.get().clear(tbl_id=self.id)
|
|
345
|
-
cat.delete_tbl_md(self.id)
|
|
347
|
+
# cat.delete_tbl_md(self.id)
|
|
346
348
|
self.store_tbl.drop()
|
|
347
349
|
# de-register table version from catalog
|
|
348
|
-
cat.remove_tbl_version(self)
|
|
350
|
+
# cat.remove_tbl_version(self)
|
|
351
|
+
|
|
352
|
+
def init(self) -> None:
|
|
353
|
+
"""
|
|
354
|
+
Initialize schema-related in-memory metadata separately, now that this TableVersion instance is visible
|
|
355
|
+
in Catalog.
|
|
356
|
+
"""
|
|
357
|
+
from .catalog import Catalog
|
|
358
|
+
|
|
359
|
+
cat = Catalog.get()
|
|
360
|
+
assert (self.id, self.effective_version) in cat._tbl_versions
|
|
361
|
+
self._init_schema()
|
|
362
|
+
if not self.is_snapshot:
|
|
363
|
+
cat.record_column_dependencies(self)
|
|
349
364
|
|
|
350
|
-
|
|
365
|
+
# init external stores; this needs to happen after the schema is created
|
|
366
|
+
self._init_external_stores()
|
|
367
|
+
|
|
368
|
+
def _init_schema(self) -> None:
|
|
351
369
|
# create columns first, so the indices can reference them
|
|
352
|
-
self._init_cols(
|
|
370
|
+
self._init_cols()
|
|
353
371
|
if not self.is_snapshot:
|
|
354
|
-
self._init_idxs(
|
|
372
|
+
self._init_idxs()
|
|
355
373
|
# create the sa schema only after creating the columns and indices
|
|
356
374
|
self._init_sa_schema()
|
|
357
375
|
|
|
358
|
-
|
|
376
|
+
# created value_exprs after everything else has been initialized
|
|
377
|
+
for col in self.cols_by_id.values():
|
|
378
|
+
col.init_value_expr()
|
|
379
|
+
|
|
380
|
+
def _init_cols(self) -> None:
|
|
359
381
|
"""Initialize self.cols with the columns visible in our effective version"""
|
|
360
382
|
self.cols = []
|
|
361
383
|
self.cols_by_name = {}
|
|
362
384
|
self.cols_by_id = {}
|
|
363
385
|
# Sort columns in column_md by the position specified in col_md.id to guarantee that all references
|
|
364
386
|
# point backward.
|
|
365
|
-
sorted_column_md = sorted(tbl_md.column_md.values(), key=lambda item: item.id)
|
|
387
|
+
sorted_column_md = sorted(self.tbl_md.column_md.values(), key=lambda item: item.id)
|
|
366
388
|
for col_md in sorted_column_md:
|
|
367
|
-
schema_col_md = schema_version_md.columns.get(col_md.id)
|
|
389
|
+
schema_col_md = self.schema_version_md.columns.get(col_md.id)
|
|
368
390
|
col_name = schema_col_md.name if schema_col_md is not None else None
|
|
369
391
|
media_val = (
|
|
370
392
|
MediaValidation[schema_col_md.media_validation.upper()]
|
|
@@ -381,8 +403,9 @@ class TableVersion:
|
|
|
381
403
|
schema_version_add=col_md.schema_version_add,
|
|
382
404
|
schema_version_drop=col_md.schema_version_drop,
|
|
383
405
|
value_expr_dict=col_md.value_expr,
|
|
406
|
+
tbl=self,
|
|
384
407
|
)
|
|
385
|
-
col.tbl = self
|
|
408
|
+
col.tbl = self
|
|
386
409
|
self.cols.append(col)
|
|
387
410
|
|
|
388
411
|
# populate the lookup structures before Expr.from_dict()
|
|
@@ -396,17 +419,17 @@ class TableVersion:
|
|
|
396
419
|
self.cols_by_name[col.name] = col
|
|
397
420
|
self.cols_by_id[col.id] = col
|
|
398
421
|
|
|
399
|
-
# make sure to traverse columns ordered by position = order in which cols were created;
|
|
400
|
-
# this guarantees that references always point backwards
|
|
401
|
-
if not self.is_snapshot and col_md.value_expr is not None:
|
|
402
|
-
|
|
422
|
+
# # make sure to traverse columns ordered by position = order in which cols were created;
|
|
423
|
+
# # this guarantees that references always point backwards
|
|
424
|
+
# if not self.is_snapshot and col_md.value_expr is not None:
|
|
425
|
+
# self._record_refd_columns(col)
|
|
403
426
|
|
|
404
|
-
def _init_idxs(self
|
|
405
|
-
self.idx_md = tbl_md.index_md
|
|
427
|
+
def _init_idxs(self) -> None:
|
|
428
|
+
# self.idx_md = tbl_md.index_md
|
|
406
429
|
self.idxs_by_name = {}
|
|
407
430
|
import pixeltable.index as index_module
|
|
408
431
|
|
|
409
|
-
for md in tbl_md.index_md.values():
|
|
432
|
+
for md in self.tbl_md.index_md.values():
|
|
410
433
|
if md.schema_version_add > self.schema_version or (
|
|
411
434
|
md.schema_version_drop is not None and md.schema_version_drop <= self.schema_version
|
|
412
435
|
):
|
|
@@ -441,33 +464,32 @@ class TableVersion:
|
|
|
441
464
|
else:
|
|
442
465
|
self.store_tbl = StoreTable(self)
|
|
443
466
|
|
|
444
|
-
def
|
|
445
|
-
self, timestamp: float, update_tbl_version: bool = True, preceding_schema_version: Optional[int] = None
|
|
446
|
-
) -> None:
|
|
467
|
+
def _write_md(self, new_version: bool, new_version_ts: float, new_schema_version: bool) -> None:
|
|
447
468
|
"""Writes table metadata to the database.
|
|
448
469
|
|
|
449
470
|
Args:
|
|
450
471
|
timestamp: timestamp of the change
|
|
451
|
-
conn: database connection to use
|
|
452
472
|
update_tbl_version: if `True`, will also write `TableVersion` metadata
|
|
453
473
|
preceding_schema_version: if specified, will also write `TableSchemaVersion` metadata, recording the
|
|
454
474
|
specified preceding schema version
|
|
455
475
|
"""
|
|
456
|
-
assert update_tbl_version or preceding_schema_version is None
|
|
457
476
|
from pixeltable.catalog import Catalog
|
|
458
477
|
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
478
|
+
version_md: Optional[schema.TableVersionMd] = (
|
|
479
|
+
schema.TableVersionMd(
|
|
480
|
+
tbl_id=str(self.id),
|
|
481
|
+
created_at=new_version_ts,
|
|
482
|
+
version=self.version,
|
|
483
|
+
schema_version=self.schema_version,
|
|
484
|
+
additional_md={},
|
|
485
|
+
)
|
|
486
|
+
if new_version
|
|
487
|
+
else None
|
|
463
488
|
)
|
|
464
489
|
|
|
465
|
-
Catalog.get().store_tbl_md(
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
"""Ensure that table metadata is loaded."""
|
|
469
|
-
for col in self.cols_by_id.values():
|
|
470
|
-
_ = col.value_expr
|
|
490
|
+
Catalog.get().store_tbl_md(
|
|
491
|
+
self.id, self._tbl_md, version_md, self._schema_version_md if new_schema_version else None
|
|
492
|
+
)
|
|
471
493
|
|
|
472
494
|
def _store_idx_name(self, idx_id: int) -> str:
|
|
473
495
|
"""Return name of index in the store, which needs to be globally unique"""
|
|
@@ -476,10 +498,10 @@ class TableVersion:
|
|
|
476
498
|
def add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
|
|
477
499
|
# we're creating a new schema version
|
|
478
500
|
self.version += 1
|
|
479
|
-
preceding_schema_version = self.schema_version
|
|
501
|
+
self.preceding_schema_version = self.schema_version
|
|
480
502
|
self.schema_version = self.version
|
|
481
503
|
status = self._add_index(col, idx_name, idx)
|
|
482
|
-
self.
|
|
504
|
+
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
|
|
483
505
|
_logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
|
|
484
506
|
return status
|
|
485
507
|
|
|
@@ -524,7 +546,7 @@ class TableVersion:
|
|
|
524
546
|
schema_version_drop=None,
|
|
525
547
|
records_errors=idx.records_value_errors(),
|
|
526
548
|
)
|
|
527
|
-
val_col.tbl = self
|
|
549
|
+
val_col.tbl = self
|
|
528
550
|
val_col.col_type = val_col.col_type.copy(nullable=True)
|
|
529
551
|
self.next_col_id += 1
|
|
530
552
|
|
|
@@ -538,7 +560,7 @@ class TableVersion:
|
|
|
538
560
|
schema_version_drop=None,
|
|
539
561
|
records_errors=False,
|
|
540
562
|
)
|
|
541
|
-
undo_col.tbl = self
|
|
563
|
+
undo_col.tbl = self
|
|
542
564
|
undo_col.col_type = undo_col.col_type.copy(nullable=True)
|
|
543
565
|
self.next_col_id += 1
|
|
544
566
|
return val_col, undo_col
|
|
@@ -553,7 +575,7 @@ class TableVersion:
|
|
|
553
575
|
idx_name = f'idx{idx_id}'
|
|
554
576
|
else:
|
|
555
577
|
assert is_valid_identifier(idx_name)
|
|
556
|
-
assert idx_name not in [i.name for i in self.
|
|
578
|
+
assert idx_name not in [i.name for i in self._tbl_md.index_md.values()]
|
|
557
579
|
# create and register the index metadata
|
|
558
580
|
idx_cls = type(idx)
|
|
559
581
|
idx_md = schema.IndexMd(
|
|
@@ -569,7 +591,7 @@ class TableVersion:
|
|
|
569
591
|
init_args=idx.as_dict(),
|
|
570
592
|
)
|
|
571
593
|
idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
|
|
572
|
-
self.
|
|
594
|
+
self._tbl_md.index_md[idx_id] = idx_md
|
|
573
595
|
self.idxs_by_name[idx_name] = idx_info
|
|
574
596
|
try:
|
|
575
597
|
idx.create_index(self._store_idx_name(idx_id), val_col)
|
|
@@ -578,7 +600,7 @@ class TableVersion:
|
|
|
578
600
|
def cleanup_index() -> None:
|
|
579
601
|
"""Delete the newly added in-memory index structure"""
|
|
580
602
|
del self.idxs_by_name[idx_name]
|
|
581
|
-
del self.
|
|
603
|
+
del self._tbl_md.index_md[idx_id]
|
|
582
604
|
self.next_idx_id = idx_id
|
|
583
605
|
|
|
584
606
|
# Run cleanup only if there has been an exception; otherwise, skip cleanup.
|
|
@@ -596,47 +618,48 @@ class TableVersion:
|
|
|
596
618
|
|
|
597
619
|
def drop_index(self, idx_id: int) -> None:
|
|
598
620
|
assert not self.is_snapshot
|
|
599
|
-
assert idx_id in self.
|
|
621
|
+
assert idx_id in self._tbl_md.index_md
|
|
600
622
|
|
|
601
623
|
# we're creating a new schema version
|
|
602
624
|
self.version += 1
|
|
603
|
-
preceding_schema_version = self.schema_version
|
|
625
|
+
self.preceding_schema_version = self.schema_version
|
|
604
626
|
self.schema_version = self.version
|
|
605
|
-
idx_md = self.
|
|
627
|
+
idx_md = self._tbl_md.index_md[idx_id]
|
|
606
628
|
idx_md.schema_version_drop = self.schema_version
|
|
607
629
|
assert idx_md.name in self.idxs_by_name
|
|
608
630
|
idx_info = self.idxs_by_name[idx_md.name]
|
|
609
631
|
# remove this index entry from the active indexes (in memory)
|
|
610
632
|
# and the index metadata (in persistent table metadata)
|
|
633
|
+
# TODO: this is wrong, it breaks revert()
|
|
611
634
|
del self.idxs_by_name[idx_md.name]
|
|
612
|
-
del self.
|
|
635
|
+
del self._tbl_md.index_md[idx_id]
|
|
613
636
|
|
|
614
637
|
self._drop_columns([idx_info.val_col, idx_info.undo_col])
|
|
615
|
-
self.
|
|
638
|
+
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
|
|
616
639
|
_logger.info(f'Dropped index {idx_md.name} on table {self.name}')
|
|
617
640
|
|
|
618
641
|
def add_columns(
|
|
619
642
|
self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
|
|
620
643
|
) -> UpdateStatus:
|
|
621
|
-
"""Adds
|
|
644
|
+
"""Adds columns to the table."""
|
|
622
645
|
assert not self.is_snapshot
|
|
623
|
-
assert all(is_valid_identifier(col.name) for col in cols)
|
|
646
|
+
assert all(is_valid_identifier(col.name) for col in cols if col.name is not None)
|
|
624
647
|
assert all(col.stored is not None for col in cols)
|
|
625
|
-
assert all(col.name not in self.cols_by_name for col in cols)
|
|
648
|
+
assert all(col.name not in self.cols_by_name for col in cols if col.name is not None)
|
|
626
649
|
for col in cols:
|
|
627
|
-
col.tbl = self
|
|
650
|
+
col.tbl = self
|
|
628
651
|
col.id = self.next_col_id
|
|
629
652
|
self.next_col_id += 1
|
|
630
653
|
|
|
631
654
|
# we're creating a new schema version
|
|
632
655
|
self.version += 1
|
|
633
|
-
preceding_schema_version = self.schema_version
|
|
656
|
+
self.preceding_schema_version = self.schema_version
|
|
634
657
|
self.schema_version = self.version
|
|
635
658
|
index_cols: dict[Column, tuple[index.BtreeIndex, Column, Column]] = {}
|
|
636
659
|
all_cols: list[Column] = []
|
|
637
660
|
for col in cols:
|
|
638
661
|
all_cols.append(col)
|
|
639
|
-
if self._is_btree_indexable(col):
|
|
662
|
+
if col.name is not None and self._is_btree_indexable(col):
|
|
640
663
|
idx = index.BtreeIndex(col)
|
|
641
664
|
val_col, undo_col = self._create_index_columns(idx)
|
|
642
665
|
index_cols[col] = (idx, val_col, undo_col)
|
|
@@ -644,10 +667,10 @@ class TableVersion:
|
|
|
644
667
|
all_cols.append(undo_col)
|
|
645
668
|
# Add all columns
|
|
646
669
|
status = self._add_columns(all_cols, print_stats=print_stats, on_error=on_error)
|
|
647
|
-
# Create indices and their
|
|
670
|
+
# Create indices and their md records
|
|
648
671
|
for col, (idx, val_col, undo_col) in index_cols.items():
|
|
649
672
|
self._create_index(col, val_col, undo_col, idx_name=None, idx=idx)
|
|
650
|
-
self.
|
|
673
|
+
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
|
|
651
674
|
_logger.info(f'Added columns {[col.name for col in cols]} to table {self.name}, new version: {self.version}')
|
|
652
675
|
|
|
653
676
|
msg = (
|
|
@@ -681,9 +704,23 @@ class TableVersion:
|
|
|
681
704
|
if col.name is not None:
|
|
682
705
|
self.cols_by_name[col.name] = col
|
|
683
706
|
self.cols_by_id[col.id] = col
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
707
|
+
|
|
708
|
+
# also add to stored md
|
|
709
|
+
self._tbl_md.column_md[col.id] = schema.ColumnMd(
|
|
710
|
+
id=col.id,
|
|
711
|
+
col_type=col.col_type.as_dict(),
|
|
712
|
+
is_pk=col.is_pk,
|
|
713
|
+
schema_version_add=col.schema_version_add,
|
|
714
|
+
schema_version_drop=col.schema_version_drop,
|
|
715
|
+
value_expr=col.value_expr.as_dict() if col.value_expr is not None else None,
|
|
716
|
+
stored=col.stored,
|
|
717
|
+
)
|
|
718
|
+
if col.name is not None:
|
|
719
|
+
self._schema_version_md.columns[col.id] = schema.SchemaColumn(
|
|
720
|
+
name=col.name,
|
|
721
|
+
pos=len(self.cols_by_name),
|
|
722
|
+
media_validation=col._media_validation.name.lower() if col._media_validation is not None else None,
|
|
723
|
+
)
|
|
687
724
|
|
|
688
725
|
if col.is_stored:
|
|
689
726
|
self.store_tbl.add_column(col)
|
|
@@ -724,14 +761,16 @@ class TableVersion:
|
|
|
724
761
|
run_cleanup_on_exception(cleanup_on_error)
|
|
725
762
|
plan.close()
|
|
726
763
|
|
|
764
|
+
pxt.catalog.Catalog.get().record_column_dependencies(self)
|
|
765
|
+
|
|
727
766
|
if print_stats:
|
|
728
767
|
plan.ctx.profile.print(num_rows=row_count)
|
|
729
|
-
# TODO
|
|
768
|
+
# TODO: what to do about system columns with exceptions?
|
|
730
769
|
return UpdateStatus(
|
|
731
770
|
num_rows=row_count,
|
|
732
771
|
num_computed_values=row_count,
|
|
733
772
|
num_excs=num_excs,
|
|
734
|
-
cols_with_excs=[f'{col.tbl.
|
|
773
|
+
cols_with_excs=[f'{col.tbl.name}.{col.name}' for col in cols_with_excs if col.name is not None],
|
|
735
774
|
)
|
|
736
775
|
|
|
737
776
|
def drop_column(self, col: Column) -> None:
|
|
@@ -741,7 +780,7 @@ class TableVersion:
|
|
|
741
780
|
|
|
742
781
|
# we're creating a new schema version
|
|
743
782
|
self.version += 1
|
|
744
|
-
preceding_schema_version = self.schema_version
|
|
783
|
+
self.preceding_schema_version = self.schema_version
|
|
745
784
|
self.schema_version = self.version
|
|
746
785
|
|
|
747
786
|
# drop this column and all dependent index columns and indices
|
|
@@ -751,15 +790,17 @@ class TableVersion:
|
|
|
751
790
|
if idx_info.col != col:
|
|
752
791
|
continue
|
|
753
792
|
dropped_cols.extend([idx_info.val_col, idx_info.undo_col])
|
|
754
|
-
idx_md = self.
|
|
793
|
+
idx_md = self._tbl_md.index_md[idx_info.id]
|
|
755
794
|
idx_md.schema_version_drop = self.schema_version
|
|
756
795
|
assert idx_md.name in self.idxs_by_name
|
|
757
796
|
dropped_idx_names.append(idx_md.name)
|
|
797
|
+
|
|
758
798
|
# update idxs_by_name
|
|
759
799
|
for idx_name in dropped_idx_names:
|
|
760
800
|
del self.idxs_by_name[idx_name]
|
|
801
|
+
|
|
761
802
|
self._drop_columns(dropped_cols)
|
|
762
|
-
self.
|
|
803
|
+
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
|
|
763
804
|
_logger.info(f'Dropped column {col.name} from table {self.name}, new version: {self.version}')
|
|
764
805
|
|
|
765
806
|
def _drop_columns(self, cols: Iterable[Column]) -> None:
|
|
@@ -767,21 +808,23 @@ class TableVersion:
|
|
|
767
808
|
assert not self.is_snapshot
|
|
768
809
|
|
|
769
810
|
for col in cols:
|
|
770
|
-
if col.value_expr is not None:
|
|
771
|
-
# update Column.dependent_cols
|
|
772
|
-
for c in self.cols:
|
|
773
|
-
if c == col:
|
|
774
|
-
break
|
|
775
|
-
c.dependent_cols.discard(col)
|
|
776
|
-
|
|
777
811
|
col.schema_version_drop = self.schema_version
|
|
778
812
|
if col.name is not None:
|
|
779
813
|
assert col.name in self.cols_by_name
|
|
780
814
|
del self.cols_by_name[col.name]
|
|
781
815
|
assert col.id in self.cols_by_id
|
|
782
816
|
del self.cols_by_id[col.id]
|
|
817
|
+
# update stored md
|
|
818
|
+
self._tbl_md.column_md[col.id].schema_version_drop = col.schema_version_drop
|
|
819
|
+
if col.name is not None:
|
|
820
|
+
del self._schema_version_md.columns[col.id]
|
|
821
|
+
|
|
822
|
+
# update positions
|
|
823
|
+
for pos, schema_col in enumerate(self._schema_version_md.columns.values()):
|
|
824
|
+
schema_col.pos = pos
|
|
783
825
|
|
|
784
826
|
self.store_tbl.create_sa_tbl()
|
|
827
|
+
pxt.catalog.Catalog.get().record_column_dependencies(self)
|
|
785
828
|
|
|
786
829
|
def rename_column(self, old_name: str, new_name: str) -> None:
|
|
787
830
|
"""Rename a column."""
|
|
@@ -796,13 +839,14 @@ class TableVersion:
|
|
|
796
839
|
del self.cols_by_name[old_name]
|
|
797
840
|
col.name = new_name
|
|
798
841
|
self.cols_by_name[new_name] = col
|
|
842
|
+
self._schema_version_md.columns[col.id].name = new_name
|
|
799
843
|
|
|
800
844
|
# we're creating a new schema version
|
|
801
845
|
self.version += 1
|
|
802
|
-
preceding_schema_version = self.schema_version
|
|
846
|
+
self.preceding_schema_version = self.schema_version
|
|
803
847
|
self.schema_version = self.version
|
|
804
848
|
|
|
805
|
-
self.
|
|
849
|
+
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
|
|
806
850
|
_logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
|
|
807
851
|
|
|
808
852
|
def set_comment(self, new_comment: Optional[str]) -> None:
|
|
@@ -821,9 +865,9 @@ class TableVersion:
|
|
|
821
865
|
def _create_schema_version(self) -> None:
|
|
822
866
|
# we're creating a new schema version
|
|
823
867
|
self.version += 1
|
|
824
|
-
preceding_schema_version = self.schema_version
|
|
868
|
+
self.preceding_schema_version = self.schema_version
|
|
825
869
|
self.schema_version = self.version
|
|
826
|
-
self.
|
|
870
|
+
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
|
|
827
871
|
_logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
|
|
828
872
|
|
|
829
873
|
def insert(
|
|
@@ -838,7 +882,7 @@ class TableVersion:
|
|
|
838
882
|
"""
|
|
839
883
|
from pixeltable.plan import Planner
|
|
840
884
|
|
|
841
|
-
assert self.is_insertable
|
|
885
|
+
assert self.is_insertable
|
|
842
886
|
assert (rows is None) != (df is None) # Exactly one must be specified
|
|
843
887
|
if rows is not None:
|
|
844
888
|
plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
|
|
@@ -848,8 +892,8 @@ class TableVersion:
|
|
|
848
892
|
# this is a base table; we generate rowids during the insert
|
|
849
893
|
def rowids() -> Iterator[int]:
|
|
850
894
|
while True:
|
|
851
|
-
rowid = self.
|
|
852
|
-
self.
|
|
895
|
+
rowid = self.next_row_id
|
|
896
|
+
self.next_row_id += 1
|
|
853
897
|
yield rowid
|
|
854
898
|
|
|
855
899
|
return self._insert(plan, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception)
|
|
@@ -874,7 +918,7 @@ class TableVersion:
|
|
|
874
918
|
result.num_excs = num_excs
|
|
875
919
|
result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
|
|
876
920
|
result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
|
|
877
|
-
self.
|
|
921
|
+
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
|
|
878
922
|
|
|
879
923
|
# update views
|
|
880
924
|
for view in self.mutable_views:
|
|
@@ -1038,13 +1082,13 @@ class TableVersion:
|
|
|
1038
1082
|
self.store_tbl.delete_rows(
|
|
1039
1083
|
self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause
|
|
1040
1084
|
)
|
|
1041
|
-
self.
|
|
1085
|
+
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
|
|
1042
1086
|
|
|
1043
1087
|
if cascade:
|
|
1044
1088
|
base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
|
|
1045
1089
|
# propagate to views
|
|
1046
1090
|
for view in self.mutable_views:
|
|
1047
|
-
recomputed_cols = [col for col in recomputed_view_cols if col.tbl == view]
|
|
1091
|
+
recomputed_cols = [col for col in recomputed_view_cols if col.tbl.id == view.id]
|
|
1048
1092
|
plan = None
|
|
1049
1093
|
if len(recomputed_cols) > 0:
|
|
1050
1094
|
from pixeltable.plan import Planner
|
|
@@ -1065,7 +1109,7 @@ class TableVersion:
|
|
|
1065
1109
|
Args:
|
|
1066
1110
|
where: a predicate to filter rows to delete.
|
|
1067
1111
|
"""
|
|
1068
|
-
assert self.is_insertable
|
|
1112
|
+
assert self.is_insertable
|
|
1069
1113
|
from pixeltable.exprs import Expr
|
|
1070
1114
|
from pixeltable.plan import Planner
|
|
1071
1115
|
|
|
@@ -1093,14 +1137,22 @@ class TableVersion:
|
|
|
1093
1137
|
Returns:
|
|
1094
1138
|
number of deleted rows
|
|
1095
1139
|
"""
|
|
1140
|
+
# print(f'calling sql_expr()')
|
|
1096
1141
|
sql_where_clause = where.sql_expr(exprs.SqlElementCache()) if where is not None else None
|
|
1142
|
+
# #print(f'sql_where_clause={str(sql_where_clause) if sql_where_clause is not None else None}')
|
|
1143
|
+
# sql_cols: list[sql.Column] = []
|
|
1144
|
+
# def collect_cols(col) -> None:
|
|
1145
|
+
# sql_cols.append(col)
|
|
1146
|
+
# sql.sql.visitors.traverse(sql_where_clause, {}, {'column': collect_cols})
|
|
1147
|
+
# x = [f'{str(c)}:{hash(c)}:{id(c.table)}' for c in sql_cols]
|
|
1148
|
+
# print(f'where_clause cols: {x}')
|
|
1097
1149
|
num_rows = self.store_tbl.delete_rows(
|
|
1098
1150
|
self.version + 1, base_versions=base_versions, match_on_vmin=False, where_clause=sql_where_clause
|
|
1099
1151
|
)
|
|
1100
1152
|
if num_rows > 0:
|
|
1101
1153
|
# we're creating a new version
|
|
1102
1154
|
self.version += 1
|
|
1103
|
-
self.
|
|
1155
|
+
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
|
|
1104
1156
|
for view in self.mutable_views:
|
|
1105
1157
|
num_rows += view.get().propagate_delete(
|
|
1106
1158
|
where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
|
|
@@ -1114,17 +1166,13 @@ class TableVersion:
|
|
|
1114
1166
|
raise excs.Error('Cannot revert version 0')
|
|
1115
1167
|
self._revert()
|
|
1116
1168
|
|
|
1117
|
-
def _delete_column(self, col: Column) -> None:
|
|
1118
|
-
"""Physically remove the column from the schema and the store table"""
|
|
1119
|
-
if col.is_stored:
|
|
1120
|
-
self.store_tbl.drop_column(col)
|
|
1121
|
-
self.cols.remove(col)
|
|
1122
|
-
if col.name is not None:
|
|
1123
|
-
del self.cols_by_name[col.name]
|
|
1124
|
-
del self.cols_by_id[col.id]
|
|
1125
|
-
|
|
1126
1169
|
def _revert(self) -> None:
|
|
1127
|
-
"""
|
|
1170
|
+
"""
|
|
1171
|
+
Reverts the stored metadata for this table version and propagates to views.
|
|
1172
|
+
|
|
1173
|
+
Doesn't attempt to revert the in-memory metadata, but instead invalidates this TableVersion instance
|
|
1174
|
+
and relies on Catalog to reload it
|
|
1175
|
+
"""
|
|
1128
1176
|
conn = Env.get().conn
|
|
1129
1177
|
# make sure we don't have a snapshot referencing this version
|
|
1130
1178
|
# (unclear how to express this with sqlalchemy)
|
|
@@ -1157,109 +1205,206 @@ class TableVersion:
|
|
|
1157
1205
|
stmt = sql.update(self.store_tbl.sa_tbl).values(set_clause).where(self.store_tbl.sa_tbl.c.v_max == self.version)
|
|
1158
1206
|
conn.execute(stmt)
|
|
1159
1207
|
|
|
1160
|
-
# revert schema changes
|
|
1208
|
+
# revert schema changes:
|
|
1209
|
+
# - undo changes to self._tbl_md and write that back
|
|
1210
|
+
# - delete newly-added TableVersion/TableSchemaVersion records
|
|
1161
1211
|
if self.version == self.schema_version:
|
|
1162
|
-
# delete newly-added columns
|
|
1212
|
+
# physically delete newly-added columns and remove them from the stored md
|
|
1163
1213
|
added_cols = [col for col in self.cols if col.schema_version_add == self.schema_version]
|
|
1164
1214
|
if len(added_cols) > 0:
|
|
1165
|
-
next_col_id = min(col.id for col in added_cols)
|
|
1215
|
+
self._tbl_md.next_col_id = min(col.id for col in added_cols)
|
|
1166
1216
|
for col in added_cols:
|
|
1167
|
-
|
|
1168
|
-
|
|
1217
|
+
if col.is_stored:
|
|
1218
|
+
self.store_tbl.drop_column(col)
|
|
1219
|
+
del self._tbl_md.column_md[col.id]
|
|
1169
1220
|
|
|
1170
1221
|
# remove newly-added indices from the lookup structures
|
|
1171
1222
|
# (the value and undo columns got removed in the preceding step)
|
|
1172
|
-
added_idx_md = [md for md in self.
|
|
1223
|
+
added_idx_md = [md for md in self._tbl_md.index_md.values() if md.schema_version_add == self.schema_version]
|
|
1173
1224
|
if len(added_idx_md) > 0:
|
|
1174
|
-
next_idx_id = min(md.id for md in added_idx_md)
|
|
1225
|
+
self._tbl_md.next_idx_id = min(md.id for md in added_idx_md)
|
|
1175
1226
|
for md in added_idx_md:
|
|
1176
|
-
|
|
1177
|
-
del self.
|
|
1178
|
-
self.next_idx_id = next_idx_id
|
|
1227
|
+
# TODO: drop the index
|
|
1228
|
+
del self._tbl_md.index_md[md.id]
|
|
1179
1229
|
|
|
1180
1230
|
# make newly-dropped columns visible again
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
|
|
1231
|
+
dropped_col_md = [
|
|
1232
|
+
md for md in self._tbl_md.column_md.values() if md.schema_version_drop == self.schema_version
|
|
1233
|
+
]
|
|
1234
|
+
for col_md in dropped_col_md:
|
|
1235
|
+
col_md.schema_version_drop = None
|
|
1184
1236
|
|
|
1185
1237
|
# make newly-dropped indices visible again
|
|
1186
|
-
dropped_idx_md = [
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
# we need to determine the preceding schema version and reload the schema
|
|
1192
|
-
schema_version_md_dict = (
|
|
1193
|
-
session.query(schema.TableSchemaVersion.md)
|
|
1194
|
-
.where(schema.TableSchemaVersion.tbl_id == self.id)
|
|
1195
|
-
.where(schema.TableSchemaVersion.schema_version == self.schema_version)
|
|
1196
|
-
.scalar()
|
|
1197
|
-
)
|
|
1198
|
-
preceding_schema_version = schema_version_md_dict['preceding_schema_version']
|
|
1199
|
-
preceding_schema_version_md_dict = (
|
|
1200
|
-
session.query(schema.TableSchemaVersion.md)
|
|
1201
|
-
.where(schema.TableSchemaVersion.tbl_id == self.id)
|
|
1202
|
-
.where(schema.TableSchemaVersion.schema_version == preceding_schema_version)
|
|
1203
|
-
.scalar()
|
|
1204
|
-
)
|
|
1205
|
-
preceding_schema_version_md = schema.md_from_dict(
|
|
1206
|
-
schema.TableSchemaVersionMd, preceding_schema_version_md_dict
|
|
1207
|
-
)
|
|
1208
|
-
tbl_md = self._create_tbl_md()
|
|
1209
|
-
self._init_schema(tbl_md, preceding_schema_version_md)
|
|
1238
|
+
dropped_idx_md = [
|
|
1239
|
+
md for md in self._tbl_md.index_md.values() if md.schema_version_drop == self.schema_version
|
|
1240
|
+
]
|
|
1241
|
+
for idx_md in dropped_idx_md:
|
|
1242
|
+
idx_md.schema_version_drop = None
|
|
1210
1243
|
|
|
1211
1244
|
conn.execute(
|
|
1212
1245
|
sql.delete(schema.TableSchemaVersion.__table__)
|
|
1213
1246
|
.where(schema.TableSchemaVersion.tbl_id == self.id)
|
|
1214
1247
|
.where(schema.TableSchemaVersion.schema_version == self.schema_version)
|
|
1215
1248
|
)
|
|
1216
|
-
self.
|
|
1217
|
-
self.comment = preceding_schema_version_md.comment
|
|
1218
|
-
self.num_retained_versions = preceding_schema_version_md.num_retained_versions
|
|
1249
|
+
self._tbl_md.current_schema_version = self._schema_version_md.preceding_schema_version
|
|
1219
1250
|
|
|
1220
1251
|
conn.execute(
|
|
1221
1252
|
sql.delete(schema.TableVersion.__table__)
|
|
1222
1253
|
.where(schema.TableVersion.tbl_id == self.id)
|
|
1223
1254
|
.where(schema.TableVersion.version == self.version)
|
|
1224
1255
|
)
|
|
1256
|
+
|
|
1225
1257
|
self.version -= 1
|
|
1226
|
-
|
|
1227
|
-
sql.update(schema.Table.__table__)
|
|
1228
|
-
.values({schema.Table.md: dataclasses.asdict(self._create_tbl_md())})
|
|
1229
|
-
.where(schema.Table.id == self.id)
|
|
1230
|
-
)
|
|
1258
|
+
self._write_md(new_version=False, new_version_ts=0, new_schema_version=False)
|
|
1231
1259
|
|
|
1232
1260
|
# propagate to views
|
|
1261
|
+
views_str = ', '.join([str(v.id) for v in self.mutable_views])
|
|
1262
|
+
print(f'revert(): mutable_views={views_str}')
|
|
1233
1263
|
for view in self.mutable_views:
|
|
1234
1264
|
view.get()._revert()
|
|
1265
|
+
|
|
1266
|
+
# force reload on next operation
|
|
1267
|
+
self.is_validated = False
|
|
1268
|
+
pxt.catalog.Catalog.get().remove_tbl_version(self)
|
|
1235
1269
|
_logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
|
|
1236
1270
|
|
|
1237
|
-
def _init_external_stores(self
|
|
1238
|
-
for store_md in tbl_md.external_stores:
|
|
1271
|
+
def _init_external_stores(self) -> None:
|
|
1272
|
+
for store_md in self.tbl_md.external_stores:
|
|
1239
1273
|
store_cls = resolve_symbol(store_md['class'])
|
|
1240
1274
|
assert isinstance(store_cls, type) and issubclass(store_cls, pxt.io.ExternalStore)
|
|
1241
1275
|
store = store_cls.from_dict(store_md['md'])
|
|
1242
1276
|
self.external_stores[store.name] = store
|
|
1243
1277
|
|
|
1244
1278
|
def link_external_store(self, store: pxt.io.ExternalStore) -> None:
|
|
1245
|
-
|
|
1279
|
+
self.version += 1
|
|
1280
|
+
self.preceding_schema_version = self.schema_version
|
|
1281
|
+
self.schema_version = self.version
|
|
1282
|
+
|
|
1246
1283
|
self.external_stores[store.name] = store
|
|
1247
|
-
self.
|
|
1284
|
+
self._tbl_md.external_stores.append(
|
|
1285
|
+
{'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()}
|
|
1286
|
+
)
|
|
1287
|
+
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
|
|
1288
|
+
|
|
1289
|
+
def unlink_external_store(self, store: pxt.io.ExternalStore) -> None:
|
|
1290
|
+
del self.external_stores[store.name]
|
|
1291
|
+
self.version += 1
|
|
1292
|
+
self.preceding_schema_version = self.schema_version
|
|
1293
|
+
self.schema_version = self.version
|
|
1294
|
+
idx = next(i for i, store_md in enumerate(self._tbl_md.external_stores) if store_md['md']['name'] == store.name)
|
|
1295
|
+
self._tbl_md.external_stores.pop(idx)
|
|
1296
|
+
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
|
|
1297
|
+
|
|
1298
|
+
@property
|
|
1299
|
+
def tbl_md(self) -> schema.TableMd:
|
|
1300
|
+
return self._tbl_md
|
|
1301
|
+
|
|
1302
|
+
@property
|
|
1303
|
+
def schema_version_md(self) -> schema.TableSchemaVersionMd:
|
|
1304
|
+
return self._schema_version_md
|
|
1305
|
+
|
|
1306
|
+
@property
|
|
1307
|
+
def view_md(self) -> Optional[schema.ViewMd]:
|
|
1308
|
+
return self._tbl_md.view_md
|
|
1309
|
+
|
|
1310
|
+
@property
|
|
1311
|
+
def name(self) -> str:
|
|
1312
|
+
return self._tbl_md.name
|
|
1313
|
+
|
|
1314
|
+
@property
|
|
1315
|
+
def user(self) -> Optional[str]:
|
|
1316
|
+
return self._tbl_md.user
|
|
1317
|
+
|
|
1318
|
+
@property
|
|
1319
|
+
def is_replica(self) -> bool:
|
|
1320
|
+
return self._tbl_md.is_replica
|
|
1321
|
+
|
|
1322
|
+
@property
|
|
1323
|
+
def comment(self) -> str:
|
|
1324
|
+
return self._schema_version_md.comment
|
|
1325
|
+
|
|
1326
|
+
@comment.setter
|
|
1327
|
+
def comment(self, c: str) -> None:
|
|
1328
|
+
assert self.effective_version is None
|
|
1329
|
+
self._schema_version_md.comment = c
|
|
1330
|
+
|
|
1331
|
+
@property
|
|
1332
|
+
def num_retained_versions(self) -> int:
|
|
1333
|
+
return self._schema_version_md.num_retained_versions
|
|
1334
|
+
|
|
1335
|
+
@num_retained_versions.setter
|
|
1336
|
+
def num_retained_versions(self, n: int) -> None:
|
|
1337
|
+
assert self.effective_version is None
|
|
1338
|
+
self._schema_version_md.num_retained_versions = n
|
|
1339
|
+
|
|
1340
|
+
@property
|
|
1341
|
+
def version(self) -> int:
|
|
1342
|
+
# if this is a snapshot instance, we need to ignore current_version
|
|
1343
|
+
return self._tbl_md.current_version if self.effective_version is None else self.effective_version
|
|
1344
|
+
|
|
1345
|
+
@version.setter
|
|
1346
|
+
def version(self, version: int) -> None:
|
|
1347
|
+
assert self.effective_version is None
|
|
1348
|
+
self._tbl_md.current_version = version
|
|
1349
|
+
|
|
1350
|
+
@property
|
|
1351
|
+
def schema_version(self) -> int:
|
|
1352
|
+
return self._schema_version_md.schema_version
|
|
1353
|
+
|
|
1354
|
+
@schema_version.setter
|
|
1355
|
+
def schema_version(self, version: int) -> None:
|
|
1356
|
+
assert self.effective_version is None
|
|
1357
|
+
self._tbl_md.current_schema_version = version
|
|
1358
|
+
self._schema_version_md.schema_version = version
|
|
1359
|
+
|
|
1360
|
+
@property
|
|
1361
|
+
def preceding_schema_version(self) -> int:
|
|
1362
|
+
return self._schema_version_md.preceding_schema_version
|
|
1363
|
+
|
|
1364
|
+
@preceding_schema_version.setter
|
|
1365
|
+
def preceding_schema_version(self, v: int) -> None:
|
|
1366
|
+
assert self.effective_version is None
|
|
1367
|
+
self._schema_version_md.preceding_schema_version = v
|
|
1368
|
+
|
|
1369
|
+
@property
|
|
1370
|
+
def media_validation(self) -> MediaValidation:
|
|
1371
|
+
return MediaValidation[self._schema_version_md.media_validation.upper()]
|
|
1372
|
+
|
|
1373
|
+
@property
|
|
1374
|
+
def next_col_id(self) -> int:
|
|
1375
|
+
return self._tbl_md.next_col_id
|
|
1376
|
+
|
|
1377
|
+
@next_col_id.setter
|
|
1378
|
+
def next_col_id(self, id: int) -> None:
|
|
1379
|
+
assert self.effective_version is None
|
|
1380
|
+
self._tbl_md.next_col_id = id
|
|
1248
1381
|
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1382
|
+
@property
|
|
1383
|
+
def next_idx_id(self) -> int:
|
|
1384
|
+
return self._tbl_md.next_idx_id
|
|
1385
|
+
|
|
1386
|
+
@next_idx_id.setter
|
|
1387
|
+
def next_idx_id(self, id: int) -> None:
|
|
1388
|
+
assert self.effective_version is None
|
|
1389
|
+
self._tbl_md.next_idx_id = id
|
|
1390
|
+
|
|
1391
|
+
@property
|
|
1392
|
+
def next_row_id(self) -> int:
|
|
1393
|
+
return self._tbl_md.next_row_id
|
|
1255
1394
|
|
|
1256
|
-
|
|
1257
|
-
|
|
1395
|
+
@next_row_id.setter
|
|
1396
|
+
def next_row_id(self, id: int) -> None:
|
|
1397
|
+
assert self.effective_version is None
|
|
1398
|
+
self._tbl_md.next_row_id = id
|
|
1258
1399
|
|
|
1259
1400
|
@property
|
|
1260
1401
|
def is_snapshot(self) -> bool:
|
|
1261
1402
|
return self.effective_version is not None
|
|
1262
1403
|
|
|
1404
|
+
@property
|
|
1405
|
+
def is_mutable(self) -> bool:
|
|
1406
|
+
return not self.is_snapshot and not self.is_replica
|
|
1407
|
+
|
|
1263
1408
|
@property
|
|
1264
1409
|
def is_view(self) -> bool:
|
|
1265
1410
|
return self.view_md is not None
|
|
@@ -1272,6 +1417,7 @@ class TableVersion:
|
|
|
1272
1417
|
def is_component_view(self) -> bool:
|
|
1273
1418
|
return self.iterator_cls is not None
|
|
1274
1419
|
|
|
1420
|
+
@property
|
|
1275
1421
|
def is_insertable(self) -> bool:
|
|
1276
1422
|
"""Returns True if this corresponds to an InsertableTable"""
|
|
1277
1423
|
return not self.is_snapshot and not self.is_view
|
|
@@ -1309,18 +1455,6 @@ class TableVersion:
|
|
|
1309
1455
|
names = [c.name for c in self.cols_by_name.values() if c.is_computed]
|
|
1310
1456
|
return names
|
|
1311
1457
|
|
|
1312
|
-
def _record_refd_columns(self, col: Column) -> None:
|
|
1313
|
-
"""Update Column.dependent_cols for all cols referenced in col.value_expr."""
|
|
1314
|
-
from pixeltable import exprs
|
|
1315
|
-
|
|
1316
|
-
if col.value_expr_dict is not None:
|
|
1317
|
-
# if we have a value_expr_dict, use that instead of instantiating the value_expr
|
|
1318
|
-
refd_cols = exprs.Expr.get_refd_columns(col.value_expr_dict)
|
|
1319
|
-
else:
|
|
1320
|
-
refd_cols = [e.col for e in col.value_expr.subexprs(expr_class=exprs.ColumnRef)]
|
|
1321
|
-
for refd_col in refd_cols:
|
|
1322
|
-
refd_col.dependent_cols.add(col)
|
|
1323
|
-
|
|
1324
1458
|
def get_idx_val_columns(self, cols: Iterable[Column]) -> set[Column]:
|
|
1325
1459
|
result = {info.val_col for col in cols for info in col.get_idx_info().values()}
|
|
1326
1460
|
return result
|
|
@@ -1329,7 +1463,8 @@ class TableVersion:
|
|
|
1329
1463
|
"""
|
|
1330
1464
|
Return the set of columns that transitively depend on any of the given ones.
|
|
1331
1465
|
"""
|
|
1332
|
-
|
|
1466
|
+
cat = pxt.catalog.Catalog.get()
|
|
1467
|
+
result = set().union(*[cat.get_column_dependents(col.tbl.id, col.id) for col in cols])
|
|
1333
1468
|
if len(result) > 0:
|
|
1334
1469
|
result.update(self.get_dependent_columns(result))
|
|
1335
1470
|
return result
|
|
@@ -1363,24 +1498,6 @@ class TableVersion:
|
|
|
1363
1498
|
{'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()} for store in stores
|
|
1364
1499
|
]
|
|
1365
1500
|
|
|
1366
|
-
def _create_tbl_md(self) -> schema.TableMd:
|
|
1367
|
-
return schema.TableMd(
|
|
1368
|
-
tbl_id=str(self.id),
|
|
1369
|
-
name=self.name,
|
|
1370
|
-
user=self.user,
|
|
1371
|
-
is_replica=self.is_replica,
|
|
1372
|
-
current_version=self.version,
|
|
1373
|
-
current_schema_version=self.schema_version,
|
|
1374
|
-
next_col_id=self.next_col_id,
|
|
1375
|
-
next_idx_id=self.next_idx_id,
|
|
1376
|
-
next_row_id=self.next_rowid,
|
|
1377
|
-
column_md=self._create_column_md(self.cols),
|
|
1378
|
-
index_md=self.idx_md,
|
|
1379
|
-
external_stores=self._create_stores_md(self.external_stores.values()),
|
|
1380
|
-
view_md=self.view_md,
|
|
1381
|
-
additional_md={},
|
|
1382
|
-
)
|
|
1383
|
-
|
|
1384
1501
|
def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
|
|
1385
1502
|
return schema.TableVersionMd(
|
|
1386
1503
|
tbl_id=str(self.id),
|