pixeltable 0.3.14__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +9 -1
- pixeltable/catalog/catalog.py +559 -134
- pixeltable/catalog/column.py +36 -32
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +12 -0
- pixeltable/catalog/insertable_table.py +30 -25
- pixeltable/catalog/schema_object.py +9 -6
- pixeltable/catalog/table.py +334 -267
- pixeltable/catalog/table_version.py +360 -241
- pixeltable/catalog/table_version_handle.py +18 -2
- pixeltable/catalog/table_version_path.py +86 -23
- pixeltable/catalog/view.py +47 -23
- pixeltable/dataframe.py +198 -19
- pixeltable/env.py +6 -4
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +1 -1
- pixeltable/exec/exec_node.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +4 -1
- pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
- pixeltable/exec/in_memory_data_node.py +1 -1
- pixeltable/exec/sql_node.py +188 -22
- pixeltable/exprs/column_property_ref.py +16 -6
- pixeltable/exprs/column_ref.py +33 -11
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/data_row.py +5 -3
- pixeltable/exprs/expr.py +11 -4
- pixeltable/exprs/literal.py +2 -0
- pixeltable/exprs/row_builder.py +4 -6
- pixeltable/exprs/rowid_ref.py +8 -0
- pixeltable/exprs/similarity_expr.py +1 -0
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +5 -3
- pixeltable/func/tools.py +12 -2
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +19 -45
- pixeltable/functions/deepseek.py +19 -38
- pixeltable/functions/fireworks.py +9 -18
- pixeltable/functions/gemini.py +165 -33
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/llama_cpp.py +6 -6
- pixeltable/functions/math.py +63 -0
- pixeltable/functions/mistralai.py +16 -53
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +82 -165
- pixeltable/functions/string.py +212 -58
- pixeltable/functions/together.py +22 -80
- pixeltable/globals.py +10 -4
- pixeltable/index/base.py +5 -0
- pixeltable/index/btree.py +5 -0
- pixeltable/index/embedding_index.py +5 -0
- pixeltable/io/external_store.py +10 -31
- pixeltable/io/label_studio.py +5 -5
- pixeltable/io/parquet.py +4 -4
- pixeltable/io/table_data_conduit.py +1 -32
- pixeltable/metadata/__init__.py +11 -2
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_30.py +6 -11
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/util.py +3 -9
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +13 -1
- pixeltable/plan.py +135 -12
- pixeltable/share/packager.py +321 -20
- pixeltable/share/publish.py +2 -2
- pixeltable/store.py +31 -13
- pixeltable/type_system.py +30 -0
- pixeltable/utils/dbms.py +1 -1
- pixeltable/utils/formatter.py +64 -42
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/METADATA +2 -1
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/RECORD +79 -74
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import copy
|
|
3
4
|
import dataclasses
|
|
4
5
|
import importlib
|
|
5
6
|
import logging
|
|
@@ -22,6 +23,10 @@ from pixeltable.utils.exception_handler import run_cleanup_on_exception
|
|
|
22
23
|
from pixeltable.utils.filecache import FileCache
|
|
23
24
|
from pixeltable.utils.media_store import MediaStore
|
|
24
25
|
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
from pixeltable.plan import SampleClause
|
|
28
|
+
|
|
29
|
+
|
|
25
30
|
from ..func.globals import resolve_symbol
|
|
26
31
|
from .column import Column
|
|
27
32
|
from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, UpdateStatus, is_valid_identifier
|
|
@@ -50,43 +55,46 @@ class TableVersion:
|
|
|
50
55
|
|
|
51
56
|
Instances of TableVersion should not be stored as member variables (ie, used across transaction boundaries).
|
|
52
57
|
Use a TableVersionHandle instead.
|
|
58
|
+
|
|
59
|
+
Only TableVersion and Catalog interact directly with stored metadata. Everything else needs to go through these
|
|
60
|
+
two classes.
|
|
53
61
|
"""
|
|
54
62
|
|
|
55
63
|
id: UUID
|
|
56
|
-
|
|
57
|
-
|
|
64
|
+
|
|
65
|
+
# record metadata stored in catalog
|
|
66
|
+
_tbl_md: schema.TableMd
|
|
67
|
+
_schema_version_md: schema.TableSchemaVersionMd
|
|
68
|
+
|
|
58
69
|
effective_version: Optional[int]
|
|
59
|
-
is_replica: bool
|
|
60
|
-
version: int
|
|
61
|
-
comment: str
|
|
62
|
-
media_validation: MediaValidation
|
|
63
|
-
num_retained_versions: int
|
|
64
|
-
schema_version: int
|
|
65
|
-
view_md: Optional[schema.ViewMd]
|
|
66
70
|
path: Optional[pxt.catalog.TableVersionPath] # only set for live tables; needed to resolve computed cols
|
|
67
71
|
base: Optional[TableVersionHandle] # only set for views
|
|
68
|
-
next_col_id: int
|
|
69
|
-
next_idx_id: int
|
|
70
|
-
next_rowid: int
|
|
71
72
|
predicate: Optional[exprs.Expr]
|
|
72
|
-
|
|
73
|
+
sample_clause: Optional['SampleClause']
|
|
74
|
+
|
|
73
75
|
iterator_cls: Optional[type[ComponentIterator]]
|
|
74
76
|
iterator_args: Optional[exprs.InlineDict]
|
|
75
77
|
num_iterator_cols: int
|
|
76
78
|
|
|
79
|
+
# target for data operation propagation (only set for non-snapshots, and only records non-snapshot views)
|
|
80
|
+
mutable_views: set[TableVersionHandle]
|
|
81
|
+
|
|
77
82
|
# contains complete history of columns, incl dropped ones
|
|
78
83
|
cols: list[Column]
|
|
79
84
|
# contains only user-facing (named) columns visible in this version
|
|
80
85
|
cols_by_name: dict[str, Column]
|
|
81
86
|
# contains only columns visible in this version, both system and user
|
|
82
87
|
cols_by_id: dict[int, Column]
|
|
83
|
-
# needed for _create_tbl_md()
|
|
84
|
-
idx_md: dict[int, schema.IndexMd]
|
|
85
88
|
# contains only actively maintained indices
|
|
86
89
|
idxs_by_name: dict[str, TableVersion.IndexInfo]
|
|
87
90
|
|
|
88
91
|
external_stores: dict[str, pxt.io.ExternalStore]
|
|
89
|
-
store_tbl: 'store.StoreBase'
|
|
92
|
+
store_tbl: Optional['store.StoreBase']
|
|
93
|
+
|
|
94
|
+
# used by Catalog to invalidate cached instances at the end of a transaction;
|
|
95
|
+
# True if this instance reflects the state of stored metadata in the context of this transaction and
|
|
96
|
+
# it is the instance cached in Catalog
|
|
97
|
+
is_validated: bool
|
|
90
98
|
|
|
91
99
|
@dataclasses.dataclass
|
|
92
100
|
class IndexInfo:
|
|
@@ -106,21 +114,15 @@ class TableVersion:
|
|
|
106
114
|
mutable_views: list[TableVersionHandle],
|
|
107
115
|
base_path: Optional[pxt.catalog.TableVersionPath] = None,
|
|
108
116
|
base: Optional[TableVersionHandle] = None,
|
|
109
|
-
# base_store_tbl: Optional['store.StoreBase'] = None,
|
|
110
117
|
):
|
|
118
|
+
self.is_validated = True # a freshly constructed instance is always valid
|
|
111
119
|
self.id = id
|
|
112
|
-
self.
|
|
113
|
-
self.
|
|
120
|
+
self._tbl_md = copy.deepcopy(tbl_md)
|
|
121
|
+
self._schema_version_md = copy.deepcopy(schema_version_md)
|
|
114
122
|
self.effective_version = effective_version
|
|
115
|
-
self.version = tbl_md.current_version if effective_version is None else effective_version
|
|
116
|
-
self.is_replica = tbl_md.is_replica
|
|
117
|
-
self.comment = schema_version_md.comment
|
|
118
|
-
self.num_retained_versions = schema_version_md.num_retained_versions
|
|
119
|
-
self.schema_version = schema_version_md.schema_version
|
|
120
|
-
self.view_md = tbl_md.view_md # save this as-is, it's needed for _create_md()
|
|
121
|
-
self.media_validation = MediaValidation[schema_version_md.media_validation.upper()]
|
|
122
123
|
assert not (self.is_view and base is None)
|
|
123
124
|
self.base = base
|
|
125
|
+
self.store_tbl = None
|
|
124
126
|
|
|
125
127
|
# mutable tables need their TableVersionPath for expr eval during updates
|
|
126
128
|
from .table_version_handle import TableVersionHandle
|
|
@@ -134,22 +136,14 @@ class TableVersion:
|
|
|
134
136
|
assert base_path is not None
|
|
135
137
|
self.path = TableVersionPath(self_handle, base=base_path)
|
|
136
138
|
|
|
137
|
-
if self.is_snapshot:
|
|
138
|
-
self.next_col_id = -1
|
|
139
|
-
self.next_idx_id = -1 # TODO: can snapshots have separate indices?
|
|
140
|
-
self.next_rowid = -1
|
|
141
|
-
else:
|
|
142
|
-
assert tbl_md.current_version == self.version
|
|
143
|
-
self.next_col_id = tbl_md.next_col_id
|
|
144
|
-
self.next_idx_id = tbl_md.next_idx_id
|
|
145
|
-
self.next_rowid = tbl_md.next_row_id
|
|
146
|
-
|
|
147
139
|
# view-specific initialization
|
|
148
140
|
from pixeltable import exprs
|
|
141
|
+
from pixeltable.plan import SampleClause
|
|
149
142
|
|
|
150
143
|
predicate_dict = None if self.view_md is None or self.view_md.predicate is None else self.view_md.predicate
|
|
151
144
|
self.predicate = exprs.Expr.from_dict(predicate_dict) if predicate_dict is not None else None
|
|
152
|
-
self.
|
|
145
|
+
sample_dict = None if self.view_md is None or self.view_md.sample_clause is None else self.view_md.sample_clause
|
|
146
|
+
self.sample_clause = SampleClause.from_dict(sample_dict) if sample_dict is not None else None
|
|
153
147
|
|
|
154
148
|
# component view-specific initialization
|
|
155
149
|
self.iterator_cls = None
|
|
@@ -164,23 +158,15 @@ class TableVersion:
|
|
|
164
158
|
self.num_iterator_cols = len(output_schema)
|
|
165
159
|
assert tbl_md.view_md.iterator_args is not None
|
|
166
160
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
cat.add_tbl_version(self)
|
|
161
|
+
self.mutable_views = set(mutable_views)
|
|
162
|
+
assert self.is_mutable or len(self.mutable_views) == 0
|
|
170
163
|
|
|
171
|
-
# init schema after we determined whether we're a component view, and before we create the store table
|
|
172
164
|
self.cols = []
|
|
173
165
|
self.cols_by_name = {}
|
|
174
166
|
self.cols_by_id = {}
|
|
175
|
-
self.idx_md = tbl_md.index_md
|
|
176
167
|
self.idxs_by_name = {}
|
|
177
168
|
self.external_stores = {}
|
|
178
169
|
|
|
179
|
-
self._init_schema(tbl_md, schema_version_md)
|
|
180
|
-
|
|
181
|
-
# Init external stores (this needs to happen after the schema is created)
|
|
182
|
-
self._init_external_stores(tbl_md)
|
|
183
|
-
|
|
184
170
|
def __hash__(self) -> int:
|
|
185
171
|
return hash(self.id)
|
|
186
172
|
|
|
@@ -188,19 +174,7 @@ class TableVersion:
|
|
|
188
174
|
"""Create a snapshot copy of this TableVersion"""
|
|
189
175
|
assert not self.is_snapshot
|
|
190
176
|
base = self.path.base.tbl_version if self.is_view else None
|
|
191
|
-
return TableVersion(
|
|
192
|
-
self.id,
|
|
193
|
-
self._create_tbl_md(),
|
|
194
|
-
self.version,
|
|
195
|
-
self._create_schema_version_md(preceding_schema_version=0), # preceding_schema_version: dummy value
|
|
196
|
-
mutable_views=[],
|
|
197
|
-
base=base,
|
|
198
|
-
)
|
|
199
|
-
|
|
200
|
-
def create_handle(self) -> TableVersionHandle:
|
|
201
|
-
from .table_version_handle import TableVersionHandle
|
|
202
|
-
|
|
203
|
-
return TableVersionHandle(self.id, self.effective_version, tbl_version=self)
|
|
177
|
+
return TableVersion(self.id, self.tbl_md, self.version, self.schema_version_md, mutable_views=[], base=base)
|
|
204
178
|
|
|
205
179
|
@property
|
|
206
180
|
def versioned_name(self) -> str:
|
|
@@ -248,6 +222,7 @@ class TableVersion:
|
|
|
248
222
|
next_col_id=len(cols),
|
|
249
223
|
next_idx_id=0,
|
|
250
224
|
next_row_id=0,
|
|
225
|
+
view_sn=0,
|
|
251
226
|
column_md=column_md,
|
|
252
227
|
index_md={},
|
|
253
228
|
external_stores=[],
|
|
@@ -292,7 +267,13 @@ class TableVersion:
|
|
|
292
267
|
|
|
293
268
|
# if this is purely a snapshot (it doesn't require any additional storage for columns and it doesn't have a
|
|
294
269
|
# predicate to apply at runtime), we don't create a physical table and simply use the base's table version path
|
|
295
|
-
if
|
|
270
|
+
if (
|
|
271
|
+
view_md is not None
|
|
272
|
+
and view_md.is_snapshot
|
|
273
|
+
and view_md.predicate is None
|
|
274
|
+
and view_md.sample_clause is None
|
|
275
|
+
and len(cols) == 0
|
|
276
|
+
):
|
|
296
277
|
session.add(tbl_record)
|
|
297
278
|
session.add(tbl_version_record)
|
|
298
279
|
session.add(schema_version_record)
|
|
@@ -306,8 +287,19 @@ class TableVersion:
|
|
|
306
287
|
tbl_version = cls(
|
|
307
288
|
tbl_record.id, table_md, effective_version, schema_version_md, [], base_path=base_path, base=base
|
|
308
289
|
)
|
|
309
|
-
|
|
290
|
+
# TODO: break this up, so that Catalog.create_table() registers tbl_version
|
|
291
|
+
cat = pxt.catalog.Catalog.get()
|
|
292
|
+
cat._tbl_versions[tbl_record.id, effective_version] = tbl_version
|
|
293
|
+
tbl_version.init()
|
|
310
294
|
tbl_version.store_tbl.create()
|
|
295
|
+
is_mutable = not is_snapshot and not table_md.is_replica
|
|
296
|
+
if base is not None and base.get().is_mutable and is_mutable:
|
|
297
|
+
from .table_version_handle import TableVersionHandle
|
|
298
|
+
|
|
299
|
+
handle = TableVersionHandle(tbl_version.id, effective_version)
|
|
300
|
+
assert handle not in base.get().mutable_views
|
|
301
|
+
base.get().mutable_views.add(handle)
|
|
302
|
+
|
|
311
303
|
if view_md is None or not view_md.is_snapshot:
|
|
312
304
|
# add default indices, after creating the store table
|
|
313
305
|
for col in tbl_version.cols_by_name.values():
|
|
@@ -315,7 +307,7 @@ class TableVersion:
|
|
|
315
307
|
assert status is None or status.num_excs == 0
|
|
316
308
|
|
|
317
309
|
# we re-create the tbl_record here, now that we have new index metadata
|
|
318
|
-
tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_version.
|
|
310
|
+
tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_version.tbl_md))
|
|
319
311
|
session.add(tbl_record)
|
|
320
312
|
session.add(tbl_version_record)
|
|
321
313
|
session.add(schema_version_record)
|
|
@@ -324,45 +316,77 @@ class TableVersion:
|
|
|
324
316
|
@classmethod
|
|
325
317
|
def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
|
|
326
318
|
tbl_id = UUID(md.tbl_md.tbl_id)
|
|
319
|
+
_logger.info(f'Creating replica table version {tbl_id}:{md.version_md.version}.')
|
|
327
320
|
view_md = md.tbl_md.view_md
|
|
328
321
|
base_path = pxt.catalog.TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
|
|
329
322
|
base = base_path.tbl_version if base_path is not None else None
|
|
330
323
|
tbl_version = cls(
|
|
331
324
|
tbl_id, md.tbl_md, md.version_md.version, md.schema_version_md, [], base_path=base_path, base=base
|
|
332
325
|
)
|
|
326
|
+
cat = pxt.catalog.Catalog.get()
|
|
327
|
+
cat._tbl_versions[tbl_version.id, tbl_version.effective_version] = tbl_version
|
|
328
|
+
tbl_version.init()
|
|
333
329
|
tbl_version.store_tbl.create()
|
|
330
|
+
tbl_version.store_tbl.ensure_columns_exist(col for col in tbl_version.cols if col.is_stored)
|
|
334
331
|
return tbl_version
|
|
335
332
|
|
|
336
333
|
def drop(self) -> None:
|
|
337
|
-
|
|
334
|
+
if self.is_view and self.is_mutable:
|
|
335
|
+
# update mutable_views
|
|
336
|
+
# TODO: invalidate base to force reload
|
|
337
|
+
from .table_version_handle import TableVersionHandle
|
|
338
338
|
|
|
339
|
-
|
|
339
|
+
assert self.base is not None
|
|
340
|
+
if self.base.get().is_mutable:
|
|
341
|
+
self.base.get().mutable_views.remove(TableVersionHandle.create(self))
|
|
342
|
+
|
|
343
|
+
# cat = Catalog.get()
|
|
340
344
|
# delete this table and all associated data
|
|
341
345
|
MediaStore.delete(self.id)
|
|
342
346
|
FileCache.get().clear(tbl_id=self.id)
|
|
343
|
-
cat.delete_tbl_md(self.id)
|
|
347
|
+
# cat.delete_tbl_md(self.id)
|
|
344
348
|
self.store_tbl.drop()
|
|
345
349
|
# de-register table version from catalog
|
|
346
|
-
cat.remove_tbl_version(self)
|
|
350
|
+
# cat.remove_tbl_version(self)
|
|
351
|
+
|
|
352
|
+
def init(self) -> None:
|
|
353
|
+
"""
|
|
354
|
+
Initialize schema-related in-memory metadata separately, now that this TableVersion instance is visible
|
|
355
|
+
in Catalog.
|
|
356
|
+
"""
|
|
357
|
+
from .catalog import Catalog
|
|
358
|
+
|
|
359
|
+
cat = Catalog.get()
|
|
360
|
+
assert (self.id, self.effective_version) in cat._tbl_versions
|
|
361
|
+
self._init_schema()
|
|
362
|
+
if not self.is_snapshot:
|
|
363
|
+
cat.record_column_dependencies(self)
|
|
347
364
|
|
|
348
|
-
|
|
365
|
+
# init external stores; this needs to happen after the schema is created
|
|
366
|
+
self._init_external_stores()
|
|
367
|
+
|
|
368
|
+
def _init_schema(self) -> None:
|
|
349
369
|
# create columns first, so the indices can reference them
|
|
350
|
-
self._init_cols(
|
|
370
|
+
self._init_cols()
|
|
351
371
|
if not self.is_snapshot:
|
|
352
|
-
self._init_idxs(
|
|
372
|
+
self._init_idxs()
|
|
353
373
|
# create the sa schema only after creating the columns and indices
|
|
354
374
|
self._init_sa_schema()
|
|
355
375
|
|
|
356
|
-
|
|
376
|
+
# created value_exprs after everything else has been initialized
|
|
377
|
+
for col in self.cols_by_id.values():
|
|
378
|
+
col.init_value_expr()
|
|
379
|
+
|
|
380
|
+
def _init_cols(self) -> None:
|
|
357
381
|
"""Initialize self.cols with the columns visible in our effective version"""
|
|
358
382
|
self.cols = []
|
|
359
383
|
self.cols_by_name = {}
|
|
360
384
|
self.cols_by_id = {}
|
|
361
385
|
# Sort columns in column_md by the position specified in col_md.id to guarantee that all references
|
|
362
386
|
# point backward.
|
|
363
|
-
sorted_column_md = sorted(tbl_md.column_md.values(), key=lambda item: item.id)
|
|
387
|
+
sorted_column_md = sorted(self.tbl_md.column_md.values(), key=lambda item: item.id)
|
|
364
388
|
for col_md in sorted_column_md:
|
|
365
|
-
schema_col_md = schema_version_md.columns.get(col_md.id)
|
|
389
|
+
schema_col_md = self.schema_version_md.columns.get(col_md.id)
|
|
366
390
|
col_name = schema_col_md.name if schema_col_md is not None else None
|
|
367
391
|
media_val = (
|
|
368
392
|
MediaValidation[schema_col_md.media_validation.upper()]
|
|
@@ -379,8 +403,9 @@ class TableVersion:
|
|
|
379
403
|
schema_version_add=col_md.schema_version_add,
|
|
380
404
|
schema_version_drop=col_md.schema_version_drop,
|
|
381
405
|
value_expr_dict=col_md.value_expr,
|
|
406
|
+
tbl=self,
|
|
382
407
|
)
|
|
383
|
-
col.tbl = self
|
|
408
|
+
col.tbl = self
|
|
384
409
|
self.cols.append(col)
|
|
385
410
|
|
|
386
411
|
# populate the lookup structures before Expr.from_dict()
|
|
@@ -394,17 +419,17 @@ class TableVersion:
|
|
|
394
419
|
self.cols_by_name[col.name] = col
|
|
395
420
|
self.cols_by_id[col.id] = col
|
|
396
421
|
|
|
397
|
-
# make sure to traverse columns ordered by position = order in which cols were created;
|
|
398
|
-
# this guarantees that references always point backwards
|
|
399
|
-
if not self.is_snapshot and col_md.value_expr is not None:
|
|
400
|
-
|
|
422
|
+
# # make sure to traverse columns ordered by position = order in which cols were created;
|
|
423
|
+
# # this guarantees that references always point backwards
|
|
424
|
+
# if not self.is_snapshot and col_md.value_expr is not None:
|
|
425
|
+
# self._record_refd_columns(col)
|
|
401
426
|
|
|
402
|
-
def _init_idxs(self
|
|
403
|
-
self.idx_md = tbl_md.index_md
|
|
427
|
+
def _init_idxs(self) -> None:
|
|
428
|
+
# self.idx_md = tbl_md.index_md
|
|
404
429
|
self.idxs_by_name = {}
|
|
405
430
|
import pixeltable.index as index_module
|
|
406
431
|
|
|
407
|
-
for md in tbl_md.index_md.values():
|
|
432
|
+
for md in self.tbl_md.index_md.values():
|
|
408
433
|
if md.schema_version_add > self.schema_version or (
|
|
409
434
|
md.schema_version_drop is not None and md.schema_version_drop <= self.schema_version
|
|
410
435
|
):
|
|
@@ -439,33 +464,32 @@ class TableVersion:
|
|
|
439
464
|
else:
|
|
440
465
|
self.store_tbl = StoreTable(self)
|
|
441
466
|
|
|
442
|
-
def
|
|
443
|
-
self, timestamp: float, update_tbl_version: bool = True, preceding_schema_version: Optional[int] = None
|
|
444
|
-
) -> None:
|
|
467
|
+
def _write_md(self, new_version: bool, new_version_ts: float, new_schema_version: bool) -> None:
|
|
445
468
|
"""Writes table metadata to the database.
|
|
446
469
|
|
|
447
470
|
Args:
|
|
448
471
|
timestamp: timestamp of the change
|
|
449
|
-
conn: database connection to use
|
|
450
472
|
update_tbl_version: if `True`, will also write `TableVersion` metadata
|
|
451
473
|
preceding_schema_version: if specified, will also write `TableSchemaVersion` metadata, recording the
|
|
452
474
|
specified preceding schema version
|
|
453
475
|
"""
|
|
454
|
-
assert update_tbl_version or preceding_schema_version is None
|
|
455
476
|
from pixeltable.catalog import Catalog
|
|
456
477
|
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
478
|
+
version_md: Optional[schema.TableVersionMd] = (
|
|
479
|
+
schema.TableVersionMd(
|
|
480
|
+
tbl_id=str(self.id),
|
|
481
|
+
created_at=new_version_ts,
|
|
482
|
+
version=self.version,
|
|
483
|
+
schema_version=self.schema_version,
|
|
484
|
+
additional_md={},
|
|
485
|
+
)
|
|
486
|
+
if new_version
|
|
487
|
+
else None
|
|
461
488
|
)
|
|
462
489
|
|
|
463
|
-
Catalog.get().store_tbl_md(
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
"""Ensure that table metadata is loaded."""
|
|
467
|
-
for col in self.cols_by_id.values():
|
|
468
|
-
_ = col.value_expr
|
|
490
|
+
Catalog.get().store_tbl_md(
|
|
491
|
+
self.id, self._tbl_md, version_md, self._schema_version_md if new_schema_version else None
|
|
492
|
+
)
|
|
469
493
|
|
|
470
494
|
def _store_idx_name(self, idx_id: int) -> str:
|
|
471
495
|
"""Return name of index in the store, which needs to be globally unique"""
|
|
@@ -474,10 +498,10 @@ class TableVersion:
|
|
|
474
498
|
def add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
|
|
475
499
|
# we're creating a new schema version
|
|
476
500
|
self.version += 1
|
|
477
|
-
preceding_schema_version = self.schema_version
|
|
501
|
+
self.preceding_schema_version = self.schema_version
|
|
478
502
|
self.schema_version = self.version
|
|
479
503
|
status = self._add_index(col, idx_name, idx)
|
|
480
|
-
self.
|
|
504
|
+
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
|
|
481
505
|
_logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
|
|
482
506
|
return status
|
|
483
507
|
|
|
@@ -522,7 +546,7 @@ class TableVersion:
|
|
|
522
546
|
schema_version_drop=None,
|
|
523
547
|
records_errors=idx.records_value_errors(),
|
|
524
548
|
)
|
|
525
|
-
val_col.tbl = self
|
|
549
|
+
val_col.tbl = self
|
|
526
550
|
val_col.col_type = val_col.col_type.copy(nullable=True)
|
|
527
551
|
self.next_col_id += 1
|
|
528
552
|
|
|
@@ -536,7 +560,7 @@ class TableVersion:
|
|
|
536
560
|
schema_version_drop=None,
|
|
537
561
|
records_errors=False,
|
|
538
562
|
)
|
|
539
|
-
undo_col.tbl = self
|
|
563
|
+
undo_col.tbl = self
|
|
540
564
|
undo_col.col_type = undo_col.col_type.copy(nullable=True)
|
|
541
565
|
self.next_col_id += 1
|
|
542
566
|
return val_col, undo_col
|
|
@@ -551,7 +575,7 @@ class TableVersion:
|
|
|
551
575
|
idx_name = f'idx{idx_id}'
|
|
552
576
|
else:
|
|
553
577
|
assert is_valid_identifier(idx_name)
|
|
554
|
-
assert idx_name not in [i.name for i in self.
|
|
578
|
+
assert idx_name not in [i.name for i in self._tbl_md.index_md.values()]
|
|
555
579
|
# create and register the index metadata
|
|
556
580
|
idx_cls = type(idx)
|
|
557
581
|
idx_md = schema.IndexMd(
|
|
@@ -567,7 +591,7 @@ class TableVersion:
|
|
|
567
591
|
init_args=idx.as_dict(),
|
|
568
592
|
)
|
|
569
593
|
idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
|
|
570
|
-
self.
|
|
594
|
+
self._tbl_md.index_md[idx_id] = idx_md
|
|
571
595
|
self.idxs_by_name[idx_name] = idx_info
|
|
572
596
|
try:
|
|
573
597
|
idx.create_index(self._store_idx_name(idx_id), val_col)
|
|
@@ -576,7 +600,7 @@ class TableVersion:
|
|
|
576
600
|
def cleanup_index() -> None:
|
|
577
601
|
"""Delete the newly added in-memory index structure"""
|
|
578
602
|
del self.idxs_by_name[idx_name]
|
|
579
|
-
del self.
|
|
603
|
+
del self._tbl_md.index_md[idx_id]
|
|
580
604
|
self.next_idx_id = idx_id
|
|
581
605
|
|
|
582
606
|
# Run cleanup only if there has been an exception; otherwise, skip cleanup.
|
|
@@ -594,47 +618,48 @@ class TableVersion:
|
|
|
594
618
|
|
|
595
619
|
def drop_index(self, idx_id: int) -> None:
|
|
596
620
|
assert not self.is_snapshot
|
|
597
|
-
assert idx_id in self.
|
|
621
|
+
assert idx_id in self._tbl_md.index_md
|
|
598
622
|
|
|
599
623
|
# we're creating a new schema version
|
|
600
624
|
self.version += 1
|
|
601
|
-
preceding_schema_version = self.schema_version
|
|
625
|
+
self.preceding_schema_version = self.schema_version
|
|
602
626
|
self.schema_version = self.version
|
|
603
|
-
idx_md = self.
|
|
627
|
+
idx_md = self._tbl_md.index_md[idx_id]
|
|
604
628
|
idx_md.schema_version_drop = self.schema_version
|
|
605
629
|
assert idx_md.name in self.idxs_by_name
|
|
606
630
|
idx_info = self.idxs_by_name[idx_md.name]
|
|
607
631
|
# remove this index entry from the active indexes (in memory)
|
|
608
632
|
# and the index metadata (in persistent table metadata)
|
|
633
|
+
# TODO: this is wrong, it breaks revert()
|
|
609
634
|
del self.idxs_by_name[idx_md.name]
|
|
610
|
-
del self.
|
|
635
|
+
del self._tbl_md.index_md[idx_id]
|
|
611
636
|
|
|
612
637
|
self._drop_columns([idx_info.val_col, idx_info.undo_col])
|
|
613
|
-
self.
|
|
638
|
+
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
|
|
614
639
|
_logger.info(f'Dropped index {idx_md.name} on table {self.name}')
|
|
615
640
|
|
|
616
641
|
def add_columns(
|
|
617
642
|
self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
|
|
618
643
|
) -> UpdateStatus:
|
|
619
|
-
"""Adds
|
|
644
|
+
"""Adds columns to the table."""
|
|
620
645
|
assert not self.is_snapshot
|
|
621
|
-
assert all(is_valid_identifier(col.name) for col in cols)
|
|
646
|
+
assert all(is_valid_identifier(col.name) for col in cols if col.name is not None)
|
|
622
647
|
assert all(col.stored is not None for col in cols)
|
|
623
|
-
assert all(col.name not in self.cols_by_name for col in cols)
|
|
648
|
+
assert all(col.name not in self.cols_by_name for col in cols if col.name is not None)
|
|
624
649
|
for col in cols:
|
|
625
|
-
col.tbl = self
|
|
650
|
+
col.tbl = self
|
|
626
651
|
col.id = self.next_col_id
|
|
627
652
|
self.next_col_id += 1
|
|
628
653
|
|
|
629
654
|
# we're creating a new schema version
|
|
630
655
|
self.version += 1
|
|
631
|
-
preceding_schema_version = self.schema_version
|
|
656
|
+
self.preceding_schema_version = self.schema_version
|
|
632
657
|
self.schema_version = self.version
|
|
633
658
|
index_cols: dict[Column, tuple[index.BtreeIndex, Column, Column]] = {}
|
|
634
659
|
all_cols: list[Column] = []
|
|
635
660
|
for col in cols:
|
|
636
661
|
all_cols.append(col)
|
|
637
|
-
if self._is_btree_indexable(col):
|
|
662
|
+
if col.name is not None and self._is_btree_indexable(col):
|
|
638
663
|
idx = index.BtreeIndex(col)
|
|
639
664
|
val_col, undo_col = self._create_index_columns(idx)
|
|
640
665
|
index_cols[col] = (idx, val_col, undo_col)
|
|
@@ -642,10 +667,10 @@ class TableVersion:
|
|
|
642
667
|
all_cols.append(undo_col)
|
|
643
668
|
# Add all columns
|
|
644
669
|
status = self._add_columns(all_cols, print_stats=print_stats, on_error=on_error)
|
|
645
|
-
# Create indices and their
|
|
670
|
+
# Create indices and their md records
|
|
646
671
|
for col, (idx, val_col, undo_col) in index_cols.items():
|
|
647
672
|
self._create_index(col, val_col, undo_col, idx_name=None, idx=idx)
|
|
648
|
-
self.
|
|
673
|
+
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
|
|
649
674
|
_logger.info(f'Added columns {[col.name for col in cols]} to table {self.name}, new version: {self.version}')
|
|
650
675
|
|
|
651
676
|
msg = (
|
|
@@ -679,9 +704,23 @@ class TableVersion:
|
|
|
679
704
|
if col.name is not None:
|
|
680
705
|
self.cols_by_name[col.name] = col
|
|
681
706
|
self.cols_by_id[col.id] = col
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
707
|
+
|
|
708
|
+
# also add to stored md
|
|
709
|
+
self._tbl_md.column_md[col.id] = schema.ColumnMd(
|
|
710
|
+
id=col.id,
|
|
711
|
+
col_type=col.col_type.as_dict(),
|
|
712
|
+
is_pk=col.is_pk,
|
|
713
|
+
schema_version_add=col.schema_version_add,
|
|
714
|
+
schema_version_drop=col.schema_version_drop,
|
|
715
|
+
value_expr=col.value_expr.as_dict() if col.value_expr is not None else None,
|
|
716
|
+
stored=col.stored,
|
|
717
|
+
)
|
|
718
|
+
if col.name is not None:
|
|
719
|
+
self._schema_version_md.columns[col.id] = schema.SchemaColumn(
|
|
720
|
+
name=col.name,
|
|
721
|
+
pos=len(self.cols_by_name),
|
|
722
|
+
media_validation=col._media_validation.name.lower() if col._media_validation is not None else None,
|
|
723
|
+
)
|
|
685
724
|
|
|
686
725
|
if col.is_stored:
|
|
687
726
|
self.store_tbl.add_column(col)
|
|
@@ -722,14 +761,16 @@ class TableVersion:
|
|
|
722
761
|
run_cleanup_on_exception(cleanup_on_error)
|
|
723
762
|
plan.close()
|
|
724
763
|
|
|
764
|
+
pxt.catalog.Catalog.get().record_column_dependencies(self)
|
|
765
|
+
|
|
725
766
|
if print_stats:
|
|
726
767
|
plan.ctx.profile.print(num_rows=row_count)
|
|
727
|
-
# TODO
|
|
768
|
+
# TODO: what to do about system columns with exceptions?
|
|
728
769
|
return UpdateStatus(
|
|
729
770
|
num_rows=row_count,
|
|
730
771
|
num_computed_values=row_count,
|
|
731
772
|
num_excs=num_excs,
|
|
732
|
-
cols_with_excs=[f'{col.tbl.
|
|
773
|
+
cols_with_excs=[f'{col.tbl.name}.{col.name}' for col in cols_with_excs if col.name is not None],
|
|
733
774
|
)
|
|
734
775
|
|
|
735
776
|
def drop_column(self, col: Column) -> None:
|
|
@@ -739,7 +780,7 @@ class TableVersion:
|
|
|
739
780
|
|
|
740
781
|
# we're creating a new schema version
|
|
741
782
|
self.version += 1
|
|
742
|
-
preceding_schema_version = self.schema_version
|
|
783
|
+
self.preceding_schema_version = self.schema_version
|
|
743
784
|
self.schema_version = self.version
|
|
744
785
|
|
|
745
786
|
# drop this column and all dependent index columns and indices
|
|
@@ -749,15 +790,17 @@ class TableVersion:
|
|
|
749
790
|
if idx_info.col != col:
|
|
750
791
|
continue
|
|
751
792
|
dropped_cols.extend([idx_info.val_col, idx_info.undo_col])
|
|
752
|
-
idx_md = self.
|
|
793
|
+
idx_md = self._tbl_md.index_md[idx_info.id]
|
|
753
794
|
idx_md.schema_version_drop = self.schema_version
|
|
754
795
|
assert idx_md.name in self.idxs_by_name
|
|
755
796
|
dropped_idx_names.append(idx_md.name)
|
|
797
|
+
|
|
756
798
|
# update idxs_by_name
|
|
757
799
|
for idx_name in dropped_idx_names:
|
|
758
800
|
del self.idxs_by_name[idx_name]
|
|
801
|
+
|
|
759
802
|
self._drop_columns(dropped_cols)
|
|
760
|
-
self.
|
|
803
|
+
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
|
|
761
804
|
_logger.info(f'Dropped column {col.name} from table {self.name}, new version: {self.version}')
|
|
762
805
|
|
|
763
806
|
def _drop_columns(self, cols: Iterable[Column]) -> None:
|
|
@@ -765,21 +808,23 @@ class TableVersion:
|
|
|
765
808
|
assert not self.is_snapshot
|
|
766
809
|
|
|
767
810
|
for col in cols:
|
|
768
|
-
if col.value_expr is not None:
|
|
769
|
-
# update Column.dependent_cols
|
|
770
|
-
for c in self.cols:
|
|
771
|
-
if c == col:
|
|
772
|
-
break
|
|
773
|
-
c.dependent_cols.discard(col)
|
|
774
|
-
|
|
775
811
|
col.schema_version_drop = self.schema_version
|
|
776
812
|
if col.name is not None:
|
|
777
813
|
assert col.name in self.cols_by_name
|
|
778
814
|
del self.cols_by_name[col.name]
|
|
779
815
|
assert col.id in self.cols_by_id
|
|
780
816
|
del self.cols_by_id[col.id]
|
|
817
|
+
# update stored md
|
|
818
|
+
self._tbl_md.column_md[col.id].schema_version_drop = col.schema_version_drop
|
|
819
|
+
if col.name is not None:
|
|
820
|
+
del self._schema_version_md.columns[col.id]
|
|
821
|
+
|
|
822
|
+
# update positions
|
|
823
|
+
for pos, schema_col in enumerate(self._schema_version_md.columns.values()):
|
|
824
|
+
schema_col.pos = pos
|
|
781
825
|
|
|
782
826
|
self.store_tbl.create_sa_tbl()
|
|
827
|
+
pxt.catalog.Catalog.get().record_column_dependencies(self)
|
|
783
828
|
|
|
784
829
|
def rename_column(self, old_name: str, new_name: str) -> None:
|
|
785
830
|
"""Rename a column."""
|
|
@@ -794,13 +839,14 @@ class TableVersion:
|
|
|
794
839
|
del self.cols_by_name[old_name]
|
|
795
840
|
col.name = new_name
|
|
796
841
|
self.cols_by_name[new_name] = col
|
|
842
|
+
self._schema_version_md.columns[col.id].name = new_name
|
|
797
843
|
|
|
798
844
|
# we're creating a new schema version
|
|
799
845
|
self.version += 1
|
|
800
|
-
preceding_schema_version = self.schema_version
|
|
846
|
+
self.preceding_schema_version = self.schema_version
|
|
801
847
|
self.schema_version = self.version
|
|
802
848
|
|
|
803
|
-
self.
|
|
849
|
+
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
|
|
804
850
|
_logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
|
|
805
851
|
|
|
806
852
|
def set_comment(self, new_comment: Optional[str]) -> None:
|
|
@@ -819,9 +865,9 @@ class TableVersion:
|
|
|
819
865
|
def _create_schema_version(self) -> None:
|
|
820
866
|
# we're creating a new schema version
|
|
821
867
|
self.version += 1
|
|
822
|
-
preceding_schema_version = self.schema_version
|
|
868
|
+
self.preceding_schema_version = self.schema_version
|
|
823
869
|
self.schema_version = self.version
|
|
824
|
-
self.
|
|
870
|
+
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
|
|
825
871
|
_logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
|
|
826
872
|
|
|
827
873
|
def insert(
|
|
@@ -836,7 +882,7 @@ class TableVersion:
|
|
|
836
882
|
"""
|
|
837
883
|
from pixeltable.plan import Planner
|
|
838
884
|
|
|
839
|
-
assert self.is_insertable
|
|
885
|
+
assert self.is_insertable
|
|
840
886
|
assert (rows is None) != (df is None) # Exactly one must be specified
|
|
841
887
|
if rows is not None:
|
|
842
888
|
plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
|
|
@@ -846,8 +892,8 @@ class TableVersion:
|
|
|
846
892
|
# this is a base table; we generate rowids during the insert
|
|
847
893
|
def rowids() -> Iterator[int]:
|
|
848
894
|
while True:
|
|
849
|
-
rowid = self.
|
|
850
|
-
self.
|
|
895
|
+
rowid = self.next_row_id
|
|
896
|
+
self.next_row_id += 1
|
|
851
897
|
yield rowid
|
|
852
898
|
|
|
853
899
|
return self._insert(plan, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception)
|
|
@@ -872,7 +918,7 @@ class TableVersion:
|
|
|
872
918
|
result.num_excs = num_excs
|
|
873
919
|
result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
|
|
874
920
|
result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
|
|
875
|
-
self.
|
|
921
|
+
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
|
|
876
922
|
|
|
877
923
|
# update views
|
|
878
924
|
for view in self.mutable_views:
|
|
@@ -1036,13 +1082,13 @@ class TableVersion:
|
|
|
1036
1082
|
self.store_tbl.delete_rows(
|
|
1037
1083
|
self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause
|
|
1038
1084
|
)
|
|
1039
|
-
self.
|
|
1085
|
+
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
|
|
1040
1086
|
|
|
1041
1087
|
if cascade:
|
|
1042
1088
|
base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
|
|
1043
1089
|
# propagate to views
|
|
1044
1090
|
for view in self.mutable_views:
|
|
1045
|
-
recomputed_cols = [col for col in recomputed_view_cols if col.tbl == view]
|
|
1091
|
+
recomputed_cols = [col for col in recomputed_view_cols if col.tbl.id == view.id]
|
|
1046
1092
|
plan = None
|
|
1047
1093
|
if len(recomputed_cols) > 0:
|
|
1048
1094
|
from pixeltable.plan import Planner
|
|
@@ -1063,7 +1109,7 @@ class TableVersion:
|
|
|
1063
1109
|
Args:
|
|
1064
1110
|
where: a predicate to filter rows to delete.
|
|
1065
1111
|
"""
|
|
1066
|
-
assert self.is_insertable
|
|
1112
|
+
assert self.is_insertable
|
|
1067
1113
|
from pixeltable.exprs import Expr
|
|
1068
1114
|
from pixeltable.plan import Planner
|
|
1069
1115
|
|
|
@@ -1091,14 +1137,22 @@ class TableVersion:
|
|
|
1091
1137
|
Returns:
|
|
1092
1138
|
number of deleted rows
|
|
1093
1139
|
"""
|
|
1140
|
+
# print(f'calling sql_expr()')
|
|
1094
1141
|
sql_where_clause = where.sql_expr(exprs.SqlElementCache()) if where is not None else None
|
|
1142
|
+
# #print(f'sql_where_clause={str(sql_where_clause) if sql_where_clause is not None else None}')
|
|
1143
|
+
# sql_cols: list[sql.Column] = []
|
|
1144
|
+
# def collect_cols(col) -> None:
|
|
1145
|
+
# sql_cols.append(col)
|
|
1146
|
+
# sql.sql.visitors.traverse(sql_where_clause, {}, {'column': collect_cols})
|
|
1147
|
+
# x = [f'{str(c)}:{hash(c)}:{id(c.table)}' for c in sql_cols]
|
|
1148
|
+
# print(f'where_clause cols: {x}')
|
|
1095
1149
|
num_rows = self.store_tbl.delete_rows(
|
|
1096
1150
|
self.version + 1, base_versions=base_versions, match_on_vmin=False, where_clause=sql_where_clause
|
|
1097
1151
|
)
|
|
1098
1152
|
if num_rows > 0:
|
|
1099
1153
|
# we're creating a new version
|
|
1100
1154
|
self.version += 1
|
|
1101
|
-
self.
|
|
1155
|
+
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
|
|
1102
1156
|
for view in self.mutable_views:
|
|
1103
1157
|
num_rows += view.get().propagate_delete(
|
|
1104
1158
|
where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
|
|
@@ -1112,17 +1166,13 @@ class TableVersion:
|
|
|
1112
1166
|
raise excs.Error('Cannot revert version 0')
|
|
1113
1167
|
self._revert()
|
|
1114
1168
|
|
|
1115
|
-
def _delete_column(self, col: Column) -> None:
|
|
1116
|
-
"""Physically remove the column from the schema and the store table"""
|
|
1117
|
-
if col.is_stored:
|
|
1118
|
-
self.store_tbl.drop_column(col)
|
|
1119
|
-
self.cols.remove(col)
|
|
1120
|
-
if col.name is not None:
|
|
1121
|
-
del self.cols_by_name[col.name]
|
|
1122
|
-
del self.cols_by_id[col.id]
|
|
1123
|
-
|
|
1124
1169
|
def _revert(self) -> None:
|
|
1125
|
-
"""
|
|
1170
|
+
"""
|
|
1171
|
+
Reverts the stored metadata for this table version and propagates to views.
|
|
1172
|
+
|
|
1173
|
+
Doesn't attempt to revert the in-memory metadata, but instead invalidates this TableVersion instance
|
|
1174
|
+
and relies on Catalog to reload it
|
|
1175
|
+
"""
|
|
1126
1176
|
conn = Env.get().conn
|
|
1127
1177
|
# make sure we don't have a snapshot referencing this version
|
|
1128
1178
|
# (unclear how to express this with sqlalchemy)
|
|
@@ -1155,109 +1205,206 @@ class TableVersion:
|
|
|
1155
1205
|
stmt = sql.update(self.store_tbl.sa_tbl).values(set_clause).where(self.store_tbl.sa_tbl.c.v_max == self.version)
|
|
1156
1206
|
conn.execute(stmt)
|
|
1157
1207
|
|
|
1158
|
-
# revert schema changes
|
|
1208
|
+
# revert schema changes:
|
|
1209
|
+
# - undo changes to self._tbl_md and write that back
|
|
1210
|
+
# - delete newly-added TableVersion/TableSchemaVersion records
|
|
1159
1211
|
if self.version == self.schema_version:
|
|
1160
|
-
# delete newly-added columns
|
|
1212
|
+
# physically delete newly-added columns and remove them from the stored md
|
|
1161
1213
|
added_cols = [col for col in self.cols if col.schema_version_add == self.schema_version]
|
|
1162
1214
|
if len(added_cols) > 0:
|
|
1163
|
-
next_col_id = min(col.id for col in added_cols)
|
|
1215
|
+
self._tbl_md.next_col_id = min(col.id for col in added_cols)
|
|
1164
1216
|
for col in added_cols:
|
|
1165
|
-
|
|
1166
|
-
|
|
1217
|
+
if col.is_stored:
|
|
1218
|
+
self.store_tbl.drop_column(col)
|
|
1219
|
+
del self._tbl_md.column_md[col.id]
|
|
1167
1220
|
|
|
1168
1221
|
# remove newly-added indices from the lookup structures
|
|
1169
1222
|
# (the value and undo columns got removed in the preceding step)
|
|
1170
|
-
added_idx_md = [md for md in self.
|
|
1223
|
+
added_idx_md = [md for md in self._tbl_md.index_md.values() if md.schema_version_add == self.schema_version]
|
|
1171
1224
|
if len(added_idx_md) > 0:
|
|
1172
|
-
next_idx_id = min(md.id for md in added_idx_md)
|
|
1225
|
+
self._tbl_md.next_idx_id = min(md.id for md in added_idx_md)
|
|
1173
1226
|
for md in added_idx_md:
|
|
1174
|
-
|
|
1175
|
-
del self.
|
|
1176
|
-
self.next_idx_id = next_idx_id
|
|
1227
|
+
# TODO: drop the index
|
|
1228
|
+
del self._tbl_md.index_md[md.id]
|
|
1177
1229
|
|
|
1178
1230
|
# make newly-dropped columns visible again
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1231
|
+
dropped_col_md = [
|
|
1232
|
+
md for md in self._tbl_md.column_md.values() if md.schema_version_drop == self.schema_version
|
|
1233
|
+
]
|
|
1234
|
+
for col_md in dropped_col_md:
|
|
1235
|
+
col_md.schema_version_drop = None
|
|
1182
1236
|
|
|
1183
1237
|
# make newly-dropped indices visible again
|
|
1184
|
-
dropped_idx_md = [
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
# we need to determine the preceding schema version and reload the schema
|
|
1190
|
-
schema_version_md_dict = (
|
|
1191
|
-
session.query(schema.TableSchemaVersion.md)
|
|
1192
|
-
.where(schema.TableSchemaVersion.tbl_id == self.id)
|
|
1193
|
-
.where(schema.TableSchemaVersion.schema_version == self.schema_version)
|
|
1194
|
-
.scalar()
|
|
1195
|
-
)
|
|
1196
|
-
preceding_schema_version = schema_version_md_dict['preceding_schema_version']
|
|
1197
|
-
preceding_schema_version_md_dict = (
|
|
1198
|
-
session.query(schema.TableSchemaVersion.md)
|
|
1199
|
-
.where(schema.TableSchemaVersion.tbl_id == self.id)
|
|
1200
|
-
.where(schema.TableSchemaVersion.schema_version == preceding_schema_version)
|
|
1201
|
-
.scalar()
|
|
1202
|
-
)
|
|
1203
|
-
preceding_schema_version_md = schema.md_from_dict(
|
|
1204
|
-
schema.TableSchemaVersionMd, preceding_schema_version_md_dict
|
|
1205
|
-
)
|
|
1206
|
-
tbl_md = self._create_tbl_md()
|
|
1207
|
-
self._init_schema(tbl_md, preceding_schema_version_md)
|
|
1238
|
+
dropped_idx_md = [
|
|
1239
|
+
md for md in self._tbl_md.index_md.values() if md.schema_version_drop == self.schema_version
|
|
1240
|
+
]
|
|
1241
|
+
for idx_md in dropped_idx_md:
|
|
1242
|
+
idx_md.schema_version_drop = None
|
|
1208
1243
|
|
|
1209
1244
|
conn.execute(
|
|
1210
1245
|
sql.delete(schema.TableSchemaVersion.__table__)
|
|
1211
1246
|
.where(schema.TableSchemaVersion.tbl_id == self.id)
|
|
1212
1247
|
.where(schema.TableSchemaVersion.schema_version == self.schema_version)
|
|
1213
1248
|
)
|
|
1214
|
-
self.
|
|
1215
|
-
self.comment = preceding_schema_version_md.comment
|
|
1216
|
-
self.num_retained_versions = preceding_schema_version_md.num_retained_versions
|
|
1249
|
+
self._tbl_md.current_schema_version = self._schema_version_md.preceding_schema_version
|
|
1217
1250
|
|
|
1218
1251
|
conn.execute(
|
|
1219
1252
|
sql.delete(schema.TableVersion.__table__)
|
|
1220
1253
|
.where(schema.TableVersion.tbl_id == self.id)
|
|
1221
1254
|
.where(schema.TableVersion.version == self.version)
|
|
1222
1255
|
)
|
|
1256
|
+
|
|
1223
1257
|
self.version -= 1
|
|
1224
|
-
|
|
1225
|
-
sql.update(schema.Table.__table__)
|
|
1226
|
-
.values({schema.Table.md: dataclasses.asdict(self._create_tbl_md())})
|
|
1227
|
-
.where(schema.Table.id == self.id)
|
|
1228
|
-
)
|
|
1258
|
+
self._write_md(new_version=False, new_version_ts=0, new_schema_version=False)
|
|
1229
1259
|
|
|
1230
1260
|
# propagate to views
|
|
1261
|
+
views_str = ', '.join([str(v.id) for v in self.mutable_views])
|
|
1262
|
+
print(f'revert(): mutable_views={views_str}')
|
|
1231
1263
|
for view in self.mutable_views:
|
|
1232
1264
|
view.get()._revert()
|
|
1265
|
+
|
|
1266
|
+
# force reload on next operation
|
|
1267
|
+
self.is_validated = False
|
|
1268
|
+
pxt.catalog.Catalog.get().remove_tbl_version(self)
|
|
1233
1269
|
_logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
|
|
1234
1270
|
|
|
1235
|
-
def _init_external_stores(self
|
|
1236
|
-
for store_md in tbl_md.external_stores:
|
|
1271
|
+
def _init_external_stores(self) -> None:
|
|
1272
|
+
for store_md in self.tbl_md.external_stores:
|
|
1237
1273
|
store_cls = resolve_symbol(store_md['class'])
|
|
1238
1274
|
assert isinstance(store_cls, type) and issubclass(store_cls, pxt.io.ExternalStore)
|
|
1239
1275
|
store = store_cls.from_dict(store_md['md'])
|
|
1240
1276
|
self.external_stores[store.name] = store
|
|
1241
1277
|
|
|
1242
1278
|
def link_external_store(self, store: pxt.io.ExternalStore) -> None:
|
|
1243
|
-
|
|
1279
|
+
self.version += 1
|
|
1280
|
+
self.preceding_schema_version = self.schema_version
|
|
1281
|
+
self.schema_version = self.version
|
|
1282
|
+
|
|
1244
1283
|
self.external_stores[store.name] = store
|
|
1245
|
-
self.
|
|
1284
|
+
self._tbl_md.external_stores.append(
|
|
1285
|
+
{'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()}
|
|
1286
|
+
)
|
|
1287
|
+
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
|
|
1288
|
+
|
|
1289
|
+
def unlink_external_store(self, store: pxt.io.ExternalStore) -> None:
|
|
1290
|
+
del self.external_stores[store.name]
|
|
1291
|
+
self.version += 1
|
|
1292
|
+
self.preceding_schema_version = self.schema_version
|
|
1293
|
+
self.schema_version = self.version
|
|
1294
|
+
idx = next(i for i, store_md in enumerate(self._tbl_md.external_stores) if store_md['md']['name'] == store.name)
|
|
1295
|
+
self._tbl_md.external_stores.pop(idx)
|
|
1296
|
+
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
|
|
1297
|
+
|
|
1298
|
+
@property
|
|
1299
|
+
def tbl_md(self) -> schema.TableMd:
|
|
1300
|
+
return self._tbl_md
|
|
1301
|
+
|
|
1302
|
+
@property
|
|
1303
|
+
def schema_version_md(self) -> schema.TableSchemaVersionMd:
|
|
1304
|
+
return self._schema_version_md
|
|
1305
|
+
|
|
1306
|
+
@property
|
|
1307
|
+
def view_md(self) -> Optional[schema.ViewMd]:
|
|
1308
|
+
return self._tbl_md.view_md
|
|
1309
|
+
|
|
1310
|
+
@property
|
|
1311
|
+
def name(self) -> str:
|
|
1312
|
+
return self._tbl_md.name
|
|
1313
|
+
|
|
1314
|
+
@property
|
|
1315
|
+
def user(self) -> Optional[str]:
|
|
1316
|
+
return self._tbl_md.user
|
|
1317
|
+
|
|
1318
|
+
@property
|
|
1319
|
+
def is_replica(self) -> bool:
|
|
1320
|
+
return self._tbl_md.is_replica
|
|
1321
|
+
|
|
1322
|
+
@property
|
|
1323
|
+
def comment(self) -> str:
|
|
1324
|
+
return self._schema_version_md.comment
|
|
1325
|
+
|
|
1326
|
+
@comment.setter
|
|
1327
|
+
def comment(self, c: str) -> None:
|
|
1328
|
+
assert self.effective_version is None
|
|
1329
|
+
self._schema_version_md.comment = c
|
|
1330
|
+
|
|
1331
|
+
@property
|
|
1332
|
+
def num_retained_versions(self) -> int:
|
|
1333
|
+
return self._schema_version_md.num_retained_versions
|
|
1334
|
+
|
|
1335
|
+
@num_retained_versions.setter
|
|
1336
|
+
def num_retained_versions(self, n: int) -> None:
|
|
1337
|
+
assert self.effective_version is None
|
|
1338
|
+
self._schema_version_md.num_retained_versions = n
|
|
1339
|
+
|
|
1340
|
+
@property
|
|
1341
|
+
def version(self) -> int:
|
|
1342
|
+
# if this is a snapshot instance, we need to ignore current_version
|
|
1343
|
+
return self._tbl_md.current_version if self.effective_version is None else self.effective_version
|
|
1344
|
+
|
|
1345
|
+
@version.setter
|
|
1346
|
+
def version(self, version: int) -> None:
|
|
1347
|
+
assert self.effective_version is None
|
|
1348
|
+
self._tbl_md.current_version = version
|
|
1349
|
+
|
|
1350
|
+
@property
|
|
1351
|
+
def schema_version(self) -> int:
|
|
1352
|
+
return self._schema_version_md.schema_version
|
|
1353
|
+
|
|
1354
|
+
@schema_version.setter
|
|
1355
|
+
def schema_version(self, version: int) -> None:
|
|
1356
|
+
assert self.effective_version is None
|
|
1357
|
+
self._tbl_md.current_schema_version = version
|
|
1358
|
+
self._schema_version_md.schema_version = version
|
|
1359
|
+
|
|
1360
|
+
@property
|
|
1361
|
+
def preceding_schema_version(self) -> int:
|
|
1362
|
+
return self._schema_version_md.preceding_schema_version
|
|
1363
|
+
|
|
1364
|
+
@preceding_schema_version.setter
|
|
1365
|
+
def preceding_schema_version(self, v: int) -> None:
|
|
1366
|
+
assert self.effective_version is None
|
|
1367
|
+
self._schema_version_md.preceding_schema_version = v
|
|
1368
|
+
|
|
1369
|
+
@property
|
|
1370
|
+
def media_validation(self) -> MediaValidation:
|
|
1371
|
+
return MediaValidation[self._schema_version_md.media_validation.upper()]
|
|
1372
|
+
|
|
1373
|
+
@property
|
|
1374
|
+
def next_col_id(self) -> int:
|
|
1375
|
+
return self._tbl_md.next_col_id
|
|
1376
|
+
|
|
1377
|
+
@next_col_id.setter
|
|
1378
|
+
def next_col_id(self, id: int) -> None:
|
|
1379
|
+
assert self.effective_version is None
|
|
1380
|
+
self._tbl_md.next_col_id = id
|
|
1246
1381
|
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1382
|
+
@property
|
|
1383
|
+
def next_idx_id(self) -> int:
|
|
1384
|
+
return self._tbl_md.next_idx_id
|
|
1385
|
+
|
|
1386
|
+
@next_idx_id.setter
|
|
1387
|
+
def next_idx_id(self, id: int) -> None:
|
|
1388
|
+
assert self.effective_version is None
|
|
1389
|
+
self._tbl_md.next_idx_id = id
|
|
1390
|
+
|
|
1391
|
+
@property
|
|
1392
|
+
def next_row_id(self) -> int:
|
|
1393
|
+
return self._tbl_md.next_row_id
|
|
1253
1394
|
|
|
1254
|
-
|
|
1255
|
-
|
|
1395
|
+
@next_row_id.setter
|
|
1396
|
+
def next_row_id(self, id: int) -> None:
|
|
1397
|
+
assert self.effective_version is None
|
|
1398
|
+
self._tbl_md.next_row_id = id
|
|
1256
1399
|
|
|
1257
1400
|
@property
|
|
1258
1401
|
def is_snapshot(self) -> bool:
|
|
1259
1402
|
return self.effective_version is not None
|
|
1260
1403
|
|
|
1404
|
+
@property
|
|
1405
|
+
def is_mutable(self) -> bool:
|
|
1406
|
+
return not self.is_snapshot and not self.is_replica
|
|
1407
|
+
|
|
1261
1408
|
@property
|
|
1262
1409
|
def is_view(self) -> bool:
|
|
1263
1410
|
return self.view_md is not None
|
|
@@ -1270,6 +1417,7 @@ class TableVersion:
|
|
|
1270
1417
|
def is_component_view(self) -> bool:
|
|
1271
1418
|
return self.iterator_cls is not None
|
|
1272
1419
|
|
|
1420
|
+
@property
|
|
1273
1421
|
def is_insertable(self) -> bool:
|
|
1274
1422
|
"""Returns True if this corresponds to an InsertableTable"""
|
|
1275
1423
|
return not self.is_snapshot and not self.is_view
|
|
@@ -1307,18 +1455,6 @@ class TableVersion:
|
|
|
1307
1455
|
names = [c.name for c in self.cols_by_name.values() if c.is_computed]
|
|
1308
1456
|
return names
|
|
1309
1457
|
|
|
1310
|
-
def _record_refd_columns(self, col: Column) -> None:
|
|
1311
|
-
"""Update Column.dependent_cols for all cols referenced in col.value_expr."""
|
|
1312
|
-
from pixeltable import exprs
|
|
1313
|
-
|
|
1314
|
-
if col.value_expr_dict is not None:
|
|
1315
|
-
# if we have a value_expr_dict, use that instead of instantiating the value_expr
|
|
1316
|
-
refd_cols = exprs.Expr.get_refd_columns(col.value_expr_dict)
|
|
1317
|
-
else:
|
|
1318
|
-
refd_cols = [e.col for e in col.value_expr.subexprs(expr_class=exprs.ColumnRef)]
|
|
1319
|
-
for refd_col in refd_cols:
|
|
1320
|
-
refd_col.dependent_cols.add(col)
|
|
1321
|
-
|
|
1322
1458
|
def get_idx_val_columns(self, cols: Iterable[Column]) -> set[Column]:
|
|
1323
1459
|
result = {info.val_col for col in cols for info in col.get_idx_info().values()}
|
|
1324
1460
|
return result
|
|
@@ -1327,7 +1463,8 @@ class TableVersion:
|
|
|
1327
1463
|
"""
|
|
1328
1464
|
Return the set of columns that transitively depend on any of the given ones.
|
|
1329
1465
|
"""
|
|
1330
|
-
|
|
1466
|
+
cat = pxt.catalog.Catalog.get()
|
|
1467
|
+
result = set().union(*[cat.get_column_dependents(col.tbl.id, col.id) for col in cols])
|
|
1331
1468
|
if len(result) > 0:
|
|
1332
1469
|
result.update(self.get_dependent_columns(result))
|
|
1333
1470
|
return result
|
|
@@ -1361,24 +1498,6 @@ class TableVersion:
|
|
|
1361
1498
|
{'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()} for store in stores
|
|
1362
1499
|
]
|
|
1363
1500
|
|
|
1364
|
-
def _create_tbl_md(self) -> schema.TableMd:
|
|
1365
|
-
return schema.TableMd(
|
|
1366
|
-
tbl_id=str(self.id),
|
|
1367
|
-
name=self.name,
|
|
1368
|
-
user=self.user,
|
|
1369
|
-
is_replica=self.is_replica,
|
|
1370
|
-
current_version=self.version,
|
|
1371
|
-
current_schema_version=self.schema_version,
|
|
1372
|
-
next_col_id=self.next_col_id,
|
|
1373
|
-
next_idx_id=self.next_idx_id,
|
|
1374
|
-
next_row_id=self.next_rowid,
|
|
1375
|
-
column_md=self._create_column_md(self.cols),
|
|
1376
|
-
index_md=self.idx_md,
|
|
1377
|
-
external_stores=self._create_stores_md(self.external_stores.values()),
|
|
1378
|
-
view_md=self.view_md,
|
|
1379
|
-
additional_md={},
|
|
1380
|
-
)
|
|
1381
|
-
|
|
1382
1501
|
def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
|
|
1383
1502
|
return schema.TableVersionMd(
|
|
1384
1503
|
tbl_id=str(self.id),
|