pixeltable 0.4.17__py3-none-any.whl → 0.4.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/_version.py +1 -0
- pixeltable/catalog/catalog.py +144 -118
- pixeltable/catalog/column.py +104 -115
- pixeltable/catalog/globals.py +1 -2
- pixeltable/catalog/insertable_table.py +44 -49
- pixeltable/catalog/path.py +3 -4
- pixeltable/catalog/schema_object.py +4 -4
- pixeltable/catalog/table.py +139 -124
- pixeltable/catalog/table_metadata.py +6 -6
- pixeltable/catalog/table_version.py +315 -246
- pixeltable/catalog/table_version_handle.py +4 -4
- pixeltable/catalog/table_version_path.py +9 -10
- pixeltable/catalog/tbl_ops.py +9 -3
- pixeltable/catalog/view.py +34 -28
- pixeltable/config.py +14 -10
- pixeltable/dataframe.py +69 -78
- pixeltable/env.py +78 -64
- pixeltable/exec/aggregation_node.py +6 -6
- pixeltable/exec/cache_prefetch_node.py +10 -10
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +5 -5
- pixeltable/exec/expr_eval/evaluators.py +6 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
- pixeltable/exec/expr_eval/globals.py +6 -6
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +11 -11
- pixeltable/exec/in_memory_data_node.py +2 -2
- pixeltable/exec/object_store_save_node.py +14 -17
- pixeltable/exec/sql_node.py +28 -27
- pixeltable/exprs/arithmetic_expr.py +4 -4
- pixeltable/exprs/array_slice.py +2 -2
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +61 -74
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +3 -3
- pixeltable/exprs/data_row.py +12 -12
- pixeltable/exprs/expr.py +41 -31
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +3 -3
- pixeltable/exprs/function_call.py +14 -14
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +8 -8
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +6 -6
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +14 -14
- pixeltable/exprs/rowid_ref.py +8 -8
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +3 -3
- pixeltable/func/function.py +15 -17
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +2 -2
- pixeltable/func/query_template_function.py +16 -16
- pixeltable/func/signature.py +14 -14
- pixeltable/func/tools.py +11 -11
- pixeltable/func/udf.py +16 -18
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +7 -7
- pixeltable/functions/audio.py +76 -0
- pixeltable/functions/bedrock.py +6 -6
- pixeltable/functions/deepseek.py +4 -4
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +6 -6
- pixeltable/functions/globals.py +12 -12
- pixeltable/functions/groq.py +4 -4
- pixeltable/functions/huggingface.py +1033 -6
- pixeltable/functions/image.py +7 -10
- pixeltable/functions/llama_cpp.py +7 -7
- pixeltable/functions/math.py +2 -3
- pixeltable/functions/mistralai.py +3 -3
- pixeltable/functions/ollama.py +9 -9
- pixeltable/functions/openai.py +21 -21
- pixeltable/functions/openrouter.py +7 -7
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +7 -8
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/video.py +36 -31
- pixeltable/functions/vision.py +6 -6
- pixeltable/functions/whisper.py +7 -7
- pixeltable/functions/whisperx.py +16 -16
- pixeltable/globals.py +75 -40
- pixeltable/index/base.py +12 -8
- pixeltable/index/btree.py +19 -22
- pixeltable/index/embedding_index.py +30 -39
- pixeltable/io/datarows.py +3 -3
- pixeltable/io/external_store.py +13 -16
- pixeltable/io/fiftyone.py +5 -5
- pixeltable/io/globals.py +5 -5
- pixeltable/io/hf_datasets.py +4 -4
- pixeltable/io/label_studio.py +12 -12
- pixeltable/io/pandas.py +6 -6
- pixeltable/io/parquet.py +2 -2
- pixeltable/io/table_data_conduit.py +12 -12
- pixeltable/io/utils.py +2 -2
- pixeltable/iterators/audio.py +2 -2
- pixeltable/iterators/document.py +88 -57
- pixeltable/iterators/video.py +66 -37
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_38.py +2 -2
- pixeltable/metadata/converters/convert_39.py +1 -2
- pixeltable/metadata/converters/util.py +11 -13
- pixeltable/metadata/schema.py +22 -21
- pixeltable/metadata/utils.py +2 -6
- pixeltable/mypy/mypy_plugin.py +5 -5
- pixeltable/plan.py +32 -34
- pixeltable/share/packager.py +7 -7
- pixeltable/share/publish.py +3 -3
- pixeltable/store.py +126 -41
- pixeltable/type_system.py +43 -46
- pixeltable/utils/__init__.py +1 -2
- pixeltable/utils/arrow.py +4 -4
- pixeltable/utils/av.py +74 -38
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +1 -2
- pixeltable/utils/dbms.py +15 -19
- pixeltable/utils/description_helper.py +2 -3
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +2 -2
- pixeltable/utils/filecache.py +5 -5
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +9 -9
- pixeltable/utils/local_store.py +17 -17
- pixeltable/utils/object_stores.py +59 -43
- pixeltable/utils/s3_store.py +35 -30
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/METADATA +4 -4
- pixeltable-0.4.19.dist-info/RECORD +213 -0
- pixeltable/__version__.py +0 -3
- pixeltable-0.4.17.dist-info/RECORD +0 -211
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
|
@@ -3,19 +3,21 @@ from __future__ import annotations
|
|
|
3
3
|
import copy
|
|
4
4
|
import dataclasses
|
|
5
5
|
import importlib
|
|
6
|
+
import itertools
|
|
6
7
|
import logging
|
|
7
8
|
import time
|
|
8
9
|
import uuid
|
|
9
|
-
from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Literal
|
|
10
11
|
from uuid import UUID
|
|
11
12
|
|
|
12
13
|
import jsonschema.exceptions
|
|
13
14
|
import sqlalchemy as sql
|
|
14
15
|
from sqlalchemy import exc as sql_exc
|
|
15
16
|
|
|
16
|
-
import pixeltable as pxt
|
|
17
17
|
import pixeltable.exceptions as excs
|
|
18
|
-
|
|
18
|
+
import pixeltable.exprs as exprs
|
|
19
|
+
import pixeltable.index as index
|
|
20
|
+
import pixeltable.type_system as ts
|
|
19
21
|
from pixeltable.env import Env
|
|
20
22
|
from pixeltable.iterators import ComponentIterator
|
|
21
23
|
from pixeltable.metadata import schema
|
|
@@ -31,8 +33,12 @@ from .update_status import RowCountStats, UpdateStatus
|
|
|
31
33
|
if TYPE_CHECKING:
|
|
32
34
|
from pixeltable import exec, store
|
|
33
35
|
from pixeltable.catalog.table_version_handle import TableVersionHandle
|
|
36
|
+
from pixeltable.dataframe import DataFrame
|
|
37
|
+
from pixeltable.io import ExternalStore
|
|
34
38
|
from pixeltable.plan import SampleClause
|
|
35
39
|
|
|
40
|
+
from .table_version_path import TableVersionPath
|
|
41
|
+
|
|
36
42
|
_logger = logging.getLogger('pixeltable')
|
|
37
43
|
|
|
38
44
|
|
|
@@ -77,14 +83,14 @@ class TableVersion:
|
|
|
77
83
|
_version_md: schema.TableVersionMd
|
|
78
84
|
_schema_version_md: schema.TableSchemaVersionMd
|
|
79
85
|
|
|
80
|
-
effective_version:
|
|
81
|
-
path:
|
|
82
|
-
base:
|
|
83
|
-
predicate:
|
|
84
|
-
sample_clause:
|
|
86
|
+
effective_version: int | None
|
|
87
|
+
path: 'TableVersionPath' | None # only set for live tables; needed to resolve computed cols
|
|
88
|
+
base: TableVersionHandle | None # only set for views
|
|
89
|
+
predicate: exprs.Expr | None
|
|
90
|
+
sample_clause: 'SampleClause' | None
|
|
85
91
|
|
|
86
|
-
iterator_cls:
|
|
87
|
-
iterator_args:
|
|
92
|
+
iterator_cls: type[ComponentIterator] | None
|
|
93
|
+
iterator_args: exprs.InlineDict | None
|
|
88
94
|
num_iterator_cols: int
|
|
89
95
|
|
|
90
96
|
# target for data operation propagation (only set for non-snapshots, and only records non-snapshot views)
|
|
@@ -96,11 +102,19 @@ class TableVersion:
|
|
|
96
102
|
cols_by_name: dict[str, Column]
|
|
97
103
|
# contains only columns visible in this version, both system and user
|
|
98
104
|
cols_by_id: dict[int, Column]
|
|
99
|
-
|
|
105
|
+
|
|
106
|
+
# True if this TableVersion instance can have indices:
|
|
107
|
+
# - live version of a mutable table
|
|
108
|
+
# - the most recent version of a replica
|
|
109
|
+
supports_idxs: bool
|
|
110
|
+
|
|
111
|
+
# only populated with indices visible in this TableVersion instance
|
|
112
|
+
idxs: dict[int, TableVersion.IndexInfo] # key: index id
|
|
100
113
|
idxs_by_name: dict[str, TableVersion.IndexInfo]
|
|
114
|
+
idxs_by_col: dict[QColumnId, list[TableVersion.IndexInfo]]
|
|
101
115
|
|
|
102
|
-
external_stores: dict[str,
|
|
103
|
-
store_tbl:
|
|
116
|
+
external_stores: dict[str, ExternalStore]
|
|
117
|
+
store_tbl: 'store.StoreBase' | None
|
|
104
118
|
|
|
105
119
|
is_initialized: bool # True if init() has been called
|
|
106
120
|
|
|
@@ -123,11 +137,11 @@ class TableVersion:
|
|
|
123
137
|
id: UUID,
|
|
124
138
|
tbl_md: schema.TableMd,
|
|
125
139
|
version_md: schema.TableVersionMd,
|
|
126
|
-
effective_version:
|
|
140
|
+
effective_version: int | None,
|
|
127
141
|
schema_version_md: schema.TableSchemaVersionMd,
|
|
128
142
|
mutable_views: list[TableVersionHandle],
|
|
129
|
-
base_path:
|
|
130
|
-
base:
|
|
143
|
+
base_path: 'TableVersionPath' | None = None,
|
|
144
|
+
base: TableVersionHandle | None = None,
|
|
131
145
|
):
|
|
132
146
|
self.is_validated = True # a freshly constructed instance is always valid
|
|
133
147
|
self.is_initialized = False
|
|
@@ -180,7 +194,12 @@ class TableVersion:
|
|
|
180
194
|
self.cols = []
|
|
181
195
|
self.cols_by_name = {}
|
|
182
196
|
self.cols_by_id = {}
|
|
197
|
+
self.idxs = {}
|
|
183
198
|
self.idxs_by_name = {}
|
|
199
|
+
self.idxs_by_col = {}
|
|
200
|
+
self.supports_idxs = self.effective_version is None or (
|
|
201
|
+
self.is_replica and self.effective_version == self.tbl_md.current_version
|
|
202
|
+
)
|
|
184
203
|
self.external_stores = {}
|
|
185
204
|
|
|
186
205
|
def __hash__(self) -> int:
|
|
@@ -219,19 +238,27 @@ class TableVersion:
|
|
|
219
238
|
num_retained_versions: int,
|
|
220
239
|
comment: str,
|
|
221
240
|
media_validation: MediaValidation,
|
|
222
|
-
|
|
241
|
+
create_default_idxs: bool,
|
|
242
|
+
view_md: schema.ViewMd | None = None,
|
|
223
243
|
) -> TableVersionMd:
|
|
244
|
+
from .table_version_handle import TableVersionHandle
|
|
245
|
+
|
|
224
246
|
user = Env.get().user
|
|
225
247
|
timestamp = time.time()
|
|
226
248
|
|
|
249
|
+
tbl_id = uuid.uuid4()
|
|
250
|
+
tbl_id_str = str(tbl_id)
|
|
251
|
+
tbl_handle = TableVersionHandle(tbl_id, None)
|
|
252
|
+
column_ids = itertools.count(0)
|
|
253
|
+
index_ids = itertools.count(0)
|
|
254
|
+
|
|
227
255
|
# assign ids, create metadata
|
|
228
|
-
cols_by_name: dict[str, Column] = {}
|
|
229
256
|
column_md: dict[int, schema.ColumnMd] = {}
|
|
230
257
|
schema_col_md: dict[int, schema.SchemaColumn] = {}
|
|
231
258
|
for pos, col in enumerate(cols):
|
|
232
|
-
col.
|
|
259
|
+
col.tbl_handle = tbl_handle
|
|
260
|
+
col.id = next(column_ids)
|
|
233
261
|
col.schema_version_add = 0
|
|
234
|
-
cols_by_name[col.name] = col
|
|
235
262
|
if col.is_computed:
|
|
236
263
|
col.check_value_expr()
|
|
237
264
|
col_md, sch_md = col.to_md(pos)
|
|
@@ -239,8 +266,39 @@ class TableVersion:
|
|
|
239
266
|
column_md[col.id] = col_md
|
|
240
267
|
schema_col_md[col.id] = sch_md
|
|
241
268
|
|
|
242
|
-
|
|
243
|
-
|
|
269
|
+
index_md: dict[int, schema.IndexMd] = {}
|
|
270
|
+
if create_default_idxs and (view_md is None or not view_md.is_snapshot):
|
|
271
|
+
index_cols: list[Column] = []
|
|
272
|
+
for col in (c for c in cols if cls._is_btree_indexable(c)):
|
|
273
|
+
idx = index.BtreeIndex()
|
|
274
|
+
val_col, undo_col = cls._create_index_columns(col, idx, 0, tbl_handle, id_cb=lambda: next(column_ids))
|
|
275
|
+
index_cols.extend([val_col, undo_col])
|
|
276
|
+
|
|
277
|
+
idx_id = next(index_ids)
|
|
278
|
+
idx_cls = type(idx)
|
|
279
|
+
md = schema.IndexMd(
|
|
280
|
+
id=idx_id,
|
|
281
|
+
name=f'idx{idx_id}',
|
|
282
|
+
indexed_col_id=col.id,
|
|
283
|
+
indexed_col_tbl_id=tbl_id_str,
|
|
284
|
+
index_val_col_id=val_col.id,
|
|
285
|
+
index_val_undo_col_id=undo_col.id,
|
|
286
|
+
schema_version_add=0,
|
|
287
|
+
schema_version_drop=None,
|
|
288
|
+
class_fqn=idx_cls.__module__ + '.' + idx_cls.__name__,
|
|
289
|
+
init_args=idx.as_dict(),
|
|
290
|
+
)
|
|
291
|
+
index_md[idx_id] = md
|
|
292
|
+
|
|
293
|
+
for col in index_cols:
|
|
294
|
+
col_md, _ = col.to_md()
|
|
295
|
+
column_md[col.id] = col_md
|
|
296
|
+
|
|
297
|
+
assert all(column_md[id].id == id for id in column_md)
|
|
298
|
+
assert all(index_md[id].id == id for id in index_md)
|
|
299
|
+
|
|
300
|
+
cols.extend(index_cols)
|
|
301
|
+
|
|
244
302
|
tbl_md = schema.TableMd(
|
|
245
303
|
tbl_id=tbl_id_str,
|
|
246
304
|
name=name,
|
|
@@ -248,12 +306,12 @@ class TableVersion:
|
|
|
248
306
|
is_replica=False,
|
|
249
307
|
current_version=0,
|
|
250
308
|
current_schema_version=0,
|
|
251
|
-
next_col_id=
|
|
252
|
-
next_idx_id=
|
|
309
|
+
next_col_id=next(column_ids),
|
|
310
|
+
next_idx_id=next(index_ids),
|
|
253
311
|
next_row_id=0,
|
|
254
312
|
view_sn=0,
|
|
255
313
|
column_md=column_md,
|
|
256
|
-
index_md=
|
|
314
|
+
index_md=index_md,
|
|
257
315
|
external_stores=[],
|
|
258
316
|
view_md=view_md,
|
|
259
317
|
additional_md={},
|
|
@@ -281,51 +339,15 @@ class TableVersion:
|
|
|
281
339
|
)
|
|
282
340
|
return TableVersionMd(tbl_md, table_version_md, schema_version_md)
|
|
283
341
|
|
|
284
|
-
@classmethod
|
|
285
|
-
def create(
|
|
286
|
-
cls,
|
|
287
|
-
dir_id: UUID,
|
|
288
|
-
name: str,
|
|
289
|
-
cols: list[Column],
|
|
290
|
-
num_retained_versions: int,
|
|
291
|
-
comment: str,
|
|
292
|
-
media_validation: MediaValidation,
|
|
293
|
-
) -> tuple[UUID, Optional[TableVersion]]:
|
|
294
|
-
initial_md = cls.create_initial_md(name, cols, num_retained_versions, comment, media_validation, view_md=None)
|
|
295
|
-
cat = pxt.catalog.Catalog.get()
|
|
296
|
-
|
|
297
|
-
tbl_id = UUID(hex=initial_md.tbl_md.tbl_id)
|
|
298
|
-
assert (tbl_id, None) not in cat._tbl_versions
|
|
299
|
-
tbl_version = cls(tbl_id, initial_md.tbl_md, initial_md.version_md, None, initial_md.schema_version_md, [])
|
|
300
|
-
|
|
301
|
-
@cat.register_undo_action
|
|
302
|
-
def _() -> None:
|
|
303
|
-
if (tbl_id, None) in cat._tbl_versions:
|
|
304
|
-
del cat._tbl_versions[tbl_id, None]
|
|
305
|
-
|
|
306
|
-
# TODO: break this up, so that Catalog.create_table() registers tbl_version
|
|
307
|
-
cat._tbl_versions[tbl_id, None] = tbl_version
|
|
308
|
-
tbl_version.init()
|
|
309
|
-
tbl_version.store_tbl.create()
|
|
310
|
-
# add default indices, after creating the store table
|
|
311
|
-
for col in tbl_version.cols_by_name.values():
|
|
312
|
-
status = tbl_version._add_default_index(col)
|
|
313
|
-
assert status is None or status.num_excs == 0
|
|
314
|
-
|
|
315
|
-
cat.store_tbl_md(
|
|
316
|
-
tbl_id=tbl_id,
|
|
317
|
-
dir_id=dir_id,
|
|
318
|
-
tbl_md=tbl_version.tbl_md,
|
|
319
|
-
version_md=initial_md.version_md,
|
|
320
|
-
schema_version_md=initial_md.schema_version_md,
|
|
321
|
-
)
|
|
322
|
-
return tbl_id, tbl_version
|
|
323
|
-
|
|
324
342
|
def exec_op(self, op: TableOp) -> None:
|
|
325
343
|
if op.create_store_table_op is not None:
|
|
326
|
-
#
|
|
344
|
+
# this needs to be called outside of a transaction
|
|
345
|
+
self.store_tbl.create()
|
|
346
|
+
|
|
347
|
+
elif op.create_index_op is not None:
|
|
348
|
+
idx_info = self.idxs[op.create_index_op.idx_id]
|
|
327
349
|
with Env.get().begin_xact():
|
|
328
|
-
self.store_tbl.
|
|
350
|
+
self.store_tbl.create_index(idx_info.id)
|
|
329
351
|
|
|
330
352
|
elif op.load_view_op is not None:
|
|
331
353
|
from pixeltable.catalog import Catalog
|
|
@@ -344,7 +366,7 @@ class TableVersion:
|
|
|
344
366
|
|
|
345
367
|
@classmethod
|
|
346
368
|
def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
|
|
347
|
-
from .catalog import TableVersionPath
|
|
369
|
+
from .catalog import Catalog, TableVersionPath
|
|
348
370
|
|
|
349
371
|
assert Env.get().in_xact
|
|
350
372
|
assert md.tbl_md.is_replica
|
|
@@ -363,7 +385,7 @@ class TableVersion:
|
|
|
363
385
|
base_path=base_path,
|
|
364
386
|
base=base,
|
|
365
387
|
)
|
|
366
|
-
cat =
|
|
388
|
+
cat = Catalog.get()
|
|
367
389
|
# We're creating a new TableVersion replica, so we should never have seen this particular
|
|
368
390
|
# TableVersion instance before.
|
|
369
391
|
# Actually this isn't true, because we might be re-creating a dropped replica.
|
|
@@ -373,10 +395,9 @@ class TableVersion:
|
|
|
373
395
|
cat._tbl_versions[tbl_version.id, tbl_version.effective_version] = tbl_version
|
|
374
396
|
tbl_version.init()
|
|
375
397
|
tbl_version.store_tbl.create()
|
|
376
|
-
tbl_version.store_tbl.ensure_columns_exist(col for col in tbl_version.cols if col.is_stored)
|
|
377
398
|
return tbl_version
|
|
378
399
|
|
|
379
|
-
def delete_media(self, tbl_version:
|
|
400
|
+
def delete_media(self, tbl_version: int | None = None) -> None:
|
|
380
401
|
# Assemble a set of column destinations and delete objects from all of them
|
|
381
402
|
# None is a valid column destination which refers to the default object location
|
|
382
403
|
destinations = {col.destination for col in self.cols if col.is_stored}
|
|
@@ -414,19 +435,21 @@ class TableVersion:
|
|
|
414
435
|
self.is_initialized = True
|
|
415
436
|
|
|
416
437
|
def _init_schema(self) -> None:
|
|
417
|
-
|
|
418
|
-
self._init_cols()
|
|
419
|
-
self._init_idxs()
|
|
420
|
-
|
|
421
|
-
# create the sa schema only after creating the columns and indices
|
|
422
|
-
self._init_sa_schema()
|
|
438
|
+
from pixeltable.store import StoreComponentView, StoreTable, StoreView
|
|
423
439
|
|
|
424
|
-
#
|
|
425
|
-
|
|
426
|
-
|
|
440
|
+
# initialize IndexBase instances and collect sa_col_types
|
|
441
|
+
idxs: dict[int, index.IndexBase] = {}
|
|
442
|
+
val_col_idxs: dict[int, index.IndexBase] = {} # key: id of value column
|
|
443
|
+
undo_col_idxs: dict[int, index.IndexBase] = {} # key: id of undo column
|
|
444
|
+
for md in self.tbl_md.index_md.values():
|
|
445
|
+
cls_name = md.class_fqn.rsplit('.', 1)[-1]
|
|
446
|
+
cls = getattr(index, cls_name)
|
|
447
|
+
idx = cls.from_dict(md.init_args)
|
|
448
|
+
idxs[md.id] = idx
|
|
449
|
+
val_col_idxs[md.index_val_col_id] = idx
|
|
450
|
+
undo_col_idxs[md.index_val_undo_col_id] = idx
|
|
427
451
|
|
|
428
|
-
|
|
429
|
-
"""Initialize self.cols with the columns visible in our effective version"""
|
|
452
|
+
# initialize Columns
|
|
430
453
|
self.cols = []
|
|
431
454
|
self.cols_by_name = {}
|
|
432
455
|
self.cols_by_id = {}
|
|
@@ -434,77 +457,88 @@ class TableVersion:
|
|
|
434
457
|
# point backward.
|
|
435
458
|
sorted_column_md = sorted(self.tbl_md.column_md.values(), key=lambda item: item.id)
|
|
436
459
|
for col_md in sorted_column_md:
|
|
460
|
+
col_type = ts.ColumnType.from_dict(col_md.col_type)
|
|
437
461
|
schema_col_md = self.schema_version_md.columns.get(col_md.id)
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
# column was added after this version
|
|
444
|
-
continue
|
|
445
|
-
if col_md.schema_version_drop is not None and col_md.schema_version_drop <= self.schema_version:
|
|
446
|
-
# column was dropped
|
|
447
|
-
continue
|
|
448
|
-
if col.name is not None:
|
|
449
|
-
self.cols_by_name[col.name] = col
|
|
450
|
-
self.cols_by_id[col.id] = col
|
|
451
|
-
|
|
452
|
-
# # make sure to traverse columns ordered by position = order in which cols were created;
|
|
453
|
-
# # this guarantees that references always point backwards
|
|
454
|
-
# if not self.is_snapshot and col_md.value_expr is not None:
|
|
455
|
-
# self._record_refd_columns(col)
|
|
462
|
+
media_val = (
|
|
463
|
+
MediaValidation[schema_col_md.media_validation.upper()]
|
|
464
|
+
if schema_col_md is not None and schema_col_md.media_validation is not None
|
|
465
|
+
else None
|
|
466
|
+
)
|
|
456
467
|
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
# The index is active in this TableVersion provided that:
|
|
487
|
-
# (i) the TableVersion supports indices (either it's not a snapshot, or it's a replica at
|
|
488
|
-
# the head version); and
|
|
489
|
-
# (ii) the index was created on or before the schema version of this TableVersion; and
|
|
490
|
-
# (iii) the index was not dropped on or before the schema version of this TableVersion.
|
|
491
|
-
supports_idxs = self.effective_version is None or (
|
|
492
|
-
self.tbl_md.is_replica and self.effective_version == self.tbl_md.current_version
|
|
468
|
+
stores_cellmd: bool | None = None # None: determined by the column properties (in the Column c'tor)
|
|
469
|
+
sa_col_type: sql.types.TypeEngine | None = None
|
|
470
|
+
if col_md.id in val_col_idxs:
|
|
471
|
+
idx = val_col_idxs[col_md.id]
|
|
472
|
+
# for index value columns, the index gets to override the default
|
|
473
|
+
stores_cellmd = idx.records_value_errors()
|
|
474
|
+
sa_col_type = idx.get_index_sa_type(col_type)
|
|
475
|
+
elif col_md.id in undo_col_idxs:
|
|
476
|
+
idx = undo_col_idxs[col_md.id]
|
|
477
|
+
# for index undo columns, we never store cellmd
|
|
478
|
+
stores_cellmd = False
|
|
479
|
+
sa_col_type = idx.get_index_sa_type(col_type)
|
|
480
|
+
|
|
481
|
+
col = Column(
|
|
482
|
+
col_id=col_md.id,
|
|
483
|
+
name=schema_col_md.name if schema_col_md is not None else None,
|
|
484
|
+
col_type=col_type,
|
|
485
|
+
is_pk=col_md.is_pk,
|
|
486
|
+
is_iterator_col=self.is_component_view and col_md.id < self.num_iterator_cols + 1,
|
|
487
|
+
stored=col_md.stored,
|
|
488
|
+
media_validation=media_val,
|
|
489
|
+
sa_col_type=sa_col_type,
|
|
490
|
+
schema_version_add=col_md.schema_version_add,
|
|
491
|
+
schema_version_drop=col_md.schema_version_drop,
|
|
492
|
+
stores_cellmd=stores_cellmd,
|
|
493
|
+
value_expr_dict=col_md.value_expr,
|
|
494
|
+
tbl_handle=self.handle,
|
|
495
|
+
destination=col_md.destination,
|
|
493
496
|
)
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
497
|
+
|
|
498
|
+
self.cols.append(col)
|
|
499
|
+
# populate lookup structures before Expr.from_dict()
|
|
500
|
+
if col_md.schema_version_add <= self.schema_version and (
|
|
501
|
+
col_md.schema_version_drop is None or col_md.schema_version_drop > self.schema_version
|
|
498
502
|
):
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
503
|
+
if col.name is not None:
|
|
504
|
+
self.cols_by_name[col.name] = col
|
|
505
|
+
self.cols_by_id[col.id] = col
|
|
506
|
+
|
|
507
|
+
if self.supports_idxs:
|
|
508
|
+
# create IndexInfo for indices visible in current_version
|
|
509
|
+
visible_idxs = [
|
|
510
|
+
md
|
|
511
|
+
for md in self.tbl_md.index_md.values()
|
|
512
|
+
if md.schema_version_add <= self.schema_version
|
|
513
|
+
and (md.schema_version_drop is None or md.schema_version_drop > self.schema_version)
|
|
514
|
+
]
|
|
515
|
+
for md in visible_idxs:
|
|
516
|
+
idx = idxs[md.id]
|
|
517
|
+
indexed_col_id = QColumnId(UUID(md.indexed_col_tbl_id), md.indexed_col_id)
|
|
518
|
+
idx_col = self._lookup_column(indexed_col_id)
|
|
519
|
+
info = self.IndexInfo(
|
|
520
|
+
id=md.id,
|
|
521
|
+
name=md.name,
|
|
522
|
+
idx=idx,
|
|
523
|
+
col=idx_col,
|
|
524
|
+
val_col=self.cols_by_id[md.index_val_col_id],
|
|
525
|
+
undo_col=self.cols_by_id[md.index_val_undo_col_id],
|
|
506
526
|
)
|
|
507
|
-
self.
|
|
527
|
+
self.idxs[md.id] = info
|
|
528
|
+
self.idxs_by_name[md.name] = info
|
|
529
|
+
self.idxs_by_col.setdefault(indexed_col_id, []).append(info)
|
|
530
|
+
|
|
531
|
+
# create value exprs, now that we have all lookup structures in place
|
|
532
|
+
for col in self.cols_by_id.values():
|
|
533
|
+
col.init_value_expr()
|
|
534
|
+
|
|
535
|
+
# create the sqlalchemy schema, after instantiating all Columns
|
|
536
|
+
if self.is_component_view:
|
|
537
|
+
self.store_tbl = StoreComponentView(self)
|
|
538
|
+
elif self.is_view:
|
|
539
|
+
self.store_tbl = StoreView(self)
|
|
540
|
+
else:
|
|
541
|
+
self.store_tbl = StoreTable(self)
|
|
508
542
|
|
|
509
543
|
def _lookup_column(self, id: QColumnId) -> Column | None:
|
|
510
544
|
"""
|
|
@@ -547,7 +581,7 @@ class TableVersion:
|
|
|
547
581
|
"""Return name of index in the store, which needs to be globally unique"""
|
|
548
582
|
return f'idx_{self.id.hex}_{idx_id}'
|
|
549
583
|
|
|
550
|
-
def add_index(self, col: Column, idx_name:
|
|
584
|
+
def add_index(self, col: Column, idx_name: str | None, idx: index.IndexBase) -> UpdateStatus:
|
|
551
585
|
# we're creating a new schema version
|
|
552
586
|
self.bump_version(bump_schema_version=True)
|
|
553
587
|
status = self._add_index(col, idx_name, idx)
|
|
@@ -555,12 +589,13 @@ class TableVersion:
|
|
|
555
589
|
_logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
|
|
556
590
|
return status
|
|
557
591
|
|
|
558
|
-
|
|
592
|
+
@classmethod
|
|
593
|
+
def _is_btree_indexable(cls, col: Column) -> bool:
|
|
559
594
|
if not col.stored:
|
|
560
595
|
# if the column is intentionally not stored, we want to avoid the overhead of an index
|
|
561
596
|
return False
|
|
562
597
|
# Skip index for stored media columns produced by an iterator
|
|
563
|
-
if col.col_type.is_media_type() and
|
|
598
|
+
if col.col_type.is_media_type() and col.is_iterator_col:
|
|
564
599
|
return False
|
|
565
600
|
if not col.col_type.is_scalar_type() and not (col.col_type.is_media_type() and not col.is_computed):
|
|
566
601
|
# wrong type for a B-tree
|
|
@@ -570,53 +605,58 @@ class TableVersion:
|
|
|
570
605
|
return False
|
|
571
606
|
return True
|
|
572
607
|
|
|
573
|
-
def _add_default_index(self, col: Column) ->
|
|
608
|
+
def _add_default_index(self, col: Column) -> UpdateStatus | None:
|
|
574
609
|
"""Add a B-tree index on this column if it has a compatible type"""
|
|
575
610
|
if not self._is_btree_indexable(col):
|
|
576
611
|
return None
|
|
577
|
-
status = self._add_index(col, idx_name=None, idx=index.BtreeIndex(
|
|
612
|
+
status = self._add_index(col, idx_name=None, idx=index.BtreeIndex())
|
|
578
613
|
return status
|
|
579
614
|
|
|
580
|
-
|
|
615
|
+
@classmethod
|
|
616
|
+
def _create_index_columns(
|
|
617
|
+
cls,
|
|
618
|
+
col: Column,
|
|
619
|
+
idx: index.IndexBase,
|
|
620
|
+
schema_version: int,
|
|
621
|
+
tbl_handle: TableVersionHandle,
|
|
622
|
+
id_cb: Callable[[], int],
|
|
623
|
+
) -> tuple[Column, Column]:
|
|
581
624
|
"""Create value and undo columns for the given index.
|
|
582
625
|
Args:
|
|
583
626
|
idx: index for which columns will be created.
|
|
584
627
|
Returns:
|
|
585
|
-
A tuple containing the value column and the undo column.
|
|
628
|
+
A tuple containing the value column and the undo column, both of which are nullable.
|
|
586
629
|
"""
|
|
587
|
-
|
|
588
|
-
# add the index value and undo columns (which need to be nullable)
|
|
630
|
+
value_expr = idx.create_value_expr(col)
|
|
589
631
|
val_col = Column(
|
|
590
|
-
col_id=
|
|
632
|
+
col_id=id_cb(),
|
|
591
633
|
name=None,
|
|
592
|
-
computed_with=
|
|
593
|
-
sa_col_type=idx.
|
|
634
|
+
computed_with=value_expr,
|
|
635
|
+
sa_col_type=idx.get_index_sa_type(value_expr.col_type),
|
|
594
636
|
stored=True,
|
|
595
|
-
schema_version_add=self.schema_version,
|
|
596
|
-
schema_version_drop=None,
|
|
597
637
|
stores_cellmd=idx.records_value_errors(),
|
|
638
|
+
schema_version_add=schema_version,
|
|
639
|
+
schema_version_drop=None,
|
|
598
640
|
)
|
|
599
|
-
val_col.tbl = self
|
|
600
641
|
val_col.col_type = val_col.col_type.copy(nullable=True)
|
|
601
|
-
|
|
642
|
+
val_col.tbl_handle = tbl_handle
|
|
602
643
|
|
|
603
644
|
undo_col = Column(
|
|
604
|
-
col_id=
|
|
645
|
+
col_id=id_cb(),
|
|
605
646
|
name=None,
|
|
606
647
|
col_type=val_col.col_type,
|
|
607
648
|
sa_col_type=val_col.sa_col_type,
|
|
608
649
|
stored=True,
|
|
609
|
-
schema_version_add=self.schema_version,
|
|
610
|
-
schema_version_drop=None,
|
|
611
650
|
stores_cellmd=False,
|
|
651
|
+
schema_version_add=schema_version,
|
|
652
|
+
schema_version_drop=None,
|
|
612
653
|
)
|
|
613
|
-
undo_col.tbl = self
|
|
614
654
|
undo_col.col_type = undo_col.col_type.copy(nullable=True)
|
|
615
|
-
|
|
655
|
+
undo_col.tbl_handle = tbl_handle
|
|
616
656
|
return val_col, undo_col
|
|
617
657
|
|
|
618
658
|
def _create_index(
|
|
619
|
-
self, col: Column, val_col: Column, undo_col: Column, idx_name:
|
|
659
|
+
self, col: Column, val_col: Column, undo_col: Column, idx_name: str | None, idx: index.IndexBase
|
|
620
660
|
) -> None:
|
|
621
661
|
"""Create the given index along with index md"""
|
|
622
662
|
idx_id = self.next_idx_id
|
|
@@ -632,7 +672,7 @@ class TableVersion:
|
|
|
632
672
|
id=idx_id,
|
|
633
673
|
name=idx_name,
|
|
634
674
|
indexed_col_id=col.id,
|
|
635
|
-
indexed_col_tbl_id=str(col.
|
|
675
|
+
indexed_col_tbl_id=str(col.get_tbl().id),
|
|
636
676
|
index_val_col_id=val_col.id,
|
|
637
677
|
index_val_undo_col_id=undo_col.id,
|
|
638
678
|
schema_version_add=self.schema_version,
|
|
@@ -642,17 +682,21 @@ class TableVersion:
|
|
|
642
682
|
)
|
|
643
683
|
idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
|
|
644
684
|
self._tbl_md.index_md[idx_id] = idx_md
|
|
685
|
+
self.idxs[idx_id] = idx_info
|
|
645
686
|
self.idxs_by_name[idx_name] = idx_info
|
|
646
|
-
|
|
687
|
+
self.idxs_by_col.setdefault(col.qid, []).append(idx_info)
|
|
688
|
+
self.store_tbl.create_index(idx_id)
|
|
647
689
|
|
|
648
|
-
def _add_index(self, col: Column, idx_name:
|
|
649
|
-
val_col,
|
|
690
|
+
def _add_index(self, col: Column, idx_name: str | None, idx: index.IndexBase) -> UpdateStatus:
|
|
691
|
+
val_col, undo_col = self._create_index_columns(
|
|
692
|
+
col, idx, self.schema_version, self.handle, id_cb=self.next_col_id
|
|
693
|
+
)
|
|
650
694
|
# add the columns and update the metadata
|
|
651
695
|
# TODO support on_error='abort' for indices; it's tricky because of the way metadata changes are entangled
|
|
652
696
|
# with the database operations
|
|
653
|
-
status = self._add_columns([val_col,
|
|
697
|
+
status = self._add_columns([val_col, undo_col], print_stats=False, on_error='ignore')
|
|
654
698
|
# now create the index structure
|
|
655
|
-
self._create_index(col, val_col,
|
|
699
|
+
self._create_index(col, val_col, undo_col, idx_name, idx)
|
|
656
700
|
return status
|
|
657
701
|
|
|
658
702
|
def drop_index(self, idx_id: int) -> None:
|
|
@@ -668,7 +712,10 @@ class TableVersion:
|
|
|
668
712
|
# remove this index entry from the active indexes (in memory)
|
|
669
713
|
# and the index metadata (in persistent table metadata)
|
|
670
714
|
# TODO: this is wrong, it breaks revert()
|
|
715
|
+
del self.idxs[idx_id]
|
|
671
716
|
del self.idxs_by_name[idx_md.name]
|
|
717
|
+
if idx_info.col.qid in self.idxs_by_col:
|
|
718
|
+
self.idxs_by_col[idx_info.col.qid].remove(idx_info)
|
|
672
719
|
del self._tbl_md.index_md[idx_id]
|
|
673
720
|
|
|
674
721
|
self._drop_columns([idx_info.val_col, idx_info.undo_col])
|
|
@@ -684,9 +731,8 @@ class TableVersion:
|
|
|
684
731
|
assert all(col.stored is not None for col in cols)
|
|
685
732
|
assert all(col.name not in self.cols_by_name for col in cols if col.name is not None)
|
|
686
733
|
for col in cols:
|
|
687
|
-
col.
|
|
688
|
-
col.id = self.next_col_id
|
|
689
|
-
self.next_col_id += 1
|
|
734
|
+
col.tbl_handle = self.handle
|
|
735
|
+
col.id = self.next_col_id()
|
|
690
736
|
|
|
691
737
|
# we're creating a new schema version
|
|
692
738
|
self.bump_version(bump_schema_version=True)
|
|
@@ -695,8 +741,10 @@ class TableVersion:
|
|
|
695
741
|
for col in cols:
|
|
696
742
|
all_cols.append(col)
|
|
697
743
|
if col.name is not None and self._is_btree_indexable(col):
|
|
698
|
-
idx = index.BtreeIndex(
|
|
699
|
-
val_col, undo_col = self._create_index_columns(
|
|
744
|
+
idx = index.BtreeIndex()
|
|
745
|
+
val_col, undo_col = self._create_index_columns(
|
|
746
|
+
col, idx, self.schema_version, self.handle, id_cb=self.next_col_id
|
|
747
|
+
)
|
|
700
748
|
index_cols[col] = (idx, val_col, undo_col)
|
|
701
749
|
all_cols.append(val_col)
|
|
702
750
|
all_cols.append(undo_col)
|
|
@@ -728,7 +776,7 @@ class TableVersion:
|
|
|
728
776
|
|
|
729
777
|
row_count = self.store_tbl.count()
|
|
730
778
|
for col in cols_to_add:
|
|
731
|
-
assert col.
|
|
779
|
+
assert col.tbl_handle.id == self.id
|
|
732
780
|
if not col.col_type.nullable and not col.is_computed and row_count > 0:
|
|
733
781
|
raise excs.Error(
|
|
734
782
|
f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
|
|
@@ -738,7 +786,7 @@ class TableVersion:
|
|
|
738
786
|
num_excs = 0
|
|
739
787
|
cols_with_excs: list[Column] = []
|
|
740
788
|
for col in cols_to_add:
|
|
741
|
-
assert col.id is not None
|
|
789
|
+
assert col.id is not None
|
|
742
790
|
excs_per_col = 0
|
|
743
791
|
col.schema_version_add = self.schema_version
|
|
744
792
|
# add the column to the lookup structures now, rather than after the store changes executed successfully,
|
|
@@ -792,7 +840,7 @@ class TableVersion:
|
|
|
792
840
|
upd_rows=row_count, num_excs=num_excs, computed_values=computed_values
|
|
793
841
|
) # add_columns
|
|
794
842
|
return UpdateStatus(
|
|
795
|
-
cols_with_excs=[f'{col.
|
|
843
|
+
cols_with_excs=[f'{col.get_tbl().name}.{col.name}' for col in cols_with_excs if col.name is not None],
|
|
796
844
|
row_count_stats=row_counts,
|
|
797
845
|
)
|
|
798
846
|
|
|
@@ -806,7 +854,7 @@ class TableVersion:
|
|
|
806
854
|
|
|
807
855
|
# drop this column and all dependent index columns and indices
|
|
808
856
|
dropped_cols = [col]
|
|
809
|
-
|
|
857
|
+
dropped_idx_info: list[TableVersion.IndexInfo] = []
|
|
810
858
|
for idx_info in self.idxs_by_name.values():
|
|
811
859
|
if idx_info.col != col:
|
|
812
860
|
continue
|
|
@@ -814,11 +862,14 @@ class TableVersion:
|
|
|
814
862
|
idx_md = self._tbl_md.index_md[idx_info.id]
|
|
815
863
|
idx_md.schema_version_drop = self.schema_version
|
|
816
864
|
assert idx_md.name in self.idxs_by_name
|
|
817
|
-
|
|
865
|
+
dropped_idx_info.append(idx_info)
|
|
818
866
|
|
|
819
|
-
# update
|
|
820
|
-
for
|
|
821
|
-
del self.
|
|
867
|
+
# update index lookup structures
|
|
868
|
+
for info in dropped_idx_info:
|
|
869
|
+
del self.idxs[info.id]
|
|
870
|
+
del self.idxs_by_name[info.name]
|
|
871
|
+
if col.qid in self.idxs_by_col:
|
|
872
|
+
del self.idxs_by_col[col.qid]
|
|
822
873
|
|
|
823
874
|
self._drop_columns(dropped_cols)
|
|
824
875
|
self._write_md(new_version=True, new_schema_version=True)
|
|
@@ -826,6 +877,8 @@ class TableVersion:
|
|
|
826
877
|
|
|
827
878
|
def _drop_columns(self, cols: Iterable[Column]) -> None:
|
|
828
879
|
"""Mark columns as dropped"""
|
|
880
|
+
from pixeltable.catalog import Catalog
|
|
881
|
+
|
|
829
882
|
assert self.is_mutable
|
|
830
883
|
|
|
831
884
|
for col in cols:
|
|
@@ -845,7 +898,7 @@ class TableVersion:
|
|
|
845
898
|
schema_col.pos = pos
|
|
846
899
|
|
|
847
900
|
self.store_tbl.create_sa_tbl()
|
|
848
|
-
|
|
901
|
+
Catalog.get().record_column_dependencies(self)
|
|
849
902
|
|
|
850
903
|
def rename_column(self, old_name: str, new_name: str) -> None:
|
|
851
904
|
"""Rename a column."""
|
|
@@ -854,12 +907,12 @@ class TableVersion:
|
|
|
854
907
|
col = self.path.get_column(old_name)
|
|
855
908
|
if col is None:
|
|
856
909
|
raise excs.Error(f'Unknown column: {old_name}')
|
|
857
|
-
if col.
|
|
910
|
+
if col.get_tbl().id != self.id:
|
|
858
911
|
raise excs.Error(f'Cannot rename base table column {col.name!r}')
|
|
859
912
|
if not is_valid_identifier(new_name):
|
|
860
|
-
raise excs.Error(f
|
|
913
|
+
raise excs.Error(f'Invalid column name: {new_name}')
|
|
861
914
|
if new_name in self.cols_by_name:
|
|
862
|
-
raise excs.Error(f'Column {new_name} already exists')
|
|
915
|
+
raise excs.Error(f'Column {new_name!r} already exists')
|
|
863
916
|
del self.cols_by_name[old_name]
|
|
864
917
|
col.name = new_name
|
|
865
918
|
self.cols_by_name[new_name] = col
|
|
@@ -871,7 +924,7 @@ class TableVersion:
|
|
|
871
924
|
self._write_md(new_version=True, new_schema_version=True)
|
|
872
925
|
_logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
|
|
873
926
|
|
|
874
|
-
def set_comment(self, new_comment:
|
|
927
|
+
def set_comment(self, new_comment: str | None) -> None:
|
|
875
928
|
_logger.info(f'[{self.name}] Updating comment: {new_comment}')
|
|
876
929
|
self.comment = new_comment
|
|
877
930
|
self._create_schema_version()
|
|
@@ -892,8 +945,8 @@ class TableVersion:
|
|
|
892
945
|
|
|
893
946
|
def insert(
|
|
894
947
|
self,
|
|
895
|
-
rows:
|
|
896
|
-
df:
|
|
948
|
+
rows: list[dict[str, Any]] | None,
|
|
949
|
+
df: DataFrame | None,
|
|
897
950
|
print_stats: bool = False,
|
|
898
951
|
fail_on_exception: bool = True,
|
|
899
952
|
) -> UpdateStatus:
|
|
@@ -927,7 +980,7 @@ class TableVersion:
|
|
|
927
980
|
exec_plan: 'exec.ExecNode',
|
|
928
981
|
timestamp: float,
|
|
929
982
|
*,
|
|
930
|
-
rowids:
|
|
983
|
+
rowids: Iterator[int] | None = None,
|
|
931
984
|
print_stats: bool = False,
|
|
932
985
|
abort_on_exc: bool = False,
|
|
933
986
|
) -> UpdateStatus:
|
|
@@ -958,9 +1011,7 @@ class TableVersion:
|
|
|
958
1011
|
_logger.info(f'TableVersion {self.name}: new version {self.version}')
|
|
959
1012
|
return result
|
|
960
1013
|
|
|
961
|
-
def update(
|
|
962
|
-
self, value_spec: dict[str, Any], where: Optional[exprs.Expr] = None, cascade: bool = True
|
|
963
|
-
) -> UpdateStatus:
|
|
1014
|
+
def update(self, value_spec: dict[str, Any], where: exprs.Expr | None = None, cascade: bool = True) -> UpdateStatus:
|
|
964
1015
|
"""Update rows in this TableVersionPath.
|
|
965
1016
|
Args:
|
|
966
1017
|
value_spec: a list of (column, value) pairs specifying the columns to update and their new values.
|
|
@@ -976,11 +1027,11 @@ class TableVersion:
|
|
|
976
1027
|
update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True, allow_media=True)
|
|
977
1028
|
if where is not None:
|
|
978
1029
|
if not isinstance(where, exprs.Expr):
|
|
979
|
-
raise excs.Error(f
|
|
1030
|
+
raise excs.Error(f'`where` argument must be a valid Pixeltable expression; got `{type(where)}`')
|
|
980
1031
|
analysis_info = Planner.analyze(self.path, where)
|
|
981
1032
|
# for now we require that the updated rows can be identified via SQL, rather than via a Python filter
|
|
982
1033
|
if analysis_info.filter is not None:
|
|
983
|
-
raise excs.Error(f'Filter
|
|
1034
|
+
raise excs.Error(f'Filter not expressible in SQL: {analysis_info.filter}')
|
|
984
1035
|
|
|
985
1036
|
plan, updated_cols, recomputed_cols = Planner.create_update_plan(self.path, update_spec, [], where, cascade)
|
|
986
1037
|
|
|
@@ -1037,7 +1088,7 @@ class TableVersion:
|
|
|
1037
1088
|
update_targets: dict[Column, exprs.Expr] = {}
|
|
1038
1089
|
for col_name, val in value_spec.items():
|
|
1039
1090
|
if not isinstance(col_name, str):
|
|
1040
|
-
raise excs.Error(f'Update specification: dict key must be column name
|
|
1091
|
+
raise excs.Error(f'Update specification: dict key must be column name; got {col_name!r}')
|
|
1041
1092
|
if col_name == _ROWID_COLUMN_NAME:
|
|
1042
1093
|
# a valid rowid is a list of ints, one per rowid column
|
|
1043
1094
|
assert len(val) == len(self.store_tbl.rowid_columns())
|
|
@@ -1046,15 +1097,15 @@ class TableVersion:
|
|
|
1046
1097
|
continue
|
|
1047
1098
|
col = self.path.get_column(col_name)
|
|
1048
1099
|
if col is None:
|
|
1049
|
-
raise excs.Error(f'
|
|
1050
|
-
if col.
|
|
1100
|
+
raise excs.Error(f'Unknown column: {col_name}')
|
|
1101
|
+
if col.get_tbl().id != self.id:
|
|
1051
1102
|
raise excs.Error(f'Column {col.name!r} is a base table column and cannot be updated')
|
|
1052
1103
|
if col.is_computed:
|
|
1053
|
-
raise excs.Error(f'Column {col_name} is computed and cannot be updated')
|
|
1104
|
+
raise excs.Error(f'Column {col_name!r} is computed and cannot be updated')
|
|
1054
1105
|
if col.is_pk and not allow_pk:
|
|
1055
|
-
raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
|
|
1106
|
+
raise excs.Error(f'Column {col_name!r} is a primary key column and cannot be updated')
|
|
1056
1107
|
if col.col_type.is_media_type() and not allow_media:
|
|
1057
|
-
raise excs.Error(f'Column {col_name} is a media column and cannot be updated')
|
|
1108
|
+
raise excs.Error(f'Column {col_name!r} is a media column and cannot be updated')
|
|
1058
1109
|
|
|
1059
1110
|
# make sure that the value is compatible with the column type
|
|
1060
1111
|
value_expr: exprs.Expr
|
|
@@ -1064,19 +1115,19 @@ class TableVersion:
|
|
|
1064
1115
|
except (TypeError, jsonschema.exceptions.ValidationError) as exc:
|
|
1065
1116
|
if not allow_exprs:
|
|
1066
1117
|
raise excs.Error(
|
|
1067
|
-
f'Column {col_name}: value
|
|
1068
|
-
f'(expected {col.col_type})'
|
|
1118
|
+
f'Column {col_name!r}: value is not a valid literal for this column '
|
|
1119
|
+
f'(expected `{col.col_type}`): {val!r}'
|
|
1069
1120
|
) from exc
|
|
1070
1121
|
# it's not a literal, let's try to create an expr from it
|
|
1071
1122
|
value_expr = exprs.Expr.from_object(val)
|
|
1072
1123
|
if value_expr is None:
|
|
1073
1124
|
raise excs.Error(
|
|
1074
|
-
f'Column {col_name}: value
|
|
1125
|
+
f'Column {col_name!r}: value is not a recognized literal or expression: {val!r}'
|
|
1075
1126
|
) from exc
|
|
1076
1127
|
if not col.col_type.is_supertype_of(value_expr.col_type, ignore_nullable=True):
|
|
1077
1128
|
raise excs.Error(
|
|
1078
|
-
f'Type of value {val!r}
|
|
1079
|
-
f'{
|
|
1129
|
+
f'Type `{value_expr.col_type}` of value {val!r} is not compatible with the type '
|
|
1130
|
+
f'`{col.col_type}` of column {col_name!r}'
|
|
1080
1131
|
) from exc
|
|
1081
1132
|
update_targets[col] = value_expr
|
|
1082
1133
|
|
|
@@ -1094,9 +1145,9 @@ class TableVersion:
|
|
|
1094
1145
|
assert len(col_names) == 1 or not errors_only
|
|
1095
1146
|
|
|
1096
1147
|
target_columns = [self.cols_by_name[name] for name in col_names]
|
|
1097
|
-
where_clause:
|
|
1148
|
+
where_clause: exprs.Expr | None = None
|
|
1098
1149
|
if where is not None:
|
|
1099
|
-
self._validate_where_clause(where, error_prefix=
|
|
1150
|
+
self._validate_where_clause(where, error_prefix='`where` argument')
|
|
1100
1151
|
where_clause = where
|
|
1101
1152
|
if errors_only:
|
|
1102
1153
|
errortype_pred = (
|
|
@@ -1122,10 +1173,10 @@ class TableVersion:
|
|
|
1122
1173
|
|
|
1123
1174
|
def propagate_update(
|
|
1124
1175
|
self,
|
|
1125
|
-
plan:
|
|
1126
|
-
where_clause:
|
|
1176
|
+
plan: exec.ExecNode | None,
|
|
1177
|
+
where_clause: sql.ColumnElement | None,
|
|
1127
1178
|
recomputed_view_cols: list[Column],
|
|
1128
|
-
base_versions: list[
|
|
1179
|
+
base_versions: list[int | None],
|
|
1129
1180
|
timestamp: float,
|
|
1130
1181
|
cascade: bool,
|
|
1131
1182
|
show_progress: bool = True,
|
|
@@ -1153,7 +1204,7 @@ class TableVersion:
|
|
|
1153
1204
|
base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
|
|
1154
1205
|
# propagate to views
|
|
1155
1206
|
for view in self.mutable_views:
|
|
1156
|
-
recomputed_cols = [col for col in recomputed_view_cols if col.
|
|
1207
|
+
recomputed_cols = [col for col in recomputed_view_cols if col.get_tbl().id == view.id]
|
|
1157
1208
|
plan = None
|
|
1158
1209
|
if len(recomputed_cols) > 0:
|
|
1159
1210
|
plan = Planner.create_view_update_plan(view.get().path, recompute_targets=recomputed_cols)
|
|
@@ -1173,21 +1224,21 @@ class TableVersion:
|
|
|
1173
1224
|
from pixeltable.plan import Planner
|
|
1174
1225
|
|
|
1175
1226
|
if not isinstance(pred, Expr):
|
|
1176
|
-
raise excs.Error(f'{error_prefix} must be a
|
|
1227
|
+
raise excs.Error(f'{error_prefix} must be a valid Pixeltable expression; got `{type(pred)}`')
|
|
1177
1228
|
analysis_info = Planner.analyze(self.path, pred)
|
|
1178
1229
|
# for now we require that the updated rows can be identified via SQL, rather than via a Python filter
|
|
1179
1230
|
if analysis_info.filter is not None:
|
|
1180
|
-
raise excs.Error(f'Filter
|
|
1231
|
+
raise excs.Error(f'Filter not expressible in SQL: {analysis_info.filter}')
|
|
1181
1232
|
|
|
1182
1233
|
def delete(self, where: exprs.Expr | None = None) -> UpdateStatus:
|
|
1183
1234
|
assert self.is_insertable
|
|
1184
1235
|
if where is not None:
|
|
1185
|
-
self._validate_where_clause(where, error_prefix=
|
|
1236
|
+
self._validate_where_clause(where, error_prefix='`where` argument')
|
|
1186
1237
|
status = self.propagate_delete(where, base_versions=[], timestamp=time.time())
|
|
1187
1238
|
return status
|
|
1188
1239
|
|
|
1189
1240
|
def propagate_delete(
|
|
1190
|
-
self, where:
|
|
1241
|
+
self, where: exprs.Expr | None, base_versions: list[int | None], timestamp: float
|
|
1191
1242
|
) -> UpdateStatus:
|
|
1192
1243
|
"""Delete rows in this table and propagate to views"""
|
|
1193
1244
|
from pixeltable.catalog import Catalog
|
|
@@ -1252,7 +1303,7 @@ class TableVersion:
|
|
|
1252
1303
|
names = [row[1] for row in result]
|
|
1253
1304
|
raise excs.Error(
|
|
1254
1305
|
(
|
|
1255
|
-
f'Current version is needed for {len(result)} snapshot{"s" if len(result) > 1 else ""} '
|
|
1306
|
+
f'Current version is needed for {len(result)} snapshot{"s" if len(result) > 1 else ""}: '
|
|
1256
1307
|
f'({", ".join(names)})'
|
|
1257
1308
|
)
|
|
1258
1309
|
)
|
|
@@ -1261,7 +1312,7 @@ class TableVersion:
|
|
|
1261
1312
|
|
|
1262
1313
|
# revert new deletions
|
|
1263
1314
|
set_clause: dict[sql.Column, Any] = {self.store_tbl.sa_tbl.c.v_max: schema.Table.MAX_VERSION}
|
|
1264
|
-
for index_info in self.
|
|
1315
|
+
for index_info in self.idxs.values():
|
|
1265
1316
|
# copy the index value back from the undo column and reset the undo column to NULL
|
|
1266
1317
|
set_clause[index_info.val_col.sa_col] = index_info.undo_col.sa_col
|
|
1267
1318
|
set_clause[index_info.undo_col.sa_col] = None
|
|
@@ -1335,16 +1386,18 @@ class TableVersion:
|
|
|
1335
1386
|
# Do this at the end, after all DB operations have completed.
|
|
1336
1387
|
# TODO: The transaction could still fail. Really this should be done via PendingTableOps.
|
|
1337
1388
|
self.delete_media(tbl_version=old_version)
|
|
1338
|
-
_logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
|
|
1389
|
+
_logger.info(f'TableVersion {self.name!r}: reverted to version {self.version}')
|
|
1339
1390
|
|
|
1340
1391
|
def _init_external_stores(self) -> None:
|
|
1392
|
+
from pixeltable.io.external_store import ExternalStore
|
|
1393
|
+
|
|
1341
1394
|
for store_md in self.tbl_md.external_stores:
|
|
1342
1395
|
store_cls = resolve_symbol(store_md['class'])
|
|
1343
|
-
assert isinstance(store_cls, type) and issubclass(store_cls,
|
|
1396
|
+
assert isinstance(store_cls, type) and issubclass(store_cls, ExternalStore)
|
|
1344
1397
|
store = store_cls.from_dict(store_md['md'])
|
|
1345
1398
|
self.external_stores[store.name] = store
|
|
1346
1399
|
|
|
1347
|
-
def link_external_store(self, store:
|
|
1400
|
+
def link_external_store(self, store: ExternalStore) -> None:
|
|
1348
1401
|
self.bump_version(bump_schema_version=True)
|
|
1349
1402
|
|
|
1350
1403
|
self.external_stores[store.name] = store
|
|
@@ -1353,7 +1406,7 @@ class TableVersion:
|
|
|
1353
1406
|
)
|
|
1354
1407
|
self._write_md(new_version=True, new_schema_version=True)
|
|
1355
1408
|
|
|
1356
|
-
def unlink_external_store(self, store:
|
|
1409
|
+
def unlink_external_store(self, store: ExternalStore) -> None:
|
|
1357
1410
|
del self.external_stores[store.name]
|
|
1358
1411
|
self.bump_version(bump_schema_version=True)
|
|
1359
1412
|
idx = next(i for i, store_md in enumerate(self._tbl_md.external_stores) if store_md['md']['name'] == store.name)
|
|
@@ -1373,7 +1426,7 @@ class TableVersion:
|
|
|
1373
1426
|
return self._schema_version_md
|
|
1374
1427
|
|
|
1375
1428
|
@property
|
|
1376
|
-
def view_md(self) ->
|
|
1429
|
+
def view_md(self) -> schema.ViewMd | None:
|
|
1377
1430
|
return self._tbl_md.view_md
|
|
1378
1431
|
|
|
1379
1432
|
@property
|
|
@@ -1381,7 +1434,7 @@ class TableVersion:
|
|
|
1381
1434
|
return self._tbl_md.name
|
|
1382
1435
|
|
|
1383
1436
|
@property
|
|
1384
|
-
def user(self) ->
|
|
1437
|
+
def user(self) -> str | None:
|
|
1385
1438
|
return self._tbl_md.user
|
|
1386
1439
|
|
|
1387
1440
|
@property
|
|
@@ -1419,7 +1472,7 @@ class TableVersion:
|
|
|
1419
1472
|
def schema_version(self) -> int:
|
|
1420
1473
|
return self._schema_version_md.schema_version
|
|
1421
1474
|
|
|
1422
|
-
def bump_version(self, timestamp:
|
|
1475
|
+
def bump_version(self, timestamp: float | None = None, *, bump_schema_version: bool) -> None:
|
|
1423
1476
|
"""
|
|
1424
1477
|
Increments the table version and adjusts all associated metadata. This will *not* trigger a database action;
|
|
1425
1478
|
_write_md() must be called separately to persist the changes.
|
|
@@ -1456,11 +1509,11 @@ class TableVersion:
|
|
|
1456
1509
|
self._schema_version_md.schema_version = new_version
|
|
1457
1510
|
|
|
1458
1511
|
@property
|
|
1459
|
-
def preceding_schema_version(self) ->
|
|
1512
|
+
def preceding_schema_version(self) -> int | None:
|
|
1460
1513
|
return self._schema_version_md.preceding_schema_version
|
|
1461
1514
|
|
|
1462
1515
|
@property
|
|
1463
|
-
def update_status(self) ->
|
|
1516
|
+
def update_status(self) -> UpdateStatus | None:
|
|
1464
1517
|
return self._version_md.update_status
|
|
1465
1518
|
|
|
1466
1519
|
@update_status.setter
|
|
@@ -1472,14 +1525,10 @@ class TableVersion:
|
|
|
1472
1525
|
def media_validation(self) -> MediaValidation:
|
|
1473
1526
|
return MediaValidation[self._schema_version_md.media_validation.upper()]
|
|
1474
1527
|
|
|
1475
|
-
@property
|
|
1476
1528
|
def next_col_id(self) -> int:
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
def next_col_id(self, id: int) -> None:
|
|
1481
|
-
assert self.effective_version is None
|
|
1482
|
-
self._tbl_md.next_col_id = id
|
|
1529
|
+
val = self._tbl_md.next_col_id
|
|
1530
|
+
self._tbl_md.next_col_id += 1
|
|
1531
|
+
return val
|
|
1483
1532
|
|
|
1484
1533
|
@property
|
|
1485
1534
|
def next_idx_id(self) -> int:
|
|
@@ -1558,15 +1607,35 @@ class TableVersion:
|
|
|
1558
1607
|
return names
|
|
1559
1608
|
|
|
1560
1609
|
def get_idx_val_columns(self, cols: Iterable[Column]) -> set[Column]:
|
|
1561
|
-
|
|
1562
|
-
|
|
1610
|
+
# assumes that the indexed columns are all in this table
|
|
1611
|
+
assert all(col.get_tbl().id == self.id for col in cols)
|
|
1612
|
+
col_ids = {col.id for col in cols}
|
|
1613
|
+
return {info.val_col for info in self.idxs.values() if info.col.id in col_ids}
|
|
1614
|
+
|
|
1615
|
+
def get_idx(self, col: Column, idx_name: str | None, idx_cls: type[index.IndexBase]) -> TableVersion.IndexInfo:
|
|
1616
|
+
if not self.supports_idxs:
|
|
1617
|
+
raise excs.Error('Snapshot does not support indices')
|
|
1618
|
+
if col.qid not in self.idxs_by_col:
|
|
1619
|
+
raise excs.Error(f'Column {col.name!r} does not have a {idx_cls.display_name()} index')
|
|
1620
|
+
candidates = [info for info in self.idxs_by_col[col.qid] if isinstance(info.idx, idx_cls)]
|
|
1621
|
+
if len(candidates) == 0:
|
|
1622
|
+
raise excs.Error(f'No {idx_cls.display_name()} index found for column {col.name!r}')
|
|
1623
|
+
if len(candidates) > 1 and idx_name is None:
|
|
1624
|
+
raise excs.Error(
|
|
1625
|
+
f'Column {col.name!r} has multiple {idx_cls.display_name()} indices; specify `idx_name` instead'
|
|
1626
|
+
)
|
|
1627
|
+
if idx_name is not None and idx_name not in [info.name for info in candidates]:
|
|
1628
|
+
raise excs.Error(f'Index {idx_name!r} not found for column {col.name!r}')
|
|
1629
|
+
return candidates[0] if idx_name is None else next(info for info in candidates if info.name == idx_name)
|
|
1563
1630
|
|
|
1564
1631
|
def get_dependent_columns(self, cols: Iterable[Column]) -> set[Column]:
|
|
1565
1632
|
"""
|
|
1566
1633
|
Return the set of columns that transitively depend on any of the given ones.
|
|
1567
1634
|
"""
|
|
1568
|
-
|
|
1569
|
-
|
|
1635
|
+
from pixeltable.catalog import Catalog
|
|
1636
|
+
|
|
1637
|
+
cat = Catalog.get()
|
|
1638
|
+
result = set().union(*[cat.get_column_dependents(col.get_tbl().id, col.id) for col in cols])
|
|
1570
1639
|
if len(result) > 0:
|
|
1571
1640
|
result.update(self.get_dependent_columns(result))
|
|
1572
1641
|
return result
|
|
@@ -1578,7 +1647,7 @@ class TableVersion:
|
|
|
1578
1647
|
return 1
|
|
1579
1648
|
|
|
1580
1649
|
@classmethod
|
|
1581
|
-
def _create_stores_md(cls, stores: Iterable[
|
|
1650
|
+
def _create_stores_md(cls, stores: Iterable[ExternalStore]) -> list[dict[str, Any]]:
|
|
1582
1651
|
return [
|
|
1583
1652
|
{'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()} for store in stores
|
|
1584
1653
|
]
|