pixeltable 0.4.18__py3-none-any.whl → 0.4.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/_version.py +1 -0
- pixeltable/catalog/catalog.py +119 -100
- pixeltable/catalog/column.py +104 -115
- pixeltable/catalog/globals.py +1 -2
- pixeltable/catalog/insertable_table.py +44 -49
- pixeltable/catalog/path.py +3 -4
- pixeltable/catalog/schema_object.py +4 -4
- pixeltable/catalog/table.py +118 -122
- pixeltable/catalog/table_metadata.py +6 -6
- pixeltable/catalog/table_version.py +322 -257
- pixeltable/catalog/table_version_handle.py +4 -4
- pixeltable/catalog/table_version_path.py +9 -10
- pixeltable/catalog/tbl_ops.py +9 -3
- pixeltable/catalog/view.py +34 -28
- pixeltable/config.py +14 -10
- pixeltable/dataframe.py +68 -77
- pixeltable/env.py +74 -64
- pixeltable/exec/aggregation_node.py +6 -6
- pixeltable/exec/cache_prefetch_node.py +10 -10
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +4 -5
- pixeltable/exec/exec_node.py +5 -5
- pixeltable/exec/expr_eval/evaluators.py +6 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
- pixeltable/exec/expr_eval/globals.py +6 -6
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +11 -11
- pixeltable/exec/in_memory_data_node.py +2 -2
- pixeltable/exec/object_store_save_node.py +14 -17
- pixeltable/exec/sql_node.py +25 -25
- pixeltable/exprs/arithmetic_expr.py +4 -4
- pixeltable/exprs/array_slice.py +2 -2
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +61 -74
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +3 -3
- pixeltable/exprs/data_row.py +12 -12
- pixeltable/exprs/expr.py +41 -31
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +3 -3
- pixeltable/exprs/function_call.py +14 -14
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +8 -8
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +6 -6
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +14 -14
- pixeltable/exprs/rowid_ref.py +8 -8
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +3 -3
- pixeltable/func/function.py +15 -17
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +2 -2
- pixeltable/func/query_template_function.py +16 -16
- pixeltable/func/signature.py +14 -14
- pixeltable/func/tools.py +11 -11
- pixeltable/func/udf.py +16 -18
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +7 -7
- pixeltable/functions/audio.py +76 -0
- pixeltable/functions/bedrock.py +6 -6
- pixeltable/functions/deepseek.py +4 -4
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +6 -6
- pixeltable/functions/globals.py +12 -12
- pixeltable/functions/groq.py +4 -4
- pixeltable/functions/huggingface.py +18 -20
- pixeltable/functions/image.py +7 -10
- pixeltable/functions/llama_cpp.py +7 -7
- pixeltable/functions/math.py +2 -3
- pixeltable/functions/mistralai.py +3 -3
- pixeltable/functions/ollama.py +9 -9
- pixeltable/functions/openai.py +21 -21
- pixeltable/functions/openrouter.py +7 -7
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +7 -8
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/video.py +2 -24
- pixeltable/functions/vision.py +6 -6
- pixeltable/functions/whisper.py +7 -7
- pixeltable/functions/whisperx.py +16 -16
- pixeltable/globals.py +52 -36
- pixeltable/index/base.py +12 -8
- pixeltable/index/btree.py +19 -22
- pixeltable/index/embedding_index.py +30 -39
- pixeltable/io/datarows.py +3 -3
- pixeltable/io/external_store.py +13 -16
- pixeltable/io/fiftyone.py +5 -5
- pixeltable/io/globals.py +5 -5
- pixeltable/io/hf_datasets.py +4 -4
- pixeltable/io/label_studio.py +12 -12
- pixeltable/io/pandas.py +6 -6
- pixeltable/io/parquet.py +2 -2
- pixeltable/io/table_data_conduit.py +12 -12
- pixeltable/io/utils.py +2 -2
- pixeltable/iterators/audio.py +2 -2
- pixeltable/iterators/video.py +8 -13
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_38.py +2 -2
- pixeltable/metadata/converters/convert_39.py +1 -2
- pixeltable/metadata/converters/util.py +11 -13
- pixeltable/metadata/schema.py +22 -21
- pixeltable/metadata/utils.py +2 -6
- pixeltable/mypy/mypy_plugin.py +5 -5
- pixeltable/plan.py +30 -28
- pixeltable/share/packager.py +7 -7
- pixeltable/share/publish.py +3 -3
- pixeltable/store.py +125 -61
- pixeltable/type_system.py +43 -46
- pixeltable/utils/__init__.py +1 -2
- pixeltable/utils/arrow.py +4 -4
- pixeltable/utils/av.py +8 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +1 -2
- pixeltable/utils/dbms.py +15 -19
- pixeltable/utils/description_helper.py +2 -3
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +2 -2
- pixeltable/utils/filecache.py +5 -5
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +9 -9
- pixeltable/utils/local_store.py +17 -17
- pixeltable/utils/object_stores.py +59 -43
- pixeltable/utils/s3_store.py +35 -30
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/METADATA +1 -1
- pixeltable-0.4.19.dist-info/RECORD +213 -0
- pixeltable/__version__.py +0 -3
- pixeltable-0.4.18.dist-info/RECORD +0 -211
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
|
@@ -3,19 +3,21 @@ from __future__ import annotations
|
|
|
3
3
|
import copy
|
|
4
4
|
import dataclasses
|
|
5
5
|
import importlib
|
|
6
|
+
import itertools
|
|
6
7
|
import logging
|
|
7
8
|
import time
|
|
8
9
|
import uuid
|
|
9
|
-
from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Literal
|
|
10
11
|
from uuid import UUID
|
|
11
12
|
|
|
12
13
|
import jsonschema.exceptions
|
|
13
14
|
import sqlalchemy as sql
|
|
14
15
|
from sqlalchemy import exc as sql_exc
|
|
15
16
|
|
|
16
|
-
import pixeltable as pxt
|
|
17
17
|
import pixeltable.exceptions as excs
|
|
18
|
-
|
|
18
|
+
import pixeltable.exprs as exprs
|
|
19
|
+
import pixeltable.index as index
|
|
20
|
+
import pixeltable.type_system as ts
|
|
19
21
|
from pixeltable.env import Env
|
|
20
22
|
from pixeltable.iterators import ComponentIterator
|
|
21
23
|
from pixeltable.metadata import schema
|
|
@@ -31,8 +33,12 @@ from .update_status import RowCountStats, UpdateStatus
|
|
|
31
33
|
if TYPE_CHECKING:
|
|
32
34
|
from pixeltable import exec, store
|
|
33
35
|
from pixeltable.catalog.table_version_handle import TableVersionHandle
|
|
36
|
+
from pixeltable.dataframe import DataFrame
|
|
37
|
+
from pixeltable.io import ExternalStore
|
|
34
38
|
from pixeltable.plan import SampleClause
|
|
35
39
|
|
|
40
|
+
from .table_version_path import TableVersionPath
|
|
41
|
+
|
|
36
42
|
_logger = logging.getLogger('pixeltable')
|
|
37
43
|
|
|
38
44
|
|
|
@@ -77,14 +83,14 @@ class TableVersion:
|
|
|
77
83
|
_version_md: schema.TableVersionMd
|
|
78
84
|
_schema_version_md: schema.TableSchemaVersionMd
|
|
79
85
|
|
|
80
|
-
effective_version:
|
|
81
|
-
path:
|
|
82
|
-
base:
|
|
83
|
-
predicate:
|
|
84
|
-
sample_clause:
|
|
86
|
+
effective_version: int | None
|
|
87
|
+
path: 'TableVersionPath' | None # only set for live tables; needed to resolve computed cols
|
|
88
|
+
base: TableVersionHandle | None # only set for views
|
|
89
|
+
predicate: exprs.Expr | None
|
|
90
|
+
sample_clause: 'SampleClause' | None
|
|
85
91
|
|
|
86
|
-
iterator_cls:
|
|
87
|
-
iterator_args:
|
|
92
|
+
iterator_cls: type[ComponentIterator] | None
|
|
93
|
+
iterator_args: exprs.InlineDict | None
|
|
88
94
|
num_iterator_cols: int
|
|
89
95
|
|
|
90
96
|
# target for data operation propagation (only set for non-snapshots, and only records non-snapshot views)
|
|
@@ -96,13 +102,19 @@ class TableVersion:
|
|
|
96
102
|
cols_by_name: dict[str, Column]
|
|
97
103
|
# contains only columns visible in this version, both system and user
|
|
98
104
|
cols_by_id: dict[int, Column]
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
#
|
|
105
|
+
|
|
106
|
+
# True if this TableVersion instance can have indices:
|
|
107
|
+
# - live version of a mutable table
|
|
108
|
+
# - the most recent version of a replica
|
|
109
|
+
supports_idxs: bool
|
|
110
|
+
|
|
111
|
+
# only populated with indices visible in this TableVersion instance
|
|
112
|
+
idxs: dict[int, TableVersion.IndexInfo] # key: index id
|
|
102
113
|
idxs_by_name: dict[str, TableVersion.IndexInfo]
|
|
114
|
+
idxs_by_col: dict[QColumnId, list[TableVersion.IndexInfo]]
|
|
103
115
|
|
|
104
|
-
external_stores: dict[str,
|
|
105
|
-
store_tbl:
|
|
116
|
+
external_stores: dict[str, ExternalStore]
|
|
117
|
+
store_tbl: 'store.StoreBase' | None
|
|
106
118
|
|
|
107
119
|
is_initialized: bool # True if init() has been called
|
|
108
120
|
|
|
@@ -125,18 +137,12 @@ class TableVersion:
|
|
|
125
137
|
id: UUID,
|
|
126
138
|
tbl_md: schema.TableMd,
|
|
127
139
|
version_md: schema.TableVersionMd,
|
|
128
|
-
effective_version:
|
|
140
|
+
effective_version: int | None,
|
|
129
141
|
schema_version_md: schema.TableSchemaVersionMd,
|
|
130
142
|
mutable_views: list[TableVersionHandle],
|
|
131
|
-
base_path:
|
|
132
|
-
base:
|
|
143
|
+
base_path: 'TableVersionPath' | None = None,
|
|
144
|
+
base: TableVersionHandle | None = None,
|
|
133
145
|
):
|
|
134
|
-
from pixeltable import exprs
|
|
135
|
-
from pixeltable.plan import SampleClause
|
|
136
|
-
|
|
137
|
-
from .table_version_handle import TableVersionHandle
|
|
138
|
-
from .table_version_path import TableVersionPath
|
|
139
|
-
|
|
140
146
|
self.is_validated = True # a freshly constructed instance is always valid
|
|
141
147
|
self.is_initialized = False
|
|
142
148
|
self.id = id
|
|
@@ -149,6 +155,9 @@ class TableVersion:
|
|
|
149
155
|
self.store_tbl = None
|
|
150
156
|
|
|
151
157
|
# mutable tables need their TableVersionPath for expr eval during updates
|
|
158
|
+
from .table_version_handle import TableVersionHandle
|
|
159
|
+
from .table_version_path import TableVersionPath
|
|
160
|
+
|
|
152
161
|
if self.is_snapshot:
|
|
153
162
|
self.path = None
|
|
154
163
|
else:
|
|
@@ -158,6 +167,9 @@ class TableVersion:
|
|
|
158
167
|
self.path = TableVersionPath(self_handle, base=base_path)
|
|
159
168
|
|
|
160
169
|
# view-specific initialization
|
|
170
|
+
from pixeltable import exprs
|
|
171
|
+
from pixeltable.plan import SampleClause
|
|
172
|
+
|
|
161
173
|
predicate_dict = None if self.view_md is None or self.view_md.predicate is None else self.view_md.predicate
|
|
162
174
|
self.predicate = exprs.Expr.from_dict(predicate_dict) if predicate_dict is not None else None
|
|
163
175
|
sample_dict = None if self.view_md is None or self.view_md.sample_clause is None else self.view_md.sample_clause
|
|
@@ -182,8 +194,12 @@ class TableVersion:
|
|
|
182
194
|
self.cols = []
|
|
183
195
|
self.cols_by_name = {}
|
|
184
196
|
self.cols_by_id = {}
|
|
185
|
-
self.
|
|
197
|
+
self.idxs = {}
|
|
186
198
|
self.idxs_by_name = {}
|
|
199
|
+
self.idxs_by_col = {}
|
|
200
|
+
self.supports_idxs = self.effective_version is None or (
|
|
201
|
+
self.is_replica and self.effective_version == self.tbl_md.current_version
|
|
202
|
+
)
|
|
187
203
|
self.external_stores = {}
|
|
188
204
|
|
|
189
205
|
def __hash__(self) -> int:
|
|
@@ -222,19 +238,27 @@ class TableVersion:
|
|
|
222
238
|
num_retained_versions: int,
|
|
223
239
|
comment: str,
|
|
224
240
|
media_validation: MediaValidation,
|
|
225
|
-
|
|
241
|
+
create_default_idxs: bool,
|
|
242
|
+
view_md: schema.ViewMd | None = None,
|
|
226
243
|
) -> TableVersionMd:
|
|
244
|
+
from .table_version_handle import TableVersionHandle
|
|
245
|
+
|
|
227
246
|
user = Env.get().user
|
|
228
247
|
timestamp = time.time()
|
|
229
248
|
|
|
249
|
+
tbl_id = uuid.uuid4()
|
|
250
|
+
tbl_id_str = str(tbl_id)
|
|
251
|
+
tbl_handle = TableVersionHandle(tbl_id, None)
|
|
252
|
+
column_ids = itertools.count(0)
|
|
253
|
+
index_ids = itertools.count(0)
|
|
254
|
+
|
|
230
255
|
# assign ids, create metadata
|
|
231
|
-
cols_by_name: dict[str, Column] = {}
|
|
232
256
|
column_md: dict[int, schema.ColumnMd] = {}
|
|
233
257
|
schema_col_md: dict[int, schema.SchemaColumn] = {}
|
|
234
258
|
for pos, col in enumerate(cols):
|
|
235
|
-
col.
|
|
259
|
+
col.tbl_handle = tbl_handle
|
|
260
|
+
col.id = next(column_ids)
|
|
236
261
|
col.schema_version_add = 0
|
|
237
|
-
cols_by_name[col.name] = col
|
|
238
262
|
if col.is_computed:
|
|
239
263
|
col.check_value_expr()
|
|
240
264
|
col_md, sch_md = col.to_md(pos)
|
|
@@ -242,8 +266,39 @@ class TableVersion:
|
|
|
242
266
|
column_md[col.id] = col_md
|
|
243
267
|
schema_col_md[col.id] = sch_md
|
|
244
268
|
|
|
245
|
-
|
|
246
|
-
|
|
269
|
+
index_md: dict[int, schema.IndexMd] = {}
|
|
270
|
+
if create_default_idxs and (view_md is None or not view_md.is_snapshot):
|
|
271
|
+
index_cols: list[Column] = []
|
|
272
|
+
for col in (c for c in cols if cls._is_btree_indexable(c)):
|
|
273
|
+
idx = index.BtreeIndex()
|
|
274
|
+
val_col, undo_col = cls._create_index_columns(col, idx, 0, tbl_handle, id_cb=lambda: next(column_ids))
|
|
275
|
+
index_cols.extend([val_col, undo_col])
|
|
276
|
+
|
|
277
|
+
idx_id = next(index_ids)
|
|
278
|
+
idx_cls = type(idx)
|
|
279
|
+
md = schema.IndexMd(
|
|
280
|
+
id=idx_id,
|
|
281
|
+
name=f'idx{idx_id}',
|
|
282
|
+
indexed_col_id=col.id,
|
|
283
|
+
indexed_col_tbl_id=tbl_id_str,
|
|
284
|
+
index_val_col_id=val_col.id,
|
|
285
|
+
index_val_undo_col_id=undo_col.id,
|
|
286
|
+
schema_version_add=0,
|
|
287
|
+
schema_version_drop=None,
|
|
288
|
+
class_fqn=idx_cls.__module__ + '.' + idx_cls.__name__,
|
|
289
|
+
init_args=idx.as_dict(),
|
|
290
|
+
)
|
|
291
|
+
index_md[idx_id] = md
|
|
292
|
+
|
|
293
|
+
for col in index_cols:
|
|
294
|
+
col_md, _ = col.to_md()
|
|
295
|
+
column_md[col.id] = col_md
|
|
296
|
+
|
|
297
|
+
assert all(column_md[id].id == id for id in column_md)
|
|
298
|
+
assert all(index_md[id].id == id for id in index_md)
|
|
299
|
+
|
|
300
|
+
cols.extend(index_cols)
|
|
301
|
+
|
|
247
302
|
tbl_md = schema.TableMd(
|
|
248
303
|
tbl_id=tbl_id_str,
|
|
249
304
|
name=name,
|
|
@@ -251,12 +306,12 @@ class TableVersion:
|
|
|
251
306
|
is_replica=False,
|
|
252
307
|
current_version=0,
|
|
253
308
|
current_schema_version=0,
|
|
254
|
-
next_col_id=
|
|
255
|
-
next_idx_id=
|
|
309
|
+
next_col_id=next(column_ids),
|
|
310
|
+
next_idx_id=next(index_ids),
|
|
256
311
|
next_row_id=0,
|
|
257
312
|
view_sn=0,
|
|
258
313
|
column_md=column_md,
|
|
259
|
-
index_md=
|
|
314
|
+
index_md=index_md,
|
|
260
315
|
external_stores=[],
|
|
261
316
|
view_md=view_md,
|
|
262
317
|
additional_md={},
|
|
@@ -284,51 +339,15 @@ class TableVersion:
|
|
|
284
339
|
)
|
|
285
340
|
return TableVersionMd(tbl_md, table_version_md, schema_version_md)
|
|
286
341
|
|
|
287
|
-
@classmethod
|
|
288
|
-
def create(
|
|
289
|
-
cls,
|
|
290
|
-
dir_id: UUID,
|
|
291
|
-
name: str,
|
|
292
|
-
cols: list[Column],
|
|
293
|
-
num_retained_versions: int,
|
|
294
|
-
comment: str,
|
|
295
|
-
media_validation: MediaValidation,
|
|
296
|
-
) -> tuple[UUID, Optional[TableVersion]]:
|
|
297
|
-
initial_md = cls.create_initial_md(name, cols, num_retained_versions, comment, media_validation, view_md=None)
|
|
298
|
-
cat = pxt.catalog.Catalog.get()
|
|
299
|
-
|
|
300
|
-
tbl_id = UUID(hex=initial_md.tbl_md.tbl_id)
|
|
301
|
-
assert (tbl_id, None) not in cat._tbl_versions
|
|
302
|
-
tbl_version = cls(tbl_id, initial_md.tbl_md, initial_md.version_md, None, initial_md.schema_version_md, [])
|
|
303
|
-
|
|
304
|
-
@cat.register_undo_action
|
|
305
|
-
def _() -> None:
|
|
306
|
-
if (tbl_id, None) in cat._tbl_versions:
|
|
307
|
-
del cat._tbl_versions[tbl_id, None]
|
|
308
|
-
|
|
309
|
-
# TODO: break this up, so that Catalog.create_table() registers tbl_version
|
|
310
|
-
cat._tbl_versions[tbl_id, None] = tbl_version
|
|
311
|
-
tbl_version.init()
|
|
312
|
-
tbl_version.store_tbl.create()
|
|
313
|
-
# add default indices, after creating the store table
|
|
314
|
-
for col in tbl_version.cols_by_name.values():
|
|
315
|
-
status = tbl_version._add_default_index(col)
|
|
316
|
-
assert status is None or status.num_excs == 0
|
|
317
|
-
|
|
318
|
-
cat.store_tbl_md(
|
|
319
|
-
tbl_id=tbl_id,
|
|
320
|
-
dir_id=dir_id,
|
|
321
|
-
tbl_md=tbl_version.tbl_md,
|
|
322
|
-
version_md=initial_md.version_md,
|
|
323
|
-
schema_version_md=initial_md.schema_version_md,
|
|
324
|
-
)
|
|
325
|
-
return tbl_id, tbl_version
|
|
326
|
-
|
|
327
342
|
def exec_op(self, op: TableOp) -> None:
|
|
328
343
|
if op.create_store_table_op is not None:
|
|
329
|
-
#
|
|
344
|
+
# this needs to be called outside of a transaction
|
|
345
|
+
self.store_tbl.create()
|
|
346
|
+
|
|
347
|
+
elif op.create_index_op is not None:
|
|
348
|
+
idx_info = self.idxs[op.create_index_op.idx_id]
|
|
330
349
|
with Env.get().begin_xact():
|
|
331
|
-
self.store_tbl.
|
|
350
|
+
self.store_tbl.create_index(idx_info.id)
|
|
332
351
|
|
|
333
352
|
elif op.load_view_op is not None:
|
|
334
353
|
from pixeltable.catalog import Catalog
|
|
@@ -347,7 +366,7 @@ class TableVersion:
|
|
|
347
366
|
|
|
348
367
|
@classmethod
|
|
349
368
|
def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
|
|
350
|
-
from .catalog import TableVersionPath
|
|
369
|
+
from .catalog import Catalog, TableVersionPath
|
|
351
370
|
|
|
352
371
|
assert Env.get().in_xact
|
|
353
372
|
assert md.tbl_md.is_replica
|
|
@@ -366,7 +385,7 @@ class TableVersion:
|
|
|
366
385
|
base_path=base_path,
|
|
367
386
|
base=base,
|
|
368
387
|
)
|
|
369
|
-
cat =
|
|
388
|
+
cat = Catalog.get()
|
|
370
389
|
# We're creating a new TableVersion replica, so we should never have seen this particular
|
|
371
390
|
# TableVersion instance before.
|
|
372
391
|
# Actually this isn't true, because we might be re-creating a dropped replica.
|
|
@@ -376,10 +395,9 @@ class TableVersion:
|
|
|
376
395
|
cat._tbl_versions[tbl_version.id, tbl_version.effective_version] = tbl_version
|
|
377
396
|
tbl_version.init()
|
|
378
397
|
tbl_version.store_tbl.create()
|
|
379
|
-
tbl_version.store_tbl.ensure_updated_schema()
|
|
380
398
|
return tbl_version
|
|
381
399
|
|
|
382
|
-
def delete_media(self, tbl_version:
|
|
400
|
+
def delete_media(self, tbl_version: int | None = None) -> None:
|
|
383
401
|
# Assemble a set of column destinations and delete objects from all of them
|
|
384
402
|
# None is a valid column destination which refers to the default object location
|
|
385
403
|
destinations = {col.destination for col in self.cols if col.is_stored}
|
|
@@ -417,19 +435,21 @@ class TableVersion:
|
|
|
417
435
|
self.is_initialized = True
|
|
418
436
|
|
|
419
437
|
def _init_schema(self) -> None:
|
|
420
|
-
|
|
421
|
-
self._init_cols()
|
|
422
|
-
self._init_idxs()
|
|
423
|
-
|
|
424
|
-
# create the sa schema only after creating the columns and indices
|
|
425
|
-
self._init_sa_schema()
|
|
438
|
+
from pixeltable.store import StoreComponentView, StoreTable, StoreView
|
|
426
439
|
|
|
427
|
-
#
|
|
428
|
-
|
|
429
|
-
|
|
440
|
+
# initialize IndexBase instances and collect sa_col_types
|
|
441
|
+
idxs: dict[int, index.IndexBase] = {}
|
|
442
|
+
val_col_idxs: dict[int, index.IndexBase] = {} # key: id of value column
|
|
443
|
+
undo_col_idxs: dict[int, index.IndexBase] = {} # key: id of undo column
|
|
444
|
+
for md in self.tbl_md.index_md.values():
|
|
445
|
+
cls_name = md.class_fqn.rsplit('.', 1)[-1]
|
|
446
|
+
cls = getattr(index, cls_name)
|
|
447
|
+
idx = cls.from_dict(md.init_args)
|
|
448
|
+
idxs[md.id] = idx
|
|
449
|
+
val_col_idxs[md.index_val_col_id] = idx
|
|
450
|
+
undo_col_idxs[md.index_val_undo_col_id] = idx
|
|
430
451
|
|
|
431
|
-
|
|
432
|
-
"""Initialize self.cols with the columns visible in our effective version"""
|
|
452
|
+
# initialize Columns
|
|
433
453
|
self.cols = []
|
|
434
454
|
self.cols_by_name = {}
|
|
435
455
|
self.cols_by_id = {}
|
|
@@ -437,78 +457,88 @@ class TableVersion:
|
|
|
437
457
|
# point backward.
|
|
438
458
|
sorted_column_md = sorted(self.tbl_md.column_md.values(), key=lambda item: item.id)
|
|
439
459
|
for col_md in sorted_column_md:
|
|
460
|
+
col_type = ts.ColumnType.from_dict(col_md.col_type)
|
|
440
461
|
schema_col_md = self.schema_version_md.columns.get(col_md.id)
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
# column was added after this version
|
|
447
|
-
continue
|
|
448
|
-
if col_md.schema_version_drop is not None and col_md.schema_version_drop <= self.schema_version:
|
|
449
|
-
# column was dropped
|
|
450
|
-
continue
|
|
451
|
-
if col.name is not None:
|
|
452
|
-
self.cols_by_name[col.name] = col
|
|
453
|
-
self.cols_by_id[col.id] = col
|
|
454
|
-
|
|
455
|
-
# # make sure to traverse columns ordered by position = order in which cols were created;
|
|
456
|
-
# # this guarantees that references always point backwards
|
|
457
|
-
# if not self.is_snapshot and col_md.value_expr is not None:
|
|
458
|
-
# self._record_refd_columns(col)
|
|
462
|
+
media_val = (
|
|
463
|
+
MediaValidation[schema_col_md.media_validation.upper()]
|
|
464
|
+
if schema_col_md is not None and schema_col_md.media_validation is not None
|
|
465
|
+
else None
|
|
466
|
+
)
|
|
459
467
|
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
# remedy existing cellmd columns.
|
|
489
|
-
# - If so, remove this TODO.
|
|
490
|
-
val_col._stores_cellmd = False
|
|
491
|
-
undo_col._stores_cellmd = False
|
|
492
|
-
|
|
493
|
-
# The index is active in this TableVersion provided that:
|
|
494
|
-
# (i) the TableVersion supports indices (either it's not a snapshot, or it's a replica at
|
|
495
|
-
# the head version); and
|
|
496
|
-
# (ii) the index was created on or before the schema version of this TableVersion; and
|
|
497
|
-
# (iii) the index was not dropped on or before the schema version of this TableVersion.
|
|
498
|
-
supports_idxs = self.effective_version is None or (
|
|
499
|
-
self.tbl_md.is_replica and self.effective_version == self.tbl_md.current_version
|
|
468
|
+
stores_cellmd: bool | None = None # None: determined by the column properties (in the Column c'tor)
|
|
469
|
+
sa_col_type: sql.types.TypeEngine | None = None
|
|
470
|
+
if col_md.id in val_col_idxs:
|
|
471
|
+
idx = val_col_idxs[col_md.id]
|
|
472
|
+
# for index value columns, the index gets to override the default
|
|
473
|
+
stores_cellmd = idx.records_value_errors()
|
|
474
|
+
sa_col_type = idx.get_index_sa_type(col_type)
|
|
475
|
+
elif col_md.id in undo_col_idxs:
|
|
476
|
+
idx = undo_col_idxs[col_md.id]
|
|
477
|
+
# for index undo columns, we never store cellmd
|
|
478
|
+
stores_cellmd = False
|
|
479
|
+
sa_col_type = idx.get_index_sa_type(col_type)
|
|
480
|
+
|
|
481
|
+
col = Column(
|
|
482
|
+
col_id=col_md.id,
|
|
483
|
+
name=schema_col_md.name if schema_col_md is not None else None,
|
|
484
|
+
col_type=col_type,
|
|
485
|
+
is_pk=col_md.is_pk,
|
|
486
|
+
is_iterator_col=self.is_component_view and col_md.id < self.num_iterator_cols + 1,
|
|
487
|
+
stored=col_md.stored,
|
|
488
|
+
media_validation=media_val,
|
|
489
|
+
sa_col_type=sa_col_type,
|
|
490
|
+
schema_version_add=col_md.schema_version_add,
|
|
491
|
+
schema_version_drop=col_md.schema_version_drop,
|
|
492
|
+
stores_cellmd=stores_cellmd,
|
|
493
|
+
value_expr_dict=col_md.value_expr,
|
|
494
|
+
tbl_handle=self.handle,
|
|
495
|
+
destination=col_md.destination,
|
|
500
496
|
)
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
497
|
+
|
|
498
|
+
self.cols.append(col)
|
|
499
|
+
# populate lookup structures before Expr.from_dict()
|
|
500
|
+
if col_md.schema_version_add <= self.schema_version and (
|
|
501
|
+
col_md.schema_version_drop is None or col_md.schema_version_drop > self.schema_version
|
|
505
502
|
):
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
503
|
+
if col.name is not None:
|
|
504
|
+
self.cols_by_name[col.name] = col
|
|
505
|
+
self.cols_by_id[col.id] = col
|
|
506
|
+
|
|
507
|
+
if self.supports_idxs:
|
|
508
|
+
# create IndexInfo for indices visible in current_version
|
|
509
|
+
visible_idxs = [
|
|
510
|
+
md
|
|
511
|
+
for md in self.tbl_md.index_md.values()
|
|
512
|
+
if md.schema_version_add <= self.schema_version
|
|
513
|
+
and (md.schema_version_drop is None or md.schema_version_drop > self.schema_version)
|
|
514
|
+
]
|
|
515
|
+
for md in visible_idxs:
|
|
516
|
+
idx = idxs[md.id]
|
|
517
|
+
indexed_col_id = QColumnId(UUID(md.indexed_col_tbl_id), md.indexed_col_id)
|
|
518
|
+
idx_col = self._lookup_column(indexed_col_id)
|
|
519
|
+
info = self.IndexInfo(
|
|
520
|
+
id=md.id,
|
|
521
|
+
name=md.name,
|
|
522
|
+
idx=idx,
|
|
523
|
+
col=idx_col,
|
|
524
|
+
val_col=self.cols_by_id[md.index_val_col_id],
|
|
525
|
+
undo_col=self.cols_by_id[md.index_val_undo_col_id],
|
|
526
|
+
)
|
|
527
|
+
self.idxs[md.id] = info
|
|
528
|
+
self.idxs_by_name[md.name] = info
|
|
529
|
+
self.idxs_by_col.setdefault(indexed_col_id, []).append(info)
|
|
530
|
+
|
|
531
|
+
# create value exprs, now that we have all lookup structures in place
|
|
532
|
+
for col in self.cols_by_id.values():
|
|
533
|
+
col.init_value_expr()
|
|
534
|
+
|
|
535
|
+
# create the sqlalchemy schema, after instantiating all Columns
|
|
536
|
+
if self.is_component_view:
|
|
537
|
+
self.store_tbl = StoreComponentView(self)
|
|
538
|
+
elif self.is_view:
|
|
539
|
+
self.store_tbl = StoreView(self)
|
|
540
|
+
else:
|
|
541
|
+
self.store_tbl = StoreTable(self)
|
|
512
542
|
|
|
513
543
|
def _lookup_column(self, id: QColumnId) -> Column | None:
|
|
514
544
|
"""
|
|
@@ -551,7 +581,7 @@ class TableVersion:
|
|
|
551
581
|
"""Return name of index in the store, which needs to be globally unique"""
|
|
552
582
|
return f'idx_{self.id.hex}_{idx_id}'
|
|
553
583
|
|
|
554
|
-
def add_index(self, col: Column, idx_name:
|
|
584
|
+
def add_index(self, col: Column, idx_name: str | None, idx: index.IndexBase) -> UpdateStatus:
|
|
555
585
|
# we're creating a new schema version
|
|
556
586
|
self.bump_version(bump_schema_version=True)
|
|
557
587
|
status = self._add_index(col, idx_name, idx)
|
|
@@ -559,12 +589,13 @@ class TableVersion:
|
|
|
559
589
|
_logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
|
|
560
590
|
return status
|
|
561
591
|
|
|
562
|
-
|
|
592
|
+
@classmethod
|
|
593
|
+
def _is_btree_indexable(cls, col: Column) -> bool:
|
|
563
594
|
if not col.stored:
|
|
564
595
|
# if the column is intentionally not stored, we want to avoid the overhead of an index
|
|
565
596
|
return False
|
|
566
597
|
# Skip index for stored media columns produced by an iterator
|
|
567
|
-
if col.col_type.is_media_type() and
|
|
598
|
+
if col.col_type.is_media_type() and col.is_iterator_col:
|
|
568
599
|
return False
|
|
569
600
|
if not col.col_type.is_scalar_type() and not (col.col_type.is_media_type() and not col.is_computed):
|
|
570
601
|
# wrong type for a B-tree
|
|
@@ -574,53 +605,58 @@ class TableVersion:
|
|
|
574
605
|
return False
|
|
575
606
|
return True
|
|
576
607
|
|
|
577
|
-
def _add_default_index(self, col: Column) ->
|
|
608
|
+
def _add_default_index(self, col: Column) -> UpdateStatus | None:
|
|
578
609
|
"""Add a B-tree index on this column if it has a compatible type"""
|
|
579
610
|
if not self._is_btree_indexable(col):
|
|
580
611
|
return None
|
|
581
|
-
status = self._add_index(col, idx_name=None, idx=index.BtreeIndex(
|
|
612
|
+
status = self._add_index(col, idx_name=None, idx=index.BtreeIndex())
|
|
582
613
|
return status
|
|
583
614
|
|
|
584
|
-
|
|
615
|
+
@classmethod
|
|
616
|
+
def _create_index_columns(
|
|
617
|
+
cls,
|
|
618
|
+
col: Column,
|
|
619
|
+
idx: index.IndexBase,
|
|
620
|
+
schema_version: int,
|
|
621
|
+
tbl_handle: TableVersionHandle,
|
|
622
|
+
id_cb: Callable[[], int],
|
|
623
|
+
) -> tuple[Column, Column]:
|
|
585
624
|
"""Create value and undo columns for the given index.
|
|
586
625
|
Args:
|
|
587
626
|
idx: index for which columns will be created.
|
|
588
627
|
Returns:
|
|
589
|
-
A tuple containing the value column and the undo column.
|
|
628
|
+
A tuple containing the value column and the undo column, both of which are nullable.
|
|
590
629
|
"""
|
|
591
|
-
|
|
592
|
-
# add the index value and undo columns (which need to be nullable)
|
|
630
|
+
value_expr = idx.create_value_expr(col)
|
|
593
631
|
val_col = Column(
|
|
594
|
-
col_id=
|
|
632
|
+
col_id=id_cb(),
|
|
595
633
|
name=None,
|
|
596
|
-
computed_with=
|
|
597
|
-
sa_col_type=idx.
|
|
634
|
+
computed_with=value_expr,
|
|
635
|
+
sa_col_type=idx.get_index_sa_type(value_expr.col_type),
|
|
598
636
|
stored=True,
|
|
599
|
-
schema_version_add=self.schema_version,
|
|
600
|
-
schema_version_drop=None,
|
|
601
637
|
stores_cellmd=idx.records_value_errors(),
|
|
638
|
+
schema_version_add=schema_version,
|
|
639
|
+
schema_version_drop=None,
|
|
602
640
|
)
|
|
603
|
-
val_col.tbl = self
|
|
604
641
|
val_col.col_type = val_col.col_type.copy(nullable=True)
|
|
605
|
-
|
|
642
|
+
val_col.tbl_handle = tbl_handle
|
|
606
643
|
|
|
607
644
|
undo_col = Column(
|
|
608
|
-
col_id=
|
|
645
|
+
col_id=id_cb(),
|
|
609
646
|
name=None,
|
|
610
647
|
col_type=val_col.col_type,
|
|
611
648
|
sa_col_type=val_col.sa_col_type,
|
|
612
649
|
stored=True,
|
|
613
|
-
schema_version_add=self.schema_version,
|
|
614
|
-
schema_version_drop=None,
|
|
615
650
|
stores_cellmd=False,
|
|
651
|
+
schema_version_add=schema_version,
|
|
652
|
+
schema_version_drop=None,
|
|
616
653
|
)
|
|
617
|
-
undo_col.tbl = self
|
|
618
654
|
undo_col.col_type = undo_col.col_type.copy(nullable=True)
|
|
619
|
-
|
|
655
|
+
undo_col.tbl_handle = tbl_handle
|
|
620
656
|
return val_col, undo_col
|
|
621
657
|
|
|
622
658
|
def _create_index(
|
|
623
|
-
self, col: Column, val_col: Column, undo_col: Column, idx_name:
|
|
659
|
+
self, col: Column, val_col: Column, undo_col: Column, idx_name: str | None, idx: index.IndexBase
|
|
624
660
|
) -> None:
|
|
625
661
|
"""Create the given index along with index md"""
|
|
626
662
|
idx_id = self.next_idx_id
|
|
@@ -636,7 +672,7 @@ class TableVersion:
|
|
|
636
672
|
id=idx_id,
|
|
637
673
|
name=idx_name,
|
|
638
674
|
indexed_col_id=col.id,
|
|
639
|
-
indexed_col_tbl_id=str(col.
|
|
675
|
+
indexed_col_tbl_id=str(col.get_tbl().id),
|
|
640
676
|
index_val_col_id=val_col.id,
|
|
641
677
|
index_val_undo_col_id=undo_col.id,
|
|
642
678
|
schema_version_add=self.schema_version,
|
|
@@ -646,17 +682,21 @@ class TableVersion:
|
|
|
646
682
|
)
|
|
647
683
|
idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
|
|
648
684
|
self._tbl_md.index_md[idx_id] = idx_md
|
|
685
|
+
self.idxs[idx_id] = idx_info
|
|
649
686
|
self.idxs_by_name[idx_name] = idx_info
|
|
650
|
-
|
|
687
|
+
self.idxs_by_col.setdefault(col.qid, []).append(idx_info)
|
|
688
|
+
self.store_tbl.create_index(idx_id)
|
|
651
689
|
|
|
652
|
-
def _add_index(self, col: Column, idx_name:
|
|
653
|
-
val_col,
|
|
690
|
+
def _add_index(self, col: Column, idx_name: str | None, idx: index.IndexBase) -> UpdateStatus:
|
|
691
|
+
val_col, undo_col = self._create_index_columns(
|
|
692
|
+
col, idx, self.schema_version, self.handle, id_cb=self.next_col_id
|
|
693
|
+
)
|
|
654
694
|
# add the columns and update the metadata
|
|
655
695
|
# TODO support on_error='abort' for indices; it's tricky because of the way metadata changes are entangled
|
|
656
696
|
# with the database operations
|
|
657
|
-
status = self._add_columns([val_col,
|
|
697
|
+
status = self._add_columns([val_col, undo_col], print_stats=False, on_error='ignore')
|
|
658
698
|
# now create the index structure
|
|
659
|
-
self._create_index(col, val_col,
|
|
699
|
+
self._create_index(col, val_col, undo_col, idx_name, idx)
|
|
660
700
|
return status
|
|
661
701
|
|
|
662
702
|
def drop_index(self, idx_id: int) -> None:
|
|
@@ -672,7 +712,10 @@ class TableVersion:
|
|
|
672
712
|
# remove this index entry from the active indexes (in memory)
|
|
673
713
|
# and the index metadata (in persistent table metadata)
|
|
674
714
|
# TODO: this is wrong, it breaks revert()
|
|
715
|
+
del self.idxs[idx_id]
|
|
675
716
|
del self.idxs_by_name[idx_md.name]
|
|
717
|
+
if idx_info.col.qid in self.idxs_by_col:
|
|
718
|
+
self.idxs_by_col[idx_info.col.qid].remove(idx_info)
|
|
676
719
|
del self._tbl_md.index_md[idx_id]
|
|
677
720
|
|
|
678
721
|
self._drop_columns([idx_info.val_col, idx_info.undo_col])
|
|
@@ -688,9 +731,8 @@ class TableVersion:
|
|
|
688
731
|
assert all(col.stored is not None for col in cols)
|
|
689
732
|
assert all(col.name not in self.cols_by_name for col in cols if col.name is not None)
|
|
690
733
|
for col in cols:
|
|
691
|
-
col.
|
|
692
|
-
col.id = self.next_col_id
|
|
693
|
-
self.next_col_id += 1
|
|
734
|
+
col.tbl_handle = self.handle
|
|
735
|
+
col.id = self.next_col_id()
|
|
694
736
|
|
|
695
737
|
# we're creating a new schema version
|
|
696
738
|
self.bump_version(bump_schema_version=True)
|
|
@@ -699,8 +741,10 @@ class TableVersion:
|
|
|
699
741
|
for col in cols:
|
|
700
742
|
all_cols.append(col)
|
|
701
743
|
if col.name is not None and self._is_btree_indexable(col):
|
|
702
|
-
idx = index.BtreeIndex(
|
|
703
|
-
val_col, undo_col = self._create_index_columns(
|
|
744
|
+
idx = index.BtreeIndex()
|
|
745
|
+
val_col, undo_col = self._create_index_columns(
|
|
746
|
+
col, idx, self.schema_version, self.handle, id_cb=self.next_col_id
|
|
747
|
+
)
|
|
704
748
|
index_cols[col] = (idx, val_col, undo_col)
|
|
705
749
|
all_cols.append(val_col)
|
|
706
750
|
all_cols.append(undo_col)
|
|
@@ -732,7 +776,7 @@ class TableVersion:
|
|
|
732
776
|
|
|
733
777
|
row_count = self.store_tbl.count()
|
|
734
778
|
for col in cols_to_add:
|
|
735
|
-
assert col.
|
|
779
|
+
assert col.tbl_handle.id == self.id
|
|
736
780
|
if not col.col_type.nullable and not col.is_computed and row_count > 0:
|
|
737
781
|
raise excs.Error(
|
|
738
782
|
f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
|
|
@@ -742,7 +786,7 @@ class TableVersion:
|
|
|
742
786
|
num_excs = 0
|
|
743
787
|
cols_with_excs: list[Column] = []
|
|
744
788
|
for col in cols_to_add:
|
|
745
|
-
assert col.id is not None
|
|
789
|
+
assert col.id is not None
|
|
746
790
|
excs_per_col = 0
|
|
747
791
|
col.schema_version_add = self.schema_version
|
|
748
792
|
# add the column to the lookup structures now, rather than after the store changes executed successfully,
|
|
@@ -796,7 +840,7 @@ class TableVersion:
|
|
|
796
840
|
upd_rows=row_count, num_excs=num_excs, computed_values=computed_values
|
|
797
841
|
) # add_columns
|
|
798
842
|
return UpdateStatus(
|
|
799
|
-
cols_with_excs=[f'{col.
|
|
843
|
+
cols_with_excs=[f'{col.get_tbl().name}.{col.name}' for col in cols_with_excs if col.name is not None],
|
|
800
844
|
row_count_stats=row_counts,
|
|
801
845
|
)
|
|
802
846
|
|
|
@@ -810,7 +854,7 @@ class TableVersion:
|
|
|
810
854
|
|
|
811
855
|
# drop this column and all dependent index columns and indices
|
|
812
856
|
dropped_cols = [col]
|
|
813
|
-
|
|
857
|
+
dropped_idx_info: list[TableVersion.IndexInfo] = []
|
|
814
858
|
for idx_info in self.idxs_by_name.values():
|
|
815
859
|
if idx_info.col != col:
|
|
816
860
|
continue
|
|
@@ -818,11 +862,14 @@ class TableVersion:
|
|
|
818
862
|
idx_md = self._tbl_md.index_md[idx_info.id]
|
|
819
863
|
idx_md.schema_version_drop = self.schema_version
|
|
820
864
|
assert idx_md.name in self.idxs_by_name
|
|
821
|
-
|
|
865
|
+
dropped_idx_info.append(idx_info)
|
|
822
866
|
|
|
823
|
-
# update
|
|
824
|
-
for
|
|
825
|
-
del self.
|
|
867
|
+
# update index lookup structures
|
|
868
|
+
for info in dropped_idx_info:
|
|
869
|
+
del self.idxs[info.id]
|
|
870
|
+
del self.idxs_by_name[info.name]
|
|
871
|
+
if col.qid in self.idxs_by_col:
|
|
872
|
+
del self.idxs_by_col[col.qid]
|
|
826
873
|
|
|
827
874
|
self._drop_columns(dropped_cols)
|
|
828
875
|
self._write_md(new_version=True, new_schema_version=True)
|
|
@@ -830,6 +877,8 @@ class TableVersion:
|
|
|
830
877
|
|
|
831
878
|
def _drop_columns(self, cols: Iterable[Column]) -> None:
|
|
832
879
|
"""Mark columns as dropped"""
|
|
880
|
+
from pixeltable.catalog import Catalog
|
|
881
|
+
|
|
833
882
|
assert self.is_mutable
|
|
834
883
|
|
|
835
884
|
for col in cols:
|
|
@@ -849,7 +898,7 @@ class TableVersion:
|
|
|
849
898
|
schema_col.pos = pos
|
|
850
899
|
|
|
851
900
|
self.store_tbl.create_sa_tbl()
|
|
852
|
-
|
|
901
|
+
Catalog.get().record_column_dependencies(self)
|
|
853
902
|
|
|
854
903
|
def rename_column(self, old_name: str, new_name: str) -> None:
|
|
855
904
|
"""Rename a column."""
|
|
@@ -858,12 +907,12 @@ class TableVersion:
|
|
|
858
907
|
col = self.path.get_column(old_name)
|
|
859
908
|
if col is None:
|
|
860
909
|
raise excs.Error(f'Unknown column: {old_name}')
|
|
861
|
-
if col.
|
|
910
|
+
if col.get_tbl().id != self.id:
|
|
862
911
|
raise excs.Error(f'Cannot rename base table column {col.name!r}')
|
|
863
912
|
if not is_valid_identifier(new_name):
|
|
864
|
-
raise excs.Error(f
|
|
913
|
+
raise excs.Error(f'Invalid column name: {new_name}')
|
|
865
914
|
if new_name in self.cols_by_name:
|
|
866
|
-
raise excs.Error(f'Column {new_name} already exists')
|
|
915
|
+
raise excs.Error(f'Column {new_name!r} already exists')
|
|
867
916
|
del self.cols_by_name[old_name]
|
|
868
917
|
col.name = new_name
|
|
869
918
|
self.cols_by_name[new_name] = col
|
|
@@ -875,7 +924,7 @@ class TableVersion:
|
|
|
875
924
|
self._write_md(new_version=True, new_schema_version=True)
|
|
876
925
|
_logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
|
|
877
926
|
|
|
878
|
-
def set_comment(self, new_comment:
|
|
927
|
+
def set_comment(self, new_comment: str | None) -> None:
|
|
879
928
|
_logger.info(f'[{self.name}] Updating comment: {new_comment}')
|
|
880
929
|
self.comment = new_comment
|
|
881
930
|
self._create_schema_version()
|
|
@@ -896,8 +945,8 @@ class TableVersion:
|
|
|
896
945
|
|
|
897
946
|
def insert(
|
|
898
947
|
self,
|
|
899
|
-
rows:
|
|
900
|
-
df:
|
|
948
|
+
rows: list[dict[str, Any]] | None,
|
|
949
|
+
df: DataFrame | None,
|
|
901
950
|
print_stats: bool = False,
|
|
902
951
|
fail_on_exception: bool = True,
|
|
903
952
|
) -> UpdateStatus:
|
|
@@ -931,7 +980,7 @@ class TableVersion:
|
|
|
931
980
|
exec_plan: 'exec.ExecNode',
|
|
932
981
|
timestamp: float,
|
|
933
982
|
*,
|
|
934
|
-
rowids:
|
|
983
|
+
rowids: Iterator[int] | None = None,
|
|
935
984
|
print_stats: bool = False,
|
|
936
985
|
abort_on_exc: bool = False,
|
|
937
986
|
) -> UpdateStatus:
|
|
@@ -962,9 +1011,7 @@ class TableVersion:
|
|
|
962
1011
|
_logger.info(f'TableVersion {self.name}: new version {self.version}')
|
|
963
1012
|
return result
|
|
964
1013
|
|
|
965
|
-
def update(
|
|
966
|
-
self, value_spec: dict[str, Any], where: Optional[exprs.Expr] = None, cascade: bool = True
|
|
967
|
-
) -> UpdateStatus:
|
|
1014
|
+
def update(self, value_spec: dict[str, Any], where: exprs.Expr | None = None, cascade: bool = True) -> UpdateStatus:
|
|
968
1015
|
"""Update rows in this TableVersionPath.
|
|
969
1016
|
Args:
|
|
970
1017
|
value_spec: a list of (column, value) pairs specifying the columns to update and their new values.
|
|
@@ -980,11 +1027,11 @@ class TableVersion:
|
|
|
980
1027
|
update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True, allow_media=True)
|
|
981
1028
|
if where is not None:
|
|
982
1029
|
if not isinstance(where, exprs.Expr):
|
|
983
|
-
raise excs.Error(f
|
|
1030
|
+
raise excs.Error(f'`where` argument must be a valid Pixeltable expression; got `{type(where)}`')
|
|
984
1031
|
analysis_info = Planner.analyze(self.path, where)
|
|
985
1032
|
# for now we require that the updated rows can be identified via SQL, rather than via a Python filter
|
|
986
1033
|
if analysis_info.filter is not None:
|
|
987
|
-
raise excs.Error(f'Filter
|
|
1034
|
+
raise excs.Error(f'Filter not expressible in SQL: {analysis_info.filter}')
|
|
988
1035
|
|
|
989
1036
|
plan, updated_cols, recomputed_cols = Planner.create_update_plan(self.path, update_spec, [], where, cascade)
|
|
990
1037
|
|
|
@@ -1041,7 +1088,7 @@ class TableVersion:
|
|
|
1041
1088
|
update_targets: dict[Column, exprs.Expr] = {}
|
|
1042
1089
|
for col_name, val in value_spec.items():
|
|
1043
1090
|
if not isinstance(col_name, str):
|
|
1044
|
-
raise excs.Error(f'Update specification: dict key must be column name
|
|
1091
|
+
raise excs.Error(f'Update specification: dict key must be column name; got {col_name!r}')
|
|
1045
1092
|
if col_name == _ROWID_COLUMN_NAME:
|
|
1046
1093
|
# a valid rowid is a list of ints, one per rowid column
|
|
1047
1094
|
assert len(val) == len(self.store_tbl.rowid_columns())
|
|
@@ -1050,15 +1097,15 @@ class TableVersion:
|
|
|
1050
1097
|
continue
|
|
1051
1098
|
col = self.path.get_column(col_name)
|
|
1052
1099
|
if col is None:
|
|
1053
|
-
raise excs.Error(f'
|
|
1054
|
-
if col.
|
|
1100
|
+
raise excs.Error(f'Unknown column: {col_name}')
|
|
1101
|
+
if col.get_tbl().id != self.id:
|
|
1055
1102
|
raise excs.Error(f'Column {col.name!r} is a base table column and cannot be updated')
|
|
1056
1103
|
if col.is_computed:
|
|
1057
|
-
raise excs.Error(f'Column {col_name} is computed and cannot be updated')
|
|
1104
|
+
raise excs.Error(f'Column {col_name!r} is computed and cannot be updated')
|
|
1058
1105
|
if col.is_pk and not allow_pk:
|
|
1059
|
-
raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
|
|
1106
|
+
raise excs.Error(f'Column {col_name!r} is a primary key column and cannot be updated')
|
|
1060
1107
|
if col.col_type.is_media_type() and not allow_media:
|
|
1061
|
-
raise excs.Error(f'Column {col_name} is a media column and cannot be updated')
|
|
1108
|
+
raise excs.Error(f'Column {col_name!r} is a media column and cannot be updated')
|
|
1062
1109
|
|
|
1063
1110
|
# make sure that the value is compatible with the column type
|
|
1064
1111
|
value_expr: exprs.Expr
|
|
@@ -1068,19 +1115,19 @@ class TableVersion:
|
|
|
1068
1115
|
except (TypeError, jsonschema.exceptions.ValidationError) as exc:
|
|
1069
1116
|
if not allow_exprs:
|
|
1070
1117
|
raise excs.Error(
|
|
1071
|
-
f'Column {col_name}: value
|
|
1072
|
-
f'(expected {col.col_type})'
|
|
1118
|
+
f'Column {col_name!r}: value is not a valid literal for this column '
|
|
1119
|
+
f'(expected `{col.col_type}`): {val!r}'
|
|
1073
1120
|
) from exc
|
|
1074
1121
|
# it's not a literal, let's try to create an expr from it
|
|
1075
1122
|
value_expr = exprs.Expr.from_object(val)
|
|
1076
1123
|
if value_expr is None:
|
|
1077
1124
|
raise excs.Error(
|
|
1078
|
-
f'Column {col_name}: value
|
|
1125
|
+
f'Column {col_name!r}: value is not a recognized literal or expression: {val!r}'
|
|
1079
1126
|
) from exc
|
|
1080
1127
|
if not col.col_type.is_supertype_of(value_expr.col_type, ignore_nullable=True):
|
|
1081
1128
|
raise excs.Error(
|
|
1082
|
-
f'Type of value {val!r}
|
|
1083
|
-
f'{
|
|
1129
|
+
f'Type `{value_expr.col_type}` of value {val!r} is not compatible with the type '
|
|
1130
|
+
f'`{col.col_type}` of column {col_name!r}'
|
|
1084
1131
|
) from exc
|
|
1085
1132
|
update_targets[col] = value_expr
|
|
1086
1133
|
|
|
@@ -1098,9 +1145,9 @@ class TableVersion:
|
|
|
1098
1145
|
assert len(col_names) == 1 or not errors_only
|
|
1099
1146
|
|
|
1100
1147
|
target_columns = [self.cols_by_name[name] for name in col_names]
|
|
1101
|
-
where_clause:
|
|
1148
|
+
where_clause: exprs.Expr | None = None
|
|
1102
1149
|
if where is not None:
|
|
1103
|
-
self._validate_where_clause(where, error_prefix=
|
|
1150
|
+
self._validate_where_clause(where, error_prefix='`where` argument')
|
|
1104
1151
|
where_clause = where
|
|
1105
1152
|
if errors_only:
|
|
1106
1153
|
errortype_pred = (
|
|
@@ -1126,10 +1173,10 @@ class TableVersion:
|
|
|
1126
1173
|
|
|
1127
1174
|
def propagate_update(
|
|
1128
1175
|
self,
|
|
1129
|
-
plan:
|
|
1130
|
-
where_clause:
|
|
1176
|
+
plan: exec.ExecNode | None,
|
|
1177
|
+
where_clause: sql.ColumnElement | None,
|
|
1131
1178
|
recomputed_view_cols: list[Column],
|
|
1132
|
-
base_versions: list[
|
|
1179
|
+
base_versions: list[int | None],
|
|
1133
1180
|
timestamp: float,
|
|
1134
1181
|
cascade: bool,
|
|
1135
1182
|
show_progress: bool = True,
|
|
@@ -1157,7 +1204,7 @@ class TableVersion:
|
|
|
1157
1204
|
base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
|
|
1158
1205
|
# propagate to views
|
|
1159
1206
|
for view in self.mutable_views:
|
|
1160
|
-
recomputed_cols = [col for col in recomputed_view_cols if col.
|
|
1207
|
+
recomputed_cols = [col for col in recomputed_view_cols if col.get_tbl().id == view.id]
|
|
1161
1208
|
plan = None
|
|
1162
1209
|
if len(recomputed_cols) > 0:
|
|
1163
1210
|
plan = Planner.create_view_update_plan(view.get().path, recompute_targets=recomputed_cols)
|
|
@@ -1177,21 +1224,21 @@ class TableVersion:
|
|
|
1177
1224
|
from pixeltable.plan import Planner
|
|
1178
1225
|
|
|
1179
1226
|
if not isinstance(pred, Expr):
|
|
1180
|
-
raise excs.Error(f'{error_prefix} must be a
|
|
1227
|
+
raise excs.Error(f'{error_prefix} must be a valid Pixeltable expression; got `{type(pred)}`')
|
|
1181
1228
|
analysis_info = Planner.analyze(self.path, pred)
|
|
1182
1229
|
# for now we require that the updated rows can be identified via SQL, rather than via a Python filter
|
|
1183
1230
|
if analysis_info.filter is not None:
|
|
1184
|
-
raise excs.Error(f'Filter
|
|
1231
|
+
raise excs.Error(f'Filter not expressible in SQL: {analysis_info.filter}')
|
|
1185
1232
|
|
|
1186
1233
|
def delete(self, where: exprs.Expr | None = None) -> UpdateStatus:
|
|
1187
1234
|
assert self.is_insertable
|
|
1188
1235
|
if where is not None:
|
|
1189
|
-
self._validate_where_clause(where, error_prefix=
|
|
1236
|
+
self._validate_where_clause(where, error_prefix='`where` argument')
|
|
1190
1237
|
status = self.propagate_delete(where, base_versions=[], timestamp=time.time())
|
|
1191
1238
|
return status
|
|
1192
1239
|
|
|
1193
1240
|
def propagate_delete(
|
|
1194
|
-
self, where:
|
|
1241
|
+
self, where: exprs.Expr | None, base_versions: list[int | None], timestamp: float
|
|
1195
1242
|
) -> UpdateStatus:
|
|
1196
1243
|
"""Delete rows in this table and propagate to views"""
|
|
1197
1244
|
from pixeltable.catalog import Catalog
|
|
@@ -1256,7 +1303,7 @@ class TableVersion:
|
|
|
1256
1303
|
names = [row[1] for row in result]
|
|
1257
1304
|
raise excs.Error(
|
|
1258
1305
|
(
|
|
1259
|
-
f'Current version is needed for {len(result)} snapshot{"s" if len(result) > 1 else ""} '
|
|
1306
|
+
f'Current version is needed for {len(result)} snapshot{"s" if len(result) > 1 else ""}: '
|
|
1260
1307
|
f'({", ".join(names)})'
|
|
1261
1308
|
)
|
|
1262
1309
|
)
|
|
@@ -1265,7 +1312,7 @@ class TableVersion:
|
|
|
1265
1312
|
|
|
1266
1313
|
# revert new deletions
|
|
1267
1314
|
set_clause: dict[sql.Column, Any] = {self.store_tbl.sa_tbl.c.v_max: schema.Table.MAX_VERSION}
|
|
1268
|
-
for index_info in self.
|
|
1315
|
+
for index_info in self.idxs.values():
|
|
1269
1316
|
# copy the index value back from the undo column and reset the undo column to NULL
|
|
1270
1317
|
set_clause[index_info.val_col.sa_col] = index_info.undo_col.sa_col
|
|
1271
1318
|
set_clause[index_info.undo_col.sa_col] = None
|
|
@@ -1339,16 +1386,18 @@ class TableVersion:
|
|
|
1339
1386
|
# Do this at the end, after all DB operations have completed.
|
|
1340
1387
|
# TODO: The transaction could still fail. Really this should be done via PendingTableOps.
|
|
1341
1388
|
self.delete_media(tbl_version=old_version)
|
|
1342
|
-
_logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
|
|
1389
|
+
_logger.info(f'TableVersion {self.name!r}: reverted to version {self.version}')
|
|
1343
1390
|
|
|
1344
1391
|
def _init_external_stores(self) -> None:
|
|
1392
|
+
from pixeltable.io.external_store import ExternalStore
|
|
1393
|
+
|
|
1345
1394
|
for store_md in self.tbl_md.external_stores:
|
|
1346
1395
|
store_cls = resolve_symbol(store_md['class'])
|
|
1347
|
-
assert isinstance(store_cls, type) and issubclass(store_cls,
|
|
1396
|
+
assert isinstance(store_cls, type) and issubclass(store_cls, ExternalStore)
|
|
1348
1397
|
store = store_cls.from_dict(store_md['md'])
|
|
1349
1398
|
self.external_stores[store.name] = store
|
|
1350
1399
|
|
|
1351
|
-
def link_external_store(self, store:
|
|
1400
|
+
def link_external_store(self, store: ExternalStore) -> None:
|
|
1352
1401
|
self.bump_version(bump_schema_version=True)
|
|
1353
1402
|
|
|
1354
1403
|
self.external_stores[store.name] = store
|
|
@@ -1357,7 +1406,7 @@ class TableVersion:
|
|
|
1357
1406
|
)
|
|
1358
1407
|
self._write_md(new_version=True, new_schema_version=True)
|
|
1359
1408
|
|
|
1360
|
-
def unlink_external_store(self, store:
|
|
1409
|
+
def unlink_external_store(self, store: ExternalStore) -> None:
|
|
1361
1410
|
del self.external_stores[store.name]
|
|
1362
1411
|
self.bump_version(bump_schema_version=True)
|
|
1363
1412
|
idx = next(i for i, store_md in enumerate(self._tbl_md.external_stores) if store_md['md']['name'] == store.name)
|
|
@@ -1377,7 +1426,7 @@ class TableVersion:
|
|
|
1377
1426
|
return self._schema_version_md
|
|
1378
1427
|
|
|
1379
1428
|
@property
|
|
1380
|
-
def view_md(self) ->
|
|
1429
|
+
def view_md(self) -> schema.ViewMd | None:
|
|
1381
1430
|
return self._tbl_md.view_md
|
|
1382
1431
|
|
|
1383
1432
|
@property
|
|
@@ -1385,7 +1434,7 @@ class TableVersion:
|
|
|
1385
1434
|
return self._tbl_md.name
|
|
1386
1435
|
|
|
1387
1436
|
@property
|
|
1388
|
-
def user(self) ->
|
|
1437
|
+
def user(self) -> str | None:
|
|
1389
1438
|
return self._tbl_md.user
|
|
1390
1439
|
|
|
1391
1440
|
@property
|
|
@@ -1423,7 +1472,7 @@ class TableVersion:
|
|
|
1423
1472
|
def schema_version(self) -> int:
|
|
1424
1473
|
return self._schema_version_md.schema_version
|
|
1425
1474
|
|
|
1426
|
-
def bump_version(self, timestamp:
|
|
1475
|
+
def bump_version(self, timestamp: float | None = None, *, bump_schema_version: bool) -> None:
|
|
1427
1476
|
"""
|
|
1428
1477
|
Increments the table version and adjusts all associated metadata. This will *not* trigger a database action;
|
|
1429
1478
|
_write_md() must be called separately to persist the changes.
|
|
@@ -1460,11 +1509,11 @@ class TableVersion:
|
|
|
1460
1509
|
self._schema_version_md.schema_version = new_version
|
|
1461
1510
|
|
|
1462
1511
|
@property
|
|
1463
|
-
def preceding_schema_version(self) ->
|
|
1512
|
+
def preceding_schema_version(self) -> int | None:
|
|
1464
1513
|
return self._schema_version_md.preceding_schema_version
|
|
1465
1514
|
|
|
1466
1515
|
@property
|
|
1467
|
-
def update_status(self) ->
|
|
1516
|
+
def update_status(self) -> UpdateStatus | None:
|
|
1468
1517
|
return self._version_md.update_status
|
|
1469
1518
|
|
|
1470
1519
|
@update_status.setter
|
|
@@ -1476,14 +1525,10 @@ class TableVersion:
|
|
|
1476
1525
|
def media_validation(self) -> MediaValidation:
|
|
1477
1526
|
return MediaValidation[self._schema_version_md.media_validation.upper()]
|
|
1478
1527
|
|
|
1479
|
-
@property
|
|
1480
1528
|
def next_col_id(self) -> int:
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
def next_col_id(self, id: int) -> None:
|
|
1485
|
-
assert self.effective_version is None
|
|
1486
|
-
self._tbl_md.next_col_id = id
|
|
1529
|
+
val = self._tbl_md.next_col_id
|
|
1530
|
+
self._tbl_md.next_col_id += 1
|
|
1531
|
+
return val
|
|
1487
1532
|
|
|
1488
1533
|
@property
|
|
1489
1534
|
def next_idx_id(self) -> int:
|
|
@@ -1562,15 +1607,35 @@ class TableVersion:
|
|
|
1562
1607
|
return names
|
|
1563
1608
|
|
|
1564
1609
|
def get_idx_val_columns(self, cols: Iterable[Column]) -> set[Column]:
|
|
1565
|
-
|
|
1566
|
-
|
|
1610
|
+
# assumes that the indexed columns are all in this table
|
|
1611
|
+
assert all(col.get_tbl().id == self.id for col in cols)
|
|
1612
|
+
col_ids = {col.id for col in cols}
|
|
1613
|
+
return {info.val_col for info in self.idxs.values() if info.col.id in col_ids}
|
|
1614
|
+
|
|
1615
|
+
def get_idx(self, col: Column, idx_name: str | None, idx_cls: type[index.IndexBase]) -> TableVersion.IndexInfo:
|
|
1616
|
+
if not self.supports_idxs:
|
|
1617
|
+
raise excs.Error('Snapshot does not support indices')
|
|
1618
|
+
if col.qid not in self.idxs_by_col:
|
|
1619
|
+
raise excs.Error(f'Column {col.name!r} does not have a {idx_cls.display_name()} index')
|
|
1620
|
+
candidates = [info for info in self.idxs_by_col[col.qid] if isinstance(info.idx, idx_cls)]
|
|
1621
|
+
if len(candidates) == 0:
|
|
1622
|
+
raise excs.Error(f'No {idx_cls.display_name()} index found for column {col.name!r}')
|
|
1623
|
+
if len(candidates) > 1 and idx_name is None:
|
|
1624
|
+
raise excs.Error(
|
|
1625
|
+
f'Column {col.name!r} has multiple {idx_cls.display_name()} indices; specify `idx_name` instead'
|
|
1626
|
+
)
|
|
1627
|
+
if idx_name is not None and idx_name not in [info.name for info in candidates]:
|
|
1628
|
+
raise excs.Error(f'Index {idx_name!r} not found for column {col.name!r}')
|
|
1629
|
+
return candidates[0] if idx_name is None else next(info for info in candidates if info.name == idx_name)
|
|
1567
1630
|
|
|
1568
1631
|
def get_dependent_columns(self, cols: Iterable[Column]) -> set[Column]:
|
|
1569
1632
|
"""
|
|
1570
1633
|
Return the set of columns that transitively depend on any of the given ones.
|
|
1571
1634
|
"""
|
|
1572
|
-
|
|
1573
|
-
|
|
1635
|
+
from pixeltable.catalog import Catalog
|
|
1636
|
+
|
|
1637
|
+
cat = Catalog.get()
|
|
1638
|
+
result = set().union(*[cat.get_column_dependents(col.get_tbl().id, col.id) for col in cols])
|
|
1574
1639
|
if len(result) > 0:
|
|
1575
1640
|
result.update(self.get_dependent_columns(result))
|
|
1576
1641
|
return result
|
|
@@ -1582,7 +1647,7 @@ class TableVersion:
|
|
|
1582
1647
|
return 1
|
|
1583
1648
|
|
|
1584
1649
|
@classmethod
|
|
1585
|
-
def _create_stores_md(cls, stores: Iterable[
|
|
1650
|
+
def _create_stores_md(cls, stores: Iterable[ExternalStore]) -> list[dict[str, Any]]:
|
|
1586
1651
|
return [
|
|
1587
1652
|
{'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()} for store in stores
|
|
1588
1653
|
]
|