pixeltable 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +20 -9
- pixeltable/__version__.py +3 -0
- pixeltable/catalog/column.py +23 -7
- pixeltable/catalog/insertable_table.py +32 -19
- pixeltable/catalog/table.py +210 -20
- pixeltable/catalog/table_version.py +272 -111
- pixeltable/catalog/table_version_path.py +6 -1
- pixeltable/dataframe.py +184 -110
- pixeltable/datatransfer/__init__.py +1 -0
- pixeltable/datatransfer/label_studio.py +526 -0
- pixeltable/datatransfer/remote.py +113 -0
- pixeltable/env.py +213 -79
- pixeltable/exec/__init__.py +2 -1
- pixeltable/exec/data_row_batch.py +6 -7
- pixeltable/exec/expr_eval_node.py +28 -28
- pixeltable/exec/sql_scan_node.py +7 -6
- pixeltable/exprs/__init__.py +4 -3
- pixeltable/exprs/column_ref.py +11 -2
- pixeltable/exprs/comparison.py +39 -1
- pixeltable/exprs/data_row.py +7 -0
- pixeltable/exprs/expr.py +26 -19
- pixeltable/exprs/function_call.py +17 -18
- pixeltable/exprs/globals.py +14 -2
- pixeltable/exprs/image_member_access.py +9 -28
- pixeltable/exprs/in_predicate.py +96 -0
- pixeltable/exprs/inline_array.py +13 -11
- pixeltable/exprs/inline_dict.py +15 -13
- pixeltable/exprs/row_builder.py +7 -1
- pixeltable/exprs/similarity_expr.py +67 -0
- pixeltable/ext/functions/whisperx.py +30 -0
- pixeltable/ext/functions/yolox.py +16 -0
- pixeltable/func/__init__.py +0 -2
- pixeltable/func/aggregate_function.py +5 -2
- pixeltable/func/callable_function.py +57 -13
- pixeltable/func/expr_template_function.py +14 -3
- pixeltable/func/function.py +35 -4
- pixeltable/func/signature.py +5 -15
- pixeltable/func/udf.py +8 -12
- pixeltable/functions/fireworks.py +9 -4
- pixeltable/functions/huggingface.py +48 -5
- pixeltable/functions/openai.py +49 -11
- pixeltable/functions/pil/image.py +61 -64
- pixeltable/functions/together.py +32 -6
- pixeltable/functions/util.py +0 -43
- pixeltable/functions/video.py +46 -8
- pixeltable/globals.py +443 -0
- pixeltable/index/__init__.py +1 -0
- pixeltable/index/base.py +9 -2
- pixeltable/index/btree.py +54 -0
- pixeltable/index/embedding_index.py +91 -15
- pixeltable/io/__init__.py +4 -0
- pixeltable/io/globals.py +59 -0
- pixeltable/{utils → io}/hf_datasets.py +48 -17
- pixeltable/io/pandas.py +148 -0
- pixeltable/{utils → io}/parquet.py +58 -33
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/iterators/base.py +8 -4
- pixeltable/iterators/document.py +225 -93
- pixeltable/iterators/video.py +16 -9
- pixeltable/metadata/__init__.py +8 -4
- pixeltable/metadata/converters/convert_12.py +3 -0
- pixeltable/metadata/converters/convert_13.py +41 -0
- pixeltable/metadata/converters/convert_14.py +13 -0
- pixeltable/metadata/converters/convert_15.py +29 -0
- pixeltable/metadata/converters/util.py +63 -0
- pixeltable/metadata/schema.py +12 -6
- pixeltable/plan.py +11 -24
- pixeltable/store.py +16 -23
- pixeltable/tool/create_test_db_dump.py +49 -14
- pixeltable/type_system.py +27 -58
- pixeltable/utils/coco.py +94 -0
- pixeltable/utils/documents.py +42 -12
- pixeltable/utils/http_server.py +70 -0
- pixeltable-0.2.7.dist-info/METADATA +137 -0
- pixeltable-0.2.7.dist-info/RECORD +126 -0
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/WHEEL +1 -1
- pixeltable/client.py +0 -600
- pixeltable/exprs/image_similarity_predicate.py +0 -58
- pixeltable/func/batched_function.py +0 -53
- pixeltable/func/nos_function.py +0 -202
- pixeltable/tests/conftest.py +0 -171
- pixeltable/tests/ext/test_yolox.py +0 -21
- pixeltable/tests/functions/test_fireworks.py +0 -43
- pixeltable/tests/functions/test_functions.py +0 -60
- pixeltable/tests/functions/test_huggingface.py +0 -158
- pixeltable/tests/functions/test_openai.py +0 -162
- pixeltable/tests/functions/test_together.py +0 -112
- pixeltable/tests/test_audio.py +0 -65
- pixeltable/tests/test_catalog.py +0 -27
- pixeltable/tests/test_client.py +0 -21
- pixeltable/tests/test_component_view.py +0 -379
- pixeltable/tests/test_dataframe.py +0 -440
- pixeltable/tests/test_dirs.py +0 -107
- pixeltable/tests/test_document.py +0 -120
- pixeltable/tests/test_exprs.py +0 -802
- pixeltable/tests/test_function.py +0 -332
- pixeltable/tests/test_index.py +0 -138
- pixeltable/tests/test_migration.py +0 -44
- pixeltable/tests/test_nos.py +0 -54
- pixeltable/tests/test_snapshot.py +0 -231
- pixeltable/tests/test_table.py +0 -1343
- pixeltable/tests/test_transactional_directory.py +0 -42
- pixeltable/tests/test_types.py +0 -52
- pixeltable/tests/test_video.py +0 -159
- pixeltable/tests/test_view.py +0 -535
- pixeltable/tests/utils.py +0 -442
- pixeltable/utils/clip.py +0 -18
- pixeltable-0.2.5.dist-info/METADATA +0 -128
- pixeltable-0.2.5.dist-info/RECORD +0 -139
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/LICENSE +0 -0
|
@@ -5,7 +5,8 @@ import importlib
|
|
|
5
5
|
import inspect
|
|
6
6
|
import logging
|
|
7
7
|
import time
|
|
8
|
-
from typing import Optional, List, Dict, Any, Tuple, Type, Set
|
|
8
|
+
from typing import Optional, List, Dict, Any, Tuple, Type, Set, Iterable
|
|
9
|
+
import uuid
|
|
9
10
|
from uuid import UUID
|
|
10
11
|
|
|
11
12
|
import sqlalchemy as sql
|
|
@@ -23,6 +24,7 @@ from pixeltable.utils.filecache import FileCache
|
|
|
23
24
|
from pixeltable.utils.media_store import MediaStore
|
|
24
25
|
from .column import Column
|
|
25
26
|
from .globals import UpdateStatus, POS_COLUMN_NAME, is_valid_identifier
|
|
27
|
+
from ..func.globals import resolve_symbol
|
|
26
28
|
|
|
27
29
|
_logger = logging.getLogger('pixeltable')
|
|
28
30
|
|
|
@@ -43,6 +45,7 @@ class TableVersion:
|
|
|
43
45
|
@dataclasses.dataclass
|
|
44
46
|
class IndexInfo:
|
|
45
47
|
id: int
|
|
48
|
+
name: str
|
|
46
49
|
idx: index.IndexBase
|
|
47
50
|
col: Column
|
|
48
51
|
val_col: Column
|
|
@@ -86,6 +89,8 @@ class TableVersion:
|
|
|
86
89
|
self.next_idx_id = tbl_md.next_idx_id
|
|
87
90
|
self.next_rowid = tbl_md.next_row_id
|
|
88
91
|
|
|
92
|
+
self.remotes = dict(TableVersion._init_remote(remote_md) for remote_md in tbl_md.remotes)
|
|
93
|
+
|
|
89
94
|
# view-specific initialization
|
|
90
95
|
from pixeltable import exprs
|
|
91
96
|
predicate_dict = None if not is_view or tbl_md.view_md.predicate is None else tbl_md.view_md.predicate
|
|
@@ -114,9 +119,9 @@ class TableVersion:
|
|
|
114
119
|
cat.tbl_versions[(self.id, self.effective_version)] = self
|
|
115
120
|
|
|
116
121
|
# init schema after we determined whether we're a component view, and before we create the store table
|
|
117
|
-
self.cols:
|
|
122
|
+
self.cols: list[Column] = [] # contains complete history of columns, incl dropped ones
|
|
118
123
|
self.cols_by_name: dict[str, Column] = {} # contains only user-facing (named) columns visible in this version
|
|
119
|
-
self.cols_by_id: dict[int, Column] = {} # contains only columns visible in this version
|
|
124
|
+
self.cols_by_id: dict[int, Column] = {} # contains only columns visible in this version, both system and user
|
|
120
125
|
self.idx_md = tbl_md.index_md # needed for _create_tbl_md()
|
|
121
126
|
self.idxs_by_name: dict[str, TableVersion.IndexInfo] = {} # contains only actively maintained indices
|
|
122
127
|
self._init_schema(tbl_md, schema_version_md)
|
|
@@ -149,23 +154,22 @@ class TableVersion:
|
|
|
149
154
|
if col.is_computed:
|
|
150
155
|
col.check_value_expr()
|
|
151
156
|
|
|
152
|
-
|
|
157
|
+
timestamp = time.time()
|
|
153
158
|
# create schema.Table
|
|
154
159
|
# Column.dependent_cols for existing cols is wrong at this point, but init() will set it correctly
|
|
155
160
|
column_md = cls._create_column_md(cols)
|
|
156
161
|
table_md = schema.TableMd(
|
|
157
|
-
name=name, current_version=0, current_schema_version=0,
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
session
|
|
161
|
-
|
|
162
|
-
|
|
162
|
+
name=name, current_version=0, current_schema_version=0, next_col_id=len(cols),
|
|
163
|
+
next_idx_id=0, next_row_id=0, column_md=column_md, index_md={}, remotes=[], view_md=view_md)
|
|
164
|
+
# create a schema.Table here, we need it to call our c'tor;
|
|
165
|
+
# don't add it to the session yet, we might add index metadata
|
|
166
|
+
tbl_id = uuid.uuid4()
|
|
167
|
+
tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))
|
|
163
168
|
|
|
164
169
|
# create schema.TableVersion
|
|
165
|
-
table_version_md = schema.TableVersionMd(created_at=
|
|
170
|
+
table_version_md = schema.TableVersionMd(created_at=timestamp, version=0, schema_version=0)
|
|
166
171
|
tbl_version_record = schema.TableVersion(
|
|
167
172
|
tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md))
|
|
168
|
-
session.add(tbl_version_record)
|
|
169
173
|
|
|
170
174
|
# create schema.TableSchemaVersion
|
|
171
175
|
schema_col_md = {col.id: schema.SchemaColumn(pos=pos, name=col.name) for pos, col in enumerate(cols)}
|
|
@@ -175,19 +179,33 @@ class TableVersion:
|
|
|
175
179
|
num_retained_versions=num_retained_versions, comment=comment)
|
|
176
180
|
schema_version_record = schema.TableSchemaVersion(
|
|
177
181
|
tbl_id=tbl_record.id, schema_version=0, md=dataclasses.asdict(schema_version_md))
|
|
178
|
-
session.add(schema_version_record)
|
|
179
182
|
|
|
180
183
|
# if this is purely a snapshot (it doesn't require any additional storage for columns and it # doesn't have a
|
|
181
184
|
# predicate to apply at runtime), we don't create a physical table and simply use the base's table version path
|
|
182
185
|
if view_md is not None and view_md.is_snapshot and view_md.predicate is None and len(cols) == 0:
|
|
186
|
+
session.add(tbl_record)
|
|
187
|
+
session.add(tbl_version_record)
|
|
188
|
+
session.add(schema_version_record)
|
|
183
189
|
return tbl_record.id, None
|
|
184
190
|
|
|
185
191
|
assert (base_path is not None) == (view_md is not None)
|
|
186
192
|
base = base_path.tbl_version if base_path is not None and view_md.is_snapshot else None
|
|
187
193
|
base_path = base_path if base_path is not None and not view_md.is_snapshot else None
|
|
188
194
|
tbl_version = cls(tbl_record.id, table_md, 0, schema_version_md, base=base, base_path=base_path)
|
|
189
|
-
|
|
190
|
-
|
|
195
|
+
|
|
196
|
+
conn = session.connection()
|
|
197
|
+
tbl_version.store_tbl.create(conn)
|
|
198
|
+
if view_md is None or not view_md.is_snapshot:
|
|
199
|
+
# add default indices, after creating the store table
|
|
200
|
+
for col in tbl_version.cols_by_name.values():
|
|
201
|
+
status = tbl_version._add_default_index(col, conn=conn)
|
|
202
|
+
assert status is None or status.num_excs == 0
|
|
203
|
+
|
|
204
|
+
# we re-create the tbl_record here, now that we have new index metadata
|
|
205
|
+
tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_version._create_tbl_md()))
|
|
206
|
+
session.add(tbl_record)
|
|
207
|
+
session.add(tbl_version_record)
|
|
208
|
+
session.add(schema_version_record)
|
|
191
209
|
return tbl_record.id, tbl_version
|
|
192
210
|
|
|
193
211
|
@classmethod
|
|
@@ -251,6 +269,16 @@ class TableVersion:
|
|
|
251
269
|
col.value_expr = exprs.Expr.from_dict(col_md.value_expr)
|
|
252
270
|
self._record_value_expr(col)
|
|
253
271
|
|
|
272
|
+
# if this is a stored proxy column, resolve the relationships with its proxy base.
|
|
273
|
+
if col_md.proxy_base is not None:
|
|
274
|
+
# proxy_base must have a strictly smaller id, so we must already have encountered it
|
|
275
|
+
# in traversal order; and if the proxy column is active at this version, then the
|
|
276
|
+
# proxy base must necessarily be active as well. This motivates the following assertion.
|
|
277
|
+
assert col_md.proxy_base in self.cols_by_id
|
|
278
|
+
base_col = self.cols_by_id[col_md.proxy_base]
|
|
279
|
+
base_col.stored_proxy = col
|
|
280
|
+
col.proxy_base = base_col
|
|
281
|
+
|
|
254
282
|
def _init_idxs(self, tbl_md: schema.TableMd) -> None:
|
|
255
283
|
self.idx_md = tbl_md.index_md
|
|
256
284
|
self.idxs_by_name = {}
|
|
@@ -258,7 +286,7 @@ class TableVersion:
|
|
|
258
286
|
for md in tbl_md.index_md.values():
|
|
259
287
|
if md.schema_version_add > self.schema_version \
|
|
260
288
|
or md.schema_version_drop is not None and md.schema_version_drop <= self.schema_version:
|
|
261
|
-
#
|
|
289
|
+
# index not visible in this schema version
|
|
262
290
|
continue
|
|
263
291
|
|
|
264
292
|
# instantiate index object
|
|
@@ -270,9 +298,11 @@ class TableVersion:
|
|
|
270
298
|
# fix up the sa column type of the index value and undo columns
|
|
271
299
|
val_col = self.cols_by_id[md.index_val_col_id]
|
|
272
300
|
val_col.sa_col_type = idx.index_sa_type()
|
|
301
|
+
val_col._records_errors = False
|
|
273
302
|
undo_col = self.cols_by_id[md.index_val_undo_col_id]
|
|
274
303
|
undo_col.sa_col_type = idx.index_sa_type()
|
|
275
|
-
|
|
304
|
+
undo_col._records_errors = False
|
|
305
|
+
idx_info = self.IndexInfo(id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col)
|
|
276
306
|
self.idxs_by_name[md.name] = idx_info
|
|
277
307
|
|
|
278
308
|
def _init_sa_schema(self) -> None:
|
|
@@ -286,10 +316,12 @@ class TableVersion:
|
|
|
286
316
|
else:
|
|
287
317
|
self.store_tbl: StoreBase = StoreTable(self)
|
|
288
318
|
|
|
289
|
-
def _update_md(
|
|
319
|
+
def _update_md(
|
|
320
|
+
self, timestamp: float, preceding_schema_version: Optional[int], conn: sql.engine.Connection
|
|
321
|
+
) -> None:
|
|
290
322
|
"""Update all recorded metadata in response to a data or schema change.
|
|
291
323
|
Args:
|
|
292
|
-
|
|
324
|
+
timestamp: timestamp of the change
|
|
293
325
|
preceding_schema_version: last schema version if schema change, else None
|
|
294
326
|
"""
|
|
295
327
|
conn.execute(
|
|
@@ -297,7 +329,7 @@ class TableVersion:
|
|
|
297
329
|
.values({schema.Table.md: dataclasses.asdict(self._create_tbl_md())})
|
|
298
330
|
.where(schema.Table.id == self.id))
|
|
299
331
|
|
|
300
|
-
version_md = self._create_version_md(
|
|
332
|
+
version_md = self._create_version_md(timestamp)
|
|
301
333
|
conn.execute(
|
|
302
334
|
sql.insert(schema.TableVersion.__table__)
|
|
303
335
|
.values(tbl_id=self.id, version=self.version, md=dataclasses.asdict(version_md)))
|
|
@@ -314,6 +346,33 @@ class TableVersion:
|
|
|
314
346
|
return f'idx_{self.id.hex}_{idx_id}'
|
|
315
347
|
|
|
316
348
|
def add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
|
|
349
|
+
# we're creating a new schema version
|
|
350
|
+
self.version += 1
|
|
351
|
+
preceding_schema_version = self.schema_version
|
|
352
|
+
self.schema_version = self.version
|
|
353
|
+
with Env.get().engine.begin() as conn:
|
|
354
|
+
status = self._add_index(col, idx_name, idx, conn)
|
|
355
|
+
self._update_md(time.time(), preceding_schema_version, conn)
|
|
356
|
+
_logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
|
|
357
|
+
return status
|
|
358
|
+
|
|
359
|
+
def _add_default_index(self, col: Column, conn: sql.engine.Connection) -> Optional[UpdateStatus]:
|
|
360
|
+
"""Add a B-tree index on this column if it has a compatible type"""
|
|
361
|
+
if not col.stored:
|
|
362
|
+
# if the column is intentionally not stored, we want to avoid the overhead of an index
|
|
363
|
+
return None
|
|
364
|
+
if not col.col_type.is_scalar_type() and not (col.col_type.is_media_type() and not col.is_computed):
|
|
365
|
+
# wrong type for a B-tree
|
|
366
|
+
return None
|
|
367
|
+
if col.col_type.is_bool_type():
|
|
368
|
+
# B-trees on bools aren't useful
|
|
369
|
+
return None
|
|
370
|
+
status = self._add_index(col, idx_name=None, idx=index.BtreeIndex(col), conn=conn)
|
|
371
|
+
return status
|
|
372
|
+
|
|
373
|
+
def _add_index(
|
|
374
|
+
self, col: Column, idx_name: Optional[str], idx: index.IndexBase, conn: sql.engine.Connection
|
|
375
|
+
) -> UpdateStatus:
|
|
317
376
|
assert not self.is_snapshot
|
|
318
377
|
idx_id = self.next_idx_id
|
|
319
378
|
self.next_idx_id += 1
|
|
@@ -323,46 +382,41 @@ class TableVersion:
|
|
|
323
382
|
assert is_valid_identifier(idx_name)
|
|
324
383
|
assert idx_name not in [i.name for i in self.idx_md.values()]
|
|
325
384
|
|
|
326
|
-
#
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
val_col.
|
|
339
|
-
self.
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
status = self._add_columns([val_col, undo_col], conn, preceding_schema_version=preceding_schema_version)
|
|
362
|
-
# now create the index structure
|
|
363
|
-
idx.create_index(self._store_idx_name(idx_id), val_col, conn)
|
|
364
|
-
|
|
365
|
-
_logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
|
|
385
|
+
# add the index value and undo columns (which need to be nullable)
|
|
386
|
+
val_col = Column(
|
|
387
|
+
col_id=self.next_col_id, name=None, computed_with=idx.index_value_expr(),
|
|
388
|
+
sa_col_type=idx.index_sa_type(), stored=True,
|
|
389
|
+
schema_version_add=self.schema_version, schema_version_drop=None,
|
|
390
|
+
records_errors=idx.records_value_errors())
|
|
391
|
+
val_col.tbl = self
|
|
392
|
+
val_col.col_type = val_col.col_type.copy(nullable=True)
|
|
393
|
+
self.next_col_id += 1
|
|
394
|
+
|
|
395
|
+
undo_col = Column(
|
|
396
|
+
col_id=self.next_col_id, name=None, col_type=val_col.col_type,
|
|
397
|
+
sa_col_type=val_col.sa_col_type, stored=True,
|
|
398
|
+
schema_version_add=self.schema_version, schema_version_drop=None,
|
|
399
|
+
records_errors=False)
|
|
400
|
+
undo_col.tbl = self
|
|
401
|
+
undo_col.col_type = undo_col.col_type.copy(nullable=True)
|
|
402
|
+
self.next_col_id += 1
|
|
403
|
+
|
|
404
|
+
# create and register the index metadata
|
|
405
|
+
idx_cls = type(idx)
|
|
406
|
+
idx_md = schema.IndexMd(
|
|
407
|
+
id=idx_id, name=idx_name,
|
|
408
|
+
indexed_col_id=col.id, index_val_col_id=val_col.id, index_val_undo_col_id=undo_col.id,
|
|
409
|
+
schema_version_add=self.schema_version, schema_version_drop=None,
|
|
410
|
+
class_fqn=idx_cls.__module__ + '.' + idx_cls.__name__, init_args=idx.as_dict())
|
|
411
|
+
idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
|
|
412
|
+
self.idx_md[idx_id] = idx_md
|
|
413
|
+
self.idxs_by_name[idx_name] = idx_info
|
|
414
|
+
|
|
415
|
+
# add the columns and update the metadata
|
|
416
|
+
status = self._add_columns([val_col, undo_col], conn)
|
|
417
|
+
# now create the index structure
|
|
418
|
+
idx.create_index(self._store_idx_name(idx_id), val_col, conn)
|
|
419
|
+
|
|
366
420
|
return status
|
|
367
421
|
|
|
368
422
|
def drop_index(self, idx_id: int) -> None:
|
|
@@ -380,7 +434,8 @@ class TableVersion:
|
|
|
380
434
|
del self.idxs_by_name[idx_md.name]
|
|
381
435
|
|
|
382
436
|
with Env.get().engine.begin() as conn:
|
|
383
|
-
self._drop_columns([idx_info.val_col, idx_info.undo_col]
|
|
437
|
+
self._drop_columns([idx_info.val_col, idx_info.undo_col])
|
|
438
|
+
self._update_md(time.time(), preceding_schema_version, conn)
|
|
384
439
|
_logger.info(f'Dropped index {idx_md.name} on table {self.name}')
|
|
385
440
|
|
|
386
441
|
def add_column(self, col: Column, print_stats: bool = False) -> UpdateStatus:
|
|
@@ -397,16 +452,16 @@ class TableVersion:
|
|
|
397
452
|
if col.compute_func is not None:
|
|
398
453
|
# create value_expr from compute_func
|
|
399
454
|
self._create_value_expr(col, self.path)
|
|
400
|
-
if col.value_expr is not None:
|
|
401
|
-
col.check_value_expr()
|
|
402
|
-
self._record_value_expr(col)
|
|
403
455
|
|
|
404
456
|
# we're creating a new schema version
|
|
405
457
|
self.version += 1
|
|
406
458
|
preceding_schema_version = self.schema_version
|
|
407
459
|
self.schema_version = self.version
|
|
408
460
|
with Env.get().engine.begin() as conn:
|
|
409
|
-
status = self._add_columns([col], conn,
|
|
461
|
+
status = self._add_columns([col], conn, print_stats=print_stats)
|
|
462
|
+
_ = self._add_default_index(col, conn)
|
|
463
|
+
# TODO: what to do about errors?
|
|
464
|
+
self._update_md(time.time(), preceding_schema_version, conn)
|
|
410
465
|
_logger.info(f'Added column {col.name} to table {self.name}, new version: {self.version}')
|
|
411
466
|
|
|
412
467
|
msg = (
|
|
@@ -417,13 +472,8 @@ class TableVersion:
|
|
|
417
472
|
_logger.info(f'Column {col.name}: {msg}')
|
|
418
473
|
return status
|
|
419
474
|
|
|
420
|
-
def _add_columns(
|
|
421
|
-
self, cols: List[Column], conn: sql.engine.Connection, preceding_schema_version: Optional[int] = None,
|
|
422
|
-
print_stats: bool = False
|
|
423
|
-
) -> UpdateStatus:
|
|
475
|
+
def _add_columns(self, cols: List[Column], conn: sql.engine.Connection, print_stats: bool = False) -> UpdateStatus:
|
|
424
476
|
"""Add and populate columns within the current transaction"""
|
|
425
|
-
ts = time.time()
|
|
426
|
-
|
|
427
477
|
row_count = self.store_tbl.count(conn=conn)
|
|
428
478
|
for col in cols:
|
|
429
479
|
if not col.col_type.nullable and not col.is_computed:
|
|
@@ -441,6 +491,9 @@ class TableVersion:
|
|
|
441
491
|
if col.name is not None:
|
|
442
492
|
self.cols_by_name[col.name] = col
|
|
443
493
|
self.cols_by_id[col.id] = col
|
|
494
|
+
if col.value_expr is not None:
|
|
495
|
+
col.check_value_expr()
|
|
496
|
+
self._record_value_expr(col)
|
|
444
497
|
|
|
445
498
|
if col.is_stored:
|
|
446
499
|
self.store_tbl.add_column(col, conn)
|
|
@@ -474,7 +527,6 @@ class TableVersion:
|
|
|
474
527
|
finally:
|
|
475
528
|
plan.close()
|
|
476
529
|
|
|
477
|
-
self._update_md(ts, preceding_schema_version, conn)
|
|
478
530
|
if print_stats:
|
|
479
531
|
plan.ctx.profile.print(num_rows=row_count)
|
|
480
532
|
# TODO(mkornacker): what to do about system columns with exceptions?
|
|
@@ -492,8 +544,16 @@ class TableVersion:
|
|
|
492
544
|
dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
|
|
493
545
|
if len(dependent_user_cols) > 0:
|
|
494
546
|
raise excs.Error(
|
|
495
|
-
f'Cannot drop column {name} because the following columns depend on it:\n'
|
|
496
|
-
f'{", ".join(
|
|
547
|
+
f'Cannot drop column `{name}` because the following columns depend on it:\n'
|
|
548
|
+
f'{", ".join(c.name for c in dependent_user_cols)}'
|
|
549
|
+
)
|
|
550
|
+
dependent_remotes = [remote for remote, col_mapping in self.remotes.items() if name in col_mapping]
|
|
551
|
+
if len(dependent_remotes) > 0:
|
|
552
|
+
raise excs.Error(
|
|
553
|
+
f'Cannot drop column `{name}` because the following remotes depend on it:\n'
|
|
554
|
+
f'{", ".join(str(r) for r in dependent_remotes)}'
|
|
555
|
+
)
|
|
556
|
+
assert col.stored_proxy is None # since there are no dependent remotes
|
|
497
557
|
|
|
498
558
|
# we're creating a new schema version
|
|
499
559
|
self.version += 1
|
|
@@ -515,14 +575,14 @@ class TableVersion:
|
|
|
515
575
|
# update idxs_by_name
|
|
516
576
|
for idx_name in dropped_idx_names:
|
|
517
577
|
del self.idxs_by_name[idx_name]
|
|
518
|
-
self._drop_columns(dropped_cols
|
|
578
|
+
self._drop_columns(dropped_cols)
|
|
579
|
+
self._update_md(time.time(), preceding_schema_version, conn)
|
|
519
580
|
_logger.info(f'Dropped column {name} from table {self.name}, new version: {self.version}')
|
|
520
581
|
|
|
521
|
-
def _drop_columns(self, cols: list[Column]
|
|
582
|
+
def _drop_columns(self, cols: list[Column]) -> None:
|
|
522
583
|
"""Mark columns as dropped"""
|
|
523
584
|
assert not self.is_snapshot
|
|
524
585
|
|
|
525
|
-
ts = time.time()
|
|
526
586
|
for col in cols:
|
|
527
587
|
if col.value_expr is not None:
|
|
528
588
|
# update Column.dependent_cols
|
|
@@ -538,7 +598,6 @@ class TableVersion:
|
|
|
538
598
|
assert col.id in self.cols_by_id
|
|
539
599
|
del self.cols_by_id[col.id]
|
|
540
600
|
|
|
541
|
-
self._update_md(ts, preceding_schema_version, conn)
|
|
542
601
|
self.store_tbl.create_sa_tbl()
|
|
543
602
|
|
|
544
603
|
def rename_column(self, old_name: str, new_name: str) -> None:
|
|
@@ -557,13 +616,12 @@ class TableVersion:
|
|
|
557
616
|
self.cols_by_name[new_name] = col
|
|
558
617
|
|
|
559
618
|
# we're creating a new schema version
|
|
560
|
-
ts = time.time()
|
|
561
619
|
self.version += 1
|
|
562
620
|
preceding_schema_version = self.schema_version
|
|
563
621
|
self.schema_version = self.version
|
|
564
622
|
|
|
565
623
|
with Env.get().engine.begin() as conn:
|
|
566
|
-
self._update_md(
|
|
624
|
+
self._update_md(time.time(), preceding_schema_version, conn)
|
|
567
625
|
_logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
|
|
568
626
|
|
|
569
627
|
def set_comment(self, new_comment: Optional[str]):
|
|
@@ -578,12 +636,11 @@ class TableVersion:
|
|
|
578
636
|
|
|
579
637
|
def _create_schema_version(self):
|
|
580
638
|
# we're creating a new schema version
|
|
581
|
-
ts = time.time()
|
|
582
639
|
self.version += 1
|
|
583
640
|
preceding_schema_version = self.schema_version
|
|
584
641
|
self.schema_version = self.version
|
|
585
642
|
with Env.get().engine.begin() as conn:
|
|
586
|
-
self._update_md(
|
|
643
|
+
self._update_md(time.time(), preceding_schema_version, conn)
|
|
587
644
|
_logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
|
|
588
645
|
|
|
589
646
|
def insert(
|
|
@@ -594,12 +651,11 @@ class TableVersion:
|
|
|
594
651
|
assert self.is_insertable()
|
|
595
652
|
from pixeltable.plan import Planner
|
|
596
653
|
plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
|
|
597
|
-
ts = time.time()
|
|
598
654
|
with Env.get().engine.begin() as conn:
|
|
599
|
-
return self._insert(plan, conn,
|
|
655
|
+
return self._insert(plan, conn, time.time(), print_stats)
|
|
600
656
|
|
|
601
657
|
def _insert(
|
|
602
|
-
self, exec_plan: exec.ExecNode, conn: sql.engine.Connection,
|
|
658
|
+
self, exec_plan: exec.ExecNode, conn: sql.engine.Connection, timestamp: float, print_stats: bool = False,
|
|
603
659
|
) -> UpdateStatus:
|
|
604
660
|
"""Insert rows produced by exec_plan and propagate to views"""
|
|
605
661
|
# we're creating a new version
|
|
@@ -611,13 +667,13 @@ class TableVersion:
|
|
|
611
667
|
result.num_excs = num_excs
|
|
612
668
|
result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
|
|
613
669
|
result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
|
|
614
|
-
self._update_md(
|
|
670
|
+
self._update_md(timestamp, None, conn)
|
|
615
671
|
|
|
616
672
|
# update views
|
|
617
673
|
for view in self.mutable_views:
|
|
618
674
|
from pixeltable.plan import Planner
|
|
619
675
|
plan, _ = Planner.create_view_load_plan(view.path, propagates_insert=True)
|
|
620
|
-
status = view._insert(plan, conn,
|
|
676
|
+
status = view._insert(plan, conn, timestamp, print_stats)
|
|
621
677
|
result.num_rows += status.num_rows
|
|
622
678
|
result.num_excs += status.num_excs
|
|
623
679
|
result.num_computed_values += status.num_computed_values
|
|
@@ -661,7 +717,7 @@ class TableVersion:
|
|
|
661
717
|
# construct Where clause to match rowid
|
|
662
718
|
num_rowid_cols = len(self.store_tbl.rowid_columns())
|
|
663
719
|
for col_idx in range(num_rowid_cols):
|
|
664
|
-
assert len(rowids[i]) == num_rowid_cols
|
|
720
|
+
assert len(rowids[i]) == num_rowid_cols, f'len({rowids[i]}) != {num_rowid_cols}'
|
|
665
721
|
clause = exprs.RowidRef(self, col_idx) == rowids[i][col_idx]
|
|
666
722
|
if where_clause is None:
|
|
667
723
|
where_clause = clause
|
|
@@ -678,7 +734,7 @@ class TableVersion:
|
|
|
678
734
|
where_clause = where_clause & clause
|
|
679
735
|
|
|
680
736
|
update_targets = {col: row[col] for col in row if col not in pk_cols}
|
|
681
|
-
status = self._update(conn, update_targets, where_clause, cascade)
|
|
737
|
+
status = self._update(conn, update_targets, where_clause, cascade, show_progress=False)
|
|
682
738
|
result_status.num_rows += status.num_rows
|
|
683
739
|
result_status.num_excs += status.num_excs
|
|
684
740
|
result_status.num_computed_values += status.num_computed_values
|
|
@@ -691,7 +747,8 @@ class TableVersion:
|
|
|
691
747
|
|
|
692
748
|
def _update(
|
|
693
749
|
self, conn: sql.engine.Connection, update_targets: dict[Column, 'pixeltable.exprs.Expr'],
|
|
694
|
-
where_clause: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True
|
|
750
|
+
where_clause: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True,
|
|
751
|
+
show_progress: bool = True
|
|
695
752
|
) -> UpdateStatus:
|
|
696
753
|
"""Update rows in this table.
|
|
697
754
|
Args:
|
|
@@ -704,28 +761,27 @@ class TableVersion:
|
|
|
704
761
|
from pixeltable.plan import Planner
|
|
705
762
|
plan, updated_cols, recomputed_cols = \
|
|
706
763
|
Planner.create_update_plan(self.path, update_targets, [], where_clause, cascade)
|
|
707
|
-
ts = time.time()
|
|
708
764
|
result = self._propagate_update(
|
|
709
765
|
plan, where_clause.sql_expr() if where_clause is not None else None, recomputed_cols,
|
|
710
|
-
base_versions=[], conn=conn,
|
|
766
|
+
base_versions=[], conn=conn, timestamp=time.time(), cascade=cascade, show_progress=show_progress)
|
|
711
767
|
result.updated_cols = updated_cols
|
|
712
768
|
return result
|
|
713
769
|
|
|
714
770
|
def _propagate_update(
|
|
715
771
|
self, plan: Optional[exec.ExecNode], where_clause: Optional[sql.ClauseElement],
|
|
716
772
|
recomputed_view_cols: List[Column], base_versions: List[Optional[int]], conn: sql.engine.Connection,
|
|
717
|
-
|
|
773
|
+
timestamp: float, cascade: bool, show_progress: bool = True
|
|
718
774
|
) -> UpdateStatus:
|
|
719
775
|
result = UpdateStatus()
|
|
720
776
|
if plan is not None:
|
|
721
777
|
# we're creating a new version
|
|
722
778
|
self.version += 1
|
|
723
779
|
result.num_rows, result.num_excs, cols_with_excs = \
|
|
724
|
-
self.store_tbl.insert_rows(plan, conn, v_min=self.version)
|
|
780
|
+
self.store_tbl.insert_rows(plan, conn, v_min=self.version, show_progress=show_progress)
|
|
725
781
|
result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
|
|
726
782
|
self.store_tbl.delete_rows(
|
|
727
783
|
self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause, conn=conn)
|
|
728
|
-
self._update_md(
|
|
784
|
+
self._update_md(timestamp, None, conn)
|
|
729
785
|
|
|
730
786
|
if cascade:
|
|
731
787
|
base_versions = [None if plan is None else self.version] + base_versions # don't update in place
|
|
@@ -737,7 +793,7 @@ class TableVersion:
|
|
|
737
793
|
from pixeltable.plan import Planner
|
|
738
794
|
plan = Planner.create_view_update_plan(view.path, recompute_targets=recomputed_cols)
|
|
739
795
|
status = view._propagate_update(
|
|
740
|
-
plan, None, recomputed_view_cols, base_versions=base_versions, conn=conn,
|
|
796
|
+
plan, None, recomputed_view_cols, base_versions=base_versions, conn=conn, timestamp=timestamp, cascade=True)
|
|
741
797
|
result.num_rows += status.num_rows
|
|
742
798
|
result.num_excs += status.num_excs
|
|
743
799
|
result.cols_with_excs += status.cols_with_excs
|
|
@@ -753,16 +809,15 @@ class TableVersion:
|
|
|
753
809
|
assert self.is_insertable()
|
|
754
810
|
from pixeltable.plan import Planner
|
|
755
811
|
analysis_info = Planner.analyze(self, where)
|
|
756
|
-
ts = time.time()
|
|
757
812
|
with Env.get().engine.begin() as conn:
|
|
758
|
-
num_rows = self._delete(analysis_info.sql_where_clause, base_versions=[], conn=conn,
|
|
813
|
+
num_rows = self._delete(analysis_info.sql_where_clause, base_versions=[], conn=conn, timestamp=time.time())
|
|
759
814
|
|
|
760
815
|
status = UpdateStatus(num_rows=num_rows)
|
|
761
816
|
return status
|
|
762
817
|
|
|
763
818
|
def _delete(
|
|
764
819
|
self, where: Optional['pixeltable.exprs.Predicate'], base_versions: List[Optional[int]],
|
|
765
|
-
conn: sql.engine.Connection,
|
|
820
|
+
conn: sql.engine.Connection, timestamp: float) -> int:
|
|
766
821
|
"""Delete rows in this table and propagate to views.
|
|
767
822
|
Args:
|
|
768
823
|
where: a Predicate to filter rows to delete.
|
|
@@ -776,11 +831,12 @@ class TableVersion:
|
|
|
776
831
|
if num_rows > 0:
|
|
777
832
|
# we're creating a new version
|
|
778
833
|
self.version += 1
|
|
779
|
-
self._update_md(
|
|
834
|
+
self._update_md(timestamp, None, conn)
|
|
780
835
|
else:
|
|
781
836
|
pass
|
|
782
837
|
for view in self.mutable_views:
|
|
783
|
-
num_rows += view._delete(
|
|
838
|
+
num_rows += view._delete(
|
|
839
|
+
where=None, base_versions=[self.version] + base_versions, conn=conn, timestamp=timestamp)
|
|
784
840
|
return num_rows
|
|
785
841
|
|
|
786
842
|
def revert(self) -> None:
|
|
@@ -906,6 +962,94 @@ class TableVersion:
|
|
|
906
962
|
view._revert(session)
|
|
907
963
|
_logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
|
|
908
964
|
|
|
965
|
+
@classmethod
def _init_remote(cls, remote_md: dict[str, Any]) -> Tuple[pixeltable.datatransfer.Remote, dict[str, str]]:
    """Rebuild a remote and its column mapping from one serialized metadata entry.

    Args:
        remote_md: dict with the keys 'class' (symbol path of the remote's class),
            'remote_md' (payload handed to that class's `from_dict`) and 'col_mapping'.

    Returns:
        A `(remote, col_mapping)` tuple.
    """
    resolved_cls = resolve_symbol(remote_md['class'])
    # The stored symbol has to resolve to a class derived from Remote.
    assert isinstance(resolved_cls, type) and issubclass(resolved_cls, pixeltable.datatransfer.Remote)
    return resolved_cls.from_dict(remote_md['remote_md']), remote_md['col_mapping']
|
|
972
|
+
|
|
973
|
+
def link(self, remote: pixeltable.datatransfer.Remote, col_mapping: dict[str, str]) -> None:
    """Register `remote` with this table version under `col_mapping`.

    Every linked media column must either be a stored, computed column or have a
    stored proxy, so that its media resides in the media cache, where it can be
    served. Columns that lack a proxy get one created here.

    The table version is always incremented and the metadata rewritten; the schema
    version is only advanced when at least one stored proxy column is created.

    Args:
        remote: the remote to link.
        col_mapping: mapping whose keys are names of columns of this table
            (or of its bases) that take part in the link.
    """
    all_cols = self.path.cols_by_name()  # Includes base columns

    # Collect the columns that need a stored proxy but don't have one yet.
    missing_proxies = []
    for name in col_mapping:
        candidate = all_cols[name]
        if not candidate.col_type.is_media_type():
            continue
        if candidate.is_stored and candidate.compute_func:
            continue
        if candidate.stored_proxy:
            continue
        missing_proxies.append(candidate)

    with Env.get().engine.begin() as conn:
        self.version += 1
        self.remotes[remote] = col_mapping
        prior_schema_version = None
        if missing_proxies:
            _logger.info(f'Creating stored proxies for columns: {[col.name for col in missing_proxies]}')
            # Creating proxies is a schema change: remember the old schema version
            # and advance it to the new table version before building the columns.
            prior_schema_version = self.schema_version
            self.schema_version = self.version
            new_proxy_cols = [self.create_stored_proxy(col) for col in missing_proxies]
            # Adding the columns also updates table metadata.
            # TODO Add to base tables
            # The returned `UpdateStatus` is dropped on purpose: stored proxies are
            # intended to be invisible to the user.
            self._add_columns(new_proxy_cols, conn)
        # NOTE(review): executed whether or not proxies were created (inferred from the
        # `None` default of the preceding schema version) -- confirm against upstream.
        self._update_md(time.time(), prior_schema_version, conn)
|
|
1000
|
+
|
|
1001
|
+
def create_stored_proxy(self, col: Column) -> Column:
    """Build an unnamed, stored computed column that mirrors `col` and wire the two together.

    The proxy takes the next free column id of this table version (which is then
    advanced) and is tagged with the current schema version. On return,
    `col.stored_proxy` refers to the proxy and the proxy's `proxy_base` to `col`.

    Args:
        col: a media column that is not a stored computed column and has no proxy yet.

    Returns:
        The newly created proxy column.
    """
    from pixeltable import exprs

    # Preconditions, checked in the same order as the combined condition they replace.
    assert col.col_type.is_media_type()
    assert not (col.is_stored and col.compute_func)
    assert not col.stored_proxy

    # Identity expression over the base column: the proxy holds the same value, but stored.
    passthrough = exprs.ColumnRef(col).apply(lambda x: x, col_type=col.col_type)
    proxy = Column(
        name=None,
        computed_with=passthrough,
        stored=True,
        col_id=self.next_col_id,
        sa_col_type=col.col_type.to_sa_type(),
        schema_version_add=self.schema_version
    )
    proxy.tbl = self
    self.next_col_id += 1

    # Link base and proxy in both directions.
    col.stored_proxy = proxy
    proxy.proxy_base = col
    return proxy
|
|
1018
|
+
|
|
1019
|
+
def unlink(self, remote: pixeltable.datatransfer.Remote) -> None:
    """Remove `remote` from this table version and drop stored proxies nobody else needs.

    A stored proxy is dropped when its base column is mapped by `remote` and by no
    other remote. The table version is always incremented and the metadata rewritten;
    the schema version is only advanced when at least one proxy column is dropped.

    Args:
        remote: a remote currently present in `self.remotes`.
    """
    assert remote in self.remotes
    timestamp = time.time()

    unlinked_names = list(self.remotes[remote])
    # Column names that remain mapped by some other remote.
    still_mapped = set()
    for other_remote, other_mapping in self.remotes.items():
        if other_remote != remote:
            still_mapped.update(other_mapping.keys())

    all_cols = self.path.cols_by_name()  # Includes base columns
    orphaned_bases = []
    for name in unlinked_names:
        if name in still_mapped:
            continue
        if all_cols[name].stored_proxy:
            orphaned_bases.append(all_cols[name])

    with Env.get().engine.begin() as conn:
        self.version += 1
        del self.remotes[remote]
        prior_schema_version = None
        if orphaned_bases:
            # Dropping proxies is a schema change.
            prior_schema_version = self.schema_version
            self.schema_version = self.version
            # Capture the proxies before the back-references are cleared below.
            doomed_proxies = [base.stored_proxy for base in orphaned_bases]
            for base in orphaned_bases:
                assert base.stored_proxy is not None and base.stored_proxy.proxy_base == base
                base.stored_proxy.proxy_base = None
                base.stored_proxy = None
            # TODO Drop from base tables
            self._drop_columns(doomed_proxies)
        # NOTE(review): executed whether or not proxies were dropped (inferred from the
        # `None` default of the preceding schema version) -- confirm against upstream.
        self._update_md(timestamp, prior_schema_version, conn)
|
|
1049
|
+
|
|
1050
|
+
def get_remotes(self) -> dict[pixeltable.datatransfer.Remote, dict[str, str]]:
    """Return this table version's `remotes` dict itself (not a copy)."""
    linked_remotes = self.remotes
    return linked_remotes
|
|
1052
|
+
|
|
909
1053
|
def is_view(self) -> bool:
    """Return True exactly when this table version has a base (`self.base` is not None)."""
    return not (self.base is None)
|
|
911
1055
|
|
|
@@ -938,16 +1082,16 @@ class TableVersion:
|
|
|
938
1082
|
def get_required_col_names(self) -> List[str]:
    """Return the names of the columns that insert() callers must supply values for.

    These are the columns in `self.cols_by_name` that are neither computed nor
    nullable. Must not be called on a view.
    """
    assert not self.is_view()
    required = []
    for column in self.cols_by_name.values():
        # Computed columns are filled in by the system; nullable ones may be omitted.
        if column.is_computed or column.col_type.nullable:
            continue
        required.append(column.name)
    return required
|
|
943
1087
|
|
|
944
1088
|
def get_computed_col_names(self) -> List[str]:
    """Return the names of the columns in `self.cols_by_name` that are computed."""
    computed = []
    for column in self.cols_by_name.values():
        if column.is_computed:
            computed.append(column.name)
    return computed
|
|
948
1092
|
|
|
949
1093
|
@classmethod
|
|
950
|
-
def _create_value_expr(cls, col: Column, path: 'TableVersionPath') -> None:
|
|
1094
|
+
def _create_value_expr(cls, col: Column, path: 'pixeltable.catalog.TableVersionPath') -> None:
|
|
951
1095
|
"""
|
|
952
1096
|
Create col.value_expr, given col.compute_func.
|
|
953
1097
|
Interprets compute_func's parameters to be references to columns and construct ColumnRefs as args.
|
|
@@ -977,13 +1121,17 @@ class TableVersion:
|
|
|
977
1121
|
for refd_col in refd_cols:
|
|
978
1122
|
refd_col.dependent_cols.add(col)
|
|
979
1123
|
|
|
980
|
-
def
|
|
1124
|
+
def get_idx_val_columns(self, cols: Iterable[Column]) -> set[Column]:
    """Collect the `val_col` of every index info reported by the given columns.

    Args:
        cols: columns whose `get_idx_info()` mappings are inspected.

    Returns:
        The set of `val_col` entries found across all of those mappings.
    """
    val_cols = set()
    for column in cols:
        for idx_info in column.get_idx_info().values():
            val_cols.add(idx_info.val_col)
    return val_cols
|
|
1127
|
+
|
|
1128
|
+
def get_dependent_columns(self, cols: list[Column]) -> set[Column]:
|
|
981
1129
|
"""
|
|
982
1130
|
Return the set of columns that transitively depend on any of the given ones.
|
|
983
1131
|
"""
|
|
984
1132
|
if len(cols) == 0:
|
|
985
|
-
return
|
|
986
|
-
result:
|
|
1133
|
+
return set()
|
|
1134
|
+
result: set[Column] = set()
|
|
987
1135
|
for col in cols:
|
|
988
1136
|
result.update(col.dependent_cols)
|
|
989
1137
|
result.update(self.get_dependent_columns(result))
|
|
@@ -1003,17 +1151,30 @@ class TableVersion:
|
|
|
1003
1151
|
column_md[col.id] = schema.ColumnMd(
|
|
1004
1152
|
id=col.id, col_type=col.col_type.as_dict(), is_pk=col.is_pk,
|
|
1005
1153
|
schema_version_add=col.schema_version_add, schema_version_drop=col.schema_version_drop,
|
|
1006
|
-
value_expr=value_expr_dict, stored=col.stored
|
|
1154
|
+
value_expr=value_expr_dict, stored=col.stored,
|
|
1155
|
+
proxy_base=col.proxy_base.id if col.proxy_base else None)
|
|
1007
1156
|
return column_md
|
|
1008
1157
|
|
|
1158
|
+
@classmethod
def _create_remotes_md(cls, remotes: dict['pixeltable.datatransfer.Remote', dict[str, str]]) -> list[dict[str, Any]]:
    """Serialize the linked remotes into a list of plain metadata dicts.

    Each entry carries the remote's fully qualified class name under 'class', the
    result of its `to_dict()` under 'remote_md', and its column mapping under
    'col_mapping'. Entries follow the iteration order of `remotes`.
    """
    entries = []
    for remote, col_mapping in remotes.items():
        remote_type = type(remote)
        entries.append({
            'class': f'{remote_type.__module__}.{remote_type.__qualname__}',
            'remote_md': remote.to_dict(),
            'col_mapping': col_mapping
        })
    return entries
|
|
1168
|
+
|
|
1009
1169
|
def _create_tbl_md(self) -> schema.TableMd:
    """Snapshot this table version's current state as a `schema.TableMd` record."""
    return schema.TableMd(
        name=self.name,
        # version counters
        current_version=self.version,
        current_schema_version=self.schema_version,
        # id allocators
        next_col_id=self.next_col_id,
        next_idx_id=self.next_idx_id,
        next_row_id=self.next_rowid,
        # serialized columns, indices, remotes and view info
        column_md=self._create_column_md(self.cols),
        index_md=self.idx_md,
        remotes=self._create_remotes_md(self.remotes),
        view_md=self.view_md,
    )
|
|
1014
1175
|
|
|
1015
|
-
def _create_version_md(self,
|
|
1016
|
-
return schema.TableVersionMd(created_at=
|
|
1176
|
+
def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
    """Build the `schema.TableVersionMd` record for the current version.

    Args:
        timestamp: value stored as the record's `created_at`.
    """
    return schema.TableVersionMd(
        created_at=timestamp,
        version=self.version,
        schema_version=self.schema_version,
    )
|
|
1017
1178
|
|
|
1018
1179
|
def _create_schema_version_md(self, preceding_schema_version: int) -> schema.TableSchemaVersionMd:
|
|
1019
1180
|
column_md: Dict[int, schema.SchemaColumn] = {}
|