pixeltable-0.2.6-py3-none-any.whl → pixeltable-0.2.8-py3-none-any.whl
This diff reflects the changes between two publicly released versions of the package, as they appear in the public registry, and is provided for informational purposes only.
Potentially problematic release: this version of pixeltable might be problematic.
- pixeltable/__init__.py +3 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/column.py +8 -2
- pixeltable/catalog/insertable_table.py +32 -17
- pixeltable/catalog/table.py +167 -12
- pixeltable/catalog/table_version.py +185 -106
- pixeltable/datatransfer/__init__.py +1 -0
- pixeltable/datatransfer/label_studio.py +452 -0
- pixeltable/datatransfer/remote.py +85 -0
- pixeltable/env.py +148 -69
- pixeltable/exprs/column_ref.py +2 -2
- pixeltable/exprs/comparison.py +39 -1
- pixeltable/exprs/data_row.py +7 -0
- pixeltable/exprs/expr.py +11 -12
- pixeltable/exprs/function_call.py +0 -3
- pixeltable/exprs/globals.py +14 -2
- pixeltable/exprs/similarity_expr.py +5 -3
- pixeltable/ext/functions/whisperx.py +30 -0
- pixeltable/ext/functions/yolox.py +16 -0
- pixeltable/func/aggregate_function.py +2 -2
- pixeltable/func/expr_template_function.py +3 -1
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/fireworks.py +9 -4
- pixeltable/functions/huggingface.py +25 -1
- pixeltable/functions/openai.py +15 -10
- pixeltable/functions/together.py +11 -6
- pixeltable/functions/util.py +0 -43
- pixeltable/functions/video.py +46 -8
- pixeltable/globals.py +20 -2
- pixeltable/index/__init__.py +1 -0
- pixeltable/index/base.py +6 -1
- pixeltable/index/btree.py +54 -0
- pixeltable/index/embedding_index.py +4 -1
- pixeltable/io/__init__.py +1 -0
- pixeltable/io/globals.py +58 -0
- pixeltable/iterators/base.py +4 -4
- pixeltable/iterators/document.py +26 -15
- pixeltable/iterators/video.py +9 -1
- pixeltable/metadata/__init__.py +2 -2
- pixeltable/metadata/converters/convert_14.py +13 -0
- pixeltable/metadata/schema.py +9 -6
- pixeltable/plan.py +9 -5
- pixeltable/store.py +14 -21
- pixeltable/tool/create_test_db_dump.py +14 -0
- pixeltable/type_system.py +14 -4
- pixeltable/utils/coco.py +94 -0
- pixeltable-0.2.8.dist-info/METADATA +137 -0
- {pixeltable-0.2.6.dist-info → pixeltable-0.2.8.dist-info}/RECORD +50 -45
- pixeltable/func/nos_function.py +0 -202
- pixeltable/utils/clip.py +0 -18
- pixeltable-0.2.6.dist-info/METADATA +0 -131
- {pixeltable-0.2.6.dist-info → pixeltable-0.2.8.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.6.dist-info → pixeltable-0.2.8.dist-info}/WHEEL +0 -0
pixeltable/catalog/table_version.py
@@ -5,7 +5,8 @@ import importlib
 import inspect
 import logging
 import time
-from typing import Optional, List, Dict, Any, Tuple, Type, Set
+from typing import Optional, List, Dict, Any, Tuple, Type, Set, Iterable
+import uuid
 from uuid import UUID

 import sqlalchemy as sql
@@ -87,6 +88,8 @@ class TableVersion:
         self.next_idx_id = tbl_md.next_idx_id
         self.next_rowid = tbl_md.next_row_id

+        self.remotes = dict(TableVersion._init_remote(remote_md) for remote_md in tbl_md.remotes)
+
         # view-specific initialization
         from pixeltable import exprs
         predicate_dict = None if not is_view or tbl_md.view_md.predicate is None else tbl_md.view_md.predicate
@@ -115,7 +118,7 @@ class TableVersion:
         cat.tbl_versions[(self.id, self.effective_version)] = self

         # init schema after we determined whether we're a component view, and before we create the store table
-        self.cols:
+        self.cols: list[Column] = []  # contains complete history of columns, incl dropped ones
         self.cols_by_name: dict[str, Column] = {}  # contains only user-facing (named) columns visible in this version
         self.cols_by_id: dict[int, Column] = {}  # contains only columns visible in this version
         self.idx_md = tbl_md.index_md  # needed for _create_tbl_md()
@@ -150,23 +153,22 @@ class TableVersion:
             if col.is_computed:
                 col.check_value_expr()

-
+        timestamp = time.time()
         # create schema.Table
         # Column.dependent_cols for existing cols is wrong at this point, but init() will set it correctly
         column_md = cls._create_column_md(cols)
         table_md = schema.TableMd(
-            name=name, current_version=0, current_schema_version=0,
-
-
-            session
-
-
+            name=name, current_version=0, current_schema_version=0, next_col_id=len(cols),
+            next_idx_id=0, next_row_id=0, column_md=column_md, index_md={}, remotes=[], view_md=view_md)
+        # create a schema.Table here, we need it to call our c'tor;
+        # don't add it to the session yet, we might add index metadata
+        tbl_id = uuid.uuid4()
+        tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))

         # create schema.TableVersion
-        table_version_md = schema.TableVersionMd(created_at=
+        table_version_md = schema.TableVersionMd(created_at=timestamp, version=0, schema_version=0)
         tbl_version_record = schema.TableVersion(
             tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md))
-        session.add(tbl_version_record)

         # create schema.TableSchemaVersion
         schema_col_md = {col.id: schema.SchemaColumn(pos=pos, name=col.name) for pos, col in enumerate(cols)}
@@ -176,19 +178,33 @@ class TableVersion:
             num_retained_versions=num_retained_versions, comment=comment)
         schema_version_record = schema.TableSchemaVersion(
             tbl_id=tbl_record.id, schema_version=0, md=dataclasses.asdict(schema_version_md))
-        session.add(schema_version_record)

         # if this is purely a snapshot (it doesn't require any additional storage for columns and it doesn't have a
         # predicate to apply at runtime), we don't create a physical table and simply use the base's table version path
         if view_md is not None and view_md.is_snapshot and view_md.predicate is None and len(cols) == 0:
+            session.add(tbl_record)
+            session.add(tbl_version_record)
+            session.add(schema_version_record)
             return tbl_record.id, None

         assert (base_path is not None) == (view_md is not None)
         base = base_path.tbl_version if base_path is not None and view_md.is_snapshot else None
         base_path = base_path if base_path is not None and not view_md.is_snapshot else None
         tbl_version = cls(tbl_record.id, table_md, 0, schema_version_md, base=base, base_path=base_path)
-
-
+
+        conn = session.connection()
+        tbl_version.store_tbl.create(conn)
+        if view_md is None or not view_md.is_snapshot:
+            # add default indices, after creating the store table
+            for col in tbl_version.cols_by_name.values():
+                status = tbl_version._add_default_index(col, conn=conn)
+                assert status is None or status.num_excs == 0
+
+        # we re-create the tbl_record here, now that we have new index metadata
+        tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_version._create_tbl_md()))
+        session.add(tbl_record)
+        session.add(tbl_version_record)
+        session.add(schema_version_record)
         return tbl_record.id, tbl_version

     @classmethod
@@ -259,7 +275,7 @@ class TableVersion:
         for md in tbl_md.index_md.values():
             if md.schema_version_add > self.schema_version \
                     or md.schema_version_drop is not None and md.schema_version_drop <= self.schema_version:
-                #
+                # index not visible in this schema version
                 continue

             # instantiate index object
@@ -271,8 +287,10 @@ class TableVersion:
             # fix up the sa column type of the index value and undo columns
             val_col = self.cols_by_id[md.index_val_col_id]
             val_col.sa_col_type = idx.index_sa_type()
+            val_col._records_errors = False
             undo_col = self.cols_by_id[md.index_val_undo_col_id]
             undo_col.sa_col_type = idx.index_sa_type()
+            undo_col._records_errors = False
             idx_info = self.IndexInfo(id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col)
             self.idxs_by_name[md.name] = idx_info

@@ -287,10 +305,12 @@ class TableVersion:
         else:
             self.store_tbl: StoreBase = StoreTable(self)

-    def _update_md(
+    def _update_md(
+            self, timestamp: float, preceding_schema_version: Optional[int], conn: sql.engine.Connection
+    ) -> None:
         """Update all recorded metadata in response to a data or schema change.
         Args:
-
+            timestamp: timestamp of the change
             preceding_schema_version: last schema version if schema change, else None
         """
         conn.execute(
@@ -298,7 +318,7 @@ class TableVersion:
             .values({schema.Table.md: dataclasses.asdict(self._create_tbl_md())})
             .where(schema.Table.id == self.id))

-        version_md = self._create_version_md(
+        version_md = self._create_version_md(timestamp)
         conn.execute(
             sql.insert(schema.TableVersion.__table__)
             .values(tbl_id=self.id, version=self.version, md=dataclasses.asdict(version_md)))
@@ -315,6 +335,33 @@ class TableVersion:
         return f'idx_{self.id.hex}_{idx_id}'

     def add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
+        # we're creating a new schema version
+        self.version += 1
+        preceding_schema_version = self.schema_version
+        self.schema_version = self.version
+        with Env.get().engine.begin() as conn:
+            status = self._add_index(col, idx_name, idx, conn)
+            self._update_md(time.time(), preceding_schema_version, conn)
+        _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
+        return status
+
+    def _add_default_index(self, col: Column, conn: sql.engine.Connection) -> Optional[UpdateStatus]:
+        """Add a B-tree index on this column if it has a compatible type"""
+        if not col.stored:
+            # if the column is intentionally not stored, we want to avoid the overhead of an index
+            return None
+        if not col.col_type.is_scalar_type() and not (col.col_type.is_media_type() and not col.is_computed):
+            # wrong type for a B-tree
+            return None
+        if col.col_type.is_bool_type():
+            # B-trees on bools aren't useful
+            return None
+        status = self._add_index(col, idx_name=None, idx=index.BtreeIndex(col), conn=conn)
+        return status
+
+    def _add_index(
+            self, col: Column, idx_name: Optional[str], idx: index.IndexBase, conn: sql.engine.Connection
+    ) -> UpdateStatus:
         assert not self.is_snapshot
         idx_id = self.next_idx_id
         self.next_idx_id += 1
@@ -324,46 +371,41 @@ class TableVersion:
         assert is_valid_identifier(idx_name)
         assert idx_name not in [i.name for i in self.idx_md.values()]

-        #
-
-
-
-
-
-
-
-
-
-
-
-        val_col.
-        self.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        status = self._add_columns([val_col, undo_col], conn, preceding_schema_version=preceding_schema_version)
-        # now create the index structure
-        idx.create_index(self._store_idx_name(idx_id), val_col, conn)
-
-        _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
+        # add the index value and undo columns (which need to be nullable)
+        val_col = Column(
+            col_id=self.next_col_id, name=None, computed_with=idx.index_value_expr(),
+            sa_col_type=idx.index_sa_type(), stored=True,
+            schema_version_add=self.schema_version, schema_version_drop=None,
+            records_errors=idx.records_value_errors())
+        val_col.tbl = self
+        val_col.col_type = val_col.col_type.copy(nullable=True)
+        self.next_col_id += 1
+
+        undo_col = Column(
+            col_id=self.next_col_id, name=None, col_type=val_col.col_type,
+            sa_col_type=val_col.sa_col_type, stored=True,
+            schema_version_add=self.schema_version, schema_version_drop=None,
+            records_errors=False)
+        undo_col.tbl = self
+        undo_col.col_type = undo_col.col_type.copy(nullable=True)
+        self.next_col_id += 1
+
+        # create and register the index metadata
+        idx_cls = type(idx)
+        idx_md = schema.IndexMd(
+            id=idx_id, name=idx_name,
+            indexed_col_id=col.id, index_val_col_id=val_col.id, index_val_undo_col_id=undo_col.id,
+            schema_version_add=self.schema_version, schema_version_drop=None,
+            class_fqn=idx_cls.__module__ + '.' + idx_cls.__name__, init_args=idx.as_dict())
+        idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
+        self.idx_md[idx_id] = idx_md
+        self.idxs_by_name[idx_name] = idx_info
+
+        # add the columns and update the metadata
+        status = self._add_columns([val_col, undo_col], conn)
+        # now create the index structure
+        idx.create_index(self._store_idx_name(idx_id), val_col, conn)
+
         return status

     def drop_index(self, idx_id: int) -> None:
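Note on the hunks above: add_index() now bumps the version and schema version itself and runs both _add_index() and the metadata write inside a single Env.get().engine.begin() block, with the change timestamp passed explicitly to _update_md() rather than captured inside the helpers. A minimal, self-contained sketch of that transactional shape, assuming a throwaway SQLite database; the VersionedThing class and metadata table below are illustrative placeholders, not pixeltable API:

# Sketch only: bump the version, do the schema work, and persist metadata
# inside one engine.begin() transaction so a failure rolls back both together.
import time
import sqlalchemy as sql

engine = sql.create_engine('sqlite://')
md = sql.MetaData()
metadata_tbl = sql.Table(
    'metadata', md,
    sql.Column('version', sql.Integer, primary_key=True),
    sql.Column('created_at', sql.Float))
md.create_all(engine)

class VersionedThing:
    def __init__(self) -> None:
        self.version = 0

    def _update_md(self, timestamp: float, conn: sql.engine.Connection) -> None:
        # the metadata write reuses the caller's connection/transaction
        conn.execute(sql.insert(metadata_tbl).values(version=self.version, created_at=timestamp))

    def add_index(self) -> None:
        self.version += 1
        with engine.begin() as conn:  # one transaction for the schema work and the metadata
            # ... create the index / do the schema work here ...
            self._update_md(time.time(), conn)

VersionedThing().add_index()

The same transaction-per-change shape appears in drop_index, add_column, and drop_column later in this diff.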
@@ -381,7 +423,8 @@ class TableVersion:
         del self.idxs_by_name[idx_md.name]

         with Env.get().engine.begin() as conn:
-            self._drop_columns([idx_info.val_col, idx_info.undo_col]
+            self._drop_columns([idx_info.val_col, idx_info.undo_col])
+            self._update_md(time.time(), preceding_schema_version, conn)
         _logger.info(f'Dropped index {idx_md.name} on table {self.name}')

     def add_column(self, col: Column, print_stats: bool = False) -> UpdateStatus:
@@ -398,16 +441,16 @@ class TableVersion:
         if col.compute_func is not None:
             # create value_expr from compute_func
             self._create_value_expr(col, self.path)
-        if col.value_expr is not None:
-            col.check_value_expr()
-            self._record_value_expr(col)

         # we're creating a new schema version
         self.version += 1
         preceding_schema_version = self.schema_version
         self.schema_version = self.version
         with Env.get().engine.begin() as conn:
-            status = self._add_columns([col], conn,
+            status = self._add_columns([col], conn, print_stats=print_stats)
+            _ = self._add_default_index(col, conn)
+            # TODO: what to do about errors?
+            self._update_md(time.time(), preceding_schema_version, conn)
         _logger.info(f'Added column {col.name} to table {self.name}, new version: {self.version}')

         msg = (
@@ -418,13 +461,8 @@ class TableVersion:
         _logger.info(f'Column {col.name}: {msg}')
         return status

-    def _add_columns(
-            self, cols: List[Column], conn: sql.engine.Connection, preceding_schema_version: Optional[int] = None,
-            print_stats: bool = False
-    ) -> UpdateStatus:
+    def _add_columns(self, cols: List[Column], conn: sql.engine.Connection, print_stats: bool = False) -> UpdateStatus:
         """Add and populate columns within the current transaction"""
-        ts = time.time()
-
         row_count = self.store_tbl.count(conn=conn)
         for col in cols:
             if not col.col_type.nullable and not col.is_computed:
@@ -442,6 +480,9 @@ class TableVersion:
             if col.name is not None:
                 self.cols_by_name[col.name] = col
             self.cols_by_id[col.id] = col
+            if col.value_expr is not None:
+                col.check_value_expr()
+                self._record_value_expr(col)

             if col.is_stored:
                 self.store_tbl.add_column(col, conn)
@@ -475,7 +516,6 @@ class TableVersion:
         finally:
             plan.close()

-        self._update_md(ts, preceding_schema_version, conn)
         if print_stats:
             plan.ctx.profile.print(num_rows=row_count)
         # TODO(mkornacker): what to do about system columns with exceptions?
@@ -516,14 +556,14 @@ class TableVersion:
             # update idxs_by_name
             for idx_name in dropped_idx_names:
                 del self.idxs_by_name[idx_name]
-            self._drop_columns(dropped_cols
+            self._drop_columns(dropped_cols)
+            self._update_md(time.time(), preceding_schema_version, conn)
         _logger.info(f'Dropped column {name} from table {self.name}, new version: {self.version}')

-    def _drop_columns(self, cols: list[Column]
+    def _drop_columns(self, cols: list[Column]) -> None:
         """Mark columns as dropped"""
         assert not self.is_snapshot

-        ts = time.time()
         for col in cols:
             if col.value_expr is not None:
                 # update Column.dependent_cols
@@ -539,7 +579,6 @@ class TableVersion:
             assert col.id in self.cols_by_id
             del self.cols_by_id[col.id]

-        self._update_md(ts, preceding_schema_version, conn)
         self.store_tbl.create_sa_tbl()

     def rename_column(self, old_name: str, new_name: str) -> None:
@@ -558,13 +597,12 @@ class TableVersion:
         self.cols_by_name[new_name] = col

         # we're creating a new schema version
-        ts = time.time()
         self.version += 1
         preceding_schema_version = self.schema_version
         self.schema_version = self.version

         with Env.get().engine.begin() as conn:
-            self._update_md(
+            self._update_md(time.time(), preceding_schema_version, conn)
         _logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')

     def set_comment(self, new_comment: Optional[str]):
@@ -579,12 +617,11 @@ class TableVersion:

     def _create_schema_version(self):
         # we're creating a new schema version
-        ts = time.time()
         self.version += 1
         preceding_schema_version = self.schema_version
         self.schema_version = self.version
         with Env.get().engine.begin() as conn:
-            self._update_md(
+            self._update_md(time.time(), preceding_schema_version, conn)
         _logger.info(f'[{self.name}] Updating table schema to version: {self.version}')

     def insert(
@@ -595,12 +632,11 @@ class TableVersion:
         assert self.is_insertable()
         from pixeltable.plan import Planner
         plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
-        ts = time.time()
         with Env.get().engine.begin() as conn:
-            return self._insert(plan, conn,
+            return self._insert(plan, conn, time.time(), print_stats)

     def _insert(
-            self, exec_plan: exec.ExecNode, conn: sql.engine.Connection,
+            self, exec_plan: exec.ExecNode, conn: sql.engine.Connection, timestamp: float, print_stats: bool = False,
     ) -> UpdateStatus:
         """Insert rows produced by exec_plan and propagate to views"""
         # we're creating a new version
@@ -612,13 +648,13 @@ class TableVersion:
         result.num_excs = num_excs
         result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
         result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
-        self._update_md(
+        self._update_md(timestamp, None, conn)

         # update views
         for view in self.mutable_views:
             from pixeltable.plan import Planner
             plan, _ = Planner.create_view_load_plan(view.path, propagates_insert=True)
-            status = view._insert(plan, conn,
+            status = view._insert(plan, conn, timestamp, print_stats)
             result.num_rows += status.num_rows
             result.num_excs += status.num_excs
             result.num_computed_values += status.num_computed_values
@@ -662,7 +698,7 @@ class TableVersion:
             # construct Where clause to match rowid
             num_rowid_cols = len(self.store_tbl.rowid_columns())
             for col_idx in range(num_rowid_cols):
-                assert len(rowids[i]) == num_rowid_cols
+                assert len(rowids[i]) == num_rowid_cols, f'len({rowids[i]}) != {num_rowid_cols}'
                 clause = exprs.RowidRef(self, col_idx) == rowids[i][col_idx]
                 if where_clause is None:
                     where_clause = clause
@@ -679,7 +715,7 @@ class TableVersion:
                     where_clause = where_clause & clause

             update_targets = {col: row[col] for col in row if col not in pk_cols}
-            status = self._update(conn, update_targets, where_clause, cascade)
+            status = self._update(conn, update_targets, where_clause, cascade, show_progress=False)
             result_status.num_rows += status.num_rows
             result_status.num_excs += status.num_excs
             result_status.num_computed_values += status.num_computed_values
@@ -692,7 +728,8 @@ class TableVersion:

     def _update(
             self, conn: sql.engine.Connection, update_targets: dict[Column, 'pixeltable.exprs.Expr'],
-            where_clause: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True
+            where_clause: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True,
+            show_progress: bool = True
     ) -> UpdateStatus:
         """Update rows in this table.
         Args:
@@ -705,28 +742,27 @@ class TableVersion:
         from pixeltable.plan import Planner
         plan, updated_cols, recomputed_cols = \
             Planner.create_update_plan(self.path, update_targets, [], where_clause, cascade)
-        ts = time.time()
         result = self._propagate_update(
             plan, where_clause.sql_expr() if where_clause is not None else None, recomputed_cols,
-            base_versions=[], conn=conn,
+            base_versions=[], conn=conn, timestamp=time.time(), cascade=cascade, show_progress=show_progress)
         result.updated_cols = updated_cols
         return result

     def _propagate_update(
             self, plan: Optional[exec.ExecNode], where_clause: Optional[sql.ClauseElement],
             recomputed_view_cols: List[Column], base_versions: List[Optional[int]], conn: sql.engine.Connection,
-
+            timestamp: float, cascade: bool, show_progress: bool = True
     ) -> UpdateStatus:
         result = UpdateStatus()
         if plan is not None:
             # we're creating a new version
             self.version += 1
             result.num_rows, result.num_excs, cols_with_excs = \
-                self.store_tbl.insert_rows(plan, conn, v_min=self.version)
+                self.store_tbl.insert_rows(plan, conn, v_min=self.version, show_progress=show_progress)
             result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
             self.store_tbl.delete_rows(
                 self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause, conn=conn)
-            self._update_md(
+            self._update_md(timestamp, None, conn)

         if cascade:
             base_versions = [None if plan is None else self.version] + base_versions  # don't update in place
@@ -738,7 +774,7 @@ class TableVersion:
                 from pixeltable.plan import Planner
                 plan = Planner.create_view_update_plan(view.path, recompute_targets=recomputed_cols)
                 status = view._propagate_update(
-                    plan, None, recomputed_view_cols, base_versions=base_versions, conn=conn,
+                    plan, None, recomputed_view_cols, base_versions=base_versions, conn=conn, timestamp=timestamp, cascade=True)
                 result.num_rows += status.num_rows
                 result.num_excs += status.num_excs
                 result.cols_with_excs += status.cols_with_excs
@@ -754,16 +790,15 @@ class TableVersion:
         assert self.is_insertable()
         from pixeltable.plan import Planner
         analysis_info = Planner.analyze(self, where)
-        ts = time.time()
         with Env.get().engine.begin() as conn:
-            num_rows = self._delete(analysis_info.sql_where_clause, base_versions=[], conn=conn,
+            num_rows = self._delete(analysis_info.sql_where_clause, base_versions=[], conn=conn, timestamp=time.time())

         status = UpdateStatus(num_rows=num_rows)
         return status

     def _delete(
             self, where: Optional['pixeltable.exprs.Predicate'], base_versions: List[Optional[int]],
-            conn: sql.engine.Connection,
+            conn: sql.engine.Connection, timestamp: float) -> int:
         """Delete rows in this table and propagate to views.
         Args:
             where: a Predicate to filter rows to delete.
@@ -777,11 +812,12 @@ class TableVersion:
         if num_rows > 0:
             # we're creating a new version
             self.version += 1
-            self._update_md(
+            self._update_md(timestamp, None, conn)
         else:
             pass
         for view in self.mutable_views:
-            num_rows += view._delete(
+            num_rows += view._delete(
+                where=None, base_versions=[self.version] + base_versions, conn=conn, timestamp=timestamp)
         return num_rows

     def revert(self) -> None:
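A pattern that runs through the insert/update/delete hunks above: the public entry points (insert(), delete(), and the update paths) capture time.time() once and thread that timestamp through _insert, _propagate_update, and _delete into every dependent view, so one logical change records a single created_at across the base table and its views. A toy sketch of the idea; Node and version_log are stand-ins, not pixeltable classes:

# Toy illustration of threading one timestamp through recursive propagation;
# Node stands in for TableVersion, version_log for the stored version metadata.
import time
from typing import List, Sequence, Tuple

class Node:
    def __init__(self, views: Sequence['Node'] = ()) -> None:
        self.views = list(views)
        self.version = 0
        self.version_log: List[Tuple[int, float]] = []  # (version, created_at)

    def delete(self) -> None:
        # public entry point: capture the timestamp exactly once
        self._delete(time.time())

    def _delete(self, timestamp: float) -> None:
        self.version += 1
        self.version_log.append((self.version, timestamp))
        for view in self.views:
            view._delete(timestamp)  # every level records the same created_at

base = Node(views=[Node(), Node(views=[Node()])])
base.delete()
assert len({ts for n in [base] + base.views for _, ts in n.version_log}) == 1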
@@ -907,6 +943,32 @@ class TableVersion:
             view._revert(session)
         _logger.info(f'TableVersion {self.name}: reverted to version {self.version}')

+    @classmethod
+    def _init_remote(cls, remote_md: dict[str, Any]) -> Tuple[pixeltable.datatransfer.Remote, dict[str, str]]:
+        module = importlib.import_module(remote_md['module'])
+        remote_cls = getattr(module, remote_md['class'])
+        remote = remote_cls.from_dict(remote_md['remote_md'])
+        col_mapping = remote_md['col_mapping']
+        return remote, col_mapping
+
+    def link(self, remote: pixeltable.datatransfer.Remote, col_mapping: dict[str, str]) -> None:
+        timestamp = time.time()
+        self.version += 1
+        self.remotes[remote] = col_mapping
+        with Env.get().engine.begin() as conn:
+            self._update_md(timestamp, None, conn)
+
+    def unlink(self, remote: pixeltable.datatransfer.Remote) -> None:
+        assert remote in self.remotes
+        timestamp = time.time()
+        self.version += 1
+        del self.remotes[remote]
+        with Env.get().engine.begin() as conn:
+            self._update_md(timestamp, None, conn)
+
+    def get_remotes(self) -> dict[pixeltable.datatransfer.Remote, dict[str, str]]:
+        return self.remotes
+
     def is_view(self) -> bool:
         return self.base is not None

@@ -939,16 +1001,16 @@ class TableVersion:
     def get_required_col_names(self) -> List[str]:
         """Return the names of all columns for which values must be specified in insert()"""
         assert not self.is_view()
-        names = [c.name for c in self.
+        names = [c.name for c in self.cols_by_name.values() if not c.is_computed and not c.col_type.nullable]
         return names

     def get_computed_col_names(self) -> List[str]:
         """Return the names of all computed columns"""
-        names = [c.name for c in self.
+        names = [c.name for c in self.cols_by_name.values() if c.is_computed]
         return names

     @classmethod
-    def _create_value_expr(cls, col: Column, path: 'TableVersionPath') -> None:
+    def _create_value_expr(cls, col: Column, path: 'pixeltable.catalog.TableVersionPath') -> None:
         """
         Create col.value_expr, given col.compute_func.
         Interprets compute_func's parameters to be references to columns and construct ColumnRefs as args.
@@ -978,13 +1040,17 @@ class TableVersion:
         for refd_col in refd_cols:
             refd_col.dependent_cols.add(col)

-    def
+    def get_idx_val_columns(self, cols: Iterable[Column]) -> set[Column]:
+        result = {info.val_col for col in cols for info in col.get_idx_info().values()}
+        return result
+
+    def get_dependent_columns(self, cols: list[Column]) -> set[Column]:
         """
         Return the set of columns that transitively depend on any of the given ones.
         """
         if len(cols) == 0:
-            return
-        result:
+            return set()
+        result: set[Column] = set()
         for col in cols:
             result.update(col.dependent_cols)
         result.update(self.get_dependent_columns(result))
@@ -1007,14 +1073,27 @@ class TableVersion:
             value_expr=value_expr_dict, stored=col.stored)
         return column_md

+    @classmethod
+    def _create_remotes_md(cls, remotes: dict['pixeltable.datatransfer.Remote', dict[str, str]]) -> list[dict[str, Any]]:
+        return [
+            {
+                'module': type(remote).__module__,
+                'class': type(remote).__qualname__,
+                'remote_md': remote.to_dict(),
+                'col_mapping': col_mapping
+            }
+            for remote, col_mapping in remotes.items()
+        ]
+
     def _create_tbl_md(self) -> schema.TableMd:
         return schema.TableMd(
             name=self.name, current_version=self.version, current_schema_version=self.schema_version,
             next_col_id=self.next_col_id, next_idx_id=self.next_idx_id, next_row_id=self.next_rowid,
-            column_md=self._create_column_md(self.cols), index_md=self.idx_md,
+            column_md=self._create_column_md(self.cols), index_md=self.idx_md,
+            remotes=self._create_remotes_md(self.remotes), view_md=self.view_md)

-    def _create_version_md(self,
-        return schema.TableVersionMd(created_at=
+    def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
+        return schema.TableVersionMd(created_at=timestamp, version=self.version, schema_version=self.schema_version)

     def _create_schema_version_md(self, preceding_schema_version: int) -> schema.TableSchemaVersionMd:
         column_md: Dict[int, schema.SchemaColumn] = {}
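_create_remotes_md() above serializes each linked remote as its module path, class name, a to_dict() payload, and the column mapping; _init_remote() (earlier in this diff) reverses that with importlib.import_module + getattr + from_dict. A self-contained sketch of that round trip, using a toy MyRemote class rather than the real pixeltable.datatransfer.Remote interface:

# Round-trip sketch of the remote-metadata scheme above: serialize as
# (module, class, payload), reconstruct via dynamic import.
import importlib
from typing import Any, Dict

class MyRemote:
    def __init__(self, url: str) -> None:
        self.url = url

    def to_dict(self) -> Dict[str, Any]:
        return {'url': self.url}

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> 'MyRemote':
        return cls(url=d['url'])

def to_md(remote: Any, col_mapping: Dict[str, str]) -> Dict[str, Any]:
    return {
        'module': type(remote).__module__,
        'class': type(remote).__qualname__,
        'remote_md': remote.to_dict(),
        'col_mapping': col_mapping,
    }

def from_md(md: Dict[str, Any]) -> Any:
    module = importlib.import_module(md['module'])
    remote_cls = getattr(module, md['class'])
    return remote_cls.from_dict(md['remote_md'])

md = to_md(MyRemote('http://example.com'), {'img': 'image'})
restored = from_md(md)
assert restored.url == 'http://example.com'

Storing the fully qualified class name rather than pickling the object presumably keeps the table metadata JSON-friendly and lets new remote types (such as the Label Studio integration added in this release) be restored without further schema changes.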
pixeltable/datatransfer/__init__.py (new file)
@@ -0,0 +1 @@
+from .remote import Remote