pixeltable 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -0
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +3 -10
- pixeltable/catalog/catalog.py +139 -59
- pixeltable/catalog/column.py +32 -23
- pixeltable/catalog/globals.py +2 -45
- pixeltable/catalog/insertable_table.py +5 -2
- pixeltable/catalog/path.py +6 -0
- pixeltable/catalog/table.py +173 -23
- pixeltable/catalog/table_version.py +156 -92
- pixeltable/catalog/table_version_handle.py +26 -1
- pixeltable/catalog/update_status.py +179 -0
- pixeltable/catalog/view.py +12 -3
- pixeltable/config.py +76 -12
- pixeltable/dataframe.py +1 -1
- pixeltable/env.py +29 -0
- pixeltable/exec/exec_node.py +7 -24
- pixeltable/exec/expr_eval/schedulers.py +134 -7
- pixeltable/exprs/column_property_ref.py +23 -20
- pixeltable/exprs/column_ref.py +24 -18
- pixeltable/exprs/data_row.py +9 -0
- pixeltable/exprs/function_call.py +2 -2
- pixeltable/exprs/row_builder.py +46 -14
- pixeltable/exprs/rowid_ref.py +0 -4
- pixeltable/func/function.py +3 -3
- pixeltable/functions/audio.py +36 -9
- pixeltable/functions/video.py +57 -10
- pixeltable/globals.py +61 -1
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/external_store.py +39 -64
- pixeltable/io/globals.py +4 -4
- pixeltable/io/hf_datasets.py +10 -2
- pixeltable/io/label_studio.py +52 -48
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +125 -0
- pixeltable/metadata/converters/util.py +3 -0
- pixeltable/metadata/notes.py +2 -0
- pixeltable/metadata/schema.py +14 -2
- pixeltable/metadata/utils.py +78 -0
- pixeltable/plan.py +26 -18
- pixeltable/share/packager.py +20 -38
- pixeltable/store.py +121 -142
- pixeltable/type_system.py +2 -2
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/media_store.py +39 -0
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/METADATA +1 -1
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/RECORD +51 -47
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/LICENSE +0 -0
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/entry_points.txt +0 -0
|
@@ -29,7 +29,8 @@ if TYPE_CHECKING:
|
|
|
29
29
|
|
|
30
30
|
from ..func.globals import resolve_symbol
|
|
31
31
|
from .column import Column
|
|
32
|
-
from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation,
|
|
32
|
+
from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, is_valid_identifier
|
|
33
|
+
from .update_status import RowCountStats, UpdateStatus
|
|
33
34
|
|
|
34
35
|
if TYPE_CHECKING:
|
|
35
36
|
from pixeltable import exec, store
|
|
@@ -183,6 +184,12 @@ class TableVersion:
|
|
|
183
184
|
else:
|
|
184
185
|
return f'{self.name}:{self.effective_version}'
|
|
185
186
|
|
|
187
|
+
@property
|
|
188
|
+
def handle(self) -> 'TableVersionHandle':
|
|
189
|
+
from .table_version_handle import TableVersionHandle
|
|
190
|
+
|
|
191
|
+
return TableVersionHandle(self.id, self.effective_version, self)
|
|
192
|
+
|
|
186
193
|
@classmethod
|
|
187
194
|
def create(
|
|
188
195
|
cls,
|
|
@@ -195,7 +202,6 @@ class TableVersion:
|
|
|
195
202
|
# base_path: Optional[pxt.catalog.TableVersionPath] = None,
|
|
196
203
|
view_md: Optional[schema.ViewMd] = None,
|
|
197
204
|
) -> tuple[UUID, Optional[TableVersion]]:
|
|
198
|
-
session = Env.get().session
|
|
199
205
|
user = Env.get().user
|
|
200
206
|
|
|
201
207
|
# assign ids
|
|
@@ -212,8 +218,9 @@ class TableVersion:
|
|
|
212
218
|
# Column.dependent_cols for existing cols is wrong at this point, but init() will set it correctly
|
|
213
219
|
column_md = cls._create_column_md(cols)
|
|
214
220
|
tbl_id = uuid.uuid4()
|
|
221
|
+
tbl_id_str = str(tbl_id)
|
|
215
222
|
table_md = schema.TableMd(
|
|
216
|
-
tbl_id=
|
|
223
|
+
tbl_id=tbl_id_str,
|
|
217
224
|
name=name,
|
|
218
225
|
user=user,
|
|
219
226
|
is_replica=False,
|
|
@@ -229,16 +236,16 @@ class TableVersion:
|
|
|
229
236
|
view_md=view_md,
|
|
230
237
|
additional_md={},
|
|
231
238
|
)
|
|
232
|
-
# create a schema.Table here, we need it to call our c'tor;
|
|
233
|
-
# don't add it to the session yet, we might add index metadata
|
|
234
|
-
tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))
|
|
235
239
|
|
|
236
|
-
# create schema.TableVersion
|
|
240
|
+
# create schema.TableVersion of the initial version
|
|
237
241
|
table_version_md = schema.TableVersionMd(
|
|
238
|
-
tbl_id=
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
+
tbl_id=tbl_id_str,
|
|
243
|
+
created_at=timestamp,
|
|
244
|
+
version=0,
|
|
245
|
+
schema_version=0,
|
|
246
|
+
user=user,
|
|
247
|
+
update_status=None,
|
|
248
|
+
additional_md={},
|
|
242
249
|
)
|
|
243
250
|
|
|
244
251
|
# create schema.TableSchemaVersion
|
|
@@ -252,7 +259,7 @@ class TableVersion:
|
|
|
252
259
|
schema_col_md[col.id] = md
|
|
253
260
|
|
|
254
261
|
schema_version_md = schema.TableSchemaVersionMd(
|
|
255
|
-
tbl_id=
|
|
262
|
+
tbl_id=tbl_id_str,
|
|
256
263
|
schema_version=0,
|
|
257
264
|
preceding_schema_version=None,
|
|
258
265
|
columns=schema_col_md,
|
|
@@ -261,9 +268,8 @@ class TableVersion:
|
|
|
261
268
|
media_validation=media_validation.name.lower(),
|
|
262
269
|
additional_md={},
|
|
263
270
|
)
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
)
|
|
271
|
+
|
|
272
|
+
cat = pxt.catalog.Catalog.get()
|
|
267
273
|
|
|
268
274
|
# if this is purely a snapshot (it doesn't require any additional storage for columns and it doesn't have a
|
|
269
275
|
# predicate to apply at runtime), we don't create a physical table and simply use the base's table version path
|
|
@@ -274,22 +280,23 @@ class TableVersion:
|
|
|
274
280
|
and view_md.sample_clause is None
|
|
275
281
|
and len(cols) == 0
|
|
276
282
|
):
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
283
|
+
cat.store_tbl_md(
|
|
284
|
+
tbl_id=tbl_id,
|
|
285
|
+
dir_id=dir_id,
|
|
286
|
+
tbl_md=table_md,
|
|
287
|
+
version_md=table_version_md,
|
|
288
|
+
schema_version_md=schema_version_md,
|
|
289
|
+
)
|
|
290
|
+
return tbl_id, None
|
|
281
291
|
|
|
282
292
|
# assert (base_path is not None) == (view_md is not None)
|
|
283
293
|
is_snapshot = view_md is not None and view_md.is_snapshot
|
|
284
294
|
effective_version = 0 if is_snapshot else None
|
|
285
295
|
base_path = pxt.catalog.TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
|
|
286
296
|
base = base_path.tbl_version if base_path is not None else None
|
|
287
|
-
tbl_version = cls(
|
|
288
|
-
tbl_record.id, table_md, effective_version, schema_version_md, [], base_path=base_path, base=base
|
|
289
|
-
)
|
|
297
|
+
tbl_version = cls(tbl_id, table_md, effective_version, schema_version_md, [], base_path=base_path, base=base)
|
|
290
298
|
# TODO: break this up, so that Catalog.create_table() registers tbl_version
|
|
291
|
-
cat =
|
|
292
|
-
cat._tbl_versions[tbl_record.id, effective_version] = tbl_version
|
|
299
|
+
cat._tbl_versions[tbl_id, effective_version] = tbl_version
|
|
293
300
|
tbl_version.init()
|
|
294
301
|
tbl_version.store_tbl.create()
|
|
295
302
|
is_mutable = not is_snapshot and not table_md.is_replica
|
|
@@ -306,15 +313,18 @@ class TableVersion:
|
|
|
306
313
|
status = tbl_version._add_default_index(col)
|
|
307
314
|
assert status is None or status.num_excs == 0
|
|
308
315
|
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
316
|
+
cat.store_tbl_md(
|
|
317
|
+
tbl_id=tbl_id,
|
|
318
|
+
dir_id=dir_id,
|
|
319
|
+
tbl_md=tbl_version.tbl_md,
|
|
320
|
+
version_md=table_version_md,
|
|
321
|
+
schema_version_md=schema_version_md,
|
|
322
|
+
)
|
|
323
|
+
return tbl_id, tbl_version
|
|
315
324
|
|
|
316
325
|
@classmethod
|
|
317
326
|
def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
|
|
327
|
+
assert Env.get().in_xact
|
|
318
328
|
tbl_id = UUID(md.tbl_md.tbl_id)
|
|
319
329
|
_logger.info(f'Creating replica table version {tbl_id}:{md.version_md.version}.')
|
|
320
330
|
view_md = md.tbl_md.view_md
|
|
@@ -324,6 +334,10 @@ class TableVersion:
|
|
|
324
334
|
tbl_id, md.tbl_md, md.version_md.version, md.schema_version_md, [], base_path=base_path, base=base
|
|
325
335
|
)
|
|
326
336
|
cat = pxt.catalog.Catalog.get()
|
|
337
|
+
# We're creating a new TableVersion replica, so we should never have seen this particular
|
|
338
|
+
# TableVersion instance before.
|
|
339
|
+
assert tbl_version.effective_version is not None
|
|
340
|
+
assert (tbl_version.id, tbl_version.effective_version) not in cat._tbl_versions
|
|
327
341
|
cat._tbl_versions[tbl_version.id, tbl_version.effective_version] = tbl_version
|
|
328
342
|
tbl_version.init()
|
|
329
343
|
tbl_version.store_tbl.create()
|
|
@@ -445,10 +459,10 @@ class TableVersion:
|
|
|
445
459
|
# fix up the sa column type of the index value and undo columns
|
|
446
460
|
val_col = self.cols_by_id[md.index_val_col_id]
|
|
447
461
|
val_col.sa_col_type = idx.index_sa_type()
|
|
448
|
-
val_col.
|
|
462
|
+
val_col._stores_cellmd = False
|
|
449
463
|
undo_col = self.cols_by_id[md.index_val_undo_col_id]
|
|
450
464
|
undo_col.sa_col_type = idx.index_sa_type()
|
|
451
|
-
undo_col.
|
|
465
|
+
undo_col._stores_cellmd = False
|
|
452
466
|
idx_info = self.IndexInfo(id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col)
|
|
453
467
|
self.idxs_by_name[md.name] = idx_info
|
|
454
468
|
|
|
@@ -464,7 +478,13 @@ class TableVersion:
|
|
|
464
478
|
else:
|
|
465
479
|
self.store_tbl = StoreTable(self)
|
|
466
480
|
|
|
467
|
-
def _write_md(
|
|
481
|
+
def _write_md(
|
|
482
|
+
self,
|
|
483
|
+
new_version: bool,
|
|
484
|
+
new_version_ts: float,
|
|
485
|
+
new_schema_version: bool,
|
|
486
|
+
update_status: Optional[UpdateStatus] = None,
|
|
487
|
+
) -> None:
|
|
468
488
|
"""Writes table metadata to the database.
|
|
469
489
|
|
|
470
490
|
Args:
|
|
@@ -475,22 +495,23 @@ class TableVersion:
|
|
|
475
495
|
"""
|
|
476
496
|
from pixeltable.catalog import Catalog
|
|
477
497
|
|
|
478
|
-
version_md
|
|
479
|
-
schema.TableVersionMd(
|
|
480
|
-
tbl_id=str(self.id),
|
|
481
|
-
created_at=new_version_ts,
|
|
482
|
-
version=self.version,
|
|
483
|
-
schema_version=self.schema_version,
|
|
484
|
-
additional_md={},
|
|
485
|
-
)
|
|
486
|
-
if new_version
|
|
487
|
-
else None
|
|
488
|
-
)
|
|
498
|
+
version_md = self._create_version_md(new_version_ts, update_status=update_status) if new_version else None
|
|
489
499
|
|
|
490
500
|
Catalog.get().store_tbl_md(
|
|
491
|
-
self.id, self._tbl_md, version_md, self._schema_version_md if new_schema_version else None
|
|
501
|
+
self.id, None, self._tbl_md, version_md, self._schema_version_md if new_schema_version else None
|
|
492
502
|
)
|
|
493
503
|
|
|
504
|
+
def _write_md_update_status(self, new_version_ts: float, update_status: UpdateStatus) -> None:
|
|
505
|
+
"""Writes a new update_status in the table version metadata in the database.
|
|
506
|
+
|
|
507
|
+
Args:
|
|
508
|
+
timestamp: timestamp of the change
|
|
509
|
+
update_status: UpdateStatus to be updated in the database
|
|
510
|
+
"""
|
|
511
|
+
from pixeltable.catalog import Catalog
|
|
512
|
+
|
|
513
|
+
Catalog.get().update_tbl_version_md(self._create_version_md(new_version_ts, update_status))
|
|
514
|
+
|
|
494
515
|
def _store_idx_name(self, idx_id: int) -> str:
|
|
495
516
|
"""Return name of index in the store, which needs to be globally unique"""
|
|
496
517
|
return f'idx_{self.id.hex}_{idx_id}'
|
|
@@ -544,7 +565,7 @@ class TableVersion:
|
|
|
544
565
|
stored=True,
|
|
545
566
|
schema_version_add=self.schema_version,
|
|
546
567
|
schema_version_drop=None,
|
|
547
|
-
|
|
568
|
+
stores_cellmd=idx.records_value_errors(),
|
|
548
569
|
)
|
|
549
570
|
val_col.tbl = self
|
|
550
571
|
val_col.col_type = val_col.col_type.copy(nullable=True)
|
|
@@ -558,7 +579,7 @@ class TableVersion:
|
|
|
558
579
|
stored=True,
|
|
559
580
|
schema_version_add=self.schema_version,
|
|
560
581
|
schema_version_drop=None,
|
|
561
|
-
|
|
582
|
+
stores_cellmd=False,
|
|
562
583
|
)
|
|
563
584
|
undo_col.tbl = self
|
|
564
585
|
undo_col.col_type = undo_col.col_type.copy(nullable=True)
|
|
@@ -670,7 +691,7 @@ class TableVersion:
|
|
|
670
691
|
# Create indices and their md records
|
|
671
692
|
for col, (idx, val_col, undo_col) in index_cols.items():
|
|
672
693
|
self._create_index(col, val_col, undo_col, idx_name=None, idx=idx)
|
|
673
|
-
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
|
|
694
|
+
self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True, update_status=status)
|
|
674
695
|
_logger.info(f'Added columns {[col.name for col in cols]} to table {self.name}, new version: {self.version}')
|
|
675
696
|
|
|
676
697
|
msg = (
|
|
@@ -693,6 +714,7 @@ class TableVersion:
|
|
|
693
714
|
f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
|
|
694
715
|
)
|
|
695
716
|
|
|
717
|
+
computed_values = 0
|
|
696
718
|
num_excs = 0
|
|
697
719
|
cols_with_excs: list[Column] = []
|
|
698
720
|
for col in cols_to_add:
|
|
@@ -731,18 +753,19 @@ class TableVersion:
|
|
|
731
753
|
# populate the column
|
|
732
754
|
from pixeltable.plan import Planner
|
|
733
755
|
|
|
734
|
-
plan
|
|
756
|
+
plan = Planner.create_add_column_plan(self.path, col)
|
|
735
757
|
plan.ctx.num_rows = row_count
|
|
736
758
|
try:
|
|
737
759
|
plan.open()
|
|
738
760
|
try:
|
|
739
|
-
excs_per_col = self.store_tbl.load_column(col, plan,
|
|
761
|
+
excs_per_col = self.store_tbl.load_column(col, plan, on_error == 'abort')
|
|
740
762
|
except sql.exc.DBAPIError as exc:
|
|
741
763
|
# Wrap the DBAPIError in an excs.Error to unify processing in the subsequent except block
|
|
742
764
|
raise excs.Error(f'SQL error during execution of computed column `{col.name}`:\n{exc}') from exc
|
|
743
765
|
if excs_per_col > 0:
|
|
744
766
|
cols_with_excs.append(col)
|
|
745
767
|
num_excs += excs_per_col
|
|
768
|
+
computed_values += plan.ctx.num_computed_exprs * row_count
|
|
746
769
|
finally:
|
|
747
770
|
# Ensure cleanup occurs if an exception or keyboard interruption happens during `load_column()`.
|
|
748
771
|
def cleanup_on_error() -> None:
|
|
@@ -765,12 +788,14 @@ class TableVersion:
|
|
|
765
788
|
|
|
766
789
|
if print_stats:
|
|
767
790
|
plan.ctx.profile.print(num_rows=row_count)
|
|
791
|
+
|
|
768
792
|
# TODO: what to do about system columns with exceptions?
|
|
793
|
+
row_counts = RowCountStats(
|
|
794
|
+
upd_rows=row_count, num_excs=num_excs, computed_values=computed_values
|
|
795
|
+
) # add_columns
|
|
769
796
|
return UpdateStatus(
|
|
770
|
-
num_rows=row_count,
|
|
771
|
-
num_computed_values=row_count,
|
|
772
|
-
num_excs=num_excs,
|
|
773
797
|
cols_with_excs=[f'{col.tbl.name}.{col.name}' for col in cols_with_excs if col.name is not None],
|
|
798
|
+
row_count_stats=row_counts,
|
|
774
799
|
)
|
|
775
800
|
|
|
776
801
|
def drop_column(self, col: Column) -> None:
|
|
@@ -886,6 +911,7 @@ class TableVersion:
|
|
|
886
911
|
assert (rows is None) != (df is None) # Exactly one must be specified
|
|
887
912
|
if rows is not None:
|
|
888
913
|
plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
|
|
914
|
+
|
|
889
915
|
else:
|
|
890
916
|
plan = Planner.create_df_insert_plan(self, df, ignore_errors=not fail_on_exception)
|
|
891
917
|
|
|
@@ -896,7 +922,10 @@ class TableVersion:
|
|
|
896
922
|
self.next_row_id += 1
|
|
897
923
|
yield rowid
|
|
898
924
|
|
|
899
|
-
|
|
925
|
+
result = self._insert(
|
|
926
|
+
plan, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception
|
|
927
|
+
)
|
|
928
|
+
return result
|
|
900
929
|
|
|
901
930
|
def _insert(
|
|
902
931
|
self,
|
|
@@ -910,30 +939,26 @@ class TableVersion:
|
|
|
910
939
|
"""Insert rows produced by exec_plan and propagate to views"""
|
|
911
940
|
# we're creating a new version
|
|
912
941
|
self.version += 1
|
|
913
|
-
|
|
914
|
-
num_rows, num_excs, cols_with_excs = self.store_tbl.insert_rows(
|
|
942
|
+
cols_with_excs, row_counts = self.store_tbl.insert_rows(
|
|
915
943
|
exec_plan, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc
|
|
916
944
|
)
|
|
917
|
-
result
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
|
|
945
|
+
result = UpdateStatus(
|
|
946
|
+
cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs],
|
|
947
|
+
row_count_stats=row_counts,
|
|
948
|
+
)
|
|
922
949
|
|
|
923
950
|
# update views
|
|
924
951
|
for view in self.mutable_views:
|
|
925
952
|
from pixeltable.plan import Planner
|
|
926
953
|
|
|
927
|
-
|
|
928
|
-
status = view.get()._insert(
|
|
929
|
-
result
|
|
930
|
-
result.num_excs += status.num_excs
|
|
931
|
-
result.num_computed_values += status.num_computed_values
|
|
932
|
-
result.cols_with_excs += status.cols_with_excs
|
|
954
|
+
plan2, _ = Planner.create_view_load_plan(view.get().path, propagates_insert=True)
|
|
955
|
+
status = view.get()._insert(plan2, timestamp, print_stats=print_stats)
|
|
956
|
+
result += status.to_cascade()
|
|
933
957
|
|
|
934
|
-
|
|
958
|
+
# Use the net status after all propagations
|
|
959
|
+
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False, update_status=result)
|
|
935
960
|
if print_stats:
|
|
936
|
-
|
|
961
|
+
exec_plan.ctx.profile.print(num_rows=result.num_rows)
|
|
937
962
|
_logger.info(f'TableVersion {self.name}: new version {self.version}')
|
|
938
963
|
return result
|
|
939
964
|
|
|
@@ -973,7 +998,7 @@ class TableVersion:
|
|
|
973
998
|
cascade=cascade,
|
|
974
999
|
show_progress=True,
|
|
975
1000
|
)
|
|
976
|
-
result
|
|
1001
|
+
result += UpdateStatus(updated_cols=updated_cols)
|
|
977
1002
|
return result
|
|
978
1003
|
|
|
979
1004
|
def batch_update(
|
|
@@ -1000,7 +1025,7 @@ class TableVersion:
|
|
|
1000
1025
|
result = self.propagate_update(
|
|
1001
1026
|
plan, delete_where_clause, recomputed_cols, base_versions=[], timestamp=time.time(), cascade=cascade
|
|
1002
1027
|
)
|
|
1003
|
-
result
|
|
1028
|
+
result += UpdateStatus(updated_cols=[c.qualified_name for c in updated_cols])
|
|
1004
1029
|
|
|
1005
1030
|
unmatched_rows = row_update_node.unmatched_rows()
|
|
1006
1031
|
if len(unmatched_rows) > 0:
|
|
@@ -1008,7 +1033,7 @@ class TableVersion:
|
|
|
1008
1033
|
raise excs.Error(f'batch_update(): {len(unmatched_rows)} row(s) not found')
|
|
1009
1034
|
if insert_if_not_exists:
|
|
1010
1035
|
insert_status = self.insert(unmatched_rows, None, print_stats=False, fail_on_exception=False)
|
|
1011
|
-
result += insert_status
|
|
1036
|
+
result += insert_status.to_cascade()
|
|
1012
1037
|
return result
|
|
1013
1038
|
|
|
1014
1039
|
def _validate_update_spec(
|
|
@@ -1061,6 +1086,38 @@ class TableVersion:
|
|
|
1061
1086
|
|
|
1062
1087
|
return update_targets
|
|
1063
1088
|
|
|
1089
|
+
def recompute_columns(self, col_names: list[str], errors_only: bool = False, cascade: bool = True) -> UpdateStatus:
|
|
1090
|
+
assert not self.is_snapshot
|
|
1091
|
+
assert all(name in self.cols_by_name for name in col_names)
|
|
1092
|
+
assert len(col_names) > 0
|
|
1093
|
+
assert len(col_names) == 1 or not errors_only
|
|
1094
|
+
|
|
1095
|
+
from pixeltable.plan import Planner
|
|
1096
|
+
|
|
1097
|
+
target_columns = [self.cols_by_name[name] for name in col_names]
|
|
1098
|
+
where_clause: Optional[exprs.Expr] = None
|
|
1099
|
+
if errors_only:
|
|
1100
|
+
where_clause = (
|
|
1101
|
+
exprs.ColumnPropertyRef(exprs.ColumnRef(target_columns[0]), exprs.ColumnPropertyRef.Property.ERRORTYPE)
|
|
1102
|
+
!= None
|
|
1103
|
+
)
|
|
1104
|
+
plan, updated_cols, recomputed_cols = Planner.create_update_plan(
|
|
1105
|
+
self.path, update_targets={}, recompute_targets=target_columns, where_clause=where_clause, cascade=cascade
|
|
1106
|
+
)
|
|
1107
|
+
from pixeltable.exprs import SqlElementCache
|
|
1108
|
+
|
|
1109
|
+
result = self.propagate_update(
|
|
1110
|
+
plan,
|
|
1111
|
+
where_clause.sql_expr(SqlElementCache()) if where_clause is not None else None,
|
|
1112
|
+
recomputed_cols,
|
|
1113
|
+
base_versions=[],
|
|
1114
|
+
timestamp=time.time(),
|
|
1115
|
+
cascade=cascade,
|
|
1116
|
+
show_progress=True,
|
|
1117
|
+
)
|
|
1118
|
+
result += UpdateStatus(updated_cols=updated_cols)
|
|
1119
|
+
return result
|
|
1120
|
+
|
|
1064
1121
|
def propagate_update(
|
|
1065
1122
|
self,
|
|
1066
1123
|
plan: Optional[exec.ExecNode],
|
|
@@ -1072,17 +1129,19 @@ class TableVersion:
|
|
|
1072
1129
|
show_progress: bool = True,
|
|
1073
1130
|
) -> UpdateStatus:
|
|
1074
1131
|
result = UpdateStatus()
|
|
1075
|
-
|
|
1076
|
-
|
|
1132
|
+
create_new_table_version = plan is not None
|
|
1133
|
+
if create_new_table_version:
|
|
1077
1134
|
self.version += 1
|
|
1078
|
-
|
|
1135
|
+
cols_with_excs, row_counts = self.store_tbl.insert_rows(
|
|
1079
1136
|
plan, v_min=self.version, show_progress=show_progress
|
|
1080
1137
|
)
|
|
1081
|
-
result
|
|
1138
|
+
result += UpdateStatus(
|
|
1139
|
+
row_count_stats=row_counts.insert_to_update(),
|
|
1140
|
+
cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs],
|
|
1141
|
+
)
|
|
1082
1142
|
self.store_tbl.delete_rows(
|
|
1083
1143
|
self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause
|
|
1084
1144
|
)
|
|
1085
|
-
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
|
|
1086
1145
|
|
|
1087
1146
|
if cascade:
|
|
1088
1147
|
base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
|
|
@@ -1097,17 +1156,18 @@ class TableVersion:
|
|
|
1097
1156
|
status = view.get().propagate_update(
|
|
1098
1157
|
plan, None, recomputed_view_cols, base_versions=base_versions, timestamp=timestamp, cascade=True
|
|
1099
1158
|
)
|
|
1100
|
-
result
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
|
|
1159
|
+
result += status.to_cascade()
|
|
1160
|
+
if create_new_table_version:
|
|
1161
|
+
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False, update_status=result)
|
|
1105
1162
|
return result
|
|
1106
1163
|
|
|
1107
1164
|
def delete(self, where: Optional[exprs.Expr] = None) -> UpdateStatus:
|
|
1108
1165
|
"""Delete rows in this table.
|
|
1109
1166
|
Args:
|
|
1110
1167
|
where: a predicate to filter rows to delete.
|
|
1168
|
+
|
|
1169
|
+
Returns:
|
|
1170
|
+
UpdateStatus: an object containing the number of deleted rows and other statistics.
|
|
1111
1171
|
"""
|
|
1112
1172
|
assert self.is_insertable
|
|
1113
1173
|
from pixeltable.exprs import Expr
|
|
@@ -1123,14 +1183,12 @@ class TableVersion:
|
|
|
1123
1183
|
raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
|
|
1124
1184
|
sql_where_clause = analysis_info.sql_where_clause
|
|
1125
1185
|
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
status = UpdateStatus(num_rows=num_rows)
|
|
1186
|
+
status = self.propagate_delete(sql_where_clause, base_versions=[], timestamp=time.time())
|
|
1129
1187
|
return status
|
|
1130
1188
|
|
|
1131
1189
|
def propagate_delete(
|
|
1132
1190
|
self, where: Optional[exprs.Expr], base_versions: list[Optional[int]], timestamp: float
|
|
1133
|
-
) ->
|
|
1191
|
+
) -> UpdateStatus:
|
|
1134
1192
|
"""Delete rows in this table and propagate to views.
|
|
1135
1193
|
Args:
|
|
1136
1194
|
where: a predicate to filter rows to delete.
|
|
@@ -1146,18 +1204,22 @@ class TableVersion:
|
|
|
1146
1204
|
# sql.sql.visitors.traverse(sql_where_clause, {}, {'column': collect_cols})
|
|
1147
1205
|
# x = [f'{str(c)}:{hash(c)}:{id(c.table)}' for c in sql_cols]
|
|
1148
1206
|
# print(f'where_clause cols: {x}')
|
|
1149
|
-
|
|
1207
|
+
del_rows = self.store_tbl.delete_rows(
|
|
1150
1208
|
self.version + 1, base_versions=base_versions, match_on_vmin=False, where_clause=sql_where_clause
|
|
1151
1209
|
)
|
|
1152
|
-
|
|
1210
|
+
row_counts = RowCountStats(del_rows=del_rows) # delete
|
|
1211
|
+
result = UpdateStatus(row_count_stats=row_counts)
|
|
1212
|
+
if del_rows > 0:
|
|
1153
1213
|
# we're creating a new version
|
|
1154
1214
|
self.version += 1
|
|
1155
|
-
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
|
|
1156
1215
|
for view in self.mutable_views:
|
|
1157
|
-
|
|
1216
|
+
status = view.get().propagate_delete(
|
|
1158
1217
|
where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
|
|
1159
1218
|
)
|
|
1160
|
-
|
|
1219
|
+
result += status.to_cascade()
|
|
1220
|
+
if del_rows > 0:
|
|
1221
|
+
self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False, update_status=result)
|
|
1222
|
+
return result
|
|
1161
1223
|
|
|
1162
1224
|
def revert(self) -> None:
|
|
1163
1225
|
"""Reverts the table to the previous version."""
|
|
@@ -1498,12 +1560,14 @@ class TableVersion:
|
|
|
1498
1560
|
{'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()} for store in stores
|
|
1499
1561
|
]
|
|
1500
1562
|
|
|
1501
|
-
def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
|
|
1563
|
+
def _create_version_md(self, timestamp: float, update_status: Optional[UpdateStatus]) -> schema.TableVersionMd:
|
|
1502
1564
|
return schema.TableVersionMd(
|
|
1503
1565
|
tbl_id=str(self.id),
|
|
1504
1566
|
created_at=timestamp,
|
|
1505
1567
|
version=self.version,
|
|
1506
1568
|
schema_version=self.schema_version,
|
|
1569
|
+
user=Env.get().user,
|
|
1570
|
+
update_status=update_status,
|
|
1507
1571
|
additional_md={},
|
|
1508
1572
|
)
|
|
1509
1573
|
|
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
+
from dataclasses import dataclass
|
|
4
5
|
from typing import TYPE_CHECKING, Optional
|
|
5
6
|
from uuid import UUID
|
|
6
7
|
|
|
8
|
+
from pixeltable import exceptions as excs
|
|
9
|
+
|
|
7
10
|
from .table_version import TableVersion
|
|
8
11
|
|
|
9
12
|
if TYPE_CHECKING:
|
|
10
|
-
|
|
13
|
+
from pixeltable.catalog import Column
|
|
11
14
|
|
|
12
15
|
_logger = logging.getLogger('pixeltable')
|
|
13
16
|
|
|
@@ -67,3 +70,25 @@ class TableVersionHandle:
|
|
|
67
70
|
@classmethod
|
|
68
71
|
def from_dict(cls, d: dict) -> TableVersionHandle:
|
|
69
72
|
return cls(UUID(d['id']), d['effective_version'])
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclass(frozen=True)
|
|
76
|
+
class ColumnHandle:
|
|
77
|
+
tbl_version: TableVersionHandle
|
|
78
|
+
col_id: int
|
|
79
|
+
|
|
80
|
+
def get(self) -> 'Column':
|
|
81
|
+
if self.col_id not in self.tbl_version.get().cols_by_id:
|
|
82
|
+
schema_version_drop = self.tbl_version.get()._tbl_md.column_md[self.col_id].schema_version_drop
|
|
83
|
+
raise excs.Error(
|
|
84
|
+
f'Column has been dropped (no record for column ID {self.col_id} in table '
|
|
85
|
+
f'{self.tbl_version.get().versioned_name!r}; it was dropped in table version {schema_version_drop})'
|
|
86
|
+
)
|
|
87
|
+
return self.tbl_version.get().cols_by_id[self.col_id]
|
|
88
|
+
|
|
89
|
+
def as_dict(self) -> dict:
|
|
90
|
+
return {'tbl_version': self.tbl_version.as_dict(), 'col_id': self.col_id}
|
|
91
|
+
|
|
92
|
+
@classmethod
|
|
93
|
+
def from_dict(cls, d: dict) -> ColumnHandle:
|
|
94
|
+
return cls(tbl_version=TableVersionHandle.from_dict(d['tbl_version']), col_id=d['col_id'])
|