pixeltable 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (51)
  1. pixeltable/__init__.py +1 -0
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +3 -10
  4. pixeltable/catalog/catalog.py +139 -59
  5. pixeltable/catalog/column.py +32 -23
  6. pixeltable/catalog/globals.py +2 -45
  7. pixeltable/catalog/insertable_table.py +5 -2
  8. pixeltable/catalog/path.py +6 -0
  9. pixeltable/catalog/table.py +173 -23
  10. pixeltable/catalog/table_version.py +156 -92
  11. pixeltable/catalog/table_version_handle.py +26 -1
  12. pixeltable/catalog/update_status.py +179 -0
  13. pixeltable/catalog/view.py +12 -3
  14. pixeltable/config.py +76 -12
  15. pixeltable/dataframe.py +1 -1
  16. pixeltable/env.py +29 -0
  17. pixeltable/exec/exec_node.py +7 -24
  18. pixeltable/exec/expr_eval/schedulers.py +134 -7
  19. pixeltable/exprs/column_property_ref.py +23 -20
  20. pixeltable/exprs/column_ref.py +24 -18
  21. pixeltable/exprs/data_row.py +9 -0
  22. pixeltable/exprs/function_call.py +2 -2
  23. pixeltable/exprs/row_builder.py +46 -14
  24. pixeltable/exprs/rowid_ref.py +0 -4
  25. pixeltable/func/function.py +3 -3
  26. pixeltable/functions/audio.py +36 -9
  27. pixeltable/functions/video.py +57 -10
  28. pixeltable/globals.py +61 -1
  29. pixeltable/io/__init__.py +1 -1
  30. pixeltable/io/external_store.py +39 -64
  31. pixeltable/io/globals.py +4 -4
  32. pixeltable/io/hf_datasets.py +10 -2
  33. pixeltable/io/label_studio.py +52 -48
  34. pixeltable/metadata/__init__.py +1 -1
  35. pixeltable/metadata/converters/convert_38.py +39 -0
  36. pixeltable/metadata/converters/convert_39.py +125 -0
  37. pixeltable/metadata/converters/util.py +3 -0
  38. pixeltable/metadata/notes.py +2 -0
  39. pixeltable/metadata/schema.py +14 -2
  40. pixeltable/metadata/utils.py +78 -0
  41. pixeltable/plan.py +26 -18
  42. pixeltable/share/packager.py +20 -38
  43. pixeltable/store.py +121 -142
  44. pixeltable/type_system.py +2 -2
  45. pixeltable/utils/coroutine.py +6 -23
  46. pixeltable/utils/media_store.py +39 -0
  47. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/METADATA +1 -1
  48. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/RECORD +51 -47
  49. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/LICENSE +0 -0
  50. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/WHEEL +0 -0
  51. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/entry_points.txt +0 -0
@@ -29,7 +29,8 @@ if TYPE_CHECKING:
29
29
 
30
30
  from ..func.globals import resolve_symbol
31
31
  from .column import Column
32
- from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, UpdateStatus, is_valid_identifier
32
+ from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, is_valid_identifier
33
+ from .update_status import RowCountStats, UpdateStatus
33
34
 
34
35
  if TYPE_CHECKING:
35
36
  from pixeltable import exec, store
@@ -183,6 +184,12 @@ class TableVersion:
183
184
  else:
184
185
  return f'{self.name}:{self.effective_version}'
185
186
 
187
+ @property
188
+ def handle(self) -> 'TableVersionHandle':
189
+ from .table_version_handle import TableVersionHandle
190
+
191
+ return TableVersionHandle(self.id, self.effective_version, self)
192
+
186
193
  @classmethod
187
194
  def create(
188
195
  cls,
@@ -195,7 +202,6 @@ class TableVersion:
195
202
  # base_path: Optional[pxt.catalog.TableVersionPath] = None,
196
203
  view_md: Optional[schema.ViewMd] = None,
197
204
  ) -> tuple[UUID, Optional[TableVersion]]:
198
- session = Env.get().session
199
205
  user = Env.get().user
200
206
 
201
207
  # assign ids
@@ -212,8 +218,9 @@ class TableVersion:
212
218
  # Column.dependent_cols for existing cols is wrong at this point, but init() will set it correctly
213
219
  column_md = cls._create_column_md(cols)
214
220
  tbl_id = uuid.uuid4()
221
+ tbl_id_str = str(tbl_id)
215
222
  table_md = schema.TableMd(
216
- tbl_id=str(tbl_id),
223
+ tbl_id=tbl_id_str,
217
224
  name=name,
218
225
  user=user,
219
226
  is_replica=False,
@@ -229,16 +236,16 @@ class TableVersion:
229
236
  view_md=view_md,
230
237
  additional_md={},
231
238
  )
232
- # create a schema.Table here, we need it to call our c'tor;
233
- # don't add it to the session yet, we might add index metadata
234
- tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))
235
239
 
236
- # create schema.TableVersion
240
+ # create schema.TableVersion of the initial version
237
241
  table_version_md = schema.TableVersionMd(
238
- tbl_id=str(tbl_record.id), created_at=timestamp, version=0, schema_version=0, additional_md={}
239
- )
240
- tbl_version_record = schema.TableVersion(
241
- tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md)
242
+ tbl_id=tbl_id_str,
243
+ created_at=timestamp,
244
+ version=0,
245
+ schema_version=0,
246
+ user=user,
247
+ update_status=None,
248
+ additional_md={},
242
249
  )
243
250
 
244
251
  # create schema.TableSchemaVersion
@@ -252,7 +259,7 @@ class TableVersion:
252
259
  schema_col_md[col.id] = md
253
260
 
254
261
  schema_version_md = schema.TableSchemaVersionMd(
255
- tbl_id=str(tbl_record.id),
262
+ tbl_id=tbl_id_str,
256
263
  schema_version=0,
257
264
  preceding_schema_version=None,
258
265
  columns=schema_col_md,
@@ -261,9 +268,8 @@ class TableVersion:
261
268
  media_validation=media_validation.name.lower(),
262
269
  additional_md={},
263
270
  )
264
- schema_version_record = schema.TableSchemaVersion(
265
- tbl_id=tbl_record.id, schema_version=0, md=dataclasses.asdict(schema_version_md)
266
- )
271
+
272
+ cat = pxt.catalog.Catalog.get()
267
273
 
268
274
  # if this is purely a snapshot (it doesn't require any additional storage for columns and it doesn't have a
269
275
  # predicate to apply at runtime), we don't create a physical table and simply use the base's table version path
@@ -274,22 +280,23 @@ class TableVersion:
274
280
  and view_md.sample_clause is None
275
281
  and len(cols) == 0
276
282
  ):
277
- session.add(tbl_record)
278
- session.add(tbl_version_record)
279
- session.add(schema_version_record)
280
- return tbl_record.id, None
283
+ cat.store_tbl_md(
284
+ tbl_id=tbl_id,
285
+ dir_id=dir_id,
286
+ tbl_md=table_md,
287
+ version_md=table_version_md,
288
+ schema_version_md=schema_version_md,
289
+ )
290
+ return tbl_id, None
281
291
 
282
292
  # assert (base_path is not None) == (view_md is not None)
283
293
  is_snapshot = view_md is not None and view_md.is_snapshot
284
294
  effective_version = 0 if is_snapshot else None
285
295
  base_path = pxt.catalog.TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
286
296
  base = base_path.tbl_version if base_path is not None else None
287
- tbl_version = cls(
288
- tbl_record.id, table_md, effective_version, schema_version_md, [], base_path=base_path, base=base
289
- )
297
+ tbl_version = cls(tbl_id, table_md, effective_version, schema_version_md, [], base_path=base_path, base=base)
290
298
  # TODO: break this up, so that Catalog.create_table() registers tbl_version
291
- cat = pxt.catalog.Catalog.get()
292
- cat._tbl_versions[tbl_record.id, effective_version] = tbl_version
299
+ cat._tbl_versions[tbl_id, effective_version] = tbl_version
293
300
  tbl_version.init()
294
301
  tbl_version.store_tbl.create()
295
302
  is_mutable = not is_snapshot and not table_md.is_replica
@@ -306,15 +313,18 @@ class TableVersion:
306
313
  status = tbl_version._add_default_index(col)
307
314
  assert status is None or status.num_excs == 0
308
315
 
309
- # we re-create the tbl_record here, now that we have new index metadata
310
- tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_version.tbl_md))
311
- session.add(tbl_record)
312
- session.add(tbl_version_record)
313
- session.add(schema_version_record)
314
- return tbl_record.id, tbl_version
316
+ cat.store_tbl_md(
317
+ tbl_id=tbl_id,
318
+ dir_id=dir_id,
319
+ tbl_md=tbl_version.tbl_md,
320
+ version_md=table_version_md,
321
+ schema_version_md=schema_version_md,
322
+ )
323
+ return tbl_id, tbl_version
315
324
 
316
325
  @classmethod
317
326
  def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
327
+ assert Env.get().in_xact
318
328
  tbl_id = UUID(md.tbl_md.tbl_id)
319
329
  _logger.info(f'Creating replica table version {tbl_id}:{md.version_md.version}.')
320
330
  view_md = md.tbl_md.view_md
@@ -324,6 +334,10 @@ class TableVersion:
324
334
  tbl_id, md.tbl_md, md.version_md.version, md.schema_version_md, [], base_path=base_path, base=base
325
335
  )
326
336
  cat = pxt.catalog.Catalog.get()
337
+ # We're creating a new TableVersion replica, so we should never have seen this particular
338
+ # TableVersion instance before.
339
+ assert tbl_version.effective_version is not None
340
+ assert (tbl_version.id, tbl_version.effective_version) not in cat._tbl_versions
327
341
  cat._tbl_versions[tbl_version.id, tbl_version.effective_version] = tbl_version
328
342
  tbl_version.init()
329
343
  tbl_version.store_tbl.create()
@@ -445,10 +459,10 @@ class TableVersion:
445
459
  # fix up the sa column type of the index value and undo columns
446
460
  val_col = self.cols_by_id[md.index_val_col_id]
447
461
  val_col.sa_col_type = idx.index_sa_type()
448
- val_col._records_errors = False
462
+ val_col._stores_cellmd = False
449
463
  undo_col = self.cols_by_id[md.index_val_undo_col_id]
450
464
  undo_col.sa_col_type = idx.index_sa_type()
451
- undo_col._records_errors = False
465
+ undo_col._stores_cellmd = False
452
466
  idx_info = self.IndexInfo(id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col)
453
467
  self.idxs_by_name[md.name] = idx_info
454
468
 
@@ -464,7 +478,13 @@ class TableVersion:
464
478
  else:
465
479
  self.store_tbl = StoreTable(self)
466
480
 
467
- def _write_md(self, new_version: bool, new_version_ts: float, new_schema_version: bool) -> None:
481
+ def _write_md(
482
+ self,
483
+ new_version: bool,
484
+ new_version_ts: float,
485
+ new_schema_version: bool,
486
+ update_status: Optional[UpdateStatus] = None,
487
+ ) -> None:
468
488
  """Writes table metadata to the database.
469
489
 
470
490
  Args:
@@ -475,22 +495,23 @@ class TableVersion:
475
495
  """
476
496
  from pixeltable.catalog import Catalog
477
497
 
478
- version_md: Optional[schema.TableVersionMd] = (
479
- schema.TableVersionMd(
480
- tbl_id=str(self.id),
481
- created_at=new_version_ts,
482
- version=self.version,
483
- schema_version=self.schema_version,
484
- additional_md={},
485
- )
486
- if new_version
487
- else None
488
- )
498
+ version_md = self._create_version_md(new_version_ts, update_status=update_status) if new_version else None
489
499
 
490
500
  Catalog.get().store_tbl_md(
491
- self.id, self._tbl_md, version_md, self._schema_version_md if new_schema_version else None
501
+ self.id, None, self._tbl_md, version_md, self._schema_version_md if new_schema_version else None
492
502
  )
493
503
 
504
+ def _write_md_update_status(self, new_version_ts: float, update_status: UpdateStatus) -> None:
505
+ """Writes a new update_status in the table version metadata in the database.
506
+
507
+ Args:
508
+ new_version_ts: timestamp of the change
509
+ update_status: UpdateStatus to be updated in the database
510
+ """
511
+ from pixeltable.catalog import Catalog
512
+
513
+ Catalog.get().update_tbl_version_md(self._create_version_md(new_version_ts, update_status))
514
+
494
515
  def _store_idx_name(self, idx_id: int) -> str:
495
516
  """Return name of index in the store, which needs to be globally unique"""
496
517
  return f'idx_{self.id.hex}_{idx_id}'
@@ -544,7 +565,7 @@ class TableVersion:
544
565
  stored=True,
545
566
  schema_version_add=self.schema_version,
546
567
  schema_version_drop=None,
547
- records_errors=idx.records_value_errors(),
568
+ stores_cellmd=idx.records_value_errors(),
548
569
  )
549
570
  val_col.tbl = self
550
571
  val_col.col_type = val_col.col_type.copy(nullable=True)
@@ -558,7 +579,7 @@ class TableVersion:
558
579
  stored=True,
559
580
  schema_version_add=self.schema_version,
560
581
  schema_version_drop=None,
561
- records_errors=False,
582
+ stores_cellmd=False,
562
583
  )
563
584
  undo_col.tbl = self
564
585
  undo_col.col_type = undo_col.col_type.copy(nullable=True)
@@ -670,7 +691,7 @@ class TableVersion:
670
691
  # Create indices and their md records
671
692
  for col, (idx, val_col, undo_col) in index_cols.items():
672
693
  self._create_index(col, val_col, undo_col, idx_name=None, idx=idx)
673
- self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
694
+ self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True, update_status=status)
674
695
  _logger.info(f'Added columns {[col.name for col in cols]} to table {self.name}, new version: {self.version}')
675
696
 
676
697
  msg = (
@@ -693,6 +714,7 @@ class TableVersion:
693
714
  f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
694
715
  )
695
716
 
717
+ computed_values = 0
696
718
  num_excs = 0
697
719
  cols_with_excs: list[Column] = []
698
720
  for col in cols_to_add:
@@ -731,18 +753,19 @@ class TableVersion:
731
753
  # populate the column
732
754
  from pixeltable.plan import Planner
733
755
 
734
- plan, value_expr_slot_idx = Planner.create_add_column_plan(self.path, col)
756
+ plan = Planner.create_add_column_plan(self.path, col)
735
757
  plan.ctx.num_rows = row_count
736
758
  try:
737
759
  plan.open()
738
760
  try:
739
- excs_per_col = self.store_tbl.load_column(col, plan, value_expr_slot_idx, on_error)
761
+ excs_per_col = self.store_tbl.load_column(col, plan, on_error == 'abort')
740
762
  except sql.exc.DBAPIError as exc:
741
763
  # Wrap the DBAPIError in an excs.Error to unify processing in the subsequent except block
742
764
  raise excs.Error(f'SQL error during execution of computed column `{col.name}`:\n{exc}') from exc
743
765
  if excs_per_col > 0:
744
766
  cols_with_excs.append(col)
745
767
  num_excs += excs_per_col
768
+ computed_values += plan.ctx.num_computed_exprs * row_count
746
769
  finally:
747
770
  # Ensure cleanup occurs if an exception or keyboard interruption happens during `load_column()`.
748
771
  def cleanup_on_error() -> None:
@@ -765,12 +788,14 @@ class TableVersion:
765
788
 
766
789
  if print_stats:
767
790
  plan.ctx.profile.print(num_rows=row_count)
791
+
768
792
  # TODO: what to do about system columns with exceptions?
793
+ row_counts = RowCountStats(
794
+ upd_rows=row_count, num_excs=num_excs, computed_values=computed_values
795
+ ) # add_columns
769
796
  return UpdateStatus(
770
- num_rows=row_count,
771
- num_computed_values=row_count,
772
- num_excs=num_excs,
773
797
  cols_with_excs=[f'{col.tbl.name}.{col.name}' for col in cols_with_excs if col.name is not None],
798
+ row_count_stats=row_counts,
774
799
  )
775
800
 
776
801
  def drop_column(self, col: Column) -> None:
@@ -886,6 +911,7 @@ class TableVersion:
886
911
  assert (rows is None) != (df is None) # Exactly one must be specified
887
912
  if rows is not None:
888
913
  plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
914
+
889
915
  else:
890
916
  plan = Planner.create_df_insert_plan(self, df, ignore_errors=not fail_on_exception)
891
917
 
@@ -896,7 +922,10 @@ class TableVersion:
896
922
  self.next_row_id += 1
897
923
  yield rowid
898
924
 
899
- return self._insert(plan, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception)
925
+ result = self._insert(
926
+ plan, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception
927
+ )
928
+ return result
900
929
 
901
930
  def _insert(
902
931
  self,
@@ -910,30 +939,26 @@ class TableVersion:
910
939
  """Insert rows produced by exec_plan and propagate to views"""
911
940
  # we're creating a new version
912
941
  self.version += 1
913
- result = UpdateStatus()
914
- num_rows, num_excs, cols_with_excs = self.store_tbl.insert_rows(
942
+ cols_with_excs, row_counts = self.store_tbl.insert_rows(
915
943
  exec_plan, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc
916
944
  )
917
- result.num_rows = num_rows
918
- result.num_excs = num_excs
919
- result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
920
- result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
921
- self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
945
+ result = UpdateStatus(
946
+ cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs],
947
+ row_count_stats=row_counts,
948
+ )
922
949
 
923
950
  # update views
924
951
  for view in self.mutable_views:
925
952
  from pixeltable.plan import Planner
926
953
 
927
- plan, _ = Planner.create_view_load_plan(view.get().path, propagates_insert=True)
928
- status = view.get()._insert(plan, timestamp, print_stats=print_stats)
929
- result.num_rows += status.num_rows
930
- result.num_excs += status.num_excs
931
- result.num_computed_values += status.num_computed_values
932
- result.cols_with_excs += status.cols_with_excs
954
+ plan2, _ = Planner.create_view_load_plan(view.get().path, propagates_insert=True)
955
+ status = view.get()._insert(plan2, timestamp, print_stats=print_stats)
956
+ result += status.to_cascade()
933
957
 
934
- result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
958
+ # Use the net status after all propagations
959
+ self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False, update_status=result)
935
960
  if print_stats:
936
- plan.ctx.profile.print(num_rows=num_rows)
961
+ exec_plan.ctx.profile.print(num_rows=result.num_rows)
937
962
  _logger.info(f'TableVersion {self.name}: new version {self.version}')
938
963
  return result
939
964
 
@@ -973,7 +998,7 @@ class TableVersion:
973
998
  cascade=cascade,
974
999
  show_progress=True,
975
1000
  )
976
- result.updated_cols = updated_cols
1001
+ result += UpdateStatus(updated_cols=updated_cols)
977
1002
  return result
978
1003
 
979
1004
  def batch_update(
@@ -1000,7 +1025,7 @@ class TableVersion:
1000
1025
  result = self.propagate_update(
1001
1026
  plan, delete_where_clause, recomputed_cols, base_versions=[], timestamp=time.time(), cascade=cascade
1002
1027
  )
1003
- result.updated_cols = [c.qualified_name for c in updated_cols]
1028
+ result += UpdateStatus(updated_cols=[c.qualified_name for c in updated_cols])
1004
1029
 
1005
1030
  unmatched_rows = row_update_node.unmatched_rows()
1006
1031
  if len(unmatched_rows) > 0:
@@ -1008,7 +1033,7 @@ class TableVersion:
1008
1033
  raise excs.Error(f'batch_update(): {len(unmatched_rows)} row(s) not found')
1009
1034
  if insert_if_not_exists:
1010
1035
  insert_status = self.insert(unmatched_rows, None, print_stats=False, fail_on_exception=False)
1011
- result += insert_status
1036
+ result += insert_status.to_cascade()
1012
1037
  return result
1013
1038
 
1014
1039
  def _validate_update_spec(
@@ -1061,6 +1086,38 @@ class TableVersion:
1061
1086
 
1062
1087
  return update_targets
1063
1088
 
1089
+ def recompute_columns(self, col_names: list[str], errors_only: bool = False, cascade: bool = True) -> UpdateStatus:
1090
+ assert not self.is_snapshot
1091
+ assert all(name in self.cols_by_name for name in col_names)
1092
+ assert len(col_names) > 0
1093
+ assert len(col_names) == 1 or not errors_only
1094
+
1095
+ from pixeltable.plan import Planner
1096
+
1097
+ target_columns = [self.cols_by_name[name] for name in col_names]
1098
+ where_clause: Optional[exprs.Expr] = None
1099
+ if errors_only:
1100
+ where_clause = (
1101
+ exprs.ColumnPropertyRef(exprs.ColumnRef(target_columns[0]), exprs.ColumnPropertyRef.Property.ERRORTYPE)
1102
+ != None
1103
+ )
1104
+ plan, updated_cols, recomputed_cols = Planner.create_update_plan(
1105
+ self.path, update_targets={}, recompute_targets=target_columns, where_clause=where_clause, cascade=cascade
1106
+ )
1107
+ from pixeltable.exprs import SqlElementCache
1108
+
1109
+ result = self.propagate_update(
1110
+ plan,
1111
+ where_clause.sql_expr(SqlElementCache()) if where_clause is not None else None,
1112
+ recomputed_cols,
1113
+ base_versions=[],
1114
+ timestamp=time.time(),
1115
+ cascade=cascade,
1116
+ show_progress=True,
1117
+ )
1118
+ result += UpdateStatus(updated_cols=updated_cols)
1119
+ return result
1120
+
1064
1121
  def propagate_update(
1065
1122
  self,
1066
1123
  plan: Optional[exec.ExecNode],
@@ -1072,17 +1129,19 @@ class TableVersion:
1072
1129
  show_progress: bool = True,
1073
1130
  ) -> UpdateStatus:
1074
1131
  result = UpdateStatus()
1075
- if plan is not None:
1076
- # we're creating a new version
1132
+ create_new_table_version = plan is not None
1133
+ if create_new_table_version:
1077
1134
  self.version += 1
1078
- result.num_rows, result.num_excs, cols_with_excs = self.store_tbl.insert_rows(
1135
+ cols_with_excs, row_counts = self.store_tbl.insert_rows(
1079
1136
  plan, v_min=self.version, show_progress=show_progress
1080
1137
  )
1081
- result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
1138
+ result += UpdateStatus(
1139
+ row_count_stats=row_counts.insert_to_update(),
1140
+ cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs],
1141
+ )
1082
1142
  self.store_tbl.delete_rows(
1083
1143
  self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause
1084
1144
  )
1085
- self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
1086
1145
 
1087
1146
  if cascade:
1088
1147
  base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
@@ -1097,17 +1156,18 @@ class TableVersion:
1097
1156
  status = view.get().propagate_update(
1098
1157
  plan, None, recomputed_view_cols, base_versions=base_versions, timestamp=timestamp, cascade=True
1099
1158
  )
1100
- result.num_rows += status.num_rows
1101
- result.num_excs += status.num_excs
1102
- result.cols_with_excs += status.cols_with_excs
1103
-
1104
- result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
1159
+ result += status.to_cascade()
1160
+ if create_new_table_version:
1161
+ self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False, update_status=result)
1105
1162
  return result
1106
1163
 
1107
1164
  def delete(self, where: Optional[exprs.Expr] = None) -> UpdateStatus:
1108
1165
  """Delete rows in this table.
1109
1166
  Args:
1110
1167
  where: a predicate to filter rows to delete.
1168
+
1169
+ Returns:
1170
+ UpdateStatus: an object containing the number of deleted rows and other statistics.
1111
1171
  """
1112
1172
  assert self.is_insertable
1113
1173
  from pixeltable.exprs import Expr
@@ -1123,14 +1183,12 @@ class TableVersion:
1123
1183
  raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
1124
1184
  sql_where_clause = analysis_info.sql_where_clause
1125
1185
 
1126
- num_rows = self.propagate_delete(sql_where_clause, base_versions=[], timestamp=time.time())
1127
-
1128
- status = UpdateStatus(num_rows=num_rows)
1186
+ status = self.propagate_delete(sql_where_clause, base_versions=[], timestamp=time.time())
1129
1187
  return status
1130
1188
 
1131
1189
  def propagate_delete(
1132
1190
  self, where: Optional[exprs.Expr], base_versions: list[Optional[int]], timestamp: float
1133
- ) -> int:
1191
+ ) -> UpdateStatus:
1134
1192
  """Delete rows in this table and propagate to views.
1135
1193
  Args:
1136
1194
  where: a predicate to filter rows to delete.
@@ -1146,18 +1204,22 @@ class TableVersion:
1146
1204
  # sql.sql.visitors.traverse(sql_where_clause, {}, {'column': collect_cols})
1147
1205
  # x = [f'{str(c)}:{hash(c)}:{id(c.table)}' for c in sql_cols]
1148
1206
  # print(f'where_clause cols: {x}')
1149
- num_rows = self.store_tbl.delete_rows(
1207
+ del_rows = self.store_tbl.delete_rows(
1150
1208
  self.version + 1, base_versions=base_versions, match_on_vmin=False, where_clause=sql_where_clause
1151
1209
  )
1152
- if num_rows > 0:
1210
+ row_counts = RowCountStats(del_rows=del_rows) # delete
1211
+ result = UpdateStatus(row_count_stats=row_counts)
1212
+ if del_rows > 0:
1153
1213
  # we're creating a new version
1154
1214
  self.version += 1
1155
- self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
1156
1215
  for view in self.mutable_views:
1157
- num_rows += view.get().propagate_delete(
1216
+ status = view.get().propagate_delete(
1158
1217
  where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
1159
1218
  )
1160
- return num_rows
1219
+ result += status.to_cascade()
1220
+ if del_rows > 0:
1221
+ self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False, update_status=result)
1222
+ return result
1161
1223
 
1162
1224
  def revert(self) -> None:
1163
1225
  """Reverts the table to the previous version."""
@@ -1498,12 +1560,14 @@ class TableVersion:
1498
1560
  {'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()} for store in stores
1499
1561
  ]
1500
1562
 
1501
- def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
1563
+ def _create_version_md(self, timestamp: float, update_status: Optional[UpdateStatus]) -> schema.TableVersionMd:
1502
1564
  return schema.TableVersionMd(
1503
1565
  tbl_id=str(self.id),
1504
1566
  created_at=timestamp,
1505
1567
  version=self.version,
1506
1568
  schema_version=self.schema_version,
1569
+ user=Env.get().user,
1570
+ update_status=update_status,
1507
1571
  additional_md={},
1508
1572
  )
1509
1573
 
@@ -1,13 +1,16 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
+ from dataclasses import dataclass
4
5
  from typing import TYPE_CHECKING, Optional
5
6
  from uuid import UUID
6
7
 
8
+ from pixeltable import exceptions as excs
9
+
7
10
  from .table_version import TableVersion
8
11
 
9
12
  if TYPE_CHECKING:
10
- pass
13
+ from pixeltable.catalog import Column
11
14
 
12
15
  _logger = logging.getLogger('pixeltable')
13
16
 
@@ -67,3 +70,25 @@ class TableVersionHandle:
67
70
  @classmethod
68
71
  def from_dict(cls, d: dict) -> TableVersionHandle:
69
72
  return cls(UUID(d['id']), d['effective_version'])
73
+
74
+
75
+ @dataclass(frozen=True)
76
+ class ColumnHandle:
77
+ tbl_version: TableVersionHandle
78
+ col_id: int
79
+
80
+ def get(self) -> 'Column':
81
+ if self.col_id not in self.tbl_version.get().cols_by_id:
82
+ schema_version_drop = self.tbl_version.get()._tbl_md.column_md[self.col_id].schema_version_drop
83
+ raise excs.Error(
84
+ f'Column has been dropped (no record for column ID {self.col_id} in table '
85
+ f'{self.tbl_version.get().versioned_name!r}; it was dropped in table version {schema_version_drop})'
86
+ )
87
+ return self.tbl_version.get().cols_by_id[self.col_id]
88
+
89
+ def as_dict(self) -> dict:
90
+ return {'tbl_version': self.tbl_version.as_dict(), 'col_id': self.col_id}
91
+
92
+ @classmethod
93
+ def from_dict(cls, d: dict) -> ColumnHandle:
94
+ return cls(tbl_version=TableVersionHandle.from_dict(d['tbl_version']), col_id=d['col_id'])