pixeltable 0.4.2__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (47)
  1. pixeltable/__init__.py +1 -0
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +2 -10
  4. pixeltable/catalog/catalog.py +64 -38
  5. pixeltable/catalog/column.py +22 -23
  6. pixeltable/catalog/globals.py +2 -148
  7. pixeltable/catalog/insertable_table.py +6 -4
  8. pixeltable/catalog/path.py +6 -0
  9. pixeltable/catalog/table.py +51 -32
  10. pixeltable/catalog/table_version.py +69 -45
  11. pixeltable/catalog/update_status.py +179 -0
  12. pixeltable/catalog/view.py +9 -2
  13. pixeltable/config.py +76 -12
  14. pixeltable/dataframe.py +1 -1
  15. pixeltable/env.py +29 -0
  16. pixeltable/exec/exec_node.py +7 -24
  17. pixeltable/exec/expr_eval/schedulers.py +134 -7
  18. pixeltable/exprs/column_property_ref.py +21 -9
  19. pixeltable/exprs/column_ref.py +5 -1
  20. pixeltable/exprs/function_call.py +2 -2
  21. pixeltable/exprs/row_builder.py +10 -9
  22. pixeltable/exprs/rowid_ref.py +0 -4
  23. pixeltable/func/function.py +3 -3
  24. pixeltable/functions/audio.py +36 -9
  25. pixeltable/functions/video.py +57 -10
  26. pixeltable/globals.py +61 -1
  27. pixeltable/io/__init__.py +1 -1
  28. pixeltable/io/external_store.py +3 -55
  29. pixeltable/io/globals.py +4 -4
  30. pixeltable/io/hf_datasets.py +10 -2
  31. pixeltable/io/label_studio.py +16 -16
  32. pixeltable/metadata/__init__.py +1 -1
  33. pixeltable/metadata/converters/convert_39.py +125 -0
  34. pixeltable/metadata/converters/util.py +3 -0
  35. pixeltable/metadata/notes.py +1 -0
  36. pixeltable/metadata/schema.py +14 -2
  37. pixeltable/plan.py +4 -0
  38. pixeltable/share/packager.py +20 -38
  39. pixeltable/store.py +18 -50
  40. pixeltable/type_system.py +2 -2
  41. pixeltable/utils/coroutine.py +6 -23
  42. pixeltable/utils/media_store.py +39 -0
  43. {pixeltable-0.4.2.dist-info → pixeltable-0.4.3.dist-info}/METADATA +1 -1
  44. {pixeltable-0.4.2.dist-info → pixeltable-0.4.3.dist-info}/RECORD +47 -45
  45. {pixeltable-0.4.2.dist-info → pixeltable-0.4.3.dist-info}/LICENSE +0 -0
  46. {pixeltable-0.4.2.dist-info → pixeltable-0.4.3.dist-info}/WHEEL +0 -0
  47. {pixeltable-0.4.2.dist-info → pixeltable-0.4.3.dist-info}/entry_points.txt +0 -0
@@ -6,7 +6,7 @@ import json
6
6
  import logging
7
7
  from keyword import iskeyword as is_python_keyword
8
8
  from pathlib import Path
9
- from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload
9
+ from typing import TYPE_CHECKING, Any, ClassVar, Iterable, Literal, Optional, Union, overload
10
10
 
11
11
  from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
12
12
  import datetime
@@ -29,13 +29,13 @@ from .globals import (
29
29
  IfExistsParam,
30
30
  IfNotExistsParam,
31
31
  MediaValidation,
32
- UpdateStatus,
33
32
  is_system_column_name,
34
33
  is_valid_identifier,
35
34
  )
36
35
  from .schema_object import SchemaObject
37
36
  from .table_version_handle import TableVersionHandle
38
37
  from .table_version_path import TableVersionPath
38
+ from .update_status import UpdateStatus
39
39
 
40
40
  if TYPE_CHECKING:
41
41
  import torch.utils.data
@@ -109,8 +109,6 @@ class Table(SchemaObject):
109
109
 
110
110
  def _get_metadata(self) -> dict[str, Any]:
111
111
  md = super()._get_metadata()
112
- base = self._get_base_table()
113
- md['base'] = base._path() if base is not None else None
114
112
  md['schema'] = self._get_schema()
115
113
  md['is_replica'] = self._tbl_version_path.is_replica()
116
114
  md['version'] = self._get_version()
@@ -510,15 +508,16 @@ class Table(SchemaObject):
510
508
  for cname in cols_to_ignore:
511
509
  assert cname in col_schema
512
510
  del col_schema[cname]
511
+ result = UpdateStatus()
513
512
  if len(col_schema) == 0:
514
- return UpdateStatus()
513
+ return result
515
514
  new_cols = self._create_columns(col_schema)
516
515
  for new_col in new_cols:
517
516
  self._verify_column(new_col)
518
517
  assert self._tbl_version is not None
519
- status = self._tbl_version.get().add_columns(new_cols, print_stats=False, on_error='abort')
518
+ result += self._tbl_version.get().add_columns(new_cols, print_stats=False, on_error='abort')
520
519
  FileCache.get().emit_eviction_warnings()
521
- return status
520
+ return result
522
521
 
523
522
  def add_column(
524
523
  self,
@@ -595,7 +594,7 @@ class Table(SchemaObject):
595
594
  - `'abort'`: an exception will be raised and the column will not be added.
596
595
  - `'ignore'`: execution will continue and the column will be added. Any rows
597
596
  with errors will have a `None` value for the column, with information about the error stored in the
598
- corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
597
+ corresponding `tbl.col_name.errormsg` and `tbl.col_name.errortype` fields.
599
598
  if_exists: Determines the behavior if the column already exists. Must be one of the following:
600
599
 
601
600
  - `'error'`: an exception will be raised.
@@ -642,10 +641,10 @@ class Table(SchemaObject):
642
641
  # Raise an error if the column expression refers to a column error property
643
642
  if isinstance(spec, exprs.Expr):
644
643
  for e in spec.subexprs(expr_class=exprs.ColumnPropertyRef, traverse_matches=False):
645
- if e.is_error_prop():
644
+ if e.is_cellmd_prop():
646
645
  raise excs.Error(
647
- 'Use of a reference to an error property of another column is not allowed in a computed '
648
- f'column. The specified computation for this column contains this reference: `{e!r}`'
646
+ f'Use of a reference to the {e.prop.name.lower()!r} property of another column '
647
+ f'is not allowed in a computed column.'
649
648
  )
650
649
 
651
650
  # handle existing columns based on if_exists parameter
@@ -654,16 +653,17 @@ class Table(SchemaObject):
654
653
  )
655
654
  # if the column to add already exists and user asked to ignore
656
655
  # existing column, there's nothing to do.
656
+ result = UpdateStatus()
657
657
  if len(cols_to_ignore) != 0:
658
658
  assert cols_to_ignore[0] == col_name
659
- return UpdateStatus()
659
+ return result
660
660
 
661
661
  new_col = self._create_columns({col_name: col_schema})[0]
662
662
  self._verify_column(new_col)
663
663
  assert self._tbl_version is not None
664
- status = self._tbl_version.get().add_columns([new_col], print_stats=print_stats, on_error=on_error)
664
+ result += self._tbl_version.get().add_columns([new_col], print_stats=print_stats, on_error=on_error)
665
665
  FileCache.get().emit_eviction_warnings()
666
- return status
666
+ return result
667
667
 
668
668
  @classmethod
669
669
  def _validate_column_spec(cls, name: str, spec: dict[str, Any]) -> None:
@@ -1349,9 +1349,9 @@ class Table(SchemaObject):
1349
1349
  with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
1350
1350
  if self._tbl_version_path.is_snapshot():
1351
1351
  raise excs.Error('Cannot update a snapshot')
1352
- status = self._tbl_version.get().update(value_spec, where, cascade)
1352
+ result = self._tbl_version.get().update(value_spec, where, cascade)
1353
1353
  FileCache.get().emit_eviction_warnings()
1354
- return status
1354
+ return result
1355
1355
 
1356
1356
  def batch_update(
1357
1357
  self,
@@ -1415,7 +1415,7 @@ class Table(SchemaObject):
1415
1415
  raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
1416
1416
  row_updates.append(col_vals)
1417
1417
 
1418
- status = self._tbl_version.get().batch_update(
1418
+ result = self._tbl_version.get().batch_update(
1419
1419
  row_updates,
1420
1420
  rowids,
1421
1421
  error_if_not_exists=if_not_exists == 'error',
@@ -1423,7 +1423,7 @@ class Table(SchemaObject):
1423
1423
  cascade=cascade,
1424
1424
  )
1425
1425
  FileCache.get().emit_eviction_warnings()
1426
- return status
1426
+ return result
1427
1427
 
1428
1428
  def recompute_columns(
1429
1429
  self, *columns: Union[str, ColumnRef], errors_only: bool = False, cascade: bool = True
@@ -1433,7 +1433,7 @@ class Table(SchemaObject):
1433
1433
  Args:
1434
1434
  columns: The names or references of the computed columns to recompute.
1435
1435
  errors_only: If True, only run the recomputation for rows that have errors in the column (ie, the column's
1436
- `errortype` property is non-None). Only allowed for recomputing a single column.
1436
+ `errortype` property indicates that an error occurred). Only allowed for recomputing a single column.
1437
1437
  cascade: if True, also update all computed columns that transitively depend on the recomputed columns.
1438
1438
 
1439
1439
  Examples:
@@ -1484,9 +1484,9 @@ class Table(SchemaObject):
1484
1484
  raise excs.Error(f'Cannot recompute column of a base: {col_name!r}')
1485
1485
  col_names.append(col_name)
1486
1486
 
1487
- status = self._tbl_version.get().recompute_columns(col_names, errors_only=errors_only, cascade=cascade)
1487
+ result = self._tbl_version.get().recompute_columns(col_names, errors_only=errors_only, cascade=cascade)
1488
1488
  FileCache.get().emit_eviction_warnings()
1489
- return status
1489
+ return result
1490
1490
 
1491
1491
  def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
1492
1492
  """Delete rows in this table.
@@ -1588,7 +1588,7 @@ class Table(SchemaObject):
1588
1588
 
1589
1589
  def sync(
1590
1590
  self, stores: Optional[str | list[str]] = None, *, export_data: bool = True, import_data: bool = True
1591
- ) -> 'pxt.io.SyncStatus':
1591
+ ) -> UpdateStatus:
1592
1592
  """
1593
1593
  Synchronizes this table with its linked external stores.
1594
1594
 
@@ -1601,7 +1601,7 @@ class Table(SchemaObject):
1601
1601
  from pixeltable.catalog import Catalog
1602
1602
 
1603
1603
  if self._tbl_version_path.is_snapshot():
1604
- return pxt.io.SyncStatus()
1604
+ return UpdateStatus()
1605
1605
  # we lock the entire tree starting at the root base table in order to ensure that all synced columns can
1606
1606
  # have their updates propagated down the tree
1607
1607
  base_tv = self._tbl_version_path.get_tbl_versions()[-1]
@@ -1617,7 +1617,7 @@ class Table(SchemaObject):
1617
1617
  if store not in all_stores:
1618
1618
  raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
1619
1619
 
1620
- sync_status = pxt.io.SyncStatus()
1620
+ sync_status = UpdateStatus()
1621
1621
  for store in stores:
1622
1622
  store_obj = self._tbl_version.get().external_stores[store]
1623
1623
  store_sync_status = store_obj.sync(self, export_data=export_data, import_data=import_data)
@@ -1631,6 +1631,19 @@ class Table(SchemaObject):
1631
1631
  def _ipython_key_completions_(self) -> list[str]:
1632
1632
  return list(self._get_schema().keys())
1633
1633
 
1634
+ _REPORT_SCHEMA: ClassVar[dict[str, ts.ColumnType]] = {
1635
+ 'version': ts.IntType(),
1636
+ 'created_at': ts.TimestampType(),
1637
+ 'user': ts.StringType(nullable=True),
1638
+ 'note': ts.StringType(),
1639
+ 'inserts': ts.IntType(nullable=True),
1640
+ 'updates': ts.IntType(nullable=True),
1641
+ 'deletes': ts.IntType(nullable=True),
1642
+ 'errors': ts.IntType(nullable=True),
1643
+ 'computed': ts.IntType(),
1644
+ 'schema_change': ts.StringType(),
1645
+ }
1646
+
1634
1647
  def history(self, n: Optional[int] = None) -> pixeltable.dataframe.DataFrameResultSet:
1635
1648
  """Returns rows of information about the versions of this table, most recent first.
1636
1649
 
@@ -1676,19 +1689,25 @@ class Table(SchemaObject):
1676
1689
  for vers_md in vers_list[0 : len(vers_list) - over_count]:
1677
1690
  version = vers_md.version_md.version
1678
1691
  schema_change = md_dict.get(version, '')
1679
- change_type = 'schema' if schema_change != '' else 'data'
1692
+ update_status = vers_md.version_md.update_status
1693
+ if update_status is None:
1694
+ update_status = UpdateStatus()
1695
+ change_type = 'schema' if schema_change != '' else ''
1696
+ if change_type == '':
1697
+ change_type = 'data'
1698
+ rcs = update_status.row_count_stats + update_status.cascade_row_count_stats
1680
1699
  report_line = [
1681
1700
  version,
1682
1701
  datetime.datetime.fromtimestamp(vers_md.version_md.created_at),
1702
+ vers_md.version_md.user,
1683
1703
  change_type,
1704
+ rcs.ins_rows,
1705
+ rcs.upd_rows,
1706
+ rcs.del_rows,
1707
+ rcs.num_excs,
1708
+ rcs.computed_values,
1684
1709
  schema_change,
1685
1710
  ]
1686
1711
  report_lines.append(report_line)
1687
1712
 
1688
- report_schema = {
1689
- 'version': ts.IntType(),
1690
- 'created_at': ts.TimestampType(),
1691
- 'change': ts.StringType(),
1692
- 'schema_change': ts.StringType(),
1693
- }
1694
- return pxt.dataframe.DataFrameResultSet(report_lines, report_schema)
1713
+ return pxt.dataframe.DataFrameResultSet(report_lines, self._REPORT_SCHEMA)
@@ -29,14 +29,8 @@ if TYPE_CHECKING:
29
29
 
30
30
  from ..func.globals import resolve_symbol
31
31
  from .column import Column
32
- from .globals import (
33
- _POS_COLUMN_NAME,
34
- _ROWID_COLUMN_NAME,
35
- MediaValidation,
36
- RowCountStats,
37
- UpdateStatus,
38
- is_valid_identifier,
39
- )
32
+ from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, is_valid_identifier
33
+ from .update_status import RowCountStats, UpdateStatus
40
34
 
41
35
  if TYPE_CHECKING:
42
36
  from pixeltable import exec, store
@@ -243,9 +237,15 @@ class TableVersion:
243
237
  additional_md={},
244
238
  )
245
239
 
246
- # create schema.TableVersion
240
+ # create schema.TableVersion of the initial version
247
241
  table_version_md = schema.TableVersionMd(
248
- tbl_id=tbl_id_str, created_at=timestamp, version=0, schema_version=0, additional_md={}
242
+ tbl_id=tbl_id_str,
243
+ created_at=timestamp,
244
+ version=0,
245
+ schema_version=0,
246
+ user=user,
247
+ update_status=None,
248
+ additional_md={},
249
249
  )
250
250
 
251
251
  # create schema.TableSchemaVersion
@@ -324,6 +324,7 @@ class TableVersion:
324
324
 
325
325
  @classmethod
326
326
  def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
327
+ assert Env.get().in_xact
327
328
  tbl_id = UUID(md.tbl_md.tbl_id)
328
329
  _logger.info(f'Creating replica table version {tbl_id}:{md.version_md.version}.')
329
330
  view_md = md.tbl_md.view_md
@@ -333,6 +334,10 @@ class TableVersion:
333
334
  tbl_id, md.tbl_md, md.version_md.version, md.schema_version_md, [], base_path=base_path, base=base
334
335
  )
335
336
  cat = pxt.catalog.Catalog.get()
337
+ # We're creating a new TableVersion replica, so we should never have seen this particular
338
+ # TableVersion instance before.
339
+ assert tbl_version.effective_version is not None
340
+ assert (tbl_version.id, tbl_version.effective_version) not in cat._tbl_versions
336
341
  cat._tbl_versions[tbl_version.id, tbl_version.effective_version] = tbl_version
337
342
  tbl_version.init()
338
343
  tbl_version.store_tbl.create()
@@ -454,10 +459,10 @@ class TableVersion:
454
459
  # fix up the sa column type of the index value and undo columns
455
460
  val_col = self.cols_by_id[md.index_val_col_id]
456
461
  val_col.sa_col_type = idx.index_sa_type()
457
- val_col._records_errors = False
462
+ val_col._stores_cellmd = False
458
463
  undo_col = self.cols_by_id[md.index_val_undo_col_id]
459
464
  undo_col.sa_col_type = idx.index_sa_type()
460
- undo_col._records_errors = False
465
+ undo_col._stores_cellmd = False
461
466
  idx_info = self.IndexInfo(id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col)
462
467
  self.idxs_by_name[md.name] = idx_info
463
468
 
@@ -473,7 +478,13 @@ class TableVersion:
473
478
  else:
474
479
  self.store_tbl = StoreTable(self)
475
480
 
476
- def _write_md(self, new_version: bool, new_version_ts: float, new_schema_version: bool) -> None:
481
+ def _write_md(
482
+ self,
483
+ new_version: bool,
484
+ new_version_ts: float,
485
+ new_schema_version: bool,
486
+ update_status: Optional[UpdateStatus] = None,
487
+ ) -> None:
477
488
  """Writes table metadata to the database.
478
489
 
479
490
  Args:
@@ -484,22 +495,23 @@ class TableVersion:
484
495
  """
485
496
  from pixeltable.catalog import Catalog
486
497
 
487
- version_md: Optional[schema.TableVersionMd] = (
488
- schema.TableVersionMd(
489
- tbl_id=str(self.id),
490
- created_at=new_version_ts,
491
- version=self.version,
492
- schema_version=self.schema_version,
493
- additional_md={},
494
- )
495
- if new_version
496
- else None
497
- )
498
+ version_md = self._create_version_md(new_version_ts, update_status=update_status) if new_version else None
498
499
 
499
500
  Catalog.get().store_tbl_md(
500
501
  self.id, None, self._tbl_md, version_md, self._schema_version_md if new_schema_version else None
501
502
  )
502
503
 
504
+ def _write_md_update_status(self, new_version_ts: float, update_status: UpdateStatus) -> None:
505
+ """Writes a new update_status in the table version metadata in the database.
506
+
507
+ Args:
508
+ new_version_ts: timestamp of the change
509
+ update_status: UpdateStatus to be updated in the database
510
+ """
511
+ from pixeltable.catalog import Catalog
512
+
513
+ Catalog.get().update_tbl_version_md(self._create_version_md(new_version_ts, update_status))
514
+
503
515
  def _store_idx_name(self, idx_id: int) -> str:
504
516
  """Return name of index in the store, which needs to be globally unique"""
505
517
  return f'idx_{self.id.hex}_{idx_id}'
@@ -553,7 +565,7 @@ class TableVersion:
553
565
  stored=True,
554
566
  schema_version_add=self.schema_version,
555
567
  schema_version_drop=None,
556
- records_errors=idx.records_value_errors(),
568
+ stores_cellmd=idx.records_value_errors(),
557
569
  )
558
570
  val_col.tbl = self
559
571
  val_col.col_type = val_col.col_type.copy(nullable=True)
@@ -567,7 +579,7 @@ class TableVersion:
567
579
  stored=True,
568
580
  schema_version_add=self.schema_version,
569
581
  schema_version_drop=None,
570
- records_errors=False,
582
+ stores_cellmd=False,
571
583
  )
572
584
  undo_col.tbl = self
573
585
  undo_col.col_type = undo_col.col_type.copy(nullable=True)
@@ -679,7 +691,7 @@ class TableVersion:
679
691
  # Create indices and their md records
680
692
  for col, (idx, val_col, undo_col) in index_cols.items():
681
693
  self._create_index(col, val_col, undo_col, idx_name=None, idx=idx)
682
- self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
694
+ self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True, update_status=status)
683
695
  _logger.info(f'Added columns {[col.name for col in cols]} to table {self.name}, new version: {self.version}')
684
696
 
685
697
  msg = (
@@ -899,6 +911,7 @@ class TableVersion:
899
911
  assert (rows is None) != (df is None) # Exactly one must be specified
900
912
  if rows is not None:
901
913
  plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
914
+
902
915
  else:
903
916
  plan = Planner.create_df_insert_plan(self, df, ignore_errors=not fail_on_exception)
904
917
 
@@ -909,7 +922,10 @@ class TableVersion:
909
922
  self.next_row_id += 1
910
923
  yield rowid
911
924
 
912
- return self._insert(plan, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception)
925
+ result = self._insert(
926
+ plan, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception
927
+ )
928
+ return result
913
929
 
914
930
  def _insert(
915
931
  self,
@@ -923,22 +939,26 @@ class TableVersion:
923
939
  """Insert rows produced by exec_plan and propagate to views"""
924
940
  # we're creating a new version
925
941
  self.version += 1
926
- cols_with_excs, result = self.store_tbl.insert_rows(
942
+ cols_with_excs, row_counts = self.store_tbl.insert_rows(
927
943
  exec_plan, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc
928
944
  )
929
- result += UpdateStatus(cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs])
930
- self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
945
+ result = UpdateStatus(
946
+ cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs],
947
+ row_count_stats=row_counts,
948
+ )
931
949
 
932
950
  # update views
933
951
  for view in self.mutable_views:
934
952
  from pixeltable.plan import Planner
935
953
 
936
- plan, _ = Planner.create_view_load_plan(view.get().path, propagates_insert=True)
937
- status = view.get()._insert(plan, timestamp, print_stats=print_stats)
954
+ plan2, _ = Planner.create_view_load_plan(view.get().path, propagates_insert=True)
955
+ status = view.get()._insert(plan2, timestamp, print_stats=print_stats)
938
956
  result += status.to_cascade()
939
957
 
958
+ # Use the net status after all propagations
959
+ self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False, update_status=result)
940
960
  if print_stats:
941
- plan.ctx.profile.print(num_rows=result.num_rows) # This is the net rows after all propagations
961
+ exec_plan.ctx.profile.print(num_rows=result.num_rows)
942
962
  _logger.info(f'TableVersion {self.name}: new version {self.version}')
943
963
  return result
944
964
 
@@ -1108,20 +1128,20 @@ class TableVersion:
1108
1128
  cascade: bool,
1109
1129
  show_progress: bool = True,
1110
1130
  ) -> UpdateStatus:
1111
- if plan is not None:
1112
- # we're creating a new version
1131
+ result = UpdateStatus()
1132
+ create_new_table_version = plan is not None
1133
+ if create_new_table_version:
1113
1134
  self.version += 1
1114
- cols_with_excs, status = self.store_tbl.insert_rows(plan, v_min=self.version, show_progress=show_progress)
1115
- result = status.insert_to_update()
1135
+ cols_with_excs, row_counts = self.store_tbl.insert_rows(
1136
+ plan, v_min=self.version, show_progress=show_progress
1137
+ )
1116
1138
  result += UpdateStatus(
1117
- cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
1139
+ row_count_stats=row_counts.insert_to_update(),
1140
+ cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs],
1118
1141
  )
1119
1142
  self.store_tbl.delete_rows(
1120
1143
  self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause
1121
1144
  )
1122
- self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
1123
- else:
1124
- result = UpdateStatus()
1125
1145
 
1126
1146
  if cascade:
1127
1147
  base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
@@ -1137,7 +1157,8 @@ class TableVersion:
1137
1157
  plan, None, recomputed_view_cols, base_versions=base_versions, timestamp=timestamp, cascade=True
1138
1158
  )
1139
1159
  result += status.to_cascade()
1140
-
1160
+ if create_new_table_version:
1161
+ self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False, update_status=result)
1141
1162
  return result
1142
1163
 
1143
1164
  def delete(self, where: Optional[exprs.Expr] = None) -> UpdateStatus:
@@ -1191,12 +1212,13 @@ class TableVersion:
1191
1212
  if del_rows > 0:
1192
1213
  # we're creating a new version
1193
1214
  self.version += 1
1194
- self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
1195
1215
  for view in self.mutable_views:
1196
1216
  status = view.get().propagate_delete(
1197
1217
  where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
1198
1218
  )
1199
1219
  result += status.to_cascade()
1220
+ if del_rows > 0:
1221
+ self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False, update_status=result)
1200
1222
  return result
1201
1223
 
1202
1224
  def revert(self) -> None:
@@ -1538,12 +1560,14 @@ class TableVersion:
1538
1560
  {'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()} for store in stores
1539
1561
  ]
1540
1562
 
1541
- def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
1563
+ def _create_version_md(self, timestamp: float, update_status: Optional[UpdateStatus]) -> schema.TableVersionMd:
1542
1564
  return schema.TableVersionMd(
1543
1565
  tbl_id=str(self.id),
1544
1566
  created_at=timestamp,
1545
1567
  version=self.version,
1546
1568
  schema_version=self.schema_version,
1569
+ user=Env.get().user,
1570
+ update_status=update_status,
1547
1571
  additional_md={},
1548
1572
  )
1549
1573
 
@@ -0,0 +1,179 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import TYPE_CHECKING
5
+
6
+ if TYPE_CHECKING:
7
+ from IPython.lib.pretty import RepresentationPrinter
8
+
9
+
10
+ @dataclass(frozen=True)
11
+ class RowCountStats:
12
+ """
13
+ Statistics about the counts of rows affected by a table operation.
14
+ """
15
+
16
+ ins_rows: int = 0 # rows inserted
17
+ del_rows: int = 0 # rows deleted
18
+ upd_rows: int = 0 # rows updated
19
+ num_excs: int = 0 # total number of exceptions
20
+ # TODO: disambiguate what this means: # of slots computed or # of columns computed?
21
+ computed_values: int = 0 # number of computed values (e.g., computed columns) affected by the operation
22
+
23
+ @property
24
+ def num_rows(self) -> int:
25
+ return self.ins_rows + self.del_rows + self.upd_rows
26
+
27
+ def insert_to_update(self) -> 'RowCountStats':
28
+ """
29
+ Convert insert row count stats to update row count stats.
30
+ This is used when an insert operation is treated as an update.
31
+ """
32
+ return RowCountStats(
33
+ ins_rows=0,
34
+ del_rows=self.del_rows,
35
+ upd_rows=self.upd_rows + self.ins_rows,
36
+ num_excs=self.num_excs,
37
+ computed_values=self.computed_values,
38
+ )
39
+
40
+ def __add__(self, other: 'RowCountStats') -> 'RowCountStats':
41
+ """
42
+ Add the stats from two RowCountStats objects together.
43
+ """
44
+ return RowCountStats(
45
+ ins_rows=self.ins_rows + other.ins_rows,
46
+ del_rows=self.del_rows + other.del_rows,
47
+ upd_rows=self.upd_rows + other.upd_rows,
48
+ num_excs=self.num_excs + other.num_excs,
49
+ computed_values=self.computed_values + other.computed_values,
50
+ )
51
+
52
+
53
+ @dataclass(frozen=True)
54
+ class UpdateStatus:
55
+ """
56
+ Information about changes to table data or table schema
57
+ """
58
+
59
+ updated_cols: list[str] = field(default_factory=list)
60
+ cols_with_excs: list[str] = field(default_factory=list)
61
+
62
+ # stats for the rows affected by the operation
63
+ row_count_stats: RowCountStats = field(default_factory=RowCountStats)
64
+
65
+ # stats for changes cascaded to other tables
66
+ cascade_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
67
+
68
+ # stats for the rows affected by the operation in an external store
69
+ ext_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
70
+
71
+ @property
72
+ def num_rows(self) -> int:
73
+ return self.row_count_stats.num_rows + self.cascade_row_count_stats.num_rows
74
+
75
+ @property
76
+ def num_excs(self) -> int:
77
+ return self.row_count_stats.num_excs + self.cascade_row_count_stats.num_excs
78
+
79
+ @property
80
+ def num_computed_values(self) -> int:
81
+ return self.row_count_stats.computed_values + self.cascade_row_count_stats.computed_values
82
+
83
+ def insert_to_update(self) -> 'UpdateStatus':
84
+ """
85
+ Convert the update status from an insert operation to an update operation.
86
+ This is used when an insert operation is treated as an update.
87
+ """
88
+ return UpdateStatus(
89
+ updated_cols=self.updated_cols,
90
+ cols_with_excs=self.cols_with_excs,
91
+ row_count_stats=self.row_count_stats.insert_to_update(),
92
+ cascade_row_count_stats=self.cascade_row_count_stats.insert_to_update(),
93
+ ext_row_count_stats=self.ext_row_count_stats,
94
+ )
95
+
96
+ def to_cascade(self) -> 'UpdateStatus':
97
+ """
98
+ Convert the update status to a cascade update status.
99
+ This is used when an operation cascades changes to other tables.
100
+ """
101
+ return UpdateStatus(
102
+ updated_cols=self.updated_cols,
103
+ cols_with_excs=self.cols_with_excs,
104
+ row_count_stats=RowCountStats(),
105
+ cascade_row_count_stats=self.cascade_row_count_stats + self.row_count_stats,
106
+ ext_row_count_stats=self.ext_row_count_stats,
107
+ )
108
+
109
+ def __add__(self, other: 'UpdateStatus') -> UpdateStatus:
110
+ """
111
+ Add the update status from two UpdateStatus objects together.
112
+ """
113
+ return UpdateStatus(
114
+ updated_cols=list(dict.fromkeys(self.updated_cols + other.updated_cols)),
115
+ cols_with_excs=list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs)),
116
+ row_count_stats=self.row_count_stats + other.row_count_stats,
117
+ cascade_row_count_stats=self.cascade_row_count_stats + other.cascade_row_count_stats,
118
+ ext_row_count_stats=self.ext_row_count_stats + other.ext_row_count_stats,
119
+ )
120
+
121
+ @property
122
+ def insert_msg(self) -> str:
123
+ """Return a message describing the results of an insert operation."""
124
+ if self.num_excs == 0:
125
+ cols_with_excs_str = ''
126
+ else:
127
+ cols_with_excs_str = (
128
+ f' across {len(self.cols_with_excs)} column{"" if len(self.cols_with_excs) == 1 else "s"}'
129
+ )
130
+ cols_with_excs_str += f' ({", ".join(self.cols_with_excs)})'
131
+ msg = (
132
+ f'Inserted {self.num_rows} row{"" if self.num_rows == 1 else "s"} '
133
+ f'with {self.num_excs} error{"" if self.num_excs == 1 else "s"}{cols_with_excs_str}.'
134
+ )
135
+ return msg
136
+
137
+ @classmethod
138
+ def __cnt_str(cls, cnt: int, item: str) -> str:
139
+ assert cnt > 0
140
+ return f'{cnt} {item}{"" if cnt == 1 else "s"}'
141
+
142
+ def _repr_pretty_(self, p: 'RepresentationPrinter', cycle: bool) -> None:
143
+ messages = []
144
+ # Combine row count stats and cascade row count stats
145
+ stats = self.row_count_stats + self.cascade_row_count_stats
146
+ if stats.ins_rows > 0:
147
+ messages.append(f'{self.__cnt_str(stats.ins_rows, "row")} inserted')
148
+ if stats.del_rows > 0:
149
+ messages.append(f'{self.__cnt_str(stats.del_rows, "row")} deleted')
150
+ if stats.upd_rows > 0:
151
+ messages.append(f'{self.__cnt_str(stats.upd_rows, "row")} updated')
152
+ if stats.computed_values > 0:
153
+ messages.append(f'{self.__cnt_str(stats.computed_values, "value")} computed')
154
+ if stats.num_excs > 0:
155
+ messages.append(self.__cnt_str(stats.num_excs, 'exception'))
156
+ p.text(', '.join(messages) + '.' if len(messages) > 0 else 'No rows affected.')
157
+
158
+ @property
159
+ def pxt_rows_updated(self) -> int:
160
+ """
161
+ Returns the number of Pixeltable rows that were updated as a result of the operation.
162
+ """
163
+ return (self.row_count_stats + self.cascade_row_count_stats).upd_rows
164
+
165
+ @property
166
+ def external_rows_updated(self) -> int:
167
+ return self.ext_row_count_stats.upd_rows
168
+
169
+ @property
170
+ def external_rows_created(self) -> int:
171
+ return self.ext_row_count_stats.ins_rows
172
+
173
+ @property
174
+ def external_rows_deleted(self) -> int:
175
+ return self.ext_row_count_stats.del_rows
176
+
177
+ @property
178
+ def ext_num_rows(self) -> int:
179
+ return self.ext_row_count_stats.num_rows