pixeltable 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (60)
  1. pixeltable/__init__.py +1 -0
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +3 -11
  4. pixeltable/catalog/catalog.py +575 -220
  5. pixeltable/catalog/column.py +22 -23
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +2 -148
  8. pixeltable/catalog/insertable_table.py +15 -13
  9. pixeltable/catalog/path.py +6 -0
  10. pixeltable/catalog/schema_object.py +9 -4
  11. pixeltable/catalog/table.py +96 -85
  12. pixeltable/catalog/table_version.py +257 -174
  13. pixeltable/catalog/table_version_path.py +1 -1
  14. pixeltable/catalog/tbl_ops.py +44 -0
  15. pixeltable/catalog/update_status.py +179 -0
  16. pixeltable/catalog/view.py +50 -56
  17. pixeltable/config.py +76 -12
  18. pixeltable/dataframe.py +19 -6
  19. pixeltable/env.py +50 -4
  20. pixeltable/exec/data_row_batch.py +3 -1
  21. pixeltable/exec/exec_node.py +7 -24
  22. pixeltable/exec/expr_eval/schedulers.py +134 -7
  23. pixeltable/exec/in_memory_data_node.py +6 -7
  24. pixeltable/exprs/column_property_ref.py +21 -9
  25. pixeltable/exprs/column_ref.py +7 -2
  26. pixeltable/exprs/function_call.py +2 -2
  27. pixeltable/exprs/row_builder.py +10 -9
  28. pixeltable/exprs/rowid_ref.py +0 -4
  29. pixeltable/func/function.py +3 -3
  30. pixeltable/functions/audio.py +36 -9
  31. pixeltable/functions/gemini.py +4 -4
  32. pixeltable/functions/openai.py +1 -2
  33. pixeltable/functions/video.py +59 -16
  34. pixeltable/globals.py +109 -24
  35. pixeltable/io/__init__.py +1 -1
  36. pixeltable/io/datarows.py +2 -1
  37. pixeltable/io/external_store.py +3 -55
  38. pixeltable/io/globals.py +4 -4
  39. pixeltable/io/hf_datasets.py +10 -2
  40. pixeltable/io/label_studio.py +16 -16
  41. pixeltable/io/pandas.py +1 -0
  42. pixeltable/io/table_data_conduit.py +12 -13
  43. pixeltable/iterators/audio.py +17 -8
  44. pixeltable/iterators/image.py +5 -2
  45. pixeltable/metadata/__init__.py +1 -1
  46. pixeltable/metadata/converters/convert_39.py +125 -0
  47. pixeltable/metadata/converters/util.py +3 -0
  48. pixeltable/metadata/notes.py +1 -0
  49. pixeltable/metadata/schema.py +50 -1
  50. pixeltable/plan.py +4 -0
  51. pixeltable/share/packager.py +20 -38
  52. pixeltable/store.py +40 -51
  53. pixeltable/type_system.py +2 -2
  54. pixeltable/utils/coroutine.py +6 -23
  55. pixeltable/utils/media_store.py +50 -0
  56. {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/METADATA +1 -1
  57. {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/RECORD +60 -57
  58. {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/LICENSE +0 -0
  59. {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/WHEEL +0 -0
  60. {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/entry_points.txt +0 -0
@@ -23,20 +23,15 @@ from pixeltable.utils.exception_handler import run_cleanup_on_exception
23
23
  from pixeltable.utils.filecache import FileCache
24
24
  from pixeltable.utils.media_store import MediaStore
25
25
 
26
+ from .tbl_ops import TableOp
27
+
26
28
  if TYPE_CHECKING:
27
29
  from pixeltable.plan import SampleClause
28
30
 
29
-
30
31
  from ..func.globals import resolve_symbol
31
32
  from .column import Column
32
- from .globals import (
33
- _POS_COLUMN_NAME,
34
- _ROWID_COLUMN_NAME,
35
- MediaValidation,
36
- RowCountStats,
37
- UpdateStatus,
38
- is_valid_identifier,
39
- )
33
+ from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, is_valid_identifier
34
+ from .update_status import RowCountStats, UpdateStatus
40
35
 
41
36
  if TYPE_CHECKING:
42
37
  from pixeltable import exec, store
@@ -46,6 +41,19 @@ if TYPE_CHECKING:
46
41
  _logger = logging.getLogger('pixeltable')
47
42
 
48
43
 
44
+ @dataclasses.dataclass(frozen=True)
45
+ class TableVersionMd:
46
+ """
47
+ Complete set of md records for a specific TableVersion instance.
48
+
49
+ TODO: subsume schema.FullTableMd
50
+ """
51
+
52
+ tbl_md: schema.TableMd
53
+ version_md: schema.TableVersionMd
54
+ schema_version_md: schema.TableSchemaVersionMd
55
+
56
+
49
57
  class TableVersion:
50
58
  """
51
59
  TableVersion represents a particular version of a table/view along with its physical representation:
@@ -71,6 +79,7 @@ class TableVersion:
71
79
 
72
80
  # record metadata stored in catalog
73
81
  _tbl_md: schema.TableMd
82
+ _version_md: schema.TableVersionMd
74
83
  _schema_version_md: schema.TableSchemaVersionMd
75
84
 
76
85
  effective_version: Optional[int]
@@ -84,7 +93,7 @@ class TableVersion:
84
93
  num_iterator_cols: int
85
94
 
86
95
  # target for data operation propagation (only set for non-snapshots, and only records non-snapshot views)
87
- mutable_views: set[TableVersionHandle]
96
+ mutable_views: frozenset[TableVersionHandle]
88
97
 
89
98
  # contains complete history of columns, incl dropped ones
90
99
  cols: list[Column]
@@ -98,6 +107,8 @@ class TableVersion:
98
107
  external_stores: dict[str, pxt.io.ExternalStore]
99
108
  store_tbl: Optional['store.StoreBase']
100
109
 
110
+ is_initialized: bool # True if init() has been called
111
+
101
112
  # used by Catalog to invalidate cached instances at the end of a transaction;
102
113
  # True if this instance reflects the state of stored metadata in the context of this transaction and
103
114
  # it is the instance cached in Catalog
@@ -116,6 +127,7 @@ class TableVersion:
116
127
  self,
117
128
  id: UUID,
118
129
  tbl_md: schema.TableMd,
130
+ version_md: schema.TableVersionMd,
119
131
  effective_version: Optional[int],
120
132
  schema_version_md: schema.TableSchemaVersionMd,
121
133
  mutable_views: list[TableVersionHandle],
@@ -123,8 +135,10 @@ class TableVersion:
123
135
  base: Optional[TableVersionHandle] = None,
124
136
  ):
125
137
  self.is_validated = True # a freshly constructed instance is always valid
138
+ self.is_initialized = False
126
139
  self.id = id
127
140
  self._tbl_md = copy.deepcopy(tbl_md)
141
+ self._version_md = copy.deepcopy(version_md)
128
142
  self._schema_version_md = copy.deepcopy(schema_version_md)
129
143
  self.effective_version = effective_version
130
144
  assert not (self.is_view and base is None)
@@ -165,7 +179,7 @@ class TableVersion:
165
179
  self.num_iterator_cols = len(output_schema)
166
180
  assert tbl_md.view_md.iterator_args is not None
167
181
 
168
- self.mutable_views = set(mutable_views)
182
+ self.mutable_views = frozenset(mutable_views)
169
183
  assert self.is_mutable or len(self.mutable_views) == 0
170
184
 
171
185
  self.cols = []
@@ -181,7 +195,9 @@ class TableVersion:
181
195
  """Create a snapshot copy of this TableVersion"""
182
196
  assert not self.is_snapshot
183
197
  base = self.path.base.tbl_version if self.is_view else None
184
- return TableVersion(self.id, self.tbl_md, self.version, self.schema_version_md, mutable_views=[], base=base)
198
+ return TableVersion(
199
+ self.id, self.tbl_md, self.version_md, self.version, self.schema_version_md, mutable_views=[], base=base
200
+ )
185
201
 
186
202
  @property
187
203
  def versioned_name(self) -> str:
@@ -196,6 +212,74 @@ class TableVersion:
196
212
 
197
213
  return TableVersionHandle(self.id, self.effective_version, self)
198
214
 
215
+ @classmethod
216
+ def create_initial_md(
217
+ cls,
218
+ name: str,
219
+ cols: list[Column],
220
+ num_retained_versions: int,
221
+ comment: str,
222
+ media_validation: MediaValidation,
223
+ view_md: Optional[schema.ViewMd] = None,
224
+ ) -> TableVersionMd:
225
+ user = Env.get().user
226
+
227
+ # assign ids
228
+ cols_by_name: dict[str, Column] = {}
229
+ for pos, col in enumerate(cols):
230
+ col.id = pos
231
+ col.schema_version_add = 0
232
+ cols_by_name[col.name] = col
233
+ if col.is_computed:
234
+ col.check_value_expr()
235
+
236
+ timestamp = time.time()
237
+ column_md = cls._create_column_md(cols)
238
+ tbl_id = uuid.uuid4()
239
+ tbl_id_str = str(tbl_id)
240
+ tbl_md = schema.TableMd(
241
+ tbl_id=tbl_id_str,
242
+ name=name,
243
+ user=user,
244
+ is_replica=False,
245
+ current_version=0,
246
+ current_schema_version=0,
247
+ next_col_id=len(cols),
248
+ next_idx_id=0,
249
+ next_row_id=0,
250
+ view_sn=0,
251
+ column_md=column_md,
252
+ index_md={},
253
+ external_stores=[],
254
+ view_md=view_md,
255
+ additional_md={},
256
+ )
257
+
258
+ table_version_md = schema.TableVersionMd(
259
+ tbl_id=tbl_id_str, created_at=timestamp, version=0, schema_version=0, additional_md={}
260
+ )
261
+
262
+ schema_col_md: dict[int, schema.SchemaColumn] = {}
263
+ for pos, col in enumerate(cols):
264
+ md = schema.SchemaColumn(
265
+ pos=pos,
266
+ name=col.name,
267
+ media_validation=col._media_validation.name.lower() if col._media_validation is not None else None,
268
+ )
269
+ schema_col_md[col.id] = md
270
+
271
+ schema_version_md = schema.TableSchemaVersionMd(
272
+ tbl_id=tbl_id_str,
273
+ schema_version=0,
274
+ preceding_schema_version=None,
275
+ columns=schema_col_md,
276
+ num_retained_versions=num_retained_versions,
277
+ comment=comment,
278
+ media_validation=media_validation.name.lower(),
279
+ additional_md={},
280
+ )
281
+ return TableVersionMd(tbl_md, table_version_md, schema_version_md)
282
+
199
283
  @classmethod
200
284
  def create(
201
285
  cls,
@@ -205,8 +289,6 @@ class TableVersion:
205
289
  num_retained_versions: int,
206
290
  comment: str,
207
291
  media_validation: MediaValidation,
208
- # base_path: Optional[pxt.catalog.TableVersionPath] = None,
209
- view_md: Optional[schema.ViewMd] = None,
210
292
  ) -> tuple[UUID, Optional[TableVersion]]:
211
293
  user = Env.get().user
212
294
 
@@ -239,13 +321,19 @@ class TableVersion:
239
321
  column_md=column_md,
240
322
  index_md={},
241
323
  external_stores=[],
242
- view_md=view_md,
324
+ view_md=None,
243
325
  additional_md={},
244
326
  )
245
327
 
246
- # create schema.TableVersion
328
+ # create schema.TableVersion of the initial version
247
329
  table_version_md = schema.TableVersionMd(
248
- tbl_id=tbl_id_str, created_at=timestamp, version=0, schema_version=0, additional_md={}
330
+ tbl_id=tbl_id_str,
331
+ created_at=timestamp,
332
+ version=0,
333
+ schema_version=0,
334
+ user=user,
335
+ update_status=None,
336
+ additional_md={},
249
337
  )
250
338
 
251
339
  # create schema.TableSchemaVersion
@@ -271,47 +359,15 @@ class TableVersion:
271
359
 
272
360
  cat = pxt.catalog.Catalog.get()
273
361
 
274
- # if this is purely a snapshot (it doesn't require any additional storage for columns and it doesn't have a
275
- # predicate to apply at runtime), we don't create a physical table and simply use the base's table version path
276
- if (
277
- view_md is not None
278
- and view_md.is_snapshot
279
- and view_md.predicate is None
280
- and view_md.sample_clause is None
281
- and len(cols) == 0
282
- ):
283
- cat.store_tbl_md(
284
- tbl_id=tbl_id,
285
- dir_id=dir_id,
286
- tbl_md=table_md,
287
- version_md=table_version_md,
288
- schema_version_md=schema_version_md,
289
- )
290
- return tbl_id, None
291
-
292
- # assert (base_path is not None) == (view_md is not None)
293
- is_snapshot = view_md is not None and view_md.is_snapshot
294
- effective_version = 0 if is_snapshot else None
295
- base_path = pxt.catalog.TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
296
- base = base_path.tbl_version if base_path is not None else None
297
- tbl_version = cls(tbl_id, table_md, effective_version, schema_version_md, [], base_path=base_path, base=base)
362
+ tbl_version = cls(tbl_id, table_md, table_version_md, None, schema_version_md, [])
298
363
  # TODO: break this up, so that Catalog.create_table() registers tbl_version
299
- cat._tbl_versions[tbl_id, effective_version] = tbl_version
364
+ cat._tbl_versions[tbl_id, None] = tbl_version
300
365
  tbl_version.init()
301
366
  tbl_version.store_tbl.create()
302
- is_mutable = not is_snapshot and not table_md.is_replica
303
- if base is not None and base.get().is_mutable and is_mutable:
304
- from .table_version_handle import TableVersionHandle
305
-
306
- handle = TableVersionHandle(tbl_version.id, effective_version)
307
- assert handle not in base.get().mutable_views
308
- base.get().mutable_views.add(handle)
309
-
310
- if view_md is None or not view_md.is_snapshot:
311
- # add default indices, after creating the store table
312
- for col in tbl_version.cols_by_name.values():
313
- status = tbl_version._add_default_index(col)
314
- assert status is None or status.num_excs == 0
367
+ # add default indices, after creating the store table
368
+ for col in tbl_version.cols_by_name.values():
369
+ status = tbl_version._add_default_index(col)
370
+ assert status is None or status.num_excs == 0
315
371
 
316
372
  cat.store_tbl_md(
317
373
  tbl_id=tbl_id,
@@ -322,17 +378,50 @@ class TableVersion:
322
378
  )
323
379
  return tbl_id, tbl_version
324
380
 
381
+ def exec_op(self, op: TableOp) -> None:
382
+ if op.create_store_table_op is not None:
383
+ # don't use Catalog.begin_xact() here, to avoid accidental recursive calls to exec_op()
384
+ with Env.get().begin_xact():
385
+ self.store_tbl.create()
386
+
387
+ elif op.load_view_op is not None:
388
+ from pixeltable.catalog import Catalog
389
+ from pixeltable.plan import Planner
390
+
391
+ from .table_version_path import TableVersionPath
392
+
393
+ # clear out any remaining media files from an aborted previous attempt
394
+ MediaStore.delete(self.id)
395
+ view_path = TableVersionPath.from_dict(op.load_view_op.view_path)
396
+ plan, _ = Planner.create_view_load_plan(view_path)
397
+ _, row_counts = self.store_tbl.insert_rows(plan, v_min=self.version)
398
+ status = UpdateStatus(row_count_stats=row_counts)
399
+ Catalog.get().store_update_status(self.id, self.version, status)
400
+ _logger.debug(f'Loaded view {self.name} with {row_counts.num_rows} rows')
401
+
325
402
  @classmethod
326
403
  def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
404
+ assert Env.get().in_xact
327
405
  tbl_id = UUID(md.tbl_md.tbl_id)
328
406
  _logger.info(f'Creating replica table version {tbl_id}:{md.version_md.version}.')
329
407
  view_md = md.tbl_md.view_md
330
408
  base_path = pxt.catalog.TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
331
409
  base = base_path.tbl_version if base_path is not None else None
332
410
  tbl_version = cls(
333
- tbl_id, md.tbl_md, md.version_md.version, md.schema_version_md, [], base_path=base_path, base=base
411
+ tbl_id,
412
+ md.tbl_md,
413
+ md.version_md,
414
+ md.version_md.version,
415
+ md.schema_version_md,
416
+ [],
417
+ base_path=base_path,
418
+ base=base,
334
419
  )
335
420
  cat = pxt.catalog.Catalog.get()
421
+ # We're creating a new TableVersion replica, so we should never have seen this particular
422
+ # TableVersion instance before.
423
+ assert tbl_version.effective_version is not None
424
+ assert (tbl_version.id, tbl_version.effective_version) not in cat._tbl_versions
336
425
  cat._tbl_versions[tbl_version.id, tbl_version.effective_version] = tbl_version
337
426
  tbl_version.init()
338
427
  tbl_version.store_tbl.create()
@@ -340,23 +429,18 @@ class TableVersion:
340
429
  return tbl_version
341
430
 
342
431
  def drop(self) -> None:
343
- if self.is_view and self.is_mutable:
344
- # update mutable_views
345
- # TODO: invalidate base to force reload
346
- from .table_version_handle import TableVersionHandle
432
+ # if self.is_view and self.is_mutable:
433
+ # # update mutable_views
434
+ # # TODO: invalidate base to force reload
435
+ # from .table_version_handle import TableVersionHandle
436
+ #
437
+ # assert self.base is not None
438
+ # if self.base.get().is_mutable:
439
+ # self.base.get().mutable_views.remove(TableVersionHandle.create(self))
347
440
 
348
- assert self.base is not None
349
- if self.base.get().is_mutable:
350
- self.base.get().mutable_views.remove(TableVersionHandle.create(self))
351
-
352
- # cat = Catalog.get()
353
- # delete this table and all associated data
354
441
  MediaStore.delete(self.id)
355
442
  FileCache.get().clear(tbl_id=self.id)
356
- # cat.delete_tbl_md(self.id)
357
443
  self.store_tbl.drop()
358
- # de-register table version from catalog
359
- # cat.remove_tbl_version(self)
360
444
 
361
445
  def init(self) -> None:
362
446
  """
@@ -368,11 +452,11 @@ class TableVersion:
368
452
  cat = Catalog.get()
369
453
  assert (self.id, self.effective_version) in cat._tbl_versions
370
454
  self._init_schema()
371
- if not self.is_snapshot:
455
+ if self.is_mutable:
372
456
  cat.record_column_dependencies(self)
373
-
374
457
  # init external stores; this needs to happen after the schema is created
375
458
  self._init_external_stores()
459
+ self.is_initialized = True
376
460
 
377
461
  def _init_schema(self) -> None:
378
462
  # create columns first, so the indices can reference them
@@ -448,16 +532,23 @@ class TableVersion:
448
532
  # instantiate index object
449
533
  cls_name = md.class_fqn.rsplit('.', 1)[-1]
450
534
  cls = getattr(index_module, cls_name)
451
- idx_col = self.path.get_column_by_id(UUID(md.indexed_col_tbl_id), md.indexed_col_id)
535
+ idx_col: Column
536
+ if md.indexed_col_tbl_id == str(self.id):
537
+ # this is a reference to one of our columns: avoid TVP.get_column_by_id() here, because we're not fully
538
+ # initialized yet
539
+ idx_col = self.cols_by_id[md.indexed_col_id]
540
+ else:
541
+ assert self.path.base is not None
542
+ idx_col = self.path.base.get_column_by_id(UUID(md.indexed_col_tbl_id), md.indexed_col_id)
452
543
  idx = cls.from_dict(idx_col, md.init_args)
453
544
 
454
545
  # fix up the sa column type of the index value and undo columns
455
546
  val_col = self.cols_by_id[md.index_val_col_id]
456
547
  val_col.sa_col_type = idx.index_sa_type()
457
- val_col._records_errors = False
548
+ val_col._stores_cellmd = False
458
549
  undo_col = self.cols_by_id[md.index_val_undo_col_id]
459
550
  undo_col.sa_col_type = idx.index_sa_type()
460
- undo_col._records_errors = False
551
+ undo_col._stores_cellmd = False
461
552
  idx_info = self.IndexInfo(id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col)
462
553
  self.idxs_by_name[md.name] = idx_info
463
554
 
@@ -473,31 +564,15 @@ class TableVersion:
473
564
  else:
474
565
  self.store_tbl = StoreTable(self)
475
566
 
476
- def _write_md(self, new_version: bool, new_version_ts: float, new_schema_version: bool) -> None:
477
- """Writes table metadata to the database.
478
-
479
- Args:
480
- timestamp: timestamp of the change
481
- update_tbl_version: if `True`, will also write `TableVersion` metadata
482
- preceding_schema_version: if specified, will also write `TableSchemaVersion` metadata, recording the
483
- specified preceding schema version
484
- """
567
+ def _write_md(self, new_version: bool, new_schema_version: bool) -> None:
485
568
  from pixeltable.catalog import Catalog
486
569
 
487
- version_md: Optional[schema.TableVersionMd] = (
488
- schema.TableVersionMd(
489
- tbl_id=str(self.id),
490
- created_at=new_version_ts,
491
- version=self.version,
492
- schema_version=self.schema_version,
493
- additional_md={},
494
- )
495
- if new_version
496
- else None
497
- )
498
-
499
570
  Catalog.get().store_tbl_md(
500
- self.id, None, self._tbl_md, version_md, self._schema_version_md if new_schema_version else None
571
+ self.id,
572
+ None,
573
+ self._tbl_md,
574
+ self._version_md if new_version else None,
575
+ self._schema_version_md if new_schema_version else None,
501
576
  )
502
577
 
503
578
  def _store_idx_name(self, idx_id: int) -> str:
@@ -507,10 +582,10 @@ class TableVersion:
507
582
  def add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
508
583
  # we're creating a new schema version
509
584
  self.version += 1
510
- self.preceding_schema_version = self.schema_version
585
+ self.created_at = time.time()
511
586
  self.schema_version = self.version
512
587
  status = self._add_index(col, idx_name, idx)
513
- self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
588
+ self._write_md(new_version=True, new_schema_version=True)
514
589
  _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
515
590
  return status
516
591
 
@@ -553,7 +628,7 @@ class TableVersion:
553
628
  stored=True,
554
629
  schema_version_add=self.schema_version,
555
630
  schema_version_drop=None,
556
- records_errors=idx.records_value_errors(),
631
+ stores_cellmd=idx.records_value_errors(),
557
632
  )
558
633
  val_col.tbl = self
559
634
  val_col.col_type = val_col.col_type.copy(nullable=True)
@@ -567,7 +642,7 @@ class TableVersion:
567
642
  stored=True,
568
643
  schema_version_add=self.schema_version,
569
644
  schema_version_drop=None,
570
- records_errors=False,
645
+ stores_cellmd=False,
571
646
  )
572
647
  undo_col.tbl = self
573
648
  undo_col.col_type = undo_col.col_type.copy(nullable=True)
@@ -626,12 +701,12 @@ class TableVersion:
626
701
  return status
627
702
 
628
703
  def drop_index(self, idx_id: int) -> None:
629
- assert not self.is_snapshot
704
+ assert self.is_mutable
630
705
  assert idx_id in self._tbl_md.index_md
631
706
 
632
707
  # we're creating a new schema version
633
708
  self.version += 1
634
- self.preceding_schema_version = self.schema_version
709
+ self.created_at = time.time()
635
710
  self.schema_version = self.version
636
711
  idx_md = self._tbl_md.index_md[idx_id]
637
712
  idx_md.schema_version_drop = self.schema_version
@@ -644,14 +719,14 @@ class TableVersion:
644
719
  del self._tbl_md.index_md[idx_id]
645
720
 
646
721
  self._drop_columns([idx_info.val_col, idx_info.undo_col])
647
- self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
722
+ self._write_md(new_version=True, new_schema_version=True)
648
723
  _logger.info(f'Dropped index {idx_md.name} on table {self.name}')
649
724
 
650
725
  def add_columns(
651
726
  self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
652
727
  ) -> UpdateStatus:
653
728
  """Adds columns to the table."""
654
- assert not self.is_snapshot
729
+ assert self.is_mutable
655
730
  assert all(is_valid_identifier(col.name) for col in cols if col.name is not None)
656
731
  assert all(col.stored is not None for col in cols)
657
732
  assert all(col.name not in self.cols_by_name for col in cols if col.name is not None)
@@ -662,7 +737,7 @@ class TableVersion:
662
737
 
663
738
  # we're creating a new schema version
664
739
  self.version += 1
665
- self.preceding_schema_version = self.schema_version
740
+ self.created_at = time.time()
666
741
  self.schema_version = self.version
667
742
  index_cols: dict[Column, tuple[index.BtreeIndex, Column, Column]] = {}
668
743
  all_cols: list[Column] = []
@@ -679,7 +754,8 @@ class TableVersion:
679
754
  # Create indices and their md records
680
755
  for col, (idx, val_col, undo_col) in index_cols.items():
681
756
  self._create_index(col, val_col, undo_col, idx_name=None, idx=idx)
682
- self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
757
+ self.update_status = status
758
+ self._write_md(new_version=True, new_schema_version=True)
683
759
  _logger.info(f'Added columns {[col.name for col in cols]} to table {self.name}, new version: {self.version}')
684
760
 
685
761
  msg = (
@@ -789,11 +865,11 @@ class TableVersion:
789
865
  def drop_column(self, col: Column) -> None:
790
866
  """Drop a column from the table."""
791
867
 
792
- assert not self.is_snapshot
868
+ assert self.is_mutable
793
869
 
794
870
  # we're creating a new schema version
795
871
  self.version += 1
796
- self.preceding_schema_version = self.schema_version
872
+ self.created_at = time.time()
797
873
  self.schema_version = self.version
798
874
 
799
875
  # drop this column and all dependent index columns and indices
@@ -813,12 +889,12 @@ class TableVersion:
813
889
  del self.idxs_by_name[idx_name]
814
890
 
815
891
  self._drop_columns(dropped_cols)
816
- self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
892
+ self._write_md(new_version=True, new_schema_version=True)
817
893
  _logger.info(f'Dropped column {col.name} from table {self.name}, new version: {self.version}')
818
894
 
819
895
  def _drop_columns(self, cols: Iterable[Column]) -> None:
820
896
  """Mark columns as dropped"""
821
- assert not self.is_snapshot
897
+ assert self.is_mutable
822
898
 
823
899
  for col in cols:
824
900
  col.schema_version_drop = self.schema_version
@@ -841,7 +917,7 @@ class TableVersion:
841
917
 
842
918
  def rename_column(self, old_name: str, new_name: str) -> None:
843
919
  """Rename a column."""
844
- assert not self.is_snapshot
920
+ assert self.is_mutable
845
921
  if old_name not in self.cols_by_name:
846
922
  raise excs.Error(f'Unknown column: {old_name}')
847
923
  if not is_valid_identifier(new_name):
@@ -856,10 +932,10 @@ class TableVersion:
856
932
 
857
933
  # we're creating a new schema version
858
934
  self.version += 1
859
- self.preceding_schema_version = self.schema_version
935
+ self.created_at = time.time()
860
936
  self.schema_version = self.version
861
937
 
862
- self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
938
+ self._write_md(new_version=True, new_schema_version=True)
863
939
  _logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
864
940
 
865
941
  def set_comment(self, new_comment: Optional[str]) -> None:
@@ -878,9 +954,9 @@ class TableVersion:
878
954
  def _create_schema_version(self) -> None:
879
955
  # we're creating a new schema version
880
956
  self.version += 1
881
- self.preceding_schema_version = self.schema_version
957
+ self.created_at = time.time()
882
958
  self.schema_version = self.version
883
- self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
959
+ self._write_md(new_version=True, new_schema_version=True)
884
960
  _logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
885
961
 
886
962
  def insert(
@@ -899,6 +975,7 @@ class TableVersion:
899
975
  assert (rows is None) != (df is None) # Exactly one must be specified
900
976
  if rows is not None:
901
977
  plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
978
+
902
979
  else:
903
980
  plan = Planner.create_df_insert_plan(self, df, ignore_errors=not fail_on_exception)
904
981
 
@@ -909,7 +986,10 @@ class TableVersion:
909
986
  self.next_row_id += 1
910
987
  yield rowid
911
988
 
912
- return self._insert(plan, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception)
989
+ result = self._insert(
990
+ plan, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception
991
+ )
992
+ return result
913
993
 
914
994
  def _insert(
915
995
  self,
@@ -923,22 +1003,28 @@ class TableVersion:
923
1003
  """Insert rows produced by exec_plan and propagate to views"""
924
1004
  # we're creating a new version
925
1005
  self.version += 1
926
- cols_with_excs, result = self.store_tbl.insert_rows(
1006
+ self.created_at = timestamp
1007
+ cols_with_excs, row_counts = self.store_tbl.insert_rows(
927
1008
  exec_plan, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc
928
1009
  )
929
- result += UpdateStatus(cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs])
930
- self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
1010
+ result = UpdateStatus(
1011
+ cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs],
1012
+ row_count_stats=row_counts,
1013
+ )
931
1014
 
932
1015
  # update views
933
1016
  for view in self.mutable_views:
934
1017
  from pixeltable.plan import Planner
935
1018
 
936
- plan, _ = Planner.create_view_load_plan(view.get().path, propagates_insert=True)
937
- status = view.get()._insert(plan, timestamp, print_stats=print_stats)
1019
+ plan2, _ = Planner.create_view_load_plan(view.get().path, propagates_insert=True)
1020
+ status = view.get()._insert(plan2, timestamp, print_stats=print_stats)
938
1021
  result += status.to_cascade()
939
1022
 
1023
+ # Use the net status after all propagations
1024
+ self.update_status = result
1025
+ self._write_md(new_version=True, new_schema_version=False)
940
1026
  if print_stats:
941
- plan.ctx.profile.print(num_rows=result.num_rows) # This is the net rows after all propagations
1027
+ exec_plan.ctx.profile.print(num_rows=result.num_rows)
942
1028
  _logger.info(f'TableVersion {self.name}: new version {self.version}')
943
1029
  return result
944
1030
 
@@ -952,8 +1038,7 @@ class TableVersion:
952
1038
  cascade: if True, also update all computed columns that transitively depend on the updated columns,
953
1039
  including within views.
954
1040
  """
955
- if self.is_snapshot:
956
- raise excs.Error('Cannot update a snapshot')
1041
+ assert self.is_mutable
957
1042
 
958
1043
  from pixeltable.plan import Planner
959
1044
 
@@ -1067,7 +1152,7 @@ class TableVersion:
1067
1152
  return update_targets
1068
1153
 
1069
1154
  def recompute_columns(self, col_names: list[str], errors_only: bool = False, cascade: bool = True) -> UpdateStatus:
1070
- assert not self.is_snapshot
1155
+ assert self.is_mutable
1071
1156
  assert all(name in self.cols_by_name for name in col_names)
1072
1157
  assert len(col_names) > 0
1073
1158
  assert len(col_names) == 1 or not errors_only
@@ -1108,20 +1193,21 @@ class TableVersion:
1108
1193
  cascade: bool,
1109
1194
  show_progress: bool = True,
1110
1195
  ) -> UpdateStatus:
1111
- if plan is not None:
1112
- # we're creating a new version
1196
+ result = UpdateStatus()
1197
+ create_new_table_version = plan is not None
1198
+ if create_new_table_version:
1113
1199
  self.version += 1
1114
- cols_with_excs, status = self.store_tbl.insert_rows(plan, v_min=self.version, show_progress=show_progress)
1115
- result = status.insert_to_update()
1200
+ self.created_at = timestamp
1201
+ cols_with_excs, row_counts = self.store_tbl.insert_rows(
1202
+ plan, v_min=self.version, show_progress=show_progress
1203
+ )
1116
1204
  result += UpdateStatus(
1117
- cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
1205
+ row_count_stats=row_counts.insert_to_update(),
1206
+ cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs],
1118
1207
  )
1119
1208
  self.store_tbl.delete_rows(
1120
1209
  self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause
1121
1210
  )
1122
- self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
1123
- else:
1124
- result = UpdateStatus()
1125
1211
 
1126
1212
  if cascade:
1127
1213
  base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
@@ -1137,7 +1223,9 @@ class TableVersion:
1137
1223
  plan, None, recomputed_view_cols, base_versions=base_versions, timestamp=timestamp, cascade=True
1138
1224
  )
1139
1225
  result += status.to_cascade()
1140
-
1226
+ if create_new_table_version:
1227
+ self.update_status = result
1228
+ self._write_md(new_version=True, new_schema_version=False)
1141
1229
  return result
1142
1230
 
1143
1231
  def delete(self, where: Optional[exprs.Expr] = None) -> UpdateStatus:
@@ -1191,17 +1279,21 @@ class TableVersion:
1191
1279
  if del_rows > 0:
1192
1280
  # we're creating a new version
1193
1281
  self.version += 1
1194
- self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
1282
+ self.created_at = timestamp
1195
1283
  for view in self.mutable_views:
1196
1284
  status = view.get().propagate_delete(
1197
1285
  where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
1198
1286
  )
1199
1287
  result += status.to_cascade()
1288
+ self.update_status = result
1289
+
1290
+ if del_rows > 0:
1291
+ self._write_md(new_version=True, new_schema_version=False)
1200
1292
  return result
1201
1293
 
1202
1294
  def revert(self) -> None:
1203
1295
  """Reverts the table to the previous version."""
1204
- assert not self.is_snapshot
1296
+ assert self.is_mutable
1205
1297
  if self.version == 0:
1206
1298
  raise excs.Error('Cannot revert version 0')
1207
1299
  self._revert()
@@ -1295,7 +1387,7 @@ class TableVersion:
1295
1387
  )
1296
1388
 
1297
1389
  self.version -= 1
1298
- self._write_md(new_version=False, new_version_ts=0, new_schema_version=False)
1390
+ self._write_md(new_version=False, new_schema_version=False)
1299
1391
 
1300
1392
  # propagate to views
1301
1393
  views_str = ', '.join([str(v.id) for v in self.mutable_views])
@@ -1317,28 +1409,32 @@ class TableVersion:
1317
1409
 
1318
1410
  def link_external_store(self, store: pxt.io.ExternalStore) -> None:
1319
1411
  self.version += 1
1320
- self.preceding_schema_version = self.schema_version
1412
+ self.created_at = time.time()
1321
1413
  self.schema_version = self.version
1322
1414
 
1323
1415
  self.external_stores[store.name] = store
1324
1416
  self._tbl_md.external_stores.append(
1325
1417
  {'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()}
1326
1418
  )
1327
- self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
1419
+ self._write_md(new_version=True, new_schema_version=True)
1328
1420
 
1329
1421
  def unlink_external_store(self, store: pxt.io.ExternalStore) -> None:
1330
1422
  del self.external_stores[store.name]
1331
1423
  self.version += 1
1332
- self.preceding_schema_version = self.schema_version
1424
+ self.created_at = time.time()
1333
1425
  self.schema_version = self.version
1334
1426
  idx = next(i for i, store_md in enumerate(self._tbl_md.external_stores) if store_md['md']['name'] == store.name)
1335
1427
  self._tbl_md.external_stores.pop(idx)
1336
- self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
1428
+ self._write_md(new_version=True, new_schema_version=True)
1337
1429
 
1338
1430
  @property
1339
1431
  def tbl_md(self) -> schema.TableMd:
1340
1432
  return self._tbl_md
1341
1433
 
1434
+ @property
1435
+ def version_md(self) -> schema.TableVersionMd:
1436
+ return self._version_md
1437
+
1342
1438
  @property
1343
1439
  def schema_version_md(self) -> schema.TableSchemaVersionMd:
1344
1440
  return self._schema_version_md
@@ -1386,6 +1482,16 @@ class TableVersion:
1386
1482
  def version(self, version: int) -> None:
1387
1483
  assert self.effective_version is None
1388
1484
  self._tbl_md.current_version = version
1485
+ self._version_md.version = version
1486
+
1487
+ @property
1488
+ def created_at(self) -> float:
1489
+ return self._version_md.created_at
1490
+
1491
+ @created_at.setter
1492
+ def created_at(self, ts: float) -> None:
1493
+ assert self.effective_version is None
1494
+ self._version_md.created_at = ts
1389
1495
 
1390
1496
  @property
1391
1497
  def schema_version(self) -> int:
@@ -1395,16 +1501,22 @@ class TableVersion:
1395
1501
  def schema_version(self, version: int) -> None:
1396
1502
  assert self.effective_version is None
1397
1503
  self._tbl_md.current_schema_version = version
1504
+ self._version_md.schema_version = version
1505
+ self._schema_version_md.preceding_schema_version = self._schema_version_md.schema_version
1398
1506
  self._schema_version_md.schema_version = version
1399
1507
 
1400
1508
  @property
1401
1509
  def preceding_schema_version(self) -> int:
1402
1510
  return self._schema_version_md.preceding_schema_version
1403
1511
 
1404
- @preceding_schema_version.setter
1405
- def preceding_schema_version(self, v: int) -> None:
1512
+ @property
1513
+ def update_status(self) -> Optional[UpdateStatus]:
1514
+ return self._version_md.update_status
1515
+
1516
+ @update_status.setter
1517
+ def update_status(self, status: UpdateStatus) -> None:
1406
1518
  assert self.effective_version is None
1407
- self._schema_version_md.preceding_schema_version = v
1519
+ self._version_md.update_status = status
1408
1520
 
1409
1521
  @property
1410
1522
  def media_validation(self) -> MediaValidation:
@@ -1460,7 +1572,7 @@ class TableVersion:
1460
1572
  @property
1461
1573
  def is_insertable(self) -> bool:
1462
1574
  """Returns True if this corresponds to an InsertableTable"""
1463
- return not self.is_snapshot and not self.is_view
1575
+ return self.is_mutable and not self.is_view
1464
1576
 
1465
1577
  def is_iterator_column(self, col: Column) -> bool:
1466
1578
  """Returns True if col is produced by an iterator"""
@@ -1538,35 +1650,6 @@ class TableVersion:
1538
1650
  {'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()} for store in stores
1539
1651
  ]
1540
1652
 
1541
- def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
1542
- return schema.TableVersionMd(
1543
- tbl_id=str(self.id),
1544
- created_at=timestamp,
1545
- version=self.version,
1546
- schema_version=self.schema_version,
1547
- additional_md={},
1548
- )
1549
-
1550
- def _create_schema_version_md(self, preceding_schema_version: int) -> schema.TableSchemaVersionMd:
1551
- column_md: dict[int, schema.SchemaColumn] = {}
1552
- for pos, col in enumerate(self.cols_by_name.values()):
1553
- column_md[col.id] = schema.SchemaColumn(
1554
- pos=pos,
1555
- name=col.name,
1556
- media_validation=col._media_validation.name.lower() if col._media_validation is not None else None,
1557
- )
1558
- # preceding_schema_version to be set by the caller
1559
- return schema.TableSchemaVersionMd(
1560
- tbl_id=str(self.id),
1561
- schema_version=self.schema_version,
1562
- preceding_schema_version=preceding_schema_version,
1563
- columns=column_md,
1564
- num_retained_versions=self.num_retained_versions,
1565
- comment=self.comment,
1566
- media_validation=self.media_validation.name.lower(),
1567
- additional_md={},
1568
- )
1569
-
1570
1653
  def as_dict(self) -> dict:
1571
1654
  return {'id': str(self.id), 'effective_version': self.effective_version}
1572
1655