pixeltable 0.3.15__py3-none-any.whl → 0.4.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pixeltable might be problematic.

Files changed (48)
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/catalog.py +292 -105
  3. pixeltable/catalog/column.py +10 -8
  4. pixeltable/catalog/dir.py +1 -2
  5. pixeltable/catalog/insertable_table.py +25 -20
  6. pixeltable/catalog/schema_object.py +3 -6
  7. pixeltable/catalog/table.py +245 -189
  8. pixeltable/catalog/table_version.py +317 -201
  9. pixeltable/catalog/table_version_handle.py +15 -2
  10. pixeltable/catalog/table_version_path.py +60 -14
  11. pixeltable/catalog/view.py +14 -5
  12. pixeltable/dataframe.py +11 -9
  13. pixeltable/env.py +2 -4
  14. pixeltable/exec/in_memory_data_node.py +1 -1
  15. pixeltable/exec/sql_node.py +20 -11
  16. pixeltable/exprs/column_property_ref.py +15 -6
  17. pixeltable/exprs/column_ref.py +32 -11
  18. pixeltable/exprs/comparison.py +1 -1
  19. pixeltable/exprs/row_builder.py +4 -6
  20. pixeltable/exprs/rowid_ref.py +8 -0
  21. pixeltable/exprs/similarity_expr.py +1 -0
  22. pixeltable/func/query_template_function.py +1 -1
  23. pixeltable/functions/string.py +212 -58
  24. pixeltable/globals.py +7 -4
  25. pixeltable/index/base.py +5 -0
  26. pixeltable/index/btree.py +5 -0
  27. pixeltable/index/embedding_index.py +5 -0
  28. pixeltable/io/external_store.py +8 -29
  29. pixeltable/io/label_studio.py +1 -1
  30. pixeltable/io/parquet.py +2 -2
  31. pixeltable/io/table_data_conduit.py +0 -31
  32. pixeltable/metadata/__init__.py +1 -1
  33. pixeltable/metadata/converters/convert_13.py +2 -2
  34. pixeltable/metadata/converters/convert_30.py +6 -11
  35. pixeltable/metadata/converters/convert_35.py +9 -0
  36. pixeltable/metadata/converters/util.py +3 -9
  37. pixeltable/metadata/notes.py +1 -0
  38. pixeltable/metadata/schema.py +5 -1
  39. pixeltable/plan.py +4 -4
  40. pixeltable/share/packager.py +24 -9
  41. pixeltable/share/publish.py +2 -2
  42. pixeltable/store.py +19 -13
  43. pixeltable/utils/dbms.py +1 -1
  44. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc1.dist-info}/METADATA +1 -1
  45. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc1.dist-info}/RECORD +48 -47
  46. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc1.dist-info}/LICENSE +0 -0
  47. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc1.dist-info}/WHEEL +0 -0
  48. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc1.dist-info}/entry_points.txt +0 -0
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import copy
3
4
  import dataclasses
4
5
  import importlib
5
6
  import logging
@@ -50,43 +51,44 @@ class TableVersion:
50
51
 
51
52
  Instances of TableVersion should not be stored as member variables (ie, used across transaction boundaries).
52
53
  Use a TableVersionHandle instead.
54
+
55
+ Only TableVersion and Catalog interact directly with stored metadata. Everything else needs to go through these
56
+ two classes.
53
57
  """
54
58
 
55
59
  id: UUID
56
- name: str
57
- user: Optional[str]
60
+
61
+ # record metadata stored in catalog
62
+ _tbl_md: schema.TableMd
63
+ _schema_version_md: schema.TableSchemaVersionMd
64
+
58
65
  effective_version: Optional[int]
59
- is_replica: bool
60
- version: int
61
- comment: str
62
- media_validation: MediaValidation
63
- num_retained_versions: int
64
- schema_version: int
65
- view_md: Optional[schema.ViewMd]
66
66
  path: Optional[pxt.catalog.TableVersionPath] # only set for live tables; needed to resolve computed cols
67
67
  base: Optional[TableVersionHandle] # only set for views
68
- next_col_id: int
69
- next_idx_id: int
70
- next_rowid: int
71
68
  predicate: Optional[exprs.Expr]
72
- mutable_views: list[TableVersionHandle] # target for data operation propagation (only set for live tables)
73
69
  iterator_cls: Optional[type[ComponentIterator]]
74
70
  iterator_args: Optional[exprs.InlineDict]
75
71
  num_iterator_cols: int
76
72
 
73
+ # target for data operation propagation (only set for non-snapshots, and only records non-snapshot views)
74
+ mutable_views: set[TableVersionHandle]
75
+
77
76
  # contains complete history of columns, incl dropped ones
78
77
  cols: list[Column]
79
78
  # contains only user-facing (named) columns visible in this version
80
79
  cols_by_name: dict[str, Column]
81
80
  # contains only columns visible in this version, both system and user
82
81
  cols_by_id: dict[int, Column]
83
- # needed for _create_tbl_md()
84
- idx_md: dict[int, schema.IndexMd]
85
82
  # contains only actively maintained indices
86
83
  idxs_by_name: dict[str, TableVersion.IndexInfo]
87
84
 
88
85
  external_stores: dict[str, pxt.io.ExternalStore]
89
- store_tbl: 'store.StoreBase'
86
+ store_tbl: Optional['store.StoreBase']
87
+
88
+ # used by Catalog to invalidate cached instances at the end of a transaction;
89
+ # True if this instance reflects the state of stored metadata in the context of this transaction and
90
+ # it is the instance cached in Catalog
91
+ is_validated: bool
90
92
 
91
93
  @dataclasses.dataclass
92
94
  class IndexInfo:
@@ -106,21 +108,15 @@ class TableVersion:
106
108
  mutable_views: list[TableVersionHandle],
107
109
  base_path: Optional[pxt.catalog.TableVersionPath] = None,
108
110
  base: Optional[TableVersionHandle] = None,
109
- # base_store_tbl: Optional['store.StoreBase'] = None,
110
111
  ):
112
+ self.is_validated = True # a freshly constructed instance is always valid
111
113
  self.id = id
112
- self.name = tbl_md.name
113
- self.user = tbl_md.user
114
+ self._tbl_md = copy.deepcopy(tbl_md)
115
+ self._schema_version_md = copy.deepcopy(schema_version_md)
114
116
  self.effective_version = effective_version
115
- self.version = tbl_md.current_version if effective_version is None else effective_version
116
- self.is_replica = tbl_md.is_replica
117
- self.comment = schema_version_md.comment
118
- self.num_retained_versions = schema_version_md.num_retained_versions
119
- self.schema_version = schema_version_md.schema_version
120
- self.view_md = tbl_md.view_md # save this as-is, it's needed for _create_md()
121
- self.media_validation = MediaValidation[schema_version_md.media_validation.upper()]
122
117
  assert not (self.is_view and base is None)
123
118
  self.base = base
119
+ self.store_tbl = None
124
120
 
125
121
  # mutable tables need their TableVersionPath for expr eval during updates
126
122
  from .table_version_handle import TableVersionHandle
@@ -134,22 +130,11 @@ class TableVersion:
134
130
  assert base_path is not None
135
131
  self.path = TableVersionPath(self_handle, base=base_path)
136
132
 
137
- if self.is_snapshot:
138
- self.next_col_id = -1
139
- self.next_idx_id = -1 # TODO: can snapshots have separate indices?
140
- self.next_rowid = -1
141
- else:
142
- assert tbl_md.current_version == self.version
143
- self.next_col_id = tbl_md.next_col_id
144
- self.next_idx_id = tbl_md.next_idx_id
145
- self.next_rowid = tbl_md.next_row_id
146
-
147
133
  # view-specific initialization
148
134
  from pixeltable import exprs
149
135
 
150
136
  predicate_dict = None if self.view_md is None or self.view_md.predicate is None else self.view_md.predicate
151
137
  self.predicate = exprs.Expr.from_dict(predicate_dict) if predicate_dict is not None else None
152
- self.mutable_views = mutable_views
153
138
 
154
139
  # component view-specific initialization
155
140
  self.iterator_cls = None
@@ -164,22 +149,26 @@ class TableVersion:
164
149
  self.num_iterator_cols = len(output_schema)
165
150
  assert tbl_md.view_md.iterator_args is not None
166
151
 
167
- # register this table version now so that it's available when we're re-creating value exprs
168
- cat = pxt.catalog.Catalog.get()
169
- cat.add_tbl_version(self)
152
+ self.mutable_views = set(mutable_views)
153
+ assert self.is_mutable or len(self.mutable_views) == 0
170
154
 
171
- # init schema after we determined whether we're a component view, and before we create the store table
172
155
  self.cols = []
173
156
  self.cols_by_name = {}
174
157
  self.cols_by_id = {}
175
- self.idx_md = tbl_md.index_md
176
158
  self.idxs_by_name = {}
177
159
  self.external_stores = {}
178
160
 
179
- self._init_schema(tbl_md, schema_version_md)
161
+ def init(self) -> None:
162
+ """
163
+ Initialize schema-related in-memory metadata separately, now that this TableVersion instance is visible
164
+ in Catalog.
165
+ """
166
+ from .catalog import Catalog
180
167
 
181
- # Init external stores (this needs to happen after the schema is created)
182
- self._init_external_stores(tbl_md)
168
+ assert (self.id, self.effective_version) in Catalog.get()._tbl_versions
169
+ self._init_schema()
170
+ # init external stores; this needs to happen after the schema is created
171
+ self._init_external_stores()
183
172
 
184
173
  def __hash__(self) -> int:
185
174
  return hash(self.id)
@@ -188,19 +177,7 @@ class TableVersion:
188
177
  """Create a snapshot copy of this TableVersion"""
189
178
  assert not self.is_snapshot
190
179
  base = self.path.base.tbl_version if self.is_view else None
191
- return TableVersion(
192
- self.id,
193
- self._create_tbl_md(),
194
- self.version,
195
- self._create_schema_version_md(preceding_schema_version=0), # preceding_schema_version: dummy value
196
- mutable_views=[],
197
- base=base,
198
- )
199
-
200
- def create_handle(self) -> TableVersionHandle:
201
- from .table_version_handle import TableVersionHandle
202
-
203
- return TableVersionHandle(self.id, self.effective_version, tbl_version=self)
180
+ return TableVersion(self.id, self.tbl_md, self.version, self.schema_version_md, mutable_views=[], base=base)
204
181
 
205
182
  @property
206
183
  def versioned_name(self) -> str:
@@ -306,8 +283,19 @@ class TableVersion:
306
283
  tbl_version = cls(
307
284
  tbl_record.id, table_md, effective_version, schema_version_md, [], base_path=base_path, base=base
308
285
  )
309
-
286
+ # TODO: break this up, so that Catalog.create_table() registers tbl_version
287
+ cat = pxt.catalog.Catalog.get()
288
+ cat._tbl_versions[tbl_record.id, effective_version] = tbl_version
289
+ tbl_version.init()
310
290
  tbl_version.store_tbl.create()
291
+ is_mutable = not is_snapshot and not table_md.is_replica
292
+ if base is not None and base.get().is_mutable and is_mutable:
293
+ from .table_version_handle import TableVersionHandle
294
+
295
+ handle = TableVersionHandle(tbl_version.id, effective_version)
296
+ assert handle not in base.get().mutable_views
297
+ base.get().mutable_views.add(handle)
298
+
311
299
  if view_md is None or not view_md.is_snapshot:
312
300
  # add default indices, after creating the store table
313
301
  for col in tbl_version.cols_by_name.values():
@@ -315,7 +303,7 @@ class TableVersion:
315
303
  assert status is None or status.num_excs == 0
316
304
 
317
305
  # we re-create the tbl_record here, now that we have new index metadata
318
- tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_version._create_tbl_md()))
306
+ tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_version.tbl_md))
319
307
  session.add(tbl_record)
320
308
  session.add(tbl_version_record)
321
309
  session.add(schema_version_record)
@@ -331,6 +319,9 @@ class TableVersion:
331
319
  tbl_version = cls(
332
320
  tbl_id, md.tbl_md, md.version_md.version, md.schema_version_md, [], base_path=base_path, base=base
333
321
  )
322
+ cat = pxt.catalog.Catalog.get()
323
+ cat._tbl_versions[tbl_version.id, tbl_version.effective_version] = tbl_version
324
+ tbl_version.init()
334
325
  tbl_version.store_tbl.create()
335
326
  tbl_version.store_tbl.ensure_columns_exist(col for col in tbl_version.cols if col.is_stored)
336
327
  return tbl_version
@@ -338,6 +329,14 @@ class TableVersion:
338
329
  def drop(self) -> None:
339
330
  from .catalog import Catalog
340
331
 
332
+ if self.is_view and self.is_mutable:
333
+ # update mutable_views
334
+ from .table_version_handle import TableVersionHandle
335
+
336
+ assert self.base is not None
337
+ if self.base.get().is_mutable:
338
+ self.base.get().mutable_views.remove(TableVersionHandle.create(self))
339
+
341
340
  cat = Catalog.get()
342
341
  # delete this table and all associated data
343
342
  MediaStore.delete(self.id)
@@ -347,24 +346,24 @@ class TableVersion:
347
346
  # de-register table version from catalog
348
347
  cat.remove_tbl_version(self)
349
348
 
350
- def _init_schema(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
349
+ def _init_schema(self) -> None:
351
350
  # create columns first, so the indices can reference them
352
- self._init_cols(tbl_md, schema_version_md)
351
+ self._init_cols()
353
352
  if not self.is_snapshot:
354
- self._init_idxs(tbl_md)
353
+ self._init_idxs()
355
354
  # create the sa schema only after creating the columns and indices
356
355
  self._init_sa_schema()
357
356
 
358
- def _init_cols(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
357
+ def _init_cols(self) -> None:
359
358
  """Initialize self.cols with the columns visible in our effective version"""
360
359
  self.cols = []
361
360
  self.cols_by_name = {}
362
361
  self.cols_by_id = {}
363
362
  # Sort columns in column_md by the position specified in col_md.id to guarantee that all references
364
363
  # point backward.
365
- sorted_column_md = sorted(tbl_md.column_md.values(), key=lambda item: item.id)
364
+ sorted_column_md = sorted(self.tbl_md.column_md.values(), key=lambda item: item.id)
366
365
  for col_md in sorted_column_md:
367
- schema_col_md = schema_version_md.columns.get(col_md.id)
366
+ schema_col_md = self.schema_version_md.columns.get(col_md.id)
368
367
  col_name = schema_col_md.name if schema_col_md is not None else None
369
368
  media_val = (
370
369
  MediaValidation[schema_col_md.media_validation.upper()]
@@ -382,7 +381,7 @@ class TableVersion:
382
381
  schema_version_drop=col_md.schema_version_drop,
383
382
  value_expr_dict=col_md.value_expr,
384
383
  )
385
- col.tbl = self.create_handle()
384
+ col.tbl = self
386
385
  self.cols.append(col)
387
386
 
388
387
  # populate the lookup structures before Expr.from_dict()
@@ -401,12 +400,12 @@ class TableVersion:
401
400
  if not self.is_snapshot and col_md.value_expr is not None:
402
401
  self._record_refd_columns(col)
403
402
 
404
- def _init_idxs(self, tbl_md: schema.TableMd) -> None:
405
- self.idx_md = tbl_md.index_md
403
+ def _init_idxs(self) -> None:
404
+ # self.idx_md = tbl_md.index_md
406
405
  self.idxs_by_name = {}
407
406
  import pixeltable.index as index_module
408
407
 
409
- for md in tbl_md.index_md.values():
408
+ for md in self.tbl_md.index_md.values():
410
409
  if md.schema_version_add > self.schema_version or (
411
410
  md.schema_version_drop is not None and md.schema_version_drop <= self.schema_version
412
411
  ):
@@ -441,28 +440,32 @@ class TableVersion:
441
440
  else:
442
441
  self.store_tbl = StoreTable(self)
443
442
 
444
- def _update_md(
445
- self, timestamp: float, update_tbl_version: bool = True, preceding_schema_version: Optional[int] = None
446
- ) -> None:
443
+ def _write_md(self, new_version: bool, new_version_ts: float, new_schema_version: bool) -> None:
447
444
  """Writes table metadata to the database.
448
445
 
449
446
  Args:
450
447
  timestamp: timestamp of the change
451
- conn: database connection to use
452
448
  update_tbl_version: if `True`, will also write `TableVersion` metadata
453
449
  preceding_schema_version: if specified, will also write `TableSchemaVersion` metadata, recording the
454
450
  specified preceding schema version
455
451
  """
456
- assert update_tbl_version or preceding_schema_version is None
457
452
  from pixeltable.catalog import Catalog
458
453
 
459
- tbl_md = self._create_tbl_md()
460
- version_md = self._create_version_md(timestamp) if update_tbl_version else None
461
- schema_version_md = (
462
- self._create_schema_version_md(preceding_schema_version) if preceding_schema_version is not None else None
454
+ version_md: Optional[schema.TableVersionMd] = (
455
+ schema.TableVersionMd(
456
+ tbl_id=str(self.id),
457
+ created_at=new_version_ts,
458
+ version=self.version,
459
+ schema_version=self.schema_version,
460
+ additional_md={},
461
+ )
462
+ if new_version
463
+ else None
463
464
  )
464
465
 
465
- Catalog.get().store_tbl_md(self.id, tbl_md, version_md, schema_version_md)
466
+ Catalog.get().store_tbl_md(
467
+ self.id, self._tbl_md, version_md, self._schema_version_md if new_schema_version else None
468
+ )
466
469
 
467
470
  def ensure_md_loaded(self) -> None:
468
471
  """Ensure that table metadata is loaded."""
@@ -476,10 +479,10 @@ class TableVersion:
476
479
  def add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
477
480
  # we're creating a new schema version
478
481
  self.version += 1
479
- preceding_schema_version = self.schema_version
482
+ self.preceding_schema_version = self.schema_version
480
483
  self.schema_version = self.version
481
484
  status = self._add_index(col, idx_name, idx)
482
- self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
485
+ self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
483
486
  _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
484
487
  return status
485
488
 
@@ -524,7 +527,7 @@ class TableVersion:
524
527
  schema_version_drop=None,
525
528
  records_errors=idx.records_value_errors(),
526
529
  )
527
- val_col.tbl = self.create_handle()
530
+ val_col.tbl = self
528
531
  val_col.col_type = val_col.col_type.copy(nullable=True)
529
532
  self.next_col_id += 1
530
533
 
@@ -538,7 +541,7 @@ class TableVersion:
538
541
  schema_version_drop=None,
539
542
  records_errors=False,
540
543
  )
541
- undo_col.tbl = self.create_handle()
544
+ undo_col.tbl = self
542
545
  undo_col.col_type = undo_col.col_type.copy(nullable=True)
543
546
  self.next_col_id += 1
544
547
  return val_col, undo_col
@@ -553,7 +556,7 @@ class TableVersion:
553
556
  idx_name = f'idx{idx_id}'
554
557
  else:
555
558
  assert is_valid_identifier(idx_name)
556
- assert idx_name not in [i.name for i in self.idx_md.values()]
559
+ assert idx_name not in [i.name for i in self._tbl_md.index_md.values()]
557
560
  # create and register the index metadata
558
561
  idx_cls = type(idx)
559
562
  idx_md = schema.IndexMd(
@@ -569,7 +572,7 @@ class TableVersion:
569
572
  init_args=idx.as_dict(),
570
573
  )
571
574
  idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
572
- self.idx_md[idx_id] = idx_md
575
+ self._tbl_md.index_md[idx_id] = idx_md
573
576
  self.idxs_by_name[idx_name] = idx_info
574
577
  try:
575
578
  idx.create_index(self._store_idx_name(idx_id), val_col)
@@ -578,7 +581,7 @@ class TableVersion:
578
581
  def cleanup_index() -> None:
579
582
  """Delete the newly added in-memory index structure"""
580
583
  del self.idxs_by_name[idx_name]
581
- del self.idx_md[idx_id]
584
+ del self._tbl_md.index_md[idx_id]
582
585
  self.next_idx_id = idx_id
583
586
 
584
587
  # Run cleanup only if there has been an exception; otherwise, skip cleanup.
@@ -596,47 +599,48 @@ class TableVersion:
596
599
 
597
600
  def drop_index(self, idx_id: int) -> None:
598
601
  assert not self.is_snapshot
599
- assert idx_id in self.idx_md
602
+ assert idx_id in self._tbl_md.index_md
600
603
 
601
604
  # we're creating a new schema version
602
605
  self.version += 1
603
- preceding_schema_version = self.schema_version
606
+ self.preceding_schema_version = self.schema_version
604
607
  self.schema_version = self.version
605
- idx_md = self.idx_md[idx_id]
608
+ idx_md = self._tbl_md.index_md[idx_id]
606
609
  idx_md.schema_version_drop = self.schema_version
607
610
  assert idx_md.name in self.idxs_by_name
608
611
  idx_info = self.idxs_by_name[idx_md.name]
609
612
  # remove this index entry from the active indexes (in memory)
610
613
  # and the index metadata (in persistent table metadata)
614
+ # TODO: this is wrong, it breaks revert()
611
615
  del self.idxs_by_name[idx_md.name]
612
- del self.idx_md[idx_id]
616
+ del self._tbl_md.index_md[idx_id]
613
617
 
614
618
  self._drop_columns([idx_info.val_col, idx_info.undo_col])
615
- self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
619
+ self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
616
620
  _logger.info(f'Dropped index {idx_md.name} on table {self.name}')
617
621
 
618
622
  def add_columns(
619
623
  self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
620
624
  ) -> UpdateStatus:
621
- """Adds a column to the table."""
625
+ """Adds columns to the table."""
622
626
  assert not self.is_snapshot
623
- assert all(is_valid_identifier(col.name) for col in cols)
627
+ assert all(is_valid_identifier(col.name) for col in cols if col.name is not None)
624
628
  assert all(col.stored is not None for col in cols)
625
- assert all(col.name not in self.cols_by_name for col in cols)
629
+ assert all(col.name not in self.cols_by_name for col in cols if col.name is not None)
626
630
  for col in cols:
627
- col.tbl = self.create_handle()
631
+ col.tbl = self
628
632
  col.id = self.next_col_id
629
633
  self.next_col_id += 1
630
634
 
631
635
  # we're creating a new schema version
632
636
  self.version += 1
633
- preceding_schema_version = self.schema_version
637
+ self.preceding_schema_version = self.schema_version
634
638
  self.schema_version = self.version
635
639
  index_cols: dict[Column, tuple[index.BtreeIndex, Column, Column]] = {}
636
640
  all_cols: list[Column] = []
637
641
  for col in cols:
638
642
  all_cols.append(col)
639
- if self._is_btree_indexable(col):
643
+ if col.name is not None and self._is_btree_indexable(col):
640
644
  idx = index.BtreeIndex(col)
641
645
  val_col, undo_col = self._create_index_columns(idx)
642
646
  index_cols[col] = (idx, val_col, undo_col)
@@ -644,10 +648,10 @@ class TableVersion:
644
648
  all_cols.append(undo_col)
645
649
  # Add all columns
646
650
  status = self._add_columns(all_cols, print_stats=print_stats, on_error=on_error)
647
- # Create indices and their mds
651
+ # Create indices and their md records
648
652
  for col, (idx, val_col, undo_col) in index_cols.items():
649
653
  self._create_index(col, val_col, undo_col, idx_name=None, idx=idx)
650
- self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
654
+ self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
651
655
  _logger.info(f'Added columns {[col.name for col in cols]} to table {self.name}, new version: {self.version}')
652
656
 
653
657
  msg = (
@@ -685,6 +689,23 @@ class TableVersion:
685
689
  col.check_value_expr()
686
690
  self._record_refd_columns(col)
687
691
 
692
+ # also add to stored md
693
+ self._tbl_md.column_md[col.id] = schema.ColumnMd(
694
+ id=col.id,
695
+ col_type=col.col_type.as_dict(),
696
+ is_pk=col.is_pk,
697
+ schema_version_add=col.schema_version_add,
698
+ schema_version_drop=col.schema_version_drop,
699
+ value_expr=col.value_expr.as_dict() if col.value_expr is not None else None,
700
+ stored=col.stored,
701
+ )
702
+ if col.name is not None:
703
+ self._schema_version_md.columns[col.id] = schema.SchemaColumn(
704
+ name=col.name,
705
+ pos=len(self.cols_by_name),
706
+ media_validation=col._media_validation.name.lower() if col._media_validation is not None else None,
707
+ )
708
+
688
709
  if col.is_stored:
689
710
  self.store_tbl.add_column(col)
690
711
 
@@ -731,7 +752,7 @@ class TableVersion:
731
752
  num_rows=row_count,
732
753
  num_computed_values=row_count,
733
754
  num_excs=num_excs,
734
- cols_with_excs=[f'{col.tbl.get().name}.{col.name}' for col in cols_with_excs if col.name is not None],
755
+ cols_with_excs=[f'{col.tbl.name}.{col.name}' for col in cols_with_excs if col.name is not None],
735
756
  )
736
757
 
737
758
  def drop_column(self, col: Column) -> None:
@@ -741,7 +762,7 @@ class TableVersion:
741
762
 
742
763
  # we're creating a new schema version
743
764
  self.version += 1
744
- preceding_schema_version = self.schema_version
765
+ self.preceding_schema_version = self.schema_version
745
766
  self.schema_version = self.version
746
767
 
747
768
  # drop this column and all dependent index columns and indices
@@ -751,15 +772,17 @@ class TableVersion:
751
772
  if idx_info.col != col:
752
773
  continue
753
774
  dropped_cols.extend([idx_info.val_col, idx_info.undo_col])
754
- idx_md = self.idx_md[idx_info.id]
775
+ idx_md = self._tbl_md.index_md[idx_info.id]
755
776
  idx_md.schema_version_drop = self.schema_version
756
777
  assert idx_md.name in self.idxs_by_name
757
778
  dropped_idx_names.append(idx_md.name)
779
+
758
780
  # update idxs_by_name
759
781
  for idx_name in dropped_idx_names:
760
782
  del self.idxs_by_name[idx_name]
783
+
761
784
  self._drop_columns(dropped_cols)
762
- self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
785
+ self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
763
786
  _logger.info(f'Dropped column {col.name} from table {self.name}, new version: {self.version}')
764
787
 
765
788
  def _drop_columns(self, cols: Iterable[Column]) -> None:
@@ -780,6 +803,14 @@ class TableVersion:
780
803
  del self.cols_by_name[col.name]
781
804
  assert col.id in self.cols_by_id
782
805
  del self.cols_by_id[col.id]
806
+ # update stored md
807
+ self._tbl_md.column_md[col.id].schema_version_drop = col.schema_version_drop
808
+ if col.name is not None:
809
+ del self._schema_version_md.columns[col.id]
810
+
811
+ # update positions
812
+ for pos, schema_col in enumerate(self._schema_version_md.columns.values()):
813
+ schema_col.pos = pos
783
814
 
784
815
  self.store_tbl.create_sa_tbl()
785
816
 
@@ -796,13 +827,14 @@ class TableVersion:
796
827
  del self.cols_by_name[old_name]
797
828
  col.name = new_name
798
829
  self.cols_by_name[new_name] = col
830
+ self._schema_version_md.columns[col.id].name = new_name
799
831
 
800
832
  # we're creating a new schema version
801
833
  self.version += 1
802
- preceding_schema_version = self.schema_version
834
+ self.preceding_schema_version = self.schema_version
803
835
  self.schema_version = self.version
804
836
 
805
- self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
837
+ self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
806
838
  _logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
807
839
 
808
840
  def set_comment(self, new_comment: Optional[str]) -> None:
@@ -821,9 +853,9 @@ class TableVersion:
821
853
  def _create_schema_version(self) -> None:
822
854
  # we're creating a new schema version
823
855
  self.version += 1
824
- preceding_schema_version = self.schema_version
856
+ self.preceding_schema_version = self.schema_version
825
857
  self.schema_version = self.version
826
- self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
858
+ self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
827
859
  _logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
828
860
 
829
861
  def insert(
@@ -838,7 +870,7 @@ class TableVersion:
838
870
  """
839
871
  from pixeltable.plan import Planner
840
872
 
841
- assert self.is_insertable()
873
+ assert self.is_insertable
842
874
  assert (rows is None) != (df is None) # Exactly one must be specified
843
875
  if rows is not None:
844
876
  plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
@@ -848,8 +880,8 @@ class TableVersion:
848
880
  # this is a base table; we generate rowids during the insert
849
881
  def rowids() -> Iterator[int]:
850
882
  while True:
851
- rowid = self.next_rowid
852
- self.next_rowid += 1
883
+ rowid = self.next_row_id
884
+ self.next_row_id += 1
853
885
  yield rowid
854
886
 
855
887
  return self._insert(plan, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception)
@@ -874,7 +906,7 @@ class TableVersion:
874
906
  result.num_excs = num_excs
875
907
  result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
876
908
  result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
877
- self._update_md(timestamp)
909
+ self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=False)
878
910
 
879
911
  # update views
880
912
  for view in self.mutable_views:
@@ -1038,13 +1070,13 @@ class TableVersion:
1038
1070
  self.store_tbl.delete_rows(
1039
1071
  self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause
1040
1072
  )
1041
- self._update_md(timestamp)
1073
+ self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
1042
1074
 
1043
1075
  if cascade:
1044
1076
  base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
1045
1077
  # propagate to views
1046
1078
  for view in self.mutable_views:
1047
- recomputed_cols = [col for col in recomputed_view_cols if col.tbl == view]
1079
+ recomputed_cols = [col for col in recomputed_view_cols if col.tbl.id == view.id]
1048
1080
  plan = None
1049
1081
  if len(recomputed_cols) > 0:
1050
1082
  from pixeltable.plan import Planner
@@ -1065,7 +1097,7 @@ class TableVersion:
1065
1097
  Args:
1066
1098
  where: a predicate to filter rows to delete.
1067
1099
  """
1068
- assert self.is_insertable()
1100
+ assert self.is_insertable
1069
1101
  from pixeltable.exprs import Expr
1070
1102
  from pixeltable.plan import Planner
1071
1103
 
@@ -1093,14 +1125,22 @@ class TableVersion:
1093
1125
  Returns:
1094
1126
  number of deleted rows
1095
1127
  """
1128
+ # print(f'calling sql_expr()')
1096
1129
  sql_where_clause = where.sql_expr(exprs.SqlElementCache()) if where is not None else None
1130
+ # #print(f'sql_where_clause={str(sql_where_clause) if sql_where_clause is not None else None}')
1131
+ # sql_cols: list[sql.Column] = []
1132
+ # def collect_cols(col) -> None:
1133
+ # sql_cols.append(col)
1134
+ # sql.sql.visitors.traverse(sql_where_clause, {}, {'column': collect_cols})
1135
+ # x = [f'{str(c)}:{hash(c)}:{id(c.table)}' for c in sql_cols]
1136
+ # print(f'where_clause cols: {x}')
1097
1137
  num_rows = self.store_tbl.delete_rows(
1098
1138
  self.version + 1, base_versions=base_versions, match_on_vmin=False, where_clause=sql_where_clause
1099
1139
  )
1100
1140
  if num_rows > 0:
1101
1141
  # we're creating a new version
1102
1142
  self.version += 1
1103
- self._update_md(timestamp)
1143
+ self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
1104
1144
  for view in self.mutable_views:
1105
1145
  num_rows += view.get().propagate_delete(
1106
1146
  where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
@@ -1114,17 +1154,13 @@ class TableVersion:
1114
1154
  raise excs.Error('Cannot revert version 0')
1115
1155
  self._revert()
1116
1156
 
1117
- def _delete_column(self, col: Column) -> None:
1118
- """Physically remove the column from the schema and the store table"""
1119
- if col.is_stored:
1120
- self.store_tbl.drop_column(col)
1121
- self.cols.remove(col)
1122
- if col.name is not None:
1123
- del self.cols_by_name[col.name]
1124
- del self.cols_by_id[col.id]
1125
-
1126
1157
  def _revert(self) -> None:
1127
- """Reverts this table version and propagates to views"""
1158
+ """
1159
+ Reverts the stored metadata for this table version and propagates to views.
1160
+
1161
+ Doesn't attempt to revert the in-memory metadata, but instead invalidates this TableVersion instance
1162
+ and relies on Catalog to reload it
1163
+ """
1128
1164
  conn = Env.get().conn
1129
1165
  # make sure we don't have a snapshot referencing this version
1130
1166
  # (unclear how to express this with sqlalchemy)
@@ -1157,109 +1193,206 @@ class TableVersion:
1157
1193
  stmt = sql.update(self.store_tbl.sa_tbl).values(set_clause).where(self.store_tbl.sa_tbl.c.v_max == self.version)
1158
1194
  conn.execute(stmt)
1159
1195
 
1160
- # revert schema changes
1196
+ # revert schema changes:
1197
+ # - undo changes to self._tbl_md and write that back
1198
+ # - delete newly-added TableVersion/TableSchemaVersion records
1161
1199
  if self.version == self.schema_version:
1162
- # delete newly-added columns
1200
+ # physically delete newly-added columns and remove them from the stored md
1163
1201
  added_cols = [col for col in self.cols if col.schema_version_add == self.schema_version]
1164
1202
  if len(added_cols) > 0:
1165
- next_col_id = min(col.id for col in added_cols)
1203
+ self._tbl_md.next_col_id = min(col.id for col in added_cols)
1166
1204
  for col in added_cols:
1167
- self._delete_column(col)
1168
- self.next_col_id = next_col_id
1205
+ if col.is_stored:
1206
+ self.store_tbl.drop_column(col)
1207
+ del self._tbl_md.column_md[col.id]
1169
1208
 
1170
1209
  # remove newly-added indices from the lookup structures
1171
1210
  # (the value and undo columns got removed in the preceding step)
1172
- added_idx_md = [md for md in self.idx_md.values() if md.schema_version_add == self.schema_version]
1211
+ added_idx_md = [md for md in self._tbl_md.index_md.values() if md.schema_version_add == self.schema_version]
1173
1212
  if len(added_idx_md) > 0:
1174
- next_idx_id = min(md.id for md in added_idx_md)
1213
+ self._tbl_md.next_idx_id = min(md.id for md in added_idx_md)
1175
1214
  for md in added_idx_md:
1176
- del self.idx_md[md.id]
1177
- del self.idxs_by_name[md.name]
1178
- self.next_idx_id = next_idx_id
1215
+ # TODO: drop the index
1216
+ del self._tbl_md.index_md[md.id]
1179
1217
 
1180
1218
  # make newly-dropped columns visible again
1181
- dropped_cols = [col for col in self.cols if col.schema_version_drop == self.schema_version]
1182
- for col in dropped_cols:
1183
- col.schema_version_drop = None
1219
+ dropped_col_md = [
1220
+ md for md in self._tbl_md.column_md.values() if md.schema_version_drop == self.schema_version
1221
+ ]
1222
+ for col_md in dropped_col_md:
1223
+ col_md.schema_version_drop = None
1184
1224
 
1185
1225
  # make newly-dropped indices visible again
1186
- dropped_idx_md = [md for md in self.idx_md.values() if md.schema_version_drop == self.schema_version]
1187
- for md in dropped_idx_md:
1188
- md.schema_version_drop = None
1189
-
1190
- session = Env.get().session
1191
- # we need to determine the preceding schema version and reload the schema
1192
- schema_version_md_dict = (
1193
- session.query(schema.TableSchemaVersion.md)
1194
- .where(schema.TableSchemaVersion.tbl_id == self.id)
1195
- .where(schema.TableSchemaVersion.schema_version == self.schema_version)
1196
- .scalar()
1197
- )
1198
- preceding_schema_version = schema_version_md_dict['preceding_schema_version']
1199
- preceding_schema_version_md_dict = (
1200
- session.query(schema.TableSchemaVersion.md)
1201
- .where(schema.TableSchemaVersion.tbl_id == self.id)
1202
- .where(schema.TableSchemaVersion.schema_version == preceding_schema_version)
1203
- .scalar()
1204
- )
1205
- preceding_schema_version_md = schema.md_from_dict(
1206
- schema.TableSchemaVersionMd, preceding_schema_version_md_dict
1207
- )
1208
- tbl_md = self._create_tbl_md()
1209
- self._init_schema(tbl_md, preceding_schema_version_md)
1226
+ dropped_idx_md = [
1227
+ md for md in self._tbl_md.index_md.values() if md.schema_version_drop == self.schema_version
1228
+ ]
1229
+ for idx_md in dropped_idx_md:
1230
+ idx_md.schema_version_drop = None
1210
1231
 
1211
1232
  conn.execute(
1212
1233
  sql.delete(schema.TableSchemaVersion.__table__)
1213
1234
  .where(schema.TableSchemaVersion.tbl_id == self.id)
1214
1235
  .where(schema.TableSchemaVersion.schema_version == self.schema_version)
1215
1236
  )
1216
- self.schema_version = preceding_schema_version
1217
- self.comment = preceding_schema_version_md.comment
1218
- self.num_retained_versions = preceding_schema_version_md.num_retained_versions
1237
+ self._tbl_md.current_schema_version = self._schema_version_md.preceding_schema_version
1219
1238
 
1220
1239
  conn.execute(
1221
1240
  sql.delete(schema.TableVersion.__table__)
1222
1241
  .where(schema.TableVersion.tbl_id == self.id)
1223
1242
  .where(schema.TableVersion.version == self.version)
1224
1243
  )
1244
+
1225
1245
  self.version -= 1
1226
- conn.execute(
1227
- sql.update(schema.Table.__table__)
1228
- .values({schema.Table.md: dataclasses.asdict(self._create_tbl_md())})
1229
- .where(schema.Table.id == self.id)
1230
- )
1246
+ self._write_md(new_version=False, new_version_ts=0, new_schema_version=False)
1231
1247
 
1232
1248
  # propagate to views
1249
+ views_str = ', '.join([str(v.id) for v in self.mutable_views])
1250
+ print(f'revert(): mutable_views={views_str}')
1233
1251
  for view in self.mutable_views:
1234
1252
  view.get()._revert()
1253
+
1254
+ # force reload on next operation
1255
+ self.is_validated = False
1256
+ pxt.catalog.Catalog.get().remove_tbl_version(self)
1235
1257
  _logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
1236
1258
 
1237
- def _init_external_stores(self, tbl_md: schema.TableMd) -> None:
1238
- for store_md in tbl_md.external_stores:
1259
+ def _init_external_stores(self) -> None:
1260
+ for store_md in self.tbl_md.external_stores:
1239
1261
  store_cls = resolve_symbol(store_md['class'])
1240
1262
  assert isinstance(store_cls, type) and issubclass(store_cls, pxt.io.ExternalStore)
1241
1263
  store = store_cls.from_dict(store_md['md'])
1242
1264
  self.external_stores[store.name] = store
1243
1265
 
1244
1266
  def link_external_store(self, store: pxt.io.ExternalStore) -> None:
1245
- store.link(self) # May result in additional metadata changes
1267
+ self.version += 1
1268
+ self.preceding_schema_version = self.schema_version
1269
+ self.schema_version = self.version
1270
+
1246
1271
  self.external_stores[store.name] = store
1247
- self._update_md(time.time(), update_tbl_version=False)
1272
+ self._tbl_md.external_stores.append(
1273
+ {'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()}
1274
+ )
1275
+ self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
1276
+
1277
+ def unlink_external_store(self, store: pxt.io.ExternalStore) -> None:
1278
+ del self.external_stores[store.name]
1279
+ self.version += 1
1280
+ self.preceding_schema_version = self.schema_version
1281
+ self.schema_version = self.version
1282
+ idx = next(i for i, store_md in enumerate(self._tbl_md.external_stores) if store_md['md']['name'] == store.name)
1283
+ self._tbl_md.external_stores.pop(idx)
1284
+ self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
1285
+
1286
+ @property
1287
+ def tbl_md(self) -> schema.TableMd:
1288
+ return self._tbl_md
1289
+
1290
+ @property
1291
+ def schema_version_md(self) -> schema.TableSchemaVersionMd:
1292
+ return self._schema_version_md
1293
+
1294
+ @property
1295
+ def view_md(self) -> Optional[schema.ViewMd]:
1296
+ return self._tbl_md.view_md
1297
+
1298
+ @property
1299
+ def name(self) -> str:
1300
+ return self._tbl_md.name
1301
+
1302
+ @property
1303
+ def user(self) -> Optional[str]:
1304
+ return self._tbl_md.user
1305
+
1306
+ @property
1307
+ def is_replica(self) -> bool:
1308
+ return self._tbl_md.is_replica
1309
+
1310
+ @property
1311
+ def comment(self) -> str:
1312
+ return self._schema_version_md.comment
1313
+
1314
+ @comment.setter
1315
+ def comment(self, c: str) -> None:
1316
+ assert self.effective_version is None
1317
+ self._schema_version_md.comment = c
1318
+
1319
+ @property
1320
+ def num_retained_versions(self) -> int:
1321
+ return self._schema_version_md.num_retained_versions
1248
1322
 
1249
- def unlink_external_store(self, store_name: str, delete_external_data: bool) -> None:
1250
- assert store_name in self.external_stores
1251
- store = self.external_stores[store_name]
1252
- store.unlink(self) # May result in additional metadata changes
1253
- del self.external_stores[store_name]
1254
- self._update_md(time.time(), update_tbl_version=False)
1323
+ @num_retained_versions.setter
1324
+ def num_retained_versions(self, n: int) -> None:
1325
+ assert self.effective_version is None
1326
+ self._schema_version_md.num_retained_versions = n
1255
1327
 
1256
- if delete_external_data and isinstance(store, pxt.io.external_store.Project):
1257
- store.delete()
1328
+ @property
1329
+ def version(self) -> int:
1330
+ # if this is a snapshot instance, we need to ignore current_version
1331
+ return self._tbl_md.current_version if self.effective_version is None else self.effective_version
1332
+
1333
+ @version.setter
1334
+ def version(self, version: int) -> None:
1335
+ assert self.effective_version is None
1336
+ self._tbl_md.current_version = version
1337
+
1338
+ @property
1339
+ def schema_version(self) -> int:
1340
+ return self._schema_version_md.schema_version
1341
+
1342
+ @schema_version.setter
1343
+ def schema_version(self, version: int) -> None:
1344
+ assert self.effective_version is None
1345
+ self._tbl_md.current_schema_version = version
1346
+ self._schema_version_md.schema_version = version
1347
+
1348
+ @property
1349
+ def preceding_schema_version(self) -> int:
1350
+ return self._schema_version_md.preceding_schema_version
1351
+
1352
+ @preceding_schema_version.setter
1353
+ def preceding_schema_version(self, v: int) -> None:
1354
+ assert self.effective_version is None
1355
+ self._schema_version_md.preceding_schema_version = v
1356
+
1357
+ @property
1358
+ def media_validation(self) -> MediaValidation:
1359
+ return MediaValidation[self._schema_version_md.media_validation.upper()]
1360
+
1361
+ @property
1362
+ def next_col_id(self) -> int:
1363
+ return self._tbl_md.next_col_id
1364
+
1365
+ @next_col_id.setter
1366
+ def next_col_id(self, id: int) -> None:
1367
+ assert self.effective_version is None
1368
+ self._tbl_md.next_col_id = id
1369
+
1370
+ @property
1371
+ def next_idx_id(self) -> int:
1372
+ return self._tbl_md.next_idx_id
1373
+
1374
+ @next_idx_id.setter
1375
+ def next_idx_id(self, id: int) -> None:
1376
+ assert self.effective_version is None
1377
+ self._tbl_md.next_idx_id = id
1378
+
1379
+ @property
1380
+ def next_row_id(self) -> int:
1381
+ return self._tbl_md.next_row_id
1382
+
1383
+ @next_row_id.setter
1384
+ def next_row_id(self, id: int) -> None:
1385
+ assert self.effective_version is None
1386
+ self._tbl_md.next_row_id = id
1258
1387
 
1259
1388
  @property
1260
1389
  def is_snapshot(self) -> bool:
1261
1390
  return self.effective_version is not None
1262
1391
 
1392
+ @property
1393
+ def is_mutable(self) -> bool:
1394
+ return not self.is_snapshot and not self.is_replica
1395
+
1263
1396
  @property
1264
1397
  def is_view(self) -> bool:
1265
1398
  return self.view_md is not None
@@ -1272,6 +1405,7 @@ class TableVersion:
1272
1405
  def is_component_view(self) -> bool:
1273
1406
  return self.iterator_cls is not None
1274
1407
 
1408
+ @property
1275
1409
  def is_insertable(self) -> bool:
1276
1410
  """Returns True if this corresponds to an InsertableTable"""
1277
1411
  return not self.is_snapshot and not self.is_view
@@ -1363,24 +1497,6 @@ class TableVersion:
1363
1497
  {'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()} for store in stores
1364
1498
  ]
1365
1499
 
1366
- def _create_tbl_md(self) -> schema.TableMd:
1367
- return schema.TableMd(
1368
- tbl_id=str(self.id),
1369
- name=self.name,
1370
- user=self.user,
1371
- is_replica=self.is_replica,
1372
- current_version=self.version,
1373
- current_schema_version=self.schema_version,
1374
- next_col_id=self.next_col_id,
1375
- next_idx_id=self.next_idx_id,
1376
- next_row_id=self.next_rowid,
1377
- column_md=self._create_column_md(self.cols),
1378
- index_md=self.idx_md,
1379
- external_stores=self._create_stores_md(self.external_stores.values()),
1380
- view_md=self.view_md,
1381
- additional_md={},
1382
- )
1383
-
1384
1500
  def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
1385
1501
  return schema.TableVersionMd(
1386
1502
  tbl_id=str(self.id),