pixeltable 0.4.15__py3-none-any.whl → 0.4.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic.

Files changed (68)
  1. pixeltable/__init__.py +4 -0
  2. pixeltable/catalog/catalog.py +125 -63
  3. pixeltable/catalog/column.py +7 -2
  4. pixeltable/catalog/table.py +1 -0
  5. pixeltable/catalog/table_metadata.py +4 -0
  6. pixeltable/catalog/table_version.py +174 -117
  7. pixeltable/catalog/table_version_handle.py +4 -1
  8. pixeltable/catalog/table_version_path.py +0 -11
  9. pixeltable/catalog/view.py +6 -0
  10. pixeltable/config.py +7 -0
  11. pixeltable/dataframe.py +10 -5
  12. pixeltable/env.py +56 -19
  13. pixeltable/exec/__init__.py +2 -0
  14. pixeltable/exec/cell_materialization_node.py +231 -0
  15. pixeltable/exec/cell_reconstruction_node.py +135 -0
  16. pixeltable/exec/exec_node.py +1 -1
  17. pixeltable/exec/expr_eval/evaluators.py +1 -0
  18. pixeltable/exec/expr_eval/expr_eval_node.py +3 -0
  19. pixeltable/exec/expr_eval/globals.py +2 -0
  20. pixeltable/exec/globals.py +32 -0
  21. pixeltable/exec/object_store_save_node.py +1 -4
  22. pixeltable/exec/row_update_node.py +16 -9
  23. pixeltable/exec/sql_node.py +107 -14
  24. pixeltable/exprs/__init__.py +1 -1
  25. pixeltable/exprs/arithmetic_expr.py +23 -18
  26. pixeltable/exprs/column_property_ref.py +10 -10
  27. pixeltable/exprs/column_ref.py +2 -2
  28. pixeltable/exprs/data_row.py +106 -37
  29. pixeltable/exprs/expr.py +9 -0
  30. pixeltable/exprs/expr_set.py +14 -7
  31. pixeltable/exprs/inline_expr.py +2 -19
  32. pixeltable/exprs/json_path.py +45 -12
  33. pixeltable/exprs/row_builder.py +54 -22
  34. pixeltable/functions/__init__.py +1 -0
  35. pixeltable/functions/bedrock.py +7 -0
  36. pixeltable/functions/deepseek.py +11 -4
  37. pixeltable/functions/llama_cpp.py +7 -0
  38. pixeltable/functions/math.py +1 -1
  39. pixeltable/functions/ollama.py +7 -0
  40. pixeltable/functions/openai.py +4 -4
  41. pixeltable/functions/openrouter.py +143 -0
  42. pixeltable/functions/video.py +110 -28
  43. pixeltable/globals.py +10 -4
  44. pixeltable/io/globals.py +18 -17
  45. pixeltable/io/parquet.py +1 -1
  46. pixeltable/io/table_data_conduit.py +47 -22
  47. pixeltable/iterators/document.py +61 -23
  48. pixeltable/iterators/video.py +126 -53
  49. pixeltable/metadata/__init__.py +1 -1
  50. pixeltable/metadata/converters/convert_40.py +73 -0
  51. pixeltable/metadata/notes.py +1 -0
  52. pixeltable/plan.py +175 -46
  53. pixeltable/share/packager.py +155 -26
  54. pixeltable/store.py +2 -3
  55. pixeltable/type_system.py +5 -3
  56. pixeltable/utils/arrow.py +6 -6
  57. pixeltable/utils/av.py +65 -0
  58. pixeltable/utils/console_output.py +4 -1
  59. pixeltable/utils/exception_handler.py +5 -28
  60. pixeltable/utils/image.py +7 -0
  61. pixeltable/utils/misc.py +5 -0
  62. pixeltable/utils/object_stores.py +16 -1
  63. pixeltable/utils/s3_store.py +44 -11
  64. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/METADATA +29 -28
  65. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/RECORD +68 -61
  66. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/WHEEL +0 -0
  67. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/entry_points.txt +0 -0
  68. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/licenses/LICENSE +0 -0
@@ -11,6 +11,7 @@ from uuid import UUID
11
11
 
12
12
  import jsonschema.exceptions
13
13
  import sqlalchemy as sql
14
+ from sqlalchemy import exc as sql_exc
14
15
 
15
16
  import pixeltable as pxt
16
17
  import pixeltable.exceptions as excs
@@ -21,20 +22,16 @@ from pixeltable.metadata import schema
21
22
  from pixeltable.utils.filecache import FileCache
22
23
  from pixeltable.utils.object_stores import ObjectOps
23
24
 
24
- from .tbl_ops import TableOp
25
-
26
- if TYPE_CHECKING:
27
- from pixeltable.plan import SampleClause
28
-
29
25
  from ..func.globals import resolve_symbol
30
26
  from .column import Column
31
- from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, is_valid_identifier
27
+ from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, QColumnId, is_valid_identifier
28
+ from .tbl_ops import TableOp
32
29
  from .update_status import RowCountStats, UpdateStatus
33
30
 
34
31
  if TYPE_CHECKING:
35
32
  from pixeltable import exec, store
36
-
37
- from .table_version_handle import TableVersionHandle
33
+ from pixeltable.catalog.table_version_handle import TableVersionHandle
34
+ from pixeltable.plan import SampleClause
38
35
 
39
36
  _logger = logging.getLogger('pixeltable')
40
37
 
@@ -193,9 +190,7 @@ class TableVersion:
193
190
  """Create a snapshot copy of this TableVersion"""
194
191
  assert not self.is_snapshot
195
192
  base = self.path.base.tbl_version if self.is_view else None
196
- return TableVersion(
197
- self.id, self.tbl_md, self.version_md, self.version, self.schema_version_md, mutable_views=[], base=base
198
- )
193
+ return TableVersion(self.id, self.tbl_md, self.version_md, self.version, self.schema_version_md, [], base=base)
199
194
 
200
195
  @property
201
196
  def versioned_name(self) -> str:
@@ -204,6 +199,12 @@ class TableVersion:
204
199
  else:
205
200
  return f'{self.name}:{self.effective_version}'
206
201
 
202
+ def __repr__(self) -> str:
203
+ return (
204
+ f'TableVersion(id={self.id!r}, name={self.name!r}, '
205
+ f'version={self.version}, effective_version={self.effective_version})'
206
+ )
207
+
207
208
  @property
208
209
  def handle(self) -> 'TableVersionHandle':
209
210
  from .table_version_handle import TableVersionHandle
@@ -290,11 +291,18 @@ class TableVersion:
290
291
  comment: str,
291
292
  media_validation: MediaValidation,
292
293
  ) -> tuple[UUID, Optional[TableVersion]]:
293
- inital_md = cls.create_initial_md(name, cols, num_retained_versions, comment, media_validation, view_md=None)
294
+ initial_md = cls.create_initial_md(name, cols, num_retained_versions, comment, media_validation, view_md=None)
294
295
  cat = pxt.catalog.Catalog.get()
295
296
 
296
- tbl_id = UUID(hex=inital_md.tbl_md.tbl_id)
297
- tbl_version = cls(tbl_id, inital_md.tbl_md, inital_md.version_md, None, inital_md.schema_version_md, [])
297
+ tbl_id = UUID(hex=initial_md.tbl_md.tbl_id)
298
+ assert (tbl_id, None) not in cat._tbl_versions
299
+ tbl_version = cls(tbl_id, initial_md.tbl_md, initial_md.version_md, None, initial_md.schema_version_md, [])
300
+
301
+ @cat.register_undo_action
302
+ def _() -> None:
303
+ if (tbl_id, None) in cat._tbl_versions:
304
+ del cat._tbl_versions[tbl_id, None]
305
+
298
306
  # TODO: break this up, so that Catalog.create_table() registers tbl_version
299
307
  cat._tbl_versions[tbl_id, None] = tbl_version
300
308
  tbl_version.init()
@@ -308,8 +316,8 @@ class TableVersion:
308
316
  tbl_id=tbl_id,
309
317
  dir_id=dir_id,
310
318
  tbl_md=tbl_version.tbl_md,
311
- version_md=inital_md.version_md,
312
- schema_version_md=inital_md.schema_version_md,
319
+ version_md=initial_md.version_md,
320
+ schema_version_md=initial_md.schema_version_md,
313
321
  )
314
322
  return tbl_id, tbl_version
315
323
 
@@ -336,11 +344,14 @@ class TableVersion:
336
344
 
337
345
  @classmethod
338
346
  def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
347
+ from .catalog import TableVersionPath
348
+
339
349
  assert Env.get().in_xact
350
+ assert md.tbl_md.is_replica
340
351
  tbl_id = UUID(md.tbl_md.tbl_id)
341
352
  _logger.info(f'Creating replica table version {tbl_id}:{md.version_md.version}.')
342
353
  view_md = md.tbl_md.view_md
343
- base_path = pxt.catalog.TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
354
+ base_path = TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
344
355
  base = base_path.tbl_version if base_path is not None else None
345
356
  tbl_version = cls(
346
357
  tbl_id,
@@ -405,8 +416,8 @@ class TableVersion:
405
416
  def _init_schema(self) -> None:
406
417
  # create columns first, so the indices can reference them
407
418
  self._init_cols()
408
- if not self.is_snapshot:
409
- self._init_idxs()
419
+ self._init_idxs()
420
+
410
421
  # create the sa schema only after creating the columns and indices
411
422
  self._init_sa_schema()
412
423
 
@@ -444,39 +455,70 @@ class TableVersion:
444
455
  # self._record_refd_columns(col)
445
456
 
446
457
  def _init_idxs(self) -> None:
447
- # self.idx_md = tbl_md.index_md
448
- self.idxs_by_name = {}
449
- import pixeltable.index as index_module
450
-
451
458
  for md in self.tbl_md.index_md.values():
452
- if md.schema_version_add > self.schema_version or (
453
- md.schema_version_drop is not None and md.schema_version_drop <= self.schema_version
454
- ):
455
- # index not visible in this schema version
456
- continue
457
-
458
- # instantiate index object
459
+ # Instantiate index object. This needs to be done for all indices, even those that are not active in this
460
+ # TableVersion, so that we can make appropriate adjustments to the SA schema.
459
461
  cls_name = md.class_fqn.rsplit('.', 1)[-1]
460
- cls = getattr(index_module, cls_name)
461
- idx_col: Column
462
- if md.indexed_col_tbl_id == str(self.id):
463
- # this is a reference to one of our columns: avoid TVP.get_column_by_id() here, because we're not fully
464
- # initialized yet
465
- idx_col = self.cols_by_id[md.indexed_col_id]
466
- else:
467
- assert self.path.base is not None
468
- idx_col = self.path.base.get_column_by_id(UUID(md.indexed_col_tbl_id), md.indexed_col_id)
462
+ cls = getattr(index, cls_name)
463
+ idx_col = self._lookup_column(QColumnId(UUID(md.indexed_col_tbl_id), md.indexed_col_id))
464
+ assert idx_col is not None
469
465
  idx = cls.from_dict(idx_col, md.init_args)
470
466
 
471
467
  # fix up the sa column type of the index value and undo columns
472
- val_col = self.cols_by_id[md.index_val_col_id]
468
+ # we need to do this for all indices, not just those that are active in this TableVersion, to ensure we get
469
+ # the correct SA schema in the StoreTable.
470
+ val_col = next(col for col in self.cols if col.id == md.index_val_col_id)
473
471
  val_col.sa_col_type = idx.index_sa_type()
474
- val_col._stores_cellmd = False
475
- undo_col = self.cols_by_id[md.index_val_undo_col_id]
472
+ undo_col = next(col for col in self.cols if col.id == md.index_val_undo_col_id)
476
473
  undo_col.sa_col_type = idx.index_sa_type()
474
+ if not isinstance(idx, index.EmbeddingIndex):
475
+ # Historically, the intent has been not to store cellmd data, even for embedding indices. However,
476
+ # the cellmd columns get created anyway, even if stores_cellmd is set to `False` here, due to the
477
+ # timing of index column creation. In order to ensure that SA schemas align with what is actually in
478
+ # the physical tables, we keep this `True` for embedding indices.
479
+ # TODO: Decide whether index columns should store cellmd data.
480
+ # - If not, set to `False`, fix the column creation timing issue, and add a migration script to
481
+ # remedy existing cellmd columns.
482
+ # - If so, remove this TODO.
483
+ val_col._stores_cellmd = False
477
484
  undo_col._stores_cellmd = False
478
- idx_info = self.IndexInfo(id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col)
479
- self.idxs_by_name[md.name] = idx_info
485
+
486
+ # The index is active in this TableVersion provided that:
487
+ # (i) the TableVersion supports indices (either it's not a snapshot, or it's a replica at
488
+ # the head version); and
489
+ # (ii) the index was created on or before the schema version of this TableVersion; and
490
+ # (iii) the index was not dropped on or before the schema version of this TableVersion.
491
+ supports_idxs = self.effective_version is None or (
492
+ self.tbl_md.is_replica and self.effective_version == self.tbl_md.current_version
493
+ )
494
+ if (
495
+ supports_idxs
496
+ and md.schema_version_add <= self.schema_version
497
+ and (md.schema_version_drop is None or md.schema_version_drop > self.schema_version)
498
+ ):
499
+ # Since the index is present in this TableVersion, its associated columns must be as well.
500
+ # Sanity-check this.
501
+ assert md.indexed_col_id in self.cols_by_id
502
+ assert md.index_val_col_id in self.cols_by_id
503
+ assert md.index_val_undo_col_id in self.cols_by_id
504
+ idx_info = self.IndexInfo(
505
+ id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col
506
+ )
507
+ self.idxs_by_name[md.name] = idx_info
508
+
509
+ def _lookup_column(self, id: QColumnId) -> Column | None:
510
+ """
511
+ Look up the column with the given table id and column id, searching through the ancestors of this TableVersion
512
+ to find it. We avoid referencing TableVersionPath in order to work properly with snapshots as well.
513
+
514
+ This will search through *all* known columns, including columns that are not visible in this TableVersion.
515
+ """
516
+ if id.tbl_id == self.id:
517
+ return next(col for col in self.cols if col.id == id.col_id)
518
+ elif self.base is not None:
519
+ return self.base.get()._lookup_column(id)
520
+ else:
521
+ return None
480
522
 
481
523
  def _init_sa_schema(self) -> None:
482
524
  # create the sqlalchemy schema; do this after instantiating columns, in order to determine whether they
@@ -507,9 +549,7 @@ class TableVersion:
507
549
 
508
550
  def add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
509
551
  # we're creating a new schema version
510
- self.version += 1
511
- self.created_at = time.time()
512
- self.schema_version = self.version
552
+ self.bump_version(bump_schema_version=True)
513
553
  status = self._add_index(col, idx_name, idx)
514
554
  self._write_md(new_version=True, new_schema_version=True)
515
555
  _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
@@ -620,9 +660,7 @@ class TableVersion:
620
660
  assert idx_id in self._tbl_md.index_md
621
661
 
622
662
  # we're creating a new schema version
623
- self.version += 1
624
- self.created_at = time.time()
625
- self.schema_version = self.version
663
+ self.bump_version(bump_schema_version=True)
626
664
  idx_md = self._tbl_md.index_md[idx_id]
627
665
  idx_md.schema_version_drop = self.schema_version
628
666
  assert idx_md.name in self.idxs_by_name
@@ -651,9 +689,7 @@ class TableVersion:
651
689
  self.next_col_id += 1
652
690
 
653
691
  # we're creating a new schema version
654
- self.version += 1
655
- self.created_at = time.time()
656
- self.schema_version = self.version
692
+ self.bump_version(bump_schema_version=True)
657
693
  index_cols: dict[Column, tuple[index.BtreeIndex, Column, Column]] = {}
658
694
  all_cols: list[Column] = []
659
695
  for col in cols:
@@ -685,7 +721,11 @@ class TableVersion:
685
721
  self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
686
722
  ) -> UpdateStatus:
687
723
  """Add and populate columns within the current transaction"""
724
+ from pixeltable.catalog import Catalog
725
+ from pixeltable.plan import Planner
726
+
688
727
  cols_to_add = list(cols)
728
+
689
729
  row_count = self.store_tbl.count()
690
730
  for col in cols_to_add:
691
731
  assert col.tbl is self
@@ -722,17 +762,19 @@ class TableVersion:
722
762
  continue
723
763
 
724
764
  # populate the column
725
- from pixeltable.plan import Planner
726
-
727
765
  plan = Planner.create_add_column_plan(self.path, col)
728
766
  plan.ctx.num_rows = row_count
729
767
  try:
730
768
  plan.open()
731
769
  try:
732
770
  excs_per_col = self.store_tbl.load_column(col, plan, on_error == 'abort')
733
- except sql.exc.DBAPIError as exc:
734
- # Wrap the DBAPIError in an excs.Error to unify processing in the subsequent except block
735
- raise excs.Error(f'SQL error during execution of computed column `{col.name}`:\n{exc}') from exc
771
+ except sql_exc.DBAPIError as exc:
772
+ Catalog.get().convert_sql_exc(exc, self.id, self.handle, convert_db_excs=True)
773
+ # If it wasn't converted, re-raise as a generic Pixeltable error
774
+ # (this means it's not a known concurrency error; it's something else)
775
+ raise excs.Error(
776
+ f'Unexpected SQL error during execution of computed column {col.name!r}:\n{exc}'
777
+ ) from exc
736
778
  if excs_per_col > 0:
737
779
  cols_with_excs.append(col)
738
780
  num_excs += excs_per_col
@@ -740,7 +782,7 @@ class TableVersion:
740
782
  finally:
741
783
  plan.close()
742
784
 
743
- pxt.catalog.Catalog.get().record_column_dependencies(self)
785
+ Catalog.get().record_column_dependencies(self)
744
786
 
745
787
  if print_stats:
746
788
  plan.ctx.profile.print(num_rows=row_count)
@@ -760,9 +802,7 @@ class TableVersion:
760
802
  assert self.is_mutable
761
803
 
762
804
  # we're creating a new schema version
763
- self.version += 1
764
- self.created_at = time.time()
765
- self.schema_version = self.version
805
+ self.bump_version(bump_schema_version=True)
766
806
 
767
807
  # drop this column and all dependent index columns and indices
768
808
  dropped_cols = [col]
@@ -826,9 +866,7 @@ class TableVersion:
826
866
  self._schema_version_md.columns[col.id].name = new_name
827
867
 
828
868
  # we're creating a new schema version
829
- self.version += 1
830
- self.created_at = time.time()
831
- self.schema_version = self.version
869
+ self.bump_version(bump_schema_version=True)
832
870
 
833
871
  self._write_md(new_version=True, new_schema_version=True)
834
872
  _logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
@@ -848,9 +886,7 @@ class TableVersion:
848
886
 
849
887
  def _create_schema_version(self) -> None:
850
888
  # we're creating a new schema version
851
- self.version += 1
852
- self.created_at = time.time()
853
- self.schema_version = self.version
889
+ self.bump_version(bump_schema_version=True)
854
890
  self._write_md(new_version=True, new_schema_version=True)
855
891
  _logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
856
892
 
@@ -897,8 +933,7 @@ class TableVersion:
897
933
  ) -> UpdateStatus:
898
934
  """Insert rows produced by exec_plan and propagate to views"""
899
935
  # we're creating a new version
900
- self.version += 1
901
- self.created_at = timestamp
936
+ self.bump_version(timestamp, bump_schema_version=False)
902
937
  cols_with_excs, row_counts = self.store_tbl.insert_rows(
903
938
  exec_plan, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc
904
939
  )
@@ -933,10 +968,11 @@ class TableVersion:
933
968
  cascade: if True, also update all computed columns that transitively depend on the updated columns,
934
969
  including within views.
935
970
  """
936
- assert self.is_mutable
937
-
971
+ from pixeltable.exprs import SqlElementCache
938
972
  from pixeltable.plan import Planner
939
973
 
974
+ assert self.is_mutable
975
+
940
976
  update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True, allow_media=True)
941
977
  if where is not None:
942
978
  if not isinstance(where, exprs.Expr):
@@ -947,7 +983,6 @@ class TableVersion:
947
983
  raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
948
984
 
949
985
  plan, updated_cols, recomputed_cols = Planner.create_update_plan(self.path, update_spec, [], where, cascade)
950
- from pixeltable.exprs import SqlElementCache
951
986
 
952
987
  result = self.propagate_update(
953
988
  plan,
@@ -974,11 +1009,11 @@ class TableVersion:
974
1009
  batch: one dict per row, each mapping Columns to LiteralExprs representing the new values
975
1010
  rowids: if not empty, one tuple per row, each containing the rowid values for the corresponding row in batch
976
1011
  """
1012
+ from pixeltable.plan import Planner
1013
+
977
1014
  # if we do lookups of rowids, we must have one for each row in the batch
978
1015
  assert len(rowids) == 0 or len(rowids) == len(batch)
979
1016
 
980
- from pixeltable.plan import Planner
981
-
982
1017
  plan, row_update_node, delete_where_clause, updated_cols, recomputed_cols = Planner.create_batch_update_plan(
983
1018
  self.path, batch, rowids, cascade=cascade
984
1019
  )
@@ -1050,14 +1085,14 @@ class TableVersion:
1050
1085
  def recompute_columns(
1051
1086
  self, col_names: list[str], where: exprs.Expr | None = None, errors_only: bool = False, cascade: bool = True
1052
1087
  ) -> UpdateStatus:
1088
+ from pixeltable.exprs import CompoundPredicate, SqlElementCache
1089
+ from pixeltable.plan import Planner
1090
+
1053
1091
  assert self.is_mutable
1054
1092
  assert all(name in self.cols_by_name for name in col_names)
1055
1093
  assert len(col_names) > 0
1056
1094
  assert len(col_names) == 1 or not errors_only
1057
1095
 
1058
- from pixeltable.exprs import CompoundPredicate
1059
- from pixeltable.plan import Planner
1060
-
1061
1096
  target_columns = [self.cols_by_name[name] for name in col_names]
1062
1097
  where_clause: Optional[exprs.Expr] = None
1063
1098
  if where is not None:
@@ -1072,7 +1107,6 @@ class TableVersion:
1072
1107
  plan, updated_cols, recomputed_cols = Planner.create_update_plan(
1073
1108
  self.path, update_targets={}, recompute_targets=target_columns, where_clause=where_clause, cascade=cascade
1074
1109
  )
1075
- from pixeltable.exprs import SqlElementCache
1076
1110
 
1077
1111
  result = self.propagate_update(
1078
1112
  plan,
@@ -1096,11 +1130,14 @@ class TableVersion:
1096
1130
  cascade: bool,
1097
1131
  show_progress: bool = True,
1098
1132
  ) -> UpdateStatus:
1133
+ from pixeltable.catalog import Catalog
1134
+ from pixeltable.plan import Planner
1135
+
1136
+ Catalog.get().mark_modified_tvs(self.handle)
1099
1137
  result = UpdateStatus()
1100
1138
  create_new_table_version = plan is not None
1101
1139
  if create_new_table_version:
1102
- self.version += 1
1103
- self.created_at = timestamp
1140
+ self.bump_version(timestamp, bump_schema_version=False)
1104
1141
  cols_with_excs, row_counts = self.store_tbl.insert_rows(
1105
1142
  plan, v_min=self.version, show_progress=show_progress
1106
1143
  )
@@ -1119,8 +1156,6 @@ class TableVersion:
1119
1156
  recomputed_cols = [col for col in recomputed_view_cols if col.tbl.id == view.id]
1120
1157
  plan = None
1121
1158
  if len(recomputed_cols) > 0:
1122
- from pixeltable.plan import Planner
1123
-
1124
1159
  plan = Planner.create_view_update_plan(view.get().path, recompute_targets=recomputed_cols)
1125
1160
  status = view.get().propagate_update(
1126
1161
  plan, None, recomputed_view_cols, base_versions=base_versions, timestamp=timestamp, cascade=True
@@ -1155,6 +1190,10 @@ class TableVersion:
1155
1190
  self, where: Optional[exprs.Expr], base_versions: list[Optional[int]], timestamp: float
1156
1191
  ) -> UpdateStatus:
1157
1192
  """Delete rows in this table and propagate to views"""
1193
+ from pixeltable.catalog import Catalog
1194
+
1195
+ Catalog.get().mark_modified_tvs(self.handle)
1196
+
1158
1197
  # print(f'calling sql_expr()')
1159
1198
  sql_where_clause = where.sql_expr(exprs.SqlElementCache()) if where is not None else None
1160
1199
  # #print(f'sql_where_clause={str(sql_where_clause) if sql_where_clause is not None else None}')
@@ -1171,8 +1210,7 @@ class TableVersion:
1171
1210
  result = UpdateStatus(row_count_stats=row_counts)
1172
1211
  if del_rows > 0:
1173
1212
  # we're creating a new version
1174
- self.version += 1
1175
- self.created_at = timestamp
1213
+ self.bump_version(timestamp, bump_schema_version=False)
1176
1214
  for view in self.mutable_views:
1177
1215
  status = view.get().propagate_delete(
1178
1216
  where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
@@ -1198,6 +1236,8 @@ class TableVersion:
1198
1236
  Doesn't attempt to revert the in-memory metadata, but instead invalidates this TableVersion instance
1199
1237
  and relies on Catalog to reload it
1200
1238
  """
1239
+ from pixeltable.catalog import Catalog
1240
+
1201
1241
  conn = Env.get().conn
1202
1242
  # make sure we don't have a snapshot referencing this version
1203
1243
  # (unclear how to express this with sqlalchemy)
@@ -1217,8 +1257,6 @@ class TableVersion:
1217
1257
  )
1218
1258
  )
1219
1259
 
1220
- # delete newly-added data
1221
- self.delete_media(tbl_version=self.version)
1222
1260
  conn.execute(sql.delete(self.store_tbl.sa_tbl).where(self.store_tbl.sa_tbl.c.v_min == self.version))
1223
1261
 
1224
1262
  # revert new deletions
@@ -1233,6 +1271,8 @@ class TableVersion:
1233
1271
  # revert schema changes:
1234
1272
  # - undo changes to self._tbl_md and write that back
1235
1273
  # - delete newly-added TableVersion/TableSchemaVersion records
1274
+ Catalog.get().mark_modified_tvs(self.handle)
1275
+ old_version = self.version
1236
1276
  if self.version == self.schema_version:
1237
1277
  # physically delete newly-added columns and remove them from the stored md
1238
1278
  added_cols = [col for col in self.cols if col.schema_version_add == self.schema_version]
@@ -1279,18 +1319,22 @@ class TableVersion:
1279
1319
  .where(schema.TableVersion.version == self.version)
1280
1320
  )
1281
1321
 
1282
- self.version -= 1
1322
+ self._tbl_md.current_version = self._version_md.version = self.version - 1
1323
+
1283
1324
  self._write_md(new_version=False, new_schema_version=False)
1284
1325
 
1285
1326
  # propagate to views
1286
- views_str = ', '.join([str(v.id) for v in self.mutable_views])
1287
- print(f'revert(): mutable_views={views_str}')
1288
1327
  for view in self.mutable_views:
1289
1328
  view.get()._revert()
1290
1329
 
1291
1330
  # force reload on next operation
1292
1331
  self.is_validated = False
1293
- pxt.catalog.Catalog.get().remove_tbl_version(self)
1332
+ Catalog.get().remove_tbl_version(self)
1333
+
1334
+ # delete newly-added data
1335
+ # Do this at the end, after all DB operations have completed.
1336
+ # TODO: The transaction could still fail. Really this should be done via PendingTableOps.
1337
+ self.delete_media(tbl_version=old_version)
1294
1338
  _logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
1295
1339
 
1296
1340
  def _init_external_stores(self) -> None:
@@ -1301,9 +1345,7 @@ class TableVersion:
1301
1345
  self.external_stores[store.name] = store
1302
1346
 
1303
1347
  def link_external_store(self, store: pxt.io.ExternalStore) -> None:
1304
- self.version += 1
1305
- self.created_at = time.time()
1306
- self.schema_version = self.version
1348
+ self.bump_version(bump_schema_version=True)
1307
1349
 
1308
1350
  self.external_stores[store.name] = store
1309
1351
  self._tbl_md.external_stores.append(
@@ -1313,9 +1355,7 @@ class TableVersion:
1313
1355
 
1314
1356
  def unlink_external_store(self, store: pxt.io.ExternalStore) -> None:
1315
1357
  del self.external_stores[store.name]
1316
- self.version += 1
1317
- self.created_at = time.time()
1318
- self.schema_version = self.version
1358
+ self.bump_version(bump_schema_version=True)
1319
1359
  idx = next(i for i, store_md in enumerate(self._tbl_md.external_stores) if store_md['md']['name'] == store.name)
1320
1360
  self._tbl_md.external_stores.pop(idx)
1321
1361
  self._write_md(new_version=True, new_schema_version=True)
@@ -1371,35 +1411,52 @@ class TableVersion:
1371
1411
  # if this is a snapshot instance, we need to ignore current_version
1372
1412
  return self._tbl_md.current_version if self.effective_version is None else self.effective_version
1373
1413
 
1374
- @version.setter
1375
- def version(self, version: int) -> None:
1376
- assert self.effective_version is None
1377
- self._tbl_md.current_version = version
1378
- self._version_md.version = version
1379
-
1380
1414
  @property
1381
1415
  def created_at(self) -> float:
1382
1416
  return self._version_md.created_at
1383
1417
 
1384
- @created_at.setter
1385
- def created_at(self, ts: float) -> None:
1386
- assert self.effective_version is None
1387
- self._version_md.created_at = ts
1388
-
1389
1418
  @property
1390
1419
  def schema_version(self) -> int:
1391
1420
  return self._schema_version_md.schema_version
1392
1421
 
1393
- @schema_version.setter
1394
- def schema_version(self, version: int) -> None:
1422
+ def bump_version(self, timestamp: Optional[float] = None, *, bump_schema_version: bool) -> None:
1423
+ """
1424
+ Increments the table version and adjusts all associated metadata. This will *not* trigger a database action;
1425
+ _write_md() must be called separately to persist the changes.
1426
+
1427
+ Args:
1428
+ timestamp: the creation time for the new version. Can be used to synchronize multiple metadata changes
1429
+ to the same timestamp. If `None`, then defaults to `time.time()`.
1430
+ bump_schema_version: if True, also adjusts the schema version (setting it equal to the new version)
1431
+ and associated metadata.
1432
+ """
1433
+ from pixeltable.catalog import Catalog
1434
+
1395
1435
  assert self.effective_version is None
1396
- self._tbl_md.current_schema_version = version
1397
- self._version_md.schema_version = version
1398
- self._schema_version_md.preceding_schema_version = self._schema_version_md.schema_version
1399
- self._schema_version_md.schema_version = version
1436
+
1437
+ if timestamp is None:
1438
+ timestamp = time.time()
1439
+
1440
+ Catalog.get().mark_modified_tvs(self.handle)
1441
+
1442
+ old_version = self._tbl_md.current_version
1443
+ assert self._version_md.version == old_version
1444
+ new_version = old_version + 1
1445
+ self._tbl_md.current_version = new_version
1446
+ self._version_md.version = new_version
1447
+ self._version_md.created_at = timestamp
1448
+
1449
+ if bump_schema_version:
1450
+ old_schema_version = self._tbl_md.current_schema_version
1451
+ assert self._version_md.schema_version == old_schema_version
1452
+ assert self._schema_version_md.schema_version == old_schema_version
1453
+ self._tbl_md.current_schema_version = new_version
1454
+ self._version_md.schema_version = new_version
1455
+ self._schema_version_md.preceding_schema_version = old_schema_version
1456
+ self._schema_version_md.schema_version = new_version
1400
1457
 
1401
1458
  @property
1402
- def preceding_schema_version(self) -> int:
1459
+ def preceding_schema_version(self) -> Optional[int]:
1403
1460
  return self._schema_version_md.preceding_schema_version
1404
1461
 
1405
1462
  @property
@@ -1531,8 +1588,8 @@ class TableVersion:
1531
1588
 
1532
1589
  @classmethod
1533
1590
  def from_dict(cls, d: dict) -> TableVersion:
1534
- from pixeltable import catalog
1591
+ from pixeltable.catalog import Catalog
1535
1592
 
1536
1593
  id = UUID(d['id'])
1537
1594
  effective_version = d['effective_version']
1538
- return catalog.Catalog.get().get_tbl_version(id, effective_version)
1595
+ return Catalog.get().get_tbl_version(id, effective_version)
@@ -37,6 +37,9 @@ class TableVersionHandle:
37
37
  def __hash__(self) -> int:
38
38
  return hash((self.id, self.effective_version))
39
39
 
40
+ def __repr__(self) -> str:
41
+ return f'TableVersionHandle(id={self.id!r}, effective_version={self.effective_version})'
42
+
40
43
  @property
41
44
  def is_snapshot(self) -> bool:
42
45
  return self.effective_version is not None
@@ -81,7 +84,7 @@ class ColumnHandle:
81
84
  if self.col_id not in self.tbl_version.get().cols_by_id:
82
85
  schema_version_drop = self.tbl_version.get()._tbl_md.column_md[self.col_id].schema_version_drop
83
86
  raise excs.Error(
84
- f'Column has been dropped (no record for column ID {self.col_id} in table '
87
+ f'Column was dropped (no record for column ID {self.col_id} in table '
85
88
  f'{self.tbl_version.get().versioned_name!r}; it was dropped in table version {schema_version_drop})'
86
89
  )
87
90
  return self.tbl_version.get().cols_by_id[self.col_id]
@@ -195,17 +195,6 @@ class TableVersionPath:
195
195
  else:
196
196
  return None
197
197
 
198
- def get_column_by_id(self, tbl_id: UUID, col_id: int) -> Optional[Column]:
199
- """Return the column for the given tbl/col id"""
200
- self.refresh_cached_md()
201
- if self.tbl_version.id == tbl_id:
202
- assert col_id in self._cached_tbl_version.cols_by_id
203
- return self._cached_tbl_version.cols_by_id[col_id]
204
- elif self.base is not None:
205
- return self.base.get_column_by_id(tbl_id, col_id)
206
- else:
207
- return None
208
-
209
198
  def has_column(self, col: Column) -> bool:
210
199
  """Return True if this table has the given column."""
211
200
  assert col.tbl is not None
@@ -252,6 +252,12 @@ class View(Table):
252
252
  base=cls._get_snapshot_path(tbl_version_path.base) if tbl_version_path.base is not None else None,
253
253
  )
254
254
 
255
+ def _is_named_pure_snapshot(self) -> bool:
256
+ """
257
+ Returns True if this is a named pure snapshot (i.e., a pure snapshot that is a separate schema object).
258
+ """
259
+ return self._id != self._tbl_version_path.tbl_id
260
+
255
261
  def _is_anonymous_snapshot(self) -> bool:
256
262
  """
257
263
  Returns True if this is an unnamed snapshot (i.e., a snapshot that is not a separate schema object).
pixeltable/config.py CHANGED
@@ -163,6 +163,7 @@ KNOWN_CONFIG_OPTIONS = {
163
163
  'api_key': 'API key for Pixeltable cloud',
164
164
  'r2_profile': 'AWS config profile name used to access R2 storage',
165
165
  's3_profile': 'AWS config profile name used to access S3 storage',
166
+ 'b2_profile': 'S3-compatible profile name used to access Backblaze B2 storage',
166
167
  },
167
168
  'anthropic': {'api_key': 'Anthropic API key'},
168
169
  'bedrock': {'api_key': 'AWS Bedrock API key'},
@@ -181,6 +182,12 @@ KNOWN_CONFIG_OPTIONS = {
181
182
  'api_version': 'API version if using Azure OpenAI',
182
183
  'rate_limits': 'Per-model rate limits for OpenAI API requests',
183
184
  },
185
+ 'openrouter': {
186
+ 'api_key': 'OpenRouter API key',
187
+ 'site_url': 'Optional URL for your application (for OpenRouter analytics)',
188
+ 'app_name': 'Optional name for your application (for OpenRouter analytics)',
189
+ 'rate_limit': 'Rate limit for OpenRouter API requests',
190
+ },
184
191
  'replicate': {'api_token': 'Replicate API token'},
185
192
  'together': {
186
193
  'api_key': 'Together API key',