pixeltable 0.4.17__py3-none-any.whl → 0.4.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic; consult the advisory details on the package registry for more information.

Files changed (153):
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/catalog.py +144 -118
  4. pixeltable/catalog/column.py +104 -115
  5. pixeltable/catalog/globals.py +1 -2
  6. pixeltable/catalog/insertable_table.py +44 -49
  7. pixeltable/catalog/path.py +3 -4
  8. pixeltable/catalog/schema_object.py +4 -4
  9. pixeltable/catalog/table.py +139 -124
  10. pixeltable/catalog/table_metadata.py +6 -6
  11. pixeltable/catalog/table_version.py +315 -246
  12. pixeltable/catalog/table_version_handle.py +4 -4
  13. pixeltable/catalog/table_version_path.py +9 -10
  14. pixeltable/catalog/tbl_ops.py +9 -3
  15. pixeltable/catalog/view.py +34 -28
  16. pixeltable/config.py +14 -10
  17. pixeltable/dataframe.py +69 -78
  18. pixeltable/env.py +78 -64
  19. pixeltable/exec/aggregation_node.py +6 -6
  20. pixeltable/exec/cache_prefetch_node.py +10 -10
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +16 -4
  23. pixeltable/exec/exec_node.py +5 -5
  24. pixeltable/exec/expr_eval/evaluators.py +6 -6
  25. pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
  26. pixeltable/exec/expr_eval/globals.py +6 -6
  27. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  28. pixeltable/exec/expr_eval/schedulers.py +11 -11
  29. pixeltable/exec/in_memory_data_node.py +2 -2
  30. pixeltable/exec/object_store_save_node.py +14 -17
  31. pixeltable/exec/sql_node.py +28 -27
  32. pixeltable/exprs/arithmetic_expr.py +4 -4
  33. pixeltable/exprs/array_slice.py +2 -2
  34. pixeltable/exprs/column_property_ref.py +3 -3
  35. pixeltable/exprs/column_ref.py +61 -74
  36. pixeltable/exprs/comparison.py +5 -5
  37. pixeltable/exprs/compound_predicate.py +3 -3
  38. pixeltable/exprs/data_row.py +12 -12
  39. pixeltable/exprs/expr.py +41 -31
  40. pixeltable/exprs/expr_dict.py +3 -3
  41. pixeltable/exprs/expr_set.py +3 -3
  42. pixeltable/exprs/function_call.py +14 -14
  43. pixeltable/exprs/in_predicate.py +4 -4
  44. pixeltable/exprs/inline_expr.py +8 -8
  45. pixeltable/exprs/is_null.py +1 -3
  46. pixeltable/exprs/json_mapper.py +8 -8
  47. pixeltable/exprs/json_path.py +6 -6
  48. pixeltable/exprs/literal.py +5 -5
  49. pixeltable/exprs/method_ref.py +2 -2
  50. pixeltable/exprs/object_ref.py +2 -2
  51. pixeltable/exprs/row_builder.py +14 -14
  52. pixeltable/exprs/rowid_ref.py +8 -8
  53. pixeltable/exprs/similarity_expr.py +50 -25
  54. pixeltable/exprs/sql_element_cache.py +4 -4
  55. pixeltable/exprs/string_op.py +2 -2
  56. pixeltable/exprs/type_cast.py +3 -5
  57. pixeltable/func/aggregate_function.py +8 -8
  58. pixeltable/func/callable_function.py +9 -9
  59. pixeltable/func/expr_template_function.py +3 -3
  60. pixeltable/func/function.py +15 -17
  61. pixeltable/func/function_registry.py +6 -7
  62. pixeltable/func/globals.py +2 -3
  63. pixeltable/func/mcp.py +2 -2
  64. pixeltable/func/query_template_function.py +16 -16
  65. pixeltable/func/signature.py +14 -14
  66. pixeltable/func/tools.py +11 -11
  67. pixeltable/func/udf.py +16 -18
  68. pixeltable/functions/__init__.py +1 -0
  69. pixeltable/functions/anthropic.py +7 -7
  70. pixeltable/functions/audio.py +76 -0
  71. pixeltable/functions/bedrock.py +6 -6
  72. pixeltable/functions/deepseek.py +4 -4
  73. pixeltable/functions/fireworks.py +2 -2
  74. pixeltable/functions/gemini.py +6 -6
  75. pixeltable/functions/globals.py +12 -12
  76. pixeltable/functions/groq.py +4 -4
  77. pixeltable/functions/huggingface.py +1033 -6
  78. pixeltable/functions/image.py +7 -10
  79. pixeltable/functions/llama_cpp.py +7 -7
  80. pixeltable/functions/math.py +2 -3
  81. pixeltable/functions/mistralai.py +3 -3
  82. pixeltable/functions/ollama.py +9 -9
  83. pixeltable/functions/openai.py +21 -21
  84. pixeltable/functions/openrouter.py +7 -7
  85. pixeltable/functions/string.py +21 -28
  86. pixeltable/functions/timestamp.py +7 -8
  87. pixeltable/functions/together.py +4 -6
  88. pixeltable/functions/twelvelabs.py +92 -0
  89. pixeltable/functions/video.py +36 -31
  90. pixeltable/functions/vision.py +6 -6
  91. pixeltable/functions/whisper.py +7 -7
  92. pixeltable/functions/whisperx.py +16 -16
  93. pixeltable/globals.py +75 -40
  94. pixeltable/index/base.py +12 -8
  95. pixeltable/index/btree.py +19 -22
  96. pixeltable/index/embedding_index.py +30 -39
  97. pixeltable/io/datarows.py +3 -3
  98. pixeltable/io/external_store.py +13 -16
  99. pixeltable/io/fiftyone.py +5 -5
  100. pixeltable/io/globals.py +5 -5
  101. pixeltable/io/hf_datasets.py +4 -4
  102. pixeltable/io/label_studio.py +12 -12
  103. pixeltable/io/pandas.py +6 -6
  104. pixeltable/io/parquet.py +2 -2
  105. pixeltable/io/table_data_conduit.py +12 -12
  106. pixeltable/io/utils.py +2 -2
  107. pixeltable/iterators/audio.py +2 -2
  108. pixeltable/iterators/document.py +88 -57
  109. pixeltable/iterators/video.py +66 -37
  110. pixeltable/metadata/converters/convert_18.py +2 -2
  111. pixeltable/metadata/converters/convert_19.py +2 -2
  112. pixeltable/metadata/converters/convert_20.py +2 -2
  113. pixeltable/metadata/converters/convert_21.py +2 -2
  114. pixeltable/metadata/converters/convert_22.py +2 -2
  115. pixeltable/metadata/converters/convert_24.py +2 -2
  116. pixeltable/metadata/converters/convert_25.py +2 -2
  117. pixeltable/metadata/converters/convert_26.py +2 -2
  118. pixeltable/metadata/converters/convert_29.py +4 -4
  119. pixeltable/metadata/converters/convert_34.py +2 -2
  120. pixeltable/metadata/converters/convert_36.py +2 -2
  121. pixeltable/metadata/converters/convert_38.py +2 -2
  122. pixeltable/metadata/converters/convert_39.py +1 -2
  123. pixeltable/metadata/converters/util.py +11 -13
  124. pixeltable/metadata/schema.py +22 -21
  125. pixeltable/metadata/utils.py +2 -6
  126. pixeltable/mypy/mypy_plugin.py +5 -5
  127. pixeltable/plan.py +32 -34
  128. pixeltable/share/packager.py +7 -7
  129. pixeltable/share/publish.py +3 -3
  130. pixeltable/store.py +126 -41
  131. pixeltable/type_system.py +43 -46
  132. pixeltable/utils/__init__.py +1 -2
  133. pixeltable/utils/arrow.py +4 -4
  134. pixeltable/utils/av.py +74 -38
  135. pixeltable/utils/azure_store.py +305 -0
  136. pixeltable/utils/code.py +1 -2
  137. pixeltable/utils/dbms.py +15 -19
  138. pixeltable/utils/description_helper.py +2 -3
  139. pixeltable/utils/documents.py +5 -6
  140. pixeltable/utils/exception_handler.py +2 -2
  141. pixeltable/utils/filecache.py +5 -5
  142. pixeltable/utils/formatter.py +4 -6
  143. pixeltable/utils/gcs_store.py +9 -9
  144. pixeltable/utils/local_store.py +17 -17
  145. pixeltable/utils/object_stores.py +59 -43
  146. pixeltable/utils/s3_store.py +35 -30
  147. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/METADATA +4 -4
  148. pixeltable-0.4.19.dist-info/RECORD +213 -0
  149. pixeltable/__version__.py +0 -3
  150. pixeltable-0.4.17.dist-info/RECORD +0 -211
  151. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
  152. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
  153. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
@@ -3,19 +3,21 @@ from __future__ import annotations
3
3
  import copy
4
4
  import dataclasses
5
5
  import importlib
6
+ import itertools
6
7
  import logging
7
8
  import time
8
9
  import uuid
9
- from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, Tuple
10
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Literal
10
11
  from uuid import UUID
11
12
 
12
13
  import jsonschema.exceptions
13
14
  import sqlalchemy as sql
14
15
  from sqlalchemy import exc as sql_exc
15
16
 
16
- import pixeltable as pxt
17
17
  import pixeltable.exceptions as excs
18
- from pixeltable import exprs, index
18
+ import pixeltable.exprs as exprs
19
+ import pixeltable.index as index
20
+ import pixeltable.type_system as ts
19
21
  from pixeltable.env import Env
20
22
  from pixeltable.iterators import ComponentIterator
21
23
  from pixeltable.metadata import schema
@@ -31,8 +33,12 @@ from .update_status import RowCountStats, UpdateStatus
31
33
  if TYPE_CHECKING:
32
34
  from pixeltable import exec, store
33
35
  from pixeltable.catalog.table_version_handle import TableVersionHandle
36
+ from pixeltable.dataframe import DataFrame
37
+ from pixeltable.io import ExternalStore
34
38
  from pixeltable.plan import SampleClause
35
39
 
40
+ from .table_version_path import TableVersionPath
41
+
36
42
  _logger = logging.getLogger('pixeltable')
37
43
 
38
44
 
@@ -77,14 +83,14 @@ class TableVersion:
77
83
  _version_md: schema.TableVersionMd
78
84
  _schema_version_md: schema.TableSchemaVersionMd
79
85
 
80
- effective_version: Optional[int]
81
- path: Optional[pxt.catalog.TableVersionPath] # only set for live tables; needed to resolve computed cols
82
- base: Optional[TableVersionHandle] # only set for views
83
- predicate: Optional[exprs.Expr]
84
- sample_clause: Optional['SampleClause']
86
+ effective_version: int | None
87
+ path: 'TableVersionPath' | None # only set for live tables; needed to resolve computed cols
88
+ base: TableVersionHandle | None # only set for views
89
+ predicate: exprs.Expr | None
90
+ sample_clause: 'SampleClause' | None
85
91
 
86
- iterator_cls: Optional[type[ComponentIterator]]
87
- iterator_args: Optional[exprs.InlineDict]
92
+ iterator_cls: type[ComponentIterator] | None
93
+ iterator_args: exprs.InlineDict | None
88
94
  num_iterator_cols: int
89
95
 
90
96
  # target for data operation propagation (only set for non-snapshots, and only records non-snapshot views)
@@ -96,11 +102,19 @@ class TableVersion:
96
102
  cols_by_name: dict[str, Column]
97
103
  # contains only columns visible in this version, both system and user
98
104
  cols_by_id: dict[int, Column]
99
- # contains only actively maintained indices
105
+
106
+ # True if this TableVersion instance can have indices:
107
+ # - live version of a mutable table
108
+ # - the most recent version of a replica
109
+ supports_idxs: bool
110
+
111
+ # only populated with indices visible in this TableVersion instance
112
+ idxs: dict[int, TableVersion.IndexInfo] # key: index id
100
113
  idxs_by_name: dict[str, TableVersion.IndexInfo]
114
+ idxs_by_col: dict[QColumnId, list[TableVersion.IndexInfo]]
101
115
 
102
- external_stores: dict[str, pxt.io.ExternalStore]
103
- store_tbl: Optional['store.StoreBase']
116
+ external_stores: dict[str, ExternalStore]
117
+ store_tbl: 'store.StoreBase' | None
104
118
 
105
119
  is_initialized: bool # True if init() has been called
106
120
 
@@ -123,11 +137,11 @@ class TableVersion:
123
137
  id: UUID,
124
138
  tbl_md: schema.TableMd,
125
139
  version_md: schema.TableVersionMd,
126
- effective_version: Optional[int],
140
+ effective_version: int | None,
127
141
  schema_version_md: schema.TableSchemaVersionMd,
128
142
  mutable_views: list[TableVersionHandle],
129
- base_path: Optional[pxt.catalog.TableVersionPath] = None,
130
- base: Optional[TableVersionHandle] = None,
143
+ base_path: 'TableVersionPath' | None = None,
144
+ base: TableVersionHandle | None = None,
131
145
  ):
132
146
  self.is_validated = True # a freshly constructed instance is always valid
133
147
  self.is_initialized = False
@@ -180,7 +194,12 @@ class TableVersion:
180
194
  self.cols = []
181
195
  self.cols_by_name = {}
182
196
  self.cols_by_id = {}
197
+ self.idxs = {}
183
198
  self.idxs_by_name = {}
199
+ self.idxs_by_col = {}
200
+ self.supports_idxs = self.effective_version is None or (
201
+ self.is_replica and self.effective_version == self.tbl_md.current_version
202
+ )
184
203
  self.external_stores = {}
185
204
 
186
205
  def __hash__(self) -> int:
@@ -219,19 +238,27 @@ class TableVersion:
219
238
  num_retained_versions: int,
220
239
  comment: str,
221
240
  media_validation: MediaValidation,
222
- view_md: Optional[schema.ViewMd] = None,
241
+ create_default_idxs: bool,
242
+ view_md: schema.ViewMd | None = None,
223
243
  ) -> TableVersionMd:
244
+ from .table_version_handle import TableVersionHandle
245
+
224
246
  user = Env.get().user
225
247
  timestamp = time.time()
226
248
 
249
+ tbl_id = uuid.uuid4()
250
+ tbl_id_str = str(tbl_id)
251
+ tbl_handle = TableVersionHandle(tbl_id, None)
252
+ column_ids = itertools.count(0)
253
+ index_ids = itertools.count(0)
254
+
227
255
  # assign ids, create metadata
228
- cols_by_name: dict[str, Column] = {}
229
256
  column_md: dict[int, schema.ColumnMd] = {}
230
257
  schema_col_md: dict[int, schema.SchemaColumn] = {}
231
258
  for pos, col in enumerate(cols):
232
- col.id = pos
259
+ col.tbl_handle = tbl_handle
260
+ col.id = next(column_ids)
233
261
  col.schema_version_add = 0
234
- cols_by_name[col.name] = col
235
262
  if col.is_computed:
236
263
  col.check_value_expr()
237
264
  col_md, sch_md = col.to_md(pos)
@@ -239,8 +266,39 @@ class TableVersion:
239
266
  column_md[col.id] = col_md
240
267
  schema_col_md[col.id] = sch_md
241
268
 
242
- tbl_id = uuid.uuid4()
243
- tbl_id_str = str(tbl_id)
269
+ index_md: dict[int, schema.IndexMd] = {}
270
+ if create_default_idxs and (view_md is None or not view_md.is_snapshot):
271
+ index_cols: list[Column] = []
272
+ for col in (c for c in cols if cls._is_btree_indexable(c)):
273
+ idx = index.BtreeIndex()
274
+ val_col, undo_col = cls._create_index_columns(col, idx, 0, tbl_handle, id_cb=lambda: next(column_ids))
275
+ index_cols.extend([val_col, undo_col])
276
+
277
+ idx_id = next(index_ids)
278
+ idx_cls = type(idx)
279
+ md = schema.IndexMd(
280
+ id=idx_id,
281
+ name=f'idx{idx_id}',
282
+ indexed_col_id=col.id,
283
+ indexed_col_tbl_id=tbl_id_str,
284
+ index_val_col_id=val_col.id,
285
+ index_val_undo_col_id=undo_col.id,
286
+ schema_version_add=0,
287
+ schema_version_drop=None,
288
+ class_fqn=idx_cls.__module__ + '.' + idx_cls.__name__,
289
+ init_args=idx.as_dict(),
290
+ )
291
+ index_md[idx_id] = md
292
+
293
+ for col in index_cols:
294
+ col_md, _ = col.to_md()
295
+ column_md[col.id] = col_md
296
+
297
+ assert all(column_md[id].id == id for id in column_md)
298
+ assert all(index_md[id].id == id for id in index_md)
299
+
300
+ cols.extend(index_cols)
301
+
244
302
  tbl_md = schema.TableMd(
245
303
  tbl_id=tbl_id_str,
246
304
  name=name,
@@ -248,12 +306,12 @@ class TableVersion:
248
306
  is_replica=False,
249
307
  current_version=0,
250
308
  current_schema_version=0,
251
- next_col_id=len(cols),
252
- next_idx_id=0,
309
+ next_col_id=next(column_ids),
310
+ next_idx_id=next(index_ids),
253
311
  next_row_id=0,
254
312
  view_sn=0,
255
313
  column_md=column_md,
256
- index_md={},
314
+ index_md=index_md,
257
315
  external_stores=[],
258
316
  view_md=view_md,
259
317
  additional_md={},
@@ -281,51 +339,15 @@ class TableVersion:
281
339
  )
282
340
  return TableVersionMd(tbl_md, table_version_md, schema_version_md)
283
341
 
284
- @classmethod
285
- def create(
286
- cls,
287
- dir_id: UUID,
288
- name: str,
289
- cols: list[Column],
290
- num_retained_versions: int,
291
- comment: str,
292
- media_validation: MediaValidation,
293
- ) -> tuple[UUID, Optional[TableVersion]]:
294
- initial_md = cls.create_initial_md(name, cols, num_retained_versions, comment, media_validation, view_md=None)
295
- cat = pxt.catalog.Catalog.get()
296
-
297
- tbl_id = UUID(hex=initial_md.tbl_md.tbl_id)
298
- assert (tbl_id, None) not in cat._tbl_versions
299
- tbl_version = cls(tbl_id, initial_md.tbl_md, initial_md.version_md, None, initial_md.schema_version_md, [])
300
-
301
- @cat.register_undo_action
302
- def _() -> None:
303
- if (tbl_id, None) in cat._tbl_versions:
304
- del cat._tbl_versions[tbl_id, None]
305
-
306
- # TODO: break this up, so that Catalog.create_table() registers tbl_version
307
- cat._tbl_versions[tbl_id, None] = tbl_version
308
- tbl_version.init()
309
- tbl_version.store_tbl.create()
310
- # add default indices, after creating the store table
311
- for col in tbl_version.cols_by_name.values():
312
- status = tbl_version._add_default_index(col)
313
- assert status is None or status.num_excs == 0
314
-
315
- cat.store_tbl_md(
316
- tbl_id=tbl_id,
317
- dir_id=dir_id,
318
- tbl_md=tbl_version.tbl_md,
319
- version_md=initial_md.version_md,
320
- schema_version_md=initial_md.schema_version_md,
321
- )
322
- return tbl_id, tbl_version
323
-
324
342
  def exec_op(self, op: TableOp) -> None:
325
343
  if op.create_store_table_op is not None:
326
- # don't use Catalog.begin_xact() here, to avoid accidental recursive calls to exec_op()
344
+ # this needs to be called outside of a transaction
345
+ self.store_tbl.create()
346
+
347
+ elif op.create_index_op is not None:
348
+ idx_info = self.idxs[op.create_index_op.idx_id]
327
349
  with Env.get().begin_xact():
328
- self.store_tbl.create()
350
+ self.store_tbl.create_index(idx_info.id)
329
351
 
330
352
  elif op.load_view_op is not None:
331
353
  from pixeltable.catalog import Catalog
@@ -344,7 +366,7 @@ class TableVersion:
344
366
 
345
367
  @classmethod
346
368
  def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
347
- from .catalog import TableVersionPath
369
+ from .catalog import Catalog, TableVersionPath
348
370
 
349
371
  assert Env.get().in_xact
350
372
  assert md.tbl_md.is_replica
@@ -363,7 +385,7 @@ class TableVersion:
363
385
  base_path=base_path,
364
386
  base=base,
365
387
  )
366
- cat = pxt.catalog.Catalog.get()
388
+ cat = Catalog.get()
367
389
  # We're creating a new TableVersion replica, so we should never have seen this particular
368
390
  # TableVersion instance before.
369
391
  # Actually this isn't true, because we might be re-creating a dropped replica.
@@ -373,10 +395,9 @@ class TableVersion:
373
395
  cat._tbl_versions[tbl_version.id, tbl_version.effective_version] = tbl_version
374
396
  tbl_version.init()
375
397
  tbl_version.store_tbl.create()
376
- tbl_version.store_tbl.ensure_columns_exist(col for col in tbl_version.cols if col.is_stored)
377
398
  return tbl_version
378
399
 
379
- def delete_media(self, tbl_version: Optional[int] = None) -> None:
400
+ def delete_media(self, tbl_version: int | None = None) -> None:
380
401
  # Assemble a set of column destinations and delete objects from all of them
381
402
  # None is a valid column destination which refers to the default object location
382
403
  destinations = {col.destination for col in self.cols if col.is_stored}
@@ -414,19 +435,21 @@ class TableVersion:
414
435
  self.is_initialized = True
415
436
 
416
437
  def _init_schema(self) -> None:
417
- # create columns first, so the indices can reference them
418
- self._init_cols()
419
- self._init_idxs()
420
-
421
- # create the sa schema only after creating the columns and indices
422
- self._init_sa_schema()
438
+ from pixeltable.store import StoreComponentView, StoreTable, StoreView
423
439
 
424
- # created value_exprs after everything else has been initialized
425
- for col in self.cols_by_id.values():
426
- col.init_value_expr()
440
+ # initialize IndexBase instances and collect sa_col_types
441
+ idxs: dict[int, index.IndexBase] = {}
442
+ val_col_idxs: dict[int, index.IndexBase] = {} # key: id of value column
443
+ undo_col_idxs: dict[int, index.IndexBase] = {} # key: id of undo column
444
+ for md in self.tbl_md.index_md.values():
445
+ cls_name = md.class_fqn.rsplit('.', 1)[-1]
446
+ cls = getattr(index, cls_name)
447
+ idx = cls.from_dict(md.init_args)
448
+ idxs[md.id] = idx
449
+ val_col_idxs[md.index_val_col_id] = idx
450
+ undo_col_idxs[md.index_val_undo_col_id] = idx
427
451
 
428
- def _init_cols(self) -> None:
429
- """Initialize self.cols with the columns visible in our effective version"""
452
+ # initialize Columns
430
453
  self.cols = []
431
454
  self.cols_by_name = {}
432
455
  self.cols_by_id = {}
@@ -434,77 +457,88 @@ class TableVersion:
434
457
  # point backward.
435
458
  sorted_column_md = sorted(self.tbl_md.column_md.values(), key=lambda item: item.id)
436
459
  for col_md in sorted_column_md:
460
+ col_type = ts.ColumnType.from_dict(col_md.col_type)
437
461
  schema_col_md = self.schema_version_md.columns.get(col_md.id)
438
- col = Column.from_md(col_md, self, schema_col_md)
439
- self.cols.append(col)
440
-
441
- # populate the lookup structures before Expr.from_dict()
442
- if col_md.schema_version_add > self.schema_version:
443
- # column was added after this version
444
- continue
445
- if col_md.schema_version_drop is not None and col_md.schema_version_drop <= self.schema_version:
446
- # column was dropped
447
- continue
448
- if col.name is not None:
449
- self.cols_by_name[col.name] = col
450
- self.cols_by_id[col.id] = col
451
-
452
- # # make sure to traverse columns ordered by position = order in which cols were created;
453
- # # this guarantees that references always point backwards
454
- # if not self.is_snapshot and col_md.value_expr is not None:
455
- # self._record_refd_columns(col)
462
+ media_val = (
463
+ MediaValidation[schema_col_md.media_validation.upper()]
464
+ if schema_col_md is not None and schema_col_md.media_validation is not None
465
+ else None
466
+ )
456
467
 
457
- def _init_idxs(self) -> None:
458
- for md in self.tbl_md.index_md.values():
459
- # Instantiate index object. This needs to be done for all indices, even those that are not active in this
460
- # TableVersion, so that we can make appropriate adjustments to the SA schema.
461
- cls_name = md.class_fqn.rsplit('.', 1)[-1]
462
- cls = getattr(index, cls_name)
463
- idx_col = self._lookup_column(QColumnId(UUID(md.indexed_col_tbl_id), md.indexed_col_id))
464
- assert idx_col is not None
465
- idx = cls.from_dict(idx_col, md.init_args)
466
-
467
- # fix up the sa column type of the index value and undo columns
468
- # we need to do this for all indices, not just those that are active in this TableVersion, to ensure we get
469
- # the correct SA schema in the StoreTable.
470
- val_col = next(col for col in self.cols if col.id == md.index_val_col_id)
471
- val_col.sa_col_type = idx.index_sa_type()
472
- undo_col = next(col for col in self.cols if col.id == md.index_val_undo_col_id)
473
- undo_col.sa_col_type = idx.index_sa_type()
474
- if not isinstance(idx, index.EmbeddingIndex):
475
- # Historically, the intent has been not to store cellmd data, even for embedding indices. However,
476
- # the cellmd columns get created anyway, even if stores_cellmd is set to `False` here, due to the
477
- # timing of index column creation. In order to ensure that SA schemas align with what is actually in
478
- # the physical tables, we keep this `True` for embedding indices.
479
- # TODO: Decide whether index columns should store cellmd data.
480
- # - If not, set to `False`, fix the column creation timing issue, and add a migration script to
481
- # remedy existing cellmd columns.
482
- # - If so, remove this TODO.
483
- val_col._stores_cellmd = False
484
- undo_col._stores_cellmd = False
485
-
486
- # The index is active in this TableVersion provided that:
487
- # (i) the TableVersion supports indices (either it's not a snapshot, or it's a replica at
488
- # the head version); and
489
- # (ii) the index was created on or before the schema version of this TableVersion; and
490
- # (iii) the index was not dropped on or before the schema version of this TableVersion.
491
- supports_idxs = self.effective_version is None or (
492
- self.tbl_md.is_replica and self.effective_version == self.tbl_md.current_version
468
+ stores_cellmd: bool | None = None # None: determined by the column properties (in the Column c'tor)
469
+ sa_col_type: sql.types.TypeEngine | None = None
470
+ if col_md.id in val_col_idxs:
471
+ idx = val_col_idxs[col_md.id]
472
+ # for index value columns, the index gets to override the default
473
+ stores_cellmd = idx.records_value_errors()
474
+ sa_col_type = idx.get_index_sa_type(col_type)
475
+ elif col_md.id in undo_col_idxs:
476
+ idx = undo_col_idxs[col_md.id]
477
+ # for index undo columns, we never store cellmd
478
+ stores_cellmd = False
479
+ sa_col_type = idx.get_index_sa_type(col_type)
480
+
481
+ col = Column(
482
+ col_id=col_md.id,
483
+ name=schema_col_md.name if schema_col_md is not None else None,
484
+ col_type=col_type,
485
+ is_pk=col_md.is_pk,
486
+ is_iterator_col=self.is_component_view and col_md.id < self.num_iterator_cols + 1,
487
+ stored=col_md.stored,
488
+ media_validation=media_val,
489
+ sa_col_type=sa_col_type,
490
+ schema_version_add=col_md.schema_version_add,
491
+ schema_version_drop=col_md.schema_version_drop,
492
+ stores_cellmd=stores_cellmd,
493
+ value_expr_dict=col_md.value_expr,
494
+ tbl_handle=self.handle,
495
+ destination=col_md.destination,
493
496
  )
494
- if (
495
- supports_idxs
496
- and md.schema_version_add <= self.schema_version
497
- and (md.schema_version_drop is None or md.schema_version_drop > self.schema_version)
497
+
498
+ self.cols.append(col)
499
+ # populate lookup structures before Expr.from_dict()
500
+ if col_md.schema_version_add <= self.schema_version and (
501
+ col_md.schema_version_drop is None or col_md.schema_version_drop > self.schema_version
498
502
  ):
499
- # Since the index is present in this TableVersion, its associated columns must be as well.
500
- # Sanity-check this.
501
- assert md.indexed_col_id in self.cols_by_id
502
- assert md.index_val_col_id in self.cols_by_id
503
- assert md.index_val_undo_col_id in self.cols_by_id
504
- idx_info = self.IndexInfo(
505
- id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col
503
+ if col.name is not None:
504
+ self.cols_by_name[col.name] = col
505
+ self.cols_by_id[col.id] = col
506
+
507
+ if self.supports_idxs:
508
+ # create IndexInfo for indices visible in current_version
509
+ visible_idxs = [
510
+ md
511
+ for md in self.tbl_md.index_md.values()
512
+ if md.schema_version_add <= self.schema_version
513
+ and (md.schema_version_drop is None or md.schema_version_drop > self.schema_version)
514
+ ]
515
+ for md in visible_idxs:
516
+ idx = idxs[md.id]
517
+ indexed_col_id = QColumnId(UUID(md.indexed_col_tbl_id), md.indexed_col_id)
518
+ idx_col = self._lookup_column(indexed_col_id)
519
+ info = self.IndexInfo(
520
+ id=md.id,
521
+ name=md.name,
522
+ idx=idx,
523
+ col=idx_col,
524
+ val_col=self.cols_by_id[md.index_val_col_id],
525
+ undo_col=self.cols_by_id[md.index_val_undo_col_id],
506
526
  )
507
- self.idxs_by_name[md.name] = idx_info
527
+ self.idxs[md.id] = info
528
+ self.idxs_by_name[md.name] = info
529
+ self.idxs_by_col.setdefault(indexed_col_id, []).append(info)
530
+
531
+ # create value exprs, now that we have all lookup structures in place
532
+ for col in self.cols_by_id.values():
533
+ col.init_value_expr()
534
+
535
+ # create the sqlalchemy schema, after instantiating all Columns
536
+ if self.is_component_view:
537
+ self.store_tbl = StoreComponentView(self)
538
+ elif self.is_view:
539
+ self.store_tbl = StoreView(self)
540
+ else:
541
+ self.store_tbl = StoreTable(self)
508
542
 
509
543
  def _lookup_column(self, id: QColumnId) -> Column | None:
510
544
  """
@@ -547,7 +581,7 @@ class TableVersion:
547
581
  """Return name of index in the store, which needs to be globally unique"""
548
582
  return f'idx_{self.id.hex}_{idx_id}'
549
583
 
550
- def add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
584
+ def add_index(self, col: Column, idx_name: str | None, idx: index.IndexBase) -> UpdateStatus:
551
585
  # we're creating a new schema version
552
586
  self.bump_version(bump_schema_version=True)
553
587
  status = self._add_index(col, idx_name, idx)
@@ -555,12 +589,13 @@ class TableVersion:
555
589
  _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
556
590
  return status
557
591
 
558
- def _is_btree_indexable(self, col: Column) -> bool:
592
+ @classmethod
593
+ def _is_btree_indexable(cls, col: Column) -> bool:
559
594
  if not col.stored:
560
595
  # if the column is intentionally not stored, we want to avoid the overhead of an index
561
596
  return False
562
597
  # Skip index for stored media columns produced by an iterator
563
- if col.col_type.is_media_type() and self.is_iterator_column(col):
598
+ if col.col_type.is_media_type() and col.is_iterator_col:
564
599
  return False
565
600
  if not col.col_type.is_scalar_type() and not (col.col_type.is_media_type() and not col.is_computed):
566
601
  # wrong type for a B-tree
@@ -570,53 +605,58 @@ class TableVersion:
570
605
  return False
571
606
  return True
572
607
 
573
- def _add_default_index(self, col: Column) -> Optional[UpdateStatus]:
608
+ def _add_default_index(self, col: Column) -> UpdateStatus | None:
574
609
  """Add a B-tree index on this column if it has a compatible type"""
575
610
  if not self._is_btree_indexable(col):
576
611
  return None
577
- status = self._add_index(col, idx_name=None, idx=index.BtreeIndex(col))
612
+ status = self._add_index(col, idx_name=None, idx=index.BtreeIndex())
578
613
  return status
579
614
 
580
- def _create_index_columns(self, idx: index.IndexBase) -> Tuple[Column, Column]:
615
+ @classmethod
616
+ def _create_index_columns(
617
+ cls,
618
+ col: Column,
619
+ idx: index.IndexBase,
620
+ schema_version: int,
621
+ tbl_handle: TableVersionHandle,
622
+ id_cb: Callable[[], int],
623
+ ) -> tuple[Column, Column]:
581
624
  """Create value and undo columns for the given index.
582
625
  Args:
583
626
  idx: index for which columns will be created.
584
627
  Returns:
585
- A tuple containing the value column and the undo column.
628
+ A tuple containing the value column and the undo column, both of which are nullable.
586
629
  """
587
- assert not self.is_snapshot
588
- # add the index value and undo columns (which need to be nullable)
630
+ value_expr = idx.create_value_expr(col)
589
631
  val_col = Column(
590
- col_id=self.next_col_id,
632
+ col_id=id_cb(),
591
633
  name=None,
592
- computed_with=idx.index_value_expr(),
593
- sa_col_type=idx.index_sa_type(),
634
+ computed_with=value_expr,
635
+ sa_col_type=idx.get_index_sa_type(value_expr.col_type),
594
636
  stored=True,
595
- schema_version_add=self.schema_version,
596
- schema_version_drop=None,
597
637
  stores_cellmd=idx.records_value_errors(),
638
+ schema_version_add=schema_version,
639
+ schema_version_drop=None,
598
640
  )
599
- val_col.tbl = self
600
641
  val_col.col_type = val_col.col_type.copy(nullable=True)
601
- self.next_col_id += 1
642
+ val_col.tbl_handle = tbl_handle
602
643
 
603
644
  undo_col = Column(
604
- col_id=self.next_col_id,
645
+ col_id=id_cb(),
605
646
  name=None,
606
647
  col_type=val_col.col_type,
607
648
  sa_col_type=val_col.sa_col_type,
608
649
  stored=True,
609
- schema_version_add=self.schema_version,
610
- schema_version_drop=None,
611
650
  stores_cellmd=False,
651
+ schema_version_add=schema_version,
652
+ schema_version_drop=None,
612
653
  )
613
- undo_col.tbl = self
614
654
  undo_col.col_type = undo_col.col_type.copy(nullable=True)
615
- self.next_col_id += 1
655
+ undo_col.tbl_handle = tbl_handle
616
656
  return val_col, undo_col
617
657
 
618
658
  def _create_index(
619
- self, col: Column, val_col: Column, undo_col: Column, idx_name: Optional[str], idx: index.IndexBase
659
+ self, col: Column, val_col: Column, undo_col: Column, idx_name: str | None, idx: index.IndexBase
620
660
  ) -> None:
621
661
  """Create the given index along with index md"""
622
662
  idx_id = self.next_idx_id
@@ -632,7 +672,7 @@ class TableVersion:
632
672
  id=idx_id,
633
673
  name=idx_name,
634
674
  indexed_col_id=col.id,
635
- indexed_col_tbl_id=str(col.tbl.id),
675
+ indexed_col_tbl_id=str(col.get_tbl().id),
636
676
  index_val_col_id=val_col.id,
637
677
  index_val_undo_col_id=undo_col.id,
638
678
  schema_version_add=self.schema_version,
@@ -642,17 +682,21 @@ class TableVersion:
642
682
  )
643
683
  idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
644
684
  self._tbl_md.index_md[idx_id] = idx_md
685
+ self.idxs[idx_id] = idx_info
645
686
  self.idxs_by_name[idx_name] = idx_info
646
- idx.create_index(self._store_idx_name(idx_id), val_col)
687
+ self.idxs_by_col.setdefault(col.qid, []).append(idx_info)
688
+ self.store_tbl.create_index(idx_id)
647
689
 
648
- def _add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
649
- val_col, undo_vol = self._create_index_columns(idx)
690
+ def _add_index(self, col: Column, idx_name: str | None, idx: index.IndexBase) -> UpdateStatus:
691
+ val_col, undo_col = self._create_index_columns(
692
+ col, idx, self.schema_version, self.handle, id_cb=self.next_col_id
693
+ )
650
694
  # add the columns and update the metadata
651
695
  # TODO support on_error='abort' for indices; it's tricky because of the way metadata changes are entangled
652
696
  # with the database operations
653
- status = self._add_columns([val_col, undo_vol], print_stats=False, on_error='ignore')
697
+ status = self._add_columns([val_col, undo_col], print_stats=False, on_error='ignore')
654
698
  # now create the index structure
655
- self._create_index(col, val_col, undo_vol, idx_name, idx)
699
+ self._create_index(col, val_col, undo_col, idx_name, idx)
656
700
  return status
657
701
 
658
702
  def drop_index(self, idx_id: int) -> None:
@@ -668,7 +712,10 @@ class TableVersion:
668
712
  # remove this index entry from the active indexes (in memory)
669
713
  # and the index metadata (in persistent table metadata)
670
714
  # TODO: this is wrong, it breaks revert()
715
+ del self.idxs[idx_id]
671
716
  del self.idxs_by_name[idx_md.name]
717
+ if idx_info.col.qid in self.idxs_by_col:
718
+ self.idxs_by_col[idx_info.col.qid].remove(idx_info)
672
719
  del self._tbl_md.index_md[idx_id]
673
720
 
674
721
  self._drop_columns([idx_info.val_col, idx_info.undo_col])
@@ -684,9 +731,8 @@ class TableVersion:
684
731
  assert all(col.stored is not None for col in cols)
685
732
  assert all(col.name not in self.cols_by_name for col in cols if col.name is not None)
686
733
  for col in cols:
687
- col.tbl = self
688
- col.id = self.next_col_id
689
- self.next_col_id += 1
734
+ col.tbl_handle = self.handle
735
+ col.id = self.next_col_id()
690
736
 
691
737
  # we're creating a new schema version
692
738
  self.bump_version(bump_schema_version=True)
@@ -695,8 +741,10 @@ class TableVersion:
695
741
  for col in cols:
696
742
  all_cols.append(col)
697
743
  if col.name is not None and self._is_btree_indexable(col):
698
- idx = index.BtreeIndex(col)
699
- val_col, undo_col = self._create_index_columns(idx)
744
+ idx = index.BtreeIndex()
745
+ val_col, undo_col = self._create_index_columns(
746
+ col, idx, self.schema_version, self.handle, id_cb=self.next_col_id
747
+ )
700
748
  index_cols[col] = (idx, val_col, undo_col)
701
749
  all_cols.append(val_col)
702
750
  all_cols.append(undo_col)
@@ -728,7 +776,7 @@ class TableVersion:
728
776
 
729
777
  row_count = self.store_tbl.count()
730
778
  for col in cols_to_add:
731
- assert col.tbl is self
779
+ assert col.tbl_handle.id == self.id
732
780
  if not col.col_type.nullable and not col.is_computed and row_count > 0:
733
781
  raise excs.Error(
734
782
  f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
@@ -738,7 +786,7 @@ class TableVersion:
738
786
  num_excs = 0
739
787
  cols_with_excs: list[Column] = []
740
788
  for col in cols_to_add:
741
- assert col.id is not None, 'Column id must be set before adding the column'
789
+ assert col.id is not None
742
790
  excs_per_col = 0
743
791
  col.schema_version_add = self.schema_version
744
792
  # add the column to the lookup structures now, rather than after the store changes executed successfully,
@@ -792,7 +840,7 @@ class TableVersion:
792
840
  upd_rows=row_count, num_excs=num_excs, computed_values=computed_values
793
841
  ) # add_columns
794
842
  return UpdateStatus(
795
- cols_with_excs=[f'{col.tbl.name}.{col.name}' for col in cols_with_excs if col.name is not None],
843
+ cols_with_excs=[f'{col.get_tbl().name}.{col.name}' for col in cols_with_excs if col.name is not None],
796
844
  row_count_stats=row_counts,
797
845
  )
798
846
 
@@ -806,7 +854,7 @@ class TableVersion:
806
854
 
807
855
  # drop this column and all dependent index columns and indices
808
856
  dropped_cols = [col]
809
- dropped_idx_names: list[str] = []
857
+ dropped_idx_info: list[TableVersion.IndexInfo] = []
810
858
  for idx_info in self.idxs_by_name.values():
811
859
  if idx_info.col != col:
812
860
  continue
@@ -814,11 +862,14 @@ class TableVersion:
814
862
  idx_md = self._tbl_md.index_md[idx_info.id]
815
863
  idx_md.schema_version_drop = self.schema_version
816
864
  assert idx_md.name in self.idxs_by_name
817
- dropped_idx_names.append(idx_md.name)
865
+ dropped_idx_info.append(idx_info)
818
866
 
819
- # update idxs_by_name
820
- for idx_name in dropped_idx_names:
821
- del self.idxs_by_name[idx_name]
867
+ # update index lookup structures
868
+ for info in dropped_idx_info:
869
+ del self.idxs[info.id]
870
+ del self.idxs_by_name[info.name]
871
+ if col.qid in self.idxs_by_col:
872
+ del self.idxs_by_col[col.qid]
822
873
 
823
874
  self._drop_columns(dropped_cols)
824
875
  self._write_md(new_version=True, new_schema_version=True)
@@ -826,6 +877,8 @@ class TableVersion:
826
877
 
827
878
  def _drop_columns(self, cols: Iterable[Column]) -> None:
828
879
  """Mark columns as dropped"""
880
+ from pixeltable.catalog import Catalog
881
+
829
882
  assert self.is_mutable
830
883
 
831
884
  for col in cols:
@@ -845,7 +898,7 @@ class TableVersion:
845
898
  schema_col.pos = pos
846
899
 
847
900
  self.store_tbl.create_sa_tbl()
848
- pxt.catalog.Catalog.get().record_column_dependencies(self)
901
+ Catalog.get().record_column_dependencies(self)
849
902
 
850
903
  def rename_column(self, old_name: str, new_name: str) -> None:
851
904
  """Rename a column."""
@@ -854,12 +907,12 @@ class TableVersion:
854
907
  col = self.path.get_column(old_name)
855
908
  if col is None:
856
909
  raise excs.Error(f'Unknown column: {old_name}')
857
- if col.tbl.id != self.id:
910
+ if col.get_tbl().id != self.id:
858
911
  raise excs.Error(f'Cannot rename base table column {col.name!r}')
859
912
  if not is_valid_identifier(new_name):
860
- raise excs.Error(f"Invalid column name: '{new_name}'")
913
+ raise excs.Error(f'Invalid column name: {new_name}')
861
914
  if new_name in self.cols_by_name:
862
- raise excs.Error(f'Column {new_name} already exists')
915
+ raise excs.Error(f'Column {new_name!r} already exists')
863
916
  del self.cols_by_name[old_name]
864
917
  col.name = new_name
865
918
  self.cols_by_name[new_name] = col
@@ -871,7 +924,7 @@ class TableVersion:
871
924
  self._write_md(new_version=True, new_schema_version=True)
872
925
  _logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
873
926
 
874
- def set_comment(self, new_comment: Optional[str]) -> None:
927
+ def set_comment(self, new_comment: str | None) -> None:
875
928
  _logger.info(f'[{self.name}] Updating comment: {new_comment}')
876
929
  self.comment = new_comment
877
930
  self._create_schema_version()
@@ -892,8 +945,8 @@ class TableVersion:
892
945
 
893
946
  def insert(
894
947
  self,
895
- rows: Optional[list[dict[str, Any]]],
896
- df: Optional[pxt.DataFrame],
948
+ rows: list[dict[str, Any]] | None,
949
+ df: DataFrame | None,
897
950
  print_stats: bool = False,
898
951
  fail_on_exception: bool = True,
899
952
  ) -> UpdateStatus:
@@ -927,7 +980,7 @@ class TableVersion:
927
980
  exec_plan: 'exec.ExecNode',
928
981
  timestamp: float,
929
982
  *,
930
- rowids: Optional[Iterator[int]] = None,
983
+ rowids: Iterator[int] | None = None,
931
984
  print_stats: bool = False,
932
985
  abort_on_exc: bool = False,
933
986
  ) -> UpdateStatus:
@@ -958,9 +1011,7 @@ class TableVersion:
958
1011
  _logger.info(f'TableVersion {self.name}: new version {self.version}')
959
1012
  return result
960
1013
 
961
- def update(
962
- self, value_spec: dict[str, Any], where: Optional[exprs.Expr] = None, cascade: bool = True
963
- ) -> UpdateStatus:
1014
+ def update(self, value_spec: dict[str, Any], where: exprs.Expr | None = None, cascade: bool = True) -> UpdateStatus:
964
1015
  """Update rows in this TableVersionPath.
965
1016
  Args:
966
1017
  value_spec: a list of (column, value) pairs specifying the columns to update and their new values.
@@ -976,11 +1027,11 @@ class TableVersion:
976
1027
  update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True, allow_media=True)
977
1028
  if where is not None:
978
1029
  if not isinstance(where, exprs.Expr):
979
- raise excs.Error(f"'where' argument must be a predicate, got {type(where)}")
1030
+ raise excs.Error(f'`where` argument must be a valid Pixeltable expression; got `{type(where)}`')
980
1031
  analysis_info = Planner.analyze(self.path, where)
981
1032
  # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
982
1033
  if analysis_info.filter is not None:
983
- raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
1034
+ raise excs.Error(f'Filter not expressible in SQL: {analysis_info.filter}')
984
1035
 
985
1036
  plan, updated_cols, recomputed_cols = Planner.create_update_plan(self.path, update_spec, [], where, cascade)
986
1037
 
@@ -1037,7 +1088,7 @@ class TableVersion:
1037
1088
  update_targets: dict[Column, exprs.Expr] = {}
1038
1089
  for col_name, val in value_spec.items():
1039
1090
  if not isinstance(col_name, str):
1040
- raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
1091
+ raise excs.Error(f'Update specification: dict key must be column name; got {col_name!r}')
1041
1092
  if col_name == _ROWID_COLUMN_NAME:
1042
1093
  # a valid rowid is a list of ints, one per rowid column
1043
1094
  assert len(val) == len(self.store_tbl.rowid_columns())
@@ -1046,15 +1097,15 @@ class TableVersion:
1046
1097
  continue
1047
1098
  col = self.path.get_column(col_name)
1048
1099
  if col is None:
1049
- raise excs.Error(f'Column {col_name} unknown')
1050
- if col.tbl.id != self.id:
1100
+ raise excs.Error(f'Unknown column: {col_name}')
1101
+ if col.get_tbl().id != self.id:
1051
1102
  raise excs.Error(f'Column {col.name!r} is a base table column and cannot be updated')
1052
1103
  if col.is_computed:
1053
- raise excs.Error(f'Column {col_name} is computed and cannot be updated')
1104
+ raise excs.Error(f'Column {col_name!r} is computed and cannot be updated')
1054
1105
  if col.is_pk and not allow_pk:
1055
- raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
1106
+ raise excs.Error(f'Column {col_name!r} is a primary key column and cannot be updated')
1056
1107
  if col.col_type.is_media_type() and not allow_media:
1057
- raise excs.Error(f'Column {col_name} is a media column and cannot be updated')
1108
+ raise excs.Error(f'Column {col_name!r} is a media column and cannot be updated')
1058
1109
 
1059
1110
  # make sure that the value is compatible with the column type
1060
1111
  value_expr: exprs.Expr
@@ -1064,19 +1115,19 @@ class TableVersion:
1064
1115
  except (TypeError, jsonschema.exceptions.ValidationError) as exc:
1065
1116
  if not allow_exprs:
1066
1117
  raise excs.Error(
1067
- f'Column {col_name}: value {val!r} is not a valid literal for this column '
1068
- f'(expected {col.col_type})'
1118
+ f'Column {col_name!r}: value is not a valid literal for this column '
1119
+ f'(expected `{col.col_type}`): {val!r}'
1069
1120
  ) from exc
1070
1121
  # it's not a literal, let's try to create an expr from it
1071
1122
  value_expr = exprs.Expr.from_object(val)
1072
1123
  if value_expr is None:
1073
1124
  raise excs.Error(
1074
- f'Column {col_name}: value {val!r} is not a recognized literal or expression'
1125
+ f'Column {col_name!r}: value is not a recognized literal or expression: {val!r}'
1075
1126
  ) from exc
1076
1127
  if not col.col_type.is_supertype_of(value_expr.col_type, ignore_nullable=True):
1077
1128
  raise excs.Error(
1078
- f'Type of value {val!r} ({value_expr.col_type}) is not compatible with the type of column '
1079
- f'{col_name} ({col.col_type})'
1129
+ f'Type `{value_expr.col_type}` of value {val!r} is not compatible with the type '
1130
+ f'`{col.col_type}` of column {col_name!r}'
1080
1131
  ) from exc
1081
1132
  update_targets[col] = value_expr
1082
1133
 
@@ -1094,9 +1145,9 @@ class TableVersion:
1094
1145
  assert len(col_names) == 1 or not errors_only
1095
1146
 
1096
1147
  target_columns = [self.cols_by_name[name] for name in col_names]
1097
- where_clause: Optional[exprs.Expr] = None
1148
+ where_clause: exprs.Expr | None = None
1098
1149
  if where is not None:
1099
- self._validate_where_clause(where, error_prefix="'where' argument")
1150
+ self._validate_where_clause(where, error_prefix='`where` argument')
1100
1151
  where_clause = where
1101
1152
  if errors_only:
1102
1153
  errortype_pred = (
@@ -1122,10 +1173,10 @@ class TableVersion:
1122
1173
 
1123
1174
  def propagate_update(
1124
1175
  self,
1125
- plan: Optional[exec.ExecNode],
1126
- where_clause: Optional[sql.ColumnElement],
1176
+ plan: exec.ExecNode | None,
1177
+ where_clause: sql.ColumnElement | None,
1127
1178
  recomputed_view_cols: list[Column],
1128
- base_versions: list[Optional[int]],
1179
+ base_versions: list[int | None],
1129
1180
  timestamp: float,
1130
1181
  cascade: bool,
1131
1182
  show_progress: bool = True,
@@ -1153,7 +1204,7 @@ class TableVersion:
1153
1204
  base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
1154
1205
  # propagate to views
1155
1206
  for view in self.mutable_views:
1156
- recomputed_cols = [col for col in recomputed_view_cols if col.tbl.id == view.id]
1207
+ recomputed_cols = [col for col in recomputed_view_cols if col.get_tbl().id == view.id]
1157
1208
  plan = None
1158
1209
  if len(recomputed_cols) > 0:
1159
1210
  plan = Planner.create_view_update_plan(view.get().path, recompute_targets=recomputed_cols)
@@ -1173,21 +1224,21 @@ class TableVersion:
1173
1224
  from pixeltable.plan import Planner
1174
1225
 
1175
1226
  if not isinstance(pred, Expr):
1176
- raise excs.Error(f'{error_prefix} must be a predicate, got {type(pred)}')
1227
+ raise excs.Error(f'{error_prefix} must be a valid Pixeltable expression; got `{type(pred)}`')
1177
1228
  analysis_info = Planner.analyze(self.path, pred)
1178
1229
  # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
1179
1230
  if analysis_info.filter is not None:
1180
- raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
1231
+ raise excs.Error(f'Filter not expressible in SQL: {analysis_info.filter}')
1181
1232
 
1182
1233
  def delete(self, where: exprs.Expr | None = None) -> UpdateStatus:
1183
1234
  assert self.is_insertable
1184
1235
  if where is not None:
1185
- self._validate_where_clause(where, error_prefix="'where' argument")
1236
+ self._validate_where_clause(where, error_prefix='`where` argument')
1186
1237
  status = self.propagate_delete(where, base_versions=[], timestamp=time.time())
1187
1238
  return status
1188
1239
 
1189
1240
  def propagate_delete(
1190
- self, where: Optional[exprs.Expr], base_versions: list[Optional[int]], timestamp: float
1241
+ self, where: exprs.Expr | None, base_versions: list[int | None], timestamp: float
1191
1242
  ) -> UpdateStatus:
1192
1243
  """Delete rows in this table and propagate to views"""
1193
1244
  from pixeltable.catalog import Catalog
@@ -1252,7 +1303,7 @@ class TableVersion:
1252
1303
  names = [row[1] for row in result]
1253
1304
  raise excs.Error(
1254
1305
  (
1255
- f'Current version is needed for {len(result)} snapshot{"s" if len(result) > 1 else ""} '
1306
+ f'Current version is needed for {len(result)} snapshot{"s" if len(result) > 1 else ""}: '
1256
1307
  f'({", ".join(names)})'
1257
1308
  )
1258
1309
  )
@@ -1261,7 +1312,7 @@ class TableVersion:
1261
1312
 
1262
1313
  # revert new deletions
1263
1314
  set_clause: dict[sql.Column, Any] = {self.store_tbl.sa_tbl.c.v_max: schema.Table.MAX_VERSION}
1264
- for index_info in self.idxs_by_name.values():
1315
+ for index_info in self.idxs.values():
1265
1316
  # copy the index value back from the undo column and reset the undo column to NULL
1266
1317
  set_clause[index_info.val_col.sa_col] = index_info.undo_col.sa_col
1267
1318
  set_clause[index_info.undo_col.sa_col] = None
@@ -1335,16 +1386,18 @@ class TableVersion:
1335
1386
  # Do this at the end, after all DB operations have completed.
1336
1387
  # TODO: The transaction could still fail. Really this should be done via PendingTableOps.
1337
1388
  self.delete_media(tbl_version=old_version)
1338
- _logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
1389
+ _logger.info(f'TableVersion {self.name!r}: reverted to version {self.version}')
1339
1390
 
1340
1391
  def _init_external_stores(self) -> None:
1392
+ from pixeltable.io.external_store import ExternalStore
1393
+
1341
1394
  for store_md in self.tbl_md.external_stores:
1342
1395
  store_cls = resolve_symbol(store_md['class'])
1343
- assert isinstance(store_cls, type) and issubclass(store_cls, pxt.io.ExternalStore)
1396
+ assert isinstance(store_cls, type) and issubclass(store_cls, ExternalStore)
1344
1397
  store = store_cls.from_dict(store_md['md'])
1345
1398
  self.external_stores[store.name] = store
1346
1399
 
1347
- def link_external_store(self, store: pxt.io.ExternalStore) -> None:
1400
+ def link_external_store(self, store: ExternalStore) -> None:
1348
1401
  self.bump_version(bump_schema_version=True)
1349
1402
 
1350
1403
  self.external_stores[store.name] = store
@@ -1353,7 +1406,7 @@ class TableVersion:
1353
1406
  )
1354
1407
  self._write_md(new_version=True, new_schema_version=True)
1355
1408
 
1356
- def unlink_external_store(self, store: pxt.io.ExternalStore) -> None:
1409
+ def unlink_external_store(self, store: ExternalStore) -> None:
1357
1410
  del self.external_stores[store.name]
1358
1411
  self.bump_version(bump_schema_version=True)
1359
1412
  idx = next(i for i, store_md in enumerate(self._tbl_md.external_stores) if store_md['md']['name'] == store.name)
@@ -1373,7 +1426,7 @@ class TableVersion:
1373
1426
  return self._schema_version_md
1374
1427
 
1375
1428
  @property
1376
- def view_md(self) -> Optional[schema.ViewMd]:
1429
+ def view_md(self) -> schema.ViewMd | None:
1377
1430
  return self._tbl_md.view_md
1378
1431
 
1379
1432
  @property
@@ -1381,7 +1434,7 @@ class TableVersion:
1381
1434
  return self._tbl_md.name
1382
1435
 
1383
1436
  @property
1384
- def user(self) -> Optional[str]:
1437
+ def user(self) -> str | None:
1385
1438
  return self._tbl_md.user
1386
1439
 
1387
1440
  @property
@@ -1419,7 +1472,7 @@ class TableVersion:
1419
1472
  def schema_version(self) -> int:
1420
1473
  return self._schema_version_md.schema_version
1421
1474
 
1422
- def bump_version(self, timestamp: Optional[float] = None, *, bump_schema_version: bool) -> None:
1475
+ def bump_version(self, timestamp: float | None = None, *, bump_schema_version: bool) -> None:
1423
1476
  """
1424
1477
  Increments the table version and adjusts all associated metadata. This will *not* trigger a database action;
1425
1478
  _write_md() must be called separately to persist the changes.
@@ -1456,11 +1509,11 @@ class TableVersion:
1456
1509
  self._schema_version_md.schema_version = new_version
1457
1510
 
1458
1511
  @property
1459
- def preceding_schema_version(self) -> Optional[int]:
1512
+ def preceding_schema_version(self) -> int | None:
1460
1513
  return self._schema_version_md.preceding_schema_version
1461
1514
 
1462
1515
  @property
1463
- def update_status(self) -> Optional[UpdateStatus]:
1516
+ def update_status(self) -> UpdateStatus | None:
1464
1517
  return self._version_md.update_status
1465
1518
 
1466
1519
  @update_status.setter
@@ -1472,14 +1525,10 @@ class TableVersion:
1472
1525
  def media_validation(self) -> MediaValidation:
1473
1526
  return MediaValidation[self._schema_version_md.media_validation.upper()]
1474
1527
 
1475
- @property
1476
1528
  def next_col_id(self) -> int:
1477
- return self._tbl_md.next_col_id
1478
-
1479
- @next_col_id.setter
1480
- def next_col_id(self, id: int) -> None:
1481
- assert self.effective_version is None
1482
- self._tbl_md.next_col_id = id
1529
+ val = self._tbl_md.next_col_id
1530
+ self._tbl_md.next_col_id += 1
1531
+ return val
1483
1532
 
1484
1533
  @property
1485
1534
  def next_idx_id(self) -> int:
@@ -1558,15 +1607,35 @@ class TableVersion:
1558
1607
  return names
1559
1608
 
1560
1609
  def get_idx_val_columns(self, cols: Iterable[Column]) -> set[Column]:
1561
- result = {info.val_col for col in cols for info in col.get_idx_info().values()}
1562
- return result
1610
+ # assumes that the indexed columns are all in this table
1611
+ assert all(col.get_tbl().id == self.id for col in cols)
1612
+ col_ids = {col.id for col in cols}
1613
+ return {info.val_col for info in self.idxs.values() if info.col.id in col_ids}
1614
+
1615
+ def get_idx(self, col: Column, idx_name: str | None, idx_cls: type[index.IndexBase]) -> TableVersion.IndexInfo:
1616
+ if not self.supports_idxs:
1617
+ raise excs.Error('Snapshot does not support indices')
1618
+ if col.qid not in self.idxs_by_col:
1619
+ raise excs.Error(f'Column {col.name!r} does not have a {idx_cls.display_name()} index')
1620
+ candidates = [info for info in self.idxs_by_col[col.qid] if isinstance(info.idx, idx_cls)]
1621
+ if len(candidates) == 0:
1622
+ raise excs.Error(f'No {idx_cls.display_name()} index found for column {col.name!r}')
1623
+ if len(candidates) > 1 and idx_name is None:
1624
+ raise excs.Error(
1625
+ f'Column {col.name!r} has multiple {idx_cls.display_name()} indices; specify `idx_name` instead'
1626
+ )
1627
+ if idx_name is not None and idx_name not in [info.name for info in candidates]:
1628
+ raise excs.Error(f'Index {idx_name!r} not found for column {col.name!r}')
1629
+ return candidates[0] if idx_name is None else next(info for info in candidates if info.name == idx_name)
1563
1630
 
1564
1631
  def get_dependent_columns(self, cols: Iterable[Column]) -> set[Column]:
1565
1632
  """
1566
1633
  Return the set of columns that transitively depend on any of the given ones.
1567
1634
  """
1568
- cat = pxt.catalog.Catalog.get()
1569
- result = set().union(*[cat.get_column_dependents(col.tbl.id, col.id) for col in cols])
1635
+ from pixeltable.catalog import Catalog
1636
+
1637
+ cat = Catalog.get()
1638
+ result = set().union(*[cat.get_column_dependents(col.get_tbl().id, col.id) for col in cols])
1570
1639
  if len(result) > 0:
1571
1640
  result.update(self.get_dependent_columns(result))
1572
1641
  return result
@@ -1578,7 +1647,7 @@ class TableVersion:
1578
1647
  return 1
1579
1648
 
1580
1649
  @classmethod
1581
- def _create_stores_md(cls, stores: Iterable[pxt.io.ExternalStore]) -> list[dict[str, Any]]:
1650
+ def _create_stores_md(cls, stores: Iterable[ExternalStore]) -> list[dict[str, Any]]:
1582
1651
  return [
1583
1652
  {'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()} for store in stores
1584
1653
  ]