pixeltable 0.4.18__py3-none-any.whl → 0.4.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. See the registry's advisory page for more details.

Files changed (152)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/catalog.py +119 -100
  4. pixeltable/catalog/column.py +104 -115
  5. pixeltable/catalog/globals.py +1 -2
  6. pixeltable/catalog/insertable_table.py +44 -49
  7. pixeltable/catalog/path.py +3 -4
  8. pixeltable/catalog/schema_object.py +4 -4
  9. pixeltable/catalog/table.py +118 -122
  10. pixeltable/catalog/table_metadata.py +6 -6
  11. pixeltable/catalog/table_version.py +322 -257
  12. pixeltable/catalog/table_version_handle.py +4 -4
  13. pixeltable/catalog/table_version_path.py +9 -10
  14. pixeltable/catalog/tbl_ops.py +9 -3
  15. pixeltable/catalog/view.py +34 -28
  16. pixeltable/config.py +14 -10
  17. pixeltable/dataframe.py +68 -77
  18. pixeltable/env.py +74 -64
  19. pixeltable/exec/aggregation_node.py +6 -6
  20. pixeltable/exec/cache_prefetch_node.py +10 -10
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +4 -5
  23. pixeltable/exec/exec_node.py +5 -5
  24. pixeltable/exec/expr_eval/evaluators.py +6 -6
  25. pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
  26. pixeltable/exec/expr_eval/globals.py +6 -6
  27. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  28. pixeltable/exec/expr_eval/schedulers.py +11 -11
  29. pixeltable/exec/in_memory_data_node.py +2 -2
  30. pixeltable/exec/object_store_save_node.py +14 -17
  31. pixeltable/exec/sql_node.py +25 -25
  32. pixeltable/exprs/arithmetic_expr.py +4 -4
  33. pixeltable/exprs/array_slice.py +2 -2
  34. pixeltable/exprs/column_property_ref.py +3 -3
  35. pixeltable/exprs/column_ref.py +61 -74
  36. pixeltable/exprs/comparison.py +5 -5
  37. pixeltable/exprs/compound_predicate.py +3 -3
  38. pixeltable/exprs/data_row.py +12 -12
  39. pixeltable/exprs/expr.py +41 -31
  40. pixeltable/exprs/expr_dict.py +3 -3
  41. pixeltable/exprs/expr_set.py +3 -3
  42. pixeltable/exprs/function_call.py +14 -14
  43. pixeltable/exprs/in_predicate.py +4 -4
  44. pixeltable/exprs/inline_expr.py +8 -8
  45. pixeltable/exprs/is_null.py +1 -3
  46. pixeltable/exprs/json_mapper.py +8 -8
  47. pixeltable/exprs/json_path.py +6 -6
  48. pixeltable/exprs/literal.py +5 -5
  49. pixeltable/exprs/method_ref.py +2 -2
  50. pixeltable/exprs/object_ref.py +2 -2
  51. pixeltable/exprs/row_builder.py +14 -14
  52. pixeltable/exprs/rowid_ref.py +8 -8
  53. pixeltable/exprs/similarity_expr.py +50 -25
  54. pixeltable/exprs/sql_element_cache.py +4 -4
  55. pixeltable/exprs/string_op.py +2 -2
  56. pixeltable/exprs/type_cast.py +3 -5
  57. pixeltable/func/aggregate_function.py +8 -8
  58. pixeltable/func/callable_function.py +9 -9
  59. pixeltable/func/expr_template_function.py +3 -3
  60. pixeltable/func/function.py +15 -17
  61. pixeltable/func/function_registry.py +6 -7
  62. pixeltable/func/globals.py +2 -3
  63. pixeltable/func/mcp.py +2 -2
  64. pixeltable/func/query_template_function.py +16 -16
  65. pixeltable/func/signature.py +14 -14
  66. pixeltable/func/tools.py +11 -11
  67. pixeltable/func/udf.py +16 -18
  68. pixeltable/functions/__init__.py +1 -0
  69. pixeltable/functions/anthropic.py +7 -7
  70. pixeltable/functions/audio.py +76 -0
  71. pixeltable/functions/bedrock.py +6 -6
  72. pixeltable/functions/deepseek.py +4 -4
  73. pixeltable/functions/fireworks.py +2 -2
  74. pixeltable/functions/gemini.py +6 -6
  75. pixeltable/functions/globals.py +12 -12
  76. pixeltable/functions/groq.py +4 -4
  77. pixeltable/functions/huggingface.py +18 -20
  78. pixeltable/functions/image.py +7 -10
  79. pixeltable/functions/llama_cpp.py +7 -7
  80. pixeltable/functions/math.py +2 -3
  81. pixeltable/functions/mistralai.py +3 -3
  82. pixeltable/functions/ollama.py +9 -9
  83. pixeltable/functions/openai.py +21 -21
  84. pixeltable/functions/openrouter.py +7 -7
  85. pixeltable/functions/string.py +21 -28
  86. pixeltable/functions/timestamp.py +7 -8
  87. pixeltable/functions/together.py +4 -6
  88. pixeltable/functions/twelvelabs.py +92 -0
  89. pixeltable/functions/video.py +2 -24
  90. pixeltable/functions/vision.py +6 -6
  91. pixeltable/functions/whisper.py +7 -7
  92. pixeltable/functions/whisperx.py +16 -16
  93. pixeltable/globals.py +52 -36
  94. pixeltable/index/base.py +12 -8
  95. pixeltable/index/btree.py +19 -22
  96. pixeltable/index/embedding_index.py +30 -39
  97. pixeltable/io/datarows.py +3 -3
  98. pixeltable/io/external_store.py +13 -16
  99. pixeltable/io/fiftyone.py +5 -5
  100. pixeltable/io/globals.py +5 -5
  101. pixeltable/io/hf_datasets.py +4 -4
  102. pixeltable/io/label_studio.py +12 -12
  103. pixeltable/io/pandas.py +6 -6
  104. pixeltable/io/parquet.py +2 -2
  105. pixeltable/io/table_data_conduit.py +12 -12
  106. pixeltable/io/utils.py +2 -2
  107. pixeltable/iterators/audio.py +2 -2
  108. pixeltable/iterators/video.py +8 -13
  109. pixeltable/metadata/converters/convert_18.py +2 -2
  110. pixeltable/metadata/converters/convert_19.py +2 -2
  111. pixeltable/metadata/converters/convert_20.py +2 -2
  112. pixeltable/metadata/converters/convert_21.py +2 -2
  113. pixeltable/metadata/converters/convert_22.py +2 -2
  114. pixeltable/metadata/converters/convert_24.py +2 -2
  115. pixeltable/metadata/converters/convert_25.py +2 -2
  116. pixeltable/metadata/converters/convert_26.py +2 -2
  117. pixeltable/metadata/converters/convert_29.py +4 -4
  118. pixeltable/metadata/converters/convert_34.py +2 -2
  119. pixeltable/metadata/converters/convert_36.py +2 -2
  120. pixeltable/metadata/converters/convert_38.py +2 -2
  121. pixeltable/metadata/converters/convert_39.py +1 -2
  122. pixeltable/metadata/converters/util.py +11 -13
  123. pixeltable/metadata/schema.py +22 -21
  124. pixeltable/metadata/utils.py +2 -6
  125. pixeltable/mypy/mypy_plugin.py +5 -5
  126. pixeltable/plan.py +30 -28
  127. pixeltable/share/packager.py +7 -7
  128. pixeltable/share/publish.py +3 -3
  129. pixeltable/store.py +125 -61
  130. pixeltable/type_system.py +43 -46
  131. pixeltable/utils/__init__.py +1 -2
  132. pixeltable/utils/arrow.py +4 -4
  133. pixeltable/utils/av.py +8 -0
  134. pixeltable/utils/azure_store.py +305 -0
  135. pixeltable/utils/code.py +1 -2
  136. pixeltable/utils/dbms.py +15 -19
  137. pixeltable/utils/description_helper.py +2 -3
  138. pixeltable/utils/documents.py +5 -6
  139. pixeltable/utils/exception_handler.py +2 -2
  140. pixeltable/utils/filecache.py +5 -5
  141. pixeltable/utils/formatter.py +4 -6
  142. pixeltable/utils/gcs_store.py +9 -9
  143. pixeltable/utils/local_store.py +17 -17
  144. pixeltable/utils/object_stores.py +59 -43
  145. pixeltable/utils/s3_store.py +35 -30
  146. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/METADATA +1 -1
  147. pixeltable-0.4.19.dist-info/RECORD +213 -0
  148. pixeltable/__version__.py +0 -3
  149. pixeltable-0.4.18.dist-info/RECORD +0 -211
  150. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
  151. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
  152. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
@@ -3,19 +3,21 @@ from __future__ import annotations
3
3
  import copy
4
4
  import dataclasses
5
5
  import importlib
6
+ import itertools
6
7
  import logging
7
8
  import time
8
9
  import uuid
9
- from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, Tuple
10
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Literal
10
11
  from uuid import UUID
11
12
 
12
13
  import jsonschema.exceptions
13
14
  import sqlalchemy as sql
14
15
  from sqlalchemy import exc as sql_exc
15
16
 
16
- import pixeltable as pxt
17
17
  import pixeltable.exceptions as excs
18
- from pixeltable import exprs, index
18
+ import pixeltable.exprs as exprs
19
+ import pixeltable.index as index
20
+ import pixeltable.type_system as ts
19
21
  from pixeltable.env import Env
20
22
  from pixeltable.iterators import ComponentIterator
21
23
  from pixeltable.metadata import schema
@@ -31,8 +33,12 @@ from .update_status import RowCountStats, UpdateStatus
31
33
  if TYPE_CHECKING:
32
34
  from pixeltable import exec, store
33
35
  from pixeltable.catalog.table_version_handle import TableVersionHandle
36
+ from pixeltable.dataframe import DataFrame
37
+ from pixeltable.io import ExternalStore
34
38
  from pixeltable.plan import SampleClause
35
39
 
40
+ from .table_version_path import TableVersionPath
41
+
36
42
  _logger = logging.getLogger('pixeltable')
37
43
 
38
44
 
@@ -77,14 +83,14 @@ class TableVersion:
77
83
  _version_md: schema.TableVersionMd
78
84
  _schema_version_md: schema.TableSchemaVersionMd
79
85
 
80
- effective_version: Optional[int]
81
- path: Optional[pxt.catalog.TableVersionPath] # only set for live tables; needed to resolve computed cols
82
- base: Optional[TableVersionHandle] # only set for views
83
- predicate: Optional[exprs.Expr]
84
- sample_clause: Optional['SampleClause']
86
+ effective_version: int | None
87
+ path: 'TableVersionPath' | None # only set for live tables; needed to resolve computed cols
88
+ base: TableVersionHandle | None # only set for views
89
+ predicate: exprs.Expr | None
90
+ sample_clause: 'SampleClause' | None
85
91
 
86
- iterator_cls: Optional[type[ComponentIterator]]
87
- iterator_args: Optional[exprs.InlineDict]
92
+ iterator_cls: type[ComponentIterator] | None
93
+ iterator_args: exprs.InlineDict | None
88
94
  num_iterator_cols: int
89
95
 
90
96
  # target for data operation propagation (only set for non-snapshots, and only records non-snapshot views)
@@ -96,13 +102,19 @@ class TableVersion:
96
102
  cols_by_name: dict[str, Column]
97
103
  # contains only columns visible in this version, both system and user
98
104
  cols_by_id: dict[int, Column]
99
- # all indices defined on this table
100
- all_idxs: dict[str, TableVersion.IndexInfo]
101
- # contains only actively maintained indices
105
+
106
+ # True if this TableVersion instance can have indices:
107
+ # - live version of a mutable table
108
+ # - the most recent version of a replica
109
+ supports_idxs: bool
110
+
111
+ # only populated with indices visible in this TableVersion instance
112
+ idxs: dict[int, TableVersion.IndexInfo] # key: index id
102
113
  idxs_by_name: dict[str, TableVersion.IndexInfo]
114
+ idxs_by_col: dict[QColumnId, list[TableVersion.IndexInfo]]
103
115
 
104
- external_stores: dict[str, pxt.io.ExternalStore]
105
- store_tbl: Optional['store.StoreBase']
116
+ external_stores: dict[str, ExternalStore]
117
+ store_tbl: 'store.StoreBase' | None
106
118
 
107
119
  is_initialized: bool # True if init() has been called
108
120
 
@@ -125,18 +137,12 @@ class TableVersion:
125
137
  id: UUID,
126
138
  tbl_md: schema.TableMd,
127
139
  version_md: schema.TableVersionMd,
128
- effective_version: Optional[int],
140
+ effective_version: int | None,
129
141
  schema_version_md: schema.TableSchemaVersionMd,
130
142
  mutable_views: list[TableVersionHandle],
131
- base_path: Optional[pxt.catalog.TableVersionPath] = None,
132
- base: Optional[TableVersionHandle] = None,
143
+ base_path: 'TableVersionPath' | None = None,
144
+ base: TableVersionHandle | None = None,
133
145
  ):
134
- from pixeltable import exprs
135
- from pixeltable.plan import SampleClause
136
-
137
- from .table_version_handle import TableVersionHandle
138
- from .table_version_path import TableVersionPath
139
-
140
146
  self.is_validated = True # a freshly constructed instance is always valid
141
147
  self.is_initialized = False
142
148
  self.id = id
@@ -149,6 +155,9 @@ class TableVersion:
149
155
  self.store_tbl = None
150
156
 
151
157
  # mutable tables need their TableVersionPath for expr eval during updates
158
+ from .table_version_handle import TableVersionHandle
159
+ from .table_version_path import TableVersionPath
160
+
152
161
  if self.is_snapshot:
153
162
  self.path = None
154
163
  else:
@@ -158,6 +167,9 @@ class TableVersion:
158
167
  self.path = TableVersionPath(self_handle, base=base_path)
159
168
 
160
169
  # view-specific initialization
170
+ from pixeltable import exprs
171
+ from pixeltable.plan import SampleClause
172
+
161
173
  predicate_dict = None if self.view_md is None or self.view_md.predicate is None else self.view_md.predicate
162
174
  self.predicate = exprs.Expr.from_dict(predicate_dict) if predicate_dict is not None else None
163
175
  sample_dict = None if self.view_md is None or self.view_md.sample_clause is None else self.view_md.sample_clause
@@ -182,8 +194,12 @@ class TableVersion:
182
194
  self.cols = []
183
195
  self.cols_by_name = {}
184
196
  self.cols_by_id = {}
185
- self.all_idxs = {}
197
+ self.idxs = {}
186
198
  self.idxs_by_name = {}
199
+ self.idxs_by_col = {}
200
+ self.supports_idxs = self.effective_version is None or (
201
+ self.is_replica and self.effective_version == self.tbl_md.current_version
202
+ )
187
203
  self.external_stores = {}
188
204
 
189
205
  def __hash__(self) -> int:
@@ -222,19 +238,27 @@ class TableVersion:
222
238
  num_retained_versions: int,
223
239
  comment: str,
224
240
  media_validation: MediaValidation,
225
- view_md: Optional[schema.ViewMd] = None,
241
+ create_default_idxs: bool,
242
+ view_md: schema.ViewMd | None = None,
226
243
  ) -> TableVersionMd:
244
+ from .table_version_handle import TableVersionHandle
245
+
227
246
  user = Env.get().user
228
247
  timestamp = time.time()
229
248
 
249
+ tbl_id = uuid.uuid4()
250
+ tbl_id_str = str(tbl_id)
251
+ tbl_handle = TableVersionHandle(tbl_id, None)
252
+ column_ids = itertools.count(0)
253
+ index_ids = itertools.count(0)
254
+
230
255
  # assign ids, create metadata
231
- cols_by_name: dict[str, Column] = {}
232
256
  column_md: dict[int, schema.ColumnMd] = {}
233
257
  schema_col_md: dict[int, schema.SchemaColumn] = {}
234
258
  for pos, col in enumerate(cols):
235
- col.id = pos
259
+ col.tbl_handle = tbl_handle
260
+ col.id = next(column_ids)
236
261
  col.schema_version_add = 0
237
- cols_by_name[col.name] = col
238
262
  if col.is_computed:
239
263
  col.check_value_expr()
240
264
  col_md, sch_md = col.to_md(pos)
@@ -242,8 +266,39 @@ class TableVersion:
242
266
  column_md[col.id] = col_md
243
267
  schema_col_md[col.id] = sch_md
244
268
 
245
- tbl_id = uuid.uuid4()
246
- tbl_id_str = str(tbl_id)
269
+ index_md: dict[int, schema.IndexMd] = {}
270
+ if create_default_idxs and (view_md is None or not view_md.is_snapshot):
271
+ index_cols: list[Column] = []
272
+ for col in (c for c in cols if cls._is_btree_indexable(c)):
273
+ idx = index.BtreeIndex()
274
+ val_col, undo_col = cls._create_index_columns(col, idx, 0, tbl_handle, id_cb=lambda: next(column_ids))
275
+ index_cols.extend([val_col, undo_col])
276
+
277
+ idx_id = next(index_ids)
278
+ idx_cls = type(idx)
279
+ md = schema.IndexMd(
280
+ id=idx_id,
281
+ name=f'idx{idx_id}',
282
+ indexed_col_id=col.id,
283
+ indexed_col_tbl_id=tbl_id_str,
284
+ index_val_col_id=val_col.id,
285
+ index_val_undo_col_id=undo_col.id,
286
+ schema_version_add=0,
287
+ schema_version_drop=None,
288
+ class_fqn=idx_cls.__module__ + '.' + idx_cls.__name__,
289
+ init_args=idx.as_dict(),
290
+ )
291
+ index_md[idx_id] = md
292
+
293
+ for col in index_cols:
294
+ col_md, _ = col.to_md()
295
+ column_md[col.id] = col_md
296
+
297
+ assert all(column_md[id].id == id for id in column_md)
298
+ assert all(index_md[id].id == id for id in index_md)
299
+
300
+ cols.extend(index_cols)
301
+
247
302
  tbl_md = schema.TableMd(
248
303
  tbl_id=tbl_id_str,
249
304
  name=name,
@@ -251,12 +306,12 @@ class TableVersion:
251
306
  is_replica=False,
252
307
  current_version=0,
253
308
  current_schema_version=0,
254
- next_col_id=len(cols),
255
- next_idx_id=0,
309
+ next_col_id=next(column_ids),
310
+ next_idx_id=next(index_ids),
256
311
  next_row_id=0,
257
312
  view_sn=0,
258
313
  column_md=column_md,
259
- index_md={},
314
+ index_md=index_md,
260
315
  external_stores=[],
261
316
  view_md=view_md,
262
317
  additional_md={},
@@ -284,51 +339,15 @@ class TableVersion:
284
339
  )
285
340
  return TableVersionMd(tbl_md, table_version_md, schema_version_md)
286
341
 
287
- @classmethod
288
- def create(
289
- cls,
290
- dir_id: UUID,
291
- name: str,
292
- cols: list[Column],
293
- num_retained_versions: int,
294
- comment: str,
295
- media_validation: MediaValidation,
296
- ) -> tuple[UUID, Optional[TableVersion]]:
297
- initial_md = cls.create_initial_md(name, cols, num_retained_versions, comment, media_validation, view_md=None)
298
- cat = pxt.catalog.Catalog.get()
299
-
300
- tbl_id = UUID(hex=initial_md.tbl_md.tbl_id)
301
- assert (tbl_id, None) not in cat._tbl_versions
302
- tbl_version = cls(tbl_id, initial_md.tbl_md, initial_md.version_md, None, initial_md.schema_version_md, [])
303
-
304
- @cat.register_undo_action
305
- def _() -> None:
306
- if (tbl_id, None) in cat._tbl_versions:
307
- del cat._tbl_versions[tbl_id, None]
308
-
309
- # TODO: break this up, so that Catalog.create_table() registers tbl_version
310
- cat._tbl_versions[tbl_id, None] = tbl_version
311
- tbl_version.init()
312
- tbl_version.store_tbl.create()
313
- # add default indices, after creating the store table
314
- for col in tbl_version.cols_by_name.values():
315
- status = tbl_version._add_default_index(col)
316
- assert status is None or status.num_excs == 0
317
-
318
- cat.store_tbl_md(
319
- tbl_id=tbl_id,
320
- dir_id=dir_id,
321
- tbl_md=tbl_version.tbl_md,
322
- version_md=initial_md.version_md,
323
- schema_version_md=initial_md.schema_version_md,
324
- )
325
- return tbl_id, tbl_version
326
-
327
342
  def exec_op(self, op: TableOp) -> None:
328
343
  if op.create_store_table_op is not None:
329
- # don't use Catalog.begin_xact() here, to avoid accidental recursive calls to exec_op()
344
+ # this needs to be called outside of a transaction
345
+ self.store_tbl.create()
346
+
347
+ elif op.create_index_op is not None:
348
+ idx_info = self.idxs[op.create_index_op.idx_id]
330
349
  with Env.get().begin_xact():
331
- self.store_tbl.create()
350
+ self.store_tbl.create_index(idx_info.id)
332
351
 
333
352
  elif op.load_view_op is not None:
334
353
  from pixeltable.catalog import Catalog
@@ -347,7 +366,7 @@ class TableVersion:
347
366
 
348
367
  @classmethod
349
368
  def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
350
- from .catalog import TableVersionPath
369
+ from .catalog import Catalog, TableVersionPath
351
370
 
352
371
  assert Env.get().in_xact
353
372
  assert md.tbl_md.is_replica
@@ -366,7 +385,7 @@ class TableVersion:
366
385
  base_path=base_path,
367
386
  base=base,
368
387
  )
369
- cat = pxt.catalog.Catalog.get()
388
+ cat = Catalog.get()
370
389
  # We're creating a new TableVersion replica, so we should never have seen this particular
371
390
  # TableVersion instance before.
372
391
  # Actually this isn't true, because we might be re-creating a dropped replica.
@@ -376,10 +395,9 @@ class TableVersion:
376
395
  cat._tbl_versions[tbl_version.id, tbl_version.effective_version] = tbl_version
377
396
  tbl_version.init()
378
397
  tbl_version.store_tbl.create()
379
- tbl_version.store_tbl.ensure_updated_schema()
380
398
  return tbl_version
381
399
 
382
- def delete_media(self, tbl_version: Optional[int] = None) -> None:
400
+ def delete_media(self, tbl_version: int | None = None) -> None:
383
401
  # Assemble a set of column destinations and delete objects from all of them
384
402
  # None is a valid column destination which refers to the default object location
385
403
  destinations = {col.destination for col in self.cols if col.is_stored}
@@ -417,19 +435,21 @@ class TableVersion:
417
435
  self.is_initialized = True
418
436
 
419
437
  def _init_schema(self) -> None:
420
- # create columns first, so the indices can reference them
421
- self._init_cols()
422
- self._init_idxs()
423
-
424
- # create the sa schema only after creating the columns and indices
425
- self._init_sa_schema()
438
+ from pixeltable.store import StoreComponentView, StoreTable, StoreView
426
439
 
427
- # created value_exprs after everything else has been initialized
428
- for col in self.cols_by_id.values():
429
- col.init_value_expr()
440
+ # initialize IndexBase instances and collect sa_col_types
441
+ idxs: dict[int, index.IndexBase] = {}
442
+ val_col_idxs: dict[int, index.IndexBase] = {} # key: id of value column
443
+ undo_col_idxs: dict[int, index.IndexBase] = {} # key: id of undo column
444
+ for md in self.tbl_md.index_md.values():
445
+ cls_name = md.class_fqn.rsplit('.', 1)[-1]
446
+ cls = getattr(index, cls_name)
447
+ idx = cls.from_dict(md.init_args)
448
+ idxs[md.id] = idx
449
+ val_col_idxs[md.index_val_col_id] = idx
450
+ undo_col_idxs[md.index_val_undo_col_id] = idx
430
451
 
431
- def _init_cols(self) -> None:
432
- """Initialize self.cols with the columns visible in our effective version"""
452
+ # initialize Columns
433
453
  self.cols = []
434
454
  self.cols_by_name = {}
435
455
  self.cols_by_id = {}
@@ -437,78 +457,88 @@ class TableVersion:
437
457
  # point backward.
438
458
  sorted_column_md = sorted(self.tbl_md.column_md.values(), key=lambda item: item.id)
439
459
  for col_md in sorted_column_md:
460
+ col_type = ts.ColumnType.from_dict(col_md.col_type)
440
461
  schema_col_md = self.schema_version_md.columns.get(col_md.id)
441
- col = Column.from_md(col_md, self, schema_col_md)
442
- self.cols.append(col)
443
-
444
- # populate the lookup structures before Expr.from_dict()
445
- if col_md.schema_version_add > self.schema_version:
446
- # column was added after this version
447
- continue
448
- if col_md.schema_version_drop is not None and col_md.schema_version_drop <= self.schema_version:
449
- # column was dropped
450
- continue
451
- if col.name is not None:
452
- self.cols_by_name[col.name] = col
453
- self.cols_by_id[col.id] = col
454
-
455
- # # make sure to traverse columns ordered by position = order in which cols were created;
456
- # # this guarantees that references always point backwards
457
- # if not self.is_snapshot and col_md.value_expr is not None:
458
- # self._record_refd_columns(col)
462
+ media_val = (
463
+ MediaValidation[schema_col_md.media_validation.upper()]
464
+ if schema_col_md is not None and schema_col_md.media_validation is not None
465
+ else None
466
+ )
459
467
 
460
- def _init_idxs(self) -> None:
461
- for md in self.tbl_md.index_md.values():
462
- # Instantiate index object. This needs to be done for all indices, even those that are not active in this
463
- # TableVersion, so that we can make appropriate adjustments to the SA schema.
464
- cls_name = md.class_fqn.rsplit('.', 1)[-1]
465
- cls = getattr(index, cls_name)
466
- idx_col = self._lookup_column(QColumnId(UUID(md.indexed_col_tbl_id), md.indexed_col_id))
467
- assert idx_col is not None
468
- idx = cls.from_dict(idx_col, md.init_args)
469
- assert isinstance(idx, index.IndexBase)
470
-
471
- val_col = next(col for col in self.cols if col.id == md.index_val_col_id)
472
- undo_col = next(col for col in self.cols if col.id == md.index_val_undo_col_id)
473
- idx_info = self.IndexInfo(id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col)
474
- self.all_idxs[md.name] = idx_info
475
-
476
- # fix up the sa column type of the index value and undo columns
477
- # we need to do this for all indices, not just those that are active in this TableVersion, to ensure we get
478
- # the correct SA schema in the StoreTable.
479
- val_col.sa_col_type = idx.index_sa_type()
480
- undo_col.sa_col_type = idx.index_sa_type()
481
- if not isinstance(idx, index.EmbeddingIndex):
482
- # Historically, the intent has been not to store cellmd data, even for embedding indices. However,
483
- # the cellmd columns get created anyway, even if stores_cellmd is set to `False` here, due to the
484
- # timing of index column creation. In order to ensure that SA schemas align with what is actually in
485
- # the physical tables, we keep this `True` for embedding indices.
486
- # TODO: Decide whether index columns should store cellmd data.
487
- # - If not, set to `False`, fix the column creation timing issue, and add a migration script to
488
- # remedy existing cellmd columns.
489
- # - If so, remove this TODO.
490
- val_col._stores_cellmd = False
491
- undo_col._stores_cellmd = False
492
-
493
- # The index is active in this TableVersion provided that:
494
- # (i) the TableVersion supports indices (either it's not a snapshot, or it's a replica at
495
- # the head version); and
496
- # (ii) the index was created on or before the schema version of this TableVersion; and
497
- # (iii) the index was not dropped on or before the schema version of this TableVersion.
498
- supports_idxs = self.effective_version is None or (
499
- self.tbl_md.is_replica and self.effective_version == self.tbl_md.current_version
468
+ stores_cellmd: bool | None = None # None: determined by the column properties (in the Column c'tor)
469
+ sa_col_type: sql.types.TypeEngine | None = None
470
+ if col_md.id in val_col_idxs:
471
+ idx = val_col_idxs[col_md.id]
472
+ # for index value columns, the index gets to override the default
473
+ stores_cellmd = idx.records_value_errors()
474
+ sa_col_type = idx.get_index_sa_type(col_type)
475
+ elif col_md.id in undo_col_idxs:
476
+ idx = undo_col_idxs[col_md.id]
477
+ # for index undo columns, we never store cellmd
478
+ stores_cellmd = False
479
+ sa_col_type = idx.get_index_sa_type(col_type)
480
+
481
+ col = Column(
482
+ col_id=col_md.id,
483
+ name=schema_col_md.name if schema_col_md is not None else None,
484
+ col_type=col_type,
485
+ is_pk=col_md.is_pk,
486
+ is_iterator_col=self.is_component_view and col_md.id < self.num_iterator_cols + 1,
487
+ stored=col_md.stored,
488
+ media_validation=media_val,
489
+ sa_col_type=sa_col_type,
490
+ schema_version_add=col_md.schema_version_add,
491
+ schema_version_drop=col_md.schema_version_drop,
492
+ stores_cellmd=stores_cellmd,
493
+ value_expr_dict=col_md.value_expr,
494
+ tbl_handle=self.handle,
495
+ destination=col_md.destination,
500
496
  )
501
- if (
502
- supports_idxs
503
- and md.schema_version_add <= self.schema_version
504
- and (md.schema_version_drop is None or md.schema_version_drop > self.schema_version)
497
+
498
+ self.cols.append(col)
499
+ # populate lookup structures before Expr.from_dict()
500
+ if col_md.schema_version_add <= self.schema_version and (
501
+ col_md.schema_version_drop is None or col_md.schema_version_drop > self.schema_version
505
502
  ):
506
- # Since the index is present in this TableVersion, its associated columns must be as well.
507
- # Sanity-check this.
508
- assert md.indexed_col_id in self.cols_by_id
509
- assert md.index_val_col_id in self.cols_by_id
510
- assert md.index_val_undo_col_id in self.cols_by_id
511
- self.idxs_by_name[md.name] = idx_info
503
+ if col.name is not None:
504
+ self.cols_by_name[col.name] = col
505
+ self.cols_by_id[col.id] = col
506
+
507
+ if self.supports_idxs:
508
+ # create IndexInfo for indices visible in current_version
509
+ visible_idxs = [
510
+ md
511
+ for md in self.tbl_md.index_md.values()
512
+ if md.schema_version_add <= self.schema_version
513
+ and (md.schema_version_drop is None or md.schema_version_drop > self.schema_version)
514
+ ]
515
+ for md in visible_idxs:
516
+ idx = idxs[md.id]
517
+ indexed_col_id = QColumnId(UUID(md.indexed_col_tbl_id), md.indexed_col_id)
518
+ idx_col = self._lookup_column(indexed_col_id)
519
+ info = self.IndexInfo(
520
+ id=md.id,
521
+ name=md.name,
522
+ idx=idx,
523
+ col=idx_col,
524
+ val_col=self.cols_by_id[md.index_val_col_id],
525
+ undo_col=self.cols_by_id[md.index_val_undo_col_id],
526
+ )
527
+ self.idxs[md.id] = info
528
+ self.idxs_by_name[md.name] = info
529
+ self.idxs_by_col.setdefault(indexed_col_id, []).append(info)
530
+
531
+ # create value exprs, now that we have all lookup structures in place
532
+ for col in self.cols_by_id.values():
533
+ col.init_value_expr()
534
+
535
+ # create the sqlalchemy schema, after instantiating all Columns
536
+ if self.is_component_view:
537
+ self.store_tbl = StoreComponentView(self)
538
+ elif self.is_view:
539
+ self.store_tbl = StoreView(self)
540
+ else:
541
+ self.store_tbl = StoreTable(self)
512
542
 
513
543
  def _lookup_column(self, id: QColumnId) -> Column | None:
514
544
  """
@@ -551,7 +581,7 @@ class TableVersion:
551
581
  """Return name of index in the store, which needs to be globally unique"""
552
582
  return f'idx_{self.id.hex}_{idx_id}'
553
583
 
554
- def add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
584
+ def add_index(self, col: Column, idx_name: str | None, idx: index.IndexBase) -> UpdateStatus:
555
585
  # we're creating a new schema version
556
586
  self.bump_version(bump_schema_version=True)
557
587
  status = self._add_index(col, idx_name, idx)
@@ -559,12 +589,13 @@ class TableVersion:
559
589
  _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
560
590
  return status
561
591
 
562
- def _is_btree_indexable(self, col: Column) -> bool:
592
+ @classmethod
593
+ def _is_btree_indexable(cls, col: Column) -> bool:
563
594
  if not col.stored:
564
595
  # if the column is intentionally not stored, we want to avoid the overhead of an index
565
596
  return False
566
597
  # Skip index for stored media columns produced by an iterator
567
- if col.col_type.is_media_type() and self.is_iterator_column(col):
598
+ if col.col_type.is_media_type() and col.is_iterator_col:
568
599
  return False
569
600
  if not col.col_type.is_scalar_type() and not (col.col_type.is_media_type() and not col.is_computed):
570
601
  # wrong type for a B-tree
@@ -574,53 +605,58 @@ class TableVersion:
574
605
  return False
575
606
  return True
576
607
 
577
- def _add_default_index(self, col: Column) -> Optional[UpdateStatus]:
608
+ def _add_default_index(self, col: Column) -> UpdateStatus | None:
578
609
  """Add a B-tree index on this column if it has a compatible type"""
579
610
  if not self._is_btree_indexable(col):
580
611
  return None
581
- status = self._add_index(col, idx_name=None, idx=index.BtreeIndex(col))
612
+ status = self._add_index(col, idx_name=None, idx=index.BtreeIndex())
582
613
  return status
583
614
 
584
- def _create_index_columns(self, idx: index.IndexBase) -> Tuple[Column, Column]:
615
+ @classmethod
616
+ def _create_index_columns(
617
+ cls,
618
+ col: Column,
619
+ idx: index.IndexBase,
620
+ schema_version: int,
621
+ tbl_handle: TableVersionHandle,
622
+ id_cb: Callable[[], int],
623
+ ) -> tuple[Column, Column]:
585
624
  """Create value and undo columns for the given index.
586
625
  Args:
587
626
  idx: index for which columns will be created.
588
627
  Returns:
589
- A tuple containing the value column and the undo column.
628
+ A tuple containing the value column and the undo column, both of which are nullable.
590
629
  """
591
- assert not self.is_snapshot
592
- # add the index value and undo columns (which need to be nullable)
630
+ value_expr = idx.create_value_expr(col)
593
631
  val_col = Column(
594
- col_id=self.next_col_id,
632
+ col_id=id_cb(),
595
633
  name=None,
596
- computed_with=idx.index_value_expr(),
597
- sa_col_type=idx.index_sa_type(),
634
+ computed_with=value_expr,
635
+ sa_col_type=idx.get_index_sa_type(value_expr.col_type),
598
636
  stored=True,
599
- schema_version_add=self.schema_version,
600
- schema_version_drop=None,
601
637
  stores_cellmd=idx.records_value_errors(),
638
+ schema_version_add=schema_version,
639
+ schema_version_drop=None,
602
640
  )
603
- val_col.tbl = self
604
641
  val_col.col_type = val_col.col_type.copy(nullable=True)
605
- self.next_col_id += 1
642
+ val_col.tbl_handle = tbl_handle
606
643
 
607
644
  undo_col = Column(
608
- col_id=self.next_col_id,
645
+ col_id=id_cb(),
609
646
  name=None,
610
647
  col_type=val_col.col_type,
611
648
  sa_col_type=val_col.sa_col_type,
612
649
  stored=True,
613
- schema_version_add=self.schema_version,
614
- schema_version_drop=None,
615
650
  stores_cellmd=False,
651
+ schema_version_add=schema_version,
652
+ schema_version_drop=None,
616
653
  )
617
- undo_col.tbl = self
618
654
  undo_col.col_type = undo_col.col_type.copy(nullable=True)
619
- self.next_col_id += 1
655
+ undo_col.tbl_handle = tbl_handle
620
656
  return val_col, undo_col
621
657
 
622
658
  def _create_index(
623
- self, col: Column, val_col: Column, undo_col: Column, idx_name: Optional[str], idx: index.IndexBase
659
+ self, col: Column, val_col: Column, undo_col: Column, idx_name: str | None, idx: index.IndexBase
624
660
  ) -> None:
625
661
  """Create the given index along with index md"""
626
662
  idx_id = self.next_idx_id
@@ -636,7 +672,7 @@ class TableVersion:
636
672
  id=idx_id,
637
673
  name=idx_name,
638
674
  indexed_col_id=col.id,
639
- indexed_col_tbl_id=str(col.tbl.id),
675
+ indexed_col_tbl_id=str(col.get_tbl().id),
640
676
  index_val_col_id=val_col.id,
641
677
  index_val_undo_col_id=undo_col.id,
642
678
  schema_version_add=self.schema_version,
@@ -646,17 +682,21 @@ class TableVersion:
646
682
  )
647
683
  idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
648
684
  self._tbl_md.index_md[idx_id] = idx_md
685
+ self.idxs[idx_id] = idx_info
649
686
  self.idxs_by_name[idx_name] = idx_info
650
- idx.create_index(self._store_idx_name(idx_id), val_col)
687
+ self.idxs_by_col.setdefault(col.qid, []).append(idx_info)
688
+ self.store_tbl.create_index(idx_id)
651
689
 
652
- def _add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
653
- val_col, undo_vol = self._create_index_columns(idx)
690
+ def _add_index(self, col: Column, idx_name: str | None, idx: index.IndexBase) -> UpdateStatus:
691
+ val_col, undo_col = self._create_index_columns(
692
+ col, idx, self.schema_version, self.handle, id_cb=self.next_col_id
693
+ )
654
694
  # add the columns and update the metadata
655
695
  # TODO support on_error='abort' for indices; it's tricky because of the way metadata changes are entangled
656
696
  # with the database operations
657
- status = self._add_columns([val_col, undo_vol], print_stats=False, on_error='ignore')
697
+ status = self._add_columns([val_col, undo_col], print_stats=False, on_error='ignore')
658
698
  # now create the index structure
659
- self._create_index(col, val_col, undo_vol, idx_name, idx)
699
+ self._create_index(col, val_col, undo_col, idx_name, idx)
660
700
  return status
661
701
 
662
702
  def drop_index(self, idx_id: int) -> None:
@@ -672,7 +712,10 @@ class TableVersion:
672
712
  # remove this index entry from the active indexes (in memory)
673
713
  # and the index metadata (in persistent table metadata)
674
714
  # TODO: this is wrong, it breaks revert()
715
+ del self.idxs[idx_id]
675
716
  del self.idxs_by_name[idx_md.name]
717
+ if idx_info.col.qid in self.idxs_by_col:
718
+ self.idxs_by_col[idx_info.col.qid].remove(idx_info)
676
719
  del self._tbl_md.index_md[idx_id]
677
720
 
678
721
  self._drop_columns([idx_info.val_col, idx_info.undo_col])
@@ -688,9 +731,8 @@ class TableVersion:
688
731
  assert all(col.stored is not None for col in cols)
689
732
  assert all(col.name not in self.cols_by_name for col in cols if col.name is not None)
690
733
  for col in cols:
691
- col.tbl = self
692
- col.id = self.next_col_id
693
- self.next_col_id += 1
734
+ col.tbl_handle = self.handle
735
+ col.id = self.next_col_id()
694
736
 
695
737
  # we're creating a new schema version
696
738
  self.bump_version(bump_schema_version=True)
@@ -699,8 +741,10 @@ class TableVersion:
699
741
  for col in cols:
700
742
  all_cols.append(col)
701
743
  if col.name is not None and self._is_btree_indexable(col):
702
- idx = index.BtreeIndex(col)
703
- val_col, undo_col = self._create_index_columns(idx)
744
+ idx = index.BtreeIndex()
745
+ val_col, undo_col = self._create_index_columns(
746
+ col, idx, self.schema_version, self.handle, id_cb=self.next_col_id
747
+ )
704
748
  index_cols[col] = (idx, val_col, undo_col)
705
749
  all_cols.append(val_col)
706
750
  all_cols.append(undo_col)
@@ -732,7 +776,7 @@ class TableVersion:
732
776
 
733
777
  row_count = self.store_tbl.count()
734
778
  for col in cols_to_add:
735
- assert col.tbl is self
779
+ assert col.tbl_handle.id == self.id
736
780
  if not col.col_type.nullable and not col.is_computed and row_count > 0:
737
781
  raise excs.Error(
738
782
  f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
@@ -742,7 +786,7 @@ class TableVersion:
742
786
  num_excs = 0
743
787
  cols_with_excs: list[Column] = []
744
788
  for col in cols_to_add:
745
- assert col.id is not None, 'Column id must be set before adding the column'
789
+ assert col.id is not None
746
790
  excs_per_col = 0
747
791
  col.schema_version_add = self.schema_version
748
792
  # add the column to the lookup structures now, rather than after the store changes executed successfully,
@@ -796,7 +840,7 @@ class TableVersion:
796
840
  upd_rows=row_count, num_excs=num_excs, computed_values=computed_values
797
841
  ) # add_columns
798
842
  return UpdateStatus(
799
- cols_with_excs=[f'{col.tbl.name}.{col.name}' for col in cols_with_excs if col.name is not None],
843
+ cols_with_excs=[f'{col.get_tbl().name}.{col.name}' for col in cols_with_excs if col.name is not None],
800
844
  row_count_stats=row_counts,
801
845
  )
802
846
 
@@ -810,7 +854,7 @@ class TableVersion:
810
854
 
811
855
  # drop this column and all dependent index columns and indices
812
856
  dropped_cols = [col]
813
- dropped_idx_names: list[str] = []
857
+ dropped_idx_info: list[TableVersion.IndexInfo] = []
814
858
  for idx_info in self.idxs_by_name.values():
815
859
  if idx_info.col != col:
816
860
  continue
@@ -818,11 +862,14 @@ class TableVersion:
818
862
  idx_md = self._tbl_md.index_md[idx_info.id]
819
863
  idx_md.schema_version_drop = self.schema_version
820
864
  assert idx_md.name in self.idxs_by_name
821
- dropped_idx_names.append(idx_md.name)
865
+ dropped_idx_info.append(idx_info)
822
866
 
823
- # update idxs_by_name
824
- for idx_name in dropped_idx_names:
825
- del self.idxs_by_name[idx_name]
867
+ # update index lookup structures
868
+ for info in dropped_idx_info:
869
+ del self.idxs[info.id]
870
+ del self.idxs_by_name[info.name]
871
+ if col.qid in self.idxs_by_col:
872
+ del self.idxs_by_col[col.qid]
826
873
 
827
874
  self._drop_columns(dropped_cols)
828
875
  self._write_md(new_version=True, new_schema_version=True)
@@ -830,6 +877,8 @@ class TableVersion:
830
877
 
831
878
  def _drop_columns(self, cols: Iterable[Column]) -> None:
832
879
  """Mark columns as dropped"""
880
+ from pixeltable.catalog import Catalog
881
+
833
882
  assert self.is_mutable
834
883
 
835
884
  for col in cols:
@@ -849,7 +898,7 @@ class TableVersion:
849
898
  schema_col.pos = pos
850
899
 
851
900
  self.store_tbl.create_sa_tbl()
852
- pxt.catalog.Catalog.get().record_column_dependencies(self)
901
+ Catalog.get().record_column_dependencies(self)
853
902
 
854
903
  def rename_column(self, old_name: str, new_name: str) -> None:
855
904
  """Rename a column."""
@@ -858,12 +907,12 @@ class TableVersion:
858
907
  col = self.path.get_column(old_name)
859
908
  if col is None:
860
909
  raise excs.Error(f'Unknown column: {old_name}')
861
- if col.tbl.id != self.id:
910
+ if col.get_tbl().id != self.id:
862
911
  raise excs.Error(f'Cannot rename base table column {col.name!r}')
863
912
  if not is_valid_identifier(new_name):
864
- raise excs.Error(f"Invalid column name: '{new_name}'")
913
+ raise excs.Error(f'Invalid column name: {new_name}')
865
914
  if new_name in self.cols_by_name:
866
- raise excs.Error(f'Column {new_name} already exists')
915
+ raise excs.Error(f'Column {new_name!r} already exists')
867
916
  del self.cols_by_name[old_name]
868
917
  col.name = new_name
869
918
  self.cols_by_name[new_name] = col
@@ -875,7 +924,7 @@ class TableVersion:
875
924
  self._write_md(new_version=True, new_schema_version=True)
876
925
  _logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
877
926
 
878
- def set_comment(self, new_comment: Optional[str]) -> None:
927
+ def set_comment(self, new_comment: str | None) -> None:
879
928
  _logger.info(f'[{self.name}] Updating comment: {new_comment}')
880
929
  self.comment = new_comment
881
930
  self._create_schema_version()
@@ -896,8 +945,8 @@ class TableVersion:
896
945
 
897
946
  def insert(
898
947
  self,
899
- rows: Optional[list[dict[str, Any]]],
900
- df: Optional[pxt.DataFrame],
948
+ rows: list[dict[str, Any]] | None,
949
+ df: DataFrame | None,
901
950
  print_stats: bool = False,
902
951
  fail_on_exception: bool = True,
903
952
  ) -> UpdateStatus:
@@ -931,7 +980,7 @@ class TableVersion:
931
980
  exec_plan: 'exec.ExecNode',
932
981
  timestamp: float,
933
982
  *,
934
- rowids: Optional[Iterator[int]] = None,
983
+ rowids: Iterator[int] | None = None,
935
984
  print_stats: bool = False,
936
985
  abort_on_exc: bool = False,
937
986
  ) -> UpdateStatus:
@@ -962,9 +1011,7 @@ class TableVersion:
962
1011
  _logger.info(f'TableVersion {self.name}: new version {self.version}')
963
1012
  return result
964
1013
 
965
- def update(
966
- self, value_spec: dict[str, Any], where: Optional[exprs.Expr] = None, cascade: bool = True
967
- ) -> UpdateStatus:
1014
+ def update(self, value_spec: dict[str, Any], where: exprs.Expr | None = None, cascade: bool = True) -> UpdateStatus:
968
1015
  """Update rows in this TableVersionPath.
969
1016
  Args:
970
1017
  value_spec: a list of (column, value) pairs specifying the columns to update and their new values.
@@ -980,11 +1027,11 @@ class TableVersion:
980
1027
  update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True, allow_media=True)
981
1028
  if where is not None:
982
1029
  if not isinstance(where, exprs.Expr):
983
- raise excs.Error(f"'where' argument must be a predicate, got {type(where)}")
1030
+ raise excs.Error(f'`where` argument must be a valid Pixeltable expression; got `{type(where)}`')
984
1031
  analysis_info = Planner.analyze(self.path, where)
985
1032
  # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
986
1033
  if analysis_info.filter is not None:
987
- raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
1034
+ raise excs.Error(f'Filter not expressible in SQL: {analysis_info.filter}')
988
1035
 
989
1036
  plan, updated_cols, recomputed_cols = Planner.create_update_plan(self.path, update_spec, [], where, cascade)
990
1037
 
@@ -1041,7 +1088,7 @@ class TableVersion:
1041
1088
  update_targets: dict[Column, exprs.Expr] = {}
1042
1089
  for col_name, val in value_spec.items():
1043
1090
  if not isinstance(col_name, str):
1044
- raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
1091
+ raise excs.Error(f'Update specification: dict key must be column name; got {col_name!r}')
1045
1092
  if col_name == _ROWID_COLUMN_NAME:
1046
1093
  # a valid rowid is a list of ints, one per rowid column
1047
1094
  assert len(val) == len(self.store_tbl.rowid_columns())
@@ -1050,15 +1097,15 @@ class TableVersion:
1050
1097
  continue
1051
1098
  col = self.path.get_column(col_name)
1052
1099
  if col is None:
1053
- raise excs.Error(f'Column {col_name} unknown')
1054
- if col.tbl.id != self.id:
1100
+ raise excs.Error(f'Unknown column: {col_name}')
1101
+ if col.get_tbl().id != self.id:
1055
1102
  raise excs.Error(f'Column {col.name!r} is a base table column and cannot be updated')
1056
1103
  if col.is_computed:
1057
- raise excs.Error(f'Column {col_name} is computed and cannot be updated')
1104
+ raise excs.Error(f'Column {col_name!r} is computed and cannot be updated')
1058
1105
  if col.is_pk and not allow_pk:
1059
- raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
1106
+ raise excs.Error(f'Column {col_name!r} is a primary key column and cannot be updated')
1060
1107
  if col.col_type.is_media_type() and not allow_media:
1061
- raise excs.Error(f'Column {col_name} is a media column and cannot be updated')
1108
+ raise excs.Error(f'Column {col_name!r} is a media column and cannot be updated')
1062
1109
 
1063
1110
  # make sure that the value is compatible with the column type
1064
1111
  value_expr: exprs.Expr
@@ -1068,19 +1115,19 @@ class TableVersion:
1068
1115
  except (TypeError, jsonschema.exceptions.ValidationError) as exc:
1069
1116
  if not allow_exprs:
1070
1117
  raise excs.Error(
1071
- f'Column {col_name}: value {val!r} is not a valid literal for this column '
1072
- f'(expected {col.col_type})'
1118
+ f'Column {col_name!r}: value is not a valid literal for this column '
1119
+ f'(expected `{col.col_type}`): {val!r}'
1073
1120
  ) from exc
1074
1121
  # it's not a literal, let's try to create an expr from it
1075
1122
  value_expr = exprs.Expr.from_object(val)
1076
1123
  if value_expr is None:
1077
1124
  raise excs.Error(
1078
- f'Column {col_name}: value {val!r} is not a recognized literal or expression'
1125
+ f'Column {col_name!r}: value is not a recognized literal or expression: {val!r}'
1079
1126
  ) from exc
1080
1127
  if not col.col_type.is_supertype_of(value_expr.col_type, ignore_nullable=True):
1081
1128
  raise excs.Error(
1082
- f'Type of value {val!r} ({value_expr.col_type}) is not compatible with the type of column '
1083
- f'{col_name} ({col.col_type})'
1129
+ f'Type `{value_expr.col_type}` of value {val!r} is not compatible with the type '
1130
+ f'`{col.col_type}` of column {col_name!r}'
1084
1131
  ) from exc
1085
1132
  update_targets[col] = value_expr
1086
1133
 
@@ -1098,9 +1145,9 @@ class TableVersion:
1098
1145
  assert len(col_names) == 1 or not errors_only
1099
1146
 
1100
1147
  target_columns = [self.cols_by_name[name] for name in col_names]
1101
- where_clause: Optional[exprs.Expr] = None
1148
+ where_clause: exprs.Expr | None = None
1102
1149
  if where is not None:
1103
- self._validate_where_clause(where, error_prefix="'where' argument")
1150
+ self._validate_where_clause(where, error_prefix='`where` argument')
1104
1151
  where_clause = where
1105
1152
  if errors_only:
1106
1153
  errortype_pred = (
@@ -1126,10 +1173,10 @@ class TableVersion:
1126
1173
 
1127
1174
  def propagate_update(
1128
1175
  self,
1129
- plan: Optional[exec.ExecNode],
1130
- where_clause: Optional[sql.ColumnElement],
1176
+ plan: exec.ExecNode | None,
1177
+ where_clause: sql.ColumnElement | None,
1131
1178
  recomputed_view_cols: list[Column],
1132
- base_versions: list[Optional[int]],
1179
+ base_versions: list[int | None],
1133
1180
  timestamp: float,
1134
1181
  cascade: bool,
1135
1182
  show_progress: bool = True,
@@ -1157,7 +1204,7 @@ class TableVersion:
1157
1204
  base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
1158
1205
  # propagate to views
1159
1206
  for view in self.mutable_views:
1160
- recomputed_cols = [col for col in recomputed_view_cols if col.tbl.id == view.id]
1207
+ recomputed_cols = [col for col in recomputed_view_cols if col.get_tbl().id == view.id]
1161
1208
  plan = None
1162
1209
  if len(recomputed_cols) > 0:
1163
1210
  plan = Planner.create_view_update_plan(view.get().path, recompute_targets=recomputed_cols)
@@ -1177,21 +1224,21 @@ class TableVersion:
1177
1224
  from pixeltable.plan import Planner
1178
1225
 
1179
1226
  if not isinstance(pred, Expr):
1180
- raise excs.Error(f'{error_prefix} must be a predicate, got {type(pred)}')
1227
+ raise excs.Error(f'{error_prefix} must be a valid Pixeltable expression; got `{type(pred)}`')
1181
1228
  analysis_info = Planner.analyze(self.path, pred)
1182
1229
  # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
1183
1230
  if analysis_info.filter is not None:
1184
- raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
1231
+ raise excs.Error(f'Filter not expressible in SQL: {analysis_info.filter}')
1185
1232
 
1186
1233
  def delete(self, where: exprs.Expr | None = None) -> UpdateStatus:
1187
1234
  assert self.is_insertable
1188
1235
  if where is not None:
1189
- self._validate_where_clause(where, error_prefix="'where' argument")
1236
+ self._validate_where_clause(where, error_prefix='`where` argument')
1190
1237
  status = self.propagate_delete(where, base_versions=[], timestamp=time.time())
1191
1238
  return status
1192
1239
 
1193
1240
  def propagate_delete(
1194
- self, where: Optional[exprs.Expr], base_versions: list[Optional[int]], timestamp: float
1241
+ self, where: exprs.Expr | None, base_versions: list[int | None], timestamp: float
1195
1242
  ) -> UpdateStatus:
1196
1243
  """Delete rows in this table and propagate to views"""
1197
1244
  from pixeltable.catalog import Catalog
@@ -1256,7 +1303,7 @@ class TableVersion:
1256
1303
  names = [row[1] for row in result]
1257
1304
  raise excs.Error(
1258
1305
  (
1259
- f'Current version is needed for {len(result)} snapshot{"s" if len(result) > 1 else ""} '
1306
+ f'Current version is needed for {len(result)} snapshot{"s" if len(result) > 1 else ""}: '
1260
1307
  f'({", ".join(names)})'
1261
1308
  )
1262
1309
  )
@@ -1265,7 +1312,7 @@ class TableVersion:
1265
1312
 
1266
1313
  # revert new deletions
1267
1314
  set_clause: dict[sql.Column, Any] = {self.store_tbl.sa_tbl.c.v_max: schema.Table.MAX_VERSION}
1268
- for index_info in self.idxs_by_name.values():
1315
+ for index_info in self.idxs.values():
1269
1316
  # copy the index value back from the undo column and reset the undo column to NULL
1270
1317
  set_clause[index_info.val_col.sa_col] = index_info.undo_col.sa_col
1271
1318
  set_clause[index_info.undo_col.sa_col] = None
@@ -1339,16 +1386,18 @@ class TableVersion:
1339
1386
  # Do this at the end, after all DB operations have completed.
1340
1387
  # TODO: The transaction could still fail. Really this should be done via PendingTableOps.
1341
1388
  self.delete_media(tbl_version=old_version)
1342
- _logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
1389
+ _logger.info(f'TableVersion {self.name!r}: reverted to version {self.version}')
1343
1390
 
1344
1391
  def _init_external_stores(self) -> None:
1392
+ from pixeltable.io.external_store import ExternalStore
1393
+
1345
1394
  for store_md in self.tbl_md.external_stores:
1346
1395
  store_cls = resolve_symbol(store_md['class'])
1347
- assert isinstance(store_cls, type) and issubclass(store_cls, pxt.io.ExternalStore)
1396
+ assert isinstance(store_cls, type) and issubclass(store_cls, ExternalStore)
1348
1397
  store = store_cls.from_dict(store_md['md'])
1349
1398
  self.external_stores[store.name] = store
1350
1399
 
1351
- def link_external_store(self, store: pxt.io.ExternalStore) -> None:
1400
+ def link_external_store(self, store: ExternalStore) -> None:
1352
1401
  self.bump_version(bump_schema_version=True)
1353
1402
 
1354
1403
  self.external_stores[store.name] = store
@@ -1357,7 +1406,7 @@ class TableVersion:
1357
1406
  )
1358
1407
  self._write_md(new_version=True, new_schema_version=True)
1359
1408
 
1360
- def unlink_external_store(self, store: pxt.io.ExternalStore) -> None:
1409
+ def unlink_external_store(self, store: ExternalStore) -> None:
1361
1410
  del self.external_stores[store.name]
1362
1411
  self.bump_version(bump_schema_version=True)
1363
1412
  idx = next(i for i, store_md in enumerate(self._tbl_md.external_stores) if store_md['md']['name'] == store.name)
@@ -1377,7 +1426,7 @@ class TableVersion:
1377
1426
  return self._schema_version_md
1378
1427
 
1379
1428
  @property
1380
- def view_md(self) -> Optional[schema.ViewMd]:
1429
+ def view_md(self) -> schema.ViewMd | None:
1381
1430
  return self._tbl_md.view_md
1382
1431
 
1383
1432
  @property
@@ -1385,7 +1434,7 @@ class TableVersion:
1385
1434
  return self._tbl_md.name
1386
1435
 
1387
1436
  @property
1388
- def user(self) -> Optional[str]:
1437
+ def user(self) -> str | None:
1389
1438
  return self._tbl_md.user
1390
1439
 
1391
1440
  @property
@@ -1423,7 +1472,7 @@ class TableVersion:
1423
1472
  def schema_version(self) -> int:
1424
1473
  return self._schema_version_md.schema_version
1425
1474
 
1426
- def bump_version(self, timestamp: Optional[float] = None, *, bump_schema_version: bool) -> None:
1475
+ def bump_version(self, timestamp: float | None = None, *, bump_schema_version: bool) -> None:
1427
1476
  """
1428
1477
  Increments the table version and adjusts all associated metadata. This will *not* trigger a database action;
1429
1478
  _write_md() must be called separately to persist the changes.
@@ -1460,11 +1509,11 @@ class TableVersion:
1460
1509
  self._schema_version_md.schema_version = new_version
1461
1510
 
1462
1511
  @property
1463
- def preceding_schema_version(self) -> Optional[int]:
1512
+ def preceding_schema_version(self) -> int | None:
1464
1513
  return self._schema_version_md.preceding_schema_version
1465
1514
 
1466
1515
  @property
1467
- def update_status(self) -> Optional[UpdateStatus]:
1516
+ def update_status(self) -> UpdateStatus | None:
1468
1517
  return self._version_md.update_status
1469
1518
 
1470
1519
  @update_status.setter
@@ -1476,14 +1525,10 @@ class TableVersion:
1476
1525
  def media_validation(self) -> MediaValidation:
1477
1526
  return MediaValidation[self._schema_version_md.media_validation.upper()]
1478
1527
 
1479
- @property
1480
1528
  def next_col_id(self) -> int:
1481
- return self._tbl_md.next_col_id
1482
-
1483
- @next_col_id.setter
1484
- def next_col_id(self, id: int) -> None:
1485
- assert self.effective_version is None
1486
- self._tbl_md.next_col_id = id
1529
+ val = self._tbl_md.next_col_id
1530
+ self._tbl_md.next_col_id += 1
1531
+ return val
1487
1532
 
1488
1533
  @property
1489
1534
  def next_idx_id(self) -> int:
@@ -1562,15 +1607,35 @@ class TableVersion:
1562
1607
  return names
1563
1608
 
1564
1609
  def get_idx_val_columns(self, cols: Iterable[Column]) -> set[Column]:
1565
- result = {info.val_col for col in cols for info in col.get_idx_info().values()}
1566
- return result
1610
+ # assumes that the indexed columns are all in this table
1611
+ assert all(col.get_tbl().id == self.id for col in cols)
1612
+ col_ids = {col.id for col in cols}
1613
+ return {info.val_col for info in self.idxs.values() if info.col.id in col_ids}
1614
+
1615
+ def get_idx(self, col: Column, idx_name: str | None, idx_cls: type[index.IndexBase]) -> TableVersion.IndexInfo:
1616
+ if not self.supports_idxs:
1617
+ raise excs.Error('Snapshot does not support indices')
1618
+ if col.qid not in self.idxs_by_col:
1619
+ raise excs.Error(f'Column {col.name!r} does not have a {idx_cls.display_name()} index')
1620
+ candidates = [info for info in self.idxs_by_col[col.qid] if isinstance(info.idx, idx_cls)]
1621
+ if len(candidates) == 0:
1622
+ raise excs.Error(f'No {idx_cls.display_name()} index found for column {col.name!r}')
1623
+ if len(candidates) > 1 and idx_name is None:
1624
+ raise excs.Error(
1625
+ f'Column {col.name!r} has multiple {idx_cls.display_name()} indices; specify `idx_name` instead'
1626
+ )
1627
+ if idx_name is not None and idx_name not in [info.name for info in candidates]:
1628
+ raise excs.Error(f'Index {idx_name!r} not found for column {col.name!r}')
1629
+ return candidates[0] if idx_name is None else next(info for info in candidates if info.name == idx_name)
1567
1630
 
1568
1631
  def get_dependent_columns(self, cols: Iterable[Column]) -> set[Column]:
1569
1632
  """
1570
1633
  Return the set of columns that transitively depend on any of the given ones.
1571
1634
  """
1572
- cat = pxt.catalog.Catalog.get()
1573
- result = set().union(*[cat.get_column_dependents(col.tbl.id, col.id) for col in cols])
1635
+ from pixeltable.catalog import Catalog
1636
+
1637
+ cat = Catalog.get()
1638
+ result = set().union(*[cat.get_column_dependents(col.get_tbl().id, col.id) for col in cols])
1574
1639
  if len(result) > 0:
1575
1640
  result.update(self.get_dependent_columns(result))
1576
1641
  return result
@@ -1582,7 +1647,7 @@ class TableVersion:
1582
1647
  return 1
1583
1648
 
1584
1649
  @classmethod
1585
- def _create_stores_md(cls, stores: Iterable[pxt.io.ExternalStore]) -> list[dict[str, Any]]:
1650
+ def _create_stores_md(cls, stores: Iterable[ExternalStore]) -> list[dict[str, Any]]:
1586
1651
  return [
1587
1652
  {'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()} for store in stores
1588
1653
  ]