pixeltable 0.4.0rc3-py3-none-any.whl → 0.4.20-py3-none-any.whl

This diff shows the contents of two publicly released versions of the package, as they appear in the public registry to which they were published. It is provided for informational purposes only and reflects the changes between those versions.
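
The diff of pixeltable/catalog/table_version.py shown further below can be reproduced locally from the two published wheels. A minimal sketch using only the Python standard library, assuming both wheel files have already been downloaded into the working directory (for example with pip download pixeltable==0.4.0rc3 --no-deps and pip download pixeltable==0.4.20 --no-deps); the filenames and the chosen member path are illustrative:

    import difflib
    import zipfile

    # Assumed local filenames of the two downloaded wheels.
    OLD_WHEEL = 'pixeltable-0.4.0rc3-py3-none-any.whl'
    NEW_WHEEL = 'pixeltable-0.4.20-py3-none-any.whl'

    def read_member(wheel_path: str, member: str) -> list[str]:
        # A wheel is a zip archive; read one packaged file as text lines.
        with zipfile.ZipFile(wheel_path) as zf:
            return zf.read(member).decode('utf-8', errors='replace').splitlines(keepends=True)

    member = 'pixeltable/catalog/table_version.py'
    diff = difflib.unified_diff(
        read_member(OLD_WHEEL, member),
        read_member(NEW_WHEEL, member),
        fromfile=f'0.4.0rc3/{member}',
        tofile=f'0.4.20/{member}',
    )
    print(''.join(diff))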

Potentially problematic release.

This version of pixeltable might be problematic.

Files changed (202)
  1. pixeltable/__init__.py +23 -5
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/__init__.py +5 -3
  4. pixeltable/catalog/catalog.py +1318 -404
  5. pixeltable/catalog/column.py +186 -115
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +11 -43
  8. pixeltable/catalog/insertable_table.py +167 -79
  9. pixeltable/catalog/path.py +61 -23
  10. pixeltable/catalog/schema_object.py +9 -10
  11. pixeltable/catalog/table.py +626 -308
  12. pixeltable/catalog/table_metadata.py +101 -0
  13. pixeltable/catalog/table_version.py +713 -569
  14. pixeltable/catalog/table_version_handle.py +37 -6
  15. pixeltable/catalog/table_version_path.py +42 -29
  16. pixeltable/catalog/tbl_ops.py +50 -0
  17. pixeltable/catalog/update_status.py +191 -0
  18. pixeltable/catalog/view.py +108 -94
  19. pixeltable/config.py +128 -22
  20. pixeltable/dataframe.py +188 -100
  21. pixeltable/env.py +407 -136
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +3 -0
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +231 -0
  27. pixeltable/exec/cell_reconstruction_node.py +135 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +7 -6
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +190 -30
  37. pixeltable/exec/globals.py +32 -0
  38. pixeltable/exec/in_memory_data_node.py +18 -18
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +206 -101
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +34 -30
  46. pixeltable/exprs/column_ref.py +92 -96
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +152 -55
  50. pixeltable/exprs/expr.py +62 -43
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +75 -37
  54. pixeltable/exprs/globals.py +1 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +10 -27
  57. pixeltable/exprs/is_null.py +1 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +5 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +127 -53
  64. pixeltable/exprs/rowid_ref.py +8 -12
  65. pixeltable/exprs/similarity_expr.py +50 -25
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +10 -10
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +20 -18
  78. pixeltable/func/signature.py +43 -16
  79. pixeltable/func/tools.py +23 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +6 -0
  82. pixeltable/functions/anthropic.py +93 -33
  83. pixeltable/functions/audio.py +114 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +1 -1
  86. pixeltable/functions/deepseek.py +20 -9
  87. pixeltable/functions/fireworks.py +2 -2
  88. pixeltable/functions/gemini.py +28 -11
  89. pixeltable/functions/globals.py +13 -13
  90. pixeltable/functions/groq.py +108 -0
  91. pixeltable/functions/huggingface.py +1046 -23
  92. pixeltable/functions/image.py +9 -18
  93. pixeltable/functions/llama_cpp.py +23 -8
  94. pixeltable/functions/math.py +3 -4
  95. pixeltable/functions/mistralai.py +4 -15
  96. pixeltable/functions/ollama.py +16 -9
  97. pixeltable/functions/openai.py +104 -82
  98. pixeltable/functions/openrouter.py +143 -0
  99. pixeltable/functions/replicate.py +2 -2
  100. pixeltable/functions/reve.py +250 -0
  101. pixeltable/functions/string.py +21 -28
  102. pixeltable/functions/timestamp.py +13 -14
  103. pixeltable/functions/together.py +4 -6
  104. pixeltable/functions/twelvelabs.py +92 -0
  105. pixeltable/functions/util.py +6 -1
  106. pixeltable/functions/video.py +1388 -106
  107. pixeltable/functions/vision.py +7 -7
  108. pixeltable/functions/whisper.py +15 -7
  109. pixeltable/functions/whisperx.py +179 -0
  110. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  111. pixeltable/globals.py +332 -105
  112. pixeltable/index/base.py +13 -22
  113. pixeltable/index/btree.py +23 -22
  114. pixeltable/index/embedding_index.py +32 -44
  115. pixeltable/io/__init__.py +4 -2
  116. pixeltable/io/datarows.py +7 -6
  117. pixeltable/io/external_store.py +49 -77
  118. pixeltable/io/fiftyone.py +11 -11
  119. pixeltable/io/globals.py +29 -28
  120. pixeltable/io/hf_datasets.py +17 -9
  121. pixeltable/io/label_studio.py +70 -66
  122. pixeltable/io/lancedb.py +3 -0
  123. pixeltable/io/pandas.py +12 -11
  124. pixeltable/io/parquet.py +13 -93
  125. pixeltable/io/table_data_conduit.py +71 -47
  126. pixeltable/io/utils.py +3 -3
  127. pixeltable/iterators/__init__.py +2 -1
  128. pixeltable/iterators/audio.py +21 -11
  129. pixeltable/iterators/document.py +116 -55
  130. pixeltable/iterators/image.py +5 -2
  131. pixeltable/iterators/video.py +293 -13
  132. pixeltable/metadata/__init__.py +4 -2
  133. pixeltable/metadata/converters/convert_18.py +2 -2
  134. pixeltable/metadata/converters/convert_19.py +2 -2
  135. pixeltable/metadata/converters/convert_20.py +2 -2
  136. pixeltable/metadata/converters/convert_21.py +2 -2
  137. pixeltable/metadata/converters/convert_22.py +2 -2
  138. pixeltable/metadata/converters/convert_24.py +2 -2
  139. pixeltable/metadata/converters/convert_25.py +2 -2
  140. pixeltable/metadata/converters/convert_26.py +2 -2
  141. pixeltable/metadata/converters/convert_29.py +4 -4
  142. pixeltable/metadata/converters/convert_34.py +2 -2
  143. pixeltable/metadata/converters/convert_36.py +2 -2
  144. pixeltable/metadata/converters/convert_37.py +15 -0
  145. pixeltable/metadata/converters/convert_38.py +39 -0
  146. pixeltable/metadata/converters/convert_39.py +124 -0
  147. pixeltable/metadata/converters/convert_40.py +73 -0
  148. pixeltable/metadata/converters/util.py +13 -12
  149. pixeltable/metadata/notes.py +4 -0
  150. pixeltable/metadata/schema.py +79 -42
  151. pixeltable/metadata/utils.py +74 -0
  152. pixeltable/mypy/__init__.py +3 -0
  153. pixeltable/mypy/mypy_plugin.py +123 -0
  154. pixeltable/plan.py +274 -223
  155. pixeltable/share/__init__.py +1 -1
  156. pixeltable/share/packager.py +259 -129
  157. pixeltable/share/protocol/__init__.py +34 -0
  158. pixeltable/share/protocol/common.py +170 -0
  159. pixeltable/share/protocol/operation_types.py +33 -0
  160. pixeltable/share/protocol/replica.py +109 -0
  161. pixeltable/share/publish.py +213 -57
  162. pixeltable/store.py +238 -175
  163. pixeltable/type_system.py +104 -63
  164. pixeltable/utils/__init__.py +2 -3
  165. pixeltable/utils/arrow.py +108 -13
  166. pixeltable/utils/av.py +298 -0
  167. pixeltable/utils/azure_store.py +305 -0
  168. pixeltable/utils/code.py +3 -3
  169. pixeltable/utils/console_output.py +4 -1
  170. pixeltable/utils/coroutine.py +6 -23
  171. pixeltable/utils/dbms.py +31 -5
  172. pixeltable/utils/description_helper.py +4 -5
  173. pixeltable/utils/documents.py +5 -6
  174. pixeltable/utils/exception_handler.py +7 -30
  175. pixeltable/utils/filecache.py +6 -6
  176. pixeltable/utils/formatter.py +4 -6
  177. pixeltable/utils/gcs_store.py +283 -0
  178. pixeltable/utils/http_server.py +2 -3
  179. pixeltable/utils/iceberg.py +1 -2
  180. pixeltable/utils/image.py +17 -0
  181. pixeltable/utils/lancedb.py +88 -0
  182. pixeltable/utils/local_store.py +316 -0
  183. pixeltable/utils/misc.py +5 -0
  184. pixeltable/utils/object_stores.py +528 -0
  185. pixeltable/utils/pydantic.py +60 -0
  186. pixeltable/utils/pytorch.py +5 -6
  187. pixeltable/utils/s3_store.py +392 -0
  188. pixeltable-0.4.20.dist-info/METADATA +587 -0
  189. pixeltable-0.4.20.dist-info/RECORD +218 -0
  190. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
  191. pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
  192. pixeltable/__version__.py +0 -3
  193. pixeltable/ext/__init__.py +0 -17
  194. pixeltable/ext/functions/__init__.py +0 -11
  195. pixeltable/ext/functions/whisperx.py +0 -77
  196. pixeltable/utils/media_store.py +0 -77
  197. pixeltable/utils/s3.py +0 -17
  198. pixeltable/utils/sample.py +0 -25
  199. pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
  200. pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
  201. pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
  202. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
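
Two of the moves in the list above (rows 110 and 193–195) retire the pixeltable.ext.functions namespace: yolox is relocated and whisperx is reimplemented under pixeltable.functions. Callers would adjust their imports roughly as follows; this is an illustrative sketch based only on the file moves, and it assumes the public module names are unchanged and the optional dependencies for these integrations are installed:

    # pixeltable 0.4.0rc3 (old locations, removed in 0.4.20)
    # from pixeltable.ext.functions import whisperx, yolox

    # pixeltable 0.4.20 (new locations, per the file list above)
    from pixeltable.functions import whisperx, yolox
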
pixeltable/catalog/table_version.py

@@ -3,42 +3,66 @@ from __future__ import annotations
 import copy
 import dataclasses
 import importlib
+import itertools
 import logging
 import time
 import uuid
-from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Literal
 from uuid import UUID

 import jsonschema.exceptions
 import sqlalchemy as sql
+from sqlalchemy import exc as sql_exc

-import pixeltable as pxt
 import pixeltable.exceptions as excs
+import pixeltable.exprs as exprs
+import pixeltable.index as index
 import pixeltable.type_system as ts
-from pixeltable import exprs, index
 from pixeltable.env import Env
 from pixeltable.iterators import ComponentIterator
 from pixeltable.metadata import schema
-from pixeltable.utils.exception_handler import run_cleanup_on_exception
 from pixeltable.utils.filecache import FileCache
-from pixeltable.utils.media_store import MediaStore
-
-if TYPE_CHECKING:
-    from pixeltable.plan import SampleClause
-
+from pixeltable.utils.object_stores import ObjectOps

 from ..func.globals import resolve_symbol
 from .column import Column
-from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, UpdateStatus, is_valid_identifier
+from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, QColumnId, is_valid_identifier
+from .tbl_ops import TableOp
+from .update_status import RowCountStats, UpdateStatus

 if TYPE_CHECKING:
     from pixeltable import exec, store
+    from pixeltable.catalog.table_version_handle import TableVersionHandle
+    from pixeltable.dataframe import DataFrame
+    from pixeltable.io import ExternalStore
+    from pixeltable.plan import SampleClause

-    from .table_version_handle import TableVersionHandle
+    from .table_version_path import TableVersionPath

 _logger = logging.getLogger('pixeltable')


+@dataclasses.dataclass(frozen=True)
+class TableVersionCompleteMd:
+    """
+    Complete set of md records for a specific TableVersion instance.
+    """
+
+    tbl_md: schema.TableMd
+    version_md: schema.TableVersionMd
+    schema_version_md: schema.TableSchemaVersionMd
+
+    @property
+    def is_pure_snapshot(self) -> bool:
+        return (
+            self.tbl_md is not None
+            and self.tbl_md.view_md is not None
+            and self.tbl_md.view_md.is_snapshot
+            and self.tbl_md.view_md.predicate is None
+            and len(self.schema_version_md.columns) == 0
+        )
+
+
 class TableVersion:
     """
     TableVersion represents a particular version of a table/view along with its physical representation:
@@ -64,20 +88,21 @@

     # record metadata stored in catalog
     _tbl_md: schema.TableMd
+    _version_md: schema.TableVersionMd
     _schema_version_md: schema.TableSchemaVersionMd

-    effective_version: Optional[int]
-    path: Optional[pxt.catalog.TableVersionPath]  # only set for live tables; needed to resolve computed cols
-    base: Optional[TableVersionHandle]  # only set for views
-    predicate: Optional[exprs.Expr]
-    sample_clause: Optional['SampleClause']
+    effective_version: int | None
+    path: 'TableVersionPath' | None  # only set for live tables; needed to resolve computed cols
+    base: TableVersionHandle | None  # only set for views
+    predicate: exprs.Expr | None
+    sample_clause: 'SampleClause' | None

-    iterator_cls: Optional[type[ComponentIterator]]
-    iterator_args: Optional[exprs.InlineDict]
+    iterator_cls: type[ComponentIterator] | None
+    iterator_args: exprs.InlineDict | None
     num_iterator_cols: int

     # target for data operation propagation (only set for non-snapshots, and only records non-snapshot views)
-    mutable_views: set[TableVersionHandle]
+    mutable_views: frozenset[TableVersionHandle]

     # contains complete history of columns, incl dropped ones
     cols: list[Column]
@@ -85,11 +110,21 @@
     cols_by_name: dict[str, Column]
     # contains only columns visible in this version, both system and user
     cols_by_id: dict[int, Column]
-    # contains only actively maintained indices
+
+    # True if this TableVersion instance can have indices:
+    # - live version of a mutable table
+    # - the most recent version of a replica
+    supports_idxs: bool
+
+    # only populated with indices visible in this TableVersion instance
+    idxs: dict[int, TableVersion.IndexInfo]  # key: index id
     idxs_by_name: dict[str, TableVersion.IndexInfo]
+    idxs_by_col: dict[QColumnId, list[TableVersion.IndexInfo]]
+
+    external_stores: dict[str, ExternalStore]
+    store_tbl: 'store.StoreBase' | None

-    external_stores: dict[str, pxt.io.ExternalStore]
-    store_tbl: Optional['store.StoreBase']
+    is_initialized: bool  # True if init() has been called

     # used by Catalog to invalidate cached instances at the end of a transaction;
     # True if this instance reflects the state of stored metadata in the context of this transaction and
@@ -109,15 +144,18 @@
         self,
         id: UUID,
         tbl_md: schema.TableMd,
-        effective_version: Optional[int],
+        version_md: schema.TableVersionMd,
+        effective_version: int | None,
         schema_version_md: schema.TableSchemaVersionMd,
         mutable_views: list[TableVersionHandle],
-        base_path: Optional[pxt.catalog.TableVersionPath] = None,
-        base: Optional[TableVersionHandle] = None,
+        base_path: 'TableVersionPath' | None = None,
+        base: TableVersionHandle | None = None,
     ):
         self.is_validated = True  # a freshly constructed instance is always valid
+        self.is_initialized = False
         self.id = id
         self._tbl_md = copy.deepcopy(tbl_md)
+        self._version_md = copy.deepcopy(version_md)
         self._schema_version_md = copy.deepcopy(schema_version_md)
         self.effective_version = effective_version
         assert not (self.is_view and base is None)
@@ -158,27 +196,20 @@
             self.num_iterator_cols = len(output_schema)
             assert tbl_md.view_md.iterator_args is not None

-        self.mutable_views = set(mutable_views)
+        self.mutable_views = frozenset(mutable_views)
         assert self.is_mutable or len(self.mutable_views) == 0

         self.cols = []
         self.cols_by_name = {}
         self.cols_by_id = {}
+        self.idxs = {}
         self.idxs_by_name = {}
+        self.idxs_by_col = {}
+        self.supports_idxs = self.effective_version is None or (
+            self.is_replica and self.effective_version == self.tbl_md.current_version
+        )
         self.external_stores = {}

-    def init(self) -> None:
-        """
-        Initialize schema-related in-memory metadata separately, now that this TableVersion instance is visible
-        in Catalog.
-        """
-        from .catalog import Catalog
-
-        assert (self.id, self.effective_version) in Catalog.get()._tbl_versions
-        self._init_schema()
-        # init external stores; this needs to happen after the schema is created
-        self._init_external_stores()
-
     def __hash__(self) -> int:
         return hash(self.id)

@@ -186,7 +217,7 @@
         """Create a snapshot copy of this TableVersion"""
         assert not self.is_snapshot
         base = self.path.base.tbl_version if self.is_view else None
-        return TableVersion(self.id, self.tbl_md, self.version, self.schema_version_md, mutable_views=[], base=base)
+        return TableVersion(self.id, self.tbl_md, self.version_md, self.version, self.schema_version_md, [], base=base)

     @property
     def versioned_name(self) -> str:
@@ -195,75 +226,117 @@
         else:
             return f'{self.name}:{self.effective_version}'

+    def __repr__(self) -> str:
+        return (
+            f'TableVersion(id={self.id!r}, name={self.name!r}, '
+            f'version={self.version}, effective_version={self.effective_version})'
+        )
+
+    @property
+    def handle(self) -> 'TableVersionHandle':
+        from .table_version_handle import TableVersionHandle
+
+        return TableVersionHandle(self.id, self.effective_version, self)
+
     @classmethod
-    def create(
+    def create_initial_md(
         cls,
-        dir_id: UUID,
         name: str,
         cols: list[Column],
         num_retained_versions: int,
         comment: str,
         media_validation: MediaValidation,
-        # base_path: Optional[pxt.catalog.TableVersionPath] = None,
-        view_md: Optional[schema.ViewMd] = None,
-    ) -> tuple[UUID, Optional[TableVersion]]:
-        session = Env.get().session
+        create_default_idxs: bool,
+        view_md: schema.ViewMd | None = None,
+    ) -> TableVersionCompleteMd:
+        from .table_version_handle import TableVersionHandle
+
         user = Env.get().user
+        timestamp = time.time()

-        # assign ids
-        cols_by_name: dict[str, Column] = {}
+        tbl_id = uuid.uuid4()
+        tbl_id_str = str(tbl_id)
+        tbl_handle = TableVersionHandle(tbl_id, None)
+        column_ids = itertools.count(0)
+        index_ids = itertools.count(0)
+
+        # assign ids, create metadata
+        column_md: dict[int, schema.ColumnMd] = {}
+        schema_col_md: dict[int, schema.SchemaColumn] = {}
         for pos, col in enumerate(cols):
-            col.id = pos
+            col.tbl_handle = tbl_handle
+            col.id = next(column_ids)
             col.schema_version_add = 0
-            cols_by_name[col.name] = col
             if col.is_computed:
                 col.check_value_expr()
+            col_md, sch_md = col.to_md(pos)
+            assert sch_md is not None
+            column_md[col.id] = col_md
+            schema_col_md[col.id] = sch_md
+
+        index_md: dict[int, schema.IndexMd] = {}
+        if create_default_idxs and (view_md is None or not view_md.is_snapshot):
+            index_cols: list[Column] = []
+            for col in (c for c in cols if cls._is_btree_indexable(c)):
+                idx = index.BtreeIndex()
+                val_col, undo_col = cls._create_index_columns(col, idx, 0, tbl_handle, id_cb=lambda: next(column_ids))
+                index_cols.extend([val_col, undo_col])
+
+                idx_id = next(index_ids)
+                idx_cls = type(idx)
+                md = schema.IndexMd(
+                    id=idx_id,
+                    name=f'idx{idx_id}',
+                    indexed_col_id=col.id,
+                    indexed_col_tbl_id=tbl_id_str,
+                    index_val_col_id=val_col.id,
+                    index_val_undo_col_id=undo_col.id,
+                    schema_version_add=0,
+                    schema_version_drop=None,
+                    class_fqn=idx_cls.__module__ + '.' + idx_cls.__name__,
+                    init_args=idx.as_dict(),
+                )
+                index_md[idx_id] = md

-        timestamp = time.time()
-        # create schema.Table
-        # Column.dependent_cols for existing cols is wrong at this point, but init() will set it correctly
-        column_md = cls._create_column_md(cols)
-        tbl_id = uuid.uuid4()
-        table_md = schema.TableMd(
-            tbl_id=str(tbl_id),
+            for col in index_cols:
+                col_md, _ = col.to_md()
+                column_md[col.id] = col_md
+
+            assert all(column_md[id].id == id for id in column_md)
+            assert all(index_md[id].id == id for id in index_md)
+
+            cols.extend(index_cols)
+
+        tbl_md = schema.TableMd(
+            tbl_id=tbl_id_str,
             name=name,
             user=user,
             is_replica=False,
             current_version=0,
             current_schema_version=0,
-            next_col_id=len(cols),
-            next_idx_id=0,
+            next_col_id=next(column_ids),
+            next_idx_id=next(index_ids),
             next_row_id=0,
+            view_sn=0,
             column_md=column_md,
-            index_md={},
+            index_md=index_md,
             external_stores=[],
             view_md=view_md,
             additional_md={},
         )
-        # create a schema.Table here, we need it to call our c'tor;
-        # don't add it to the session yet, we might add index metadata
-        tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))

-        # create schema.TableVersion
         table_version_md = schema.TableVersionMd(
-            tbl_id=str(tbl_record.id), created_at=timestamp, version=0, schema_version=0, additional_md={}
-        )
-        tbl_version_record = schema.TableVersion(
-            tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md)
+            tbl_id=tbl_id_str,
+            created_at=timestamp,
+            version=0,
+            schema_version=0,
+            user=user,
+            update_status=None,
+            additional_md={},
         )

-        # create schema.TableSchemaVersion
-        schema_col_md: dict[int, schema.SchemaColumn] = {}
-        for pos, col in enumerate(cols):
-            md = schema.SchemaColumn(
-                pos=pos,
-                name=col.name,
-                media_validation=col._media_validation.name.lower() if col._media_validation is not None else None,
-            )
-            schema_col_md[col.id] = md
-
         schema_version_md = schema.TableSchemaVersionMd(
-            tbl_id=str(tbl_record.id),
+            tbl_id=tbl_id_str,
             schema_version=0,
             preceding_schema_version=None,
             columns=schema_col_md,
@@ -272,105 +345,121 @@
             media_validation=media_validation.name.lower(),
             additional_md={},
         )
-        schema_version_record = schema.TableSchemaVersion(
-            tbl_id=tbl_record.id, schema_version=0, md=dataclasses.asdict(schema_version_md)
-        )
+        return TableVersionCompleteMd(tbl_md, table_version_md, schema_version_md)

-        # if this is purely a snapshot (it doesn't require any additional storage for columns and it doesn't have a
-        # predicate to apply at runtime), we don't create a physical table and simply use the base's table version path
-        if (
-            view_md is not None
-            and view_md.is_snapshot
-            and view_md.predicate is None
-            and view_md.sample_clause is None
-            and len(cols) == 0
-        ):
-            session.add(tbl_record)
-            session.add(tbl_version_record)
-            session.add(schema_version_record)
-            return tbl_record.id, None
-
-        # assert (base_path is not None) == (view_md is not None)
-        is_snapshot = view_md is not None and view_md.is_snapshot
-        effective_version = 0 if is_snapshot else None
-        base_path = pxt.catalog.TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
-        base = base_path.tbl_version if base_path is not None else None
-        tbl_version = cls(
-            tbl_record.id, table_md, effective_version, schema_version_md, [], base_path=base_path, base=base
-        )
-        # TODO: break this up, so that Catalog.create_table() registers tbl_version
-        cat = pxt.catalog.Catalog.get()
-        cat._tbl_versions[tbl_record.id, effective_version] = tbl_version
-        tbl_version.init()
-        tbl_version.store_tbl.create()
-        is_mutable = not is_snapshot and not table_md.is_replica
-        if base is not None and base.get().is_mutable and is_mutable:
-            from .table_version_handle import TableVersionHandle
-
-            handle = TableVersionHandle(tbl_version.id, effective_version)
-            assert handle not in base.get().mutable_views
-            base.get().mutable_views.add(handle)
-
-        if view_md is None or not view_md.is_snapshot:
-            # add default indices, after creating the store table
-            for col in tbl_version.cols_by_name.values():
-                status = tbl_version._add_default_index(col)
-                assert status is None or status.num_excs == 0
-
-        # we re-create the tbl_record here, now that we have new index metadata
-        tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_version.tbl_md))
-        session.add(tbl_record)
-        session.add(tbl_version_record)
-        session.add(schema_version_record)
-        return tbl_record.id, tbl_version
+    def exec_op(self, op: TableOp) -> None:
+        if op.create_store_table_op is not None:
+            # this needs to be called outside of a transaction
+            self.store_tbl.create()
+
+        elif op.create_index_op is not None:
+            idx_info = self.idxs[op.create_index_op.idx_id]
+            with Env.get().begin_xact():
+                self.store_tbl.create_index(idx_info.id)
+
+        elif op.load_view_op is not None:
+            from pixeltable.catalog import Catalog
+            from pixeltable.plan import Planner
+
+            from .table_version_path import TableVersionPath
+
+            # clear out any remaining media files from an aborted previous attempt
+            self.delete_media()
+            view_path = TableVersionPath.from_dict(op.load_view_op.view_path)
+            plan, _ = Planner.create_view_load_plan(view_path)
+            _, row_counts = self.store_tbl.insert_rows(plan, v_min=self.version)
+            status = UpdateStatus(row_count_stats=row_counts)
+            Catalog.get().store_update_status(self.id, self.version, status)
+            _logger.debug(f'Loaded view {self.name} with {row_counts.num_rows} rows')

     @classmethod
-    def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
+    def create_replica(cls, md: TableVersionCompleteMd) -> TableVersion:
+        from .catalog import Catalog, TableVersionPath
+
+        assert Env.get().in_xact
+        assert md.tbl_md.is_replica
         tbl_id = UUID(md.tbl_md.tbl_id)
         _logger.info(f'Creating replica table version {tbl_id}:{md.version_md.version}.')
         view_md = md.tbl_md.view_md
-        base_path = pxt.catalog.TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
+        base_path = TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
         base = base_path.tbl_version if base_path is not None else None
         tbl_version = cls(
-            tbl_id, md.tbl_md, md.version_md.version, md.schema_version_md, [], base_path=base_path, base=base
+            tbl_id,
+            md.tbl_md,
+            md.version_md,
+            md.version_md.version,
+            md.schema_version_md,
+            [],
+            base_path=base_path,
+            base=base,
         )
-        cat = pxt.catalog.Catalog.get()
+        cat = Catalog.get()
+        # We're creating a new TableVersion replica, so we should never have seen this particular
+        # TableVersion instance before.
+        # Actually this isn't true, because we might be re-creating a dropped replica.
+        # TODO: Understand why old TableVersions are kept around even for a dropped table.
+        # assert tbl_version.effective_version is not None
+        # assert (tbl_version.id, tbl_version.effective_version) not in cat._tbl_versions
         cat._tbl_versions[tbl_version.id, tbl_version.effective_version] = tbl_version
         tbl_version.init()
         tbl_version.store_tbl.create()
-        tbl_version.store_tbl.ensure_columns_exist(col for col in tbl_version.cols if col.is_stored)
         return tbl_version

-    def drop(self) -> None:
-        from .catalog import Catalog
+    def delete_media(self, tbl_version: int | None = None) -> None:
+        # Assemble a set of column destinations and delete objects from all of them
+        # None is a valid column destination which refers to the default object location
+        destinations = {col.destination for col in self.cols if col.is_stored}
+        for dest in destinations:
+            ObjectOps.delete(dest, self.id, tbl_version=tbl_version)

-        if self.is_view and self.is_mutable:
-            # update mutable_views
-            from .table_version_handle import TableVersionHandle
+    def drop(self) -> None:
+        # if self.is_view and self.is_mutable:
+        #     # update mutable_views
+        #     # TODO: invalidate base to force reload
+        #     from .table_version_handle import TableVersionHandle
+        #
+        #     assert self.base is not None
+        #     if self.base.get().is_mutable:
+        #         self.base.get().mutable_views.remove(TableVersionHandle.create(self))
+
+        self.delete_media()
+        FileCache.get().clear(tbl_id=self.id)
+        self.store_tbl.drop()

-            assert self.base is not None
-            if self.base.get().is_mutable:
-                self.base.get().mutable_views.remove(TableVersionHandle.create(self))
+    def init(self) -> None:
+        """
+        Initialize schema-related in-memory metadata separately, now that this TableVersion instance is visible
+        in Catalog.
+        """
+        from .catalog import Catalog

         cat = Catalog.get()
-        # delete this table and all associated data
-        MediaStore.delete(self.id)
-        FileCache.get().clear(tbl_id=self.id)
-        cat.delete_tbl_md(self.id)
-        self.store_tbl.drop()
-        # de-register table version from catalog
-        cat.remove_tbl_version(self)
+        assert (self.id, self.effective_version) in cat._tbl_versions
+        self._init_schema()
+        if self.is_mutable:
+            cat.record_column_dependencies(self)
+        # init external stores; this needs to happen after the schema is created
+        self._init_external_stores()
+        self.is_initialized = True

     def _init_schema(self) -> None:
-        # create columns first, so the indices can reference them
-        self._init_cols()
-        if not self.is_snapshot:
-            self._init_idxs()
-        # create the sa schema only after creating the columns and indices
-        self._init_sa_schema()
-
-    def _init_cols(self) -> None:
-        """Initialize self.cols with the columns visible in our effective version"""
+        from pixeltable.store import StoreComponentView, StoreTable, StoreView
+
+        from .catalog import Catalog
+
+        # initialize IndexBase instances and collect sa_col_types
+        idxs: dict[int, index.IndexBase] = {}
+        val_col_idxs: dict[int, index.IndexBase] = {}  # key: id of value column
+        undo_col_idxs: dict[int, index.IndexBase] = {}  # key: id of undo column
+        for md in self.tbl_md.index_md.values():
+            cls_name = md.class_fqn.rsplit('.', 1)[-1]
+            cls = getattr(index, cls_name)
+            idx = cls.from_dict(md.init_args)
+            idxs[md.id] = idx
+            val_col_idxs[md.index_val_col_id] = idx
+            undo_col_idxs[md.index_val_undo_col_id] = idx
+
+        # initialize Columns
         self.cols = []
         self.cols_by_name = {}
         self.cols_by_id = {}
@@ -378,70 +467,110 @@
         # point backward.
         sorted_column_md = sorted(self.tbl_md.column_md.values(), key=lambda item: item.id)
         for col_md in sorted_column_md:
+            col_type = ts.ColumnType.from_dict(col_md.col_type)
             schema_col_md = self.schema_version_md.columns.get(col_md.id)
-            col_name = schema_col_md.name if schema_col_md is not None else None
             media_val = (
                 MediaValidation[schema_col_md.media_validation.upper()]
                 if schema_col_md is not None and schema_col_md.media_validation is not None
                 else None
             )
+
+            stores_cellmd: bool | None = None  # None: determined by the column properties (in the Column c'tor)
+            sa_col_type: sql.types.TypeEngine | None = None
+            if col_md.id in val_col_idxs:
+                idx = val_col_idxs[col_md.id]
+                # for index value columns, the index gets to override the default
+                stores_cellmd = idx.records_value_errors()
+                sa_col_type = idx.get_index_sa_type(col_type)
+            elif col_md.id in undo_col_idxs:
+                idx = undo_col_idxs[col_md.id]
+                # for index undo columns, we never store cellmd
+                stores_cellmd = False
+                sa_col_type = idx.get_index_sa_type(col_type)
+
             col = Column(
                 col_id=col_md.id,
-                name=col_name,
-                col_type=ts.ColumnType.from_dict(col_md.col_type),
+                name=schema_col_md.name if schema_col_md is not None else None,
+                col_type=col_type,
                 is_pk=col_md.is_pk,
+                is_iterator_col=self.is_component_view and col_md.id < self.num_iterator_cols + 1,
                 stored=col_md.stored,
                 media_validation=media_val,
+                sa_col_type=sa_col_type,
                 schema_version_add=col_md.schema_version_add,
                 schema_version_drop=col_md.schema_version_drop,
+                stores_cellmd=stores_cellmd,
                 value_expr_dict=col_md.value_expr,
+                tbl_handle=self.handle,
+                destination=col_md.destination,
             )
-            col.tbl = self
-            self.cols.append(col)

-            # populate the lookup structures before Expr.from_dict()
-            if col_md.schema_version_add > self.schema_version:
-                # column was added after this version
-                continue
-            if col_md.schema_version_drop is not None and col_md.schema_version_drop <= self.schema_version:
-                # column was dropped
-                continue
-            if col.name is not None:
-                self.cols_by_name[col.name] = col
-            self.cols_by_id[col.id] = col
-
-            # make sure to traverse columns ordered by position = order in which cols were created;
-            # this guarantees that references always point backwards
-            if not self.is_snapshot and col_md.value_expr is not None:
-                self._record_refd_columns(col)
+            self.cols.append(col)
+            # populate lookup structures before Expr.from_dict()
+            if col_md.schema_version_add <= self.schema_version and (
+                col_md.schema_version_drop is None or col_md.schema_version_drop > self.schema_version
+            ):
+                if col.name is not None:
+                    self.cols_by_name[col.name] = col
+                self.cols_by_id[col.id] = col
+
+        if self.supports_idxs:
+            # create IndexInfo for indices visible in current_version
+            visible_idxs = [
+                md
+                for md in self.tbl_md.index_md.values()
+                if md.schema_version_add <= self.schema_version
+                and (md.schema_version_drop is None or md.schema_version_drop > self.schema_version)
+            ]
+            for md in visible_idxs:
+                idx = idxs[md.id]
+                indexed_col_id = QColumnId(UUID(md.indexed_col_tbl_id), md.indexed_col_id)
+                idx_col = self._lookup_column(indexed_col_id)
+                info = self.IndexInfo(
+                    id=md.id,
+                    name=md.name,
+                    idx=idx,
+                    col=idx_col,
+                    val_col=self.cols_by_id[md.index_val_col_id],
+                    undo_col=self.cols_by_id[md.index_val_undo_col_id],
+                )
+                self.idxs[md.id] = info
+                self.idxs_by_name[md.name] = info
+                self.idxs_by_col.setdefault(indexed_col_id, []).append(info)
+
+        # create value exprs, now that we have all lookup structures in place
+        tvp: TableVersionPath | None = None
+        if self.effective_version is not None:
+            # for snapshot TableVersion instances, we need to retarget the column value_exprs to the snapshot;
+            # otherwise they'll incorrectly refer to the live table. So, construct a full TableVersionPath to
+            # use for retargeting.
+            tvp = Catalog.get().construct_tvp(
+                self.id, self.effective_version, self.tbl_md.ancestor_ids, self.version_md.created_at
+            )
+        for col in self.cols_by_id.values():
+            col.init_value_expr(tvp)

-    def _init_idxs(self) -> None:
-        # self.idx_md = tbl_md.index_md
-        self.idxs_by_name = {}
-        import pixeltable.index as index_module
+        # create the sqlalchemy schema, after instantiating all Columns
+        if self.is_component_view:
+            self.store_tbl = StoreComponentView(self)
+        elif self.is_view:
+            self.store_tbl = StoreView(self)
+        else:
+            self.store_tbl = StoreTable(self)

-        for md in self.tbl_md.index_md.values():
-            if md.schema_version_add > self.schema_version or (
-                md.schema_version_drop is not None and md.schema_version_drop <= self.schema_version
-            ):
-                # index not visible in this schema version
-                continue
+    def _lookup_column(self, id: QColumnId) -> Column | None:
+        """
+        Look up the column with the given table id and column id, searching through the ancestors of this TableVersion
+        to find it. We avoid referencing TableVersionPath in order to work properly with snapshots as well.

-            # instantiate index object
-            cls_name = md.class_fqn.rsplit('.', 1)[-1]
-            cls = getattr(index_module, cls_name)
-            idx_col = self.path.get_column_by_id(UUID(md.indexed_col_tbl_id), md.indexed_col_id)
-            idx = cls.from_dict(idx_col, md.init_args)
-
-            # fix up the sa column type of the index value and undo columns
-            val_col = self.cols_by_id[md.index_val_col_id]
-            val_col.sa_col_type = idx.index_sa_type()
-            val_col._records_errors = False
-            undo_col = self.cols_by_id[md.index_val_undo_col_id]
-            undo_col.sa_col_type = idx.index_sa_type()
-            undo_col._records_errors = False
-            idx_info = self.IndexInfo(id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col)
-            self.idxs_by_name[md.name] = idx_info
+        This will search through *all* known columns, including columns that are not visible in this TableVersion.
+        """
+        if id.tbl_id == self.id:
+            return next(col for col in self.cols if col.id == id.col_id)
+        elif self.base is not None:
+            return self.base.get()._lookup_column(id)
+        else:
+            return None

     def _init_sa_schema(self) -> None:
         # create the sqlalchemy schema; do this after instantiating columns, in order to determine whether they
@@ -455,58 +584,36 @@
         else:
             self.store_tbl = StoreTable(self)

-    def _write_md(self, new_version: bool, new_version_ts: float, new_schema_version: bool) -> None:
-        """Writes table metadata to the database.
-
-        Args:
-            timestamp: timestamp of the change
-            update_tbl_version: if `True`, will also write `TableVersion` metadata
-            preceding_schema_version: if specified, will also write `TableSchemaVersion` metadata, recording the
-                specified preceding schema version
-        """
+    def _write_md(self, new_version: bool, new_schema_version: bool) -> None:
         from pixeltable.catalog import Catalog

-        version_md: Optional[schema.TableVersionMd] = (
-            schema.TableVersionMd(
-                tbl_id=str(self.id),
-                created_at=new_version_ts,
-                version=self.version,
-                schema_version=self.schema_version,
-                additional_md={},
-            )
-            if new_version
-            else None
-        )
-
         Catalog.get().store_tbl_md(
-            self.id, self._tbl_md, version_md, self._schema_version_md if new_schema_version else None
+            self.id,
+            None,
+            self._tbl_md,
+            self._version_md if new_version else None,
+            self._schema_version_md if new_schema_version else None,
         )

-    def ensure_md_loaded(self) -> None:
-        """Ensure that table metadata is loaded."""
-        for col in self.cols_by_id.values():
-            _ = col.value_expr
-
     def _store_idx_name(self, idx_id: int) -> str:
         """Return name of index in the store, which needs to be globally unique"""
         return f'idx_{self.id.hex}_{idx_id}'

-    def add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
+    def add_index(self, col: Column, idx_name: str | None, idx: index.IndexBase) -> UpdateStatus:
         # we're creating a new schema version
-        self.version += 1
-        self.preceding_schema_version = self.schema_version
-        self.schema_version = self.version
+        self.bump_version(bump_schema_version=True)
         status = self._add_index(col, idx_name, idx)
-        self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
+        self._write_md(new_version=True, new_schema_version=True)
         _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
         return status

-    def _is_btree_indexable(self, col: Column) -> bool:
+    @classmethod
+    def _is_btree_indexable(cls, col: Column) -> bool:
         if not col.stored:
             # if the column is intentionally not stored, we want to avoid the overhead of an index
             return False
         # Skip index for stored media columns produced by an iterator
-        if col.col_type.is_media_type() and self.is_iterator_column(col):
+        if col.col_type.is_media_type() and col.is_iterator_col:
             return False
         if not col.col_type.is_scalar_type() and not (col.col_type.is_media_type() and not col.is_computed):
             # wrong type for a B-tree
@@ -516,53 +623,58 @@
             return False
         return True

-    def _add_default_index(self, col: Column) -> Optional[UpdateStatus]:
+    def _add_default_index(self, col: Column) -> UpdateStatus | None:
         """Add a B-tree index on this column if it has a compatible type"""
         if not self._is_btree_indexable(col):
             return None
-        status = self._add_index(col, idx_name=None, idx=index.BtreeIndex(col))
+        status = self._add_index(col, idx_name=None, idx=index.BtreeIndex())
         return status

-    def _create_index_columns(self, idx: index.IndexBase) -> Tuple[Column, Column]:
+    @classmethod
+    def _create_index_columns(
+        cls,
+        col: Column,
+        idx: index.IndexBase,
+        schema_version: int,
+        tbl_handle: TableVersionHandle,
+        id_cb: Callable[[], int],
+    ) -> tuple[Column, Column]:
         """Create value and undo columns for the given index.
         Args:
             idx: index for which columns will be created.
         Returns:
-            A tuple containing the value column and the undo column.
+            A tuple containing the value column and the undo column, both of which are nullable.
         """
-        assert not self.is_snapshot
-        # add the index value and undo columns (which need to be nullable)
+        value_expr = idx.create_value_expr(col)
         val_col = Column(
-            col_id=self.next_col_id,
+            col_id=id_cb(),
             name=None,
-            computed_with=idx.index_value_expr(),
-            sa_col_type=idx.index_sa_type(),
+            computed_with=value_expr,
+            sa_col_type=idx.get_index_sa_type(value_expr.col_type),
             stored=True,
-            schema_version_add=self.schema_version,
+            stores_cellmd=idx.records_value_errors(),
+            schema_version_add=schema_version,
             schema_version_drop=None,
-            records_errors=idx.records_value_errors(),
         )
-        val_col.tbl = self
         val_col.col_type = val_col.col_type.copy(nullable=True)
-        self.next_col_id += 1
+        val_col.tbl_handle = tbl_handle

         undo_col = Column(
-            col_id=self.next_col_id,
+            col_id=id_cb(),
             name=None,
             col_type=val_col.col_type,
             sa_col_type=val_col.sa_col_type,
             stored=True,
-            schema_version_add=self.schema_version,
+            stores_cellmd=False,
+            schema_version_add=schema_version,
             schema_version_drop=None,
-            records_errors=False,
         )
-        undo_col.tbl = self
         undo_col.col_type = undo_col.col_type.copy(nullable=True)
-        self.next_col_id += 1
+        undo_col.tbl_handle = tbl_handle
         return val_col, undo_col

     def _create_index(
-        self, col: Column, val_col: Column, undo_col: Column, idx_name: Optional[str], idx: index.IndexBase
+        self, col: Column, val_col: Column, undo_col: Column, idx_name: str | None, idx: index.IndexBase
     ) -> None:
         """Create the given index along with index md"""
         idx_id = self.next_idx_id
@@ -578,7 +690,7 @@
             id=idx_id,
             name=idx_name,
             indexed_col_id=col.id,
-            indexed_col_tbl_id=str(col.tbl.id),
+            indexed_col_tbl_id=str(col.get_tbl().id),
             index_val_col_id=val_col.id,
             index_val_undo_col_id=undo_col.id,
             schema_version_add=self.schema_version,
@@ -588,38 +700,29 @@
         )
         idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
         self._tbl_md.index_md[idx_id] = idx_md
+        self.idxs[idx_id] = idx_info
         self.idxs_by_name[idx_name] = idx_info
-        try:
-            idx.create_index(self._store_idx_name(idx_id), val_col)
-        finally:
+        self.idxs_by_col.setdefault(col.qid, []).append(idx_info)
+        self.store_tbl.create_index(idx_id)

-            def cleanup_index() -> None:
-                """Delete the newly added in-memory index structure"""
-                del self.idxs_by_name[idx_name]
-                del self._tbl_md.index_md[idx_id]
-                self.next_idx_id = idx_id
-
-            # Run cleanup only if there has been an exception; otherwise, skip cleanup.
-            run_cleanup_on_exception(cleanup_index)
-
-    def _add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
-        val_col, undo_vol = self._create_index_columns(idx)
+    def _add_index(self, col: Column, idx_name: str | None, idx: index.IndexBase) -> UpdateStatus:
+        val_col, undo_col = self._create_index_columns(
+            col, idx, self.schema_version, self.handle, id_cb=self.next_col_id
+        )
         # add the columns and update the metadata
         # TODO support on_error='abort' for indices; it's tricky because of the way metadata changes are entangled
         # with the database operations
-        status = self._add_columns([val_col, undo_vol], print_stats=False, on_error='ignore')
+        status = self._add_columns([val_col, undo_col], print_stats=False, on_error='ignore')
         # now create the index structure
-        self._create_index(col, val_col, undo_vol, idx_name, idx)
+        self._create_index(col, val_col, undo_col, idx_name, idx)
         return status

     def drop_index(self, idx_id: int) -> None:
-        assert not self.is_snapshot
+        assert self.is_mutable
         assert idx_id in self._tbl_md.index_md

         # we're creating a new schema version
-        self.version += 1
-        self.preceding_schema_version = self.schema_version
-        self.schema_version = self.version
+        self.bump_version(bump_schema_version=True)
         idx_md = self._tbl_md.index_md[idx_id]
         idx_md.schema_version_drop = self.schema_version
         assert idx_md.name in self.idxs_by_name
@@ -627,37 +730,39 @@
         # remove this index entry from the active indexes (in memory)
         # and the index metadata (in persistent table metadata)
         # TODO: this is wrong, it breaks revert()
+        del self.idxs[idx_id]
         del self.idxs_by_name[idx_md.name]
+        if idx_info.col.qid in self.idxs_by_col:
+            self.idxs_by_col[idx_info.col.qid].remove(idx_info)
         del self._tbl_md.index_md[idx_id]

         self._drop_columns([idx_info.val_col, idx_info.undo_col])
-        self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
+        self._write_md(new_version=True, new_schema_version=True)
         _logger.info(f'Dropped index {idx_md.name} on table {self.name}')

     def add_columns(
         self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
     ) -> UpdateStatus:
         """Adds columns to the table."""
-        assert not self.is_snapshot
+        assert self.is_mutable
         assert all(is_valid_identifier(col.name) for col in cols if col.name is not None)
         assert all(col.stored is not None for col in cols)
         assert all(col.name not in self.cols_by_name for col in cols if col.name is not None)
         for col in cols:
-            col.tbl = self
-            col.id = self.next_col_id
-            self.next_col_id += 1
+            col.tbl_handle = self.handle
+            col.id = self.next_col_id()

         # we're creating a new schema version
-        self.version += 1
-        self.preceding_schema_version = self.schema_version
-        self.schema_version = self.version
+        self.bump_version(bump_schema_version=True)
         index_cols: dict[Column, tuple[index.BtreeIndex, Column, Column]] = {}
         all_cols: list[Column] = []
         for col in cols:
             all_cols.append(col)
             if col.name is not None and self._is_btree_indexable(col):
-                idx = index.BtreeIndex(col)
-                val_col, undo_col = self._create_index_columns(idx)
+                idx = index.BtreeIndex()
+                val_col, undo_col = self._create_index_columns(
+                    col, idx, self.schema_version, self.handle, id_cb=self.next_col_id
+                )
                 index_cols[col] = (idx, val_col, undo_col)
                 all_cols.append(val_col)
                 all_cols.append(undo_col)
@@ -666,7 +771,8 @@
         # Create indices and their md records
         for col, (idx, val_col, undo_col) in index_cols.items():
             self._create_index(col, val_col, undo_col, idx_name=None, idx=idx)
-        self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
+        self.update_status = status
+        self._write_md(new_version=True, new_schema_version=True)
         _logger.info(f'Added columns {[col.name for col in cols]} to table {self.name}, new version: {self.version}')

         msg = (
@@ -681,45 +787,39 @@
         self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
     ) -> UpdateStatus:
         """Add and populate columns within the current transaction"""
+        from pixeltable.catalog import Catalog
+        from pixeltable.plan import Planner
+
         cols_to_add = list(cols)
+
         row_count = self.store_tbl.count()
         for col in cols_to_add:
+            assert col.tbl_handle.id == self.id
             if not col.col_type.nullable and not col.is_computed and row_count > 0:
                 raise excs.Error(
                     f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
                 )

+        computed_values = 0
         num_excs = 0
         cols_with_excs: list[Column] = []
         for col in cols_to_add:
+            assert col.id is not None
             excs_per_col = 0
             col.schema_version_add = self.schema_version
             # add the column to the lookup structures now, rather than after the store changes executed successfully,
             # because it might be referenced by the next column's value_expr
             self.cols.append(col)
-            if col.name is not None:
-                self.cols_by_name[col.name] = col
             self.cols_by_id[col.id] = col
-            if col.value_expr is not None:
-                col.check_value_expr()
-                self._record_refd_columns(col)
-
-            # also add to stored md
-            self._tbl_md.column_md[col.id] = schema.ColumnMd(
-                id=col.id,
-                col_type=col.col_type.as_dict(),
-                is_pk=col.is_pk,
-                schema_version_add=col.schema_version_add,
-                schema_version_drop=col.schema_version_drop,
-                value_expr=col.value_expr.as_dict() if col.value_expr is not None else None,
-                stored=col.stored,
-            )
             if col.name is not None:
-                self._schema_version_md.columns[col.id] = schema.SchemaColumn(
-                    name=col.name,
-                    pos=len(self.cols_by_name),
-                    media_validation=col._media_validation.name.lower() if col._media_validation is not None else None,
-                )
+                self.cols_by_name[col.name] = col
+                col_md, sch_md = col.to_md(len(self.cols_by_name))
+                assert sch_md is not None, 'Schema column metadata must be created for user-facing columns'
+                self._tbl_md.column_md[col.id] = col_md
+                self._schema_version_md.columns[col.id] = sch_md
+            else:
+                col_md, _ = col.to_md()
+                self._tbl_md.column_md[col.id] = col_md

             if col.is_stored:
                 self.store_tbl.add_column(col)
@@ -728,61 +828,51 @@ class TableVersion:
728
828
  continue
729
829
 
730
830
  # populate the column
731
- from pixeltable.plan import Planner
732
-
733
- plan, value_expr_slot_idx = Planner.create_add_column_plan(self.path, col)
831
+ plan = Planner.create_add_column_plan(self.path, col)
734
832
  plan.ctx.num_rows = row_count
735
833
  try:
736
834
  plan.open()
737
835
  try:
738
- excs_per_col = self.store_tbl.load_column(col, plan, value_expr_slot_idx, on_error)
739
- except sql.exc.DBAPIError as exc:
740
- # Wrap the DBAPIError in an excs.Error to unify processing in the subsequent except block
741
- raise excs.Error(f'SQL error during execution of computed column `{col.name}`:\n{exc}') from exc
836
+ excs_per_col = self.store_tbl.load_column(col, plan, on_error == 'abort')
837
+ except sql_exc.DBAPIError as exc:
838
+ Catalog.get().convert_sql_exc(exc, self.id, self.handle, convert_db_excs=True)
839
+ # If it wasn't converted, re-raise as a generic Pixeltable error
840
+ # (this means it's not a known concurrency error; it's something else)
841
+ raise excs.Error(
842
+ f'Unexpected SQL error during execution of computed column {col.name!r}:\n{exc}'
843
+ ) from exc
742
844
  if excs_per_col > 0:
743
845
  cols_with_excs.append(col)
744
846
  num_excs += excs_per_col
847
+ computed_values += plan.ctx.num_computed_exprs * row_count
745
848
  finally:
746
- # Ensure cleanup occurs if an exception or keyboard interruption happens during `load_column()`.
747
- def cleanup_on_error() -> None:
748
- """Delete columns that are added as part of current add_columns operation and re-initialize
749
- the sqlalchemy schema"""
750
- self.cols = [col for col in self.cols if col not in cols_to_add]
751
- for col in cols_to_add:
752
- # remove columns that we already added
753
- if col.id in self.cols_by_id:
754
- del self.cols_by_id[col.id]
755
- if col.name is not None and col.name in self.cols_by_name:
756
- del self.cols_by_name[col.name]
757
- self.store_tbl.create_sa_tbl()
758
-
759
- # Run cleanup only if there has been an exception; otherwise, skip cleanup.
760
- run_cleanup_on_exception(cleanup_on_error)
761
849
  plan.close()
762
850
 
851
+ Catalog.get().record_column_dependencies(self)
852
+
763
853
  if print_stats:
764
854
  plan.ctx.profile.print(num_rows=row_count)
765
- # TODO(mkornacker): what to do about system columns with exceptions?
855
+
856
+ # TODO: what to do about system columns with exceptions?
857
+ row_counts = RowCountStats(
858
+ upd_rows=row_count, num_excs=num_excs, computed_values=computed_values
859
+ ) # add_columns
766
860
  return UpdateStatus(
767
- num_rows=row_count,
768
- num_computed_values=row_count,
769
- num_excs=num_excs,
770
- cols_with_excs=[f'{col.tbl.name}.{col.name}' for col in cols_with_excs if col.name is not None],
861
+ cols_with_excs=[f'{col.get_tbl().name}.{col.name}' for col in cols_with_excs if col.name is not None],
862
+ row_count_stats=row_counts,
771
863
  )
772
864
 
773
865
  def drop_column(self, col: Column) -> None:
774
866
  """Drop a column from the table."""
775
867
 
776
- assert not self.is_snapshot
868
+ assert self.is_mutable
777
869
 
778
870
  # we're creating a new schema version
779
- self.version += 1
780
- self.preceding_schema_version = self.schema_version
781
- self.schema_version = self.version
871
+ self.bump_version(bump_schema_version=True)
782
872
 
783
873
  # drop this column and all dependent index columns and indices
784
874
  dropped_cols = [col]
785
- dropped_idx_names: list[str] = []
875
+ dropped_idx_info: list[TableVersion.IndexInfo] = []
786
876
  for idx_info in self.idxs_by_name.values():
787
877
  if idx_info.col != col:
788
878
  continue
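The add_columns path above now folds its counters into a RowCountStats record and returns them through UpdateStatus(row_count_stats=...). A small sketch of that construction, using only the field names visible in this hunk; the import path is an assumption based on the new catalog/update_status.py module, and the numbers are illustrative:

    # Sketch: rolling per-operation counters into the returned status.
    from pixeltable.catalog.update_status import RowCountStats, UpdateStatus  # assumed location

    row_count, num_excs, computed_values = 1_000, 2, 3_000   # illustrative numbers
    stats = RowCountStats(upd_rows=row_count, num_excs=num_excs, computed_values=computed_values)
    status = UpdateStatus(
        cols_with_excs=['my_table.my_computed_col'],          # hypothetical column name
        row_count_stats=stats,
    )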
@@ -790,28 +880,26 @@ class TableVersion:
  idx_md = self._tbl_md.index_md[idx_info.id]
  idx_md.schema_version_drop = self.schema_version
  assert idx_md.name in self.idxs_by_name
- dropped_idx_names.append(idx_md.name)
+ dropped_idx_info.append(idx_info)

- # update idxs_by_name
- for idx_name in dropped_idx_names:
- del self.idxs_by_name[idx_name]
+ # update index lookup structures
+ for info in dropped_idx_info:
+ del self.idxs[info.id]
+ del self.idxs_by_name[info.name]
+ if col.qid in self.idxs_by_col:
+ del self.idxs_by_col[col.qid]

  self._drop_columns(dropped_cols)
- self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
+ self._write_md(new_version=True, new_schema_version=True)
  _logger.info(f'Dropped column {col.name} from table {self.name}, new version: {self.version}')

  def _drop_columns(self, cols: Iterable[Column]) -> None:
  """Mark columns as dropped"""
- assert not self.is_snapshot
+ from pixeltable.catalog import Catalog

- for col in cols:
- if col.value_expr is not None:
- # update Column.dependent_cols
- for c in self.cols:
- if c == col:
- break
- c.dependent_cols.discard(col)
+ assert self.is_mutable

+ for col in cols:
  col.schema_version_drop = self.schema_version
  if col.name is not None:
  assert col.name in self.cols_by_name
@@ -828,31 +916,33 @@ class TableVersion:
828
916
  schema_col.pos = pos
829
917
 
830
918
  self.store_tbl.create_sa_tbl()
919
+ Catalog.get().record_column_dependencies(self)
831
920
 
832
921
  def rename_column(self, old_name: str, new_name: str) -> None:
833
922
  """Rename a column."""
834
- assert not self.is_snapshot
835
- if old_name not in self.cols_by_name:
923
+ if not self.is_mutable:
924
+ raise excs.Error(f'Cannot rename column for immutable table {self.name!r}')
925
+ col = self.path.get_column(old_name)
926
+ if col is None:
836
927
  raise excs.Error(f'Unknown column: {old_name}')
928
+ if col.get_tbl().id != self.id:
929
+ raise excs.Error(f'Cannot rename base table column {col.name!r}')
837
930
  if not is_valid_identifier(new_name):
838
- raise excs.Error(f"Invalid column name: '{new_name}'")
931
+ raise excs.Error(f'Invalid column name: {new_name}')
839
932
  if new_name in self.cols_by_name:
840
- raise excs.Error(f'Column {new_name} already exists')
841
- col = self.cols_by_name[old_name]
933
+ raise excs.Error(f'Column {new_name!r} already exists')
842
934
  del self.cols_by_name[old_name]
843
935
  col.name = new_name
844
936
  self.cols_by_name[new_name] = col
845
937
  self._schema_version_md.columns[col.id].name = new_name
846
938
 
847
939
  # we're creating a new schema version
848
- self.version += 1
849
- self.preceding_schema_version = self.schema_version
850
- self.schema_version = self.version
940
+ self.bump_version(bump_schema_version=True)
851
941
 
852
- self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
942
+ self._write_md(new_version=True, new_schema_version=True)
853
943
  _logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
854
944
 
855
- def set_comment(self, new_comment: Optional[str]) -> None:
945
+ def set_comment(self, new_comment: str | None) -> None:
856
946
  _logger.info(f'[{self.name}] Updating comment: {new_comment}')
857
947
  self.comment = new_comment
858
948
  self._create_schema_version()
@@ -867,16 +957,14 @@ class TableVersion:

  def _create_schema_version(self) -> None:
  # we're creating a new schema version
- self.version += 1
- self.preceding_schema_version = self.schema_version
- self.schema_version = self.version
- self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
+ self.bump_version(bump_schema_version=True)
+ self._write_md(new_version=True, new_schema_version=True)
  _logger.info(f'[{self.name}] Updating table schema to version: {self.version}')

  def insert(
  self,
- rows: Optional[list[dict[str, Any]]],
- df: Optional[pxt.DataFrame],
+ rows: list[dict[str, Any]] | None,
+ df: DataFrame | None,
  print_stats: bool = False,
  fail_on_exception: bool = True,
  ) -> UpdateStatus:
@@ -889,6 +977,7 @@ class TableVersion:
  assert (rows is None) != (df is None) # Exactly one must be specified
  if rows is not None:
  plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
+
  else:
  plan = Planner.create_df_insert_plan(self, df, ignore_errors=not fail_on_exception)

@@ -899,50 +988,48 @@ class TableVersion:
899
988
  self.next_row_id += 1
900
989
  yield rowid
901
990
 
902
- return self._insert(plan, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception)
991
+ result = self._insert(
992
+ plan, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception
993
+ )
994
+ return result
903
995
 
904
996
  def _insert(
905
997
  self,
906
998
  exec_plan: 'exec.ExecNode',
907
999
  timestamp: float,
908
1000
  *,
909
- rowids: Optional[Iterator[int]] = None,
1001
+ rowids: Iterator[int] | None = None,
910
1002
  print_stats: bool = False,
911
1003
  abort_on_exc: bool = False,
912
1004
  ) -> UpdateStatus:
913
1005
  """Insert rows produced by exec_plan and propagate to views"""
914
1006
  # we're creating a new version
915
- self.version += 1
916
- result = UpdateStatus()
917
- num_rows, num_excs, cols_with_excs = self.store_tbl.insert_rows(
1007
+ self.bump_version(timestamp, bump_schema_version=False)
1008
+ cols_with_excs, row_counts = self.store_tbl.insert_rows(
918
1009
  exec_plan, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc
919
1010
  )
920
- result.num_rows = num_rows
921
- result.num_excs = num_excs
922
- result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
923
- result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
924
- self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
1011
+ result = UpdateStatus(
1012
+ cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs],
1013
+ row_count_stats=row_counts,
1014
+ )
925
1015
 
926
1016
  # update views
927
1017
  for view in self.mutable_views:
928
1018
  from pixeltable.plan import Planner
929
1019
 
930
- plan, _ = Planner.create_view_load_plan(view.get().path, propagates_insert=True)
931
- status = view.get()._insert(plan, timestamp, print_stats=print_stats)
932
- result.num_rows += status.num_rows
933
- result.num_excs += status.num_excs
934
- result.num_computed_values += status.num_computed_values
935
- result.cols_with_excs += status.cols_with_excs
1020
+ plan2, _ = Planner.create_view_load_plan(view.get().path, propagates_insert=True)
1021
+ status = view.get()._insert(plan2, timestamp, print_stats=print_stats)
1022
+ result += status.to_cascade()
936
1023
 
937
- result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
1024
+ # Use the net status after all propagations
1025
+ self.update_status = result
1026
+ self._write_md(new_version=True, new_schema_version=False)
938
1027
  if print_stats:
939
- plan.ctx.profile.print(num_rows=num_rows)
1028
+ exec_plan.ctx.profile.print(num_rows=result.num_rows)
940
1029
  _logger.info(f'TableVersion {self.name}: new version {self.version}')
941
1030
  return result
942
1031
 
943
- def update(
944
- self, value_spec: dict[str, Any], where: Optional[exprs.Expr] = None, cascade: bool = True
945
- ) -> UpdateStatus:
1032
+ def update(self, value_spec: dict[str, Any], where: exprs.Expr | None = None, cascade: bool = True) -> UpdateStatus:
946
1033
  """Update rows in this TableVersionPath.
947
1034
  Args:
948
1035
  value_spec: a list of (column, value) pairs specifying the columns to update and their new values.
@@ -950,22 +1037,21 @@ class TableVersion:
  cascade: if True, also update all computed columns that transitively depend on the updated columns,
  including within views.
  """
- if self.is_snapshot:
- raise excs.Error('Cannot update a snapshot')
-
+ from pixeltable.exprs import SqlElementCache
  from pixeltable.plan import Planner

+ assert self.is_mutable
+
  update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True, allow_media=True)
  if where is not None:
  if not isinstance(where, exprs.Expr):
- raise excs.Error(f"'where' argument must be a predicate, got {type(where)}")
+ raise excs.Error(f'`where` argument must be a valid Pixeltable expression; got `{type(where)}`')
  analysis_info = Planner.analyze(self.path, where)
  # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
  if analysis_info.filter is not None:
- raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
+ raise excs.Error(f'Filter not expressible in SQL: {analysis_info.filter}')

  plan, updated_cols, recomputed_cols = Planner.create_update_plan(self.path, update_spec, [], where, cascade)
- from pixeltable.exprs import SqlElementCache

  result = self.propagate_update(
  plan,
@@ -976,7 +1062,7 @@ class TableVersion:
  cascade=cascade,
  show_progress=True,
  )
- result.updated_cols = updated_cols
+ result += UpdateStatus(updated_cols=updated_cols)
  return result

  def batch_update(
@@ -992,18 +1078,18 @@ class TableVersion:
  batch: one dict per row, each mapping Columns to LiteralExprs representing the new values
  rowids: if not empty, one tuple per row, each containing the rowid values for the corresponding row in batch
  """
+ from pixeltable.plan import Planner
+
  # if we do lookups of rowids, we must have one for each row in the batch
  assert len(rowids) == 0 or len(rowids) == len(batch)

- from pixeltable.plan import Planner
-
  plan, row_update_node, delete_where_clause, updated_cols, recomputed_cols = Planner.create_batch_update_plan(
  self.path, batch, rowids, cascade=cascade
  )
  result = self.propagate_update(
  plan, delete_where_clause, recomputed_cols, base_versions=[], timestamp=time.time(), cascade=cascade
  )
- result.updated_cols = [c.qualified_name for c in updated_cols]
+ result += UpdateStatus(updated_cols=[c.qualified_name for c in updated_cols])

  unmatched_rows = row_update_node.unmatched_rows()
  if len(unmatched_rows) > 0:
@@ -1011,7 +1097,7 @@ class TableVersion:
  raise excs.Error(f'batch_update(): {len(unmatched_rows)} row(s) not found')
  if insert_if_not_exists:
  insert_status = self.insert(unmatched_rows, None, print_stats=False, fail_on_exception=False)
- result += insert_status
+ result += insert_status.to_cascade()
  return result

  def _validate_update_spec(
@@ -1020,23 +1106,24 @@ class TableVersion:
1020
1106
  update_targets: dict[Column, exprs.Expr] = {}
1021
1107
  for col_name, val in value_spec.items():
1022
1108
  if not isinstance(col_name, str):
1023
- raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
1109
+ raise excs.Error(f'Update specification: dict key must be column name; got {col_name!r}')
1024
1110
  if col_name == _ROWID_COLUMN_NAME:
1025
1111
  # a valid rowid is a list of ints, one per rowid column
1026
1112
  assert len(val) == len(self.store_tbl.rowid_columns())
1027
1113
  for el in val:
1028
1114
  assert isinstance(el, int)
1029
1115
  continue
1030
- col = self.path.get_column(col_name, include_bases=False)
1116
+ col = self.path.get_column(col_name)
1031
1117
  if col is None:
1032
- # TODO: return more informative error if this is trying to update a base column
1033
- raise excs.Error(f'Column {col_name} unknown')
1118
+ raise excs.Error(f'Unknown column: {col_name}')
1119
+ if col.get_tbl().id != self.id:
1120
+ raise excs.Error(f'Column {col.name!r} is a base table column and cannot be updated')
1034
1121
  if col.is_computed:
1035
- raise excs.Error(f'Column {col_name} is computed and cannot be updated')
1122
+ raise excs.Error(f'Column {col_name!r} is computed and cannot be updated')
1036
1123
  if col.is_pk and not allow_pk:
1037
- raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
1124
+ raise excs.Error(f'Column {col_name!r} is a primary key column and cannot be updated')
1038
1125
  if col.col_type.is_media_type() and not allow_media:
1039
- raise excs.Error(f'Column {col_name} is a media column and cannot be updated')
1126
+ raise excs.Error(f'Column {col_name!r} is a media column and cannot be updated')
1040
1127
 
1041
1128
  # make sure that the value is compatible with the column type
1042
1129
  value_expr: exprs.Expr
@@ -1046,100 +1133,136 @@ class TableVersion:
1046
1133
  except (TypeError, jsonschema.exceptions.ValidationError) as exc:
1047
1134
  if not allow_exprs:
1048
1135
  raise excs.Error(
1049
- f'Column {col_name}: value {val!r} is not a valid literal for this column '
1050
- f'(expected {col.col_type})'
1136
+ f'Column {col_name!r}: value is not a valid literal for this column '
1137
+ f'(expected `{col.col_type}`): {val!r}'
1051
1138
  ) from exc
1052
1139
  # it's not a literal, let's try to create an expr from it
1053
1140
  value_expr = exprs.Expr.from_object(val)
1054
1141
  if value_expr is None:
1055
1142
  raise excs.Error(
1056
- f'Column {col_name}: value {val!r} is not a recognized literal or expression'
1143
+ f'Column {col_name!r}: value is not a recognized literal or expression: {val!r}'
1057
1144
  ) from exc
1058
1145
  if not col.col_type.is_supertype_of(value_expr.col_type, ignore_nullable=True):
1059
1146
  raise excs.Error(
1060
- f'Type of value {val!r} ({value_expr.col_type}) is not compatible with the type of column '
1061
- f'{col_name} ({col.col_type})'
1147
+ f'Type `{value_expr.col_type}` of value {val!r} is not compatible with the type '
1148
+ f'`{col.col_type}` of column {col_name!r}'
1062
1149
  ) from exc
1063
1150
  update_targets[col] = value_expr
1064
1151
 
1065
1152
  return update_targets
1066
1153
 
1154
+ def recompute_columns(
1155
+ self, col_names: list[str], where: exprs.Expr | None = None, errors_only: bool = False, cascade: bool = True
1156
+ ) -> UpdateStatus:
1157
+ from pixeltable.exprs import CompoundPredicate, SqlElementCache
1158
+ from pixeltable.plan import Planner
1159
+
1160
+ assert self.is_mutable
1161
+ assert all(name in self.cols_by_name for name in col_names)
1162
+ assert len(col_names) > 0
1163
+ assert len(col_names) == 1 or not errors_only
1164
+
1165
+ target_columns = [self.cols_by_name[name] for name in col_names]
1166
+ where_clause: exprs.Expr | None = None
1167
+ if where is not None:
1168
+ self._validate_where_clause(where, error_prefix='`where` argument')
1169
+ where_clause = where
1170
+ if errors_only:
1171
+ errortype_pred = (
1172
+ exprs.ColumnPropertyRef(exprs.ColumnRef(target_columns[0]), exprs.ColumnPropertyRef.Property.ERRORTYPE)
1173
+ != None
1174
+ )
1175
+ where_clause = CompoundPredicate.make_conjunction([where_clause, errortype_pred])
1176
+ plan, updated_cols, recomputed_cols = Planner.create_update_plan(
1177
+ self.path, update_targets={}, recompute_targets=target_columns, where_clause=where_clause, cascade=cascade
1178
+ )
1179
+
1180
+ result = self.propagate_update(
1181
+ plan,
1182
+ where_clause.sql_expr(SqlElementCache()) if where_clause is not None else None,
1183
+ recomputed_cols,
1184
+ base_versions=[],
1185
+ timestamp=time.time(),
1186
+ cascade=cascade,
1187
+ show_progress=True,
1188
+ )
1189
+ result += UpdateStatus(updated_cols=updated_cols)
1190
+ return result
1191
+
1067
1192
  def propagate_update(
1068
1193
  self,
1069
- plan: Optional[exec.ExecNode],
1070
- where_clause: Optional[sql.ColumnElement],
1194
+ plan: exec.ExecNode | None,
1195
+ where_clause: sql.ColumnElement | None,
1071
1196
  recomputed_view_cols: list[Column],
1072
- base_versions: list[Optional[int]],
1197
+ base_versions: list[int | None],
1073
1198
  timestamp: float,
1074
1199
  cascade: bool,
1075
1200
  show_progress: bool = True,
1076
1201
  ) -> UpdateStatus:
1202
+ from pixeltable.catalog import Catalog
1203
+ from pixeltable.plan import Planner
1204
+
1205
+ Catalog.get().mark_modified_tvs(self.handle)
1077
1206
  result = UpdateStatus()
1078
- if plan is not None:
1079
- # we're creating a new version
1080
- self.version += 1
1081
- result.num_rows, result.num_excs, cols_with_excs = self.store_tbl.insert_rows(
1207
+ create_new_table_version = plan is not None
1208
+ if create_new_table_version:
1209
+ self.bump_version(timestamp, bump_schema_version=False)
1210
+ cols_with_excs, row_counts = self.store_tbl.insert_rows(
1082
1211
  plan, v_min=self.version, show_progress=show_progress
1083
1212
  )
1084
- result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
1213
+ result += UpdateStatus(
1214
+ row_count_stats=row_counts.insert_to_update(),
1215
+ cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs],
1216
+ )
1085
1217
  self.store_tbl.delete_rows(
1086
1218
  self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause
1087
1219
  )
1088
- self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
1089
1220
 
1090
1221
  if cascade:
1091
1222
  base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
1092
1223
  # propagate to views
1093
1224
  for view in self.mutable_views:
1094
- recomputed_cols = [col for col in recomputed_view_cols if col.tbl.id == view.id]
1225
+ recomputed_cols = [col for col in recomputed_view_cols if col.get_tbl().id == view.id]
1095
1226
  plan = None
1096
1227
  if len(recomputed_cols) > 0:
1097
- from pixeltable.plan import Planner
1098
-
1099
1228
  plan = Planner.create_view_update_plan(view.get().path, recompute_targets=recomputed_cols)
1100
1229
  status = view.get().propagate_update(
1101
1230
  plan, None, recomputed_view_cols, base_versions=base_versions, timestamp=timestamp, cascade=True
1102
1231
  )
1103
- result.num_rows += status.num_rows
1104
- result.num_excs += status.num_excs
1105
- result.cols_with_excs += status.cols_with_excs
1106
-
1107
- result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
1232
+ result += status.to_cascade()
1233
+ if create_new_table_version:
1234
+ self.update_status = result
1235
+ self._write_md(new_version=True, new_schema_version=False)
1108
1236
  return result
1109
1237
 
1110
- def delete(self, where: Optional[exprs.Expr] = None) -> UpdateStatus:
1111
- """Delete rows in this table.
1112
- Args:
1113
- where: a predicate to filter rows to delete.
1114
- """
1238
+ def _validate_where_clause(self, pred: exprs.Expr, error_prefix: str) -> None:
1239
+ """Validates that pred can be expressed as a SQL Where clause"""
1115
1240
  assert self.is_insertable
1116
1241
  from pixeltable.exprs import Expr
1117
1242
  from pixeltable.plan import Planner
1118
1243
 
1119
- sql_where_clause: Optional[Expr] = None
1120
- if where is not None:
1121
- if not isinstance(where, Expr):
1122
- raise excs.Error(f"'where' argument must be a predicate, got {type(where)}")
1123
- analysis_info = Planner.analyze(self.path, where)
1124
- # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
1125
- if analysis_info.filter is not None:
1126
- raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
1127
- sql_where_clause = analysis_info.sql_where_clause
1244
+ if not isinstance(pred, Expr):
1245
+ raise excs.Error(f'{error_prefix} must be a valid Pixeltable expression; got `{type(pred)}`')
1246
+ analysis_info = Planner.analyze(self.path, pred)
1247
+ # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
1248
+ if analysis_info.filter is not None:
1249
+ raise excs.Error(f'Filter not expressible in SQL: {analysis_info.filter}')
1128
1250
 
1129
- num_rows = self.propagate_delete(sql_where_clause, base_versions=[], timestamp=time.time())
1130
-
1131
- status = UpdateStatus(num_rows=num_rows)
1251
+ def delete(self, where: exprs.Expr | None = None) -> UpdateStatus:
1252
+ assert self.is_insertable
1253
+ if where is not None:
1254
+ self._validate_where_clause(where, error_prefix='`where` argument')
1255
+ status = self.propagate_delete(where, base_versions=[], timestamp=time.time())
1132
1256
  return status
1133
1257
 
1134
1258
  def propagate_delete(
1135
- self, where: Optional[exprs.Expr], base_versions: list[Optional[int]], timestamp: float
1136
- ) -> int:
1137
- """Delete rows in this table and propagate to views.
1138
- Args:
1139
- where: a predicate to filter rows to delete.
1140
- Returns:
1141
- number of deleted rows
1142
- """
1259
+ self, where: exprs.Expr | None, base_versions: list[int | None], timestamp: float
1260
+ ) -> UpdateStatus:
1261
+ """Delete rows in this table and propagate to views"""
1262
+ from pixeltable.catalog import Catalog
1263
+
1264
+ Catalog.get().mark_modified_tvs(self.handle)
1265
+
1143
1266
  # print(f'calling sql_expr()')
1144
1267
  sql_where_clause = where.sql_expr(exprs.SqlElementCache()) if where is not None else None
1145
1268
  # #print(f'sql_where_clause={str(sql_where_clause) if sql_where_clause is not None else None}')
@@ -1149,22 +1272,28 @@ class TableVersion:
  # sql.sql.visitors.traverse(sql_where_clause, {}, {'column': collect_cols})
  # x = [f'{str(c)}:{hash(c)}:{id(c.table)}' for c in sql_cols]
  # print(f'where_clause cols: {x}')
- num_rows = self.store_tbl.delete_rows(
+ del_rows = self.store_tbl.delete_rows(
  self.version + 1, base_versions=base_versions, match_on_vmin=False, where_clause=sql_where_clause
  )
- if num_rows > 0:
+ row_counts = RowCountStats(del_rows=del_rows) # delete
+ result = UpdateStatus(row_count_stats=row_counts)
+ if del_rows > 0:
  # we're creating a new version
- self.version += 1
- self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
+ self.bump_version(timestamp, bump_schema_version=False)
  for view in self.mutable_views:
- num_rows += view.get().propagate_delete(
+ status = view.get().propagate_delete(
  where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
  )
- return num_rows
+ result += status.to_cascade()
+ self.update_status = result
+
+ if del_rows > 0:
+ self._write_md(new_version=True, new_schema_version=False)
+ return result

  def revert(self) -> None:
  """Reverts the table to the previous version."""
- assert not self.is_snapshot
+ assert self.is_mutable
  if self.version == 0:
  raise excs.Error('Cannot revert version 0')
  self._revert()
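propagate_delete() above now returns an UpdateStatus and merges each view's status into its own via to_cascade() and +=. A sketch of that accumulation pattern, with the same caveats as before (assumed import path; to_cascade() is taken from the diff, and its exact semantics, re-labeling a view's own counts as cascaded counts before merging, is an assumption):

    # Sketch: cascading delete counts from a view back into the base table's status.
    from pixeltable.catalog.update_status import RowCountStats, UpdateStatus  # assumed location

    result = UpdateStatus(row_count_stats=RowCountStats(del_rows=10))       # base table
    view_status = UpdateStatus(row_count_stats=RowCountStats(del_rows=4))   # one mutable view
    result += view_status.to_cascade()                                      # merge, as in the hunk above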
@@ -1176,6 +1305,8 @@ class TableVersion:
  Doesn't attempt to revert the in-memory metadata, but instead invalidates this TableVersion instance
  and relies on Catalog to reload it
  """
+ from pixeltable.catalog import Catalog
+
  conn = Env.get().conn
  # make sure we don't have a snapshot referencing this version
  # (unclear how to express this with sqlalchemy)
@@ -1190,18 +1321,16 @@ class TableVersion:
  names = [row[1] for row in result]
  raise excs.Error(
  (
- f'Current version is needed for {len(result)} snapshot{"s" if len(result) > 1 else ""} '
+ f'Current version is needed for {len(result)} snapshot{"s" if len(result) > 1 else ""}: '
  f'({", ".join(names)})'
  )
  )

- # delete newly-added data
- MediaStore.delete(self.id, version=self.version)
  conn.execute(sql.delete(self.store_tbl.sa_tbl).where(self.store_tbl.sa_tbl.c.v_min == self.version))

  # revert new deletions
  set_clause: dict[sql.Column, Any] = {self.store_tbl.sa_tbl.c.v_max: schema.Table.MAX_VERSION}
- for index_info in self.idxs_by_name.values():
+ for index_info in self.idxs.values():
  # copy the index value back from the undo column and reset the undo column to NULL
  set_clause[index_info.val_col.sa_col] = index_info.undo_col.sa_col
  set_clause[index_info.undo_col.sa_col] = None
@@ -1211,6 +1340,8 @@ class TableVersion:
  # revert schema changes:
  # - undo changes to self._tbl_md and write that back
  # - delete newly-added TableVersion/TableSchemaVersion records
+ Catalog.get().mark_modified_tvs(self.handle)
+ old_version = self.version
  if self.version == self.schema_version:
  # physically delete newly-added columns and remove them from the stored md
  added_cols = [col for col in self.cols if col.schema_version_add == self.schema_version]
@@ -1257,57 +1388,78 @@ class TableVersion:
1257
1388
  .where(schema.TableVersion.version == self.version)
1258
1389
  )
1259
1390
 
1260
- self.version -= 1
1261
- self._write_md(new_version=False, new_version_ts=0, new_schema_version=False)
1391
+ self._tbl_md.current_version = self._version_md.version = self.version - 1
1392
+
1393
+ self._write_md(new_version=False, new_schema_version=False)
1262
1394
 
1263
1395
  # propagate to views
1264
- views_str = ', '.join([str(v.id) for v in self.mutable_views])
1265
- print(f'revert(): mutable_views={views_str}')
1266
1396
  for view in self.mutable_views:
1267
1397
  view.get()._revert()
1268
1398
 
1269
1399
  # force reload on next operation
1270
1400
  self.is_validated = False
1271
- pxt.catalog.Catalog.get().remove_tbl_version(self)
1272
- _logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
1401
+ Catalog.get().remove_tbl_version(self)
1402
+
1403
+ # delete newly-added data
1404
+ # Do this at the end, after all DB operations have completed.
1405
+ # TODO: The transaction could still fail. Really this should be done via PendingTableOps.
1406
+ self.delete_media(tbl_version=old_version)
1407
+ _logger.info(f'TableVersion {self.name!r}: reverted to version {self.version}')
1273
1408
 
1274
1409
  def _init_external_stores(self) -> None:
1410
+ from pixeltable.io.external_store import ExternalStore
1411
+
1275
1412
  for store_md in self.tbl_md.external_stores:
1276
1413
  store_cls = resolve_symbol(store_md['class'])
1277
- assert isinstance(store_cls, type) and issubclass(store_cls, pxt.io.ExternalStore)
1414
+ assert isinstance(store_cls, type) and issubclass(store_cls, ExternalStore)
1278
1415
  store = store_cls.from_dict(store_md['md'])
1279
1416
  self.external_stores[store.name] = store
1280
1417
 
1281
- def link_external_store(self, store: pxt.io.ExternalStore) -> None:
1282
- self.version += 1
1283
- self.preceding_schema_version = self.schema_version
1284
- self.schema_version = self.version
1418
+ def link_external_store(self, store: ExternalStore) -> None:
1419
+ self.bump_version(bump_schema_version=True)
1285
1420
 
1286
1421
  self.external_stores[store.name] = store
1287
1422
  self._tbl_md.external_stores.append(
1288
1423
  {'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()}
1289
1424
  )
1290
- self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
1425
+ self._write_md(new_version=True, new_schema_version=True)
1291
1426
 
1292
- def unlink_external_store(self, store: pxt.io.ExternalStore) -> None:
1427
+ def unlink_external_store(self, store: ExternalStore) -> None:
1293
1428
  del self.external_stores[store.name]
1294
- self.version += 1
1295
- self.preceding_schema_version = self.schema_version
1296
- self.schema_version = self.version
1429
+ self.bump_version(bump_schema_version=True)
1297
1430
  idx = next(i for i, store_md in enumerate(self._tbl_md.external_stores) if store_md['md']['name'] == store.name)
1298
1431
  self._tbl_md.external_stores.pop(idx)
1299
- self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=True)
1432
+ self._write_md(new_version=True, new_schema_version=True)
1433
+
1434
+ @property
1435
+ def pxt_uri(self) -> str | None:
1436
+ return self._tbl_md.additional_md.get('pxt_uri')
1437
+
1438
+ def update_pxt_uri(self, pxt_uri: str | None) -> None:
1439
+ if self._tbl_md.additional_md.get('pxt_uri') == pxt_uri:
1440
+ return # Nothing to do
1441
+
1442
+ if pxt_uri is None:
1443
+ del self._tbl_md.additional_md['pxt_uri'] # must be present due to preceding check
1444
+ else:
1445
+ self._tbl_md.additional_md['pxt_uri'] = pxt_uri
1446
+
1447
+ self._write_md(new_version=False, new_schema_version=False)
1300
1448
 
1301
1449
  @property
1302
1450
  def tbl_md(self) -> schema.TableMd:
1303
1451
  return self._tbl_md
1304
1452
 
1453
+ @property
1454
+ def version_md(self) -> schema.TableVersionMd:
1455
+ return self._version_md
1456
+
1305
1457
  @property
1306
1458
  def schema_version_md(self) -> schema.TableSchemaVersionMd:
1307
1459
  return self._schema_version_md
1308
1460
 
1309
1461
  @property
1310
- def view_md(self) -> Optional[schema.ViewMd]:
1462
+ def view_md(self) -> schema.ViewMd | None:
1311
1463
  return self._tbl_md.view_md
1312
1464
 
1313
1465
  @property
@@ -1315,7 +1467,7 @@ class TableVersion:
  return self._tbl_md.name

  @property
- def user(self) -> Optional[str]:
+ def user(self) -> str | None:
  return self._tbl_md.user

  @property
@@ -1345,42 +1497,71 @@ class TableVersion:
  # if this is a snapshot instance, we need to ignore current_version
  return self._tbl_md.current_version if self.effective_version is None else self.effective_version

- @version.setter
- def version(self, version: int) -> None:
- assert self.effective_version is None
- self._tbl_md.current_version = version
+ @property
+ def created_at(self) -> float:
+ return self._version_md.created_at

  @property
  def schema_version(self) -> int:
  return self._schema_version_md.schema_version

- @schema_version.setter
- def schema_version(self, version: int) -> None:
+ def bump_version(self, timestamp: float | None = None, *, bump_schema_version: bool) -> None:
+ """
+ Increments the table version and adjusts all associated metadata. This will *not* trigger a database action;
+ _write_md() must be called separately to persist the changes.
+
+ Args:
+ timestamp: the creation time for the new version. Can be used to synchronize multiple metadata changes
+ to the same timestamp. If `None`, then defaults to `time.time()`.
+ bump_schema_version: if True, also adjusts the schema version (setting it equal to the new version)
+ and associated metadata.
+ """
+ from pixeltable.catalog import Catalog
+
  assert self.effective_version is None
- self._tbl_md.current_schema_version = version
- self._schema_version_md.schema_version = version
+
+ if timestamp is None:
+ timestamp = time.time()
+
+ Catalog.get().mark_modified_tvs(self.handle)
+
+ old_version = self._tbl_md.current_version
+ assert self._version_md.version == old_version
+ new_version = old_version + 1
+ self._tbl_md.current_version = new_version
+ self._version_md.version = new_version
+ self._version_md.created_at = timestamp
+
+ if bump_schema_version:
+ old_schema_version = self._tbl_md.current_schema_version
+ assert self._version_md.schema_version == old_schema_version
+ assert self._schema_version_md.schema_version == old_schema_version
+ self._tbl_md.current_schema_version = new_version
+ self._version_md.schema_version = new_version
+ self._schema_version_md.preceding_schema_version = old_schema_version
+ self._schema_version_md.schema_version = new_version

  @property
- def preceding_schema_version(self) -> int:
+ def preceding_schema_version(self) -> int | None:
  return self._schema_version_md.preceding_schema_version

- @preceding_schema_version.setter
- def preceding_schema_version(self, v: int) -> None:
+ @property
+ def update_status(self) -> UpdateStatus | None:
+ return self._version_md.update_status
+
+ @update_status.setter
+ def update_status(self, status: UpdateStatus) -> None:
  assert self.effective_version is None
- self._schema_version_md.preceding_schema_version = v
+ self._version_md.update_status = status

  @property
  def media_validation(self) -> MediaValidation:
  return MediaValidation[self._schema_version_md.media_validation.upper()]

- @property
  def next_col_id(self) -> int:
- return self._tbl_md.next_col_id
-
- @next_col_id.setter
- def next_col_id(self, id: int) -> None:
- assert self.effective_version is None
- self._tbl_md.next_col_id = id
+ val = self._tbl_md.next_col_id
+ self._tbl_md.next_col_id += 1
+ return val

  @property
  def next_idx_id(self) -> int:
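The hunk above replaces the old version/schema_version setters with bump_version(), which only mutates the in-memory metadata; _write_md() persists it. The calling pattern, as it appears throughout this file after the change (excerpted from the diff, not a standalone script; self stands for a mutable TableVersion):

    # Schema change: bump table version and schema version together, then persist.
    self.bump_version(bump_schema_version=True)
    self._write_md(new_version=True, new_schema_version=True)

    # Data change: bump only the table version, reusing a shared timestamp.
    self.bump_version(timestamp, bump_schema_version=False)
    self._write_md(new_version=True, new_schema_version=False)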
@@ -1423,7 +1604,7 @@ class TableVersion:
  @property
  def is_insertable(self) -> bool:
  """Returns True if this corresponds to an InsertableTable"""
- return not self.is_snapshot and not self.is_view
+ return self.is_mutable and not self.is_view

  def is_iterator_column(self, col: Column) -> bool:
  """Returns True if col is produced by an iterator"""
@@ -1458,27 +1639,36 @@ class TableVersion:
1458
1639
  names = [c.name for c in self.cols_by_name.values() if c.is_computed]
1459
1640
  return names
1460
1641
 
1461
- def _record_refd_columns(self, col: Column) -> None:
1462
- """Update Column.dependent_cols for all cols referenced in col.value_expr."""
1463
- from pixeltable import exprs
1464
-
1465
- if col.value_expr_dict is not None:
1466
- # if we have a value_expr_dict, use that instead of instantiating the value_expr
1467
- refd_cols = exprs.Expr.get_refd_columns(col.value_expr_dict)
1468
- else:
1469
- refd_cols = [e.col for e in col.value_expr.subexprs(expr_class=exprs.ColumnRef)]
1470
- for refd_col in refd_cols:
1471
- refd_col.dependent_cols.add(col)
1472
-
1473
1642
  def get_idx_val_columns(self, cols: Iterable[Column]) -> set[Column]:
1474
- result = {info.val_col for col in cols for info in col.get_idx_info().values()}
1475
- return result
1643
+ # assumes that the indexed columns are all in this table
1644
+ assert all(col.get_tbl().id == self.id for col in cols)
1645
+ col_ids = {col.id for col in cols}
1646
+ return {info.val_col for info in self.idxs.values() if info.col.id in col_ids}
1647
+
1648
+ def get_idx(self, col: Column, idx_name: str | None, idx_cls: type[index.IndexBase]) -> TableVersion.IndexInfo:
1649
+ if not self.supports_idxs:
1650
+ raise excs.Error('Snapshot does not support indices')
1651
+ if col.qid not in self.idxs_by_col:
1652
+ raise excs.Error(f'Column {col.name!r} does not have a {idx_cls.display_name()} index')
1653
+ candidates = [info for info in self.idxs_by_col[col.qid] if isinstance(info.idx, idx_cls)]
1654
+ if len(candidates) == 0:
1655
+ raise excs.Error(f'No {idx_cls.display_name()} index found for column {col.name!r}')
1656
+ if len(candidates) > 1 and idx_name is None:
1657
+ raise excs.Error(
1658
+ f'Column {col.name!r} has multiple {idx_cls.display_name()} indices; specify `idx_name` instead'
1659
+ )
1660
+ if idx_name is not None and idx_name not in [info.name for info in candidates]:
1661
+ raise excs.Error(f'Index {idx_name!r} not found for column {col.name!r}')
1662
+ return candidates[0] if idx_name is None else next(info for info in candidates if info.name == idx_name)
1476
1663
 
1477
1664
  def get_dependent_columns(self, cols: Iterable[Column]) -> set[Column]:
1478
1665
  """
1479
1666
  Return the set of columns that transitively depend on any of the given ones.
1480
1667
  """
1481
- result = {dependent_col for col in cols for dependent_col in col.dependent_cols}
1668
+ from pixeltable.catalog import Catalog
1669
+
1670
+ cat = Catalog.get()
1671
+ result = set().union(*[cat.get_column_dependents(col.get_tbl().id, col.id) for col in cols])
1482
1672
  if len(result) > 0:
1483
1673
  result.update(self.get_dependent_columns(result))
1484
1674
  return result
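get_dependent_columns() above computes a transitive closure over the catalog's column-dependency map (Catalog.get_column_dependents). A self-contained illustration of the same closure over a plain adjacency dict with hypothetical column names:

    # Transitive closure over a dependency map, mirroring get_dependent_columns().
    def transitive_dependents(deps: dict[str, set[str]], cols: set[str]) -> set[str]:
        result = set().union(*(deps.get(c, set()) for c in cols))
        if result:
            result |= transitive_dependents(deps, result)
        return result

    deps = {'a': {'b'}, 'b': {'c'}, 'c': set()}
    assert transitive_dependents(deps, {'a'}) == {'b', 'c'}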
@@ -1490,64 +1680,18 @@ class TableVersion:
1490
1680
  return 1
1491
1681
 
1492
1682
  @classmethod
1493
- def _create_column_md(cls, cols: list[Column]) -> dict[int, schema.ColumnMd]:
1494
- column_md: dict[int, schema.ColumnMd] = {}
1495
- for col in cols:
1496
- value_expr_dict = col.value_expr.as_dict() if col.value_expr is not None else None
1497
- assert col.is_pk is not None
1498
- column_md[col.id] = schema.ColumnMd(
1499
- id=col.id,
1500
- col_type=col.col_type.as_dict(),
1501
- is_pk=col.is_pk,
1502
- schema_version_add=col.schema_version_add,
1503
- schema_version_drop=col.schema_version_drop,
1504
- value_expr=value_expr_dict,
1505
- stored=col.stored,
1506
- )
1507
- return column_md
1508
-
1509
- @classmethod
1510
- def _create_stores_md(cls, stores: Iterable[pxt.io.ExternalStore]) -> list[dict[str, Any]]:
1683
+ def _create_stores_md(cls, stores: Iterable[ExternalStore]) -> list[dict[str, Any]]:
1511
1684
  return [
1512
1685
  {'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()} for store in stores
1513
1686
  ]
1514
1687
 
1515
- def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
1516
- return schema.TableVersionMd(
1517
- tbl_id=str(self.id),
1518
- created_at=timestamp,
1519
- version=self.version,
1520
- schema_version=self.schema_version,
1521
- additional_md={},
1522
- )
1523
-
1524
- def _create_schema_version_md(self, preceding_schema_version: int) -> schema.TableSchemaVersionMd:
1525
- column_md: dict[int, schema.SchemaColumn] = {}
1526
- for pos, col in enumerate(self.cols_by_name.values()):
1527
- column_md[col.id] = schema.SchemaColumn(
1528
- pos=pos,
1529
- name=col.name,
1530
- media_validation=col._media_validation.name.lower() if col._media_validation is not None else None,
1531
- )
1532
- # preceding_schema_version to be set by the caller
1533
- return schema.TableSchemaVersionMd(
1534
- tbl_id=str(self.id),
1535
- schema_version=self.schema_version,
1536
- preceding_schema_version=preceding_schema_version,
1537
- columns=column_md,
1538
- num_retained_versions=self.num_retained_versions,
1539
- comment=self.comment,
1540
- media_validation=self.media_validation.name.lower(),
1541
- additional_md={},
1542
- )
1543
-
1544
1688
  def as_dict(self) -> dict:
1545
1689
  return {'id': str(self.id), 'effective_version': self.effective_version}
1546
1690
 
1547
1691
  @classmethod
1548
1692
  def from_dict(cls, d: dict) -> TableVersion:
1549
- from pixeltable import catalog
1693
+ from pixeltable.catalog import Catalog
1550
1694
 
1551
1695
  id = UUID(d['id'])
1552
1696
  effective_version = d['effective_version']
1553
- return catalog.Catalog.get().get_tbl_version(id, effective_version)
1697
+ return Catalog.get().get_tbl_version(id, effective_version)
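The as_dict()/from_dict() pair above serializes only the identity (id, effective_version) and resolves it back through the catalog. Illustrative round trip (tv stands for an existing TableVersion instance; not a standalone script):

    # Round trip: serialize identity, then re-resolve through the Catalog singleton.
    d = tv.as_dict()                      # {'id': '<uuid>', 'effective_version': None}
    same_tv = TableVersion.from_dict(d)   # Catalog.get().get_tbl_version(UUID(d['id']), d['effective_version'])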