pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. pixeltable/__init__.py +42 -8
  2. pixeltable/{dataframe.py → _query.py} +470 -206
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +5 -4
  5. pixeltable/catalog/catalog.py +1785 -432
  6. pixeltable/catalog/column.py +190 -113
  7. pixeltable/catalog/dir.py +2 -4
  8. pixeltable/catalog/globals.py +19 -46
  9. pixeltable/catalog/insertable_table.py +191 -98
  10. pixeltable/catalog/path.py +63 -23
  11. pixeltable/catalog/schema_object.py +11 -15
  12. pixeltable/catalog/table.py +843 -436
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +978 -657
  15. pixeltable/catalog/table_version_handle.py +72 -16
  16. pixeltable/catalog/table_version_path.py +112 -43
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +134 -90
  20. pixeltable/config.py +134 -22
  21. pixeltable/env.py +471 -157
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +4 -1
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +11 -7
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +106 -56
  37. pixeltable/exec/globals.py +35 -0
  38. pixeltable/exec/in_memory_data_node.py +19 -19
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +351 -84
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +36 -23
  46. pixeltable/exprs/column_ref.py +213 -89
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +164 -54
  50. pixeltable/exprs/expr.py +70 -44
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +100 -40
  54. pixeltable/exprs/globals.py +2 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +18 -32
  57. pixeltable/exprs/is_null.py +7 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +27 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +167 -67
  64. pixeltable/exprs/rowid_ref.py +25 -10
  65. pixeltable/exprs/similarity_expr.py +58 -40
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +17 -11
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +29 -27
  78. pixeltable/func/signature.py +46 -19
  79. pixeltable/func/tools.py +31 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +16 -0
  82. pixeltable/functions/anthropic.py +123 -77
  83. pixeltable/functions/audio.py +147 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +7 -4
  86. pixeltable/functions/deepseek.py +35 -43
  87. pixeltable/functions/document.py +81 -0
  88. pixeltable/functions/fal.py +76 -0
  89. pixeltable/functions/fireworks.py +11 -20
  90. pixeltable/functions/gemini.py +195 -39
  91. pixeltable/functions/globals.py +142 -14
  92. pixeltable/functions/groq.py +108 -0
  93. pixeltable/functions/huggingface.py +1056 -24
  94. pixeltable/functions/image.py +115 -57
  95. pixeltable/functions/json.py +1 -1
  96. pixeltable/functions/llama_cpp.py +28 -13
  97. pixeltable/functions/math.py +67 -5
  98. pixeltable/functions/mistralai.py +18 -55
  99. pixeltable/functions/net.py +70 -0
  100. pixeltable/functions/ollama.py +20 -13
  101. pixeltable/functions/openai.py +240 -226
  102. pixeltable/functions/openrouter.py +143 -0
  103. pixeltable/functions/replicate.py +4 -4
  104. pixeltable/functions/reve.py +250 -0
  105. pixeltable/functions/string.py +239 -69
  106. pixeltable/functions/timestamp.py +16 -16
  107. pixeltable/functions/together.py +24 -84
  108. pixeltable/functions/twelvelabs.py +188 -0
  109. pixeltable/functions/util.py +6 -1
  110. pixeltable/functions/uuid.py +30 -0
  111. pixeltable/functions/video.py +1515 -107
  112. pixeltable/functions/vision.py +8 -8
  113. pixeltable/functions/voyageai.py +289 -0
  114. pixeltable/functions/whisper.py +16 -8
  115. pixeltable/functions/whisperx.py +179 -0
  116. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  117. pixeltable/globals.py +362 -115
  118. pixeltable/index/base.py +17 -21
  119. pixeltable/index/btree.py +28 -22
  120. pixeltable/index/embedding_index.py +100 -118
  121. pixeltable/io/__init__.py +4 -2
  122. pixeltable/io/datarows.py +8 -7
  123. pixeltable/io/external_store.py +56 -105
  124. pixeltable/io/fiftyone.py +13 -13
  125. pixeltable/io/globals.py +31 -30
  126. pixeltable/io/hf_datasets.py +61 -16
  127. pixeltable/io/label_studio.py +74 -70
  128. pixeltable/io/lancedb.py +3 -0
  129. pixeltable/io/pandas.py +21 -12
  130. pixeltable/io/parquet.py +25 -105
  131. pixeltable/io/table_data_conduit.py +250 -123
  132. pixeltable/io/utils.py +4 -4
  133. pixeltable/iterators/__init__.py +2 -1
  134. pixeltable/iterators/audio.py +26 -25
  135. pixeltable/iterators/base.py +9 -3
  136. pixeltable/iterators/document.py +112 -78
  137. pixeltable/iterators/image.py +12 -15
  138. pixeltable/iterators/string.py +11 -4
  139. pixeltable/iterators/video.py +523 -120
  140. pixeltable/metadata/__init__.py +14 -3
  141. pixeltable/metadata/converters/convert_13.py +2 -2
  142. pixeltable/metadata/converters/convert_18.py +2 -2
  143. pixeltable/metadata/converters/convert_19.py +2 -2
  144. pixeltable/metadata/converters/convert_20.py +2 -2
  145. pixeltable/metadata/converters/convert_21.py +2 -2
  146. pixeltable/metadata/converters/convert_22.py +2 -2
  147. pixeltable/metadata/converters/convert_24.py +2 -2
  148. pixeltable/metadata/converters/convert_25.py +2 -2
  149. pixeltable/metadata/converters/convert_26.py +2 -2
  150. pixeltable/metadata/converters/convert_29.py +4 -4
  151. pixeltable/metadata/converters/convert_30.py +34 -21
  152. pixeltable/metadata/converters/convert_34.py +2 -2
  153. pixeltable/metadata/converters/convert_35.py +9 -0
  154. pixeltable/metadata/converters/convert_36.py +38 -0
  155. pixeltable/metadata/converters/convert_37.py +15 -0
  156. pixeltable/metadata/converters/convert_38.py +39 -0
  157. pixeltable/metadata/converters/convert_39.py +124 -0
  158. pixeltable/metadata/converters/convert_40.py +73 -0
  159. pixeltable/metadata/converters/convert_41.py +12 -0
  160. pixeltable/metadata/converters/convert_42.py +9 -0
  161. pixeltable/metadata/converters/convert_43.py +44 -0
  162. pixeltable/metadata/converters/util.py +20 -31
  163. pixeltable/metadata/notes.py +9 -0
  164. pixeltable/metadata/schema.py +140 -53
  165. pixeltable/metadata/utils.py +74 -0
  166. pixeltable/mypy/__init__.py +3 -0
  167. pixeltable/mypy/mypy_plugin.py +123 -0
  168. pixeltable/plan.py +382 -115
  169. pixeltable/share/__init__.py +1 -1
  170. pixeltable/share/packager.py +547 -83
  171. pixeltable/share/protocol/__init__.py +33 -0
  172. pixeltable/share/protocol/common.py +165 -0
  173. pixeltable/share/protocol/operation_types.py +33 -0
  174. pixeltable/share/protocol/replica.py +119 -0
  175. pixeltable/share/publish.py +257 -59
  176. pixeltable/store.py +311 -194
  177. pixeltable/type_system.py +373 -211
  178. pixeltable/utils/__init__.py +2 -3
  179. pixeltable/utils/arrow.py +131 -17
  180. pixeltable/utils/av.py +298 -0
  181. pixeltable/utils/azure_store.py +346 -0
  182. pixeltable/utils/coco.py +6 -6
  183. pixeltable/utils/code.py +3 -3
  184. pixeltable/utils/console_output.py +4 -1
  185. pixeltable/utils/coroutine.py +6 -23
  186. pixeltable/utils/dbms.py +32 -6
  187. pixeltable/utils/description_helper.py +4 -5
  188. pixeltable/utils/documents.py +7 -18
  189. pixeltable/utils/exception_handler.py +7 -30
  190. pixeltable/utils/filecache.py +6 -6
  191. pixeltable/utils/formatter.py +86 -48
  192. pixeltable/utils/gcs_store.py +295 -0
  193. pixeltable/utils/http.py +133 -0
  194. pixeltable/utils/http_server.py +2 -3
  195. pixeltable/utils/iceberg.py +1 -2
  196. pixeltable/utils/image.py +17 -0
  197. pixeltable/utils/lancedb.py +90 -0
  198. pixeltable/utils/local_store.py +322 -0
  199. pixeltable/utils/misc.py +5 -0
  200. pixeltable/utils/object_stores.py +573 -0
  201. pixeltable/utils/pydantic.py +60 -0
  202. pixeltable/utils/pytorch.py +5 -6
  203. pixeltable/utils/s3_store.py +527 -0
  204. pixeltable/utils/sql.py +26 -0
  205. pixeltable/utils/system.py +30 -0
  206. pixeltable-0.5.7.dist-info/METADATA +579 -0
  207. pixeltable-0.5.7.dist-info/RECORD +227 -0
  208. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  209. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  210. pixeltable/__version__.py +0 -3
  211. pixeltable/catalog/named_function.py +0 -40
  212. pixeltable/ext/__init__.py +0 -17
  213. pixeltable/ext/functions/__init__.py +0 -11
  214. pixeltable/ext/functions/whisperx.py +0 -77
  215. pixeltable/utils/media_store.py +0 -77
  216. pixeltable/utils/s3.py +0 -17
  217. pixeltable-0.3.14.dist-info/METADATA +0 -434
  218. pixeltable-0.3.14.dist-info/RECORD +0 -186
  219. pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
  220. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,39 +1,105 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import copy
3
4
  import dataclasses
4
5
  import importlib
6
+ import itertools
5
7
  import logging
6
8
  import time
7
9
  import uuid
8
- from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, Tuple
10
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Literal
9
11
  from uuid import UUID
10
12
 
11
13
  import jsonschema.exceptions
12
14
  import sqlalchemy as sql
15
+ from sqlalchemy import exc as sql_exc
13
16
 
14
- import pixeltable as pxt
15
17
  import pixeltable.exceptions as excs
18
+ import pixeltable.exprs as exprs
19
+ import pixeltable.index as index
16
20
  import pixeltable.type_system as ts
17
- from pixeltable import exprs, index
18
21
  from pixeltable.env import Env
19
22
  from pixeltable.iterators import ComponentIterator
20
23
  from pixeltable.metadata import schema
21
- from pixeltable.utils.exception_handler import run_cleanup_on_exception
22
24
  from pixeltable.utils.filecache import FileCache
23
- from pixeltable.utils.media_store import MediaStore
25
+ from pixeltable.utils.object_stores import ObjectOps
24
26
 
25
27
  from ..func.globals import resolve_symbol
26
28
  from .column import Column
27
- from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, UpdateStatus, is_valid_identifier
29
+ from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, QColumnId, is_valid_identifier
30
+ from .tbl_ops import DeleteTableMdOp, DeleteTableMediaFilesOp, DropStoreTableOp, TableOp
31
+ from .update_status import RowCountStats, UpdateStatus
28
32
 
29
33
  if TYPE_CHECKING:
30
34
  from pixeltable import exec, store
35
+ from pixeltable._query import Query
36
+ from pixeltable.catalog.table_version_handle import TableVersionHandle
37
+ from pixeltable.io import ExternalStore
38
+ from pixeltable.plan import SampleClause
31
39
 
32
- from .table_version_handle import TableVersionHandle
40
+ from .table_version_path import TableVersionPath
33
41
 
34
42
  _logger = logging.getLogger('pixeltable')
35
43
 
36
44
 
45
+ @dataclasses.dataclass(frozen=True)
46
+ class TableVersionMd:
47
+ """
48
+ Complete set of md records for a specific TableVersion instance.
49
+ """
50
+
51
+ tbl_md: schema.TableMd
52
+ version_md: schema.VersionMd
53
+ schema_version_md: schema.SchemaVersionMd
54
+
55
+ @property
56
+ def is_pure_snapshot(self) -> bool:
57
+ return (
58
+ self.tbl_md.view_md is not None
59
+ and self.tbl_md.view_md.is_snapshot
60
+ and self.tbl_md.view_md.predicate is None
61
+ and self.tbl_md.view_md.sample_clause is None
62
+ and len(self.schema_version_md.columns) == 0
63
+ )
64
+
65
+ def as_dict(self) -> dict:
66
+ from .catalog import md_dict_factory
67
+
68
+ return dataclasses.asdict(self, dict_factory=md_dict_factory)
69
+
70
+ @classmethod
71
+ def from_dict(cls, data: dict[str, Any]) -> TableVersionMd:
72
+ return schema.md_from_dict(cls, data)
73
+
74
+
75
+ @dataclasses.dataclass(frozen=True, slots=True)
76
+ class TableVersionKey:
77
+ tbl_id: UUID
78
+ effective_version: int | None
79
+ anchor_tbl_id: UUID | None
80
+
81
+ def __post_init__(self) -> None:
82
+ assert self.effective_version is None or self.anchor_tbl_id is None
83
+
84
+ # Allow unpacking as a tuple
85
+ def __iter__(self) -> Iterator[Any]:
86
+ return iter((self.tbl_id, self.effective_version, self.anchor_tbl_id))
87
+
88
+ def as_dict(self) -> dict:
89
+ return {
90
+ 'id': str(self.tbl_id),
91
+ 'effective_version': self.effective_version,
92
+ 'anchor_tbl_id': str(self.anchor_tbl_id) if self.anchor_tbl_id is not None else None,
93
+ }
94
+
95
+ @classmethod
96
+ def from_dict(cls, d: dict) -> TableVersionKey:
97
+ tbl_id = UUID(d['id'])
98
+ effective_version = d['effective_version']
99
+ anchor_tbl_id = d.get('anchor_tbl_id')
100
+ return cls(tbl_id, effective_version, UUID(anchor_tbl_id) if anchor_tbl_id is not None else None)
101
+
102
+
37
103
  class TableVersion:
38
104
  """
39
105
  TableVersion represents a particular version of a table/view along with its physical representation:
@@ -50,43 +116,68 @@ class TableVersion:
50
116
 
51
117
  Instances of TableVersion should not be stored as member variables (ie, used across transaction boundaries).
52
118
  Use a TableVersionHandle instead.
119
+
120
+ Only TableVersion and Catalog interact directly with stored metadata. Everything else needs to go through these
121
+ two classes.
122
+
123
+ TableVersions come in three "flavors" depending on the `effective_version` and `anchor_tbl_id` settings:
124
+ - if both are None, it's a live table that tracks `tbl_md.current_version`
125
+ - if `effective_version` is defined, it's a snapshot of the specific version given by `effective_version`
126
+ - if `anchor_tbl_id` is defined, it's a replica table that is "anchored" to the given table, in the following
127
+ sense: if n is the latest non-fragment version of `anchor_tbl_id`, then the tracked version is m, where m
128
+ is the latest version of `tbl_id` (possibly a fragment) with created_at(m) <= created_at(n).
129
+ In the typical case, `anchor_tbl_id` is a descendant of `tbl_id` and the anchored TableVersion instance
130
+ appears along the TableVersionPath for `anchor_tbl_id`.
131
+ In the TableVersionPath for a replica, all path elements will have the same anchor_tbl_id, the tbl_id
132
+ of the primary (leaf) table. (It is also possible for one or more path elements at the base to be snapshots.)
133
+ At most one of `effective_version` and `anchor_tbl_id` can be specified.
53
134
  """
54
135
 
55
- id: UUID
56
- name: str
57
- user: Optional[str]
58
- effective_version: Optional[int]
59
- is_replica: bool
60
- version: int
61
- comment: str
62
- media_validation: MediaValidation
63
- num_retained_versions: int
64
- schema_version: int
65
- view_md: Optional[schema.ViewMd]
66
- path: Optional[pxt.catalog.TableVersionPath] # only set for live tables; needed to resolve computed cols
67
- base: Optional[TableVersionHandle] # only set for views
68
- next_col_id: int
69
- next_idx_id: int
70
- next_rowid: int
71
- predicate: Optional[exprs.Expr]
72
- mutable_views: list[TableVersionHandle] # target for data operation propagation (only set for live tables)
73
- iterator_cls: Optional[type[ComponentIterator]]
74
- iterator_args: Optional[exprs.InlineDict]
136
+ key: TableVersionKey
137
+
138
+ # record metadata stored in catalog
139
+ _tbl_md: schema.TableMd
140
+ _version_md: schema.VersionMd
141
+ _schema_version_md: schema.SchemaVersionMd
142
+
143
+ path: 'TableVersionPath' | None # only set for non-snapshots; needed to resolve computed cols
144
+ base: TableVersionHandle | None # only set for views
145
+ predicate: exprs.Expr | None
146
+ sample_clause: 'SampleClause' | None
147
+
148
+ iterator_cls: type[ComponentIterator] | None
149
+ iterator_args: exprs.InlineDict | None
75
150
  num_iterator_cols: int
76
151
 
152
+ # target for data operation propagation (only set for non-snapshots, and only records non-snapshot views)
153
+ mutable_views: frozenset[TableVersionHandle]
154
+
77
155
  # contains complete history of columns, incl dropped ones
78
156
  cols: list[Column]
79
157
  # contains only user-facing (named) columns visible in this version
80
158
  cols_by_name: dict[str, Column]
81
159
  # contains only columns visible in this version, both system and user
82
160
  cols_by_id: dict[int, Column]
83
- # needed for _create_tbl_md()
84
- idx_md: dict[int, schema.IndexMd]
85
- # contains only actively maintained indices
161
+
162
+ # True if this TableVersion instance can have indices:
163
+ # - live version of a mutable table
164
+ # - the most recent version of a replica
165
+ supports_idxs: bool
166
+
167
+ # only populated with indices visible in this TableVersion instance
168
+ idxs: dict[int, TableVersion.IndexInfo] # key: index id
86
169
  idxs_by_name: dict[str, TableVersion.IndexInfo]
170
+ idxs_by_col: dict[QColumnId, list[TableVersion.IndexInfo]]
171
+
172
+ external_stores: dict[str, ExternalStore]
173
+ store_tbl: 'store.StoreBase' | None
174
+
175
+ is_initialized: bool # True if init() has been called
87
176
 
88
- external_stores: dict[str, pxt.io.ExternalStore]
89
- store_tbl: 'store.StoreBase'
177
+ # used by Catalog to invalidate cached instances at the end of a transaction;
178
+ # True if this instance reflects the state of stored metadata in the context of this transaction and
179
+ # it is the instance cached in Catalog
180
+ is_validated: bool
90
181
 
91
182
  @dataclasses.dataclass
92
183
  class IndexInfo:
@@ -99,28 +190,25 @@ class TableVersion:
99
190
 
100
191
  def __init__(
101
192
  self,
102
- id: UUID,
193
+ key: TableVersionKey,
103
194
  tbl_md: schema.TableMd,
104
- effective_version: Optional[int],
105
- schema_version_md: schema.TableSchemaVersionMd,
195
+ version_md: schema.VersionMd,
196
+ schema_version_md: schema.SchemaVersionMd,
106
197
  mutable_views: list[TableVersionHandle],
107
- base_path: Optional[pxt.catalog.TableVersionPath] = None,
108
- base: Optional[TableVersionHandle] = None,
109
- # base_store_tbl: Optional['store.StoreBase'] = None,
198
+ base_path: 'TableVersionPath' | None = None,
199
+ base: TableVersionHandle | None = None,
110
200
  ):
111
- self.id = id
112
- self.name = tbl_md.name
113
- self.user = tbl_md.user
114
- self.effective_version = effective_version
115
- self.version = tbl_md.current_version if effective_version is None else effective_version
116
- self.is_replica = tbl_md.is_replica
117
- self.comment = schema_version_md.comment
118
- self.num_retained_versions = schema_version_md.num_retained_versions
119
- self.schema_version = schema_version_md.schema_version
120
- self.view_md = tbl_md.view_md # save this as-is, it's needed for _create_md()
121
- self.media_validation = MediaValidation[schema_version_md.media_validation.upper()]
201
+ assert key.anchor_tbl_id is None or isinstance(key.anchor_tbl_id, UUID)
202
+
203
+ self.is_validated = True # a freshly constructed instance is always valid
204
+ self.is_initialized = False
205
+ self.key = key
206
+ self._tbl_md = copy.deepcopy(tbl_md)
207
+ self._version_md = copy.deepcopy(version_md)
208
+ self._schema_version_md = copy.deepcopy(schema_version_md)
122
209
  assert not (self.is_view and base is None)
123
210
  self.base = base
211
+ self.store_tbl = None
124
212
 
125
213
  # mutable tables need their TableVersionPath for expr eval during updates
126
214
  from .table_version_handle import TableVersionHandle
@@ -129,27 +217,19 @@ class TableVersion:
129
217
  if self.is_snapshot:
130
218
  self.path = None
131
219
  else:
132
- self_handle = TableVersionHandle(id, self.effective_version)
220
+ self_handle = TableVersionHandle(key)
133
221
  if self.is_view:
134
222
  assert base_path is not None
135
223
  self.path = TableVersionPath(self_handle, base=base_path)
136
224
 
137
- if self.is_snapshot:
138
- self.next_col_id = -1
139
- self.next_idx_id = -1 # TODO: can snapshots have separate indices?
140
- self.next_rowid = -1
141
- else:
142
- assert tbl_md.current_version == self.version
143
- self.next_col_id = tbl_md.next_col_id
144
- self.next_idx_id = tbl_md.next_idx_id
145
- self.next_rowid = tbl_md.next_row_id
146
-
147
225
  # view-specific initialization
148
226
  from pixeltable import exprs
227
+ from pixeltable.plan import SampleClause
149
228
 
150
229
  predicate_dict = None if self.view_md is None or self.view_md.predicate is None else self.view_md.predicate
151
230
  self.predicate = exprs.Expr.from_dict(predicate_dict) if predicate_dict is not None else None
152
- self.mutable_views = mutable_views
231
+ sample_dict = None if self.view_md is None or self.view_md.sample_clause is None else self.view_md.sample_clause
232
+ self.sample_clause = SampleClause.from_dict(sample_dict) if sample_dict is not None else None
153
233
 
154
234
  # component view-specific initialization
155
235
  self.iterator_cls = None
@@ -164,44 +244,23 @@ class TableVersion:
164
244
  self.num_iterator_cols = len(output_schema)
165
245
  assert tbl_md.view_md.iterator_args is not None
166
246
 
167
- # register this table version now so that it's available when we're re-creating value exprs
168
- cat = pxt.catalog.Catalog.get()
169
- cat.add_tbl_version(self)
247
+ self.mutable_views = frozenset(mutable_views)
248
+ assert self.is_mutable or len(self.mutable_views) == 0
170
249
 
171
- # init schema after we determined whether we're a component view, and before we create the store table
172
250
  self.cols = []
173
251
  self.cols_by_name = {}
174
252
  self.cols_by_id = {}
175
- self.idx_md = tbl_md.index_md
253
+ self.idxs = {}
176
254
  self.idxs_by_name = {}
255
+ self.idxs_by_col = {}
256
+ self.supports_idxs = self.effective_version is None or (
257
+ self.is_replica and self.effective_version == self.tbl_md.current_version
258
+ )
177
259
  self.external_stores = {}
178
260
 
179
- self._init_schema(tbl_md, schema_version_md)
180
-
181
- # Init external stores (this needs to happen after the schema is created)
182
- self._init_external_stores(tbl_md)
183
-
184
261
  def __hash__(self) -> int:
185
262
  return hash(self.id)
186
263
 
187
- def create_snapshot_copy(self) -> TableVersion:
188
- """Create a snapshot copy of this TableVersion"""
189
- assert not self.is_snapshot
190
- base = self.path.base.tbl_version if self.is_view else None
191
- return TableVersion(
192
- self.id,
193
- self._create_tbl_md(),
194
- self.version,
195
- self._create_schema_version_md(preceding_schema_version=0), # preceding_schema_version: dummy value
196
- mutable_views=[],
197
- base=base,
198
- )
199
-
200
- def create_handle(self) -> TableVersionHandle:
201
- from .table_version_handle import TableVersionHandle
202
-
203
- return TableVersionHandle(self.id, self.effective_version, tbl_version=self)
204
-
205
264
  @property
206
265
  def versioned_name(self) -> str:
207
266
  if self.effective_version is None:
@@ -209,75 +268,117 @@ class TableVersion:
209
268
  else:
210
269
  return f'{self.name}:{self.effective_version}'
211
270
 
271
+ def __repr__(self) -> str:
272
+ return (
273
+ f'TableVersion(id={self.id!r}, name={self.name!r}, effective_version={self.effective_version}, '
274
+ f'anchor_tbl_id={self.anchor_tbl_id}; version={self.version})'
275
+ )
276
+
277
+ @property
278
+ def handle(self) -> 'TableVersionHandle':
279
+ from .table_version_handle import TableVersionHandle
280
+
281
+ return TableVersionHandle(self.key, tbl_version=self)
282
+
212
283
  @classmethod
213
- def create(
284
+ def create_initial_md(
214
285
  cls,
215
- dir_id: UUID,
216
286
  name: str,
217
287
  cols: list[Column],
218
288
  num_retained_versions: int,
219
289
  comment: str,
220
290
  media_validation: MediaValidation,
221
- # base_path: Optional[pxt.catalog.TableVersionPath] = None,
222
- view_md: Optional[schema.ViewMd] = None,
223
- ) -> tuple[UUID, Optional[TableVersion]]:
224
- session = Env.get().session
291
+ create_default_idxs: bool,
292
+ view_md: schema.ViewMd | None = None,
293
+ ) -> TableVersionMd:
294
+ from .table_version_handle import TableVersionHandle
295
+
225
296
  user = Env.get().user
297
+ timestamp = time.time()
298
+
299
+ tbl_id = uuid.uuid4()
300
+ tbl_id_str = str(tbl_id)
301
+ tbl_handle = TableVersionHandle(TableVersionKey(tbl_id, None, None))
302
+ column_ids = itertools.count()
303
+ index_ids = itertools.count()
226
304
 
227
- # assign ids
228
- cols_by_name: dict[str, Column] = {}
305
+ # assign ids, create metadata
306
+ column_md: dict[int, schema.ColumnMd] = {}
307
+ schema_col_md: dict[int, schema.SchemaColumn] = {}
229
308
  for pos, col in enumerate(cols):
230
- col.id = pos
309
+ col.tbl_handle = tbl_handle
310
+ col.id = next(column_ids)
231
311
  col.schema_version_add = 0
232
- cols_by_name[col.name] = col
233
312
  if col.is_computed:
234
313
  col.check_value_expr()
314
+ col_md, sch_md = col.to_md(pos)
315
+ assert sch_md is not None
316
+ column_md[col.id] = col_md
317
+ schema_col_md[col.id] = sch_md
318
+
319
+ index_md: dict[int, schema.IndexMd] = {}
320
+ if create_default_idxs and (view_md is None or not view_md.is_snapshot):
321
+ index_cols: list[Column] = []
322
+ for col in (c for c in cols if cls._is_btree_indexable(c)):
323
+ idx = index.BtreeIndex()
324
+ val_col, undo_col = cls._create_index_columns(col, idx, 0, tbl_handle, id_cb=lambda: next(column_ids))
325
+ index_cols.extend([val_col, undo_col])
326
+
327
+ idx_id = next(index_ids)
328
+ idx_cls = type(idx)
329
+ md = schema.IndexMd(
330
+ id=idx_id,
331
+ name=f'idx{idx_id}',
332
+ indexed_col_id=col.id,
333
+ indexed_col_tbl_id=tbl_id_str,
334
+ index_val_col_id=val_col.id,
335
+ index_val_undo_col_id=undo_col.id,
336
+ schema_version_add=0,
337
+ schema_version_drop=None,
338
+ class_fqn=idx_cls.__module__ + '.' + idx_cls.__name__,
339
+ init_args=idx.as_dict(),
340
+ )
341
+ index_md[idx_id] = md
235
342
 
236
- timestamp = time.time()
237
- # create schema.Table
238
- # Column.dependent_cols for existing cols is wrong at this point, but init() will set it correctly
239
- column_md = cls._create_column_md(cols)
240
- tbl_id = uuid.uuid4()
241
- table_md = schema.TableMd(
242
- tbl_id=str(tbl_id),
343
+ for col in index_cols:
344
+ col_md, _ = col.to_md()
345
+ column_md[col.id] = col_md
346
+
347
+ assert all(column_md[id].id == id for id in column_md)
348
+ assert all(index_md[id].id == id for id in index_md)
349
+
350
+ cols.extend(index_cols)
351
+
352
+ tbl_md = schema.TableMd(
353
+ tbl_id=tbl_id_str,
243
354
  name=name,
244
355
  user=user,
245
356
  is_replica=False,
246
357
  current_version=0,
247
358
  current_schema_version=0,
248
- next_col_id=len(cols),
249
- next_idx_id=0,
359
+ next_col_id=next(column_ids),
360
+ next_idx_id=next(index_ids),
250
361
  next_row_id=0,
362
+ view_sn=0,
251
363
  column_md=column_md,
252
- index_md={},
364
+ index_md=index_md,
253
365
  external_stores=[],
254
366
  view_md=view_md,
255
367
  additional_md={},
256
368
  )
257
- # create a schema.Table here, we need it to call our c'tor;
258
- # don't add it to the session yet, we might add index metadata
259
- tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))
260
369
 
261
- # create schema.TableVersion
262
- table_version_md = schema.TableVersionMd(
263
- tbl_id=str(tbl_record.id), created_at=timestamp, version=0, schema_version=0, additional_md={}
264
- )
265
- tbl_version_record = schema.TableVersion(
266
- tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md)
370
+ table_version_md = schema.VersionMd(
371
+ tbl_id=tbl_id_str,
372
+ created_at=timestamp,
373
+ version=0,
374
+ schema_version=0,
375
+ user=user,
376
+ update_status=None,
377
+ additional_md={},
267
378
  )
268
379
 
269
- # create schema.TableSchemaVersion
270
- schema_col_md: dict[int, schema.SchemaColumn] = {}
271
- for pos, col in enumerate(cols):
272
- md = schema.SchemaColumn(
273
- pos=pos,
274
- name=col.name,
275
- media_validation=col._media_validation.name.lower() if col._media_validation is not None else None,
276
- )
277
- schema_col_md[col.id] = md
278
-
279
- schema_version_md = schema.TableSchemaVersionMd(
280
- tbl_id=str(tbl_record.id),
380
+ schema_version_md = schema.SchemaVersionMd(
381
+ tbl_id=tbl_id_str,
281
382
  schema_version=0,
282
383
  preceding_schema_version=None,
283
384
  columns=schema_col_md,
@@ -286,152 +387,227 @@ class TableVersion:
286
387
  media_validation=media_validation.name.lower(),
287
388
  additional_md={},
288
389
  )
289
- schema_version_record = schema.TableSchemaVersion(
290
- tbl_id=tbl_record.id, schema_version=0, md=dataclasses.asdict(schema_version_md)
291
- )
390
+ return TableVersionMd(tbl_md, table_version_md, schema_version_md)
292
391
 
293
- # if this is purely a snapshot (it doesn't require any additional storage for columns and it doesn't have a
294
- # predicate to apply at runtime), we don't create a physical table and simply use the base's table version path
295
- if view_md is not None and view_md.is_snapshot and view_md.predicate is None and len(cols) == 0:
296
- session.add(tbl_record)
297
- session.add(tbl_version_record)
298
- session.add(schema_version_record)
299
- return tbl_record.id, None
300
-
301
- # assert (base_path is not None) == (view_md is not None)
302
- is_snapshot = view_md is not None and view_md.is_snapshot
303
- effective_version = 0 if is_snapshot else None
304
- base_path = pxt.catalog.TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
305
- base = base_path.tbl_version if base_path is not None else None
306
- tbl_version = cls(
307
- tbl_record.id, table_md, effective_version, schema_version_md, [], base_path=base_path, base=base
308
- )
392
+ def exec_op(self, op: TableOp) -> None:
393
+ from pixeltable.store import StoreBase
394
+
395
+ assert op.delete_table_md_op is None # that needs to get handled by Catalog
396
+
397
+ if op.create_store_table_op is not None:
398
+ # this needs to be called outside of a transaction
399
+ self.store_tbl.create()
400
+
401
+ elif op.create_index_op is not None:
402
+ idx_info = self.idxs[op.create_index_op.idx_id]
403
+ with Env.get().begin_xact():
404
+ self.store_tbl.create_index(idx_info.id)
405
+
406
+ elif op.load_view_op is not None:
407
+ from pixeltable.catalog import Catalog
408
+ from pixeltable.plan import Planner
409
+
410
+ from .table_version_path import TableVersionPath
411
+
412
+ # clear out any remaining media files from an aborted previous attempt
413
+ self.delete_media()
414
+ view_path = TableVersionPath.from_dict(op.load_view_op.view_path)
415
+ plan, _ = Planner.create_view_load_plan(view_path)
416
+ _, row_counts = self.store_tbl.insert_rows(plan, v_min=self.version)
417
+ status = UpdateStatus(row_count_stats=row_counts)
418
+ Catalog.get().store_update_status(self.id, self.version, status)
419
+ _logger.debug(f'Loaded view {self.name} with {row_counts.num_rows} rows')
309
420
 
310
- tbl_version.store_tbl.create()
311
- if view_md is None or not view_md.is_snapshot:
312
- # add default indices, after creating the store table
313
- for col in tbl_version.cols_by_name.values():
314
- status = tbl_version._add_default_index(col)
315
- assert status is None or status.num_excs == 0
421
+ elif op.drop_store_table_op is not None:
422
+ # don't reference self.store_tbl here, it needs to reference the metadata for our base table, which at
423
+ # this point may not exist anymore
424
+ with Env.get().begin_xact() as conn:
425
+ drop_stmt = f'DROP TABLE IF EXISTS {StoreBase.storage_name(self.id, self.is_view)}'
426
+ conn.execute(sql.text(drop_stmt))
316
427
 
317
- # we re-create the tbl_record here, now that we have new index metadata
318
- tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_version._create_tbl_md()))
319
- session.add(tbl_record)
320
- session.add(tbl_version_record)
321
- session.add(schema_version_record)
322
- return tbl_record.id, tbl_version
428
+ elif op.delete_table_media_files_op:
429
+ self.delete_media()
430
+ FileCache.get().clear(tbl_id=self.id)
323
431
 
324
432
  @classmethod
325
- def create_replica(cls, md: schema.FullTableMd) -> TableVersion:
433
+ def create_replica(cls, md: TableVersionMd, create_store_tbl: bool = True) -> TableVersion:
434
+ from .catalog import Catalog, TableVersionPath
435
+
436
+ assert Env.get().in_xact
437
+ assert md.tbl_md.is_replica
326
438
  tbl_id = UUID(md.tbl_md.tbl_id)
439
+ _logger.info(f'Creating replica table version {tbl_id}:{md.version_md.version}.')
327
440
  view_md = md.tbl_md.view_md
328
- base_path = pxt.catalog.TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
441
+ base_path = TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
329
442
  base = base_path.tbl_version if base_path is not None else None
330
- tbl_version = cls(
331
- tbl_id, md.tbl_md, md.version_md.version, md.schema_version_md, [], base_path=base_path, base=base
332
- )
333
- tbl_version.store_tbl.create()
443
+ key = TableVersionKey(tbl_id, md.version_md.version, None)
444
+ tbl_version = cls(key, md.tbl_md, md.version_md, md.schema_version_md, [], base_path=base_path, base=base)
445
+ cat = Catalog.get()
446
+ # We're creating a new TableVersion replica, so we should never have seen this particular
447
+ # TableVersion instance before.
448
+ # Actually this isn't true, because we might be re-creating a dropped replica.
449
+ # TODO: Understand why old TableVersions are kept around even for a dropped table.
450
+ # assert tbl_version.effective_version is not None
451
+ # assert (tbl_version.id, tbl_version.effective_version, None) not in cat._tbl_versions
452
+ cat._tbl_versions[key] = tbl_version
453
+ tbl_version.init()
454
+ if create_store_tbl:
455
+ tbl_version.store_tbl.create()
334
456
  return tbl_version
335
457
 
336
- def drop(self) -> None:
458
+ def delete_media(self, tbl_version: int | None = None) -> None:
459
+ # Assemble a set of column destinations and delete objects from all of them
460
+ # None is a valid column destination which refers to the default object location
461
+ destinations = {col.destination for col in self.cols if col.is_stored}
462
+ for dest in destinations:
463
+ ObjectOps.delete(dest, self.id, tbl_version=tbl_version)
464
+
465
+ def drop(self) -> list[TableOp]:
466
+ id_str = str(self.id)
467
+ ops = [
468
+ TableOp(
469
+ tbl_id=id_str,
470
+ op_sn=0,
471
+ num_ops=3,
472
+ needs_xact=False,
473
+ delete_table_media_files_op=DeleteTableMediaFilesOp(),
474
+ ),
475
+ TableOp(tbl_id=id_str, op_sn=1, num_ops=3, needs_xact=False, drop_store_table_op=DropStoreTableOp()),
476
+ TableOp(tbl_id=id_str, op_sn=2, num_ops=3, needs_xact=True, delete_table_md_op=DeleteTableMdOp()),
477
+ ]
478
+ return ops
479
+
480
+ def init(self) -> None:
481
+ """
482
+ Initialize schema-related in-memory metadata separately, now that this TableVersion instance is visible
483
+ in Catalog.
484
+ """
337
485
  from .catalog import Catalog
338
486
 
339
487
  cat = Catalog.get()
340
- # delete this table and all associated data
341
- MediaStore.delete(self.id)
342
- FileCache.get().clear(tbl_id=self.id)
343
- cat.delete_tbl_md(self.id)
344
- self.store_tbl.drop()
345
- # de-register table version from catalog
346
- cat.remove_tbl_version(self)
347
-
348
- def _init_schema(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
349
- # create columns first, so the indices can reference them
350
- self._init_cols(tbl_md, schema_version_md)
351
- if not self.is_snapshot:
352
- self._init_idxs(tbl_md)
353
- # create the sa schema only after creating the columns and indices
354
- self._init_sa_schema()
355
-
356
- def _init_cols(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
357
- """Initialize self.cols with the columns visible in our effective version"""
488
+ assert self.key in cat._tbl_versions
489
+ self._init_schema()
490
+ if self.is_mutable:
491
+ cat.record_column_dependencies(self)
492
+ # init external stores; this needs to happen after the schema is created
493
+ self._init_external_stores()
494
+
495
+ self.is_initialized = True
496
+
497
+ def _init_schema(self) -> None:
498
+ from pixeltable.store import StoreComponentView, StoreTable, StoreView
499
+
500
+ from .catalog import Catalog
501
+
502
+ # initialize IndexBase instances and collect sa_col_types
503
+ idxs: dict[int, index.IndexBase] = {}
504
+ val_col_idxs: dict[int, index.IndexBase] = {} # key: id of value column
505
+ undo_col_idxs: dict[int, index.IndexBase] = {} # key: id of undo column
506
+ for md in self.tbl_md.index_md.values():
507
+ cls_name = md.class_fqn.rsplit('.', 1)[-1]
508
+ cls = getattr(index, cls_name)
509
+ idx = cls.from_dict(md.init_args)
510
+ idxs[md.id] = idx
511
+ val_col_idxs[md.index_val_col_id] = idx
512
+ undo_col_idxs[md.index_val_undo_col_id] = idx
513
+
514
+ # initialize Columns
358
515
  self.cols = []
359
516
  self.cols_by_name = {}
360
517
  self.cols_by_id = {}
361
518
  # Sort columns in column_md by the position specified in col_md.id to guarantee that all references
362
519
  # point backward.
363
- sorted_column_md = sorted(tbl_md.column_md.values(), key=lambda item: item.id)
520
+ sorted_column_md = sorted(self.tbl_md.column_md.values(), key=lambda item: item.id)
364
521
  for col_md in sorted_column_md:
365
- schema_col_md = schema_version_md.columns.get(col_md.id)
366
- col_name = schema_col_md.name if schema_col_md is not None else None
522
+ col_type = ts.ColumnType.from_dict(col_md.col_type)
523
+ schema_col_md = self.schema_version_md.columns.get(col_md.id)
367
524
  media_val = (
368
525
  MediaValidation[schema_col_md.media_validation.upper()]
369
526
  if schema_col_md is not None and schema_col_md.media_validation is not None
370
527
  else None
371
528
  )
529
+
530
+ stores_cellmd: bool | None = None # None: determined by the column properties (in the Column c'tor)
531
+ sa_col_type: sql.types.TypeEngine | None = None
532
+ if col_md.id in val_col_idxs:
533
+ idx = val_col_idxs[col_md.id]
534
+ # for index value columns, the index gets to override the default
535
+ stores_cellmd = idx.records_value_errors()
536
+ sa_col_type = idx.get_index_sa_type(col_type)
537
+ elif col_md.id in undo_col_idxs:
538
+ idx = undo_col_idxs[col_md.id]
539
+ # for index undo columns, we never store cellmd
540
+ stores_cellmd = False
541
+ sa_col_type = idx.get_index_sa_type(col_type)
542
+
372
543
  col = Column(
373
544
  col_id=col_md.id,
374
- name=col_name,
375
- col_type=ts.ColumnType.from_dict(col_md.col_type),
545
+ name=schema_col_md.name if schema_col_md is not None else None,
546
+ col_type=col_type,
376
547
  is_pk=col_md.is_pk,
548
+ is_iterator_col=self.is_component_view and col_md.id < self.num_iterator_cols + 1,
377
549
  stored=col_md.stored,
378
550
  media_validation=media_val,
551
+ sa_col_type=sa_col_type,
379
552
  schema_version_add=col_md.schema_version_add,
380
553
  schema_version_drop=col_md.schema_version_drop,
554
+ stores_cellmd=stores_cellmd,
381
555
  value_expr_dict=col_md.value_expr,
556
+ tbl_handle=self.handle,
557
+ destination=col_md.destination,
382
558
  )
383
- col.tbl = self.create_handle()
384
- self.cols.append(col)
385
559
 
386
- # populate the lookup structures before Expr.from_dict()
387
- if col_md.schema_version_add > self.schema_version:
388
- # column was added after this version
389
- continue
390
- if col_md.schema_version_drop is not None and col_md.schema_version_drop <= self.schema_version:
391
- # column was dropped
392
- continue
393
- if col.name is not None:
394
- self.cols_by_name[col.name] = col
395
- self.cols_by_id[col.id] = col
396
-
397
- # make sure to traverse columns ordered by position = order in which cols were created;
398
- # this guarantees that references always point backwards
399
- if not self.is_snapshot and col_md.value_expr is not None:
400
- self._record_refd_columns(col)
401
-
402
- def _init_idxs(self, tbl_md: schema.TableMd) -> None:
403
- self.idx_md = tbl_md.index_md
404
- self.idxs_by_name = {}
405
- import pixeltable.index as index_module
406
-
407
- for md in tbl_md.index_md.values():
408
- if md.schema_version_add > self.schema_version or (
409
- md.schema_version_drop is not None and md.schema_version_drop <= self.schema_version
560
+ self.cols.append(col)
561
+ # populate lookup structures before Expr.from_dict()
562
+ if col_md.schema_version_add <= self.schema_version and (
563
+ col_md.schema_version_drop is None or col_md.schema_version_drop > self.schema_version
410
564
  ):
411
- # index not visible in this schema version
412
- continue
413
-
414
- # instantiate index object
415
- cls_name = md.class_fqn.rsplit('.', 1)[-1]
416
- cls = getattr(index_module, cls_name)
417
- idx_col = self.path.get_column_by_id(UUID(md.indexed_col_tbl_id), md.indexed_col_id)
418
- idx = cls.from_dict(idx_col, md.init_args)
419
-
420
- # fix up the sa column type of the index value and undo columns
421
- val_col = self.cols_by_id[md.index_val_col_id]
422
- val_col.sa_col_type = idx.index_sa_type()
423
- val_col._records_errors = False
424
- undo_col = self.cols_by_id[md.index_val_undo_col_id]
425
- undo_col.sa_col_type = idx.index_sa_type()
426
- undo_col._records_errors = False
427
- idx_info = self.IndexInfo(id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col)
428
- self.idxs_by_name[md.name] = idx_info
429
-
430
- def _init_sa_schema(self) -> None:
431
- # create the sqlalchemy schema; do this after instantiating columns, in order to determine whether they
432
- # need to record errors
433
- from pixeltable.store import StoreComponentView, StoreTable, StoreView
565
+ if col.name is not None:
566
+ self.cols_by_name[col.name] = col
567
+ self.cols_by_id[col.id] = col
568
+
569
+ if self.supports_idxs:
570
+ # create IndexInfo for indices visible in current_version
571
+ visible_idxs = [
572
+ md
573
+ for md in self.tbl_md.index_md.values()
574
+ if md.schema_version_add <= self.schema_version
575
+ and (md.schema_version_drop is None or md.schema_version_drop > self.schema_version)
576
+ ]
577
+ for md in visible_idxs:
578
+ idx = idxs[md.id]
579
+ indexed_col_id = QColumnId(UUID(md.indexed_col_tbl_id), md.indexed_col_id)
580
+ idx_col = self._lookup_column(indexed_col_id)
581
+ info = self.IndexInfo(
582
+ id=md.id,
583
+ name=md.name,
584
+ idx=idx,
585
+ col=idx_col,
586
+ val_col=self.cols_by_id[md.index_val_col_id],
587
+ undo_col=self.cols_by_id[md.index_val_undo_col_id],
588
+ )
589
+ self.idxs[md.id] = info
590
+ self.idxs_by_name[md.name] = info
591
+ self.idxs_by_col.setdefault(indexed_col_id, []).append(info)
592
+
593
+ # create value exprs, now that we have all lookup structures in place
594
+ tvp: TableVersionPath | None = None
595
+ if self.effective_version is not None:
596
+ # for snapshot TableVersion instances, we need to retarget the column value_exprs to the snapshot;
597
+ # otherwise they'll incorrectly refer to the live table. So, construct a full TableVersionPath to
598
+ # use for retargeting.
599
+ tvp = Catalog.get().construct_tvp(
600
+ self.id, self.effective_version, self.tbl_md.ancestors, self.version_md.created_at
601
+ )
602
+ elif self.anchor_tbl_id is not None:
603
+ # for replica TableVersion instances, we also need to retarget the value_exprs, this time to the
604
+ # "anchored" TableVersionPath.
605
+ assert self.path is not None
606
+ tvp = self.path
607
+ for col in self.cols_by_id.values():
608
+ col.init_value_expr(tvp)
434
609
 
610
+ # create the sqlalchemy schema, after instantiating all Columns
435
611
  if self.is_component_view:
436
612
  self.store_tbl = StoreComponentView(self)
437
613
  elif self.is_view:
@@ -439,54 +615,50 @@ class TableVersion:
439
615
  else:
440
616
  self.store_tbl = StoreTable(self)
441
617
 
442
- def _update_md(
443
- self, timestamp: float, update_tbl_version: bool = True, preceding_schema_version: Optional[int] = None
444
- ) -> None:
445
- """Writes table metadata to the database.
618
+ def _lookup_column(self, qid: QColumnId) -> Column | None:
619
+ """
620
+ Look up the column with the given table id and column id, searching through the ancestors of this TableVersion
621
+ to find it. We avoid referencing TableVersionPath in order to work properly with snapshots as well.
446
622
 
447
- Args:
448
- timestamp: timestamp of the change
449
- conn: database connection to use
450
- update_tbl_version: if `True`, will also write `TableVersion` metadata
451
- preceding_schema_version: if specified, will also write `TableSchemaVersion` metadata, recording the
452
- specified preceding schema version
623
+ This will search through *all* known columns, including columns that are not visible in this TableVersion.
453
624
  """
454
- assert update_tbl_version or preceding_schema_version is None
625
+ if qid.tbl_id == self.id:
626
+ return next(col for col in self.cols if col.id == qid.col_id)
627
+ elif self.base is not None:
628
+ return self.base.get()._lookup_column(qid)
629
+ else:
630
+ return None
631
+
632
+ def _write_md(self, new_version: bool, new_schema_version: bool) -> None:
455
633
  from pixeltable.catalog import Catalog
456
634
 
457
- tbl_md = self._create_tbl_md()
458
- version_md = self._create_version_md(timestamp) if update_tbl_version else None
459
- schema_version_md = (
460
- self._create_schema_version_md(preceding_schema_version) if preceding_schema_version is not None else None
635
+ Catalog.get().write_tbl_md(
636
+ self.id,
637
+ None,
638
+ self._tbl_md,
639
+ self._version_md if new_version else None,
640
+ self._schema_version_md if new_schema_version else None,
461
641
  )
462
642
 
463
- Catalog.get().store_tbl_md(self.id, tbl_md, version_md, schema_version_md)
464
-
465
- def ensure_md_loaded(self) -> None:
466
- """Ensure that table metadata is loaded."""
467
- for col in self.cols_by_id.values():
468
- _ = col.value_expr
469
-
470
643
  def _store_idx_name(self, idx_id: int) -> str:
471
644
  """Return name of index in the store, which needs to be globally unique"""
472
645
  return f'idx_{self.id.hex}_{idx_id}'
473
646
 
474
- def add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
647
+ def add_index(self, col: Column, idx_name: str | None, idx: index.IndexBase) -> UpdateStatus:
475
648
  # we're creating a new schema version
476
- self.version += 1
477
- preceding_schema_version = self.schema_version
478
- self.schema_version = self.version
649
+ self.bump_version(bump_schema_version=True)
479
650
  status = self._add_index(col, idx_name, idx)
480
- self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
651
+ self._write_md(new_version=True, new_schema_version=True)
481
652
  _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
482
653
  return status
483
654
 
484
- def _is_btree_indexable(self, col: Column) -> bool:
655
+ @classmethod
656
+ def _is_btree_indexable(cls, col: Column) -> bool:
485
657
  if not col.stored:
486
658
  # if the column is intentionally not stored, we want to avoid the overhead of an index
487
659
  return False
488
660
  # Skip index for stored media columns produced by an iterator
489
- if col.col_type.is_media_type() and self.is_iterator_column(col):
661
+ if col.col_type.is_media_type() and col.is_iterator_col:
490
662
  return False
491
663
  if not col.col_type.is_scalar_type() and not (col.col_type.is_media_type() and not col.is_computed):
492
664
  # wrong type for a B-tree
@@ -496,53 +668,58 @@ class TableVersion:
496
668
  return False
497
669
  return True
498
670
 
499
- def _add_default_index(self, col: Column) -> Optional[UpdateStatus]:
671
+ def _add_default_index(self, col: Column) -> UpdateStatus | None:
500
672
  """Add a B-tree index on this column if it has a compatible type"""
501
673
  if not self._is_btree_indexable(col):
502
674
  return None
503
- status = self._add_index(col, idx_name=None, idx=index.BtreeIndex(col))
675
+ status = self._add_index(col, idx_name=None, idx=index.BtreeIndex())
504
676
  return status
505
677
 
506
- def _create_index_columns(self, idx: index.IndexBase) -> Tuple[Column, Column]:
678
+ @classmethod
679
+ def _create_index_columns(
680
+ cls,
681
+ col: Column,
682
+ idx: index.IndexBase,
683
+ schema_version: int,
684
+ tbl_handle: TableVersionHandle,
685
+ id_cb: Callable[[], int],
686
+ ) -> tuple[Column, Column]:
507
687
  """Create value and undo columns for the given index.
508
688
  Args:
509
689
  idx: index for which columns will be created.
510
690
  Returns:
511
- A tuple containing the value column and the undo column.
691
+ A tuple containing the value column and the undo column, both of which are nullable.
512
692
  """
513
- assert not self.is_snapshot
514
- # add the index value and undo columns (which need to be nullable)
693
+ value_expr = idx.create_value_expr(col)
515
694
  val_col = Column(
516
- col_id=self.next_col_id,
695
+ col_id=id_cb(),
517
696
  name=None,
518
- computed_with=idx.index_value_expr(),
519
- sa_col_type=idx.index_sa_type(),
697
+ computed_with=value_expr,
698
+ sa_col_type=idx.get_index_sa_type(value_expr.col_type),
520
699
  stored=True,
521
- schema_version_add=self.schema_version,
700
+ stores_cellmd=idx.records_value_errors(),
701
+ schema_version_add=schema_version,
522
702
  schema_version_drop=None,
523
- records_errors=idx.records_value_errors(),
524
703
  )
525
- val_col.tbl = self.create_handle()
526
704
  val_col.col_type = val_col.col_type.copy(nullable=True)
527
- self.next_col_id += 1
705
+ val_col.tbl_handle = tbl_handle
528
706
 
529
707
  undo_col = Column(
530
- col_id=self.next_col_id,
708
+ col_id=id_cb(),
531
709
  name=None,
532
710
  col_type=val_col.col_type,
533
711
  sa_col_type=val_col.sa_col_type,
534
712
  stored=True,
535
- schema_version_add=self.schema_version,
713
+ stores_cellmd=False,
714
+ schema_version_add=schema_version,
536
715
  schema_version_drop=None,
537
- records_errors=False,
538
716
  )
539
- undo_col.tbl = self.create_handle()
540
717
  undo_col.col_type = undo_col.col_type.copy(nullable=True)
541
- self.next_col_id += 1
718
+ undo_col.tbl_handle = tbl_handle
542
719
  return val_col, undo_col
543
720
 
544
721
  def _create_index(
545
- self, col: Column, val_col: Column, undo_col: Column, idx_name: Optional[str], idx: index.IndexBase
722
+ self, col: Column, val_col: Column, undo_col: Column, idx_name: str | None, idx: index.IndexBase
546
723
  ) -> None:
547
724
  """Create the given index along with index md"""
548
725
  idx_id = self.next_idx_id
@@ -551,14 +728,14 @@ class TableVersion:
551
728
  idx_name = f'idx{idx_id}'
552
729
  else:
553
730
  assert is_valid_identifier(idx_name)
554
- assert idx_name not in [i.name for i in self.idx_md.values()]
731
+ assert idx_name not in [i.name for i in self._tbl_md.index_md.values()]
555
732
  # create and register the index metadata
556
733
  idx_cls = type(idx)
557
734
  idx_md = schema.IndexMd(
558
735
  id=idx_id,
559
736
  name=idx_name,
560
737
  indexed_col_id=col.id,
561
- indexed_col_tbl_id=str(col.tbl.id),
738
+ indexed_col_tbl_id=str(col.get_tbl().id),
562
739
  index_val_col_id=val_col.id,
563
740
  index_val_undo_col_id=undo_col.id,
564
741
  schema_version_add=self.schema_version,
@@ -567,85 +744,80 @@ class TableVersion:
567
744
  init_args=idx.as_dict(),
568
745
  )
569
746
  idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
570
- self.idx_md[idx_id] = idx_md
747
+ self._tbl_md.index_md[idx_id] = idx_md
748
+ self.idxs[idx_id] = idx_info
571
749
  self.idxs_by_name[idx_name] = idx_info
572
- try:
573
- idx.create_index(self._store_idx_name(idx_id), val_col)
574
- finally:
575
-
576
- def cleanup_index() -> None:
577
- """Delete the newly added in-memory index structure"""
578
- del self.idxs_by_name[idx_name]
579
- del self.idx_md[idx_id]
580
- self.next_idx_id = idx_id
750
+ self.idxs_by_col.setdefault(col.qid, []).append(idx_info)
751
+ self.store_tbl.create_index(idx_id)
581
752
 
582
- # Run cleanup only if there has been an exception; otherwise, skip cleanup.
583
- run_cleanup_on_exception(cleanup_index)
584
-
585
- def _add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
586
- val_col, undo_vol = self._create_index_columns(idx)
753
+ def _add_index(self, col: Column, idx_name: str | None, idx: index.IndexBase) -> UpdateStatus:
754
+ val_col, undo_col = self._create_index_columns(
755
+ col, idx, self.schema_version, self.handle, id_cb=self.next_col_id
756
+ )
587
757
  # add the columns and update the metadata
588
758
  # TODO support on_error='abort' for indices; it's tricky because of the way metadata changes are entangled
589
759
  # with the database operations
590
- status = self._add_columns([val_col, undo_vol], print_stats=False, on_error='ignore')
760
+ status = self._add_columns([val_col, undo_col], print_stats=False, on_error='ignore')
591
761
  # now create the index structure
592
- self._create_index(col, val_col, undo_vol, idx_name, idx)
762
+ self._create_index(col, val_col, undo_col, idx_name, idx)
593
763
  return status
594
764
 
595
765
  def drop_index(self, idx_id: int) -> None:
596
- assert not self.is_snapshot
597
- assert idx_id in self.idx_md
766
+ assert self.is_mutable
767
+ assert idx_id in self._tbl_md.index_md
598
768
 
599
769
  # we're creating a new schema version
600
- self.version += 1
601
- preceding_schema_version = self.schema_version
602
- self.schema_version = self.version
603
- idx_md = self.idx_md[idx_id]
770
+ self.bump_version(bump_schema_version=True)
771
+ idx_md = self._tbl_md.index_md[idx_id]
604
772
  idx_md.schema_version_drop = self.schema_version
605
773
  assert idx_md.name in self.idxs_by_name
606
774
  idx_info = self.idxs_by_name[idx_md.name]
607
775
  # remove this index entry from the active indexes (in memory)
608
776
  # and the index metadata (in persistent table metadata)
777
+ # TODO: this is wrong, it breaks revert()
778
+ del self.idxs[idx_id]
609
779
  del self.idxs_by_name[idx_md.name]
610
- del self.idx_md[idx_id]
780
+ if idx_info.col.qid in self.idxs_by_col:
781
+ self.idxs_by_col[idx_info.col.qid].remove(idx_info)
782
+ del self._tbl_md.index_md[idx_id]
611
783
 
612
784
  self._drop_columns([idx_info.val_col, idx_info.undo_col])
613
- self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
785
+ self._write_md(new_version=True, new_schema_version=True)
614
786
  _logger.info(f'Dropped index {idx_md.name} on table {self.name}')
615
787
 
616
788
  def add_columns(
617
789
  self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
618
790
  ) -> UpdateStatus:
619
- """Adds a column to the table."""
620
- assert not self.is_snapshot
621
- assert all(is_valid_identifier(col.name) for col in cols)
791
+ """Adds columns to the table."""
792
+ assert self.is_mutable
793
+ assert all(is_valid_identifier(col.name) for col in cols if col.name is not None)
622
794
  assert all(col.stored is not None for col in cols)
623
- assert all(col.name not in self.cols_by_name for col in cols)
795
+ assert all(col.name not in self.cols_by_name for col in cols if col.name is not None)
624
796
  for col in cols:
625
- col.tbl = self.create_handle()
626
- col.id = self.next_col_id
627
- self.next_col_id += 1
797
+ col.tbl_handle = self.handle
798
+ col.id = self.next_col_id()
628
799
 
629
800
  # we're creating a new schema version
630
- self.version += 1
631
- preceding_schema_version = self.schema_version
632
- self.schema_version = self.version
801
+ self.bump_version(bump_schema_version=True)
633
802
  index_cols: dict[Column, tuple[index.BtreeIndex, Column, Column]] = {}
634
803
  all_cols: list[Column] = []
635
804
  for col in cols:
636
805
  all_cols.append(col)
637
- if self._is_btree_indexable(col):
638
- idx = index.BtreeIndex(col)
639
- val_col, undo_col = self._create_index_columns(idx)
806
+ if col.name is not None and self._is_btree_indexable(col):
807
+ idx = index.BtreeIndex()
808
+ val_col, undo_col = self._create_index_columns(
809
+ col, idx, self.schema_version, self.handle, id_cb=self.next_col_id
810
+ )
640
811
  index_cols[col] = (idx, val_col, undo_col)
641
812
  all_cols.append(val_col)
642
813
  all_cols.append(undo_col)
643
814
  # Add all columns
644
815
  status = self._add_columns(all_cols, print_stats=print_stats, on_error=on_error)
645
- # Create indices and their mds
816
+ # Create indices and their md records
646
817
  for col, (idx, val_col, undo_col) in index_cols.items():
647
818
  self._create_index(col, val_col, undo_col, idx_name=None, idx=idx)
648
- self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
819
+ self.update_status = status
820
+ self._write_md(new_version=True, new_schema_version=True)
649
821
  _logger.info(f'Added columns {[col.name for col in cols]} to table {self.name}, new version: {self.version}')
650
822
 
651
823
  msg = (
@@ -660,28 +832,39 @@ class TableVersion:
660
832
  self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
661
833
  ) -> UpdateStatus:
662
834
  """Add and populate columns within the current transaction"""
835
+ from pixeltable.catalog import Catalog
836
+ from pixeltable.plan import Planner
837
+
663
838
  cols_to_add = list(cols)
839
+
664
840
  row_count = self.store_tbl.count()
665
841
  for col in cols_to_add:
842
+ assert col.tbl_handle.id == self.id
666
843
  if not col.col_type.nullable and not col.is_computed and row_count > 0:
667
844
  raise excs.Error(
668
845
  f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
669
846
  )
670
847
 
848
+ computed_values = 0
671
849
  num_excs = 0
672
850
  cols_with_excs: list[Column] = []
673
851
  for col in cols_to_add:
852
+ assert col.id is not None
674
853
  excs_per_col = 0
675
854
  col.schema_version_add = self.schema_version
676
855
  # add the column to the lookup structures now, rather than after the store changes executed successfully,
677
856
  # because it might be referenced by the next column's value_expr
678
857
  self.cols.append(col)
858
+ self.cols_by_id[col.id] = col
679
859
  if col.name is not None:
680
860
  self.cols_by_name[col.name] = col
681
- self.cols_by_id[col.id] = col
682
- if col.value_expr is not None:
683
- col.check_value_expr()
684
- self._record_refd_columns(col)
861
+ col_md, sch_md = col.to_md(len(self.cols_by_name))
862
+ assert sch_md is not None, 'Schema column metadata must be created for user-facing columns'
863
+ self._tbl_md.column_md[col.id] = col_md
864
+ self._schema_version_md.columns[col.id] = sch_md
865
+ else:
866
+ col_md, _ = col.to_md()
867
+ self._tbl_md.column_md[col.id] = col_md
685
868
 
686
869
  if col.is_stored:
687
870
  self.store_tbl.add_column(col)
@@ -690,120 +873,121 @@ class TableVersion:
690
873
  continue
691
874
 
692
875
  # populate the column
693
- from pixeltable.plan import Planner
694
-
695
- plan, value_expr_slot_idx = Planner.create_add_column_plan(self.path, col)
876
+ plan = Planner.create_add_column_plan(self.path, col)
696
877
  plan.ctx.num_rows = row_count
697
878
  try:
698
879
  plan.open()
699
880
  try:
700
- excs_per_col = self.store_tbl.load_column(col, plan, value_expr_slot_idx, on_error)
701
- except sql.exc.DBAPIError as exc:
702
- # Wrap the DBAPIError in an excs.Error to unify processing in the subsequent except block
703
- raise excs.Error(f'SQL error during execution of computed column `{col.name}`:\n{exc}') from exc
881
+ excs_per_col = self.store_tbl.load_column(col, plan, on_error == 'abort')
882
+ except sql_exc.DBAPIError as exc:
883
+ Catalog.get().convert_sql_exc(exc, self.id, self.handle, convert_db_excs=True)
884
+ # If it wasn't converted, re-raise as a generic Pixeltable error
885
+ # (this means it's not a known concurrency error; it's something else)
886
+ raise excs.Error(
887
+ f'Unexpected SQL error during execution of computed column {col.name!r}:\n{exc}'
888
+ ) from exc
704
889
  if excs_per_col > 0:
705
890
  cols_with_excs.append(col)
706
891
  num_excs += excs_per_col
892
+ computed_values += plan.ctx.num_computed_exprs * row_count
707
893
  finally:
708
- # Ensure cleanup occurs if an exception or keyboard interruption happens during `load_column()`.
709
- def cleanup_on_error() -> None:
710
- """Delete columns that are added as part of current add_columns operation and re-initialize
711
- the sqlalchemy schema"""
712
- self.cols = [col for col in self.cols if col not in cols_to_add]
713
- for col in cols_to_add:
714
- # remove columns that we already added
715
- if col.id in self.cols_by_id:
716
- del self.cols_by_id[col.id]
717
- if col.name is not None and col.name in self.cols_by_name:
718
- del self.cols_by_name[col.name]
719
- self.store_tbl.create_sa_tbl()
720
-
721
- # Run cleanup only if there has been an exception; otherwise, skip cleanup.
722
- run_cleanup_on_exception(cleanup_on_error)
723
894
  plan.close()
724
895
 
896
+ Catalog.get().record_column_dependencies(self)
897
+
725
898
  if print_stats:
726
899
  plan.ctx.profile.print(num_rows=row_count)
727
- # TODO(mkornacker): what to do about system columns with exceptions?
900
+
901
+ # TODO: what to do about system columns with exceptions?
902
+ row_counts = RowCountStats(
903
+ upd_rows=row_count, num_excs=num_excs, computed_values=computed_values
904
+ ) # add_columns
728
905
  return UpdateStatus(
729
- num_rows=row_count,
730
- num_computed_values=row_count,
731
- num_excs=num_excs,
732
- cols_with_excs=[f'{col.tbl.get().name}.{col.name}' for col in cols_with_excs if col.name is not None],
906
+ cols_with_excs=[f'{col.get_tbl().name}.{col.name}' for col in cols_with_excs if col.name is not None],
907
+ row_count_stats=row_counts,
733
908
  )
734
909
 
735
910
  def drop_column(self, col: Column) -> None:
736
911
  """Drop a column from the table."""
737
912
 
738
- assert not self.is_snapshot
913
+ assert self.is_mutable
739
914
 
740
915
  # we're creating a new schema version
741
- self.version += 1
742
- preceding_schema_version = self.schema_version
743
- self.schema_version = self.version
916
+ self.bump_version(bump_schema_version=True)
744
917
 
745
918
  # drop this column and all dependent index columns and indices
746
919
  dropped_cols = [col]
747
- dropped_idx_names: list[str] = []
920
+ dropped_idx_info: list[TableVersion.IndexInfo] = []
748
921
  for idx_info in self.idxs_by_name.values():
749
922
  if idx_info.col != col:
750
923
  continue
751
924
  dropped_cols.extend([idx_info.val_col, idx_info.undo_col])
752
- idx_md = self.idx_md[idx_info.id]
925
+ idx_md = self._tbl_md.index_md[idx_info.id]
753
926
  idx_md.schema_version_drop = self.schema_version
754
927
  assert idx_md.name in self.idxs_by_name
755
- dropped_idx_names.append(idx_md.name)
756
- # update idxs_by_name
757
- for idx_name in dropped_idx_names:
758
- del self.idxs_by_name[idx_name]
928
+ dropped_idx_info.append(idx_info)
929
+
930
+ # update index lookup structures
931
+ for info in dropped_idx_info:
932
+ del self.idxs[info.id]
933
+ del self.idxs_by_name[info.name]
934
+ if col.qid in self.idxs_by_col:
935
+ del self.idxs_by_col[col.qid]
936
+
759
937
  self._drop_columns(dropped_cols)
760
- self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
938
+ self._write_md(new_version=True, new_schema_version=True)
761
939
  _logger.info(f'Dropped column {col.name} from table {self.name}, new version: {self.version}')
762
940
 
763
941
  def _drop_columns(self, cols: Iterable[Column]) -> None:
764
942
  """Mark columns as dropped"""
765
- assert not self.is_snapshot
943
+ from pixeltable.catalog import Catalog
766
944
 
767
- for col in cols:
768
- if col.value_expr is not None:
769
- # update Column.dependent_cols
770
- for c in self.cols:
771
- if c == col:
772
- break
773
- c.dependent_cols.discard(col)
945
+ assert self.is_mutable
774
946
 
947
+ for col in cols:
775
948
  col.schema_version_drop = self.schema_version
776
949
  if col.name is not None:
777
950
  assert col.name in self.cols_by_name
778
951
  del self.cols_by_name[col.name]
779
952
  assert col.id in self.cols_by_id
780
953
  del self.cols_by_id[col.id]
954
+ # update stored md
955
+ self._tbl_md.column_md[col.id].schema_version_drop = col.schema_version_drop
956
+ if col.name is not None:
957
+ del self._schema_version_md.columns[col.id]
958
+
959
+ # update positions
960
+ for pos, schema_col in enumerate(self._schema_version_md.columns.values()):
961
+ schema_col.pos = pos
781
962
 
782
963
  self.store_tbl.create_sa_tbl()
964
+ Catalog.get().record_column_dependencies(self)
783
965
 
784
966
  def rename_column(self, old_name: str, new_name: str) -> None:
785
967
  """Rename a column."""
786
- assert not self.is_snapshot
787
- if old_name not in self.cols_by_name:
968
+ if not self.is_mutable:
969
+ raise excs.Error(f'Cannot rename column for immutable table {self.name!r}')
970
+ col = self.path.get_column(old_name)
971
+ if col is None:
788
972
  raise excs.Error(f'Unknown column: {old_name}')
973
+ if col.get_tbl().id != self.id:
974
+ raise excs.Error(f'Cannot rename base table column {col.name!r}')
789
975
  if not is_valid_identifier(new_name):
790
- raise excs.Error(f"Invalid column name: '{new_name}'")
976
+ raise excs.Error(f'Invalid column name: {new_name}')
791
977
  if new_name in self.cols_by_name:
792
- raise excs.Error(f'Column {new_name} already exists')
793
- col = self.cols_by_name[old_name]
978
+ raise excs.Error(f'Column {new_name!r} already exists')
794
979
  del self.cols_by_name[old_name]
795
980
  col.name = new_name
796
981
  self.cols_by_name[new_name] = col
982
+ self._schema_version_md.columns[col.id].name = new_name
797
983
 
798
984
  # we're creating a new schema version
799
- self.version += 1
800
- preceding_schema_version = self.schema_version
801
- self.schema_version = self.version
985
+ self.bump_version(bump_schema_version=True)
802
986
 
803
- self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
987
+ self._write_md(new_version=True, new_schema_version=True)
804
988
  _logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
805
989
 
806
- def set_comment(self, new_comment: Optional[str]) -> None:
990
+ def set_comment(self, new_comment: str | None) -> None:
807
991
  _logger.info(f'[{self.name}] Updating comment: {new_comment}')
808
992
  self.comment = new_comment
809
993
  self._create_schema_version()
@@ -818,82 +1002,79 @@ class TableVersion:
818
1002
 
819
1003
  def _create_schema_version(self) -> None:
820
1004
  # we're creating a new schema version
821
- self.version += 1
822
- preceding_schema_version = self.schema_version
823
- self.schema_version = self.version
824
- self._update_md(time.time(), preceding_schema_version=preceding_schema_version)
1005
+ self.bump_version(bump_schema_version=True)
1006
+ self._write_md(new_version=True, new_schema_version=True)
825
1007
  _logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
826
1008
 
827
1009
  def insert(
828
1010
  self,
829
- rows: Optional[list[dict[str, Any]]],
830
- df: Optional[pxt.DataFrame],
1011
+ rows: list[dict[str, Any]] | None,
1012
+ query: Query | None,
831
1013
  print_stats: bool = False,
832
1014
  fail_on_exception: bool = True,
833
1015
  ) -> UpdateStatus:
834
1016
  """
835
- Insert rows into this table, either from an explicit list of dicts or from a `DataFrame`.
1017
+ Insert rows into this table, either from an explicit list of dicts or from a `Query`.
836
1018
  """
837
1019
  from pixeltable.plan import Planner
838
1020
 
839
- assert self.is_insertable()
840
- assert (rows is None) != (df is None) # Exactly one must be specified
1021
+ assert self.is_insertable
1022
+ assert (rows is None) != (query is None) # Exactly one must be specified
841
1023
  if rows is not None:
842
1024
  plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
1025
+
843
1026
  else:
844
- plan = Planner.create_df_insert_plan(self, df, ignore_errors=not fail_on_exception)
1027
+ plan = Planner.create_query_insert_plan(self, query, ignore_errors=not fail_on_exception)
845
1028
 
846
1029
  # this is a base table; we generate rowids during the insert
847
1030
  def rowids() -> Iterator[int]:
848
1031
  while True:
849
- rowid = self.next_rowid
850
- self.next_rowid += 1
1032
+ rowid = self.next_row_id
1033
+ self.next_row_id += 1
851
1034
  yield rowid
852
1035
 
853
- return self._insert(plan, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception)
1036
+ result = self._insert(
1037
+ plan, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception
1038
+ )
1039
+ return result
854
1040
 
855
1041
  def _insert(
856
1042
  self,
857
1043
  exec_plan: 'exec.ExecNode',
858
1044
  timestamp: float,
859
1045
  *,
860
- rowids: Optional[Iterator[int]] = None,
1046
+ rowids: Iterator[int] | None = None,
861
1047
  print_stats: bool = False,
862
1048
  abort_on_exc: bool = False,
863
1049
  ) -> UpdateStatus:
864
1050
  """Insert rows produced by exec_plan and propagate to views"""
865
1051
  # we're creating a new version
866
- self.version += 1
867
- result = UpdateStatus()
868
- num_rows, num_excs, cols_with_excs = self.store_tbl.insert_rows(
1052
+ self.bump_version(timestamp, bump_schema_version=False)
1053
+ cols_with_excs, row_counts = self.store_tbl.insert_rows(
869
1054
  exec_plan, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc
870
1055
  )
871
- result.num_rows = num_rows
872
- result.num_excs = num_excs
873
- result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
874
- result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
875
- self._update_md(timestamp)
1056
+ result = UpdateStatus(
1057
+ cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs],
1058
+ row_count_stats=row_counts,
1059
+ )
876
1060
 
877
1061
  # update views
878
1062
  for view in self.mutable_views:
879
1063
  from pixeltable.plan import Planner
880
1064
 
881
- plan, _ = Planner.create_view_load_plan(view.get().path, propagates_insert=True)
882
- status = view.get()._insert(plan, timestamp, print_stats=print_stats)
883
- result.num_rows += status.num_rows
884
- result.num_excs += status.num_excs
885
- result.num_computed_values += status.num_computed_values
886
- result.cols_with_excs += status.cols_with_excs
1065
+ plan2, _ = Planner.create_view_load_plan(view.get().path, propagates_insert=True)
1066
+ status = view.get()._insert(plan2, timestamp, print_stats=print_stats)
1067
+ result += status.to_cascade()
887
1068
 
888
- result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
1069
+ # Use the net status after all propagations
1070
+ self.update_status = result
1071
+ self._write_md(new_version=True, new_schema_version=False)
889
1072
  if print_stats:
890
- plan.ctx.profile.print(num_rows=num_rows)
1073
+ exec_plan.ctx.profile.print(num_rows=result.num_rows)
891
1074
  _logger.info(f'TableVersion {self.name}: new version {self.version}')
892
1075
  return result
893
1076
 
894
- def update(
895
- self, value_spec: dict[str, Any], where: Optional[exprs.Expr] = None, cascade: bool = True
896
- ) -> UpdateStatus:
1077
+ def update(self, value_spec: dict[str, Any], where: exprs.Expr | None = None, cascade: bool = True) -> UpdateStatus:
897
1078
  """Update rows in this TableVersionPath.
898
1079
  Args:
899
1080
  value_spec: a list of (column, value) pairs specifying the columns to update and their new values.
@@ -901,22 +1082,21 @@ class TableVersion:
901
1082
  cascade: if True, also update all computed columns that transitively depend on the updated columns,
902
1083
  including within views.
903
1084
  """
904
- if self.is_snapshot:
905
- raise excs.Error('Cannot update a snapshot')
906
-
1085
+ from pixeltable.exprs import SqlElementCache
907
1086
  from pixeltable.plan import Planner
908
1087
 
1088
+ assert self.is_mutable
1089
+
909
1090
  update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True, allow_media=True)
910
1091
  if where is not None:
911
1092
  if not isinstance(where, exprs.Expr):
912
- raise excs.Error(f"'where' argument must be a predicate, got {type(where)}")
1093
+ raise excs.Error(f'`where` argument must be a valid Pixeltable expression; got `{type(where)}`')
913
1094
  analysis_info = Planner.analyze(self.path, where)
914
1095
  # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
915
1096
  if analysis_info.filter is not None:
916
- raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
1097
+ raise excs.Error(f'Filter not expressible in SQL: {analysis_info.filter}')
917
1098
 
918
1099
  plan, updated_cols, recomputed_cols = Planner.create_update_plan(self.path, update_spec, [], where, cascade)
919
- from pixeltable.exprs import SqlElementCache
920
1100
 
921
1101
  result = self.propagate_update(
922
1102
  plan,
@@ -927,7 +1107,7 @@ class TableVersion:
927
1107
  cascade=cascade,
928
1108
  show_progress=True,
929
1109
  )
930
- result.updated_cols = updated_cols
1110
+ result += UpdateStatus(updated_cols=updated_cols)
931
1111
  return result
932
1112
 
933
1113
  def batch_update(
@@ -943,18 +1123,18 @@ class TableVersion:
943
1123
  batch: one dict per row, each mapping Columns to LiteralExprs representing the new values
944
1124
  rowids: if not empty, one tuple per row, each containing the rowid values for the corresponding row in batch
945
1125
  """
1126
+ from pixeltable.plan import Planner
1127
+
946
1128
  # if we do lookups of rowids, we must have one for each row in the batch
947
1129
  assert len(rowids) == 0 or len(rowids) == len(batch)
948
1130
 
949
- from pixeltable.plan import Planner
950
-
951
1131
  plan, row_update_node, delete_where_clause, updated_cols, recomputed_cols = Planner.create_batch_update_plan(
952
1132
  self.path, batch, rowids, cascade=cascade
953
1133
  )
954
1134
  result = self.propagate_update(
955
1135
  plan, delete_where_clause, recomputed_cols, base_versions=[], timestamp=time.time(), cascade=cascade
956
1136
  )
957
- result.updated_cols = [c.qualified_name for c in updated_cols]
1137
+ result += UpdateStatus(updated_cols=[c.qualified_name for c in updated_cols])
958
1138
 
959
1139
  unmatched_rows = row_update_node.unmatched_rows()
960
1140
  if len(unmatched_rows) > 0:
@@ -962,7 +1142,7 @@ class TableVersion:
962
1142
  raise excs.Error(f'batch_update(): {len(unmatched_rows)} row(s) not found')
963
1143
  if insert_if_not_exists:
964
1144
  insert_status = self.insert(unmatched_rows, None, print_stats=False, fail_on_exception=False)
965
- result += insert_status
1145
+ result += insert_status.to_cascade()
966
1146
  return result
967
1147
 
968
1148
  def _validate_update_spec(
@@ -971,23 +1151,24 @@ class TableVersion:
971
1151
  update_targets: dict[Column, exprs.Expr] = {}
972
1152
  for col_name, val in value_spec.items():
973
1153
  if not isinstance(col_name, str):
974
- raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
1154
+ raise excs.Error(f'Update specification: dict key must be column name; got {col_name!r}')
975
1155
  if col_name == _ROWID_COLUMN_NAME:
976
1156
  # a valid rowid is a list of ints, one per rowid column
977
1157
  assert len(val) == len(self.store_tbl.rowid_columns())
978
1158
  for el in val:
979
1159
  assert isinstance(el, int)
980
1160
  continue
981
- col = self.path.get_column(col_name, include_bases=False)
1161
+ col = self.path.get_column(col_name)
982
1162
  if col is None:
983
- # TODO: return more informative error if this is trying to update a base column
984
- raise excs.Error(f'Column {col_name} unknown')
1163
+ raise excs.Error(f'Unknown column: {col_name}')
1164
+ if col.get_tbl().id != self.id:
1165
+ raise excs.Error(f'Column {col.name!r} is a base table column and cannot be updated')
985
1166
  if col.is_computed:
986
- raise excs.Error(f'Column {col_name} is computed and cannot be updated')
1167
+ raise excs.Error(f'Column {col_name!r} is computed and cannot be updated')
987
1168
  if col.is_pk and not allow_pk:
988
- raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
1169
+ raise excs.Error(f'Column {col_name!r} is a primary key column and cannot be updated')
989
1170
  if col.col_type.is_media_type() and not allow_media:
990
- raise excs.Error(f'Column {col_name} is a media column and cannot be updated')
1171
+ raise excs.Error(f'Column {col_name!r} is a media column and cannot be updated')
991
1172
 
992
1173
  # make sure that the value is compatible with the column type
993
1174
  value_expr: exprs.Expr
@@ -997,132 +1178,180 @@ class TableVersion:
997
1178
  except (TypeError, jsonschema.exceptions.ValidationError) as exc:
998
1179
  if not allow_exprs:
999
1180
  raise excs.Error(
1000
- f'Column {col_name}: value {val!r} is not a valid literal for this column '
1001
- f'(expected {col.col_type})'
1181
+ f'Column {col_name!r}: value is not a valid literal for this column '
1182
+ f'(expected `{col.col_type}`): {val!r}'
1002
1183
  ) from exc
1003
1184
  # it's not a literal, let's try to create an expr from it
1004
1185
  value_expr = exprs.Expr.from_object(val)
1005
1186
  if value_expr is None:
1006
1187
  raise excs.Error(
1007
- f'Column {col_name}: value {val!r} is not a recognized literal or expression'
1188
+ f'Column {col_name!r}: value is not a recognized literal or expression: {val!r}'
1008
1189
  ) from exc
1009
1190
  if not col.col_type.is_supertype_of(value_expr.col_type, ignore_nullable=True):
1010
1191
  raise excs.Error(
1011
- f'Type of value {val!r} ({value_expr.col_type}) is not compatible with the type of column '
1012
- f'{col_name} ({col.col_type})'
1192
+ f'Type `{value_expr.col_type}` of value {val!r} is not compatible with the type '
1193
+ f'`{col.col_type}` of column {col_name!r}'
1013
1194
  ) from exc
1014
1195
  update_targets[col] = value_expr
1015
1196
 
1016
1197
  return update_targets
1017
1198
 
1199
+ def recompute_columns(
1200
+ self, col_names: list[str], where: exprs.Expr | None = None, errors_only: bool = False, cascade: bool = True
1201
+ ) -> UpdateStatus:
1202
+ from pixeltable.exprs import CompoundPredicate, SqlElementCache
1203
+ from pixeltable.plan import Planner
1204
+
1205
+ assert self.is_mutable
1206
+ assert all(name in self.cols_by_name for name in col_names)
1207
+ assert len(col_names) > 0
1208
+ assert len(col_names) == 1 or not errors_only
1209
+
1210
+ target_columns = [self.cols_by_name[name] for name in col_names]
1211
+ where_clause: exprs.Expr | None = None
1212
+ if where is not None:
1213
+ self._validate_where_clause(where, error_prefix='`where` argument')
1214
+ where_clause = where
1215
+ if errors_only:
1216
+ errortype_pred = (
1217
+ exprs.ColumnPropertyRef(exprs.ColumnRef(target_columns[0]), exprs.ColumnPropertyRef.Property.ERRORTYPE)
1218
+ != None
1219
+ )
1220
+ where_clause = CompoundPredicate.make_conjunction([where_clause, errortype_pred])
1221
+ plan, updated_cols, recomputed_cols = Planner.create_update_plan(
1222
+ self.path, update_targets={}, recompute_targets=target_columns, where_clause=where_clause, cascade=cascade
1223
+ )
1224
+
1225
+ result = self.propagate_update(
1226
+ plan,
1227
+ where_clause.sql_expr(SqlElementCache()) if where_clause is not None else None,
1228
+ recomputed_cols,
1229
+ base_versions=[],
1230
+ timestamp=time.time(),
1231
+ cascade=cascade,
1232
+ show_progress=True,
1233
+ )
1234
+ result += UpdateStatus(updated_cols=updated_cols)
1235
+ return result
1236
+
1018
1237
  def propagate_update(
1019
1238
  self,
1020
- plan: Optional[exec.ExecNode],
1021
- where_clause: Optional[sql.ColumnElement],
1239
+ plan: exec.ExecNode | None,
1240
+ where_clause: sql.ColumnElement | None,
1022
1241
  recomputed_view_cols: list[Column],
1023
- base_versions: list[Optional[int]],
1242
+ base_versions: list[int | None],
1024
1243
  timestamp: float,
1025
1244
  cascade: bool,
1026
1245
  show_progress: bool = True,
1027
1246
  ) -> UpdateStatus:
1247
+ from pixeltable.catalog import Catalog
1248
+ from pixeltable.plan import Planner
1249
+
1250
+ Catalog.get().mark_modified_tvs(self.handle)
1028
1251
  result = UpdateStatus()
1029
- if plan is not None:
1030
- # we're creating a new version
1031
- self.version += 1
1032
- result.num_rows, result.num_excs, cols_with_excs = self.store_tbl.insert_rows(
1252
+ create_new_table_version = plan is not None
1253
+ if create_new_table_version:
1254
+ self.bump_version(timestamp, bump_schema_version=False)
1255
+ cols_with_excs, row_counts = self.store_tbl.insert_rows(
1033
1256
  plan, v_min=self.version, show_progress=show_progress
1034
1257
  )
1035
- result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
1258
+ result += UpdateStatus(
1259
+ row_count_stats=row_counts.insert_to_update(),
1260
+ cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs],
1261
+ )
1036
1262
  self.store_tbl.delete_rows(
1037
1263
  self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause
1038
1264
  )
1039
- self._update_md(timestamp)
1040
1265
 
1041
1266
  if cascade:
1042
1267
  base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
1043
1268
  # propagate to views
1044
1269
  for view in self.mutable_views:
1045
- recomputed_cols = [col for col in recomputed_view_cols if col.tbl == view]
1270
+ recomputed_cols = [col for col in recomputed_view_cols if col.get_tbl().id == view.id]
1046
1271
  plan = None
1047
1272
  if len(recomputed_cols) > 0:
1048
- from pixeltable.plan import Planner
1049
-
1050
1273
  plan = Planner.create_view_update_plan(view.get().path, recompute_targets=recomputed_cols)
1051
1274
  status = view.get().propagate_update(
1052
1275
  plan, None, recomputed_view_cols, base_versions=base_versions, timestamp=timestamp, cascade=True
1053
1276
  )
1054
- result.num_rows += status.num_rows
1055
- result.num_excs += status.num_excs
1056
- result.cols_with_excs += status.cols_with_excs
1057
-
1058
- result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
1277
+ result += status.to_cascade()
1278
+ if create_new_table_version:
1279
+ self.update_status = result
1280
+ self._write_md(new_version=True, new_schema_version=False)
1059
1281
  return result
1060
1282
 
1061
- def delete(self, where: Optional[exprs.Expr] = None) -> UpdateStatus:
1062
- """Delete rows in this table.
1063
- Args:
1064
- where: a predicate to filter rows to delete.
1065
- """
1066
- assert self.is_insertable()
1283
+ def _validate_where_clause(self, pred: exprs.Expr, error_prefix: str) -> None:
1284
+ """Validates that pred can be expressed as a SQL Where clause"""
1285
+ assert self.is_insertable
1067
1286
  from pixeltable.exprs import Expr
1068
1287
  from pixeltable.plan import Planner
1069
1288
 
1070
- sql_where_clause: Optional[Expr] = None
1071
- if where is not None:
1072
- if not isinstance(where, Expr):
1073
- raise excs.Error(f"'where' argument must be a predicate, got {type(where)}")
1074
- analysis_info = Planner.analyze(self.path, where)
1075
- # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
1076
- if analysis_info.filter is not None:
1077
- raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
1078
- sql_where_clause = analysis_info.sql_where_clause
1079
-
1080
- num_rows = self.propagate_delete(sql_where_clause, base_versions=[], timestamp=time.time())
1289
+ if not isinstance(pred, Expr):
1290
+ raise excs.Error(f'{error_prefix} must be a valid Pixeltable expression; got `{type(pred)}`')
1291
+ analysis_info = Planner.analyze(self.path, pred)
1292
+ # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
1293
+ if analysis_info.filter is not None:
1294
+ raise excs.Error(f'Filter not expressible in SQL: {analysis_info.filter}')
1081
1295
 
1082
- status = UpdateStatus(num_rows=num_rows)
1296
+ def delete(self, where: exprs.Expr | None = None) -> UpdateStatus:
1297
+ assert self.is_insertable
1298
+ if where is not None:
1299
+ self._validate_where_clause(where, error_prefix='`where` argument')
1300
+ status = self.propagate_delete(where, base_versions=[], timestamp=time.time())
1083
1301
  return status
1084
1302
 
1085
1303
  def propagate_delete(
1086
- self, where: Optional[exprs.Expr], base_versions: list[Optional[int]], timestamp: float
1087
- ) -> int:
1088
- """Delete rows in this table and propagate to views.
1089
- Args:
1090
- where: a predicate to filter rows to delete.
1091
- Returns:
1092
- number of deleted rows
1093
- """
1304
+ self, where: exprs.Expr | None, base_versions: list[int | None], timestamp: float
1305
+ ) -> UpdateStatus:
1306
+ """Delete rows in this table and propagate to views"""
1307
+ from pixeltable.catalog import Catalog
1308
+
1309
+ Catalog.get().mark_modified_tvs(self.handle)
1310
+
1311
+ # print(f'calling sql_expr()')
1094
1312
  sql_where_clause = where.sql_expr(exprs.SqlElementCache()) if where is not None else None
1095
- num_rows = self.store_tbl.delete_rows(
1313
+ # #print(f'sql_where_clause={str(sql_where_clause) if sql_where_clause is not None else None}')
1314
+ # sql_cols: list[sql.Column] = []
1315
+ # def collect_cols(col) -> None:
1316
+ # sql_cols.append(col)
1317
+ # sql.sql.visitors.traverse(sql_where_clause, {}, {'column': collect_cols})
1318
+ # x = [f'{str(c)}:{hash(c)}:{id(c.table)}' for c in sql_cols]
1319
+ # print(f'where_clause cols: {x}')
1320
+ del_rows = self.store_tbl.delete_rows(
1096
1321
  self.version + 1, base_versions=base_versions, match_on_vmin=False, where_clause=sql_where_clause
1097
1322
  )
1098
- if num_rows > 0:
1323
+ row_counts = RowCountStats(del_rows=del_rows) # delete
1324
+ result = UpdateStatus(row_count_stats=row_counts)
1325
+ if del_rows > 0:
1099
1326
  # we're creating a new version
1100
- self.version += 1
1101
- self._update_md(timestamp)
1327
+ self.bump_version(timestamp, bump_schema_version=False)
1102
1328
  for view in self.mutable_views:
1103
- num_rows += view.get().propagate_delete(
1329
+ status = view.get().propagate_delete(
1104
1330
  where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
1105
1331
  )
1106
- return num_rows
1332
+ result += status.to_cascade()
1333
+ self.update_status = result
1334
+
1335
+ if del_rows > 0:
1336
+ self._write_md(new_version=True, new_schema_version=False)
1337
+ return result
1107
1338
 
1108
1339
  def revert(self) -> None:
1109
1340
  """Reverts the table to the previous version."""
1110
- assert not self.is_snapshot
1341
+ assert self.is_mutable
1111
1342
  if self.version == 0:
1112
1343
  raise excs.Error('Cannot revert version 0')
1113
1344
  self._revert()
1114
1345
 
1115
- def _delete_column(self, col: Column) -> None:
1116
- """Physically remove the column from the schema and the store table"""
1117
- if col.is_stored:
1118
- self.store_tbl.drop_column(col)
1119
- self.cols.remove(col)
1120
- if col.name is not None:
1121
- del self.cols_by_name[col.name]
1122
- del self.cols_by_id[col.id]
1123
-
1124
1346
  def _revert(self) -> None:
1125
- """Reverts this table version and propagates to views"""
1347
+ """
1348
+ Reverts the stored metadata for this table version and propagates to views.
1349
+
1350
+ Doesn't attempt to revert the in-memory metadata, but instead invalidates this TableVersion instance
1351
+ and relies on Catalog to reload it
1352
+ """
1353
+ from pixeltable.catalog import Catalog
1354
+
1126
1355
  conn = Env.get().conn
1127
1356
  # make sure we don't have a snapshot referencing this version
1128
1357
  # (unclear how to express this with sqlalchemy)
@@ -1137,127 +1366,270 @@ class TableVersion:
1137
1366
  names = [row[1] for row in result]
1138
1367
  raise excs.Error(
1139
1368
  (
1140
- f'Current version is needed for {len(result)} snapshot{"s" if len(result) > 1 else ""} '
1369
+ f'Current version is needed for {len(result)} snapshot{"s" if len(result) > 1 else ""}: '
1141
1370
  f'({", ".join(names)})'
1142
1371
  )
1143
1372
  )
1144
1373
 
1145
- # delete newly-added data
1146
- MediaStore.delete(self.id, version=self.version)
1147
1374
  conn.execute(sql.delete(self.store_tbl.sa_tbl).where(self.store_tbl.sa_tbl.c.v_min == self.version))
1148
1375
 
1149
1376
  # revert new deletions
1150
1377
  set_clause: dict[sql.Column, Any] = {self.store_tbl.sa_tbl.c.v_max: schema.Table.MAX_VERSION}
1151
- for index_info in self.idxs_by_name.values():
1378
+ for index_info in self.idxs.values():
1152
1379
  # copy the index value back from the undo column and reset the undo column to NULL
1153
1380
  set_clause[index_info.val_col.sa_col] = index_info.undo_col.sa_col
1154
1381
  set_clause[index_info.undo_col.sa_col] = None
1155
1382
  stmt = sql.update(self.store_tbl.sa_tbl).values(set_clause).where(self.store_tbl.sa_tbl.c.v_max == self.version)
1156
1383
  conn.execute(stmt)
1157
1384
 
1158
- # revert schema changes
1385
+ # revert schema changes:
1386
+ # - undo changes to self._tbl_md and write that back
1387
+ # - delete newly-added TableVersion/TableSchemaVersion records
1388
+ Catalog.get().mark_modified_tvs(self.handle)
1389
+ old_version = self.version
1159
1390
  if self.version == self.schema_version:
1160
- # delete newly-added columns
1391
+ # physically delete newly-added columns and remove them from the stored md
1161
1392
  added_cols = [col for col in self.cols if col.schema_version_add == self.schema_version]
1162
1393
  if len(added_cols) > 0:
1163
- next_col_id = min(col.id for col in added_cols)
1394
+ self._tbl_md.next_col_id = min(col.id for col in added_cols)
1164
1395
  for col in added_cols:
1165
- self._delete_column(col)
1166
- self.next_col_id = next_col_id
1396
+ if col.is_stored:
1397
+ self.store_tbl.drop_column(col)
1398
+ del self._tbl_md.column_md[col.id]
1167
1399
 
1168
1400
  # remove newly-added indices from the lookup structures
1169
1401
  # (the value and undo columns got removed in the preceding step)
1170
- added_idx_md = [md for md in self.idx_md.values() if md.schema_version_add == self.schema_version]
1402
+ added_idx_md = [md for md in self._tbl_md.index_md.values() if md.schema_version_add == self.schema_version]
1171
1403
  if len(added_idx_md) > 0:
1172
- next_idx_id = min(md.id for md in added_idx_md)
1404
+ self._tbl_md.next_idx_id = min(md.id for md in added_idx_md)
1173
1405
  for md in added_idx_md:
1174
- del self.idx_md[md.id]
1175
- del self.idxs_by_name[md.name]
1176
- self.next_idx_id = next_idx_id
1406
+ # TODO: drop the index
1407
+ del self._tbl_md.index_md[md.id]
1177
1408
 
1178
1409
  # make newly-dropped columns visible again
1179
- dropped_cols = [col for col in self.cols if col.schema_version_drop == self.schema_version]
1180
- for col in dropped_cols:
1181
- col.schema_version_drop = None
1410
+ dropped_col_md = [
1411
+ md for md in self._tbl_md.column_md.values() if md.schema_version_drop == self.schema_version
1412
+ ]
1413
+ for col_md in dropped_col_md:
1414
+ col_md.schema_version_drop = None
1182
1415
 
1183
1416
  # make newly-dropped indices visible again
1184
- dropped_idx_md = [md for md in self.idx_md.values() if md.schema_version_drop == self.schema_version]
1185
- for md in dropped_idx_md:
1186
- md.schema_version_drop = None
1187
-
1188
- session = Env.get().session
1189
- # we need to determine the preceding schema version and reload the schema
1190
- schema_version_md_dict = (
1191
- session.query(schema.TableSchemaVersion.md)
1192
- .where(schema.TableSchemaVersion.tbl_id == self.id)
1193
- .where(schema.TableSchemaVersion.schema_version == self.schema_version)
1194
- .scalar()
1195
- )
1196
- preceding_schema_version = schema_version_md_dict['preceding_schema_version']
1197
- preceding_schema_version_md_dict = (
1198
- session.query(schema.TableSchemaVersion.md)
1199
- .where(schema.TableSchemaVersion.tbl_id == self.id)
1200
- .where(schema.TableSchemaVersion.schema_version == preceding_schema_version)
1201
- .scalar()
1202
- )
1203
- preceding_schema_version_md = schema.md_from_dict(
1204
- schema.TableSchemaVersionMd, preceding_schema_version_md_dict
1205
- )
1206
- tbl_md = self._create_tbl_md()
1207
- self._init_schema(tbl_md, preceding_schema_version_md)
1417
+ dropped_idx_md = [
1418
+ md for md in self._tbl_md.index_md.values() if md.schema_version_drop == self.schema_version
1419
+ ]
1420
+ for idx_md in dropped_idx_md:
1421
+ idx_md.schema_version_drop = None
1208
1422
 
1209
1423
  conn.execute(
1210
1424
  sql.delete(schema.TableSchemaVersion.__table__)
1211
1425
  .where(schema.TableSchemaVersion.tbl_id == self.id)
1212
1426
  .where(schema.TableSchemaVersion.schema_version == self.schema_version)
1213
1427
  )
1214
- self.schema_version = preceding_schema_version
1215
- self.comment = preceding_schema_version_md.comment
1216
- self.num_retained_versions = preceding_schema_version_md.num_retained_versions
1428
+ self._tbl_md.current_schema_version = self._schema_version_md.preceding_schema_version
1217
1429
 
1218
1430
  conn.execute(
1219
1431
  sql.delete(schema.TableVersion.__table__)
1220
1432
  .where(schema.TableVersion.tbl_id == self.id)
1221
1433
  .where(schema.TableVersion.version == self.version)
1222
1434
  )
1223
- self.version -= 1
1224
- conn.execute(
1225
- sql.update(schema.Table.__table__)
1226
- .values({schema.Table.md: dataclasses.asdict(self._create_tbl_md())})
1227
- .where(schema.Table.id == self.id)
1228
- )
1435
+
1436
+ self._tbl_md.current_version = self._version_md.version = self.version - 1
1437
+
1438
+ self._write_md(new_version=False, new_schema_version=False)
1229
1439
 
1230
1440
  # propagate to views
1231
1441
  for view in self.mutable_views:
1232
1442
  view.get()._revert()
1233
- _logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
1234
1443
 
1235
- def _init_external_stores(self, tbl_md: schema.TableMd) -> None:
1236
- for store_md in tbl_md.external_stores:
1444
+ # force reload on next operation
1445
+ self.is_validated = False
1446
+ Catalog.get().remove_tbl_version(self.key)
1447
+
1448
+ # delete newly-added data
1449
+ # Do this at the end, after all DB operations have completed.
1450
+ # TODO: The transaction could still fail. Really this should be done via PendingTableOps.
1451
+ self.delete_media(tbl_version=old_version)
1452
+ _logger.info(f'TableVersion {self.name!r}: reverted to version {self.version}')
1453
+
1454
+ def _init_external_stores(self) -> None:
1455
+ from pixeltable.io.external_store import ExternalStore
1456
+
1457
+ for store_md in self.tbl_md.external_stores:
1237
1458
  store_cls = resolve_symbol(store_md['class'])
1238
- assert isinstance(store_cls, type) and issubclass(store_cls, pxt.io.ExternalStore)
1459
+ assert isinstance(store_cls, type) and issubclass(store_cls, ExternalStore)
1239
1460
  store = store_cls.from_dict(store_md['md'])
1240
1461
  self.external_stores[store.name] = store
1241
1462
 
1242
- def link_external_store(self, store: pxt.io.ExternalStore) -> None:
1243
- store.link(self) # May result in additional metadata changes
1463
+ def link_external_store(self, store: ExternalStore) -> None:
1464
+ self.bump_version(bump_schema_version=True)
1465
+
1244
1466
  self.external_stores[store.name] = store
1245
- self._update_md(time.time(), update_tbl_version=False)
1467
+ self._tbl_md.external_stores.append(
1468
+ {'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()}
1469
+ )
1470
+ self._write_md(new_version=True, new_schema_version=True)
1471
+
1472
+ def unlink_external_store(self, store: ExternalStore) -> None:
1473
+ del self.external_stores[store.name]
1474
+ self.bump_version(bump_schema_version=True)
1475
+ idx = next(i for i, store_md in enumerate(self._tbl_md.external_stores) if store_md['md']['name'] == store.name)
1476
+ self._tbl_md.external_stores.pop(idx)
1477
+ self._write_md(new_version=True, new_schema_version=True)
1478
+
1479
+ @property
1480
+ def id(self) -> UUID:
1481
+ return self.key.tbl_id
1246
1482
 
1247
- def unlink_external_store(self, store_name: str, delete_external_data: bool) -> None:
1248
- assert store_name in self.external_stores
1249
- store = self.external_stores[store_name]
1250
- store.unlink(self) # May result in additional metadata changes
1251
- del self.external_stores[store_name]
1252
- self._update_md(time.time(), update_tbl_version=False)
1483
+ @property
1484
+ def effective_version(self) -> int | None:
1485
+ return self.key.effective_version
1253
1486
 
1254
- if delete_external_data and isinstance(store, pxt.io.external_store.Project):
1255
- store.delete()
1487
+ @property
1488
+ def anchor_tbl_id(self) -> UUID | None:
1489
+ return self.key.anchor_tbl_id
1490
+
1491
+ @property
1492
+ def tbl_md(self) -> schema.TableMd:
1493
+ return self._tbl_md
1494
+
1495
+ @property
1496
+ def version_md(self) -> schema.VersionMd:
1497
+ return self._version_md
1498
+
1499
+ @property
1500
+ def schema_version_md(self) -> schema.SchemaVersionMd:
1501
+ return self._schema_version_md
1502
+
1503
+ @property
1504
+ def view_md(self) -> schema.ViewMd | None:
1505
+ return self._tbl_md.view_md
1506
+
1507
+ @property
1508
+ def name(self) -> str:
1509
+ return self._tbl_md.name
1510
+
1511
+ @property
1512
+ def user(self) -> str | None:
1513
+ return self._tbl_md.user
1514
+
1515
+ @property
1516
+ def is_replica(self) -> bool:
1517
+ return self._tbl_md.is_replica
1518
+
1519
+ @property
1520
+ def comment(self) -> str:
1521
+ return self._schema_version_md.comment
1522
+
1523
+ @comment.setter
1524
+ def comment(self, c: str) -> None:
1525
+ assert self.effective_version is None
1526
+ self._schema_version_md.comment = c
1527
+
1528
+ @property
1529
+ def num_retained_versions(self) -> int:
1530
+ return self._schema_version_md.num_retained_versions
1531
+
1532
+ @num_retained_versions.setter
1533
+ def num_retained_versions(self, n: int) -> None:
1534
+ assert self.effective_version is None
1535
+ self._schema_version_md.num_retained_versions = n
1536
+
1537
+ @property
1538
+ def version(self) -> int:
1539
+ return self._version_md.version
1540
+
1541
+ @property
1542
+ def created_at(self) -> float:
1543
+ return self._version_md.created_at
1544
+
1545
+ @property
1546
+ def schema_version(self) -> int:
1547
+ return self._schema_version_md.schema_version
1548
+
1549
+ def bump_version(self, timestamp: float | None = None, *, bump_schema_version: bool) -> None:
1550
+ """
1551
+ Increments the table version and adjusts all associated metadata. This will *not* trigger a database action;
1552
+ _write_md() must be called separately to persist the changes.
1553
+
1554
+ Args:
1555
+ timestamp: the creation time for the new version. Can be used to synchronize multiple metadata changes
1556
+ to the same timestamp. If `None`, then defaults to `time.time()`.
1557
+ bump_schema_version: if True, also adjusts the schema version (setting it equal to the new version)
1558
+ and associated metadata.
1559
+ """
1560
+ from pixeltable.catalog import Catalog
1561
+
1562
+ assert self.effective_version is None
1563
+
1564
+ if timestamp is None:
1565
+ timestamp = time.time()
1566
+
1567
+ Catalog.get().mark_modified_tvs(self.handle)
1568
+
1569
+ old_version = self._tbl_md.current_version
1570
+ assert self._version_md.version == old_version
1571
+ new_version = old_version + 1
1572
+ self._tbl_md.current_version = new_version
1573
+ self._version_md.version = new_version
1574
+ self._version_md.created_at = timestamp
1575
+
1576
+ if bump_schema_version:
1577
+ old_schema_version = self._tbl_md.current_schema_version
1578
+ assert self._version_md.schema_version == old_schema_version
1579
+ assert self._schema_version_md.schema_version == old_schema_version
1580
+ self._tbl_md.current_schema_version = new_version
1581
+ self._version_md.schema_version = new_version
1582
+ self._schema_version_md.preceding_schema_version = old_schema_version
1583
+ self._schema_version_md.schema_version = new_version
1584
+
1585
+ @property
1586
+ def preceding_schema_version(self) -> int | None:
1587
+ return self._schema_version_md.preceding_schema_version
1588
+
1589
+ @property
1590
+ def update_status(self) -> UpdateStatus | None:
1591
+ return self._version_md.update_status
1592
+
1593
+ @update_status.setter
1594
+ def update_status(self, status: UpdateStatus) -> None:
1595
+ assert self.effective_version is None
1596
+ self._version_md.update_status = status
1597
+
1598
+ @property
1599
+ def media_validation(self) -> MediaValidation:
1600
+ return MediaValidation[self._schema_version_md.media_validation.upper()]
1601
+
1602
+ def next_col_id(self) -> int:
1603
+ val = self._tbl_md.next_col_id
1604
+ self._tbl_md.next_col_id += 1
1605
+ return val
1606
+
1607
+ @property
1608
+ def next_idx_id(self) -> int:
1609
+ return self._tbl_md.next_idx_id
1610
+
1611
+ @next_idx_id.setter
1612
+ def next_idx_id(self, id: int) -> None:
1613
+ assert self.effective_version is None
1614
+ self._tbl_md.next_idx_id = id
1615
+
1616
+ @property
1617
+ def next_row_id(self) -> int:
1618
+ return self._tbl_md.next_row_id
1619
+
1620
+ @next_row_id.setter
1621
+ def next_row_id(self, id: int) -> None:
1622
+ assert self.effective_version is None
1623
+ self._tbl_md.next_row_id = id
1256
1624
 
1257
1625
  @property
1258
1626
  def is_snapshot(self) -> bool:
1259
1627
  return self.effective_version is not None
1260
1628
 
1629
+ @property
1630
+ def is_mutable(self) -> bool:
1631
+ return not self.is_snapshot and not self.is_replica
1632
+
1261
1633
  @property
1262
1634
  def is_view(self) -> bool:
1263
1635
  return self.view_md is not None
@@ -1270,9 +1642,10 @@ class TableVersion:
1270
1642
  def is_component_view(self) -> bool:
1271
1643
  return self.iterator_cls is not None
1272
1644
 
1645
+ @property
1273
1646
  def is_insertable(self) -> bool:
1274
1647
  """Returns True if this corresponds to an InsertableTable"""
1275
- return not self.is_snapshot and not self.is_view
1648
+ return self.is_mutable and not self.is_view
1276
1649
 
1277
1650
  def is_iterator_column(self, col: Column) -> bool:
1278
1651
  """Returns True if col is produced by an iterator"""
@@ -1283,6 +1656,10 @@ class TableVersion:
1283
1656
  """Return True if column was created by Pixeltable"""
1284
1657
  return col.name == _POS_COLUMN_NAME and self.is_component_view
1285
1658
 
1659
+ def iterator_columns(self) -> list[Column]:
1660
+ """Return all iterator-produced columns"""
1661
+ return self.cols[1 : self.num_iterator_cols + 1]
1662
+
1286
1663
  def user_columns(self) -> list[Column]:
1287
1664
  """Return all non-system columns"""
1288
1665
  return [c for c in self.cols if not self.is_system_column(c)]
@@ -1307,27 +1684,36 @@ class TableVersion:
1307
1684
  names = [c.name for c in self.cols_by_name.values() if c.is_computed]
1308
1685
  return names
1309
1686
 
1310
- def _record_refd_columns(self, col: Column) -> None:
1311
- """Update Column.dependent_cols for all cols referenced in col.value_expr."""
1312
- from pixeltable import exprs
1313
-
1314
- if col.value_expr_dict is not None:
1315
- # if we have a value_expr_dict, use that instead of instantiating the value_expr
1316
- refd_cols = exprs.Expr.get_refd_columns(col.value_expr_dict)
1317
- else:
1318
- refd_cols = [e.col for e in col.value_expr.subexprs(expr_class=exprs.ColumnRef)]
1319
- for refd_col in refd_cols:
1320
- refd_col.dependent_cols.add(col)
1321
-
1322
1687
  def get_idx_val_columns(self, cols: Iterable[Column]) -> set[Column]:
1323
- result = {info.val_col for col in cols for info in col.get_idx_info().values()}
1324
- return result
1688
+ # assumes that the indexed columns are all in this table
1689
+ assert all(col.get_tbl().id == self.id for col in cols)
1690
+ col_ids = {col.id for col in cols}
1691
+ return {info.val_col for info in self.idxs.values() if info.col.id in col_ids}
1692
+
1693
+ def get_idx(self, col: Column, idx_name: str | None, idx_cls: type[index.IndexBase]) -> TableVersion.IndexInfo:
1694
+ if not self.supports_idxs:
1695
+ raise excs.Error('Snapshot does not support indices')
1696
+ if col.qid not in self.idxs_by_col:
1697
+ raise excs.Error(f'Column {col.name!r} does not have a {idx_cls.display_name()} index')
1698
+ candidates = [info for info in self.idxs_by_col[col.qid] if isinstance(info.idx, idx_cls)]
1699
+ if len(candidates) == 0:
1700
+ raise excs.Error(f'No {idx_cls.display_name()} index found for column {col.name!r}')
1701
+ if len(candidates) > 1 and idx_name is None:
1702
+ raise excs.Error(
1703
+ f'Column {col.name!r} has multiple {idx_cls.display_name()} indices; specify `idx_name` instead'
1704
+ )
1705
+ if idx_name is not None and idx_name not in [info.name for info in candidates]:
1706
+ raise excs.Error(f'Index {idx_name!r} not found for column {col.name!r}')
1707
+ return candidates[0] if idx_name is None else next(info for info in candidates if info.name == idx_name)
1325
1708
 
1326
1709
  def get_dependent_columns(self, cols: Iterable[Column]) -> set[Column]:
1327
1710
  """
1328
1711
  Return the set of columns that transitively depend on any of the given ones.
1329
1712
  """
1330
- result = {dependent_col for col in cols for dependent_col in col.dependent_cols}
1713
+ from pixeltable.catalog import Catalog
1714
+
1715
+ cat = Catalog.get()
1716
+ result = set().union(*[cat.get_column_dependents(col.get_tbl().id, col.id) for col in cols])
1331
1717
  if len(result) > 0:
1332
1718
  result.update(self.get_dependent_columns(result))
1333
1719
  return result
@@ -1339,82 +1725,17 @@ class TableVersion:
1339
1725
  return 1
1340
1726
 
1341
1727
  @classmethod
1342
- def _create_column_md(cls, cols: list[Column]) -> dict[int, schema.ColumnMd]:
1343
- column_md: dict[int, schema.ColumnMd] = {}
1344
- for col in cols:
1345
- value_expr_dict = col.value_expr.as_dict() if col.value_expr is not None else None
1346
- assert col.is_pk is not None
1347
- column_md[col.id] = schema.ColumnMd(
1348
- id=col.id,
1349
- col_type=col.col_type.as_dict(),
1350
- is_pk=col.is_pk,
1351
- schema_version_add=col.schema_version_add,
1352
- schema_version_drop=col.schema_version_drop,
1353
- value_expr=value_expr_dict,
1354
- stored=col.stored,
1355
- )
1356
- return column_md
1357
-
1358
- @classmethod
1359
- def _create_stores_md(cls, stores: Iterable[pxt.io.ExternalStore]) -> list[dict[str, Any]]:
1728
+ def _create_stores_md(cls, stores: Iterable[ExternalStore]) -> list[dict[str, Any]]:
1360
1729
  return [
1361
1730
  {'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()} for store in stores
1362
1731
  ]
1363
1732
 
1364
- def _create_tbl_md(self) -> schema.TableMd:
1365
- return schema.TableMd(
1366
- tbl_id=str(self.id),
1367
- name=self.name,
1368
- user=self.user,
1369
- is_replica=self.is_replica,
1370
- current_version=self.version,
1371
- current_schema_version=self.schema_version,
1372
- next_col_id=self.next_col_id,
1373
- next_idx_id=self.next_idx_id,
1374
- next_row_id=self.next_rowid,
1375
- column_md=self._create_column_md(self.cols),
1376
- index_md=self.idx_md,
1377
- external_stores=self._create_stores_md(self.external_stores.values()),
1378
- view_md=self.view_md,
1379
- additional_md={},
1380
- )
1381
-
1382
- def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
1383
- return schema.TableVersionMd(
1384
- tbl_id=str(self.id),
1385
- created_at=timestamp,
1386
- version=self.version,
1387
- schema_version=self.schema_version,
1388
- additional_md={},
1389
- )
1390
-
1391
- def _create_schema_version_md(self, preceding_schema_version: int) -> schema.TableSchemaVersionMd:
1392
- column_md: dict[int, schema.SchemaColumn] = {}
1393
- for pos, col in enumerate(self.cols_by_name.values()):
1394
- column_md[col.id] = schema.SchemaColumn(
1395
- pos=pos,
1396
- name=col.name,
1397
- media_validation=col._media_validation.name.lower() if col._media_validation is not None else None,
1398
- )
1399
- # preceding_schema_version to be set by the caller
1400
- return schema.TableSchemaVersionMd(
1401
- tbl_id=str(self.id),
1402
- schema_version=self.schema_version,
1403
- preceding_schema_version=preceding_schema_version,
1404
- columns=column_md,
1405
- num_retained_versions=self.num_retained_versions,
1406
- comment=self.comment,
1407
- media_validation=self.media_validation.name.lower(),
1408
- additional_md={},
1409
- )
1410
-
1411
1733
  def as_dict(self) -> dict:
1412
- return {'id': str(self.id), 'effective_version': self.effective_version}
1734
+ return self.key.as_dict()
1413
1735
 
1414
1736
  @classmethod
1415
1737
  def from_dict(cls, d: dict) -> TableVersion:
1416
- from pixeltable import catalog
1738
+ from pixeltable.catalog import Catalog
1417
1739
 
1418
- id = UUID(d['id'])
1419
- effective_version = d['effective_version']
1420
- return catalog.Catalog.get().get_tbl_version(id, effective_version)
1740
+ key = TableVersionKey.from_dict(d)
1741
+ return Catalog.get().get_tbl_version(key)