pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,39 +1,105 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import copy
3
4
  import dataclasses
4
5
  import importlib
5
- import inspect
6
+ import itertools
6
7
  import logging
7
8
  import time
8
9
  import uuid
9
- from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional
10
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Literal
10
11
  from uuid import UUID
11
12
 
13
+ import jsonschema.exceptions
12
14
  import sqlalchemy as sql
13
- import sqlalchemy.orm as orm
15
+ from sqlalchemy import exc as sql_exc
14
16
 
15
- import pixeltable as pxt
16
17
  import pixeltable.exceptions as excs
17
18
  import pixeltable.exprs as exprs
18
- import pixeltable.func as func
19
19
  import pixeltable.index as index
20
20
  import pixeltable.type_system as ts
21
21
  from pixeltable.env import Env
22
22
  from pixeltable.iterators import ComponentIterator
23
23
  from pixeltable.metadata import schema
24
24
  from pixeltable.utils.filecache import FileCache
25
- from pixeltable.utils.media_store import MediaStore
25
+ from pixeltable.utils.object_stores import ObjectOps
26
26
 
27
27
  from ..func.globals import resolve_symbol
28
28
  from .column import Column
29
- from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, UpdateStatus, is_valid_identifier, MediaValidation
29
+ from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, QColumnId, is_valid_identifier
30
+ from .tbl_ops import DeleteTableMdOp, DeleteTableMediaFilesOp, DropStoreTableOp, TableOp
31
+ from .update_status import RowCountStats, UpdateStatus
30
32
 
31
33
  if TYPE_CHECKING:
32
34
  from pixeltable import exec, store
35
+ from pixeltable._query import Query
36
+ from pixeltable.catalog.table_version_handle import TableVersionHandle
37
+ from pixeltable.io import ExternalStore
38
+ from pixeltable.plan import SampleClause
39
+
40
+ from .table_version_path import TableVersionPath
33
41
 
34
42
  _logger = logging.getLogger('pixeltable')
35
43
 
36
44
 
45
+ @dataclasses.dataclass(frozen=True)
46
+ class TableVersionMd:
47
+ """
48
+ Complete set of md records for a specific TableVersion instance.
49
+ """
50
+
51
+ tbl_md: schema.TableMd
52
+ version_md: schema.VersionMd
53
+ schema_version_md: schema.SchemaVersionMd
54
+
55
+ @property
56
+ def is_pure_snapshot(self) -> bool:
57
+ return (
58
+ self.tbl_md.view_md is not None
59
+ and self.tbl_md.view_md.is_snapshot
60
+ and self.tbl_md.view_md.predicate is None
61
+ and self.tbl_md.view_md.sample_clause is None
62
+ and len(self.schema_version_md.columns) == 0
63
+ )
64
+
65
+ def as_dict(self) -> dict:
66
+ from .catalog import md_dict_factory
67
+
68
+ return dataclasses.asdict(self, dict_factory=md_dict_factory)
69
+
70
+ @classmethod
71
+ def from_dict(cls, data: dict[str, Any]) -> TableVersionMd:
72
+ return schema.md_from_dict(cls, data)
73
+
74
+
75
+ @dataclasses.dataclass(frozen=True, slots=True)
76
+ class TableVersionKey:
77
+ tbl_id: UUID
78
+ effective_version: int | None
79
+ anchor_tbl_id: UUID | None
80
+
81
+ def __post_init__(self) -> None:
82
+ assert self.effective_version is None or self.anchor_tbl_id is None
83
+
84
+ # Allow unpacking as a tuple
85
+ def __iter__(self) -> Iterator[Any]:
86
+ return iter((self.tbl_id, self.effective_version, self.anchor_tbl_id))
87
+
88
+ def as_dict(self) -> dict:
89
+ return {
90
+ 'id': str(self.tbl_id),
91
+ 'effective_version': self.effective_version,
92
+ 'anchor_tbl_id': str(self.anchor_tbl_id) if self.anchor_tbl_id is not None else None,
93
+ }
94
+
95
+ @classmethod
96
+ def from_dict(cls, d: dict) -> TableVersionKey:
97
+ tbl_id = UUID(d['id'])
98
+ effective_version = d['effective_version']
99
+ anchor_tbl_id = d.get('anchor_tbl_id')
100
+ return cls(tbl_id, effective_version, UUID(anchor_tbl_id) if anchor_tbl_id is not None else None)
101
+
102
+
37
103
  class TableVersion:
38
104
  """
39
105
  TableVersion represents a particular version of a table/view along with its physical representation:
@@ -47,42 +113,71 @@ class TableVersion:
47
113
  * TODO: create a separate hierarchy of objects that records the version-independent tree of tables/views, and
48
114
  have TableVersions reference those
49
115
  - mutable TableVersions record their TableVersionPath, which is needed for expr evaluation in updates
116
+
117
+ Instances of TableVersion should not be stored as member variables (ie, used across transaction boundaries).
118
+ Use a TableVersionHandle instead.
119
+
120
+ Only TableVersion and Catalog interact directly with stored metadata. Everything else needs to go through these
121
+ two classes.
122
+
123
+ TableVersions come in three "flavors" depending on the `effective_version` and `anchor_tbl_id` settings:
124
+ - if both are None, it's a live table that tracks `tbl_md.current_version`
125
+ - if `effective_version` is defined, it's a snapshot of the specific version given by `effective_version`
126
+ - if `anchor_tbl_id` is defined, it's a replica table that is "anchored" to the given table, in the following
127
+ sense: if n is the latest non-fragment version of `anchor_tbl_id`, then the tracked version is m, where m
128
+ is the latest version of `tbl_id` (possibly a fragment) with created_at(m) <= created_at(n).
129
+ In the typical case, `anchor_tbl_id` is a descendant of `tbl_id` and the anchored TableVersion instance
130
+ appears along the TableVersionPath for `anchor_tbl_id`.
131
+ In the TableVersionPath for a replica, all path elements will have the same anchor_tbl_id, the tbl_id
132
+ of the primary (leaf) table. (It is also possible for one or more path elements at the base to be snapshots.)
133
+ At most one of `effective_version` and `anchor_tbl_id` can be specified.
50
134
  """
51
135
 
52
- id: UUID
53
- name: str
54
- version: int
55
- comment: str
56
- media_validation: MediaValidation
57
- num_retained_versions: int
58
- schema_version: int
59
- view_md: Optional[schema.ViewMd]
60
- is_snapshot: bool
61
- effective_version: Optional[int]
62
- path: Optional[pxt.catalog.TableVersionPath]
63
- base: Optional[TableVersion]
64
- next_col_id: int
65
- next_idx_id: int
66
- next_rowid: int
67
- predicate: Optional[exprs.Expr]
68
- mutable_views: list[TableVersion]
69
- iterator_cls: Optional[type[ComponentIterator]]
70
- iterator_args: Optional[exprs.InlineDict]
136
+ key: TableVersionKey
137
+
138
+ # record metadata stored in catalog
139
+ _tbl_md: schema.TableMd
140
+ _version_md: schema.VersionMd
141
+ _schema_version_md: schema.SchemaVersionMd
142
+
143
+ path: 'TableVersionPath' | None # only set for non-snapshots; needed to resolve computed cols
144
+ base: TableVersionHandle | None # only set for views
145
+ predicate: exprs.Expr | None
146
+ sample_clause: 'SampleClause' | None
147
+
148
+ iterator_cls: type[ComponentIterator] | None
149
+ iterator_args: exprs.InlineDict | None
71
150
  num_iterator_cols: int
72
151
 
152
+ # target for data operation propagation (only set for non-snapshots, and only records non-snapshot views)
153
+ mutable_views: frozenset[TableVersionHandle]
154
+
73
155
  # contains complete history of columns, incl dropped ones
74
156
  cols: list[Column]
75
157
  # contains only user-facing (named) columns visible in this version
76
158
  cols_by_name: dict[str, Column]
77
159
  # contains only columns visible in this version, both system and user
78
160
  cols_by_id: dict[int, Column]
79
- # needed for _create_tbl_md()
80
- idx_md: dict[int, schema.IndexMd]
81
- # contains only actively maintained indices
161
+
162
+ # True if this TableVersion instance can have indices:
163
+ # - live version of a mutable table
164
+ # - the most recent version of a replica
165
+ supports_idxs: bool
166
+
167
+ # only populated with indices visible in this TableVersion instance
168
+ idxs: dict[int, TableVersion.IndexInfo] # key: index id
82
169
  idxs_by_name: dict[str, TableVersion.IndexInfo]
170
+ idxs_by_col: dict[QColumnId, list[TableVersion.IndexInfo]]
171
+
172
+ external_stores: dict[str, ExternalStore]
173
+ store_tbl: 'store.StoreBase' | None
83
174
 
84
- external_stores: dict[str, pxt.io.ExternalStore]
85
- store_tbl: 'store.StoreBase'
175
+ is_initialized: bool # True if init() has been called
176
+
177
+ # used by Catalog to invalidate cached instances at the end of a transaction;
178
+ # True if this instance reflects the state of stored metadata in the context of this transaction and
179
+ # it is the instance cached in Catalog
180
+ is_validated: bool
86
181
 
87
182
  @dataclasses.dataclass
88
183
  class IndexInfo:
@@ -93,58 +188,54 @@ class TableVersion:
93
188
  val_col: Column
94
189
  undo_col: Column
95
190
 
96
-
97
191
  def __init__(
98
- self, id: UUID, tbl_md: schema.TableMd, version: int, schema_version_md: schema.TableSchemaVersionMd,
99
- base: Optional[TableVersion] = None, base_path: Optional[pxt.catalog.TableVersionPath] = None,
100
- is_snapshot: Optional[bool] = None
192
+ self,
193
+ key: TableVersionKey,
194
+ tbl_md: schema.TableMd,
195
+ version_md: schema.VersionMd,
196
+ schema_version_md: schema.SchemaVersionMd,
197
+ mutable_views: list[TableVersionHandle],
198
+ base_path: 'TableVersionPath' | None = None,
199
+ base: TableVersionHandle | None = None,
101
200
  ):
102
- # only one of base and base_path can be non-None
103
- assert base is None or base_path is None
104
- self.id = id
105
- self.name = tbl_md.name
106
- self.version = version
107
- self.comment = schema_version_md.comment
108
- self.num_retained_versions = schema_version_md.num_retained_versions
109
- self.schema_version = schema_version_md.schema_version
110
- self.view_md = tbl_md.view_md # save this as-is, it's needed for _create_md()
111
- is_view = tbl_md.view_md is not None
112
- self.is_snapshot = (is_view and tbl_md.view_md.is_snapshot) or bool(is_snapshot)
113
- self.media_validation = MediaValidation[schema_version_md.media_validation.upper()]
114
- # a mutable TableVersion doesn't have a static version
115
- self.effective_version = self.version if self.is_snapshot else None
201
+ assert key.anchor_tbl_id is None or isinstance(key.anchor_tbl_id, UUID)
202
+
203
+ self.is_validated = True # a freshly constructed instance is always valid
204
+ self.is_initialized = False
205
+ self.key = key
206
+ self._tbl_md = copy.deepcopy(tbl_md)
207
+ self._version_md = copy.deepcopy(version_md)
208
+ self._schema_version_md = copy.deepcopy(schema_version_md)
209
+ assert not (self.is_view and base is None)
210
+ self.base = base
211
+ self.store_tbl = None
116
212
 
117
213
  # mutable tables need their TableVersionPath for expr eval during updates
214
+ from .table_version_handle import TableVersionHandle
118
215
  from .table_version_path import TableVersionPath
119
- if self.is_snapshot:
120
- self.path = None
121
- else:
122
- self.path = TableVersionPath(self, base=base_path) if base_path is not None else TableVersionPath(self)
123
216
 
124
- self.base = base_path.tbl_version if base_path is not None else base
125
217
  if self.is_snapshot:
126
- self.next_col_id = -1
127
- self.next_idx_id = -1 # TODO: can snapshots have separate indices?
128
- self.next_rowid = -1
218
+ self.path = None
129
219
  else:
130
- assert tbl_md.current_version == self.version
131
- self.next_col_id = tbl_md.next_col_id
132
- self.next_idx_id = tbl_md.next_idx_id
133
- self.next_rowid = tbl_md.next_row_id
220
+ self_handle = TableVersionHandle(key)
221
+ if self.is_view:
222
+ assert base_path is not None
223
+ self.path = TableVersionPath(self_handle, base=base_path)
134
224
 
135
225
  # view-specific initialization
136
226
  from pixeltable import exprs
137
- predicate_dict = None if not is_view or tbl_md.view_md.predicate is None else tbl_md.view_md.predicate
227
+ from pixeltable.plan import SampleClause
228
+
229
+ predicate_dict = None if self.view_md is None or self.view_md.predicate is None else self.view_md.predicate
138
230
  self.predicate = exprs.Expr.from_dict(predicate_dict) if predicate_dict is not None else None
139
- self.mutable_views = [] # targets for update propagation
140
- if self.base is not None and not self.base.is_snapshot and not self.is_snapshot:
141
- self.base.mutable_views.append(self)
231
+ sample_dict = None if self.view_md is None or self.view_md.sample_clause is None else self.view_md.sample_clause
232
+ self.sample_clause = SampleClause.from_dict(sample_dict) if sample_dict is not None else None
142
233
 
143
234
  # component view-specific initialization
144
235
  self.iterator_cls = None
145
236
  self.iterator_args = None
146
237
  self.num_iterator_cols = 0
147
- if is_view and tbl_md.view_md.iterator_class_fqn is not None:
238
+ if self.view_md is not None and self.view_md.iterator_class_fqn is not None:
148
239
  module_name, class_name = tbl_md.view_md.iterator_class_fqn.rsplit('.', 1)
149
240
  module = importlib.import_module(module_name)
150
241
  self.iterator_cls = getattr(module, class_name)
@@ -153,614 +244,837 @@ class TableVersion:
153
244
  self.num_iterator_cols = len(output_schema)
154
245
  assert tbl_md.view_md.iterator_args is not None
155
246
 
156
- # register this table version now so that it's available when we're re-creating value exprs
157
- cat = pxt.catalog.Catalog.get()
158
- cat.tbl_versions[(self.id, self.effective_version)] = self
247
+ self.mutable_views = frozenset(mutable_views)
248
+ assert self.is_mutable or len(self.mutable_views) == 0
159
249
 
160
- # init schema after we determined whether we're a component view, and before we create the store table
161
250
  self.cols = []
162
251
  self.cols_by_name = {}
163
252
  self.cols_by_id = {}
164
- self.idx_md = tbl_md.index_md
253
+ self.idxs = {}
165
254
  self.idxs_by_name = {}
255
+ self.idxs_by_col = {}
256
+ self.supports_idxs = self.effective_version is None or (
257
+ self.is_replica and self.effective_version == self.tbl_md.current_version
258
+ )
166
259
  self.external_stores = {}
167
260
 
168
- self._init_schema(tbl_md, schema_version_md)
169
-
170
- # Init external stores (this needs to happen after the schema is created)
171
- self._init_external_stores(tbl_md)
172
-
173
261
  def __hash__(self) -> int:
174
262
  return hash(self.id)
175
263
 
176
- def create_snapshot_copy(self) -> TableVersion:
177
- """Create a snapshot copy of this TableVersion"""
178
- assert not self.is_snapshot
179
- return TableVersion(
180
- self.id, self._create_tbl_md(), self.version,
181
- self._create_schema_version_md(preceding_schema_version=0), # preceding_schema_version: dummy value
182
- is_snapshot=True, base=self.base)
264
+ @property
265
+ def versioned_name(self) -> str:
266
+ if self.effective_version is None:
267
+ return self.name
268
+ else:
269
+ return f'{self.name}:{self.effective_version}'
270
+
271
+ def __repr__(self) -> str:
272
+ return (
273
+ f'TableVersion(id={self.id!r}, name={self.name!r}, effective_version={self.effective_version}, '
274
+ f'anchor_tbl_id={self.anchor_tbl_id}; version={self.version})'
275
+ )
276
+
277
+ @property
278
+ def handle(self) -> 'TableVersionHandle':
279
+ from .table_version_handle import TableVersionHandle
280
+
281
+ return TableVersionHandle(self.key, tbl_version=self)
183
282
 
184
283
  @classmethod
185
- def create(
186
- cls, session: orm.Session, dir_id: UUID, name: str, cols: list[Column], num_retained_versions: int,
187
- comment: str, media_validation: MediaValidation, base_path: Optional[pxt.catalog.TableVersionPath] = None,
188
- view_md: Optional[schema.ViewMd] = None
189
- ) -> tuple[UUID, Optional[TableVersion]]:
190
- # assign ids
191
- cols_by_name: dict[str, Column] = {}
284
+ def create_initial_md(
285
+ cls,
286
+ name: str,
287
+ cols: list[Column],
288
+ num_retained_versions: int,
289
+ comment: str,
290
+ media_validation: MediaValidation,
291
+ create_default_idxs: bool,
292
+ view_md: schema.ViewMd | None = None,
293
+ ) -> TableVersionMd:
294
+ from .table_version_handle import TableVersionHandle
295
+
296
+ user = Env.get().user
297
+ timestamp = time.time()
298
+
299
+ tbl_id = uuid.uuid4()
300
+ tbl_id_str = str(tbl_id)
301
+ tbl_handle = TableVersionHandle(TableVersionKey(tbl_id, None, None))
302
+ column_ids = itertools.count()
303
+ index_ids = itertools.count()
304
+
305
+ # assign ids, create metadata
306
+ column_md: dict[int, schema.ColumnMd] = {}
307
+ schema_col_md: dict[int, schema.SchemaColumn] = {}
192
308
  for pos, col in enumerate(cols):
193
- col.id = pos
309
+ col.tbl_handle = tbl_handle
310
+ col.id = next(column_ids)
194
311
  col.schema_version_add = 0
195
- cols_by_name[col.name] = col
196
312
  if col.is_computed:
197
313
  col.check_value_expr()
314
+ col_md, sch_md = col.to_md(pos)
315
+ assert sch_md is not None
316
+ column_md[col.id] = col_md
317
+ schema_col_md[col.id] = sch_md
318
+
319
+ index_md: dict[int, schema.IndexMd] = {}
320
+ if create_default_idxs and (view_md is None or not view_md.is_snapshot):
321
+ index_cols: list[Column] = []
322
+ for col in (c for c in cols if cls._is_btree_indexable(c)):
323
+ idx = index.BtreeIndex()
324
+ val_col, undo_col = cls._create_index_columns(col, idx, 0, tbl_handle, id_cb=lambda: next(column_ids))
325
+ index_cols.extend([val_col, undo_col])
326
+
327
+ idx_id = next(index_ids)
328
+ idx_cls = type(idx)
329
+ md = schema.IndexMd(
330
+ id=idx_id,
331
+ name=f'idx{idx_id}',
332
+ indexed_col_id=col.id,
333
+ indexed_col_tbl_id=tbl_id_str,
334
+ index_val_col_id=val_col.id,
335
+ index_val_undo_col_id=undo_col.id,
336
+ schema_version_add=0,
337
+ schema_version_drop=None,
338
+ class_fqn=idx_cls.__module__ + '.' + idx_cls.__name__,
339
+ init_args=idx.as_dict(),
340
+ )
341
+ index_md[idx_id] = md
342
+
343
+ for col in index_cols:
344
+ col_md, _ = col.to_md()
345
+ column_md[col.id] = col_md
346
+
347
+ assert all(column_md[id].id == id for id in column_md)
348
+ assert all(index_md[id].id == id for id in index_md)
349
+
350
+ cols.extend(index_cols)
351
+
352
+ tbl_md = schema.TableMd(
353
+ tbl_id=tbl_id_str,
354
+ name=name,
355
+ user=user,
356
+ is_replica=False,
357
+ current_version=0,
358
+ current_schema_version=0,
359
+ next_col_id=next(column_ids),
360
+ next_idx_id=next(index_ids),
361
+ next_row_id=0,
362
+ view_sn=0,
363
+ column_md=column_md,
364
+ index_md=index_md,
365
+ external_stores=[],
366
+ view_md=view_md,
367
+ additional_md={},
368
+ )
198
369
 
199
- timestamp = time.time()
200
- # create schema.Table
201
- # Column.dependent_cols for existing cols is wrong at this point, but init() will set it correctly
202
- column_md = cls._create_column_md(cols)
203
- table_md = schema.TableMd(
204
- name=name, current_version=0, current_schema_version=0, next_col_id=len(cols),
205
- next_idx_id=0, next_row_id=0, column_md=column_md, index_md={}, external_stores=[], view_md=view_md)
206
- # create a schema.Table here, we need it to call our c'tor;
207
- # don't add it to the session yet, we might add index metadata
208
- tbl_id = uuid.uuid4()
209
- tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))
370
+ table_version_md = schema.VersionMd(
371
+ tbl_id=tbl_id_str,
372
+ created_at=timestamp,
373
+ version=0,
374
+ schema_version=0,
375
+ user=user,
376
+ update_status=None,
377
+ additional_md={},
378
+ )
210
379
 
211
- # create schema.TableVersion
212
- table_version_md = schema.TableVersionMd(created_at=timestamp, version=0, schema_version=0)
213
- tbl_version_record = schema.TableVersion(
214
- tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md))
380
+ schema_version_md = schema.SchemaVersionMd(
381
+ tbl_id=tbl_id_str,
382
+ schema_version=0,
383
+ preceding_schema_version=None,
384
+ columns=schema_col_md,
385
+ num_retained_versions=num_retained_versions,
386
+ comment=comment,
387
+ media_validation=media_validation.name.lower(),
388
+ additional_md={},
389
+ )
390
+ return TableVersionMd(tbl_md, table_version_md, schema_version_md)
215
391
 
216
- # create schema.TableSchemaVersion
217
- schema_col_md: dict[int, schema.SchemaColumn] = {}
218
- for pos, col in enumerate(cols):
219
- md = schema.SchemaColumn(
220
- pos=pos, name=col.name,
221
- media_validation=col._media_validation.name.lower() if col._media_validation is not None else None)
222
- schema_col_md[col.id] = md
223
-
224
- schema_version_md = schema.TableSchemaVersionMd(
225
- schema_version=0, preceding_schema_version=None, columns=schema_col_md,
226
- num_retained_versions=num_retained_versions, comment=comment,
227
- media_validation=media_validation.name.lower())
228
- schema_version_record = schema.TableSchemaVersion(
229
- tbl_id=tbl_record.id, schema_version=0, md=dataclasses.asdict(schema_version_md))
230
-
231
- # if this is purely a snapshot (it doesn't require any additional storage for columns and it # doesn't have a
232
- # predicate to apply at runtime), we don't create a physical table and simply use the base's table version path
233
- if view_md is not None and view_md.is_snapshot and view_md.predicate is None and len(cols) == 0:
234
- session.add(tbl_record)
235
- session.add(tbl_version_record)
236
- session.add(schema_version_record)
237
- return tbl_record.id, None
238
-
239
- assert (base_path is not None) == (view_md is not None)
240
- base = base_path.tbl_version if base_path is not None and view_md.is_snapshot else None
241
- base_path = base_path if base_path is not None and not view_md.is_snapshot else None
242
- tbl_version = cls(tbl_record.id, table_md, 0, schema_version_md, base=base, base_path=base_path)
243
-
244
- conn = session.connection()
245
- tbl_version.store_tbl.create(conn)
246
- if view_md is None or not view_md.is_snapshot:
247
- # add default indices, after creating the store table
248
- for col in tbl_version.cols_by_name.values():
249
- status = tbl_version._add_default_index(col, conn=conn)
250
- assert status is None or status.num_excs == 0
251
-
252
- # we re-create the tbl_record here, now that we have new index metadata
253
- tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_version._create_tbl_md()))
254
- session.add(tbl_record)
255
- session.add(tbl_version_record)
256
- session.add(schema_version_record)
257
- return tbl_record.id, tbl_version
392
+ def exec_op(self, op: TableOp) -> None:
393
+ from pixeltable.store import StoreBase
258
394
 
259
- @classmethod
260
- def delete_md(cls, tbl_id: UUID, conn: sql.Connection) -> None:
261
- conn.execute(
262
- sql.delete(schema.TableSchemaVersion.__table__).where(schema.TableSchemaVersion.tbl_id == tbl_id))
263
- conn.execute(
264
- sql.delete(schema.TableVersion.__table__).where(schema.TableVersion.tbl_id == tbl_id))
265
- conn.execute(sql.delete(schema.Table.__table__).where(schema.Table.id == tbl_id))
395
+ assert op.delete_table_md_op is None # that needs to get handled by Catalog
396
+
397
+ if op.create_store_table_op is not None:
398
+ # this needs to be called outside of a transaction
399
+ self.store_tbl.create()
266
400
 
267
- def drop(self) -> None:
268
- with Env.get().engine.begin() as conn:
269
- # delete this table and all associated data
270
- MediaStore.delete(self.id)
401
+ elif op.create_index_op is not None:
402
+ idx_info = self.idxs[op.create_index_op.idx_id]
403
+ with Env.get().begin_xact():
404
+ self.store_tbl.create_index(idx_info.id)
405
+
406
+ elif op.load_view_op is not None:
407
+ from pixeltable.catalog import Catalog
408
+ from pixeltable.plan import Planner
409
+
410
+ from .table_version_path import TableVersionPath
411
+
412
+ # clear out any remaining media files from an aborted previous attempt
413
+ self.delete_media()
414
+ view_path = TableVersionPath.from_dict(op.load_view_op.view_path)
415
+ plan, _ = Planner.create_view_load_plan(view_path)
416
+ _, row_counts = self.store_tbl.insert_rows(plan, v_min=self.version)
417
+ status = UpdateStatus(row_count_stats=row_counts)
418
+ Catalog.get().store_update_status(self.id, self.version, status)
419
+ _logger.debug(f'Loaded view {self.name} with {row_counts.num_rows} rows')
420
+
421
+ elif op.drop_store_table_op is not None:
422
+ # don't reference self.store_tbl here, it needs to reference the metadata for our base table, which at
423
+ # this point may not exist anymore
424
+ with Env.get().begin_xact() as conn:
425
+ drop_stmt = f'DROP TABLE IF EXISTS {StoreBase.storage_name(self.id, self.is_view)}'
426
+ conn.execute(sql.text(drop_stmt))
427
+
428
+ elif op.delete_table_media_files_op:
429
+ self.delete_media()
271
430
  FileCache.get().clear(tbl_id=self.id)
272
- self.delete_md(self.id, conn)
273
- self.store_tbl.drop(conn)
274
431
 
275
- # de-register table version from catalog
432
+ @classmethod
433
+ def create_replica(cls, md: TableVersionMd, create_store_tbl: bool = True) -> TableVersion:
434
+ from .catalog import Catalog, TableVersionPath
435
+
436
+ assert Env.get().in_xact
437
+ assert md.tbl_md.is_replica
438
+ tbl_id = UUID(md.tbl_md.tbl_id)
439
+ _logger.info(f'Creating replica table version {tbl_id}:{md.version_md.version}.')
440
+ view_md = md.tbl_md.view_md
441
+ base_path = TableVersionPath.from_md(view_md.base_versions) if view_md is not None else None
442
+ base = base_path.tbl_version if base_path is not None else None
443
+ key = TableVersionKey(tbl_id, md.version_md.version, None)
444
+ tbl_version = cls(key, md.tbl_md, md.version_md, md.schema_version_md, [], base_path=base_path, base=base)
445
+ cat = Catalog.get()
446
+ # We're creating a new TableVersion replica, so we should never have seen this particular
447
+ # TableVersion instance before.
448
+ # Actually this isn't true, because we might be re-creating a dropped replica.
449
+ # TODO: Understand why old TableVersions are kept around even for a dropped table.
450
+ # assert tbl_version.effective_version is not None
451
+ # assert (tbl_version.id, tbl_version.effective_version, None) not in cat._tbl_versions
452
+ cat._tbl_versions[key] = tbl_version
453
+ tbl_version.init()
454
+ if create_store_tbl:
455
+ tbl_version.store_tbl.create()
456
+ return tbl_version
457
+
458
+ def delete_media(self, tbl_version: int | None = None) -> None:
459
+ # Assemble a set of column destinations and delete objects from all of them
460
+ # None is a valid column destination which refers to the default object location
461
+ destinations = {col.destination for col in self.cols if col.is_stored}
462
+ for dest in destinations:
463
+ ObjectOps.delete(dest, self.id, tbl_version=tbl_version)
464
+
465
+ def drop(self) -> list[TableOp]:
466
+ id_str = str(self.id)
467
+ ops = [
468
+ TableOp(
469
+ tbl_id=id_str,
470
+ op_sn=0,
471
+ num_ops=3,
472
+ needs_xact=False,
473
+ delete_table_media_files_op=DeleteTableMediaFilesOp(),
474
+ ),
475
+ TableOp(tbl_id=id_str, op_sn=1, num_ops=3, needs_xact=False, drop_store_table_op=DropStoreTableOp()),
476
+ TableOp(tbl_id=id_str, op_sn=2, num_ops=3, needs_xact=True, delete_table_md_op=DeleteTableMdOp()),
477
+ ]
478
+ return ops
479
+
480
+ def init(self) -> None:
481
+ """
482
+ Initialize schema-related in-memory metadata separately, now that this TableVersion instance is visible
483
+ in Catalog.
484
+ """
276
485
  from .catalog import Catalog
486
+
277
487
  cat = Catalog.get()
278
- del cat.tbl_versions[(self.id, self.effective_version)]
279
- # TODO: remove from tbl_dependents
280
-
281
- def _init_schema(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
282
- # create columns first, so the indices can reference them
283
- self._init_cols(tbl_md, schema_version_md)
284
- self._init_idxs(tbl_md)
285
- # create the sa schema only after creating the columns and indices
286
- self._init_sa_schema()
287
-
288
- def _init_cols(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
289
- """Initialize self.cols with the columns visible in our effective version"""
488
+ assert self.key in cat._tbl_versions
489
+ self._init_schema()
490
+ if self.is_mutable:
491
+ cat.record_column_dependencies(self)
492
+ # init external stores; this needs to happen after the schema is created
493
+ self._init_external_stores()
494
+
495
+ self.is_initialized = True
496
+
497
+ def _init_schema(self) -> None:
498
+ from pixeltable.store import StoreComponentView, StoreTable, StoreView
499
+
500
+ from .catalog import Catalog
501
+
502
+ # initialize IndexBase instances and collect sa_col_types
503
+ idxs: dict[int, index.IndexBase] = {}
504
+ val_col_idxs: dict[int, index.IndexBase] = {} # key: id of value column
505
+ undo_col_idxs: dict[int, index.IndexBase] = {} # key: id of undo column
506
+ for md in self.tbl_md.index_md.values():
507
+ cls_name = md.class_fqn.rsplit('.', 1)[-1]
508
+ cls = getattr(index, cls_name)
509
+ idx = cls.from_dict(md.init_args)
510
+ idxs[md.id] = idx
511
+ val_col_idxs[md.index_val_col_id] = idx
512
+ undo_col_idxs[md.index_val_undo_col_id] = idx
513
+
514
+ # initialize Columns
290
515
  self.cols = []
291
516
  self.cols_by_name = {}
292
517
  self.cols_by_id = {}
293
- for col_md in tbl_md.column_md.values():
294
- schema_col_md = schema_version_md.columns[col_md.id] if col_md.id in schema_version_md.columns else None
295
- col_name = schema_col_md.name if schema_col_md is not None else None
518
+ # Sort columns in column_md by the position specified in col_md.id to guarantee that all references
519
+ # point backward.
520
+ sorted_column_md = sorted(self.tbl_md.column_md.values(), key=lambda item: item.id)
521
+ for col_md in sorted_column_md:
522
+ col_type = ts.ColumnType.from_dict(col_md.col_type)
523
+ schema_col_md = self.schema_version_md.columns.get(col_md.id)
296
524
  media_val = (
297
525
  MediaValidation[schema_col_md.media_validation.upper()]
298
- if schema_col_md is not None and schema_col_md.media_validation is not None else None
526
+ if schema_col_md is not None and schema_col_md.media_validation is not None
527
+ else None
299
528
  )
300
- col = Column(
301
- col_id=col_md.id, name=col_name, col_type=ts.ColumnType.from_dict(col_md.col_type),
302
- is_pk=col_md.is_pk, stored=col_md.stored, media_validation=media_val,
303
- schema_version_add=col_md.schema_version_add, schema_version_drop=col_md.schema_version_drop,
304
- value_expr_dict=col_md.value_expr)
305
- col.tbl = self
306
- self.cols.append(col)
307
-
308
- # populate the lookup structures before Expr.from_dict()
309
- if col_md.schema_version_add > self.schema_version:
310
- # column was added after this version
311
- continue
312
- if col_md.schema_version_drop is not None and col_md.schema_version_drop <= self.schema_version:
313
- # column was dropped
314
- continue
315
- if col.name is not None:
316
- self.cols_by_name[col.name] = col
317
- self.cols_by_id[col.id] = col
318
529
 
319
- # make sure to traverse columns ordered by position = order in which cols were created;
320
- # this guarantees that references always point backwards
321
- if col_md.value_expr is not None:
322
- refd_cols = exprs.Expr.get_refd_columns(col_md.value_expr)
323
- self._record_refd_columns(col)
530
+ stores_cellmd: bool | None = None # None: determined by the column properties (in the Column c'tor)
531
+ sa_col_type: sql.types.TypeEngine | None = None
532
+ if col_md.id in val_col_idxs:
533
+ idx = val_col_idxs[col_md.id]
534
+ # for index value columns, the index gets to override the default
535
+ stores_cellmd = idx.records_value_errors()
536
+ sa_col_type = idx.get_index_sa_type(col_type)
537
+ elif col_md.id in undo_col_idxs:
538
+ idx = undo_col_idxs[col_md.id]
539
+ # for index undo columns, we never store cellmd
540
+ stores_cellmd = False
541
+ sa_col_type = idx.get_index_sa_type(col_type)
324
542
 
325
- def _init_idxs(self, tbl_md: schema.TableMd) -> None:
326
- self.idx_md = tbl_md.index_md
327
- self.idxs_by_name = {}
328
- import pixeltable.index as index_module
329
- for md in tbl_md.index_md.values():
330
- if md.schema_version_add > self.schema_version \
331
- or md.schema_version_drop is not None and md.schema_version_drop <= self.schema_version:
332
- # index not visible in this schema version
333
- continue
543
+ col = Column(
544
+ col_id=col_md.id,
545
+ name=schema_col_md.name if schema_col_md is not None else None,
546
+ col_type=col_type,
547
+ is_pk=col_md.is_pk,
548
+ is_iterator_col=self.is_component_view and col_md.id < self.num_iterator_cols + 1,
549
+ stored=col_md.stored,
550
+ media_validation=media_val,
551
+ sa_col_type=sa_col_type,
552
+ schema_version_add=col_md.schema_version_add,
553
+ schema_version_drop=col_md.schema_version_drop,
554
+ stores_cellmd=stores_cellmd,
555
+ value_expr_dict=col_md.value_expr,
556
+ tbl_handle=self.handle,
557
+ destination=col_md.destination,
558
+ )
334
559
 
335
- # instantiate index object
336
- cls_name = md.class_fqn.rsplit('.', 1)[-1]
337
- cls = getattr(index_module, cls_name)
338
- idx_col = self.cols_by_id[md.indexed_col_id]
339
- idx = cls.from_dict(idx_col, md.init_args)
340
-
341
- # fix up the sa column type of the index value and undo columns
342
- val_col = self.cols_by_id[md.index_val_col_id]
343
- val_col.sa_col_type = idx.index_sa_type()
344
- val_col._records_errors = False
345
- undo_col = self.cols_by_id[md.index_val_undo_col_id]
346
- undo_col.sa_col_type = idx.index_sa_type()
347
- undo_col._records_errors = False
348
- idx_info = self.IndexInfo(id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col)
349
- self.idxs_by_name[md.name] = idx_info
350
-
351
- def _init_sa_schema(self) -> None:
352
- # create the sqlalchemy schema; do this after instantiating columns, in order to determine whether they
353
- # need to record errors
354
- from pixeltable.store import StoreComponentView, StoreTable, StoreView
355
- if self.is_component_view():
560
+ self.cols.append(col)
561
+ # populate lookup structures before Expr.from_dict()
562
+ if col_md.schema_version_add <= self.schema_version and (
563
+ col_md.schema_version_drop is None or col_md.schema_version_drop > self.schema_version
564
+ ):
565
+ if col.name is not None:
566
+ self.cols_by_name[col.name] = col
567
+ self.cols_by_id[col.id] = col
568
+
569
+ if self.supports_idxs:
570
+ # create IndexInfo for indices visible in current_version
571
+ visible_idxs = [
572
+ md
573
+ for md in self.tbl_md.index_md.values()
574
+ if md.schema_version_add <= self.schema_version
575
+ and (md.schema_version_drop is None or md.schema_version_drop > self.schema_version)
576
+ ]
577
+ for md in visible_idxs:
578
+ idx = idxs[md.id]
579
+ indexed_col_id = QColumnId(UUID(md.indexed_col_tbl_id), md.indexed_col_id)
580
+ idx_col = self._lookup_column(indexed_col_id)
581
+ info = self.IndexInfo(
582
+ id=md.id,
583
+ name=md.name,
584
+ idx=idx,
585
+ col=idx_col,
586
+ val_col=self.cols_by_id[md.index_val_col_id],
587
+ undo_col=self.cols_by_id[md.index_val_undo_col_id],
588
+ )
589
+ self.idxs[md.id] = info
590
+ self.idxs_by_name[md.name] = info
591
+ self.idxs_by_col.setdefault(indexed_col_id, []).append(info)
592
+
593
+ # create value exprs, now that we have all lookup structures in place
594
+ tvp: TableVersionPath | None = None
595
+ if self.effective_version is not None:
596
+ # for snapshot TableVersion instances, we need to retarget the column value_exprs to the snapshot;
597
+ # otherwise they'll incorrectly refer to the live table. So, construct a full TableVersionPath to
598
+ # use for retargeting.
599
+ tvp = Catalog.get().construct_tvp(
600
+ self.id, self.effective_version, self.tbl_md.ancestors, self.version_md.created_at
601
+ )
602
+ elif self.anchor_tbl_id is not None:
603
+ # for replica TableVersion instances, we also need to retarget the value_exprs, this time to the
604
+ # "anchored" TableVersionPath.
605
+ assert self.path is not None
606
+ tvp = self.path
607
+ for col in self.cols_by_id.values():
608
+ col.init_value_expr(tvp)
609
+
610
+ # create the sqlalchemy schema, after instantiating all Columns
611
+ if self.is_component_view:
356
612
  self.store_tbl = StoreComponentView(self)
357
- elif self.is_view():
613
+ elif self.is_view:
358
614
  self.store_tbl = StoreView(self)
359
615
  else:
360
616
  self.store_tbl = StoreTable(self)
361
617
 
362
- def _update_md(
363
- self, timestamp: float, conn: sql.engine.Connection, update_tbl_version: bool = True,
364
- preceding_schema_version: Optional[int] = None
365
- ) -> None:
366
- """Writes table metadata to the database.
367
-
368
- Args:
369
- timestamp: timestamp of the change
370
- conn: database connection to use
371
- update_tbl_version: if `True`, will also write `TableVersion` metadata
372
- preceding_schema_version: if specified, will also write `TableSchemaVersion` metadata, recording the
373
- specified preceding schema version
618
+ def _lookup_column(self, qid: QColumnId) -> Column | None:
374
619
  """
375
- assert update_tbl_version or preceding_schema_version is None
620
+ Look up the column with the given table id and column id, searching through the ancestors of this TableVersion
621
+ to find it. We avoid referencing TableVersionPath in order to work properly with snapshots as well.
376
622
 
377
- conn.execute(
378
- sql.update(schema.Table.__table__)
379
- .values({schema.Table.md: dataclasses.asdict(self._create_tbl_md())})
380
- .where(schema.Table.id == self.id))
623
+ This will search through *all* known columns, including columns that are not visible in this TableVersion.
624
+ """
625
+ if qid.tbl_id == self.id:
626
+ return next(col for col in self.cols if col.id == qid.col_id)
627
+ elif self.base is not None:
628
+ return self.base.get()._lookup_column(qid)
629
+ else:
630
+ return None
381
631
 
382
- if update_tbl_version:
383
- version_md = self._create_version_md(timestamp)
384
- conn.execute(
385
- sql.insert(schema.TableVersion.__table__)
386
- .values(tbl_id=self.id, version=self.version, md=dataclasses.asdict(version_md)))
632
+ def _write_md(self, new_version: bool, new_schema_version: bool) -> None:
633
+ from pixeltable.catalog import Catalog
387
634
 
388
- if preceding_schema_version is not None:
389
- schema_version_md = self._create_schema_version_md(preceding_schema_version)
390
- conn.execute(
391
- sql.insert(schema.TableSchemaVersion.__table__)
392
- .values(
393
- tbl_id=self.id, schema_version=self.schema_version,
394
- md=dataclasses.asdict(schema_version_md)))
635
+ Catalog.get().write_tbl_md(
636
+ self.id,
637
+ None,
638
+ self._tbl_md,
639
+ self._version_md if new_version else None,
640
+ self._schema_version_md if new_schema_version else None,
641
+ )
395
642
 
396
643
  def _store_idx_name(self, idx_id: int) -> str:
397
644
  """Return name of index in the store, which needs to be globally unique"""
398
645
  return f'idx_{self.id.hex}_{idx_id}'
399
646
 
400
- def add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
647
+ def add_index(self, col: Column, idx_name: str | None, idx: index.IndexBase) -> UpdateStatus:
401
648
  # we're creating a new schema version
402
- self.version += 1
403
- preceding_schema_version = self.schema_version
404
- self.schema_version = self.version
405
- with Env.get().engine.begin() as conn:
406
- status = self._add_index(col, idx_name, idx, conn)
407
- self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
408
- _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
409
- return status
410
-
411
- def _add_default_index(self, col: Column, conn: sql.engine.Connection) -> Optional[UpdateStatus]:
412
- """Add a B-tree index on this column if it has a compatible type"""
649
+ self.bump_version(bump_schema_version=True)
650
+ status = self._add_index(col, idx_name, idx)
651
+ self._write_md(new_version=True, new_schema_version=True)
652
+ _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
653
+ return status
654
+
655
+ @classmethod
656
+ def _is_btree_indexable(cls, col: Column) -> bool:
413
657
  if not col.stored:
414
658
  # if the column is intentionally not stored, we want to avoid the overhead of an index
415
- return None
659
+ return False
660
+ # Skip index for stored media columns produced by an iterator
661
+ if col.col_type.is_media_type() and col.is_iterator_col:
662
+ return False
416
663
  if not col.col_type.is_scalar_type() and not (col.col_type.is_media_type() and not col.is_computed):
417
664
  # wrong type for a B-tree
418
- return None
419
- if col.col_type.is_bool_type():
665
+ return False
666
+ if col.col_type.is_bool_type(): # noqa : SIM103 Supress `Return the negated condition directly` check
420
667
  # B-trees on bools aren't useful
668
+ return False
669
+ return True
670
+
671
+ def _add_default_index(self, col: Column) -> UpdateStatus | None:
672
+ """Add a B-tree index on this column if it has a compatible type"""
673
+ if not self._is_btree_indexable(col):
421
674
  return None
422
- status = self._add_index(col, idx_name=None, idx=index.BtreeIndex(col), conn=conn)
675
+ status = self._add_index(col, idx_name=None, idx=index.BtreeIndex())
423
676
  return status
424
677
 
425
- def _add_index(
426
- self, col: Column, idx_name: Optional[str], idx: index.IndexBase, conn: sql.engine.Connection
427
- ) -> UpdateStatus:
428
- assert not self.is_snapshot
429
- idx_id = self.next_idx_id
430
- self.next_idx_id += 1
431
- if idx_name is None:
432
- idx_name = f'idx{idx_id}'
433
- else:
434
- assert is_valid_identifier(idx_name)
435
- assert idx_name not in [i.name for i in self.idx_md.values()]
436
-
437
- # add the index value and undo columns (which need to be nullable)
678
+ @classmethod
679
+ def _create_index_columns(
680
+ cls,
681
+ col: Column,
682
+ idx: index.IndexBase,
683
+ schema_version: int,
684
+ tbl_handle: TableVersionHandle,
685
+ id_cb: Callable[[], int],
686
+ ) -> tuple[Column, Column]:
687
+ """Create value and undo columns for the given index.
688
+ Args:
689
+ idx: index for which columns will be created.
690
+ Returns:
691
+ A tuple containing the value column and the undo column, both of which are nullable.
692
+ """
693
+ value_expr = idx.create_value_expr(col)
438
694
  val_col = Column(
439
- col_id=self.next_col_id, name=None, computed_with=idx.index_value_expr(),
440
- sa_col_type=idx.index_sa_type(), stored=True,
441
- schema_version_add=self.schema_version, schema_version_drop=None,
442
- records_errors=idx.records_value_errors())
443
- val_col.tbl = self
695
+ col_id=id_cb(),
696
+ name=None,
697
+ computed_with=value_expr,
698
+ sa_col_type=idx.get_index_sa_type(value_expr.col_type),
699
+ stored=True,
700
+ stores_cellmd=idx.records_value_errors(),
701
+ schema_version_add=schema_version,
702
+ schema_version_drop=None,
703
+ )
444
704
  val_col.col_type = val_col.col_type.copy(nullable=True)
445
- self.next_col_id += 1
705
+ val_col.tbl_handle = tbl_handle
446
706
 
447
707
  undo_col = Column(
448
- col_id=self.next_col_id, name=None, col_type=val_col.col_type,
449
- sa_col_type=val_col.sa_col_type, stored=True,
450
- schema_version_add=self.schema_version, schema_version_drop=None,
451
- records_errors=False)
452
- undo_col.tbl = self
708
+ col_id=id_cb(),
709
+ name=None,
710
+ col_type=val_col.col_type,
711
+ sa_col_type=val_col.sa_col_type,
712
+ stored=True,
713
+ stores_cellmd=False,
714
+ schema_version_add=schema_version,
715
+ schema_version_drop=None,
716
+ )
453
717
  undo_col.col_type = undo_col.col_type.copy(nullable=True)
454
- self.next_col_id += 1
718
+ undo_col.tbl_handle = tbl_handle
719
+ return val_col, undo_col
455
720
 
721
+ def _create_index(
722
+ self, col: Column, val_col: Column, undo_col: Column, idx_name: str | None, idx: index.IndexBase
723
+ ) -> None:
724
+ """Create the given index along with index md"""
725
+ idx_id = self.next_idx_id
726
+ self.next_idx_id += 1
727
+ if idx_name is None:
728
+ idx_name = f'idx{idx_id}'
729
+ else:
730
+ assert is_valid_identifier(idx_name)
731
+ assert idx_name not in [i.name for i in self._tbl_md.index_md.values()]
456
732
  # create and register the index metadata
457
733
  idx_cls = type(idx)
458
734
  idx_md = schema.IndexMd(
459
- id=idx_id, name=idx_name,
460
- indexed_col_id=col.id, index_val_col_id=val_col.id, index_val_undo_col_id=undo_col.id,
461
- schema_version_add=self.schema_version, schema_version_drop=None,
462
- class_fqn=idx_cls.__module__ + '.' + idx_cls.__name__, init_args=idx.as_dict())
735
+ id=idx_id,
736
+ name=idx_name,
737
+ indexed_col_id=col.id,
738
+ indexed_col_tbl_id=str(col.get_tbl().id),
739
+ index_val_col_id=val_col.id,
740
+ index_val_undo_col_id=undo_col.id,
741
+ schema_version_add=self.schema_version,
742
+ schema_version_drop=None,
743
+ class_fqn=idx_cls.__module__ + '.' + idx_cls.__name__,
744
+ init_args=idx.as_dict(),
745
+ )
463
746
  idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
464
- self.idx_md[idx_id] = idx_md
747
+ self._tbl_md.index_md[idx_id] = idx_md
748
+ self.idxs[idx_id] = idx_info
465
749
  self.idxs_by_name[idx_name] = idx_info
750
+ self.idxs_by_col.setdefault(col.qid, []).append(idx_info)
751
+ self.store_tbl.create_index(idx_id)
466
752
 
753
+ def _add_index(self, col: Column, idx_name: str | None, idx: index.IndexBase) -> UpdateStatus:
754
+ val_col, undo_col = self._create_index_columns(
755
+ col, idx, self.schema_version, self.handle, id_cb=self.next_col_id
756
+ )
467
757
  # add the columns and update the metadata
468
758
  # TODO support on_error='abort' for indices; it's tricky because of the way metadata changes are entangled
469
759
  # with the database operations
470
- status = self._add_columns([val_col, undo_col], conn, print_stats=False, on_error='ignore')
760
+ status = self._add_columns([val_col, undo_col], print_stats=False, on_error='ignore')
471
761
  # now create the index structure
472
- idx.create_index(self._store_idx_name(idx_id), val_col, conn)
473
-
762
+ self._create_index(col, val_col, undo_col, idx_name, idx)
474
763
  return status
475
764
 
476
765
  def drop_index(self, idx_id: int) -> None:
477
- assert not self.is_snapshot
478
- assert idx_id in self.idx_md
766
+ assert self.is_mutable
767
+ assert idx_id in self._tbl_md.index_md
479
768
 
480
769
  # we're creating a new schema version
481
- self.version += 1
482
- preceding_schema_version = self.schema_version
483
- self.schema_version = self.version
484
- idx_md = self.idx_md[idx_id]
770
+ self.bump_version(bump_schema_version=True)
771
+ idx_md = self._tbl_md.index_md[idx_id]
485
772
  idx_md.schema_version_drop = self.schema_version
486
773
  assert idx_md.name in self.idxs_by_name
487
774
  idx_info = self.idxs_by_name[idx_md.name]
775
+ # remove this index entry from the active indexes (in memory)
776
+ # and the index metadata (in persistent table metadata)
777
+ # TODO: this is wrong, it breaks revert()
778
+ del self.idxs[idx_id]
488
779
  del self.idxs_by_name[idx_md.name]
780
+ if idx_info.col.qid in self.idxs_by_col:
781
+ self.idxs_by_col[idx_info.col.qid].remove(idx_info)
782
+ del self._tbl_md.index_md[idx_id]
489
783
 
490
- with Env.get().engine.begin() as conn:
491
- self._drop_columns([idx_info.val_col, idx_info.undo_col])
492
- self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
493
- _logger.info(f'Dropped index {idx_md.name} on table {self.name}')
784
+ self._drop_columns([idx_info.val_col, idx_info.undo_col])
785
+ self._write_md(new_version=True, new_schema_version=True)
786
+ _logger.info(f'Dropped index {idx_md.name} on table {self.name}')
494
787
 
495
- def add_columns(self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']) -> UpdateStatus:
496
- """Adds a column to the table.
497
- """
498
- assert not self.is_snapshot
499
- assert all(is_valid_identifier(col.name) for col in cols)
788
+ def add_columns(
789
+ self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
790
+ ) -> UpdateStatus:
791
+ """Adds columns to the table."""
792
+ assert self.is_mutable
793
+ assert all(is_valid_identifier(col.name) for col in cols if col.name is not None)
500
794
  assert all(col.stored is not None for col in cols)
501
- assert all(col.name not in self.cols_by_name for col in cols)
795
+ assert all(col.name not in self.cols_by_name for col in cols if col.name is not None)
502
796
  for col in cols:
503
- col.tbl = self
504
- col.id = self.next_col_id
505
- self.next_col_id += 1
797
+ col.tbl_handle = self.handle
798
+ col.id = self.next_col_id()
506
799
 
507
800
  # we're creating a new schema version
508
- self.version += 1
509
- preceding_schema_version = self.schema_version
510
- self.schema_version = self.version
511
- with Env.get().engine.begin() as conn:
512
- status = self._add_columns(cols, conn, print_stats=print_stats, on_error=on_error)
513
- for col in cols:
514
- _ = self._add_default_index(col, conn)
515
- self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
801
+ self.bump_version(bump_schema_version=True)
802
+ index_cols: dict[Column, tuple[index.BtreeIndex, Column, Column]] = {}
803
+ all_cols: list[Column] = []
804
+ for col in cols:
805
+ all_cols.append(col)
806
+ if col.name is not None and self._is_btree_indexable(col):
807
+ idx = index.BtreeIndex()
808
+ val_col, undo_col = self._create_index_columns(
809
+ col, idx, self.schema_version, self.handle, id_cb=self.next_col_id
810
+ )
811
+ index_cols[col] = (idx, val_col, undo_col)
812
+ all_cols.append(val_col)
813
+ all_cols.append(undo_col)
814
+ # Add all columns
815
+ status = self._add_columns(all_cols, print_stats=print_stats, on_error=on_error)
816
+ # Create indices and their md records
817
+ for col, (idx, val_col, undo_col) in index_cols.items():
818
+ self._create_index(col, val_col, undo_col, idx_name=None, idx=idx)
819
+ self.update_status = status
820
+ self._write_md(new_version=True, new_schema_version=True)
516
821
  _logger.info(f'Added columns {[col.name for col in cols]} to table {self.name}, new version: {self.version}')
517
822
 
518
823
  msg = (
519
824
  f'Added {status.num_rows} column value{"" if status.num_rows == 1 else "s"} '
520
825
  f'with {status.num_excs} error{"" if status.num_excs == 1 else "s"}.'
521
826
  )
522
- print(msg)
827
+ Env.get().console_logger.info(msg)
523
828
  _logger.info(f'Columns {[col.name for col in cols]}: {msg}')
524
829
  return status
525
830
 
526
831
  def _add_columns(
527
- self,
528
- cols: Iterable[Column],
529
- conn: sql.engine.Connection,
530
- print_stats: bool,
531
- on_error: Literal['abort', 'ignore']
832
+ self, cols: Iterable[Column], print_stats: bool, on_error: Literal['abort', 'ignore']
532
833
  ) -> UpdateStatus:
533
834
  """Add and populate columns within the current transaction"""
534
- cols = list(cols)
535
- row_count = self.store_tbl.count(conn=conn)
536
- for col in cols:
537
- if not col.col_type.nullable and not col.is_computed:
538
- if row_count > 0:
539
- raise excs.Error(
540
- f'Cannot add non-nullable column "{col.name}" to table {self.name} with existing rows')
835
+ from pixeltable.catalog import Catalog
836
+ from pixeltable.plan import Planner
837
+
838
+ cols_to_add = list(cols)
839
+
840
+ row_count = self.store_tbl.count()
841
+ for col in cols_to_add:
842
+ assert col.tbl_handle.id == self.id
843
+ if not col.col_type.nullable and not col.is_computed and row_count > 0:
844
+ raise excs.Error(
845
+ f'Cannot add non-nullable column {col.name!r} to table {self.name!r} with existing rows'
846
+ )
541
847
 
848
+ computed_values = 0
542
849
  num_excs = 0
543
850
  cols_with_excs: list[Column] = []
544
- for col in cols:
851
+ for col in cols_to_add:
852
+ assert col.id is not None
853
+ excs_per_col = 0
545
854
  col.schema_version_add = self.schema_version
546
855
  # add the column to the lookup structures now, rather than after the store changes executed successfully,
547
856
  # because it might be referenced by the next column's value_expr
548
857
  self.cols.append(col)
858
+ self.cols_by_id[col.id] = col
549
859
  if col.name is not None:
550
860
  self.cols_by_name[col.name] = col
551
- self.cols_by_id[col.id] = col
552
- if col.value_expr is not None:
553
- col.check_value_expr()
554
- self._record_refd_columns(col)
861
+ col_md, sch_md = col.to_md(len(self.cols_by_name))
862
+ assert sch_md is not None, 'Schema column metadata must be created for user-facing columns'
863
+ self._tbl_md.column_md[col.id] = col_md
864
+ self._schema_version_md.columns[col.id] = sch_md
865
+ else:
866
+ col_md, _ = col.to_md()
867
+ self._tbl_md.column_md[col.id] = col_md
555
868
 
556
869
  if col.is_stored:
557
- self.store_tbl.add_column(col, conn)
870
+ self.store_tbl.add_column(col)
558
871
 
559
872
  if not col.is_computed or not col.is_stored or row_count == 0:
560
873
  continue
561
874
 
562
875
  # populate the column
563
- from pixeltable.plan import Planner
564
- plan, value_expr_slot_idx = Planner.create_add_column_plan(self.path, col)
876
+ plan = Planner.create_add_column_plan(self.path, col)
565
877
  plan.ctx.num_rows = row_count
566
-
567
878
  try:
568
- plan.ctx.set_conn(conn)
569
879
  plan.open()
570
880
  try:
571
- num_excs = self.store_tbl.load_column(col, plan, value_expr_slot_idx, conn, on_error)
572
- except sql.exc.DBAPIError as exc:
573
- # Wrap the DBAPIError in an excs.Error to unify processing in the subsequent except block
574
- raise excs.Error(f'SQL error during execution of computed column `{col.name}`:\n{exc}') from exc
575
- if num_excs > 0:
881
+ excs_per_col = self.store_tbl.load_column(col, plan, on_error == 'abort')
882
+ except sql_exc.DBAPIError as exc:
883
+ Catalog.get().convert_sql_exc(exc, self.id, self.handle, convert_db_excs=True)
884
+ # If it wasn't converted, re-raise as a generic Pixeltable error
885
+ # (this means it's not a known concurrency error; it's something else)
886
+ raise excs.Error(
887
+ f'Unexpected SQL error during execution of computed column {col.name!r}:\n{exc}'
888
+ ) from exc
889
+ if excs_per_col > 0:
576
890
  cols_with_excs.append(col)
577
- except excs.Error as exc:
578
- self.cols.pop()
579
- for col in cols:
580
- # remove columns that we already added
581
- if col.id not in self.cols_by_id:
582
- continue
583
- if col.name is not None:
584
- del self.cols_by_name[col.name]
585
- del self.cols_by_id[col.id]
586
- # we need to re-initialize the sqlalchemy schema
587
- self.store_tbl.create_sa_tbl()
588
- raise exc
891
+ num_excs += excs_per_col
892
+ computed_values += plan.ctx.num_computed_exprs * row_count
589
893
  finally:
590
894
  plan.close()
591
895
 
896
+ Catalog.get().record_column_dependencies(self)
897
+
592
898
  if print_stats:
593
899
  plan.ctx.profile.print(num_rows=row_count)
594
- # TODO(mkornacker): what to do about system columns with exceptions?
900
+
901
+ # TODO: what to do about system columns with exceptions?
902
+ row_counts = RowCountStats(
903
+ upd_rows=row_count, num_excs=num_excs, computed_values=computed_values
904
+ ) # add_columns
595
905
  return UpdateStatus(
596
- num_rows=row_count, num_computed_values=row_count, num_excs=num_excs,
597
- cols_with_excs=[f'{col.tbl.name}.{col.name}'for col in cols_with_excs if col.name is not None])
906
+ cols_with_excs=[f'{col.get_tbl().name}.{col.name}' for col in cols_with_excs if col.name is not None],
907
+ row_count_stats=row_counts,
908
+ )
598
909
 
599
910
  def drop_column(self, col: Column) -> None:
600
- """Drop a column from the table.
601
- """
602
- from pixeltable.catalog import Catalog
911
+ """Drop a column from the table."""
603
912
 
604
- assert not self.is_snapshot
913
+ assert self.is_mutable
605
914
 
606
915
  # we're creating a new schema version
607
- self.version += 1
608
- preceding_schema_version = self.schema_version
609
- self.schema_version = self.version
610
-
611
- with Env.get().engine.begin() as conn:
612
- # drop this column and all dependent index columns and indices
613
- dropped_cols = [col]
614
- dropped_idx_names: list[str] = []
615
- for idx_info in self.idxs_by_name.values():
616
- if idx_info.col != col:
617
- continue
618
- dropped_cols.extend([idx_info.val_col, idx_info.undo_col])
619
- idx_md = self.idx_md[idx_info.id]
620
- idx_md.schema_version_drop = self.schema_version
621
- assert idx_md.name in self.idxs_by_name
622
- dropped_idx_names.append(idx_md.name)
623
- # update idxs_by_name
624
- for idx_name in dropped_idx_names:
625
- del self.idxs_by_name[idx_name]
626
- self._drop_columns(dropped_cols)
627
- self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
916
+ self.bump_version(bump_schema_version=True)
917
+
918
+ # drop this column and all dependent index columns and indices
919
+ dropped_cols = [col]
920
+ dropped_idx_info: list[TableVersion.IndexInfo] = []
921
+ for idx_info in self.idxs_by_name.values():
922
+ if idx_info.col != col:
923
+ continue
924
+ dropped_cols.extend([idx_info.val_col, idx_info.undo_col])
925
+ idx_md = self._tbl_md.index_md[idx_info.id]
926
+ idx_md.schema_version_drop = self.schema_version
927
+ assert idx_md.name in self.idxs_by_name
928
+ dropped_idx_info.append(idx_info)
929
+
930
+ # update index lookup structures
931
+ for info in dropped_idx_info:
932
+ del self.idxs[info.id]
933
+ del self.idxs_by_name[info.name]
934
+ if col.qid in self.idxs_by_col:
935
+ del self.idxs_by_col[col.qid]
936
+
937
+ self._drop_columns(dropped_cols)
938
+ self._write_md(new_version=True, new_schema_version=True)
628
939
  _logger.info(f'Dropped column {col.name} from table {self.name}, new version: {self.version}')
629
940
 
630
941
  def _drop_columns(self, cols: Iterable[Column]) -> None:
631
942
  """Mark columns as dropped"""
632
- assert not self.is_snapshot
943
+ from pixeltable.catalog import Catalog
633
944
 
634
- for col in cols:
635
- if col.value_expr is not None:
636
- # update Column.dependent_cols
637
- for c in self.cols:
638
- if c == col:
639
- break
640
- c.dependent_cols.discard(col)
945
+ assert self.is_mutable
641
946
 
947
+ for col in cols:
642
948
  col.schema_version_drop = self.schema_version
643
949
  if col.name is not None:
644
950
  assert col.name in self.cols_by_name
645
951
  del self.cols_by_name[col.name]
646
952
  assert col.id in self.cols_by_id
647
953
  del self.cols_by_id[col.id]
954
+ # update stored md
955
+ self._tbl_md.column_md[col.id].schema_version_drop = col.schema_version_drop
956
+ if col.name is not None:
957
+ del self._schema_version_md.columns[col.id]
958
+
959
+ # update positions
960
+ for pos, schema_col in enumerate(self._schema_version_md.columns.values()):
961
+ schema_col.pos = pos
648
962
 
649
963
  self.store_tbl.create_sa_tbl()
964
+ Catalog.get().record_column_dependencies(self)
650
965
 
651
966
  def rename_column(self, old_name: str, new_name: str) -> None:
652
- """Rename a column.
653
- """
654
- assert not self.is_snapshot
655
- if old_name not in self.cols_by_name:
967
+ """Rename a column."""
968
+ if not self.is_mutable:
969
+ raise excs.Error(f'Cannot rename column for immutable table {self.name!r}')
970
+ col = self.path.get_column(old_name)
971
+ if col is None:
656
972
  raise excs.Error(f'Unknown column: {old_name}')
973
+ if col.get_tbl().id != self.id:
974
+ raise excs.Error(f'Cannot rename base table column {col.name!r}')
657
975
  if not is_valid_identifier(new_name):
658
- raise excs.Error(f"Invalid column name: '{new_name}'")
976
+ raise excs.Error(f'Invalid column name: {new_name}')
659
977
  if new_name in self.cols_by_name:
660
- raise excs.Error(f'Column {new_name} already exists')
661
- col = self.cols_by_name[old_name]
978
+ raise excs.Error(f'Column {new_name!r} already exists')
662
979
  del self.cols_by_name[old_name]
663
980
  col.name = new_name
664
981
  self.cols_by_name[new_name] = col
982
+ self._schema_version_md.columns[col.id].name = new_name
665
983
 
666
984
  # we're creating a new schema version
667
- self.version += 1
668
- preceding_schema_version = self.schema_version
669
- self.schema_version = self.version
985
+ self.bump_version(bump_schema_version=True)
670
986
 
671
- with Env.get().engine.begin() as conn:
672
- self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
987
+ self._write_md(new_version=True, new_schema_version=True)
673
988
  _logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
674
989
 
675
- def set_comment(self, new_comment: Optional[str]):
990
+ def set_comment(self, new_comment: str | None) -> None:
676
991
  _logger.info(f'[{self.name}] Updating comment: {new_comment}')
677
992
  self.comment = new_comment
678
993
  self._create_schema_version()
679
994
 
680
- def set_num_retained_versions(self, new_num_retained_versions: int):
681
- _logger.info(f'[{self.name}] Updating num_retained_versions: {new_num_retained_versions} (was {self.num_retained_versions})')
995
+ def set_num_retained_versions(self, new_num_retained_versions: int) -> None:
996
+ _logger.info(
997
+ f'[{self.name}] Updating num_retained_versions: {new_num_retained_versions} '
998
+ f'(was {self.num_retained_versions})'
999
+ )
682
1000
  self.num_retained_versions = new_num_retained_versions
683
1001
  self._create_schema_version()
684
1002
 
685
- def _create_schema_version(self):
1003
+ def _create_schema_version(self) -> None:
686
1004
  # we're creating a new schema version
687
- self.version += 1
688
- preceding_schema_version = self.schema_version
689
- self.schema_version = self.version
690
- with Env.get().engine.begin() as conn:
691
- self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
1005
+ self.bump_version(bump_schema_version=True)
1006
+ self._write_md(new_version=True, new_schema_version=True)
692
1007
  _logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
693
1008
 
694
1009
  def insert(
695
- self,
696
- rows: Optional[list[dict[str, Any]]],
697
- df: Optional[pxt.DataFrame],
698
- conn: Optional[sql.engine.Connection] = None,
699
- print_stats: bool = False,
700
- fail_on_exception: bool = True
1010
+ self,
1011
+ rows: list[dict[str, Any]] | None,
1012
+ query: Query | None,
1013
+ print_stats: bool = False,
1014
+ fail_on_exception: bool = True,
701
1015
  ) -> UpdateStatus:
702
1016
  """
703
- Insert rows into this table, either from an explicit list of dicts or from a `DataFrame`.
1017
+ Insert rows into this table, either from an explicit list of dicts or from a `Query`.
704
1018
  """
705
1019
  from pixeltable.plan import Planner
706
1020
 
707
- assert self.is_insertable()
708
- assert (rows is None) != (df is None) # Exactly one must be specified
1021
+ assert self.is_insertable
1022
+ assert (rows is None) != (query is None) # Exactly one must be specified
709
1023
  if rows is not None:
710
1024
  plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
1025
+
711
1026
  else:
712
- plan = Planner.create_df_insert_plan(self, df, ignore_errors=not fail_on_exception)
1027
+ plan = Planner.create_query_insert_plan(self, query, ignore_errors=not fail_on_exception)
713
1028
 
714
1029
  # this is a base table; we generate rowids during the insert
715
1030
  def rowids() -> Iterator[int]:
716
1031
  while True:
717
- rowid = self.next_rowid
718
- self.next_rowid += 1
1032
+ rowid = self.next_row_id
1033
+ self.next_row_id += 1
719
1034
  yield rowid
720
1035
 
721
- if conn is None:
722
- with Env.get().engine.begin() as conn:
723
- return self._insert(
724
- plan, conn, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception)
725
- else:
726
- return self._insert(
727
- plan, conn, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception)
1036
+ result = self._insert(
1037
+ plan, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception
1038
+ )
1039
+ return result
728
1040
 
729
1041
  def _insert(
730
- self, exec_plan: 'exec.ExecNode', conn: sql.engine.Connection, timestamp: float, *,
731
- rowids: Optional[Iterator[int]] = None, print_stats: bool = False, abort_on_exc: bool = False
1042
+ self,
1043
+ exec_plan: 'exec.ExecNode',
1044
+ timestamp: float,
1045
+ *,
1046
+ rowids: Iterator[int] | None = None,
1047
+ print_stats: bool = False,
1048
+ abort_on_exc: bool = False,
732
1049
  ) -> UpdateStatus:
733
1050
  """Insert rows produced by exec_plan and propagate to views"""
734
1051
  # we're creating a new version
735
- self.version += 1
736
- result = UpdateStatus()
737
- num_rows, num_excs, cols_with_excs = self.store_tbl.insert_rows(
738
- exec_plan, conn, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc)
739
- result.num_rows = num_rows
740
- result.num_excs = num_excs
741
- result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
742
- result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
743
- self._update_md(timestamp, conn)
1052
+ self.bump_version(timestamp, bump_schema_version=False)
1053
+ cols_with_excs, row_counts = self.store_tbl.insert_rows(
1054
+ exec_plan, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc
1055
+ )
1056
+ result = UpdateStatus(
1057
+ cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs],
1058
+ row_count_stats=row_counts,
1059
+ )
744
1060
 
745
1061
  # update views
746
1062
  for view in self.mutable_views:
747
1063
  from pixeltable.plan import Planner
748
- plan, _ = Planner.create_view_load_plan(view.path, propagates_insert=True)
749
- status = view._insert(plan, conn, timestamp, print_stats=print_stats)
750
- result.num_rows += status.num_rows
751
- result.num_excs += status.num_excs
752
- result.num_computed_values += status.num_computed_values
753
- result.cols_with_excs += status.cols_with_excs
754
-
755
- result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
1064
+
1065
+ plan2, _ = Planner.create_view_load_plan(view.get().path, propagates_insert=True)
1066
+ status = view.get()._insert(plan2, timestamp, print_stats=print_stats)
1067
+ result += status.to_cascade()
1068
+
1069
+ # Use the net status after all propagations
1070
+ self.update_status = result
1071
+ self._write_md(new_version=True, new_schema_version=False)
756
1072
  if print_stats:
757
- plan.ctx.profile.print(num_rows=num_rows)
1073
+ exec_plan.ctx.profile.print(num_rows=result.num_rows)
758
1074
  _logger.info(f'TableVersion {self.name}: new version {self.version}')
759
1075
  return result
760
1076
 
761
- def update(
762
- self, value_spec: dict[str, Any], where: Optional[exprs.Expr] = None, cascade: bool = True
763
- ) -> UpdateStatus:
1077
+ def update(self, value_spec: dict[str, Any], where: exprs.Expr | None = None, cascade: bool = True) -> UpdateStatus:
764
1078
  """Update rows in this TableVersionPath.
765
1079
  Args:
766
1080
  value_spec: a list of (column, value) pairs specifying the columns to update and their new values.
@@ -768,354 +1082,583 @@ class TableVersion:
768
1082
  cascade: if True, also update all computed columns that transitively depend on the updated columns,
769
1083
  including within views.
770
1084
  """
771
- if self.is_snapshot:
772
- raise excs.Error('Cannot update a snapshot')
773
-
1085
+ from pixeltable.exprs import SqlElementCache
774
1086
  from pixeltable.plan import Planner
775
1087
 
776
- update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True)
1088
+ assert self.is_mutable
1089
+
1090
+ update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True, allow_media=True)
777
1091
  if where is not None:
778
1092
  if not isinstance(where, exprs.Expr):
779
- raise excs.Error(f"'where' argument must be a predicate, got {type(where)}")
1093
+ raise excs.Error(f'`where` argument must be a valid Pixeltable expression; got `{type(where)}`')
780
1094
  analysis_info = Planner.analyze(self.path, where)
781
1095
  # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
782
1096
  if analysis_info.filter is not None:
783
- raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
784
-
785
- with Env.get().engine.begin() as conn:
786
- plan, updated_cols, recomputed_cols = Planner.create_update_plan(self.path, update_spec, [], where, cascade)
787
- from pixeltable.exprs import SqlElementCache
788
- result = self.propagate_update(
789
- plan, where.sql_expr(SqlElementCache()) if where is not None else None, recomputed_cols,
790
- base_versions=[], conn=conn, timestamp=time.time(), cascade=cascade, show_progress=True)
791
- result.updated_cols = updated_cols
792
- return result
1097
+ raise excs.Error(f'Filter not expressible in SQL: {analysis_info.filter}')
1098
+
1099
+ plan, updated_cols, recomputed_cols = Planner.create_update_plan(self.path, update_spec, [], where, cascade)
1100
+
1101
+ result = self.propagate_update(
1102
+ plan,
1103
+ where.sql_expr(SqlElementCache()) if where is not None else None,
1104
+ recomputed_cols,
1105
+ base_versions=[],
1106
+ timestamp=time.time(),
1107
+ cascade=cascade,
1108
+ show_progress=True,
1109
+ )
1110
+ result += UpdateStatus(updated_cols=updated_cols)
1111
+ return result
793
1112
 
794
1113
  def batch_update(
795
- self, batch: list[dict[Column, exprs.Expr]], rowids: list[tuple[int, ...]], insert_if_not_exists: bool,
796
- error_if_not_exists: bool, cascade: bool = True,
1114
+ self,
1115
+ batch: list[dict[Column, exprs.Expr]],
1116
+ rowids: list[tuple[int, ...]],
1117
+ insert_if_not_exists: bool,
1118
+ error_if_not_exists: bool,
1119
+ cascade: bool = True,
797
1120
  ) -> UpdateStatus:
798
1121
  """Update rows in batch.
799
1122
  Args:
800
1123
  batch: one dict per row, each mapping Columns to LiteralExprs representing the new values
801
1124
  rowids: if not empty, one tuple per row, each containing the rowid values for the corresponding row in batch
802
1125
  """
1126
+ from pixeltable.plan import Planner
1127
+
803
1128
  # if we do lookups of rowids, we must have one for each row in the batch
804
1129
  assert len(rowids) == 0 or len(rowids) == len(batch)
805
- cols_with_excs: set[str] = set()
806
-
807
- with Env.get().engine.begin() as conn:
808
- from pixeltable.plan import Planner
809
1130
 
810
- plan, row_update_node, delete_where_clause, updated_cols, recomputed_cols = \
811
- Planner.create_batch_update_plan(self.path, batch, rowids, cascade=cascade)
812
- result = self.propagate_update(
813
- plan, delete_where_clause, recomputed_cols, base_versions=[], conn=conn, timestamp=time.time(),
814
- cascade=cascade)
815
- result.updated_cols = [c.qualified_name for c in updated_cols]
816
-
817
- unmatched_rows = row_update_node.unmatched_rows()
818
- if len(unmatched_rows) > 0:
819
- if error_if_not_exists:
820
- raise excs.Error(f'batch_update(): {len(unmatched_rows)} row(s) not found')
821
- if insert_if_not_exists:
822
- insert_status = self.insert(unmatched_rows, None, print_stats=False, fail_on_exception=False)
823
- result += insert_status
824
- return result
1131
+ plan, row_update_node, delete_where_clause, updated_cols, recomputed_cols = Planner.create_batch_update_plan(
1132
+ self.path, batch, rowids, cascade=cascade
1133
+ )
1134
+ result = self.propagate_update(
1135
+ plan, delete_where_clause, recomputed_cols, base_versions=[], timestamp=time.time(), cascade=cascade
1136
+ )
1137
+ result += UpdateStatus(updated_cols=[c.qualified_name for c in updated_cols])
1138
+
1139
+ unmatched_rows = row_update_node.unmatched_rows()
1140
+ if len(unmatched_rows) > 0:
1141
+ if error_if_not_exists:
1142
+ raise excs.Error(f'batch_update(): {len(unmatched_rows)} row(s) not found')
1143
+ if insert_if_not_exists:
1144
+ insert_status = self.insert(unmatched_rows, None, print_stats=False, fail_on_exception=False)
1145
+ result += insert_status.to_cascade()
1146
+ return result
825
1147
 
826
1148
  def _validate_update_spec(
827
- self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool
1149
+ self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool, allow_media: bool
828
1150
  ) -> dict[Column, exprs.Expr]:
829
1151
  update_targets: dict[Column, exprs.Expr] = {}
830
1152
  for col_name, val in value_spec.items():
831
1153
  if not isinstance(col_name, str):
832
- raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
1154
+ raise excs.Error(f'Update specification: dict key must be column name; got {col_name!r}')
833
1155
  if col_name == _ROWID_COLUMN_NAME:
834
1156
  # a valid rowid is a list of ints, one per rowid column
835
1157
  assert len(val) == len(self.store_tbl.rowid_columns())
836
1158
  for el in val:
837
1159
  assert isinstance(el, int)
838
1160
  continue
839
- col = self.path.get_column(col_name, include_bases=False)
1161
+ col = self.path.get_column(col_name)
840
1162
  if col is None:
841
- # TODO: return more informative error if this is trying to update a base column
842
- raise excs.Error(f'Column {col_name} unknown')
1163
+ raise excs.Error(f'Unknown column: {col_name}')
1164
+ if col.get_tbl().id != self.id:
1165
+ raise excs.Error(f'Column {col.name!r} is a base table column and cannot be updated')
843
1166
  if col.is_computed:
844
- raise excs.Error(f'Column {col_name} is computed and cannot be updated')
1167
+ raise excs.Error(f'Column {col_name!r} is computed and cannot be updated')
845
1168
  if col.is_pk and not allow_pk:
846
- raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
1169
+ raise excs.Error(f'Column {col_name!r} is a primary key column and cannot be updated')
1170
+ if col.col_type.is_media_type() and not allow_media:
1171
+ raise excs.Error(f'Column {col_name!r} is a media column and cannot be updated')
847
1172
 
848
1173
  # make sure that the value is compatible with the column type
1174
+ value_expr: exprs.Expr
849
1175
  try:
850
1176
  # check if this is a literal
851
- value_expr: exprs.Expr = exprs.Literal(val, col_type=col.col_type)
852
- except TypeError:
1177
+ value_expr = exprs.Literal(val, col_type=col.col_type)
1178
+ except (TypeError, jsonschema.exceptions.ValidationError) as exc:
853
1179
  if not allow_exprs:
854
1180
  raise excs.Error(
855
- f'Column {col_name}: value {val!r} is not a valid literal for this column '
856
- f'(expected {col.col_type})')
1181
+ f'Column {col_name!r}: value is not a valid literal for this column '
1182
+ f'(expected `{col.col_type}`): {val!r}'
1183
+ ) from exc
857
1184
  # it's not a literal, let's try to create an expr from it
858
1185
  value_expr = exprs.Expr.from_object(val)
859
1186
  if value_expr is None:
860
- raise excs.Error(f'Column {col_name}: value {val!r} is not a recognized literal or expression')
861
- if not col.col_type.matches(value_expr.col_type):
862
- raise excs.Error((
863
- f'Type of value {val!r} ({value_expr.col_type}) is not compatible with the type of column '
864
- f'{col_name} ({col.col_type})'
865
- ))
1187
+ raise excs.Error(
1188
+ f'Column {col_name!r}: value is not a recognized literal or expression: {val!r}'
1189
+ ) from exc
1190
+ if not col.col_type.is_supertype_of(value_expr.col_type, ignore_nullable=True):
1191
+ raise excs.Error(
1192
+ f'Type `{value_expr.col_type}` of value {val!r} is not compatible with the type '
1193
+ f'`{col.col_type}` of column {col_name!r}'
1194
+ ) from exc
866
1195
  update_targets[col] = value_expr
867
1196
 
868
1197
  return update_targets
869
1198
 
1199
+ def recompute_columns(
1200
+ self, col_names: list[str], where: exprs.Expr | None = None, errors_only: bool = False, cascade: bool = True
1201
+ ) -> UpdateStatus:
1202
+ from pixeltable.exprs import CompoundPredicate, SqlElementCache
1203
+ from pixeltable.plan import Planner
1204
+
1205
+ assert self.is_mutable
1206
+ assert all(name in self.cols_by_name for name in col_names)
1207
+ assert len(col_names) > 0
1208
+ assert len(col_names) == 1 or not errors_only
1209
+
1210
+ target_columns = [self.cols_by_name[name] for name in col_names]
1211
+ where_clause: exprs.Expr | None = None
1212
+ if where is not None:
1213
+ self._validate_where_clause(where, error_prefix='`where` argument')
1214
+ where_clause = where
1215
+ if errors_only:
1216
+ errortype_pred = (
1217
+ exprs.ColumnPropertyRef(exprs.ColumnRef(target_columns[0]), exprs.ColumnPropertyRef.Property.ERRORTYPE)
1218
+ != None
1219
+ )
1220
+ where_clause = CompoundPredicate.make_conjunction([where_clause, errortype_pred])
1221
+ plan, updated_cols, recomputed_cols = Planner.create_update_plan(
1222
+ self.path, update_targets={}, recompute_targets=target_columns, where_clause=where_clause, cascade=cascade
1223
+ )
1224
+
1225
+ result = self.propagate_update(
1226
+ plan,
1227
+ where_clause.sql_expr(SqlElementCache()) if where_clause is not None else None,
1228
+ recomputed_cols,
1229
+ base_versions=[],
1230
+ timestamp=time.time(),
1231
+ cascade=cascade,
1232
+ show_progress=True,
1233
+ )
1234
+ result += UpdateStatus(updated_cols=updated_cols)
1235
+ return result
1236
+
870
1237
  def propagate_update(
871
- self, plan: Optional[exec.ExecNode], where_clause: Optional[sql.ColumnElement],
872
- recomputed_view_cols: list[Column], base_versions: list[Optional[int]], conn: sql.engine.Connection,
873
- timestamp: float, cascade: bool, show_progress: bool = True
1238
+ self,
1239
+ plan: exec.ExecNode | None,
1240
+ where_clause: sql.ColumnElement | None,
1241
+ recomputed_view_cols: list[Column],
1242
+ base_versions: list[int | None],
1243
+ timestamp: float,
1244
+ cascade: bool,
1245
+ show_progress: bool = True,
874
1246
  ) -> UpdateStatus:
1247
+ from pixeltable.catalog import Catalog
1248
+ from pixeltable.plan import Planner
1249
+
1250
+ Catalog.get().mark_modified_tvs(self.handle)
875
1251
  result = UpdateStatus()
876
- if plan is not None:
877
- # we're creating a new version
878
- self.version += 1
879
- result.num_rows, result.num_excs, cols_with_excs = \
880
- self.store_tbl.insert_rows(plan, conn, v_min=self.version, show_progress=show_progress)
881
- result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
1252
+ create_new_table_version = plan is not None
1253
+ if create_new_table_version:
1254
+ self.bump_version(timestamp, bump_schema_version=False)
1255
+ cols_with_excs, row_counts = self.store_tbl.insert_rows(
1256
+ plan, v_min=self.version, show_progress=show_progress
1257
+ )
1258
+ result += UpdateStatus(
1259
+ row_count_stats=row_counts.insert_to_update(),
1260
+ cols_with_excs=[f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs],
1261
+ )
882
1262
  self.store_tbl.delete_rows(
883
- self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause, conn=conn)
884
- self._update_md(timestamp, conn)
1263
+ self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause
1264
+ )
885
1265
 
886
1266
  if cascade:
887
- base_versions = [None if plan is None else self.version] + base_versions # don't update in place
1267
+ base_versions = [None if plan is None else self.version, *base_versions] # don't update in place
888
1268
  # propagate to views
889
1269
  for view in self.mutable_views:
890
- recomputed_cols = [col for col in recomputed_view_cols if col.tbl is view]
1270
+ recomputed_cols = [col for col in recomputed_view_cols if col.get_tbl().id == view.id]
891
1271
  plan = None
892
1272
  if len(recomputed_cols) > 0:
893
- from pixeltable.plan import Planner
894
- plan = Planner.create_view_update_plan(view.path, recompute_targets=recomputed_cols)
895
- status = view.propagate_update(
896
- plan, None, recomputed_view_cols, base_versions=base_versions, conn=conn, timestamp=timestamp, cascade=True)
897
- result.num_rows += status.num_rows
898
- result.num_excs += status.num_excs
899
- result.cols_with_excs += status.cols_with_excs
900
-
901
- result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
1273
+ plan = Planner.create_view_update_plan(view.get().path, recompute_targets=recomputed_cols)
1274
+ status = view.get().propagate_update(
1275
+ plan, None, recomputed_view_cols, base_versions=base_versions, timestamp=timestamp, cascade=True
1276
+ )
1277
+ result += status.to_cascade()
1278
+ if create_new_table_version:
1279
+ self.update_status = result
1280
+ self._write_md(new_version=True, new_schema_version=False)
902
1281
  return result
903
1282
 
904
- def delete(self, where: Optional[exprs.Expr] = None) -> UpdateStatus:
905
- """Delete rows in this table.
906
- Args:
907
- where: a predicate to filter rows to delete.
908
- """
909
- assert self.is_insertable()
1283
+ def _validate_where_clause(self, pred: exprs.Expr, error_prefix: str) -> None:
1284
+ """Validates that pred can be expressed as a SQL Where clause"""
1285
+ assert self.is_insertable
910
1286
  from pixeltable.exprs import Expr
911
1287
  from pixeltable.plan import Planner
912
- sql_where_clause: Optional[Expr] = None
913
- if where is not None:
914
- if not isinstance(where, Expr):
915
- raise excs.Error(f"'where' argument must be a predicate, got {type(where)}")
916
- analysis_info = Planner.analyze(self.path, where)
917
- # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
918
- if analysis_info.filter is not None:
919
- raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
920
- sql_where_clause = analysis_info.sql_where_clause
921
1288
 
922
- with Env.get().engine.begin() as conn:
923
- num_rows = self.propagate_delete(sql_where_clause, base_versions=[], conn=conn, timestamp=time.time())
1289
+ if not isinstance(pred, Expr):
1290
+ raise excs.Error(f'{error_prefix} must be a valid Pixeltable expression; got `{type(pred)}`')
1291
+ analysis_info = Planner.analyze(self.path, pred)
1292
+ # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
1293
+ if analysis_info.filter is not None:
1294
+ raise excs.Error(f'Filter not expressible in SQL: {analysis_info.filter}')
924
1295
 
925
- status = UpdateStatus(num_rows=num_rows)
1296
+ def delete(self, where: exprs.Expr | None = None) -> UpdateStatus:
1297
+ assert self.is_insertable
1298
+ if where is not None:
1299
+ self._validate_where_clause(where, error_prefix='`where` argument')
1300
+ status = self.propagate_delete(where, base_versions=[], timestamp=time.time())
926
1301
  return status
927
1302
 
928
1303
  def propagate_delete(
929
- self, where: Optional[exprs.Expr], base_versions: list[Optional[int]],
930
- conn: sql.engine.Connection, timestamp: float) -> int:
931
- """Delete rows in this table and propagate to views.
932
- Args:
933
- where: a predicate to filter rows to delete.
934
- Returns:
935
- number of deleted rows
936
- """
1304
+ self, where: exprs.Expr | None, base_versions: list[int | None], timestamp: float
1305
+ ) -> UpdateStatus:
1306
+ """Delete rows in this table and propagate to views"""
1307
+ from pixeltable.catalog import Catalog
1308
+
1309
+ Catalog.get().mark_modified_tvs(self.handle)
1310
+
1311
+ # print(f'calling sql_expr()')
937
1312
  sql_where_clause = where.sql_expr(exprs.SqlElementCache()) if where is not None else None
938
- num_rows = self.store_tbl.delete_rows(
939
- self.version + 1, base_versions=base_versions, match_on_vmin=False, where_clause=sql_where_clause,
940
- conn=conn)
941
- if num_rows > 0:
1313
+ # #print(f'sql_where_clause={str(sql_where_clause) if sql_where_clause is not None else None}')
1314
+ # sql_cols: list[sql.Column] = []
1315
+ # def collect_cols(col) -> None:
1316
+ # sql_cols.append(col)
1317
+ # sql.sql.visitors.traverse(sql_where_clause, {}, {'column': collect_cols})
1318
+ # x = [f'{str(c)}:{hash(c)}:{id(c.table)}' for c in sql_cols]
1319
+ # print(f'where_clause cols: {x}')
1320
+ del_rows = self.store_tbl.delete_rows(
1321
+ self.version + 1, base_versions=base_versions, match_on_vmin=False, where_clause=sql_where_clause
1322
+ )
1323
+ row_counts = RowCountStats(del_rows=del_rows) # delete
1324
+ result = UpdateStatus(row_count_stats=row_counts)
1325
+ if del_rows > 0:
942
1326
  # we're creating a new version
943
- self.version += 1
944
- self._update_md(timestamp, conn)
945
- else:
946
- pass
1327
+ self.bump_version(timestamp, bump_schema_version=False)
947
1328
  for view in self.mutable_views:
948
- num_rows += view.propagate_delete(
949
- where=None, base_versions=[self.version] + base_versions, conn=conn, timestamp=timestamp)
950
- return num_rows
1329
+ status = view.get().propagate_delete(
1330
+ where=None, base_versions=[self.version, *base_versions], timestamp=timestamp
1331
+ )
1332
+ result += status.to_cascade()
1333
+ self.update_status = result
1334
+
1335
+ if del_rows > 0:
1336
+ self._write_md(new_version=True, new_schema_version=False)
1337
+ return result
951
1338
 
952
1339
  def revert(self) -> None:
953
- """Reverts the table to the previous version.
954
- """
955
- assert not self.is_snapshot
1340
+ """Reverts the table to the previous version."""
1341
+ assert self.is_mutable
956
1342
  if self.version == 0:
957
1343
  raise excs.Error('Cannot revert version 0')
958
- with orm.Session(Env.get().engine, future=True) as session:
959
- self._revert(session)
960
- session.commit()
961
-
962
- def _delete_column(self, col: Column, conn: sql.engine.Connection) -> None:
963
- """Physically remove the column from the schema and the store table"""
964
- if col.is_stored:
965
- self.store_tbl.drop_column(col, conn)
966
- self.cols.remove(col)
967
- if col.name is not None:
968
- del self.cols_by_name[col.name]
969
- del self.cols_by_id[col.id]
970
-
971
- def _revert(self, session: orm.Session) -> None:
972
- """Reverts this table version and propagates to views"""
973
- conn = session.connection()
1344
+ self._revert()
1345
+
1346
+ def _revert(self) -> None:
1347
+ """
1348
+ Reverts the stored metadata for this table version and propagates to views.
1349
+
1350
+ Doesn't attempt to revert the in-memory metadata, but instead invalidates this TableVersion instance
1351
+ and relies on Catalog to reload it
1352
+ """
1353
+ from pixeltable.catalog import Catalog
1354
+
1355
+ conn = Env.get().conn
974
1356
  # make sure we don't have a snapshot referencing this version
975
1357
  # (unclear how to express this with sqlalchemy)
976
1358
  query = (
977
1359
  f"select ts.dir_id, ts.md->'name' "
978
- f"from {schema.Table.__tablename__} ts "
1360
+ f'from {schema.Table.__tablename__} ts '
979
1361
  f"cross join lateral jsonb_path_query(md, '$.view_md.base_versions[*]') as tbl_version "
980
1362
  f"where tbl_version->>0 = '{self.id.hex}' and (tbl_version->>1)::int = {self.version}"
981
1363
  )
982
1364
  result = list(conn.execute(sql.text(query)))
983
1365
  if len(result) > 0:
984
1366
  names = [row[1] for row in result]
985
- raise excs.Error((
986
- f'Current version is needed for {len(result)} snapshot{"s" if len(result) > 1 else ""} '
987
- f'({", ".join(names)})'
988
- ))
1367
+ raise excs.Error(
1368
+ (
1369
+ f'Current version is needed for {len(result)} snapshot{"s" if len(result) > 1 else ""}: '
1370
+ f'({", ".join(names)})'
1371
+ )
1372
+ )
989
1373
 
990
- conn = session.connection()
991
- # delete newly-added data
992
- MediaStore.delete(self.id, version=self.version)
993
1374
  conn.execute(sql.delete(self.store_tbl.sa_tbl).where(self.store_tbl.sa_tbl.c.v_min == self.version))
994
1375
 
995
1376
  # revert new deletions
996
1377
  set_clause: dict[sql.Column, Any] = {self.store_tbl.sa_tbl.c.v_max: schema.Table.MAX_VERSION}
997
- for index_info in self.idxs_by_name.values():
1378
+ for index_info in self.idxs.values():
998
1379
  # copy the index value back from the undo column and reset the undo column to NULL
999
1380
  set_clause[index_info.val_col.sa_col] = index_info.undo_col.sa_col
1000
1381
  set_clause[index_info.undo_col.sa_col] = None
1001
- stmt = sql.update(self.store_tbl.sa_tbl) \
1002
- .values(set_clause) \
1003
- .where(self.store_tbl.sa_tbl.c.v_max == self.version)
1382
+ stmt = sql.update(self.store_tbl.sa_tbl).values(set_clause).where(self.store_tbl.sa_tbl.c.v_max == self.version)
1004
1383
  conn.execute(stmt)
1005
1384
 
1006
- # revert schema changes
1385
+ # revert schema changes:
1386
+ # - undo changes to self._tbl_md and write that back
1387
+ # - delete newly-added TableVersion/TableSchemaVersion records
1388
+ Catalog.get().mark_modified_tvs(self.handle)
1389
+ old_version = self.version
1007
1390
  if self.version == self.schema_version:
1008
- # delete newly-added columns
1391
+ # physically delete newly-added columns and remove them from the stored md
1009
1392
  added_cols = [col for col in self.cols if col.schema_version_add == self.schema_version]
1010
1393
  if len(added_cols) > 0:
1011
- next_col_id = min(col.id for col in added_cols)
1394
+ self._tbl_md.next_col_id = min(col.id for col in added_cols)
1012
1395
  for col in added_cols:
1013
- self._delete_column(col, conn)
1014
- self.next_col_id = next_col_id
1396
+ if col.is_stored:
1397
+ self.store_tbl.drop_column(col)
1398
+ del self._tbl_md.column_md[col.id]
1015
1399
 
1016
1400
  # remove newly-added indices from the lookup structures
1017
1401
  # (the value and undo columns got removed in the preceding step)
1018
- added_idx_md = [md for md in self.idx_md.values() if md.schema_version_add == self.schema_version]
1402
+ added_idx_md = [md for md in self._tbl_md.index_md.values() if md.schema_version_add == self.schema_version]
1019
1403
  if len(added_idx_md) > 0:
1020
- next_idx_id = min(md.id for md in added_idx_md)
1404
+ self._tbl_md.next_idx_id = min(md.id for md in added_idx_md)
1021
1405
  for md in added_idx_md:
1022
- del self.idx_md[md.id]
1023
- del self.idxs_by_name[md.name]
1024
- self.next_idx_id = next_idx_id
1406
+ # TODO: drop the index
1407
+ del self._tbl_md.index_md[md.id]
1025
1408
 
1026
1409
  # make newly-dropped columns visible again
1027
- dropped_cols = [col for col in self.cols if col.schema_version_drop == self.schema_version]
1028
- for col in dropped_cols:
1029
- col.schema_version_drop = None
1410
+ dropped_col_md = [
1411
+ md for md in self._tbl_md.column_md.values() if md.schema_version_drop == self.schema_version
1412
+ ]
1413
+ for col_md in dropped_col_md:
1414
+ col_md.schema_version_drop = None
1030
1415
 
1031
1416
  # make newly-dropped indices visible again
1032
- dropped_idx_md = [md for md in self.idx_md.values() if md.schema_version_drop == self.schema_version]
1033
- for md in dropped_idx_md:
1034
- md.schema_version_drop = None
1035
-
1036
- # we need to determine the preceding schema version and reload the schema
1037
- schema_version_md_dict = session.query(schema.TableSchemaVersion.md) \
1038
- .where(schema.TableSchemaVersion.tbl_id == self.id) \
1039
- .where(schema.TableSchemaVersion.schema_version == self.schema_version) \
1040
- .scalar()
1041
- preceding_schema_version = schema_version_md_dict['preceding_schema_version']
1042
- preceding_schema_version_md_dict = session.query(schema.TableSchemaVersion.md) \
1043
- .where(schema.TableSchemaVersion.tbl_id == self.id) \
1044
- .where(schema.TableSchemaVersion.schema_version == preceding_schema_version) \
1045
- .scalar()
1046
- preceding_schema_version_md = schema.md_from_dict(
1047
- schema.TableSchemaVersionMd, preceding_schema_version_md_dict)
1048
- tbl_md = self._create_tbl_md()
1049
- self._init_schema(tbl_md, preceding_schema_version_md)
1417
+ dropped_idx_md = [
1418
+ md for md in self._tbl_md.index_md.values() if md.schema_version_drop == self.schema_version
1419
+ ]
1420
+ for idx_md in dropped_idx_md:
1421
+ idx_md.schema_version_drop = None
1050
1422
 
1051
1423
  conn.execute(
1052
1424
  sql.delete(schema.TableSchemaVersion.__table__)
1053
- .where(schema.TableSchemaVersion.tbl_id == self.id)
1054
- .where(schema.TableSchemaVersion.schema_version == self.schema_version))
1055
- self.schema_version = preceding_schema_version
1056
- self.comment = preceding_schema_version_md.comment
1057
- self.num_retained_versions = preceding_schema_version_md.num_retained_versions
1425
+ .where(schema.TableSchemaVersion.tbl_id == self.id)
1426
+ .where(schema.TableSchemaVersion.schema_version == self.schema_version)
1427
+ )
1428
+ self._tbl_md.current_schema_version = self._schema_version_md.preceding_schema_version
1058
1429
 
1059
1430
  conn.execute(
1060
1431
  sql.delete(schema.TableVersion.__table__)
1061
- .where(schema.TableVersion.tbl_id == self.id)
1062
- .where(schema.TableVersion.version == self.version)
1432
+ .where(schema.TableVersion.tbl_id == self.id)
1433
+ .where(schema.TableVersion.version == self.version)
1063
1434
  )
1064
- self.version -= 1
1065
- conn.execute(
1066
- sql.update(schema.Table.__table__)
1067
- .values({schema.Table.md: dataclasses.asdict(self._create_tbl_md())})
1068
- .where(schema.Table.id == self.id))
1435
+
1436
+ self._tbl_md.current_version = self._version_md.version = self.version - 1
1437
+
1438
+ self._write_md(new_version=False, new_schema_version=False)
1069
1439
 
1070
1440
  # propagate to views
1071
1441
  for view in self.mutable_views:
1072
- view._revert(session)
1073
- _logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
1442
+ view.get()._revert()
1443
+
1444
+ # force reload on next operation
1445
+ self.is_validated = False
1446
+ Catalog.get().remove_tbl_version(self.key)
1074
1447
 
1075
- def _init_external_stores(self, tbl_md: schema.TableMd) -> None:
1076
- for store_md in tbl_md.external_stores:
1448
+ # delete newly-added data
1449
+ # Do this at the end, after all DB operations have completed.
1450
+ # TODO: The transaction could still fail. Really this should be done via PendingTableOps.
1451
+ self.delete_media(tbl_version=old_version)
1452
+ _logger.info(f'TableVersion {self.name!r}: reverted to version {self.version}')
1453
+
1454
+ def _init_external_stores(self) -> None:
1455
+ from pixeltable.io.external_store import ExternalStore
1456
+
1457
+ for store_md in self.tbl_md.external_stores:
1077
1458
  store_cls = resolve_symbol(store_md['class'])
1078
- assert isinstance(store_cls, type) and issubclass(store_cls, pxt.io.ExternalStore)
1459
+ assert isinstance(store_cls, type) and issubclass(store_cls, ExternalStore)
1079
1460
  store = store_cls.from_dict(store_md['md'])
1080
1461
  self.external_stores[store.name] = store
1081
1462
 
1082
- def link_external_store(self, store: pxt.io.ExternalStore) -> None:
1083
- with Env.get().engine.begin() as conn:
1084
- store.link(self, conn) # May result in additional metadata changes
1085
- self.external_stores[store.name] = store
1086
- self._update_md(time.time(), conn, update_tbl_version=False)
1463
+ def link_external_store(self, store: ExternalStore) -> None:
1464
+ self.bump_version(bump_schema_version=True)
1087
1465
 
1088
- def unlink_external_store(self, store_name: str, delete_external_data: bool) -> None:
1089
- assert store_name in self.external_stores
1090
- store = self.external_stores[store_name]
1091
- with Env.get().engine.begin() as conn:
1092
- store.unlink(self, conn) # May result in additional metadata changes
1093
- del self.external_stores[store_name]
1094
- self._update_md(time.time(), conn, update_tbl_version=False)
1466
+ self.external_stores[store.name] = store
1467
+ self._tbl_md.external_stores.append(
1468
+ {'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()}
1469
+ )
1470
+ self._write_md(new_version=True, new_schema_version=True)
1471
+
1472
+ def unlink_external_store(self, store: ExternalStore) -> None:
1473
+ del self.external_stores[store.name]
1474
+ self.bump_version(bump_schema_version=True)
1475
+ idx = next(i for i, store_md in enumerate(self._tbl_md.external_stores) if store_md['md']['name'] == store.name)
1476
+ self._tbl_md.external_stores.pop(idx)
1477
+ self._write_md(new_version=True, new_schema_version=True)
1478
+
1479
+ @property
1480
+ def id(self) -> UUID:
1481
+ return self.key.tbl_id
1482
+
1483
+ @property
1484
+ def effective_version(self) -> int | None:
1485
+ return self.key.effective_version
1486
+
1487
+ @property
1488
+ def anchor_tbl_id(self) -> UUID | None:
1489
+ return self.key.anchor_tbl_id
1490
+
1491
+ @property
1492
+ def tbl_md(self) -> schema.TableMd:
1493
+ return self._tbl_md
1494
+
1495
+ @property
1496
+ def version_md(self) -> schema.VersionMd:
1497
+ return self._version_md
1498
+
1499
+ @property
1500
+ def schema_version_md(self) -> schema.SchemaVersionMd:
1501
+ return self._schema_version_md
1502
+
1503
+ @property
1504
+ def view_md(self) -> schema.ViewMd | None:
1505
+ return self._tbl_md.view_md
1506
+
1507
+ @property
1508
+ def name(self) -> str:
1509
+ return self._tbl_md.name
1510
+
1511
+ @property
1512
+ def user(self) -> str | None:
1513
+ return self._tbl_md.user
1514
+
1515
+ @property
1516
+ def is_replica(self) -> bool:
1517
+ return self._tbl_md.is_replica
1518
+
1519
+ @property
1520
+ def comment(self) -> str:
1521
+ return self._schema_version_md.comment
1522
+
1523
+ @comment.setter
1524
+ def comment(self, c: str) -> None:
1525
+ assert self.effective_version is None
1526
+ self._schema_version_md.comment = c
1527
+
1528
+ @property
1529
+ def num_retained_versions(self) -> int:
1530
+ return self._schema_version_md.num_retained_versions
1531
+
1532
+ @num_retained_versions.setter
1533
+ def num_retained_versions(self, n: int) -> None:
1534
+ assert self.effective_version is None
1535
+ self._schema_version_md.num_retained_versions = n
1536
+
1537
+ @property
1538
+ def version(self) -> int:
1539
+ return self._version_md.version
1540
+
1541
+ @property
1542
+ def created_at(self) -> float:
1543
+ return self._version_md.created_at
1544
+
1545
+ @property
1546
+ def schema_version(self) -> int:
1547
+ return self._schema_version_md.schema_version
1548
+
1549
+ def bump_version(self, timestamp: float | None = None, *, bump_schema_version: bool) -> None:
1550
+ """
1551
+ Increments the table version and adjusts all associated metadata. This will *not* trigger a database action;
1552
+ _write_md() must be called separately to persist the changes.
1095
1553
 
1096
- if delete_external_data and isinstance(store, pxt.io.external_store.Project):
1097
- store.delete()
1554
+ Args:
1555
+ timestamp: the creation time for the new version. Can be used to synchronize multiple metadata changes
1556
+ to the same timestamp. If `None`, then defaults to `time.time()`.
1557
+ bump_schema_version: if True, also adjusts the schema version (setting it equal to the new version)
1558
+ and associated metadata.
1559
+ """
1560
+ from pixeltable.catalog import Catalog
1098
1561
 
1562
+ assert self.effective_version is None
1563
+
1564
+ if timestamp is None:
1565
+ timestamp = time.time()
1566
+
1567
+ Catalog.get().mark_modified_tvs(self.handle)
1568
+
1569
+ old_version = self._tbl_md.current_version
1570
+ assert self._version_md.version == old_version
1571
+ new_version = old_version + 1
1572
+ self._tbl_md.current_version = new_version
1573
+ self._version_md.version = new_version
1574
+ self._version_md.created_at = timestamp
1575
+
1576
+ if bump_schema_version:
1577
+ old_schema_version = self._tbl_md.current_schema_version
1578
+ assert self._version_md.schema_version == old_schema_version
1579
+ assert self._schema_version_md.schema_version == old_schema_version
1580
+ self._tbl_md.current_schema_version = new_version
1581
+ self._version_md.schema_version = new_version
1582
+ self._schema_version_md.preceding_schema_version = old_schema_version
1583
+ self._schema_version_md.schema_version = new_version
1584
+
1585
+ @property
1586
+ def preceding_schema_version(self) -> int | None:
1587
+ return self._schema_version_md.preceding_schema_version
1588
+
1589
+ @property
1590
+ def update_status(self) -> UpdateStatus | None:
1591
+ return self._version_md.update_status
1592
+
1593
+ @update_status.setter
1594
+ def update_status(self, status: UpdateStatus) -> None:
1595
+ assert self.effective_version is None
1596
+ self._version_md.update_status = status
1597
+
1598
+ @property
1599
+ def media_validation(self) -> MediaValidation:
1600
+ return MediaValidation[self._schema_version_md.media_validation.upper()]
1601
+
1602
+ def next_col_id(self) -> int:
1603
+ val = self._tbl_md.next_col_id
1604
+ self._tbl_md.next_col_id += 1
1605
+ return val
1606
+
1607
+ @property
1608
+ def next_idx_id(self) -> int:
1609
+ return self._tbl_md.next_idx_id
1610
+
1611
+ @next_idx_id.setter
1612
+ def next_idx_id(self, id: int) -> None:
1613
+ assert self.effective_version is None
1614
+ self._tbl_md.next_idx_id = id
1615
+
1616
+ @property
1617
+ def next_row_id(self) -> int:
1618
+ return self._tbl_md.next_row_id
1619
+
1620
+ @next_row_id.setter
1621
+ def next_row_id(self, id: int) -> None:
1622
+ assert self.effective_version is None
1623
+ self._tbl_md.next_row_id = id
1624
+
1625
+ @property
1626
+ def is_snapshot(self) -> bool:
1627
+ return self.effective_version is not None
1628
+
1629
+ @property
1630
+ def is_mutable(self) -> bool:
1631
+ return not self.is_snapshot and not self.is_replica
1632
+
1633
+ @property
1099
1634
  def is_view(self) -> bool:
1100
- return self.base is not None
1635
+ return self.view_md is not None
1101
1636
 
1637
+ @property
1638
+ def include_base_columns(self) -> bool:
1639
+ return self.view_md is not None and self.view_md.include_base_columns
1640
+
1641
+ @property
1102
1642
  def is_component_view(self) -> bool:
1103
1643
  return self.iterator_cls is not None
1104
1644
 
1645
+ @property
1105
1646
  def is_insertable(self) -> bool:
1106
1647
  """Returns True if this corresponds to an InsertableTable"""
1107
- return not self.is_snapshot and not self.is_view()
1648
+ return self.is_mutable and not self.is_view
1108
1649
 
1109
1650
  def is_iterator_column(self, col: Column) -> bool:
1110
1651
  """Returns True if col is produced by an iterator"""
1111
1652
  # the iterator columns directly follow the pos column
1112
- return self.is_component_view() and col.id > 0 and col.id < self.num_iterator_cols + 1
1653
+ return self.is_component_view and col.id > 0 and col.id < self.num_iterator_cols + 1
1113
1654
 
1114
1655
  def is_system_column(self, col: Column) -> bool:
1115
1656
  """Return True if column was created by Pixeltable"""
1116
- if col.name == _POS_COLUMN_NAME and self.is_component_view():
1117
- return True
1118
- return False
1657
+ return col.name == _POS_COLUMN_NAME and self.is_component_view
1658
+
1659
+ def iterator_columns(self) -> list[Column]:
1660
+ """Return all iterator-produced columns"""
1661
+ return self.cols[1 : self.num_iterator_cols + 1]
1119
1662
 
1120
1663
  def user_columns(self) -> list[Column]:
1121
1664
  """Return all non-system columns"""
@@ -1125,9 +1668,14 @@ class TableVersion:
1125
1668
  """Return all non-system columns"""
1126
1669
  return [c for c in self.cols if c.is_pk]
1127
1670
 
1671
+ @property
1672
+ def primary_key(self) -> list[str]:
1673
+ """Return the names of the primary key columns"""
1674
+ return [c.name for c in self.cols if c.is_pk]
1675
+
1128
1676
  def get_required_col_names(self) -> list[str]:
1129
1677
  """Return the names of all columns for which values must be specified in insert()"""
1130
- assert not self.is_view()
1678
+ assert not self.is_view
1131
1679
  names = [c.name for c in self.cols_by_name.values() if not c.is_computed and not c.col_type.nullable]
1132
1680
  return names
1133
1681
 
@@ -1136,87 +1684,58 @@ class TableVersion:
1136
1684
  names = [c.name for c in self.cols_by_name.values() if c.is_computed]
1137
1685
  return names
1138
1686
 
1139
- def _record_refd_columns(self, col: Column) -> None:
1140
- """Update Column.dependent_cols for all cols referenced in col.value_expr.
1141
- """
1142
- import pixeltable.exprs as exprs
1143
- if col.value_expr_dict is not None:
1144
- # if we have a value_expr_dict, use that instead of instantiating the value_expr
1145
- refd_cols = exprs.Expr.get_refd_columns(col.value_expr_dict)
1146
- else:
1147
- refd_cols = [e.col for e in col.value_expr.subexprs(expr_class=exprs.ColumnRef)]
1148
- for refd_col in refd_cols:
1149
- refd_col.dependent_cols.add(col)
1150
-
1151
1687
  def get_idx_val_columns(self, cols: Iterable[Column]) -> set[Column]:
1152
- result = {info.val_col for col in cols for info in col.get_idx_info().values()}
1153
- return result
1688
+ # assumes that the indexed columns are all in this table
1689
+ assert all(col.get_tbl().id == self.id for col in cols)
1690
+ col_ids = {col.id for col in cols}
1691
+ return {info.val_col for info in self.idxs.values() if info.col.id in col_ids}
1692
+
1693
+ def get_idx(self, col: Column, idx_name: str | None, idx_cls: type[index.IndexBase]) -> TableVersion.IndexInfo:
1694
+ if not self.supports_idxs:
1695
+ raise excs.Error('Snapshot does not support indices')
1696
+ if col.qid not in self.idxs_by_col:
1697
+ raise excs.Error(f'Column {col.name!r} does not have a {idx_cls.display_name()} index')
1698
+ candidates = [info for info in self.idxs_by_col[col.qid] if isinstance(info.idx, idx_cls)]
1699
+ if len(candidates) == 0:
1700
+ raise excs.Error(f'No {idx_cls.display_name()} index found for column {col.name!r}')
1701
+ if len(candidates) > 1 and idx_name is None:
1702
+ raise excs.Error(
1703
+ f'Column {col.name!r} has multiple {idx_cls.display_name()} indices; specify `idx_name` instead'
1704
+ )
1705
+ if idx_name is not None and idx_name not in [info.name for info in candidates]:
1706
+ raise excs.Error(f'Index {idx_name!r} not found for column {col.name!r}')
1707
+ return candidates[0] if idx_name is None else next(info for info in candidates if info.name == idx_name)
1154
1708
 
1155
1709
  def get_dependent_columns(self, cols: Iterable[Column]) -> set[Column]:
1156
1710
  """
1157
1711
  Return the set of columns that transitively depend on any of the given ones.
1158
1712
  """
1159
- result = {dependent_col for col in cols for dependent_col in col.dependent_cols}
1713
+ from pixeltable.catalog import Catalog
1714
+
1715
+ cat = Catalog.get()
1716
+ result = set().union(*[cat.get_column_dependents(col.get_tbl().id, col.id) for col in cols])
1160
1717
  if len(result) > 0:
1161
1718
  result.update(self.get_dependent_columns(result))
1162
1719
  return result
1163
1720
 
1164
1721
  def num_rowid_columns(self) -> int:
1165
1722
  """Return the number of columns of the rowids, without accessing store_tbl"""
1166
- if self.is_component_view():
1167
- return 1 + self.base.num_rowid_columns()
1723
+ if self.is_component_view:
1724
+ return 1 + self.base.get().num_rowid_columns()
1168
1725
  return 1
1169
1726
 
1170
1727
  @classmethod
1171
- def _create_column_md(cls, cols: list[Column]) -> dict[int, schema.ColumnMd]:
1172
- column_md: dict[int, schema.ColumnMd] = {}
1173
- for col in cols:
1174
- value_expr_dict = col.value_expr.as_dict() if col.value_expr is not None else None
1175
- column_md[col.id] = schema.ColumnMd(
1176
- id=col.id, col_type=col.col_type.as_dict(), is_pk=col.is_pk,
1177
- schema_version_add=col.schema_version_add, schema_version_drop=col.schema_version_drop,
1178
- value_expr=value_expr_dict, stored=col.stored)
1179
- return column_md
1180
-
1181
- @classmethod
1182
- def _create_stores_md(cls, stores: Iterable[pxt.io.ExternalStore]) -> list[dict[str, Any]]:
1728
+ def _create_stores_md(cls, stores: Iterable[ExternalStore]) -> list[dict[str, Any]]:
1183
1729
  return [
1184
- {
1185
- 'class': f'{type(store).__module__}.{type(store).__qualname__}',
1186
- 'md': store.as_dict()
1187
- }
1188
- for store in stores
1730
+ {'class': f'{type(store).__module__}.{type(store).__qualname__}', 'md': store.as_dict()} for store in stores
1189
1731
  ]
1190
1732
 
1191
- def _create_tbl_md(self) -> schema.TableMd:
1192
- return schema.TableMd(
1193
- name=self.name, current_version=self.version, current_schema_version=self.schema_version,
1194
- next_col_id=self.next_col_id, next_idx_id=self.next_idx_id, next_row_id=self.next_rowid,
1195
- column_md=self._create_column_md(self.cols), index_md=self.idx_md,
1196
- external_stores=self._create_stores_md(self.external_stores.values()), view_md=self.view_md,
1197
- )
1198
-
1199
- def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
1200
- return schema.TableVersionMd(created_at=timestamp, version=self.version, schema_version=self.schema_version)
1201
-
1202
- def _create_schema_version_md(self, preceding_schema_version: int) -> schema.TableSchemaVersionMd:
1203
- column_md: dict[int, schema.SchemaColumn] = {}
1204
- for pos, col in enumerate(self.cols_by_name.values()):
1205
- column_md[col.id] = schema.SchemaColumn(
1206
- pos=pos, name=col.name,
1207
- media_validation=col._media_validation.name.lower() if col._media_validation is not None else None)
1208
- # preceding_schema_version to be set by the caller
1209
- return schema.TableSchemaVersionMd(
1210
- schema_version=self.schema_version, preceding_schema_version=preceding_schema_version,
1211
- columns=column_md, num_retained_versions=self.num_retained_versions, comment=self.comment,
1212
- media_validation=self.media_validation.name.lower())
1213
-
1214
1733
  def as_dict(self) -> dict:
1215
- return {'id': str(self.id), 'effective_version': self.effective_version}
1734
+ return self.key.as_dict()
1216
1735
 
1217
1736
  @classmethod
1218
1737
  def from_dict(cls, d: dict) -> TableVersion:
1219
- import pixeltable.catalog as catalog
1220
- id = UUID(d['id'])
1221
- effective_version = d['effective_version']
1222
- return catalog.Catalog.get().tbl_versions[(id, effective_version)]
1738
+ from pixeltable.catalog import Catalog
1739
+
1740
+ key = TableVersionKey.from_dict(d)
1741
+ return Catalog.get().get_tbl_version(key)