pixeltable 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (119) hide show
  1. pixeltable/__init__.py +53 -0
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/__init__.py +13 -0
  4. pixeltable/catalog/catalog.py +159 -0
  5. pixeltable/catalog/column.py +181 -0
  6. pixeltable/catalog/dir.py +32 -0
  7. pixeltable/catalog/globals.py +33 -0
  8. pixeltable/catalog/insertable_table.py +192 -0
  9. pixeltable/catalog/named_function.py +36 -0
  10. pixeltable/catalog/path.py +58 -0
  11. pixeltable/catalog/path_dict.py +139 -0
  12. pixeltable/catalog/schema_object.py +39 -0
  13. pixeltable/catalog/table.py +695 -0
  14. pixeltable/catalog/table_version.py +1026 -0
  15. pixeltable/catalog/table_version_path.py +133 -0
  16. pixeltable/catalog/view.py +203 -0
  17. pixeltable/dataframe.py +749 -0
  18. pixeltable/env.py +466 -0
  19. pixeltable/exceptions.py +17 -0
  20. pixeltable/exec/__init__.py +10 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +116 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +94 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +73 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +226 -0
  31. pixeltable/exprs/__init__.py +25 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +114 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +199 -0
  39. pixeltable/exprs/expr.py +594 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +382 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +96 -0
  44. pixeltable/exprs/in_predicate.py +96 -0
  45. pixeltable/exprs/inline_array.py +109 -0
  46. pixeltable/exprs/inline_dict.py +103 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +66 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +329 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/similarity_expr.py +65 -0
  56. pixeltable/exprs/type_cast.py +53 -0
  57. pixeltable/exprs/variable.py +45 -0
  58. pixeltable/ext/__init__.py +5 -0
  59. pixeltable/ext/functions/yolox.py +92 -0
  60. pixeltable/func/__init__.py +7 -0
  61. pixeltable/func/aggregate_function.py +197 -0
  62. pixeltable/func/callable_function.py +113 -0
  63. pixeltable/func/expr_template_function.py +99 -0
  64. pixeltable/func/function.py +141 -0
  65. pixeltable/func/function_registry.py +227 -0
  66. pixeltable/func/globals.py +46 -0
  67. pixeltable/func/nos_function.py +202 -0
  68. pixeltable/func/signature.py +162 -0
  69. pixeltable/func/udf.py +164 -0
  70. pixeltable/functions/__init__.py +95 -0
  71. pixeltable/functions/eval.py +215 -0
  72. pixeltable/functions/fireworks.py +34 -0
  73. pixeltable/functions/huggingface.py +167 -0
  74. pixeltable/functions/image.py +16 -0
  75. pixeltable/functions/openai.py +289 -0
  76. pixeltable/functions/pil/image.py +147 -0
  77. pixeltable/functions/string.py +13 -0
  78. pixeltable/functions/together.py +143 -0
  79. pixeltable/functions/util.py +52 -0
  80. pixeltable/functions/video.py +62 -0
  81. pixeltable/globals.py +425 -0
  82. pixeltable/index/__init__.py +2 -0
  83. pixeltable/index/base.py +51 -0
  84. pixeltable/index/embedding_index.py +168 -0
  85. pixeltable/io/__init__.py +3 -0
  86. pixeltable/io/hf_datasets.py +188 -0
  87. pixeltable/io/pandas.py +148 -0
  88. pixeltable/io/parquet.py +192 -0
  89. pixeltable/iterators/__init__.py +3 -0
  90. pixeltable/iterators/base.py +52 -0
  91. pixeltable/iterators/document.py +432 -0
  92. pixeltable/iterators/video.py +88 -0
  93. pixeltable/metadata/__init__.py +58 -0
  94. pixeltable/metadata/converters/convert_10.py +18 -0
  95. pixeltable/metadata/converters/convert_12.py +3 -0
  96. pixeltable/metadata/converters/convert_13.py +41 -0
  97. pixeltable/metadata/schema.py +234 -0
  98. pixeltable/plan.py +620 -0
  99. pixeltable/store.py +424 -0
  100. pixeltable/tool/create_test_db_dump.py +184 -0
  101. pixeltable/tool/create_test_video.py +81 -0
  102. pixeltable/type_system.py +846 -0
  103. pixeltable/utils/__init__.py +17 -0
  104. pixeltable/utils/arrow.py +98 -0
  105. pixeltable/utils/clip.py +18 -0
  106. pixeltable/utils/coco.py +136 -0
  107. pixeltable/utils/documents.py +69 -0
  108. pixeltable/utils/filecache.py +195 -0
  109. pixeltable/utils/help.py +11 -0
  110. pixeltable/utils/http_server.py +70 -0
  111. pixeltable/utils/media_store.py +76 -0
  112. pixeltable/utils/pytorch.py +91 -0
  113. pixeltable/utils/s3.py +13 -0
  114. pixeltable/utils/sql.py +17 -0
  115. pixeltable/utils/transactional_directory.py +35 -0
  116. pixeltable-0.0.0.dist-info/LICENSE +18 -0
  117. pixeltable-0.0.0.dist-info/METADATA +131 -0
  118. pixeltable-0.0.0.dist-info/RECORD +119 -0
  119. pixeltable-0.0.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,1026 @@
1
+ from __future__ import annotations
2
+
3
+ import dataclasses
4
+ import importlib
5
+ import inspect
6
+ import logging
7
+ import time
8
+ from typing import Optional, List, Dict, Any, Tuple, Type, Set
9
+ from uuid import UUID
10
+
11
+ import sqlalchemy as sql
12
+ import sqlalchemy.orm as orm
13
+
14
+ import pixeltable
15
+ import pixeltable.func as func
16
+ import pixeltable.type_system as ts
17
+ import pixeltable.exceptions as excs
18
+ import pixeltable.index as index
19
+ from pixeltable.env import Env
20
+ from pixeltable.iterators import ComponentIterator
21
+ from pixeltable.metadata import schema
22
+ from pixeltable.utils.filecache import FileCache
23
+ from pixeltable.utils.media_store import MediaStore
24
+ from .column import Column
25
+ from .globals import UpdateStatus, POS_COLUMN_NAME, is_valid_identifier
26
+
27
+ _logger = logging.getLogger('pixeltable')
28
+
29
+ class TableVersion:
30
+ """
31
+ TableVersion represents a particular version of a table/view along with its physical representation:
32
+ - the physical representation is a store table with indices
33
+ - the version can be mutable or a snapshot
34
+ - tables and their recursive views form a tree, and a mutable TableVersion also records its own
35
+ mutable views in order to propagate updates
36
+ - each view TableVersion records its base:
37
+ * the base is correct only for mutable views (snapshot versions form a DAG, not a tree)
38
+ * the base is useful for getting access to the StoreTable and the base id
39
+ * TODO: create a separate hierarchy of objects that records the version-independent tree of tables/views, and
40
+ have TableVersions reference those
41
+ - mutable TableVersions record their TableVersionPath, which is needed for expr evaluation in updates
42
+ """
43
@dataclasses.dataclass
class IndexInfo:
    """In-memory description of one index, as created by _init_idxs() and add_index()."""
    id: int  # index id; the key of this index's metadata in TableVersion.idx_md
    name: str  # index name; the key of this record in TableVersion.idxs_by_name
    idx: index.IndexBase  # the index object itself
    col: Column  # the column being indexed
    val_col: Column  # unnamed column that stores the index value
    undo_col: Column  # unnamed companion of val_col; NOTE(review): presumably keeps the prior value for undo -- confirm
51
+
52
+
53
def __init__(
        self, id: UUID, tbl_md: schema.TableMd, version: int, schema_version_md: schema.TableSchemaVersionMd,
        base: Optional[TableVersion] = None, base_path: Optional['pixeltable.catalog.TableVersionPath'] = None,
        is_snapshot: Optional[bool] = None
):
    """Build the in-memory representation of one version of a table or view.

    The new instance registers itself in the catalog and, if it is a mutable view of a mutable base,
    in the base's `mutable_views`.

    Args:
        id: id of the table
        tbl_md: table-level metadata (name, columns, indices, view definition, id counters)
        version: the version this object represents
        schema_version_md: metadata of the schema version that applies to this version
        base: base TableVersion; mutually exclusive with `base_path`
        base_path: path of the base, whose `tbl_version` becomes `self.base`; mutually exclusive with `base`
        is_snapshot: if truthy, this version is treated as a snapshot regardless of the view metadata
    """
    # only one of base and base_path can be non-None
    assert base is None or base_path is None
    self.id = id
    self.name = tbl_md.name
    self.version = version
    self.comment = schema_version_md.comment
    self.num_retained_versions = schema_version_md.num_retained_versions
    self.schema_version = schema_version_md.schema_version
    self.view_md = tbl_md.view_md  # save this as-is, it's needed for _create_md()
    is_view = tbl_md.view_md is not None
    self.is_snapshot = (is_view and tbl_md.view_md.is_snapshot) or bool(is_snapshot)
    # a mutable TableVersion doesn't have a static version
    self.effective_version = self.version if self.is_snapshot else None

    # mutable tables need their TableVersionPath for expr eval during updates
    from .table_version_path import TableVersionPath
    if self.is_snapshot:
        self.path = None
    else:
        self.path = TableVersionPath(self, base=base_path) if base_path is not None else TableVersionPath(self)

    self.base = base_path.tbl_version if base_path is not None else base
    if self.is_snapshot:
        # snapshots never hand out new ids, so the counters are set to -1
        self.next_col_id = -1
        self.next_idx_id = -1  # TODO: can snapshots have separate indices?
        self.next_rowid = -1
    else:
        assert tbl_md.current_version == self.version
        self.next_col_id = tbl_md.next_col_id
        self.next_idx_id = tbl_md.next_idx_id
        self.next_rowid = tbl_md.next_row_id

    # view-specific initialization
    from pixeltable import exprs
    predicate_dict = None if not is_view or tbl_md.view_md.predicate is None else tbl_md.view_md.predicate
    self.predicate = exprs.Expr.from_dict(predicate_dict) if predicate_dict is not None else None
    self.mutable_views: List[TableVersion] = []  # targets for update propagation
    if self.base is not None and not self.base.is_snapshot and not self.is_snapshot:
        self.base.mutable_views.append(self)

    # component view-specific initialization
    self.iterator_cls: Optional[Type[ComponentIterator]] = None
    self.iterator_args: Optional[exprs.InlineDict] = None
    self.num_iterator_cols = 0
    if is_view and tbl_md.view_md.iterator_class_fqn is not None:
        # resolve the iterator class from its fully-qualified name
        module_name, class_name = tbl_md.view_md.iterator_class_fqn.rsplit('.', 1)
        module = importlib.import_module(module_name)
        self.iterator_cls = getattr(module, class_name)
        self.iterator_args = exprs.Expr.from_dict(tbl_md.view_md.iterator_args)
        assert isinstance(self.iterator_args, exprs.InlineDict)
        output_schema, _ = self.iterator_cls.output_schema(**self.iterator_args.to_dict())
        self.num_iterator_cols = len(output_schema)
        # NOTE(review): this check runs after iterator_args was already passed to Expr.from_dict() above
        assert tbl_md.view_md.iterator_args is not None

    # register this table version now so that it's available when we're re-creating value exprs
    import pixeltable.catalog as catalog
    cat = catalog.Catalog.get()
    cat.tbl_versions[(self.id, self.effective_version)] = self

    # init schema after we determined whether we're a component view, and before we create the store table
    self.cols: List[Column] = []  # contains complete history of columns, incl dropped ones
    self.cols_by_name: dict[str, Column] = {}  # contains only user-facing (named) columns visible in this version
    self.cols_by_id: dict[int, Column] = {}  # contains only columns visible in this version
    self.idx_md = tbl_md.index_md  # needed for _create_tbl_md()
    self.idxs_by_name: dict[str, TableVersion.IndexInfo] = {}  # contains only actively maintained indices
    self._init_schema(tbl_md, schema_version_md)
124
+
125
+ def __hash__(self) -> int:
126
+ return hash(self.id)
127
+
128
def create_snapshot_copy(self) -> TableVersion:
    """Return a snapshot TableVersion with this (mutable) version's id, version number and base.

    The metadata is regenerated from the current in-memory state.
    """
    assert not self.is_snapshot
    tbl_md = self._create_tbl_md()
    # preceding_schema_version is required by the helper; 0 is only a dummy value here
    schema_version_md = self._create_schema_version_md(preceding_schema_version=0)
    return TableVersion(self.id, tbl_md, self.version, schema_version_md, base=self.base, is_snapshot=True)
135
+
136
@classmethod
def create(
        cls, session: orm.Session, dir_id: UUID, name: str, cols: List[Column], num_retained_versions: int,
        comment: str, base_path: Optional['pixeltable.catalog.TableVersionPath'] = None,
        view_md: Optional[schema.ViewMd] = None
) -> Tuple[UUID, Optional[TableVersion]]:
    """Create the metadata records for a new table or view and, where needed, its store table.

    Args:
        session: session the new metadata records are added to
        dir_id: id of the directory that will contain the table
        name: name of the table
        cols: columns of the table; their `id` and `schema_version_add` are assigned here
        num_retained_versions: recorded in the initial schema version
        comment: recorded in the initial schema version
        base_path: path of the base; must be given exactly when `view_md` is given
        view_md: view definition, or None for a regular table

    Returns:
        (id of the new table, its TableVersion). The TableVersion is None for a pure snapshot, i.e. a
        snapshot view with no predicate and no columns of its own, for which no store table is created.
    """
    # assign ids
    for pos, col in enumerate(cols):
        col.id = pos
        col.schema_version_add = 0
        if col.value_expr is None and col.compute_func is not None:
            cls._create_value_expr(col, base_path)
        if col.is_computed:
            col.check_value_expr()

    # named created_at rather than ts, so that it doesn't shadow the pixeltable.type_system alias
    created_at = time.time()
    # create schema.Table
    # Column.dependent_cols for existing cols is wrong at this point, but init() will set it correctly
    column_md = cls._create_column_md(cols)
    table_md = schema.TableMd(
        name=name, current_version=0, current_schema_version=0,
        next_col_id=len(cols), next_idx_id=0, next_row_id=0, column_md=column_md, index_md={}, view_md=view_md)
    tbl_record = schema.Table(dir_id=dir_id, md=dataclasses.asdict(table_md))
    session.add(tbl_record)
    session.flush()  # sets tbl_record.id
    assert tbl_record.id is not None

    # create schema.TableVersion
    table_version_md = schema.TableVersionMd(created_at=created_at, version=0, schema_version=0)
    tbl_version_record = schema.TableVersion(
        tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md))
    session.add(tbl_version_record)

    # create schema.TableSchemaVersion
    schema_col_md = {col.id: schema.SchemaColumn(pos=pos, name=col.name) for pos, col in enumerate(cols)}

    schema_version_md = schema.TableSchemaVersionMd(
        schema_version=0, preceding_schema_version=None, columns=schema_col_md,
        num_retained_versions=num_retained_versions, comment=comment)
    schema_version_record = schema.TableSchemaVersion(
        tbl_id=tbl_record.id, schema_version=0, md=dataclasses.asdict(schema_version_md))
    session.add(schema_version_record)

    # if this is purely a snapshot (it doesn't require any additional storage for columns and it doesn't have a
    # predicate to apply at runtime), we don't create a physical table and simply use the base's table version path
    if view_md is not None and view_md.is_snapshot and view_md.predicate is None and len(cols) == 0:
        return tbl_record.id, None

    assert (base_path is not None) == (view_md is not None)
    # a snapshot view is constructed from the base's TableVersion, a mutable view from the base's path
    base = base_path.tbl_version if base_path is not None and view_md.is_snapshot else None
    base_path = base_path if base_path is not None and not view_md.is_snapshot else None
    tbl_version = cls(tbl_record.id, table_md, 0, schema_version_md, base=base, base_path=base_path)
    tbl_version.store_tbl.create(session.connection())
    # TODO: create pgvector indices
    return tbl_record.id, tbl_version
193
+
194
@classmethod
def delete_md(cls, tbl_id: UUID, conn: sql.Connection) -> None:
    """Delete every metadata record of the table with id `tbl_id`.

    The schema-version and version records are deleted before the table record itself.
    """
    delete_stmts = (
        sql.delete(schema.TableSchemaVersion.__table__).where(schema.TableSchemaVersion.tbl_id == tbl_id),
        sql.delete(schema.TableVersion.__table__).where(schema.TableVersion.tbl_id == tbl_id),
        sql.delete(schema.Table.__table__).where(schema.Table.id == tbl_id),
    )
    for stmt in delete_stmts:
        conn.execute(stmt)
201
+
202
def drop(self) -> None:
    """Delete this table's media files, file-cache entries, metadata and store table, then unregister it."""
    tbl_id = self.id
    with Env.get().engine.begin() as conn:
        # media and cache entries are removed first, then metadata and the store table
        MediaStore.delete(tbl_id)
        FileCache.get().clear(tbl_id=tbl_id)
        self.delete_md(tbl_id, conn)
        self.store_tbl.drop(conn)

    # remove this table version from the catalog's registry
    from .catalog import Catalog
    registry_key = (tbl_id, self.effective_version)
    del Catalog.get().tbl_versions[registry_key]
    # TODO: remove from tbl_dependents
215
+
216
def _init_schema(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
    """Initialize columns, then indices, then the sqlalchemy schema, in that order."""
    # columns come first: the indices look them up by id
    self._init_cols(tbl_md, schema_version_md)
    self._init_idxs(tbl_md)
    # the sa schema is built last, once columns and indices exist
    self._init_sa_schema()
222
+
223
def _init_cols(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
    """Rebuild self.cols, self.cols_by_name and self.cols_by_id from the stored metadata.

    self.cols receives every column ever recorded; the two lookup dicts only receive the columns
    that are visible in self.schema_version.
    """
    import pixeltable.exprs as exprs
    self.cols = []
    self.cols_by_name = {}
    self.cols_by_id = {}
    for col_md in tbl_md.column_md.values():
        # a column without an entry in this schema version has no name
        if col_md.id in schema_version_md.columns:
            col_name = schema_version_md.columns[col_md.id].name
        else:
            col_name = None
        col = Column(
            col_id=col_md.id, name=col_name, col_type=ts.ColumnType.from_dict(col_md.col_type),
            is_pk=col_md.is_pk, stored=col_md.stored,
            schema_version_add=col_md.schema_version_add, schema_version_drop=col_md.schema_version_drop)
        col.tbl = self
        self.cols.append(col)

        # populate the lookup structures before Expr.from_dict()
        added_later = col_md.schema_version_add > self.schema_version
        already_dropped = (
            col_md.schema_version_drop is not None and col_md.schema_version_drop <= self.schema_version
        )
        if added_later or already_dropped:
            # not visible in this version
            continue
        if col.name is not None:
            self.cols_by_name[col.name] = col
        self.cols_by_id[col.id] = col

        # columns are traversed in creation order, which guarantees that the references in a
        # value expr always point at columns that were already registered above
        if col_md.value_expr is None:
            continue
        col.value_expr = exprs.Expr.from_dict(col_md.value_expr)
        self._record_value_expr(col)
254
+
255
def _init_idxs(self, tbl_md: schema.TableMd) -> None:
    """Instantiate the indices that are live in self.schema_version and register them in idxs_by_name."""
    self.idx_md = tbl_md.index_md
    self.idxs_by_name = {}
    import pixeltable.index as index_module
    current_schema_version = self.schema_version
    for md in tbl_md.index_md.values():
        not_yet_added = md.schema_version_add > current_schema_version
        if not_yet_added or (
                md.schema_version_drop is not None and md.schema_version_drop <= current_schema_version):
            # index not visible in this schema version
            continue

        # look up the index class by its unqualified name and instantiate it
        idx_cls_name = md.class_fqn.rsplit('.', 1)[-1]
        idx_cls = getattr(index_module, idx_cls_name)
        indexed_col = self.cols_by_id[md.indexed_col_id]
        idx = idx_cls.from_dict(indexed_col, md.init_args)

        # the value and undo columns take their sa column type from the index
        value_col = self.cols_by_id[md.index_val_col_id]
        value_col.sa_col_type = idx.index_sa_type()
        undo_col = self.cols_by_id[md.index_val_undo_col_id]
        undo_col.sa_col_type = idx.index_sa_type()

        self.idxs_by_name[md.name] = self.IndexInfo(
            id=md.id, name=md.name, idx=idx, col=indexed_col, val_col=value_col, undo_col=undo_col)
278
+
279
def _init_sa_schema(self) -> None:
    """Create self.store_tbl, choosing the store class that matches the kind of table.

    This runs after the columns were instantiated, so that it can be determined whether they need
    to record errors.
    """
    from pixeltable.store import StoreBase, StoreTable, StoreView, StoreComponentView
    if self.is_component_view():
        store_cls = StoreComponentView
    elif self.is_view():
        store_cls = StoreView
    else:
        store_cls = StoreTable
    self.store_tbl: StoreBase = store_cls(self)
289
+
290
def _update_md(self, ts: float, preceding_schema_version: Optional[int], conn: sql.engine.Connection) -> None:
    """Persist the current metadata after a data or schema change.

    The table record is overwritten and a new version record is inserted. A new schema version record
    is inserted only for a schema change.

    Args:
        ts: timestamp of the change
        preceding_schema_version: last schema version if this is a schema change, else None
        conn: connection on which the statements are executed
    """
    tbl_md_dict = dataclasses.asdict(self._create_tbl_md())
    update_tbl_stmt = (
        sql.update(schema.Table.__table__)
        .values({schema.Table.md: tbl_md_dict})
        .where(schema.Table.id == self.id)
    )
    conn.execute(update_tbl_stmt)

    version_md_dict = dataclasses.asdict(self._create_version_md(ts))
    insert_version_stmt = sql.insert(schema.TableVersion.__table__).values(
        tbl_id=self.id, version=self.version, md=version_md_dict)
    conn.execute(insert_version_stmt)

    if preceding_schema_version is None:
        # data-only change: the schema version record stays as it is
        return
    schema_version_md_dict = dataclasses.asdict(self._create_schema_version_md(preceding_schema_version))
    insert_schema_version_stmt = sql.insert(schema.TableSchemaVersion.__table__).values(
        tbl_id=self.id, schema_version=self.schema_version, md=schema_version_md_dict)
    conn.execute(insert_schema_version_stmt)
312
+
313
+ def _store_idx_name(self, idx_id: int) -> str:
314
+ """Return name of index in the store, which needs to be globally unique"""
315
+ return f'idx_{self.id.hex}_{idx_id}'
316
+
317
def add_index(self, col: Column, idx_name: Optional[str], idx: index.IndexBase) -> UpdateStatus:
    """Add index `idx` on column `col`, together with its two unnamed system columns.

    Args:
        col: the column to index
        idx_name: name of the index; if None, a name is derived from the index id
        idx: the index object

    Returns:
        The UpdateStatus of adding the index value and undo columns.
    """
    assert not self.is_snapshot
    idx_id = self.next_idx_id
    self.next_idx_id = idx_id + 1
    if idx_name is None:
        idx_name = f'idx{idx_id}'
    else:
        assert is_valid_identifier(idx_name)
        assert idx_name not in [existing.name for existing in self.idx_md.values()]

    # the version and the schema version are both advanced here
    # NOTE(review): an earlier comment stated that no new schema version is created because indices
    # aren't part of the logical schema; the code does record one -- confirm which is intended
    self.version += 1
    preceding_schema_version = self.schema_version
    self.schema_version = self.version
    with Env.get().engine.begin() as conn:
        # index value column; it needs to be nullable
        val_col = Column(
            col_id=self.next_col_id, name=None, computed_with=idx.index_value_expr(),
            sa_col_type=idx.index_sa_type(), stored=True,
            schema_version_add=self.schema_version, schema_version_drop=None)
        val_col.tbl = self
        val_col.col_type.nullable = True
        self.next_col_id += 1

        # undo column; it reuses the value column's types and also needs to be nullable
        undo_col = Column(
            col_id=self.next_col_id, name=None, col_type=val_col.col_type,
            sa_col_type=val_col.sa_col_type, stored=True,
            schema_version_add=self.schema_version, schema_version_drop=None)
        undo_col.tbl = self
        undo_col.col_type.nullable = True
        self.next_col_id += 1

        # create and register the index metadata
        idx_cls = type(idx)
        class_fqn = idx_cls.__module__ + '.' + idx_cls.__name__
        self.idx_md[idx_id] = schema.IndexMd(
            id=idx_id, name=idx_name,
            indexed_col_id=col.id, index_val_col_id=val_col.id, index_val_undo_col_id=undo_col.id,
            schema_version_add=self.schema_version, schema_version_drop=None,
            class_fqn=class_fqn, init_args=idx.as_dict())
        self.idxs_by_name[idx_name] = self.IndexInfo(
            id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)

        # add the columns and update the metadata
        status = self._add_columns([val_col, undo_col], conn, preceding_schema_version=preceding_schema_version)
        # now create the index structure
        store_idx_name = self._store_idx_name(idx_id)
        idx.create_index(store_idx_name, val_col, conn)

    _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
    return status
368
+
369
def drop_index(self, idx_id: int) -> None:
    """Mark index `idx_id` as dropped and drop its value and undo columns, in a new schema version."""
    assert not self.is_snapshot
    assert idx_id in self.idx_md

    # we're creating a new schema version
    self.version += 1
    preceding_schema_version = self.schema_version
    self.schema_version = self.version

    dropped_md = self.idx_md[idx_id]
    dropped_md.schema_version_drop = self.schema_version
    assert dropped_md.name in self.idxs_by_name
    # the index is no longer actively maintained
    dropped_info = self.idxs_by_name.pop(dropped_md.name)

    with Env.get().engine.begin() as conn:
        self._drop_columns([dropped_info.val_col, dropped_info.undo_col], conn, preceding_schema_version)
        _logger.info(f'Dropped index {dropped_md.name} on table {self.name}')
386
+
387
def add_column(self, col: Column, print_stats: bool = False) -> UpdateStatus:
    """Add column `col` to the table in a new schema version and print a one-line summary.

    Args:
        col: the column to add; its `tbl` and `id` are assigned here
        print_stats: forwarded to _add_columns()

    Returns:
        The UpdateStatus reported by _add_columns().
    """
    assert not self.is_snapshot
    assert is_valid_identifier(col.name)
    assert col.stored is not None
    assert col.name not in self.cols_by_name
    col.tbl = self
    col.id = self.next_col_id
    self.next_col_id += 1

    if col.compute_func is not None:
        # derive value_expr from compute_func
        self._create_value_expr(col, self.path)
    if col.value_expr is not None:
        col.check_value_expr()
        self._record_value_expr(col)

    # we're creating a new schema version
    self.version += 1
    preceding_schema_version = self.schema_version
    self.schema_version = self.version
    with Env.get().engine.begin() as conn:
        status = self._add_columns([col], conn, preceding_schema_version, print_stats=print_stats)
        _logger.info(f'Added column {col.name} to table {self.name}, new version: {self.version}')

    values_plural = '' if status.num_rows == 1 else 's'
    errors_plural = '' if status.num_excs == 1 else 's'
    msg = (
        f'Added {status.num_rows} column value{values_plural} '
        f'with {status.num_excs} error{errors_plural}.'
    )
    print(msg)
    _logger.info(f'Column {col.name}: {msg}')
    return status
420
+
421
def _add_columns(
        self, cols: List[Column], conn: sql.engine.Connection, preceding_schema_version: Optional[int] = None,
        print_stats: bool = False
) -> UpdateStatus:
    """Add and populate columns within the current transaction.

    Args:
        cols: the columns to add, in order; a later column's value_expr may reference an earlier one
        conn: connection of the enclosing transaction
        preceding_schema_version: forwarded to _update_md()
        print_stats: if True, print the profile of the last population plan that was run (if any)

    Returns:
        An UpdateStatus whose num_excs is the total over all populated columns.

    Raises:
        excs.Error: if a non-nullable, non-computed column is added to a table that has rows, or if
            populating a column fails with a DBAPIError. In the latter case all columns added by this
            call are removed from the lookup structures again.
    """
    from pixeltable.plan import Planner

    # named timestamp rather than ts, so that it doesn't shadow the pixeltable.type_system alias
    timestamp = time.time()

    row_count = self.store_tbl.count(conn=conn)
    for col in cols:
        if not col.col_type.nullable and not col.is_computed and row_count > 0:
            raise excs.Error(
                f'Cannot add non-nullable column "{col.name}" to table {self.name} with existing rows')

    num_excs = 0
    cols_with_excs: List[Column] = []
    num_added = 0  # how many entries of cols were appended to self.cols so far
    last_plan = None  # most recent population plan; stays None if no column needed populating
    for col in cols:
        col.schema_version_add = self.schema_version
        # add the column to the lookup structures now, rather than after the store changes executed successfully,
        # because it might be referenced by the next column's value_expr
        self.cols.append(col)
        num_added += 1
        if col.name is not None:
            self.cols_by_name[col.name] = col
        self.cols_by_id[col.id] = col

        if col.is_stored:
            self.store_tbl.add_column(col, conn)

        if not col.is_computed or not col.is_stored or row_count == 0:
            continue

        # populate the column
        plan, value_expr_slot_idx = Planner.create_add_column_plan(self.path, col)
        plan.ctx.num_rows = row_count
        last_plan = plan

        try:
            plan.ctx.conn = conn
            plan.open()
            col_num_excs = self.store_tbl.load_column(col, plan, value_expr_slot_idx, conn)
            # accumulate, so that an earlier column's errors aren't lost
            num_excs += col_num_excs
            if col_num_excs > 0:
                cols_with_excs.append(col)
        except sql.exc.DBAPIError as e:
            # undo the bookkeeping for every column this call added; they are the tail of self.cols
            del self.cols[-num_added:]
            for added_col in cols[:num_added]:
                if added_col.name is not None:
                    self.cols_by_name.pop(added_col.name, None)
                self.cols_by_id.pop(added_col.id, None)
            # we need to re-initialize the sqlalchemy schema
            self.store_tbl.create_sa_tbl()
            raise excs.Error(f'Error during SQL execution:\n{e}') from e
        finally:
            plan.close()

    self._update_md(timestamp, preceding_schema_version, conn)
    if print_stats and last_plan is not None:
        last_plan.ctx.profile.print(num_rows=row_count)
    # TODO(mkornacker): what to do about system columns with exceptions?
    return UpdateStatus(
        num_rows=row_count, num_computed_values=row_count, num_excs=num_excs,
        cols_with_excs=[f'{col.tbl.name}.{col.name}' for col in cols_with_excs if col.name is not None])
485
+
486
def drop_column(self, name: str) -> None:
    """Drop column `name`, along with the indices on it and their system columns.

    Raises:
        excs.Error: if there is no column `name`, or if named columns depend on it.
    """
    assert not self.is_snapshot
    if name not in self.cols_by_name:
        raise excs.Error(f'Unknown column: {name}')
    col = self.cols_by_name[name]
    dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
    if len(dependent_user_cols) > 0:
        # the two literals are adjacent on purpose: they form a single message argument
        raise excs.Error(
            f'Cannot drop column {name} because the following columns depend on it:\n'
            f'{", ".join([c.name for c in dependent_user_cols])}')

    # we're creating a new schema version
    self.version += 1
    preceding_schema_version = self.schema_version
    self.schema_version = self.version

    with Env.get().engine.begin() as conn:
        # drop this column and all dependent index columns and indices
        dropped_cols = [col]
        dropped_idx_names: List[str] = []
        for idx_info in self.idxs_by_name.values():
            if idx_info.col != col:
                continue
            dropped_cols.extend([idx_info.val_col, idx_info.undo_col])
            idx_md = self.idx_md[idx_info.id]
            idx_md.schema_version_drop = self.schema_version
            assert idx_md.name in self.idxs_by_name
            dropped_idx_names.append(idx_md.name)
        # update idxs_by_name only after the loop, because the loop iterates over it
        for idx_name in dropped_idx_names:
            del self.idxs_by_name[idx_name]
        self._drop_columns(dropped_cols, conn, preceding_schema_version)
    _logger.info(f'Dropped column {name} from table {self.name}, new version: {self.version}')
521
+
522
def _drop_columns(self, cols: list[Column], conn: sql.engine.Connection, preceding_schema_version: int) -> None:
    """Mark `cols` as dropped in the current schema version and persist the metadata.

    The columns stay in self.cols and are only removed from the lookup dicts.
    """
    assert not self.is_snapshot

    timestamp = time.time()
    for dropped in cols:
        if dropped.value_expr is not None:
            # remove the dropped column from Column.dependent_cols of every column that precedes it
            for earlier in self.cols:
                if earlier == dropped:
                    break
                earlier.dependent_cols.discard(dropped)

        dropped.schema_version_drop = self.schema_version
        if dropped.name is not None:
            assert dropped.name in self.cols_by_name
            self.cols_by_name.pop(dropped.name)
        assert dropped.id in self.cols_by_id
        self.cols_by_id.pop(dropped.id)

    self._update_md(timestamp, preceding_schema_version, conn)
    self.store_tbl.create_sa_tbl()
544
+
545
def rename_column(self, old_name: str, new_name: str) -> None:
    """Rename column `old_name` to `new_name` in a new schema version.

    Raises:
        excs.Error: if `old_name` is unknown, or `new_name` is not a valid identifier or already in use.
    """
    assert not self.is_snapshot
    if old_name not in self.cols_by_name:
        raise excs.Error(f'Unknown column: {old_name}')
    if not is_valid_identifier(new_name):
        raise excs.Error(f"Invalid column name: '{new_name}'")
    if new_name in self.cols_by_name:
        raise excs.Error(f'Column {new_name} already exists')

    # move the column to its new key
    renamed = self.cols_by_name.pop(old_name)
    renamed.name = new_name
    self.cols_by_name[new_name] = renamed

    # we're creating a new schema version
    timestamp = time.time()
    self.version += 1
    preceding_schema_version = self.schema_version
    self.schema_version = self.version

    with Env.get().engine.begin() as conn:
        self._update_md(timestamp, preceding_schema_version, conn)
    _logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')
569
+
570
def set_comment(self, new_comment: Optional[str]):
    """Replace the table comment and record the change as a new schema version."""
    log_msg = f'[{self.name}] Updating comment: {new_comment}'
    _logger.info(log_msg)
    self.comment = new_comment
    self._create_schema_version()
574
+
575
def set_num_retained_versions(self, new_num_retained_versions: int):
    """Replace num_retained_versions and record the change as a new schema version."""
    # build the message before the assignment: it reports the previous value
    log_msg = (
        f'[{self.name}] Updating num_retained_versions: {new_num_retained_versions} '
        f'(was {self.num_retained_versions})'
    )
    _logger.info(log_msg)
    self.num_retained_versions = new_num_retained_versions
    self._create_schema_version()
579
+
580
def _create_schema_version(self):
    """Advance the version and the schema version and persist the metadata in its own transaction."""
    timestamp = time.time()
    previous_schema_version = self.schema_version
    self.version += 1
    # a schema change: the schema version catches up with the new version
    self.schema_version = self.version
    with Env.get().engine.begin() as conn:
        self._update_md(timestamp, previous_schema_version, conn)
        _logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
589
+
590
def insert(
        self, rows: List[Dict[str, Any]], print_stats: bool = False, fail_on_exception : bool = True
) -> UpdateStatus:
    """Insert `rows` into this table within a single transaction.

    Args:
        rows: the rows to insert, each a dict
        print_stats: forwarded to _insert()
        fail_on_exception: if False, the insert plan is created with ignore_errors=True

    Returns:
        The UpdateStatus reported by _insert().
    """
    assert self.is_insertable()
    from pixeltable.plan import Planner
    insert_plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
    timestamp = time.time()
    with Env.get().engine.begin() as conn:
        status = self._insert(insert_plan, conn, timestamp, print_stats)
    return status
601
+
602
def _insert(
    self, exec_plan: exec.ExecNode, conn: sql.engine.Connection, ts: float, print_stats: bool = False,
) -> UpdateStatus:
    """Insert rows produced by exec_plan as a new version and propagate to views.

    Args:
        exec_plan: plan whose output rows are written to the store table.
        conn: connection on which the store and metadata writes are issued.
        ts: timestamp passed to the metadata update for the new version.
        print_stats: if True, print the execution profile of exec_plan.

    Returns:
        UpdateStatus with the counts of this table accumulated with those of its mutable views.
    """
    # we're creating a new version
    self.version += 1
    result = UpdateStatus()
    num_rows, num_excs, cols_with_excs = self.store_tbl.insert_rows(exec_plan, conn, v_min=self.version)
    # NOTE(review): next_rowid is overwritten with the count returned by insert_rows() rather than
    # advanced by it; presumably that count already accounts for existing rows - TODO confirm
    self.next_rowid = num_rows
    result.num_rows = num_rows
    result.num_excs = num_excs
    result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
    result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
    self._update_md(ts, None, conn)

    # update views
    for view in self.mutable_views:
        from pixeltable.plan import Planner
        view_plan, _ = Planner.create_view_load_plan(view.path, propagates_insert=True)
        status = view._insert(view_plan, conn, ts, print_stats)
        result.num_rows += status.num_rows
        result.num_excs += status.num_excs
        result.num_computed_values += status.num_computed_values
        result.cols_with_excs += status.cols_with_excs

    result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys())  # remove duplicates
    if print_stats:
        # report the profile of this table's own plan; the view plans are reported by the
        # recursive view._insert() calls above
        exec_plan.ctx.profile.print(num_rows=num_rows)
    _logger.info(f'TableVersion {self.name}: new version {self.version}')
    return result
632
+
633
def update(
    self, update_targets: dict[Column, 'pixeltable.exprs.Expr'],
    where_clause: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True
) -> UpdateStatus:
    """Run _update() with the given arguments inside a single transaction and return its status."""
    with Env.get().engine.begin() as conn:
        status = self._update(conn, update_targets, where_clause, cascade)
    return status
639
+
640
def batch_update(
    self, batch: list[dict[Column, 'pixeltable.exprs.Expr']], rowids: list[Tuple[int, ...]],
    cascade: bool = True
) -> UpdateStatus:
    """Update a batch of rows, one _update() call per row, inside a single transaction.

    Rows are matched on their rowid when `rowids` is given, otherwise on the primary key columns.

    Args:
        batch: one dict per row, each mapping Columns to LiteralExprs representing the new values
        rowids: if not empty, one tuple per row, each containing the rowid values for the corresponding row in batch
        cascade: forwarded to _update()

    Returns:
        UpdateStatus accumulated over all per-row updates.
    """
    # if we do lookups of rowids, we must have one for each row in the batch
    assert len(rowids) == 0 or len(rowids) == len(batch)
    import pixeltable.exprs as exprs
    total = UpdateStatus()
    exc_col_names: set[str] = set()
    updated_col_names: set[str] = set()
    pk_cols = self.primary_key_columns()
    match_on_rowid = len(rowids) > 0

    with Env.get().engine.begin() as conn:
        for row_idx, row in enumerate(batch):
            # one equality clause per rowid/primary key column; they are AND-ed together below
            clauses = []
            if match_on_rowid:
                num_rowid_cols = len(self.store_tbl.rowid_columns())
                for col_idx in range(num_rowid_cols):
                    assert len(rowids[row_idx]) == num_rowid_cols
                    clauses.append(exprs.RowidRef(self, col_idx) == rowids[row_idx][col_idx])
            else:
                for pk_col in pk_cols:
                    assert pk_col in row
                    clauses.append(exprs.ColumnRef(pk_col) == row[pk_col])

            where_clause: Optional[exprs.Expr] = None
            for clause in clauses:
                where_clause = clause if where_clause is None else where_clause & clause

            # primary key columns only identify the row, they are not update targets
            update_targets = {col: value for col, value in row.items() if col not in pk_cols}
            row_status = self._update(conn, update_targets, where_clause, cascade)
            total.num_rows += row_status.num_rows
            total.num_excs += row_status.num_excs
            total.num_computed_values += row_status.num_computed_values
            exc_col_names.update(row_status.cols_with_excs)
            updated_col_names.update(row_status.updated_cols)

    total.cols_with_excs = list(exc_col_names)
    total.updated_cols = list(updated_col_names)
    return total
692
+
693
def _update(
    self, conn: sql.engine.Connection, update_targets: dict[Column, 'pixeltable.exprs.Expr'],
    where_clause: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True
) -> UpdateStatus:
    """Update rows in this table.

    Args:
        conn: connection on which the update is executed.
        update_targets: dict mapping each column to update to the expression for its new value.
        where_clause: a Predicate to filter rows to update.
        cascade: if True, also update all computed columns that transitively depend on the updated columns,
            including within views.

    Returns:
        the UpdateStatus of _propagate_update(), with updated_cols set to the columns reported by the planner.
    """
    assert not self.is_snapshot
    from pixeltable.plan import Planner
    update_plan, updated_cols, recomputed_cols = Planner.create_update_plan(
        self.path, update_targets, [], where_clause, cascade)
    timestamp = time.time()
    sql_where = None
    if where_clause is not None:
        sql_where = where_clause.sql_expr()
    status = self._propagate_update(
        update_plan, sql_where, recomputed_cols, base_versions=[], conn=conn, ts=timestamp, cascade=cascade)
    status.updated_cols = updated_cols
    return status
714
+
715
def _propagate_update(
    self, plan: Optional[exec.ExecNode], where_clause: Optional[sql.ClauseElement],
    recomputed_view_cols: List[Column], base_versions: List[Optional[int]], conn: sql.engine.Connection,
    ts: float, cascade: bool
) -> UpdateStatus:
    """Apply an update plan to this table and, if requested, recurse into its mutable views.

    Args:
        plan: plan producing the new row versions; if None, this table is left untouched.
        where_clause: SQL clause passed to the store table when deleting the superseded rows.
        recomputed_view_cols: columns to recompute; each view picks out the ones whose `tbl` is that view.
        base_versions: versions of the base tables, passed through to the store table.
        conn: connection on which all statements are executed.
        ts: timestamp passed to the metadata update.
        cascade: if True, propagate to the mutable views.

    Returns:
        UpdateStatus accumulated over this table and the views that were visited.
    """
    result = UpdateStatus()
    if plan is not None:
        # we're creating a new version
        self.version += 1
        num_rows, num_excs, exc_col_ids = self.store_tbl.insert_rows(plan, conn, v_min=self.version)
        result.num_rows = num_rows
        result.num_excs = num_excs
        result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in exc_col_ids]
        self.store_tbl.delete_rows(
            self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause, conn=conn)
        self._update_md(ts, None, conn)

    if cascade:
        # prepend our own version (None if we weren't updated); builds a new list, the caller's is untouched
        own_version = None if plan is None else self.version
        base_versions = [own_version] + base_versions
        for view in self.mutable_views:
            view_cols = [col for col in recomputed_view_cols if col.tbl is view]
            view_plan: Optional[exec.ExecNode] = None
            if view_cols:
                from pixeltable.plan import Planner
                view_plan = Planner.create_view_update_plan(view.path, recompute_targets=view_cols)
            view_status = view._propagate_update(
                view_plan, None, recomputed_view_cols, base_versions=base_versions, conn=conn, ts=ts,
                cascade=True)
            result.num_rows += view_status.num_rows
            result.num_excs += view_status.num_excs
            result.cols_with_excs += view_status.cols_with_excs

    # drop duplicate column names while keeping first-seen order
    result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs))
    return result
748
+
749
def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
    """Delete rows in this table inside a single transaction.

    Args:
        where: a Predicate to filter rows to delete.

    Returns:
        UpdateStatus whose num_rows is the count returned by _delete().
    """
    assert self.is_insertable()
    from pixeltable.plan import Planner
    analysis_info = Planner.analyze(self, where)
    timestamp = time.time()
    with Env.get().engine.begin() as conn:
        num_deleted = self._delete(analysis_info.sql_where_clause, base_versions=[], conn=conn, ts=timestamp)

    return UpdateStatus(num_rows=num_deleted)
763
+
764
def _delete(
    self, where: Optional['pixeltable.exprs.Predicate'], base_versions: List[Optional[int]],
    conn: sql.engine.Connection, ts: float) -> int:
    """Delete rows in this table and propagate to views.

    Args:
        where: a Predicate to filter rows to delete.
        base_versions: versions of the base tables, passed through to the store table.
        conn: connection on which the deletes are executed.
        ts: timestamp passed to the metadata update.

    Returns:
        number of deleted rows, including those deleted in the views
    """
    sql_where_clause = None
    if where is not None:
        sql_where_clause = where.sql_expr()
    num_deleted = self.store_tbl.delete_rows(
        self.version + 1, base_versions=base_versions, match_on_vmin=False, where_clause=sql_where_clause,
        conn=conn)
    if num_deleted > 0:
        # only an effective delete creates a new version
        self.version += 1
        self._update_md(ts, None, conn)
    for view in self.mutable_views:
        view_base_versions = [self.version] + base_versions
        num_deleted += view._delete(where=None, base_versions=view_base_versions, conn=conn, ts=ts)
    return num_deleted
786
+
787
def revert(self) -> None:
    """Revert the table to its previous version and commit the change.

    Raises:
        excs.Error: if the table is at version 0.
    """
    assert not self.is_snapshot
    if self.version == 0:
        raise excs.Error('Cannot revert version 0')
    engine = Env.get().engine
    with orm.Session(engine, future=True) as session:
        self._revert(session)
        session.commit()
796
+
797
+ def _delete_column(self, col: Column, conn: sql.engine.Connection) -> None:
798
+ """Physically remove the column from the schema and the store table"""
799
+ if col.is_stored:
800
+ self.store_tbl.drop_column(col, conn)
801
+ self.cols.remove(col)
802
+ if col.name is not None:
803
+ del self.cols_by_name[col.name]
804
+ del self.cols_by_id[col.id]
805
+
806
def _revert(self, session: orm.Session) -> None:
    """Reverts this table version and propagates to views

    Undoes the data changes, the schema changes (if the current version is also a schema version)
    and the metadata records of the current version, then decrements self.version.

    Args:
        session: session on which all statements are executed; it is not committed here.

    Raises:
        excs.Error: if a table's view_md.base_versions references the current version of this table.
    """
    conn = session.connection()
    # make sure we don't have a snapshot referencing this version
    # (unclear how to express this with sqlalchemy)
    # the query matches every (table id, version) pair in view_md.base_versions against this table's
    # id and current version
    query = (
        f"select ts.dir_id, ts.md->'name' "
        f"from {schema.Table.__tablename__} ts "
        f"cross join lateral jsonb_path_query(md, '$.view_md.base_versions[*]') as tbl_version "
        f"where tbl_version->>0 = '{self.id.hex}' and (tbl_version->>1)::int = {self.version}"
    )
    result = list(conn.execute(sql.text(query)))
    if len(result) > 0:
        # second result column is the name stored in the table's md
        names = [row[1] for row in result]
        raise excs.Error((
            f'Current version is needed for {len(result)} snapshot{"s" if len(result) > 1 else ""} '
            f'({", ".join(names)})'
        ))

    conn = session.connection()
    # delete newly-added data
    MediaStore.delete(self.id, version=self.version)
    # rows with v_min == current version were created by this version
    conn.execute(sql.delete(self.store_tbl.sa_tbl).where(self.store_tbl.sa_tbl.c.v_min == self.version))

    # revert new deletions
    # rows with v_max == current version were superseded by this version; reset their v_max
    set_clause = {self.store_tbl.sa_tbl.c.v_max: schema.Table.MAX_VERSION}
    for index_info in self.idxs_by_name.values():
        # copy the index value back from the undo column and reset the undo column to NULL
        set_clause[index_info.val_col.sa_col] = index_info.undo_col.sa_col
        set_clause[index_info.undo_col.sa_col] = None
    stmt = sql.update(self.store_tbl.sa_tbl) \
        .values(set_clause) \
        .where(self.store_tbl.sa_tbl.c.v_max == self.version)
    conn.execute(stmt)

    # revert schema changes
    if self.version == self.schema_version:
        # delete newly-added columns
        added_cols = [col for col in self.cols if col.schema_version_add == self.schema_version]
        if len(added_cols) > 0:
            # the smallest id among the removed columns becomes the next id to hand out
            next_col_id = min(col.id for col in added_cols)
            for col in added_cols:
                self._delete_column(col, conn)
            self.next_col_id = next_col_id

        # remove newly-added indices from the lookup structures
        # (the value and undo columns got removed in the preceding step)
        added_idx_md = [md for md in self.idx_md.values() if md.schema_version_add == self.schema_version]
        if len(added_idx_md) > 0:
            # same id rollback as for the columns above
            next_idx_id = min(md.id for md in added_idx_md)
            for md in added_idx_md:
                del self.idx_md[md.id]
                del self.idxs_by_name[md.name]
            self.next_idx_id = next_idx_id

        # make newly-dropped columns visible again
        dropped_cols = [col for col in self.cols if col.schema_version_drop == self.schema_version]
        for col in dropped_cols:
            col.schema_version_drop = None

        # make newly-dropped indices visible again
        dropped_idx_md = [md for md in self.idx_md.values() if md.schema_version_drop == self.schema_version]
        for md in dropped_idx_md:
            md.schema_version_drop = None

        # we need to determine the preceding schema version and reload the schema
        schema_version_md_dict = session.query(schema.TableSchemaVersion.md) \
            .where(schema.TableSchemaVersion.tbl_id == self.id) \
            .where(schema.TableSchemaVersion.schema_version == self.schema_version) \
            .scalar()
        preceding_schema_version = schema_version_md_dict['preceding_schema_version']
        preceding_schema_version_md_dict = session.query(schema.TableSchemaVersion.md) \
            .where(schema.TableSchemaVersion.tbl_id == self.id) \
            .where(schema.TableSchemaVersion.schema_version == preceding_schema_version) \
            .scalar()
        preceding_schema_version_md = schema.md_from_dict(
            schema.TableSchemaVersionMd, preceding_schema_version_md_dict)
        tbl_md = self._create_tbl_md()
        self._init_schema(tbl_md, preceding_schema_version_md)

        # remove the record of the schema version being reverted; this must run while
        # self.schema_version still refers to it
        conn.execute(
            sql.delete(schema.TableSchemaVersion.__table__)
                .where(schema.TableSchemaVersion.tbl_id == self.id)
                .where(schema.TableSchemaVersion.schema_version == self.schema_version))
        self.schema_version = preceding_schema_version
        self.comment = preceding_schema_version_md.comment
        self.num_retained_versions = preceding_schema_version_md.num_retained_versions

    # remove the record of the version being reverted; this must run before self.version is decremented
    conn.execute(
        sql.delete(schema.TableVersion.__table__)
            .where(schema.TableVersion.tbl_id == self.id)
            .where(schema.TableVersion.version == self.version)
    )
    self.version -= 1
    # persist the table metadata as it stands after the revert
    conn.execute(
        sql.update(schema.Table.__table__)
            .values({schema.Table.md: dataclasses.asdict(self._create_tbl_md())})
            .where(schema.Table.id == self.id))

    # propagate to views
    for view in self.mutable_views:
        view._revert(session)
    _logger.info(f'TableVersion {self.name}: reverted to version {self.version}')
909
+
910
def is_view(self) -> bool:
    """Return True if this table version has a base."""
    return not (self.base is None)
912
+
913
def is_component_view(self) -> bool:
    """Return True if this table version has an iterator class."""
    return not (self.iterator_cls is None)
915
+
916
def is_insertable(self) -> bool:
    """Return True if this corresponds to an InsertableTable, i.e. is neither a snapshot nor a view."""
    return not (self.is_snapshot or self.is_view())
919
+
920
def is_iterator_column(self, col: Column) -> bool:
    """Return True if col is produced by an iterator."""
    if not self.is_component_view():
        return False
    # the iterator columns directly follow the pos column, i.e. they have ids 1..num_iterator_cols
    return 0 < col.id < self.num_iterator_cols + 1
924
+
925
def is_system_column(self, col: Column) -> bool:
    """Return True if column was created by Pixeltable.

    Only the column named POS_COLUMN_NAME of a component view qualifies.
    """
    has_pos_name = col.name == POS_COLUMN_NAME
    return bool(has_pos_name and self.is_component_view())
930
+
931
def user_columns(self) -> List[Column]:
    """Return all columns for which is_system_column() is false, in the order of self.cols."""
    non_system: List[Column] = []
    for col in self.cols:
        if self.is_system_column(col):
            continue
        non_system.append(col)
    return non_system
934
+
935
def primary_key_columns(self) -> List[Column]:
    """Return all columns flagged as primary key (is_pk), in the order of self.cols."""
    pk_cols: List[Column] = []
    for col in self.cols:
        if col.is_pk:
            pk_cols.append(col)
    return pk_cols
938
+
939
def get_required_col_names(self) -> List[str]:
    """Return the names of all columns for which values must be specified in insert().

    These are the columns that are neither computed nor nullable.
    """
    assert not self.is_view()
    required: List[str] = []
    for col in self.cols:
        if col.is_computed or col.col_type.nullable:
            continue
        required.append(col.name)
    return required
944
+
945
def get_computed_col_names(self) -> List[str]:
    """Return the names of all computed columns, in the order of self.cols."""
    computed: List[str] = []
    for col in self.cols:
        if col.is_computed:
            computed.append(col.name)
    return computed
949
+
950
@classmethod
def _create_value_expr(cls, col: Column, path: 'TableVersionPath') -> None:
    """
    Create col.value_expr, given col.compute_func.

    Each parameter name of compute_func is looked up as a column in path and passed to the
    function as a ColumnRef.
    Does not update Column.dependent_cols.

    Raises:
        excs.Error: if a parameter name does not resolve to a column in path.
    """
    assert col.value_expr is None
    assert col.compute_func is not None
    from pixeltable import exprs
    col_refs: List[exprs.ColumnRef] = []
    for param_name in inspect.signature(col.compute_func).parameters:
        refd_col = path.get_column(param_name)
        if refd_col is None:
            raise excs.Error(
                f'Column {col.name}: Callable parameter refers to an unknown column: {param_name}')
        col_refs.append(exprs.ColumnRef(refd_col))
    param_types = [ref.col_type for ref in col_refs]
    fn = func.make_function(col.compute_func, return_type=col.col_type, param_types=param_types)
    col.value_expr = fn(*col_refs)
971
+
972
def _record_value_expr(self, col: Column) -> None:
    """Register col in dependent_cols of every column referenced by a ColumnRef in col.value_expr."""
    assert col.value_expr is not None
    from pixeltable.exprs import ColumnRef
    # collect all referenced columns first, then register the dependency on each of them
    refd_cols = [col_ref.col for col_ref in col.value_expr.subexprs(expr_class=ColumnRef)]
    for base_col in refd_cols:
        base_col.dependent_cols.add(col)
980
+
981
def get_dependent_columns(self, cols: List[Column]) -> Set[Column]:
    """
    Return the set of columns that transitively depend on any of the given ones.

    The given columns themselves are only part of the result if they depend on one of the others.
    Always returns a set, also for empty input.
    """
    result: Set[Column] = set()
    # worklist traversal of the dependent_cols graph; every column is expanded at most once, so this
    # also terminates if the graph contains a cycle
    pending = list(cols)
    while pending:
        col = pending.pop()
        for dependent in col.dependent_cols:
            if dependent not in result:
                result.add(dependent)
                pending.append(dependent)
    return result
992
+
993
def num_rowid_columns(self) -> int:
    """Return the number of columns of the rowids, without accessing store_tbl"""
    if not self.is_component_view():
        return 1
    # a component view adds one rowid column to those of its base
    return 1 + self.base.num_rowid_columns()
998
+
999
@classmethod
def _create_column_md(cls, cols: List[Column]) -> dict[int, schema.ColumnMd]:
    """Return a dict mapping column id to the schema.ColumnMd built from each of the given columns."""
    md_by_id: Dict[int, schema.ColumnMd] = {}
    for col in cols:
        # columns without a value expression store None
        value_expr_dict = None
        if col.value_expr is not None:
            value_expr_dict = col.value_expr.as_dict()
        col_md = schema.ColumnMd(
            id=col.id, col_type=col.col_type.as_dict(), is_pk=col.is_pk,
            schema_version_add=col.schema_version_add, schema_version_drop=col.schema_version_drop,
            value_expr=value_expr_dict, stored=col.stored)
        md_by_id[col.id] = col_md
    return md_by_id
1009
+
1010
def _create_tbl_md(self) -> schema.TableMd:
    """Return a schema.TableMd built from the current in-memory state of this table version."""
    column_md = self._create_column_md(self.cols)
    return schema.TableMd(
        name=self.name,
        current_version=self.version,
        current_schema_version=self.schema_version,
        next_col_id=self.next_col_id,
        next_idx_id=self.next_idx_id,
        next_row_id=self.next_rowid,
        column_md=column_md,
        index_md=self.idx_md,
        view_md=self.view_md)
1015
+
1016
def _create_version_md(self, ts: float) -> schema.TableVersionMd:
    """Return a schema.TableVersionMd for the current version, created at ts."""
    version_md = schema.TableVersionMd(
        created_at=ts,
        version=self.version,
        schema_version=self.schema_version)
    return version_md
1018
+
1019
def _create_schema_version_md(self, preceding_schema_version: int) -> schema.TableSchemaVersionMd:
    """Return a schema.TableSchemaVersionMd for the current schema version.

    Each column's position is its index in the iteration order of cols_by_name.
    """
    schema_cols: Dict[int, schema.SchemaColumn] = {
        col.id: schema.SchemaColumn(pos=pos, name=col.name)
        for pos, col in enumerate(self.cols_by_name.values())
    }
    return schema.TableSchemaVersionMd(
        schema_version=self.schema_version,
        preceding_schema_version=preceding_schema_version,
        columns=schema_cols,
        num_retained_versions=self.num_retained_versions,
        comment=self.comment)