pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pixeltable might be problematic.
Files changed (58)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +11 -2
  4. pixeltable/catalog/catalog.py +407 -119
  5. pixeltable/catalog/column.py +38 -26
  6. pixeltable/catalog/globals.py +130 -15
  7. pixeltable/catalog/insertable_table.py +10 -9
  8. pixeltable/catalog/schema_object.py +6 -0
  9. pixeltable/catalog/table.py +245 -119
  10. pixeltable/catalog/table_version.py +142 -116
  11. pixeltable/catalog/table_version_handle.py +30 -2
  12. pixeltable/catalog/table_version_path.py +28 -4
  13. pixeltable/catalog/view.py +14 -20
  14. pixeltable/config.py +4 -0
  15. pixeltable/dataframe.py +10 -9
  16. pixeltable/env.py +5 -11
  17. pixeltable/exceptions.py +6 -0
  18. pixeltable/exec/exec_node.py +2 -0
  19. pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
  20. pixeltable/exec/sql_node.py +47 -30
  21. pixeltable/exprs/column_property_ref.py +2 -10
  22. pixeltable/exprs/column_ref.py +24 -21
  23. pixeltable/exprs/data_row.py +9 -0
  24. pixeltable/exprs/expr.py +4 -4
  25. pixeltable/exprs/row_builder.py +44 -13
  26. pixeltable/func/__init__.py +1 -0
  27. pixeltable/func/mcp.py +74 -0
  28. pixeltable/func/query_template_function.py +4 -2
  29. pixeltable/func/tools.py +12 -2
  30. pixeltable/func/udf.py +2 -2
  31. pixeltable/functions/__init__.py +1 -0
  32. pixeltable/functions/groq.py +108 -0
  33. pixeltable/functions/huggingface.py +8 -6
  34. pixeltable/functions/mistralai.py +2 -13
  35. pixeltable/functions/openai.py +1 -6
  36. pixeltable/functions/replicate.py +2 -2
  37. pixeltable/functions/util.py +6 -1
  38. pixeltable/globals.py +0 -2
  39. pixeltable/io/external_store.py +81 -54
  40. pixeltable/io/globals.py +1 -1
  41. pixeltable/io/label_studio.py +49 -45
  42. pixeltable/io/table_data_conduit.py +1 -1
  43. pixeltable/metadata/__init__.py +1 -1
  44. pixeltable/metadata/converters/convert_37.py +15 -0
  45. pixeltable/metadata/converters/convert_38.py +39 -0
  46. pixeltable/metadata/notes.py +2 -0
  47. pixeltable/metadata/schema.py +5 -0
  48. pixeltable/metadata/utils.py +78 -0
  49. pixeltable/plan.py +59 -139
  50. pixeltable/share/packager.py +2 -2
  51. pixeltable/store.py +114 -103
  52. pixeltable/type_system.py +30 -0
  53. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/METADATA +1 -1
  54. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/RECORD +57 -53
  55. pixeltable/utils/sample.py +0 -25
  56. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/LICENSE +0 -0
  57. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/WHEEL +0 -0
  58. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/entry_points.txt +0 -0
@@ -5,6 +5,7 @@ import functools
  import logging
  import random
  import time
+ from collections import defaultdict
  from contextlib import contextmanager
  from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, TypeVar
  from uuid import UUID
@@ -17,10 +18,9 @@ from pixeltable.env import Env
  from pixeltable.iterators import ComponentIterator
  from pixeltable.metadata import schema

- if TYPE_CHECKING:
- from pixeltable.plan import SampleClause
+ from .column import Column
  from .dir import Dir
- from .globals import IfExistsParam, IfNotExistsParam, MediaValidation
+ from .globals import IfExistsParam, IfNotExistsParam, MediaValidation, QColumnId
  from .insertable_table import InsertableTable
  from .path import Path
  from .schema_object import SchemaObject
@@ -31,6 +31,8 @@ from .table_version_path import TableVersionPath
  from .view import View

  if TYPE_CHECKING:
+ from pixeltable.plan import SampleClause
+
  from .. import DataFrame, exprs


@@ -60,9 +62,10 @@ def _unpack_row(
  return result


+ # -1: unlimited
  # for now, we don't limit the number of retries, because we haven't seen situations where the actual number of retries
  # grows uncontrollably
- _MAX_RETRIES = 0
+ _MAX_RETRIES = -1

  T = TypeVar('T')

@@ -71,25 +74,29 @@ def _retry_loop(*, for_write: bool) -> Callable[[Callable[..., T]], Callable[...
  def decorator(op: Callable[..., T]) -> Callable[..., T]:
  @functools.wraps(op)
  def loop(*args: Any, **kwargs: Any) -> T:
- num_remaining_retries = _MAX_RETRIES
+ num_retries = 0
  while True:
  try:
  # in order for retry to work, we need to make sure that there aren't any prior db updates
  # that are part of an ongoing transaction
  assert not Env.get().in_xact
- with Catalog.get().begin_xact(for_write=for_write):
+ with Catalog.get().begin_xact(for_write=for_write, convert_db_excs=False):
  return op(*args, **kwargs)
  except sql.exc.DBAPIError as e:
  # TODO: what other exceptions should we be looking for?
- if isinstance(e.orig, psycopg.errors.SerializationFailure):
- if num_remaining_retries > 0:
- num_remaining_retries -= 1
- _logger.debug(f'Serialization failure, retrying ({num_remaining_retries} retries left)')
+ if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
+ if num_retries < _MAX_RETRIES or _MAX_RETRIES == -1:
+ num_retries += 1
+ _logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
  time.sleep(random.uniform(0.1, 0.5))
  else:
  raise excs.Error(f'Serialization retry limit ({_MAX_RETRIES}) exceeded') from e
  else:
  raise
+ except Exception as e:
+ # for informational/debugging purposes
+ _logger.debug(f'retry_loop(): passing along {e}')
+ raise

  return loop

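For orientation, the retry behavior this release adopts (unlimited retries when _MAX_RETRIES == -1, with a short randomized sleep between attempts) can be illustrated in isolation. This is a minimal sketch; TransientConflict and retry_on_conflict are hypothetical names, not part of the package:

    import functools
    import random
    import time
    from typing import Any, Callable, TypeVar

    T = TypeVar('T')

    class TransientConflict(Exception):
        """Stand-in for a retryable error such as a serialization failure."""

    def retry_on_conflict(max_retries: int = -1) -> Callable[[Callable[..., T]], Callable[..., T]]:
        # max_retries == -1 mirrors the 'unlimited' convention of _MAX_RETRIES above
        def decorator(op: Callable[..., T]) -> Callable[..., T]:
            @functools.wraps(op)
            def loop(*args: Any, **kwargs: Any) -> T:
                num_retries = 0
                while True:
                    try:
                        return op(*args, **kwargs)
                    except TransientConflict:
                        if max_retries == -1 or num_retries < max_retries:
                            num_retries += 1
                            time.sleep(random.uniform(0.1, 0.5))  # jittered backoff
                        else:
                            raise
            return loop
        return decorator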
@@ -109,8 +116,8 @@ class Catalog:
  duplicate references to that table in the From clause (ie, incorrect Cartesian products)
  - in order to allow multiple concurrent Python processes to perform updates (data and/or schema) against a shared
  Pixeltable instance, Catalog needs to reload metadata from the store when there are changes
- - concurrent changes are detected by comparing TableVersion.version with the stored current version
- (TableMd.current_version)
+ - concurrent changes are detected by comparing TableVersion.version/view_sn with the stored current version
+ (TableMd.current_version/view_sn)
  - cached live TableVersion instances (those with effective_version == None) are validated against the stored
  metadata on transaction boundaries; this is recorded in TableVersion.is_validated
  - metadata validation is only needed for live TableVersion instances (snapshot instances are immutable)
@@ -118,13 +125,22 @@

  _instance: Optional[Catalog] = None

- # key: [id, version]
+ # cached TableVersion instances; key: [id, version]
  # - mutable version of a table: version == None (even though TableVersion.version is set correctly)
  # - snapshot versions: records the version of the snapshot
  _tbl_versions: dict[tuple[UUID, Optional[int]], TableVersion]
  _tbls: dict[UUID, Table]
  _in_write_xact: bool # True if we're in a write transaction
- _x_locked_tbl_id: Optional[UUID] # set if begin_xact() was asked to write-lock a table
+ _x_locked_tbl_ids: set[UUID] # non-empty for write transactions
+
+ # cached column dependencies
+ # - key: table id, value: mapping from column id to its dependencies
+ # - only maintained for dependencies between non-snapshot table versions
+ # - can contain stale entries (stemming from invalidated TV instances)
+ _column_dependencies: dict[UUID, dict[QColumnId, set[QColumnId]]]
+
+ # column dependents are recomputed at the beginning of every write transaction and only reflect the locked tree
+ _column_dependents: Optional[dict[QColumnId, set[QColumnId]]]

  @classmethod
  def get(cls) -> Catalog:
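The cache keying described in the comments above can be pictured with an ordinary dictionary; a minimal sketch with placeholder strings standing in for TableVersion instances:

    from typing import Optional
    from uuid import UUID, uuid4

    # (table id, None)    -> the single mutable ('live') instance, revalidated at transaction boundaries
    # (table id, version) -> an immutable snapshot instance, which never needs revalidation
    tbl_versions: dict[tuple[UUID, Optional[int]], str] = {}
    tbl_id = uuid4()
    tbl_versions[(tbl_id, None)] = 'live instance'
    tbl_versions[(tbl_id, 7)] = 'snapshot at version 7'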
@@ -147,9 +163,14 @@
  self._tbl_versions = {}
  self._tbls = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
  self._in_write_xact = False
- self._x_locked_tbl_id = None
+ self._x_locked_tbl_ids = set()
+ self._column_dependencies = {}
+ self._column_dependents = None
  self._init_store()

+ def _dropped_tbl_error_msg(self, tbl_id: UUID) -> str:
+ return f'Table was dropped (no record found for {tbl_id})'
+
  def validate(self) -> None:
  """Validate structural consistency of cached metadata"""
  for (tbl_id, effective_version), tbl_version in self._tbl_versions.items():
@@ -164,13 +185,24 @@
  f'snapshot_id={tbl_version.id} mutable_views={tbl_version.mutable_views}'
  )

- if tbl_version.is_view and tbl_version.is_mutable:
+ if tbl_version.is_view and tbl_version.is_mutable and tbl_version.is_validated:
  # make sure this mutable view is recorded in a mutable base
  base = tbl_version.base
  assert base is not None
  if base.effective_version is None:
  assert (base.id, None) in self._tbl_versions
- assert TableVersionHandle.create(tbl_version) in self._tbl_versions[base.id, None].mutable_views
+ base_tv = self._tbl_versions[base.id, None]
+ if not base_tv.is_validated:
+ continue
+ mutable_view_ids = ', '.join(str(tv.id) for tv in self._tbl_versions[base.id, None].mutable_views)
+ mutable_view_names = ', '.join(
+ tv._tbl_version.name
+ for tv in self._tbl_versions[base.id, None].mutable_views
+ if tv._tbl_version is not None
+ )
+ assert TableVersionHandle.create(tbl_version) in self._tbl_versions[base.id, None].mutable_views, (
+ f'{tbl_version.name} ({tbl_version.id}) missing in {mutable_view_ids} ({mutable_view_names})'
+ )

  if len(tbl_version.mutable_views) > 0:
  # make sure we also loaded mutable view metadata, which is needed to detect column dependencies
@@ -178,24 +210,37 @@
  assert v.effective_version is None, f'{v.id}:{v.effective_version}'

  @contextmanager
- def begin_xact(self, *, tbl_id: Optional[UUID] = None, for_write: bool = False) -> Iterator[sql.Connection]:
+ def begin_xact(
+ self,
+ *,
+ tbl: Optional[TableVersionPath] = None,
+ for_write: bool = False,
+ lock_mutable_tree: bool = False,
+ convert_db_excs: bool = True,
+ ) -> Iterator[sql.Connection]:
  """
  Return a context manager that yields a connection to the database. Idempotent.

  It is mandatory to call this method, not Env.begin_xact(), if the transaction accesses any table data
  or metadata.

- Lock acquisition:
- - x-locks Table records by updating Table.lock_dummy
+ If tbl != None, follows this locking protocol:
+ - validates/reloads the TableVersion instances of tbl's ancestors (in the hope that this reduces potential
+ SerializationErrors later on)
+ - if for_write == True, x-locks Table record (by updating Table.lock_dummy; see _acquire_tbl_xlock())
+ - if for_write == False, validates TableVersion instance
+ - if lock_mutable_tree == True, also x-locks all mutable views of the table
  - this needs to be done in a retry loop, because Postgres can decide to abort the transaction
  (SerializationFailure, LockNotAvailable)
  - for that reason, we do all lock acquisition prior to doing any real work (eg, compute column values),
- to minimize (maybe avoid altogether) loosing that work
+ to minimize the probability of loosing that work due to a forced abort
+
+ If convert_db_excs == True, converts DBAPIErrors into excs.Errors.
  """
  if Env.get().in_xact:
- if tbl_id is not None and for_write:
+ if tbl is not None and for_write:
  # make sure that we requested the required table lock at the beginning of the transaction
- assert tbl_id == self._x_locked_tbl_id, f'{tbl_id} != {self._x_locked_tbl_id}'
+ assert tbl.tbl_id in self._x_locked_tbl_ids, f'{tbl.tbl_id} not in {self._x_locked_tbl_ids}'
  yield Env.get().conn
  return

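A sketch of how a caller might use the new begin_xact() signature; illustrative only — tvp stands for a TableVersionPath obtained elsewhere, and the SELECT is a placeholder for the actual work:

    import sqlalchemy as sql
    from pixeltable.catalog import Catalog

    def update_tree(tvp) -> None:
        # with for_write=True and lock_mutable_tree=True, the target table and all of its
        # mutable views are X-locked before any expensive work (e.g. computed columns) starts
        with Catalog.get().begin_xact(tbl=tvp, for_write=True, lock_mutable_tree=True) as conn:
            conn.execute(sql.text('SELECT 1'))  # placeholder for the actual data/metadata update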
@@ -209,30 +254,72 @@
  num_retries = 0
  while True:
  try:
+ self._in_write_xact = False
+ self._x_locked_tbl_ids = set()
+ self._column_dependents = None
+
  with Env.get().begin_xact() as conn:
- if tbl_id is not None and for_write:
- # X-lock Table record
- conn.execute(
- sql.select(schema.Table).where(schema.Table.id == tbl_id).with_for_update(nowait=True)
- )
- conn.execute(sql.update(schema.Table).values(lock_dummy=1).where(schema.Table.id == tbl_id))
- self._x_locked_tbl_id = tbl_id
+ if tbl is not None:
+ try:
+ if not self._acquire_path_locks(
+ tbl=tbl, for_write=for_write, lock_mutable_tree=lock_mutable_tree
+ ):
+ # this is a snapshot
+ yield conn
+ return
+
+ if for_write:
+ if lock_mutable_tree:
+ self._x_locked_tbl_ids = self._get_mutable_tree(tbl.tbl_id)
+ self._compute_column_dependents(self._x_locked_tbl_ids)
+ else:
+ self._x_locked_tbl_ids = {tbl.tbl_id}
+ if _logger.isEnabledFor(logging.DEBUG):
+ # validate only when we don't see errors
+ self.validate()
+
+ except sql.exc.DBAPIError as e:
+ if isinstance(
+ e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)
+ ) and (num_retries < _MAX_RETRIES or _MAX_RETRIES == -1):
+ num_retries += 1
+ _logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
+ time.sleep(random.uniform(0.1, 0.5))
+ continue
+ else:
+ raise

  self._in_write_xact = for_write
  yield conn
  return
+
  except sql.exc.DBAPIError as e:
- if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)) and (
- num_retries < _MAX_RETRIES or _MAX_RETRIES == 0
- ):
- num_retries += 1
- _logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
- time.sleep(random.uniform(0.1, 0.5))
+ # we got some db error during the actual operation (not just while trying to get locks on the metadata
+ # records): we convert these into Errors, if asked to do so, and abort
+ # TODO: what other concurrency-related exceptions should we expect?
+
+ # we always convert UndefinedTable exceptions (they can't be retried)
+ if isinstance(e.orig, psycopg.errors.UndefinedTable):
+ # the table got dropped in the middle of the table operation
+ _logger.debug(f'Exception: undefined table ({tbl.tbl_name()}): Caught {type(e.orig)}: {e!r}')
+ assert tbl is not None
+ raise excs.Error(f'Table was dropped: {tbl.tbl_name()}') from None
+ elif isinstance(e.orig, psycopg.errors.SerializationFailure) and convert_db_excs:
+ # we still got a serialization error, despite getting x-locks at the beginning
+ msg = f'{tbl.tbl_name()} ({tbl.tbl_id})' if tbl is not None else ''
+ _logger.debug(f'Exception: serialization failure: {msg} ({e})')
+ raise excs.Error(
+ 'That Pixeltable operation could not be completed because it conflicted with another '
+ 'operation that was run on a different process.\n'
+ 'Please re-run the operation.'
+ ) from None
  else:
  raise
+
  finally:
  self._in_write_xact = False
- self._x_locked_tbl_id = None
+ self._x_locked_tbl_ids = set()
+ self._column_dependents = None

  # invalidate cached current TableVersion instances
  for tv in self._tbl_versions.values():
@@ -240,20 +327,117 @@
  _logger.debug(f'invalidating table version {tv.id}:None (tv={id(tv):x})')
  tv.is_validated = False

- if _logger.isEnabledFor(logging.DEBUG):
- self.validate()
-
  @property
  def in_write_xact(self) -> bool:
  return self._in_write_xact

- def _acquire_dir_xlock(self, parent_id: Optional[UUID], dir_id: Optional[UUID], dir_name: Optional[str]) -> None:
+ def _acquire_path_locks(
+ self, *, tbl: TableVersionPath, for_write: bool = False, lock_mutable_tree: bool = False
+ ) -> bool:
+ """
+ Path locking protocol:
+ - refresh cached TableVersions of ancestors (we need those even during inserts, for computed columns that
+ reference the base tables)
+ - refresh cached TableVersion of tbl or get X-lock, depending on for_write
+ - if lock_mutable_tree, also X-lock all mutable views of tbl
+
+ Returns False if trying to lock a pure snapshot with for_write == True
+ Raises Error if tbl doesn't exist.
+ """
+ start_idx = 1 if for_write else 0
+ for handle in tbl.get_tbl_versions()[start_idx::-1]:
+ _ = self.get_tbl_version(handle.id, handle.effective_version)
+ if not for_write:
+ return True # nothing left to lock
+ return self._acquire_tbl_xlock(tbl_id=tbl.tbl_id, lock_mutable_tree=lock_mutable_tree, raise_if_not_exists=True)
+
+ def _acquire_tbl_xlock(
+ self,
+ *,
+ tbl_id: Optional[UUID] = None,
+ dir_id: Optional[UUID] = None,
+ tbl_name: Optional[str] = None,
+ lock_mutable_tree: bool = False,
+ raise_if_not_exists: bool = False,
+ ) -> bool:
+ """Force acquisition of an X-lock on a Table record via a blind update
+
+ Either tbl_id or dir_id/tbl_name need to be specified.
+ Returns True if the table was locked, False if it was a snapshot or not found.
+ If lock_mutable_tree, recursively locks all mutable views of the table.
+
+ Returns False if the table is a snapshot or not found and !raise_if_not_exists.
+ """
+ where_clause: sql.ColumnElement
+ if tbl_id is not None:
+ where_clause = schema.Table.id == tbl_id
+ else:
+ where_clause = sql.and_(schema.Table.dir_id == dir_id, schema.Table.md['name'].astext == tbl_name)
+ user = Env.get().user
+ if user is not None:
+ where_clause = sql.and_(where_clause, schema.Table.md['user'].astext == Env.get().user)
+
+ conn = Env.get().conn
+ row = conn.execute(sql.select(schema.Table).where(where_clause).with_for_update(nowait=True)).one_or_none()
+ if row is None:
+ if raise_if_not_exists:
+ raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
+ return False # nothing to lock
+ if row.md['view_md'] is not None and row.md['view_md']['is_snapshot']:
+ return False # nothing to lock
+ conn.execute(sql.update(schema.Table).values(lock_dummy=1).where(where_clause))
+
+ if not lock_mutable_tree:
+ return True
+ # also lock mutable views
+ tv = self.get_tbl_version(tbl_id, None)
+ for view in tv.mutable_views:
+ self._acquire_tbl_xlock(tbl_id=view.id, lock_mutable_tree=True, raise_if_not_exists=raise_if_not_exists)
+ return True
+
+ def _get_mutable_tree(self, tbl_id: UUID) -> set[UUID]:
+ """Returns ids of all tables that form the tree of mutable views starting at tbl_id; includes the root."""
+ tv = self.get_tbl_version(tbl_id, None)
+ result: set[UUID] = {tv.id}
+ for view in tv.mutable_views:
+ result.update(self._get_mutable_tree(view.id))
+ return result
+
+ def _compute_column_dependents(self, mutable_tree: set[UUID]) -> None:
+ """Populate self._column_dependents for all tables in mutable_tree"""
+ assert self._column_dependents is None
+ self._column_dependents = defaultdict(set)
+ for tbl_id in mutable_tree:
+ assert tbl_id in self._column_dependencies
+ for col, dependencies in self._column_dependencies[tbl_id].items():
+ for dependency in dependencies:
+ if dependency.tbl_id not in mutable_tree:
+ continue
+ dependents = self._column_dependents[dependency]
+ dependents.add(col)
+
+ def get_column_dependents(self, tbl_id: UUID, col_id: int) -> set[Column]:
+ """Return all Columns that transitively depend on the given column."""
+ assert self._column_dependents is not None
+ dependents = self._column_dependents[QColumnId(tbl_id, col_id)]
+ result: set[Column] = set()
+ for dependent in dependents:
+ tv = self.get_tbl_version(dependent.tbl_id, None)
+ col = tv.cols_by_id[dependent.col_id]
+ result.add(col)
+ return result
+
+ def _acquire_dir_xlock(
+ self, *, parent_id: Optional[UUID] = None, dir_id: Optional[UUID] = None, dir_name: Optional[str] = None
+ ) -> None:
  """Force acquisition of an X-lock on a Dir record via a blind update.

  If dir_id is present, then all other conditions are ignored.
  Note that (parent_id==None) is a valid where condition.
  If dir_id is not specified, the user from the environment is added to the directory filters.
  """
+ assert (dir_name is None) != (dir_id is None)
+ assert not (parent_id is not None and dir_name is None)
  user = Env.get().user
  assert self._in_write_xact
  q = sql.update(schema.Dir).values(lock_dummy=1)
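The bookkeeping done by _compute_column_dependents() and get_column_dependents() amounts to inverting a per-table edge map, restricted to the locked tree. A self-contained sketch, with a hypothetical QCol tuple standing in for QColumnId:

    from collections import defaultdict
    from typing import NamedTuple
    from uuid import UUID, uuid4

    class QCol(NamedTuple):  # hypothetical stand-in for QColumnId
        tbl_id: UUID
        col_id: int

    base_id, view_id = uuid4(), uuid4()
    # per-table map: column -> the columns it reads (its dependencies)
    dependencies: dict[UUID, dict[QCol, set[QCol]]] = {
        view_id: {QCol(view_id, 0): {QCol(base_id, 1)}},  # view col 0 is computed from base col 1
        base_id: {},
    }

    # invert the edges, restricted to the locked tree, to obtain dependents
    mutable_tree = {base_id, view_id}
    dependents: dict[QCol, set[QCol]] = defaultdict(set)
    for tbl_id in mutable_tree:
        for col, deps in dependencies[tbl_id].items():
            for dep in deps:
                if dep.tbl_id in mutable_tree:
                    dependents[dep].add(col)

    assert dependents[QCol(base_id, 1)] == {QCol(view_id, 0)}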
@@ -367,7 +551,7 @@
  add_dir: Optional[schema.Dir] = None
  drop_dir: Optional[schema.Dir] = None
  for p in sorted(dir_paths):
- dir = self._get_dir(p, for_update=True)
+ dir = self._get_dir(p, lock_dir=True)
  if dir is None:
  raise excs.Error(f'Directory {str(p)!r} does not exist.')
  if p == add_dir_path:
@@ -377,7 +561,7 @@

  add_obj: Optional[SchemaObject] = None
  if add_dir is not None:
- add_obj = self._get_dir_entry(add_dir.id, add_name, for_update=True)
+ add_obj = self._get_dir_entry(add_dir.id, add_name, lock_entry=True)
  if add_obj is not None and raise_if_exists:
  add_path = add_dir_path.append(add_name)
  raise excs.Error(f'Path {str(add_path)!r} already exists.')
@@ -385,7 +569,7 @@
  drop_obj: Optional[SchemaObject] = None
  if drop_dir is not None:
  drop_path = drop_dir_path.append(drop_name)
- drop_obj = self._get_dir_entry(drop_dir.id, drop_name, for_update=True)
+ drop_obj = self._get_dir_entry(drop_dir.id, drop_name, lock_entry=True)
  if drop_obj is None and raise_if_not_exists:
  raise excs.Error(f'Path {str(drop_path)!r} does not exist.')
  if drop_obj is not None and drop_expected is not None and not isinstance(drop_obj, drop_expected):
@@ -397,13 +581,13 @@
  add_dir_obj = Dir(add_dir.id, add_dir.parent_id, add_dir.md['name']) if add_dir is not None else None
  return add_obj, add_dir_obj, drop_obj

- def _get_dir_entry(self, dir_id: UUID, name: str, for_update: bool = False) -> Optional[SchemaObject]:
+ def _get_dir_entry(self, dir_id: UUID, name: str, lock_entry: bool = False) -> Optional[SchemaObject]:
  user = Env.get().user
  conn = Env.get().conn

  # check for subdirectory
- if for_update:
- self._acquire_dir_xlock(dir_id, None, name)
+ if lock_entry:
+ self._acquire_dir_xlock(parent_id=dir_id, dir_id=None, dir_name=name)
  q = sql.select(schema.Dir).where(
  schema.Dir.parent_id == dir_id, schema.Dir.md['name'].astext == name, schema.Dir.md['user'].astext == user
  )
@@ -417,13 +601,13 @@
  return Dir(dir_record.id, dir_record.parent_id, name)

  # check for table
+ if lock_entry:
+ self._acquire_tbl_xlock(dir_id=dir_id, tbl_name=name)
  q = sql.select(schema.Table.id).where(
  schema.Table.dir_id == dir_id,
  schema.Table.md['name'].astext == name,
  schema.Table.md['user'].astext == user,
  )
- if for_update:
- q = q.with_for_update()
  tbl_id = conn.execute(q).scalar_one_or_none()
  if tbl_id is not None:
  if tbl_id not in self._tbls:
@@ -438,7 +622,8 @@
  expected: Optional[type[SchemaObject]] = None,
  raise_if_exists: bool = False,
  raise_if_not_exists: bool = False,
- for_update: bool = False,
+ lock_parent: bool = False,
+ lock_obj: bool = False,
  ) -> Optional[SchemaObject]:
  """Return the schema object at the given path, or None if it doesn't exist.

@@ -454,16 +639,16 @@
  raise excs.Error(
  f'{str(path)!r} needs to be a {expected._display_name()} but is a {Dir._display_name()}'
  )
- dir = self._get_dir(path, for_update=for_update)
+ dir = self._get_dir(path, lock_dir=lock_obj)
  if dir is None:
  raise excs.Error(f'Unknown user: {Env.get().user}')
  return Dir(dir.id, dir.parent_id, dir.md['name'])

  parent_path = path.parent
- parent_dir = self._get_dir(parent_path, for_update=False)
+ parent_dir = self._get_dir(parent_path, lock_dir=lock_parent)
  if parent_dir is None:
  raise excs.Error(f'Directory {str(parent_path)!r} does not exist.')
- obj = self._get_dir_entry(parent_dir.id, path.name, for_update=for_update)
+ obj = self._get_dir_entry(parent_dir.id, path.name, lock_entry=lock_obj)

  if obj is None and raise_if_not_exists:
  raise excs.Error(f'Path {str(path)!r} does not exist.')
@@ -480,12 +665,12 @@
  tbl = self._load_tbl(tbl_id)
  if tbl is None:
  return None
- # if this is a mutable table, we also need to have its mutable views loaded, in order to track column
- # dependencies
- tbl_version = tbl._tbl_version.get()
- if tbl_version.is_mutable:
- for v in tbl_version.mutable_views:
- _ = self.get_table_by_id(v.id)
+ # # if this is a mutable table, we also need to have its mutable views loaded, in order to track column
+ # # dependencies
+ # tbl_version = tbl._tbl_version.get()
+ # if tbl_version.is_mutable:
+ # for v in tbl_version.mutable_views:
+ # _ = self.get_table_by_id(v.id)
  return self._tbls[tbl_id]

  @_retry_loop(for_write=True)
@@ -539,6 +724,18 @@
  ) -> Table:
  from pixeltable.utils.filecache import FileCache

+ if not is_snapshot and not base.is_snapshot():
+ # this is a mutable view of a mutable base; X-lock the base and advance its view_sn before adding the view
+ self._acquire_tbl_xlock(tbl_id=base.tbl_id)
+ base_tv = self.get_tbl_version(base.tbl_id, None)
+ base_tv.tbl_md.view_sn += 1
+ result = Env.get().conn.execute(
+ sql.update(schema.Table)
+ .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
+ .where(schema.Table.id == base.tbl_id)
+ )
+ assert result.rowcount == 1, result.rowcount
+
  existing = self._handle_path_collision(path, View, is_snapshot, if_exists)
  if existing is not None:
  assert isinstance(existing, View)
@@ -718,38 +915,35 @@
  'This is likely due to data corruption in the replicated table.'
  )

- self.store_tbl_md(UUID(tbl_id), new_tbl_md, new_version_md, new_schema_version_md)
+ self.store_tbl_md(UUID(tbl_id), None, new_tbl_md, new_version_md, new_schema_version_md)

  @_retry_loop(for_write=False)
  def get_table(self, path: Path) -> Table:
- obj = self._get_table(path)
- return obj
-
- def _get_table(self, path: Path) -> Table:
  obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
  assert isinstance(obj, Table)
- tbl_version = obj._tbl_version.get()
- # TODO: instead of calling this here, move the logic into TableVersion.init(), which is called after
- # registering the instance in _tbl_versions
- tbl_version.ensure_md_loaded()
- # if this table has mutable views, we need to load those as well, in order to record column dependencies
- for v in tbl_version.mutable_views:
- self.get_table_by_id(v.id)
  return obj

  @_retry_loop(for_write=True)
  def drop_table(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
- _, _, src_obj = self._prepare_dir_op(
- drop_dir_path=path.parent,
- drop_name=path.name,
- drop_expected=Table,
+ tbl = self._get_schema_object(
+ path,
+ expected=Table,
  raise_if_not_exists=if_not_exists == IfNotExistsParam.ERROR and not force,
+ lock_parent=True,
+ lock_obj=False,
  )
- if src_obj is None:
+ if tbl is None:
  _logger.info(f'Skipped table {str(path)!r} (does not exist).')
  return
- assert isinstance(src_obj, Table)
- self._drop_tbl(src_obj, force=force, is_replace=False)
+ assert isinstance(tbl, Table)
+
+ if isinstance(tbl, View) and tbl._tbl_version_path.is_mutable() and tbl._tbl_version_path.base.is_mutable():
+ # this is a mutable view of a mutable base;
+ # lock the base before the view, in order to avoid deadlocks with concurrent inserts/updates
+ base_id = tbl._tbl_version_path.base.tbl_id
+ self._acquire_tbl_xlock(tbl_id=base_id, lock_mutable_tree=False)
+
+ self._drop_tbl(tbl, force=force, is_replace=False)

  def _drop_tbl(self, tbl: Table, force: bool, is_replace: bool) -> None:
  """
@@ -759,8 +953,11 @@
  - X-lock base before X-locking any view
  - deadlock-free wrt to TableVersion.insert() (insert propagation also proceeds top-down)
  - X-locks parent dir prior to calling TableVersion.drop(): prevent concurrent creation of another SchemaObject
- in the same directory with the same name (which could lead to duplicate names if we get rolled back)
+ in the same directory with the same name (which could lead to duplicate names if we get aborted)
  """
+ self._acquire_dir_xlock(dir_id=tbl._dir_id)
+ self._acquire_tbl_xlock(tbl_id=tbl._id, lock_mutable_tree=False)
+
  view_ids = self.get_view_ids(tbl._id, for_update=True)
  if len(view_ids) > 0:
  if not force:
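The lock order stated in this docstring (every base is X-locked before any of its views, top-down) can be expressed generically; the helper below is a sketch with hypothetical names, not package code:

    from uuid import UUID

    def lock_order(root: UUID, views: dict[UUID, list[UUID]]) -> list[UUID]:
        """Return the order in which to lock a tree of tables: each base precedes its views."""
        order = [root]
        for view_id in views.get(root, []):
            order.extend(lock_order(view_id, views))
        return order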
@@ -780,12 +977,34 @@
  view = self.get_table_by_id(view_id)
  self._drop_tbl(view, force=force, is_replace=is_replace)

- _ = self.get_dir(tbl._dir_id, for_update=True) # X-lock the parent directory
- tbl._drop()
+ # if this is a mutable view of a mutable base, advance the base's view_sn
+ if isinstance(tbl, View) and tbl._tbl_version_path.is_mutable() and tbl._tbl_version_path.base.is_mutable():
+ base_id = tbl._tbl_version_path.base.tbl_id
+ base_tv = self.get_tbl_version(base_id, None)
+ base_tv.tbl_md.view_sn += 1
+ result = Env.get().conn.execute(
+ sql.update(schema.Table.__table__)
+ .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
+ .where(schema.Table.id == base_id)
+ )
+ assert result.rowcount == 1, result.rowcount
+
+ tv = tbl._tbl_version.get() if tbl._tbl_version is not None else None
+ if tv is not None:
+ tv = tbl._tbl_version.get()
+ # invalidate the TableVersion instance so that existing references to it can find out it has been dropped
+ tv.is_validated = False
+
+ self.delete_tbl_md(tbl._id)
  assert tbl._id in self._tbls
  del self._tbls[tbl._id]
  _logger.info(f'Dropped table `{tbl._path()}`.')

+ if tv is not None:
+ tv.drop()
+ assert (tv.id, tv.effective_version) in self._tbl_versions
+ del self._tbl_versions[tv.id, tv.effective_version]
+
  @_retry_loop(for_write=True)
  def create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
  return self._create_dir(path, if_exists, parents)
@@ -846,7 +1065,7 @@
  raise excs.Error(f'Directory {str(dir_path)!r} is not empty.')

  # drop existing subdirs
- self._acquire_dir_xlock(dir_id, None, None)
+ self._acquire_dir_xlock(dir_id=dir_id)
  dir_q = sql.select(schema.Dir).where(schema.Dir.parent_id == dir_id)
  for row in conn.execute(dir_q).all():
  self._drop_dir(row.id, dir_path.append(row.md['name']), force=True)
@@ -866,6 +1085,11 @@
  def get_view_ids(self, tbl_id: UUID, for_update: bool = False) -> list[UUID]:
  """Return the ids of views that directly reference the given table"""
  conn = Env.get().conn
+ # check whether this table still exists
+ q = sql.select(sql.func.count()).select_from(schema.Table).where(schema.Table.id == tbl_id)
+ tbl_count = conn.execute(q).scalar()
+ if tbl_count == 0:
+ raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
  q = sql.select(schema.Table.id).where(sql.text(f"md->'view_md'->'base_versions'->0->>0 = {tbl_id.hex!r}"))
  if for_update:
  q = q.with_for_update()
@@ -875,27 +1099,29 @@
  def get_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
  # we need a transaction here, if we're not already in one; if this starts a new transaction,
  # the returned TableVersion instance will not be validated
- with self.begin_xact(tbl_id=tbl_id, for_write=False) as conn:
+ with self.begin_xact(for_write=False) as conn:
  tv = self._tbl_versions.get((tbl_id, effective_version))
  if tv is None:
  tv = self._load_tbl_version(tbl_id, effective_version)
  elif not tv.is_validated:
  # only live instances are invalidated
  assert effective_version is None
- # we validate live instances by comparing our cached version number to the stored current version
+ # we validate live instances by comparing our cached TableMd.current_version/view_sn to what's stored
  # _logger.debug(f'validating metadata for table {tbl_id}:{tv.version} ({id(tv):x})')
  q = sql.select(schema.Table.md).where(schema.Table.id == tbl_id)
- row = conn.execute(q).one()
- current_version = row.md['current_version']
+ row = conn.execute(q).one_or_none()
+ if row is None:
+ raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
+ current_version, view_sn = row.md['current_version'], row.md['view_sn']

  # the stored version can be behind TableVersion.version, because we don't roll back the in-memory
  # metadata changes after a failed update operation
- if current_version != tv.version:
+ if current_version != tv.version or view_sn != tv.tbl_md.view_sn:
  # the cached metadata is invalid
  _logger.debug(
  f'reloading metadata for table {tbl_id} '
- f'(cached version: {tv.version}, current version: {current_version}'
- # f', id: {id(tv):x})'
+ f'(cached/current version: {tv.version}/{current_version}, '
+ f'cached/current view_sn: {tv.tbl_md.view_sn}/{view_sn})'
  )
  tv = self._load_tbl_version(tbl_id, None)
  else:
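The validation rule introduced here reduces to comparing two counters against the stored Table.md blob: the table's current_version and view_sn, which this release advances whenever a mutable view of the table is created or dropped. A minimal sketch (hypothetical helper, not part of the package):

    def needs_reload(cached_version: int, cached_view_sn: int, stored_md: dict) -> bool:
        # reload the cached TableVersion if either counter moved since we last looked
        return stored_md['current_version'] != cached_version or stored_md['view_sn'] != cached_view_sn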
@@ -913,7 +1139,7 @@
  """Return the Dir with the given id, or None if it doesn't exist"""
  conn = Env.get().conn
  if for_update:
- self._acquire_dir_xlock(None, dir_id, None)
+ self._acquire_dir_xlock(dir_id=dir_id)
  q = sql.select(schema.Dir).where(schema.Dir.id == dir_id)
  row = conn.execute(q).one_or_none()
  if row is None:
@@ -921,24 +1147,24 @@
  dir_record = schema.Dir(**row._mapping)
  return Dir(dir_record.id, dir_record.parent_id, dir_record.md['name'])

- def _get_dir(self, path: Path, for_update: bool = False) -> Optional[schema.Dir]:
+ def _get_dir(self, path: Path, lock_dir: bool = False) -> Optional[schema.Dir]:
  """
- Locking protocol: X locks on all ancestors
+ lock_dir: if True, X-locks target (but not the ancestors)
  """
  user = Env.get().user
  conn = Env.get().conn
  if path.is_root:
- if for_update:
- self._acquire_dir_xlock(parent_id=None, dir_id=None, dir_name='')
+ if lock_dir:
+ self._acquire_dir_xlock(dir_name='')
  q = sql.select(schema.Dir).where(schema.Dir.parent_id.is_(None), schema.Dir.md['user'].astext == user)
  row = conn.execute(q).one_or_none()
  return schema.Dir(**row._mapping) if row is not None else None
  else:
- parent_dir = self._get_dir(path.parent, for_update=False)
+ parent_dir = self._get_dir(path.parent, lock_dir=False)
  if parent_dir is None:
  return None
- if for_update:
- self._acquire_dir_xlock(parent_id=parent_dir.id, dir_id=None, dir_name=path.name)
+ if lock_dir:
+ self._acquire_dir_xlock(parent_id=parent_dir.id, dir_name=path.name)
  q = sql.select(schema.Dir).where(
  schema.Dir.parent_id == parent_dir.id,
  schema.Dir.md['name'].astext == path.name,
@@ -1006,6 +1232,43 @@
  self._tbls[tbl_id] = view
  return view

+ @_retry_loop(for_write=False)
+ def collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
+ """
+ Returns the history of up to n versions of the table with the given UUID.
+
+ Args:
+ tbl_id: the UUID of the table to collect history for.
+ n: Optional limit on the maximum number of versions returned.
+
+ Returns:
+ A sequence of rows, ordered by version number
+ Each row contains a TableVersion and a TableSchemaVersion object.
+ """
+ q = (
+ sql.select(schema.TableVersion, schema.TableSchemaVersion)
+ .select_from(schema.TableVersion)
+ .join(
+ schema.TableSchemaVersion,
+ sql.cast(schema.TableVersion.md['schema_version'], sql.Integer)
+ == schema.TableSchemaVersion.schema_version,
+ )
+ .where(schema.TableVersion.tbl_id == tbl_id)
+ .where(schema.TableSchemaVersion.tbl_id == tbl_id)
+ .order_by(schema.TableVersion.version.desc())
+ )
+ if n is not None:
+ q = q.limit(n)
+ src_rows = Env.get().session.execute(q).fetchall()
+ return [
+ schema.FullTableMd(
+ None,
+ schema.md_from_dict(schema.TableVersionMd, row.TableVersion.md),
+ schema.md_from_dict(schema.TableSchemaVersionMd, row.TableSchemaVersion.md),
+ )
+ for row in src_rows
+ ]
+
  def load_tbl_md(self, tbl_id: UUID, effective_version: Optional[int]) -> schema.FullTableMd:
  """
  Loads metadata from the store for a given table UUID and version.
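A possible call site for the new collect_tbl_history(); illustrative only, and the field names on the returned FullTableMd tuples (version_md, version) are assumptions rather than something shown in this diff:

    from uuid import UUID
    from pixeltable.catalog import Catalog

    def print_history(tbl_id: UUID) -> None:
        # assumed: each FullTableMd exposes its TableVersionMd as .version_md with a .version field
        for full_md in Catalog.get().collect_tbl_history(tbl_id, n=3):
            print(full_md.version_md.version)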
@@ -1060,7 +1323,8 @@
  )

  row = conn.execute(q).one_or_none()
- assert row is not None, f'Table record not found: {tbl_id}:{effective_version}'
+ if row is None:
+ raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
  tbl_record, version_record, schema_version_record = _unpack_row(
  row, [schema.Table, schema.TableVersion, schema.TableSchemaVersion]
  )
@@ -1074,19 +1338,27 @@
  def store_tbl_md(
  self,
  tbl_id: UUID,
+ dir_id: Optional[UUID],
  tbl_md: Optional[schema.TableMd],
  version_md: Optional[schema.TableVersionMd],
  schema_version_md: Optional[schema.TableSchemaVersionMd],
  ) -> None:
  """
- Stores metadata to the DB. If specified, `tbl_md` will be updated in place (only one such record can exist
- per UUID); `version_md` and `schema_version_md` will be inserted as new records.
+ Stores metadata to the DB.
+
+ Args:
+ tbl_id: UUID of the table to store metadata for.
+ dir_id: If specified, the tbl_md will be added to the given directory; if None, the table must already exist
+ tbl_md: If specified, `tbl_md` will be inserted, or updated (only one such record can exist per UUID)
+ version_md: inserted as a new record if present
+ schema_version_md: will be inserted as a new record if present

  If inserting `version_md` or `schema_version_md` would be a primary key violation, an exception will be raised.
  """
- conn = Env.get().conn
  assert self._in_write_xact
+ session = Env.get().session

+ # Construct and insert or update table record if requested.
  if tbl_md is not None:
  assert tbl_md.tbl_id == str(tbl_id)
  if version_md is not None:
@@ -1094,32 +1366,37 @@
  assert tbl_md.current_schema_version == version_md.schema_version
  if schema_version_md is not None:
  assert tbl_md.current_schema_version == schema_version_md.schema_version
- result = conn.execute(
- sql.update(schema.Table.__table__)
- .values({schema.Table.md: dataclasses.asdict(tbl_md)})
- .where(schema.Table.id == tbl_id)
- )
- assert result.rowcount == 1, result.rowcount
+ if dir_id is not None:
+ # We are inserting a record while creating a new table.
+ tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_md))
+ session.add(tbl_record)
+ else:
+ # Update the existing table record.
+ result = session.execute(
+ sql.update(schema.Table.__table__)
+ .values({schema.Table.md: dataclasses.asdict(tbl_md)})
+ .where(schema.Table.id == tbl_id)
+ )
+ assert result.rowcount == 1, result.rowcount

+ # Construct and insert new table version record if requested.
  if version_md is not None:
  assert version_md.tbl_id == str(tbl_id)
  if schema_version_md is not None:
  assert version_md.schema_version == schema_version_md.schema_version
- conn.execute(
- sql.insert(schema.TableVersion.__table__).values(
- tbl_id=tbl_id, version=version_md.version, md=dataclasses.asdict(version_md)
- )
+ tbl_version_record = schema.TableVersion(
+ tbl_id=tbl_id, version=version_md.version, md=dataclasses.asdict(version_md)
  )
+ session.add(tbl_version_record)

+ # Construct and insert a new schema version record if requested.
  if schema_version_md is not None:
  assert schema_version_md.tbl_id == str(tbl_id)
- conn.execute(
- sql.insert(schema.TableSchemaVersion.__table__).values(
- tbl_id=tbl_id,
- schema_version=schema_version_md.schema_version,
- md=dataclasses.asdict(schema_version_md),
- )
+ schema_version_record = schema.TableSchemaVersion(
+ tbl_id=tbl_id, schema_version=schema_version_md.schema_version, md=dataclasses.asdict(schema_version_md)
  )
+ session.add(schema_version_record)
+ session.flush() # Inform SQLAlchemy that we want to write these changes to the DB.

  def delete_tbl_md(self, tbl_id: UUID) -> None:
  """
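The switch from Core-style INSERTs to ORM bookkeeping (session.add() followed by session.flush()) is the standard SQLAlchemy 2.0 pattern; a generic sketch using a throwaway model rather than pixeltable's schema:

    import sqlalchemy as sa
    from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column

    class Base(DeclarativeBase):
        pass

    class Item(Base):  # throwaway table, not part of pixeltable's schema
        __tablename__ = 'item'
        id: Mapped[int] = mapped_column(primary_key=True)
        md: Mapped[dict] = mapped_column(sa.JSON)

    engine = sa.create_engine('sqlite://')
    Base.metadata.create_all(engine)
    with Session(engine) as session, session.begin():
        session.add(Item(id=1, md={'name': 't1'}))  # queued in the unit of work
        session.flush()  # emit the INSERT now, inside the still-open transaction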
@@ -1143,7 +1420,7 @@

  # If `tbl` is a named pure snapshot, we're not quite done, since the snapshot metadata won't appear in the
  # TableVersionPath. We need to prepend it separately.
- if tbl._id != tbl._tbl_version.id:
+ if isinstance(tbl, View) and tbl._snapshot_only:
  snapshot_md = self.load_tbl_md(tbl._id, 0)
  md = [snapshot_md, *md]

@@ -1212,9 +1489,20 @@

  self._tbl_versions[tbl_id, effective_version] = tbl_version
  tbl_version.init()
-
  return tbl_version

+ def record_column_dependencies(self, tbl_version: TableVersion) -> None:
+ """Update self._column_dependencies. Only valid for non-snapshot versions."""
+ from pixeltable.exprs import Expr
+
+ assert not tbl_version.is_snapshot
+ dependencies: dict[QColumnId, set[QColumnId]] = {}
+ for col in tbl_version.cols_by_id.values():
+ if col.value_expr_dict is None:
+ continue
+ dependencies[QColumnId(tbl_version.id, col.id)] = Expr.get_refd_column_ids(col.value_expr_dict)
+ self._column_dependencies[tbl_version.id] = dependencies
+
  def _init_store(self) -> None:
  """One-time initialization of the stored catalog. Idempotent."""
  self.create_user(None)
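record_column_dependencies() relies on extracting column references from a column's serialized value expression. The walk below is a generic sketch over a hypothetical dictionary layout; the real value_expr_dict format and Expr.get_refd_column_ids() are not reproduced here:

    from typing import Any

    def collect_col_refs(expr_dict: dict[str, Any]) -> set[tuple[str, int]]:
        """Collect (tbl_id, col_id) pairs from a nested expression dict (hypothetical format)."""
        refs: set[tuple[str, int]] = set()
        if expr_dict.get('_classname') == 'ColumnRef':
            refs.add((expr_dict['tbl_id'], expr_dict['col_id']))
        for value in expr_dict.values():
            if isinstance(value, dict):
                refs |= collect_col_refs(value)
            elif isinstance(value, list):
                for item in value:
                    if isinstance(item, dict):
                        refs |= collect_col_refs(item)
        return refs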