pixeltable 0.4.3__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (52)
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/__init__.py +1 -1
  3. pixeltable/catalog/catalog.py +619 -255
  4. pixeltable/catalog/dir.py +1 -2
  5. pixeltable/catalog/insertable_table.py +9 -9
  6. pixeltable/catalog/path.py +59 -20
  7. pixeltable/catalog/schema_object.py +10 -4
  8. pixeltable/catalog/table.py +51 -53
  9. pixeltable/catalog/table_version.py +216 -156
  10. pixeltable/catalog/table_version_path.py +1 -1
  11. pixeltable/catalog/tbl_ops.py +44 -0
  12. pixeltable/catalog/view.py +63 -65
  13. pixeltable/config.py +12 -4
  14. pixeltable/dataframe.py +75 -6
  15. pixeltable/env.py +46 -17
  16. pixeltable/exec/aggregation_node.py +1 -1
  17. pixeltable/exec/cache_prefetch_node.py +2 -6
  18. pixeltable/exec/component_iteration_node.py +4 -3
  19. pixeltable/exec/data_row_batch.py +10 -51
  20. pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
  21. pixeltable/exec/in_memory_data_node.py +17 -16
  22. pixeltable/exec/sql_node.py +6 -7
  23. pixeltable/exprs/column_ref.py +2 -1
  24. pixeltable/exprs/data_row.py +13 -13
  25. pixeltable/exprs/row_builder.py +16 -4
  26. pixeltable/exprs/string_op.py +1 -1
  27. pixeltable/func/expr_template_function.py +1 -4
  28. pixeltable/functions/date.py +1 -1
  29. pixeltable/functions/gemini.py +4 -4
  30. pixeltable/functions/math.py +1 -1
  31. pixeltable/functions/openai.py +9 -6
  32. pixeltable/functions/timestamp.py +6 -6
  33. pixeltable/functions/video.py +2 -6
  34. pixeltable/globals.py +62 -33
  35. pixeltable/io/datarows.py +2 -1
  36. pixeltable/io/pandas.py +1 -0
  37. pixeltable/io/table_data_conduit.py +12 -13
  38. pixeltable/iterators/audio.py +17 -8
  39. pixeltable/iterators/image.py +5 -2
  40. pixeltable/metadata/schema.py +39 -2
  41. pixeltable/plan.py +5 -14
  42. pixeltable/share/packager.py +13 -13
  43. pixeltable/store.py +31 -7
  44. pixeltable/type_system.py +2 -1
  45. pixeltable/utils/filecache.py +1 -1
  46. pixeltable/utils/http_server.py +2 -3
  47. pixeltable/utils/media_store.py +90 -34
  48. {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/METADATA +1 -1
  49. {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/RECORD +52 -51
  50. {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/LICENSE +0 -0
  51. {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/WHEEL +0 -0
  52. {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/entry_points.txt +0 -0
@@ -14,6 +14,8 @@ import psycopg
14
14
  import sqlalchemy as sql
15
15
 
16
16
  from pixeltable import exceptions as excs
17
+
18
+ # from pixeltable import exceptions as excs, UpdateStatus
17
19
  from pixeltable.env import Env
18
20
  from pixeltable.iterators import ComponentIterator
19
21
  from pixeltable.metadata import schema
@@ -28,6 +30,8 @@ from .table import Table
28
30
  from .table_version import TableVersion
29
31
  from .table_version_handle import TableVersionHandle
30
32
  from .table_version_path import TableVersionPath
33
+ from .tbl_ops import TableOp
34
+ from .update_status import UpdateStatus
31
35
  from .view import View
32
36
 
33
37
  if TYPE_CHECKING:
@@ -70,18 +74,35 @@ _MAX_RETRIES = -1
70
74
  T = TypeVar('T')
71
75
 
72
76
 
73
- def _retry_loop(*, for_write: bool) -> Callable[[Callable[..., T]], Callable[..., T]]:
77
+ def retry_loop(
78
+ *, tbl: Optional[TableVersionPath] = None, for_write: bool, lock_mutable_tree: bool = False
79
+ ) -> Callable[[Callable[..., T]], Callable[..., T]]:
74
80
  def decorator(op: Callable[..., T]) -> Callable[..., T]:
75
81
  @functools.wraps(op)
76
82
  def loop(*args: Any, **kwargs: Any) -> T:
83
+ cat = Catalog.get()
84
+ # retry_loop() is reentrant
85
+ if cat._in_retry_loop:
86
+ return op(*args, **kwargs)
87
+
77
88
  num_retries = 0
78
89
  while True:
90
+ cat._in_retry_loop = True
79
91
  try:
80
92
  # in order for retry to work, we need to make sure that there aren't any prior db updates
81
93
  # that are part of an ongoing transaction
82
94
  assert not Env.get().in_xact
83
- with Catalog.get().begin_xact(for_write=for_write, convert_db_excs=False):
95
+ with Catalog.get().begin_xact(
96
+ tbl=tbl,
97
+ for_write=for_write,
98
+ convert_db_excs=False,
99
+ lock_mutable_tree=lock_mutable_tree,
100
+ finalize_pending_ops=True,
101
+ ):
84
102
  return op(*args, **kwargs)
103
+ except PendingTableOpsError as e:
104
+ Env.get().console_logger.debug(f'retry_loop(): finalizing pending ops for {e.tbl_id}')
105
+ Catalog.get()._finalize_pending_ops(e.tbl_id)
85
106
  except sql.exc.DBAPIError as e:
86
107
  # TODO: what other exceptions should we be looking for?
87
108
  if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
@@ -97,16 +118,31 @@ def _retry_loop(*, for_write: bool) -> Callable[[Callable[..., T]], Callable[...
97
118
  # for informational/debugging purposes
98
119
  _logger.debug(f'retry_loop(): passing along {e}')
99
120
  raise
121
+ finally:
122
+ cat._in_retry_loop = False
100
123
 
101
124
  return loop
102
125
 
103
126
  return decorator
104
127
 
105
128
 
129
+ class PendingTableOpsError(Exception):
130
+ tbl_id: UUID
131
+
132
+ def __init__(self, tbl_id: UUID) -> None:
133
+ self.tbl_id = tbl_id
134
+
135
+
106
136
  class Catalog:
107
137
  """The functional interface to getting access to catalog objects
108
138
 
109
- All interface functions must be called in the context of a transaction, started with Catalog.begin_xact().
139
+ All interface functions must be called in the context of a transaction, started with Catalog.begin_xact() or
140
+ via retry_loop().
141
+
142
+ When calling functions that involve Table or TableVersion instances, the catalog needs to get a chance to finalize
143
+ pending ops against those tables. To that end,
144
+ - use begin_xact(tbl) or begin_xact(tbl_id) if only accessing a single table
145
+ - use retry_loop() when accessing multiple tables (eg, pxt.ls())
110
146
 
111
147
  Caching and invalidation of metadata:
112
148
  - Catalog caches TableVersion instances in order to avoid excessive metadata loading
@@ -129,9 +165,11 @@ class Catalog:
129
165
  # - mutable version of a table: version == None (even though TableVersion.version is set correctly)
130
166
  # - snapshot versions: records the version of the snapshot
131
167
  _tbl_versions: dict[tuple[UUID, Optional[int]], TableVersion]
132
- _tbls: dict[UUID, Table]
168
+ _tbls: dict[tuple[UUID, Optional[int]], Table]
133
169
  _in_write_xact: bool # True if we're in a write transaction
134
170
  _x_locked_tbl_ids: set[UUID] # non-empty for write transactions
171
+ _modified_tvs: set[TableVersionHandle] # TableVersion instances modified in the current transaction
172
+ _in_retry_loop: bool
135
173
 
136
174
  # cached column dependencies
137
175
  # - key: table id, value: mapping from column id to its dependencies
@@ -164,6 +202,8 @@ class Catalog:
164
202
  self._tbls = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
165
203
  self._in_write_xact = False
166
204
  self._x_locked_tbl_ids = set()
205
+ self._modified_tvs = set()
206
+ self._in_retry_loop = False
167
207
  self._column_dependencies = {}
168
208
  self._column_dependents = None
169
209
  self._init_store()
@@ -214,9 +254,11 @@ class Catalog:
214
254
  self,
215
255
  *,
216
256
  tbl: Optional[TableVersionPath] = None,
257
+ tbl_id: Optional[UUID] = None,
217
258
  for_write: bool = False,
218
259
  lock_mutable_tree: bool = False,
219
260
  convert_db_excs: bool = True,
261
+ finalize_pending_ops: bool = True,
220
262
  ) -> Iterator[sql.Connection]:
221
263
  """
222
264
  Return a context manager that yields a connection to the database. Idempotent.
@@ -227,7 +269,7 @@ class Catalog:
227
269
  If tbl != None, follows this locking protocol:
228
270
  - validates/reloads the TableVersion instances of tbl's ancestors (in the hope that this reduces potential
229
271
  SerializationErrors later on)
230
- - if for_write == True, x-locks Table record (by updating Table.lock_dummy; see _acquire_tbl_xlock())
272
+ - if for_write == True, x-locks Table record (by updating Table.lock_dummy; see _acquire_tbl_lock())
231
273
  - if for_write == False, validates TableVersion instance
232
274
  - if lock_mutable_tree == True, also x-locks all mutable views of the table
233
275
  - this needs to be done in a retry loop, because Postgres can decide to abort the transaction
@@ -237,10 +279,14 @@ class Catalog:
237
279
 
238
280
  If convert_db_excs == True, converts DBAPIErrors into excs.Errors.
239
281
  """
282
+ assert tbl is None or tbl_id is None # at most one can be specified
240
283
  if Env.get().in_xact:
241
- if tbl is not None and for_write:
242
- # make sure that we requested the required table lock at the beginning of the transaction
243
- assert tbl.tbl_id in self._x_locked_tbl_ids, f'{tbl.tbl_id} not in {self._x_locked_tbl_ids}'
284
+ # make sure that we requested the required table lock at the beginning of the transaction
285
+ if for_write:
286
+ if tbl is not None:
287
+ assert tbl.tbl_id in self._x_locked_tbl_ids, f'{tbl.tbl_id} not in {self._x_locked_tbl_ids}'
288
+ elif tbl_id is not None:
289
+ assert tbl_id in self._x_locked_tbl_ids, f'{tbl_id} not in {self._x_locked_tbl_ids}'
244
290
  yield Env.get().conn
245
291
  return
246
292
 
@@ -252,33 +298,66 @@ class Catalog:
252
298
  # )
253
299
  # _logger.debug(f'begin_xact(): {tv_msg}')
254
300
  num_retries = 0
301
+ pending_ops_tbl_id: Optional[UUID] = None
302
+ has_exc = False # True if we exited the 'with ...begin_xact()' block with an exception
255
303
  while True:
304
+ if pending_ops_tbl_id is not None:
305
+ Env.get().console_logger.debug(f'begin_xact(): finalizing pending ops for {pending_ops_tbl_id}')
306
+ self._finalize_pending_ops(pending_ops_tbl_id)
307
+ pending_ops_tbl_id = None
308
+
256
309
  try:
257
- self._in_write_xact = False
310
+ self._in_write_xact = for_write
258
311
  self._x_locked_tbl_ids = set()
312
+ self._modified_tvs = set()
259
313
  self._column_dependents = None
314
+ has_exc = False
260
315
 
261
- with Env.get().begin_xact() as conn:
262
- if tbl is not None:
316
+ with Env.get().begin_xact(for_write=for_write) as conn:
317
+ if tbl is not None or tbl_id is not None:
263
318
  try:
264
- if not self._acquire_path_locks(
265
- tbl=tbl, for_write=for_write, lock_mutable_tree=lock_mutable_tree
266
- ):
267
- # this is a snapshot
268
- yield conn
269
- return
270
-
271
- if for_write:
272
- if lock_mutable_tree:
273
- self._x_locked_tbl_ids = self._get_mutable_tree(tbl.tbl_id)
319
+ target: Optional[TableVersionHandle] = None
320
+ if tbl is not None:
321
+ if self._acquire_path_locks(
322
+ tbl=tbl,
323
+ for_write=for_write,
324
+ lock_mutable_tree=lock_mutable_tree,
325
+ check_pending_ops=finalize_pending_ops,
326
+ ):
327
+ target = tbl.tbl_version
328
+ else:
329
+ target = self._acquire_tbl_lock(
330
+ tbl_id=tbl_id,
331
+ for_write=for_write,
332
+ lock_mutable_tree=lock_mutable_tree,
333
+ raise_if_not_exists=True,
334
+ check_pending_ops=finalize_pending_ops,
335
+ )
336
+
337
+ if target is None:
338
+ # didn't get the write lock
339
+ for_write = False
340
+ elif for_write:
341
+ # we know at this point that target is mutable because we got the X-lock
342
+ if lock_mutable_tree and not target.is_snapshot:
343
+ self._x_locked_tbl_ids = self._get_mutable_tree(target.id)
274
344
  self._compute_column_dependents(self._x_locked_tbl_ids)
275
345
  else:
276
- self._x_locked_tbl_ids = {tbl.tbl_id}
346
+ self._x_locked_tbl_ids = {target.id}
277
347
  if _logger.isEnabledFor(logging.DEBUG):
278
348
  # validate only when we don't see errors
279
349
  self.validate()
280
350
 
351
+ except PendingTableOpsError as e:
352
+ has_exc = True
353
+ if finalize_pending_ops:
354
+ # we remember which table id to finalize
355
+ pending_ops_tbl_id = e.tbl_id
356
+ # raise to abort the transaction
357
+ raise
358
+
281
359
  except sql.exc.DBAPIError as e:
360
+ has_exc = True
282
361
  if isinstance(
283
362
  e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)
284
363
  ) and (num_retries < _MAX_RETRIES or _MAX_RETRIES == -1):
@@ -289,11 +368,20 @@ class Catalog:
289
368
  else:
290
369
  raise
291
370
 
292
- self._in_write_xact = for_write
293
371
  yield conn
294
372
  return
295
373
 
374
+ except PendingTableOpsError:
375
+ has_exc = True
376
+ if pending_ops_tbl_id is not None:
377
+ # the next iteration of the loop will deal with pending ops for this table id
378
+ continue
379
+ else:
380
+ # we got this exception after getting the initial table locks and therefore need to abort
381
+ raise
382
+
296
383
  except sql.exc.DBAPIError as e:
384
+ has_exc = True
297
385
  # we got some db error during the actual operation (not just while trying to get locks on the metadata
298
386
  # records): we convert these into Errors, if asked to do so, and abort
299
387
  # TODO: what other concurrency-related exceptions should we expect?
@@ -301,12 +389,19 @@ class Catalog:
301
389
  # we always convert UndefinedTable exceptions (they can't be retried)
302
390
  if isinstance(e.orig, psycopg.errors.UndefinedTable):
303
391
  # the table got dropped in the middle of the table operation
304
- _logger.debug(f'Exception: undefined table ({tbl.tbl_name()}): Caught {type(e.orig)}: {e!r}')
392
+ tbl_name = tbl.tbl_name() if tbl is not None else str(tbl_id) if tbl_id is not None else '?'
393
+ _logger.debug(f'Exception: undefined table ({tbl_name}): Caught {type(e.orig)}: {e!r}')
305
394
  assert tbl is not None
306
- raise excs.Error(f'Table was dropped: {tbl.tbl_name()}') from None
395
+ raise excs.Error(f'Table was dropped: {tbl_name}') from None
307
396
  elif isinstance(e.orig, psycopg.errors.SerializationFailure) and convert_db_excs:
308
397
  # we still got a serialization error, despite getting x-locks at the beginning
309
- msg = f'{tbl.tbl_name()} ({tbl.tbl_id})' if tbl is not None else ''
398
+ msg: str
399
+ if tbl is not None:
400
+ msg = f'{tbl.tbl_name()} ({tbl.tbl_id})'
401
+ elif tbl_id is not None:
402
+ msg = f'{tbl_id}'
403
+ else:
404
+ msg = ''
310
405
  _logger.debug(f'Exception: serialization failure: {msg} ({e})')
311
406
  raise excs.Error(
312
407
  'That Pixeltable operation could not be completed because it conflicted with another '
@@ -316,6 +411,10 @@ class Catalog:
316
411
  else:
317
412
  raise
318
413
 
414
+ except:
415
+ has_exc = True
416
+ raise
417
+
319
418
  finally:
320
419
  self._in_write_xact = False
321
420
  self._x_locked_tbl_ids = set()
@@ -327,12 +426,24 @@ class Catalog:
327
426
  _logger.debug(f'invalidating table version {tv.id}:None (tv={id(tv):x})')
328
427
  tv.is_validated = False
329
428
 
429
+ if has_exc:
430
+ # purge all modified TableVersion instances, we can't guarantee they are still consistent with the
431
+ # stored metadata
432
+ for handle in self._modified_tvs:
433
+ self._clear_tv_cache(handle.id, handle.effective_version)
434
+ self._modified_tvs = set()
435
+
330
436
  @property
331
437
  def in_write_xact(self) -> bool:
332
438
  return self._in_write_xact
333
439
 
334
440
  def _acquire_path_locks(
335
- self, *, tbl: TableVersionPath, for_write: bool = False, lock_mutable_tree: bool = False
441
+ self,
442
+ *,
443
+ tbl: TableVersionPath,
444
+ for_write: bool = False,
445
+ lock_mutable_tree: bool = False,
446
+ check_pending_ops: Optional[bool] = None,
336
447
  ) -> bool:
337
448
  """
338
449
  Path locking protocol:
@@ -341,33 +452,49 @@ class Catalog:
341
452
  - refresh cached TableVersion of tbl or get X-lock, depending on for_write
342
453
  - if lock_mutable_tree, also X-lock all mutable views of tbl
343
454
 
344
- Returns False if trying to lock a pure snapshot with for_write == True
345
455
  Raises Error if tbl doesn't exist.
456
+ Return False if the lock couldn't be acquired (X-lock on a non-mutable table), True otherwise.
346
457
  """
347
- start_idx = 1 if for_write else 0
348
- for handle in tbl.get_tbl_versions()[start_idx::-1]:
349
- _ = self.get_tbl_version(handle.id, handle.effective_version)
458
+ path_handles = tbl.get_tbl_versions()
459
+ read_handles = path_handles[:0:-1] if for_write else path_handles[::-1]
460
+ for handle in read_handles:
461
+ # update cache
462
+ _ = self.get_tbl_version(handle.id, handle.effective_version, validate_initialized=True)
350
463
  if not for_write:
351
464
  return True # nothing left to lock
352
- return self._acquire_tbl_xlock(tbl_id=tbl.tbl_id, lock_mutable_tree=lock_mutable_tree, raise_if_not_exists=True)
465
+ handle = self._acquire_tbl_lock(
466
+ tbl_id=tbl.tbl_id,
467
+ for_write=True,
468
+ lock_mutable_tree=lock_mutable_tree,
469
+ raise_if_not_exists=True,
470
+ check_pending_ops=check_pending_ops,
471
+ )
472
+ # update cache
473
+ _ = self.get_tbl_version(path_handles[0].id, path_handles[0].effective_version, validate_initialized=True)
474
+ return handle is not None
353
475
 
354
- def _acquire_tbl_xlock(
476
+ def _acquire_tbl_lock(
355
477
  self,
356
478
  *,
479
+ for_write: bool,
357
480
  tbl_id: Optional[UUID] = None,
358
481
  dir_id: Optional[UUID] = None,
359
482
  tbl_name: Optional[str] = None,
360
483
  lock_mutable_tree: bool = False,
361
- raise_if_not_exists: bool = False,
362
- ) -> bool:
363
- """Force acquisition of an X-lock on a Table record via a blind update
484
+ raise_if_not_exists: bool = True,
485
+ check_pending_ops: Optional[bool] = None,
486
+ ) -> Optional[TableVersionHandle]:
487
+ """
488
+ For writes: force acquisition of an X-lock on a Table record via a blind update.
364
489
 
365
490
  Either tbl_id or dir_id/tbl_name need to be specified.
366
491
  Returns True if the table was locked, False if it was a snapshot or not found.
367
492
  If lock_mutable_tree, recursively locks all mutable views of the table.
368
493
 
369
- Returns False if the table is a snapshot or not found and !raise_if_not_exists.
494
+ Returns a handle to what was locked, None if the lock couldn't be acquired (eg, X-lock on a non-mutable table).
370
495
  """
496
+ assert (tbl_id is not None) != (dir_id is not None and tbl_name is not None)
497
+ assert (dir_id is None) == (tbl_name is None)
371
498
  where_clause: sql.ColumnElement
372
499
  if tbl_id is not None:
373
500
  where_clause = schema.Table.id == tbl_id
@@ -378,26 +505,130 @@ class Catalog:
378
505
  where_clause = sql.and_(where_clause, schema.Table.md['user'].astext == Env.get().user)
379
506
 
380
507
  conn = Env.get().conn
381
- row = conn.execute(sql.select(schema.Table).where(where_clause).with_for_update(nowait=True)).one_or_none()
508
+ q = sql.select(schema.Table).where(where_clause)
509
+ if for_write:
510
+ q = q.with_for_update(nowait=True)
511
+ row = conn.execute(q).one_or_none()
382
512
  if row is None:
383
513
  if raise_if_not_exists:
384
514
  raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
385
- return False # nothing to lock
386
- if row.md['view_md'] is not None and row.md['view_md']['is_snapshot']:
387
- return False # nothing to lock
388
- conn.execute(sql.update(schema.Table).values(lock_dummy=1).where(where_clause))
389
-
390
- if not lock_mutable_tree:
391
- return True
392
- # also lock mutable views
393
- tv = self.get_tbl_version(tbl_id, None)
394
- for view in tv.mutable_views:
395
- self._acquire_tbl_xlock(tbl_id=view.id, lock_mutable_tree=True, raise_if_not_exists=raise_if_not_exists)
396
- return True
515
+ return None # nothing to lock
516
+ tbl_md = schema.md_from_dict(schema.TableMd, row.md)
517
+ if for_write and tbl_md.is_mutable:
518
+ conn.execute(sql.update(schema.Table).values(lock_dummy=1).where(where_clause))
519
+
520
+ if check_pending_ops:
521
+ # check for pending ops after getting table lock
522
+ pending_ops_q = sql.select(sql.func.count()).where(schema.PendingTableOp.tbl_id == row.id)
523
+ has_pending_ops = conn.execute(pending_ops_q).scalar() > 0
524
+ if has_pending_ops:
525
+ raise PendingTableOpsError(row.id)
526
+
527
+ if for_write and not tbl_md.is_mutable:
528
+ return None # nothing to lock
529
+
530
+ effective_version = tbl_md.current_version if tbl_md.is_snapshot else None
531
+ if tbl_md.is_mutable and lock_mutable_tree:
532
+ # also lock mutable views
533
+ tv = self.get_tbl_version(tbl_id, effective_version, validate_initialized=True)
534
+ for view in tv.mutable_views:
535
+ self._acquire_tbl_lock(
536
+ for_write=for_write,
537
+ tbl_id=view.id,
538
+ lock_mutable_tree=lock_mutable_tree,
539
+ raise_if_not_exists=raise_if_not_exists,
540
+ check_pending_ops=check_pending_ops,
541
+ )
542
+ return TableVersionHandle(tbl_id, effective_version)
543
+
544
+ def _finalize_pending_ops(self, tbl_id: UUID) -> None:
545
+ """Finalizes all pending ops for the given table."""
546
+ num_retries = 0
547
+ while True:
548
+ try:
549
+ tbl_version: int
550
+ op: Optional[TableOp] = None
551
+ delete_next_op_stmt: sql.Delete
552
+ reset_has_pending_stmt: sql.Update
553
+ with self.begin_xact(
554
+ tbl_id=tbl_id, for_write=True, convert_db_excs=False, finalize_pending_ops=False
555
+ ) as conn:
556
+ q = (
557
+ sql.select(schema.Table.md, schema.PendingTableOp)
558
+ .select_from(schema.Table)
559
+ .join(schema.PendingTableOp)
560
+ .where(schema.Table.id == tbl_id)
561
+ .where(schema.PendingTableOp.tbl_id == tbl_id)
562
+ .order_by(schema.PendingTableOp.op_sn)
563
+ .limit(1)
564
+ .with_for_update()
565
+ )
566
+ row = conn.execute(q).one_or_none()
567
+ if row is None:
568
+ return
569
+ tbl_version = row.md.get('current_version')
570
+ op = schema.md_from_dict(TableOp, row.op)
571
+ delete_next_op_stmt = sql.delete(schema.PendingTableOp).where(
572
+ schema.PendingTableOp.tbl_id == tbl_id, schema.PendingTableOp.op_sn == row.op_sn
573
+ )
574
+ reset_has_pending_stmt = (
575
+ sql.update(schema.Table)
576
+ .where(schema.Table.id == tbl_id)
577
+ .values(md=schema.Table.md.op('||')({'has_pending_ops': False}))
578
+ )
579
+
580
+ if op.needs_xact:
581
+ tv = self.get_tbl_version(
582
+ tbl_id, tbl_version, check_pending_ops=False, validate_initialized=True
583
+ )
584
+ tv.exec_op(op)
585
+ conn.execute(delete_next_op_stmt)
586
+ if op.op_sn == op.num_ops - 1:
587
+ conn.execute(reset_has_pending_stmt)
588
+ continue
589
+
590
+ # this op runs outside of a transaction
591
+ tv = self.get_tbl_version(tbl_id, tbl_version, check_pending_ops=False, validate_initialized=True)
592
+ tv.exec_op(op)
593
+ with self.begin_xact(
594
+ tbl_id=tbl_id, for_write=True, convert_db_excs=False, finalize_pending_ops=False
595
+ ) as conn:
596
+ conn.execute(delete_next_op_stmt)
597
+ if op.op_sn == op.num_ops - 1:
598
+ conn.execute(reset_has_pending_stmt)
599
+
600
+ except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
601
+ # TODO: why are we still seeing these here, instead of them getting taken care of by the retry
602
+ # logic of begin_xact()?
603
+ if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
604
+ num_retries += 1
605
+ log_msg: str
606
+ if op is not None:
607
+ log_msg = f'finalize_pending_ops(): retrying ({num_retries}) op {op!s} after {type(e.orig)}'
608
+ else:
609
+ log_msg = f'finalize_pending_ops(): retrying ({num_retries}) after {type(e.orig)}'
610
+ Env.get().console_logger.debug(log_msg)
611
+ time.sleep(random.uniform(0.1, 0.5))
612
+ continue
613
+ else:
614
+ raise
615
+ except Exception as e:
616
+ Env.get().console_logger.debug(f'finalize_pending_ops(): caught {e}')
617
+ raise
618
+
619
+ num_retries = 0
620
+
621
+ def _debug_str(self) -> str:
622
+ tv_str = '\n'.join(str(k) for k in self._tbl_versions)
623
+ tbl_str = '\n'.join(str(k) for k in self._tbls)
624
+ return f'tbl_versions:\n{tv_str}\ntbls:\n{tbl_str}'
397
625
 
398
626
  def _get_mutable_tree(self, tbl_id: UUID) -> set[UUID]:
399
627
  """Returns ids of all tables that form the tree of mutable views starting at tbl_id; includes the root."""
400
- tv = self.get_tbl_version(tbl_id, None)
628
+ assert (tbl_id, None) in self._tbl_versions, (
629
+ f'({tbl_id}, None) not in {self._tbl_versions.keys()}\n{self._debug_str()}'
630
+ )
631
+ tv = self.get_tbl_version(tbl_id, None, validate_initialized=True)
401
632
  result: set[UUID] = {tv.id}
402
633
  for view in tv.mutable_views:
403
634
  result.update(self._get_mutable_tree(view.id))
@@ -408,7 +639,9 @@ class Catalog:
408
639
  assert self._column_dependents is None
409
640
  self._column_dependents = defaultdict(set)
410
641
  for tbl_id in mutable_tree:
411
- assert tbl_id in self._column_dependencies
642
+ assert tbl_id in self._column_dependencies, (
643
+ f'{tbl_id} not in {self._column_dependencies.keys()}\n{self._debug_str()}'
644
+ )
412
645
  for col, dependencies in self._column_dependencies[tbl_id].items():
413
646
  for dependency in dependencies:
414
647
  if dependency.tbl_id not in mutable_tree:
@@ -416,13 +649,25 @@ class Catalog:
416
649
  dependents = self._column_dependents[dependency]
417
650
  dependents.add(col)
418
651
 
652
+ def record_column_dependencies(self, tbl_version: TableVersion) -> None:
653
+ """Update self._column_dependencies. Only valid for mutable versions."""
654
+ from pixeltable.exprs import Expr
655
+
656
+ assert tbl_version.is_mutable
657
+ dependencies: dict[QColumnId, set[QColumnId]] = {}
658
+ for col in tbl_version.cols_by_id.values():
659
+ if col.value_expr_dict is None:
660
+ continue
661
+ dependencies[QColumnId(tbl_version.id, col.id)] = Expr.get_refd_column_ids(col.value_expr_dict)
662
+ self._column_dependencies[tbl_version.id] = dependencies
663
+
419
664
  def get_column_dependents(self, tbl_id: UUID, col_id: int) -> set[Column]:
420
665
  """Return all Columns that transitively depend on the given column."""
421
666
  assert self._column_dependents is not None
422
667
  dependents = self._column_dependents[QColumnId(tbl_id, col_id)]
423
668
  result: set[Column] = set()
424
669
  for dependent in dependents:
425
- tv = self.get_tbl_version(dependent.tbl_id, None)
670
+ tv = self.get_tbl_version(dependent.tbl_id, None, validate_initialized=True)
426
671
  col = tv.cols_by_id[dependent.col_id]
427
672
  result.add(col)
428
673
  return result
@@ -453,6 +698,7 @@ class Catalog:
453
698
 
454
699
  def get_dir_path(self, dir_id: UUID) -> Path:
455
700
  """Return path for directory with given id"""
701
+ assert isinstance(dir_id, UUID)
456
702
  conn = Env.get().conn
457
703
  names: list[str] = []
458
704
  while True:
@@ -463,7 +709,7 @@ class Catalog:
463
709
  break
464
710
  names.insert(0, dir.md['name'])
465
711
  dir_id = dir.parent_id
466
- return Path('.'.join(names), empty_is_valid=True, allow_system_paths=True)
712
+ return Path.parse('.'.join(names), allow_empty_path=True, allow_system_path=True)
467
713
 
468
714
  @dataclasses.dataclass
469
715
  class DirEntry:
@@ -471,7 +717,7 @@ class Catalog:
471
717
  dir_entries: dict[str, Catalog.DirEntry]
472
718
  table: Optional[schema.Table]
473
719
 
474
- @_retry_loop(for_write=False)
720
+ @retry_loop(for_write=False)
475
721
  def get_dir_contents(self, dir_path: Path, recursive: bool = False) -> dict[str, DirEntry]:
476
722
  dir = self._get_schema_object(dir_path, expected=Dir, raise_if_not_exists=True)
477
723
  return self._get_dir_contents(dir._id, recursive=recursive)
@@ -498,7 +744,7 @@ class Catalog:
498
744
 
499
745
  return result
500
746
 
501
- @_retry_loop(for_write=True)
747
+ @retry_loop(for_write=True)
502
748
  def move(self, path: Path, new_path: Path) -> None:
503
749
  self._move(path, new_path)
504
750
 
@@ -540,6 +786,7 @@ class Catalog:
540
786
  - if both add and drop (= two directories are involved), lock the directories in a pre-determined order
541
787
  (in this case, by name) in order to prevent deadlocks between concurrent directory modifications
542
788
  """
789
+ assert drop_expected in (None, Table, Dir), drop_expected
543
790
  assert (add_dir_path is None) == (add_name is None)
544
791
  assert (drop_dir_path is None) == (drop_name is None)
545
792
  dir_paths: set[Path] = set()
@@ -553,7 +800,7 @@ class Catalog:
553
800
  for p in sorted(dir_paths):
554
801
  dir = self._get_dir(p, lock_dir=True)
555
802
  if dir is None:
556
- raise excs.Error(f'Directory {str(p)!r} does not exist.')
803
+ raise excs.Error(f'Directory {p!r} does not exist.')
557
804
  if p == add_dir_path:
558
805
  add_dir = dir
559
806
  if p == drop_dir_path:
@@ -564,24 +811,24 @@ class Catalog:
564
811
  add_obj = self._get_dir_entry(add_dir.id, add_name, lock_entry=True)
565
812
  if add_obj is not None and raise_if_exists:
566
813
  add_path = add_dir_path.append(add_name)
567
- raise excs.Error(f'Path {str(add_path)!r} already exists.')
814
+ raise excs.Error(f'Path {add_path!r} already exists.')
568
815
 
569
816
  drop_obj: Optional[SchemaObject] = None
570
817
  if drop_dir is not None:
571
818
  drop_path = drop_dir_path.append(drop_name)
572
819
  drop_obj = self._get_dir_entry(drop_dir.id, drop_name, lock_entry=True)
573
820
  if drop_obj is None and raise_if_not_exists:
574
- raise excs.Error(f'Path {str(drop_path)!r} does not exist.')
821
+ raise excs.Error(f'Path {drop_path!r} does not exist.')
575
822
  if drop_obj is not None and drop_expected is not None and not isinstance(drop_obj, drop_expected):
576
- raise excs.Error(
577
- f'{str(drop_path)!r} needs to be a {drop_expected._display_name()} '
578
- f'but is a {type(drop_obj)._display_name()}'
579
- )
823
+ expected_name = 'table' if drop_expected is Table else 'directory'
824
+ raise excs.Error(f'{drop_path!r} needs to be a {expected_name} but is a {drop_obj._display_name()}')
580
825
 
581
826
  add_dir_obj = Dir(add_dir.id, add_dir.parent_id, add_dir.md['name']) if add_dir is not None else None
582
827
  return add_obj, add_dir_obj, drop_obj
583
828
 
584
- def _get_dir_entry(self, dir_id: UUID, name: str, lock_entry: bool = False) -> Optional[SchemaObject]:
829
+ def _get_dir_entry(
830
+ self, dir_id: UUID, name: str, version: Optional[int] = None, lock_entry: bool = False
831
+ ) -> Optional[SchemaObject]:
585
832
  user = Env.get().user
586
833
  conn = Env.get().conn
587
834
 
@@ -602,7 +849,7 @@ class Catalog:
602
849
 
603
850
  # check for table
604
851
  if lock_entry:
605
- self._acquire_tbl_xlock(dir_id=dir_id, tbl_name=name)
852
+ self._acquire_tbl_lock(for_write=True, dir_id=dir_id, raise_if_not_exists=False, tbl_name=name)
606
853
  q = sql.select(schema.Table.id).where(
607
854
  schema.Table.dir_id == dir_id,
608
855
  schema.Table.md['name'].astext == name,
@@ -610,9 +857,7 @@ class Catalog:
610
857
  )
611
858
  tbl_id = conn.execute(q).scalar_one_or_none()
612
859
  if tbl_id is not None:
613
- if tbl_id not in self._tbls:
614
- _ = self._load_tbl(tbl_id)
615
- return self._tbls[tbl_id]
860
+ return self.get_table_by_id(tbl_id, version)
616
861
 
617
862
  return None
618
863
 
@@ -628,17 +873,17 @@ class Catalog:
628
873
  """Return the schema object at the given path, or None if it doesn't exist.
629
874
 
630
875
  Raises Error if
631
- - the parent directory doesn't exist'
876
+ - the parent directory doesn't exist
632
877
  - raise_if_exists is True and the path exists
633
878
  - raise_if_not_exists is True and the path does not exist
634
879
  - expected is not None and the existing object has a different type
635
880
  """
881
+ assert expected in (None, Table, Dir), expected
882
+
636
883
  if path.is_root:
637
884
  # the root dir
638
885
  if expected is not None and expected is not Dir:
639
- raise excs.Error(
640
- f'{str(path)!r} needs to be a {expected._display_name()} but is a {Dir._display_name()}'
641
- )
886
+ raise excs.Error(f'{path!r} needs to be a table but is a dir')
642
887
  dir = self._get_dir(path, lock_dir=lock_obj)
643
888
  if dir is None:
644
889
  raise excs.Error(f'Unknown user: {Env.get().user}')
@@ -647,33 +892,28 @@ class Catalog:
647
892
  parent_path = path.parent
648
893
  parent_dir = self._get_dir(parent_path, lock_dir=lock_parent)
649
894
  if parent_dir is None:
650
- raise excs.Error(f'Directory {str(parent_path)!r} does not exist.')
651
- obj = self._get_dir_entry(parent_dir.id, path.name, lock_entry=lock_obj)
895
+ raise excs.Error(f'Directory {parent_path!r} does not exist.')
896
+ obj = self._get_dir_entry(parent_dir.id, path.name, path.version, lock_entry=lock_obj)
652
897
 
653
898
  if obj is None and raise_if_not_exists:
654
- raise excs.Error(f'Path {str(path)!r} does not exist.')
899
+ raise excs.Error(f'Path {path!r} does not exist.')
655
900
  elif obj is not None and raise_if_exists:
656
- raise excs.Error(f'Path {str(path)!r} is an existing {type(obj)._display_name()}.')
901
+ raise excs.Error(f'Path {path!r} is an existing {obj._display_name()}.')
657
902
  elif obj is not None and expected is not None and not isinstance(obj, expected):
658
- raise excs.Error(
659
- f'{str(path)!r} needs to be a {expected._display_name()} but is a {type(obj)._display_name()}.'
660
- )
903
+ expected_name = 'table' if expected is Table else 'directory'
904
+ raise excs.Error(f'{path!r} needs to be a {expected_name} but is a {obj._display_name()}.')
661
905
  return obj
662
906
 
663
- def get_table_by_id(self, tbl_id: UUID) -> Optional[Table]:
664
- if tbl_id not in self._tbls:
665
- tbl = self._load_tbl(tbl_id)
666
- if tbl is None:
667
- return None
668
- # # if this is a mutable table, we also need to have its mutable views loaded, in order to track column
669
- # # dependencies
670
- # tbl_version = tbl._tbl_version.get()
671
- # if tbl_version.is_mutable:
672
- # for v in tbl_version.mutable_views:
673
- # _ = self.get_table_by_id(v.id)
674
- return self._tbls[tbl_id]
675
-
676
- @_retry_loop(for_write=True)
907
+ def get_table_by_id(self, tbl_id: UUID, version: Optional[int] = None) -> Optional[Table]:
908
+ """Must be executed inside a transaction. Might raise PendingTableOpsError."""
909
+ if (tbl_id, version) not in self._tbls:
910
+ if version is None:
911
+ self._load_tbl(tbl_id)
912
+ else:
913
+ self._load_tbl_at_version(tbl_id, version)
914
+ return self._tbls.get((tbl_id, version))
915
+
916
+ @retry_loop(for_write=True)
677
917
  def create_table(
678
918
  self,
679
919
  path: Path,
@@ -703,10 +943,9 @@ class Catalog:
703
943
  comment=comment,
704
944
  media_validation=media_validation,
705
945
  )
706
- self._tbls[tbl._id] = tbl
946
+ self._tbls[tbl._id, None] = tbl
707
947
  return tbl
708
948
 
709
- @_retry_loop(for_write=True)
710
949
  def create_view(
711
950
  self,
712
951
  path: Path,
@@ -722,49 +961,68 @@ class Catalog:
722
961
  media_validation: MediaValidation,
723
962
  if_exists: IfExistsParam,
724
963
  ) -> Table:
725
- from pixeltable.utils.filecache import FileCache
726
-
727
- if not is_snapshot and not base.is_snapshot():
728
- # this is a mutable view of a mutable base; X-lock the base and advance its view_sn before adding the view
729
- self._acquire_tbl_xlock(tbl_id=base.tbl_id)
730
- base_tv = self.get_tbl_version(base.tbl_id, None)
731
- base_tv.tbl_md.view_sn += 1
732
- result = Env.get().conn.execute(
733
- sql.update(schema.Table)
734
- .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
735
- .where(schema.Table.id == base.tbl_id)
736
- )
737
- assert result.rowcount == 1, result.rowcount
964
+ @retry_loop(for_write=True)
965
+ def create_fn() -> UUID:
966
+ if not is_snapshot and base.is_mutable():
967
+ # this is a mutable view of a mutable base; X-lock the base and advance its view_sn before adding
968
+ # the view
969
+ self._acquire_tbl_lock(tbl_id=base.tbl_id, for_write=True)
970
+ base_tv = self.get_tbl_version(base.tbl_id, None, validate_initialized=True)
971
+ base_tv.tbl_md.view_sn += 1
972
+ result = Env.get().conn.execute(
973
+ sql.update(schema.Table)
974
+ .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
975
+ .where(schema.Table.id == base.tbl_id)
976
+ )
977
+ assert result.rowcount == 1, result.rowcount
738
978
 
739
- existing = self._handle_path_collision(path, View, is_snapshot, if_exists)
740
- if existing is not None:
741
- assert isinstance(existing, View)
742
- return existing
979
+ existing = self._handle_path_collision(path, View, is_snapshot, if_exists)
980
+ if existing is not None:
981
+ assert isinstance(existing, View)
982
+ return existing._id
743
983
 
744
- dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
745
- assert dir is not None
746
- if iterator is None:
747
- iterator_class, iterator_args = None, None
748
- else:
749
- iterator_class, iterator_args = iterator
750
- view = View._create(
751
- dir._id,
752
- path.name,
753
- base=base,
754
- select_list=select_list,
755
- additional_columns=additional_columns,
756
- predicate=where,
757
- sample_clause=sample_clause,
758
- is_snapshot=is_snapshot,
759
- iterator_cls=iterator_class,
760
- iterator_args=iterator_args,
761
- num_retained_versions=num_retained_versions,
762
- comment=comment,
763
- media_validation=media_validation,
764
- )
765
- FileCache.get().emit_eviction_warnings()
766
- self._tbls[view._id] = view
767
- return view
984
+ dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
985
+ assert dir is not None
986
+ if iterator is None:
987
+ iterator_class, iterator_args = None, None
988
+ else:
989
+ iterator_class, iterator_args = iterator
990
+ md, ops = View._create(
991
+ dir._id,
992
+ path.name,
993
+ base=base,
994
+ select_list=select_list,
995
+ additional_columns=additional_columns,
996
+ predicate=where,
997
+ sample_clause=sample_clause,
998
+ is_snapshot=is_snapshot,
999
+ iterator_cls=iterator_class,
1000
+ iterator_args=iterator_args,
1001
+ num_retained_versions=num_retained_versions,
1002
+ comment=comment,
1003
+ media_validation=media_validation,
1004
+ )
1005
+ tbl_id = UUID(md.tbl_md.tbl_id)
1006
+ self.store_tbl_md(tbl_id, dir._id, md.tbl_md, md.version_md, md.schema_version_md, ops)
1007
+ return tbl_id
1008
+
1009
+ view_id = create_fn()
1010
+ if not is_snapshot and base.is_mutable():
1011
+ # invalidate base's TableVersion instance, so that it gets reloaded with the new mutable view
1012
+ self._clear_tv_cache(base.tbl_id, base.tbl_version.effective_version)
1013
+ # base_tv = self.get_tbl_version(base.tbl_id, base.tbl_version.effective_version, validate_initialized=True)
1014
+ # view_handle = TableVersionHandle(view_id, effective_version=None)
1015
+ # base_tv.mutable_views.add(view_handle)
1016
+
1017
+ # finalize pending ops
1018
+ with self.begin_xact(tbl_id=view_id, for_write=True, finalize_pending_ops=True):
1019
+ return self.get_table_by_id(view_id)
1020
+
1021
+ def _clear_tv_cache(self, tbl_id: UUID, effective_version: Optional[int]) -> None:
1022
+ if (tbl_id, effective_version) in self._tbl_versions:
1023
+ tv = self._tbl_versions[tbl_id, effective_version]
1024
+ tv.is_validated = False
1025
+ del self._tbl_versions[tbl_id, effective_version]
768
1026
 
769
1027
  def create_replica(self, path: Path, md: list[schema.FullTableMd]) -> None:
770
1028
  """
@@ -784,12 +1042,12 @@ class Catalog:
784
1042
  )
785
1043
 
786
1044
  # Ensure that the system directory exists.
787
- self._create_dir(Path('_system', allow_system_paths=True), if_exists=IfExistsParam.IGNORE, parents=False)
1045
+ self._create_dir(Path.parse('_system', allow_system_path=True), if_exists=IfExistsParam.IGNORE, parents=False)
788
1046
 
789
- # Now check to see if this table UUID already exists in the catalog.
790
- existing = Catalog.get().get_table_by_id(tbl_id)
1047
+ # Now check to see if this table already exists in the catalog.
1048
+ existing = self.get_table_by_id(tbl_id)
791
1049
  if existing is not None:
792
- existing_path = Path(existing._path(), allow_system_paths=True)
1050
+ existing_path = Path.parse(existing._path(), allow_system_path=True)
793
1051
  if existing_path != path:
794
1052
  # It does exist, under a different path from the specified one.
795
1053
  if not existing_path.is_system_path:
@@ -808,16 +1066,16 @@ class Catalog:
808
1066
  # table being replicated.
809
1067
  for ancestor_md in md[:0:-1]:
810
1068
  ancestor_id = UUID(ancestor_md.tbl_md.tbl_id)
811
- replica = Catalog.get().get_table_by_id(ancestor_id)
1069
+ replica = self.get_table_by_id(ancestor_id)
812
1070
  replica_path: Path
813
1071
  if replica is None:
814
1072
  # We've never seen this table before. Create a new anonymous system table for it.
815
- replica_path = Path(f'_system.replica_{ancestor_id.hex}', allow_system_paths=True)
1073
+ replica_path = Path.parse(f'_system.replica_{ancestor_id.hex}', allow_system_path=True)
816
1074
  else:
817
1075
  # The table already exists in the catalog. The existing path might be a system path (if the table
818
1076
  # was created as an anonymous base table of some other table), or it might not (if it's a snapshot
819
1077
  # that was directly replicated by the user at some point). In either case, use the existing path.
820
- replica_path = Path(replica._path(), allow_system_paths=True)
1078
+ replica_path = Path.parse(replica._path(), allow_system_path=True)
821
1079
 
822
1080
  # Store the metadata; it could be a new version (in which case a new record will be created), or a known
823
1081
  # version (in which case the newly received metadata will be validated as identical).
@@ -877,7 +1135,7 @@ class Catalog:
877
1135
  q = (
878
1136
  sql.select(schema.TableVersion.md)
879
1137
  .where(schema.TableVersion.tbl_id == tbl_id)
880
- .where(sql.text(f"({schema.TableVersion.__table__}.md->>'version')::int = {md.version_md.version}"))
1138
+ .where(schema.TableVersion.md['version'].cast(sql.Integer) == md.version_md.version)
881
1139
  )
882
1140
  existing_version_md_row = conn.execute(q).one_or_none()
883
1141
  if existing_version_md_row is None:
@@ -896,10 +1154,7 @@ class Catalog:
896
1154
  sql.select(schema.TableSchemaVersion.md)
897
1155
  .where(schema.TableSchemaVersion.tbl_id == tbl_id)
898
1156
  .where(
899
- sql.text(
900
- f"({schema.TableSchemaVersion.__table__}.md->>'schema_version')::int = "
901
- f'{md.schema_version_md.schema_version}'
902
- )
1157
+ schema.TableSchemaVersion.md['schema_version'].cast(sql.Integer) == md.schema_version_md.schema_version
903
1158
  )
904
1159
  )
905
1160
  existing_schema_version_md_row = conn.execute(q).one_or_none()
@@ -922,7 +1177,7 @@ class Catalog:
922
1177
  # It's a new version of a table that has a physical store, so we need to create a TableVersion instance.
923
1178
  TableVersion.create_replica(md)
924
1179
 
925
- @_retry_loop(for_write=False)
1180
+ @retry_loop(for_write=False)
926
1181
  def get_table(self, path: Path) -> Table:
927
1182
  obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
928
1183
  assert isinstance(obj, Table)
@@ -931,7 +1186,7 @@ class Catalog:
931
1186
  obj._tbl_version_path.clear_cached_md()
932
1187
  return obj
933
1188
 
934
- @_retry_loop(for_write=True)
1189
+ @retry_loop(for_write=True)
935
1190
  def drop_table(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
936
1191
  tbl = self._get_schema_object(
937
1192
  path,
@@ -941,7 +1196,7 @@ class Catalog:
941
1196
  lock_obj=False,
942
1197
  )
943
1198
  if tbl is None:
944
- _logger.info(f'Skipped table {str(path)!r} (does not exist).')
1199
+ _logger.info(f'Skipped table {path!r} (does not exist).')
945
1200
  return
946
1201
  assert isinstance(tbl, Table)
947
1202
 
@@ -949,7 +1204,7 @@ class Catalog:
949
1204
  # this is a mutable view of a mutable base;
950
1205
  # lock the base before the view, in order to avoid deadlocks with concurrent inserts/updates
951
1206
  base_id = tbl._tbl_version_path.base.tbl_id
952
- self._acquire_tbl_xlock(tbl_id=base_id, lock_mutable_tree=False)
1207
+ self._acquire_tbl_lock(tbl_id=base_id, for_write=True, lock_mutable_tree=False)
953
1208
 
954
1209
  self._drop_tbl(tbl, force=force, is_replace=False)
955
1210
 
@@ -964,7 +1219,7 @@ class Catalog:
964
1219
  in the same directory with the same name (which could lead to duplicate names if we get aborted)
965
1220
  """
966
1221
  self._acquire_dir_xlock(dir_id=tbl._dir_id)
967
- self._acquire_tbl_xlock(tbl_id=tbl._id, lock_mutable_tree=False)
1222
+ self._acquire_tbl_lock(tbl_id=tbl._id, for_write=True, lock_mutable_tree=False)
968
1223
 
969
1224
  view_ids = self.get_view_ids(tbl._id, for_update=True)
970
1225
  if len(view_ids) > 0:
@@ -988,8 +1243,9 @@ class Catalog:
988
1243
  # if this is a mutable view of a mutable base, advance the base's view_sn
989
1244
  if isinstance(tbl, View) and tbl._tbl_version_path.is_mutable() and tbl._tbl_version_path.base.is_mutable():
990
1245
  base_id = tbl._tbl_version_path.base.tbl_id
991
- base_tv = self.get_tbl_version(base_id, None)
1246
+ base_tv = self.get_tbl_version(base_id, None, validate_initialized=True)
992
1247
  base_tv.tbl_md.view_sn += 1
1248
+ self._modified_tvs.add(base_tv.handle)
993
1249
  result = Env.get().conn.execute(
994
1250
  sql.update(schema.Table.__table__)
995
1251
  .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
@@ -997,23 +1253,28 @@ class Catalog:
997
1253
  )
998
1254
  assert result.rowcount == 1, result.rowcount
999
1255
 
1256
+ if tbl._tbl_version is not None:
1257
+ # invalidate the TableVersion instance when we're done so that existing references to it can find out it
1258
+ # has been dropped
1259
+ self._modified_tvs.add(tbl._tbl_version)
1000
1260
  tv = tbl._tbl_version.get() if tbl._tbl_version is not None else None
1001
- if tv is not None:
1261
+ # if tv is not None:
1262
+ # tv = tbl._tbl_version.get()
1263
+ # # invalidate the TableVersion instance so that existing references to it can find out it has been dropped
1264
+ # tv.is_validated = False
1265
+ if tbl._tbl_version is not None:
1266
+ # drop the store table before deleting the Table record
1002
1267
  tv = tbl._tbl_version.get()
1003
- # invalidate the TableVersion instance so that existing references to it can find out it has been dropped
1004
- tv.is_validated = False
1268
+ tv.drop()
1005
1269
 
1006
1270
  self.delete_tbl_md(tbl._id)
1007
- assert tbl._id in self._tbls
1008
- del self._tbls[tbl._id]
1271
+ assert (tbl._id, None) in self._tbls
1272
+ versions = [k[1] for k in self._tbls if k[0] == tbl._id]
1273
+ for version in versions:
1274
+ del self._tbls[tbl._id, version]
1009
1275
  _logger.info(f'Dropped table `{tbl._path()}`.')
1010
1276
 
1011
- if tv is not None:
1012
- tv.drop()
1013
- assert (tv.id, tv.effective_version) in self._tbl_versions
1014
- del self._tbl_versions[tv.id, tv.effective_version]
1015
-
1016
- @_retry_loop(for_write=True)
1277
+ @retry_loop(for_write=True)
1017
1278
  def create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
1018
1279
  return self._create_dir(path, if_exists, parents)
1019
1280
 
@@ -1026,7 +1287,7 @@ class Catalog:
1026
1287
  # parent = self._get_schema_object(path.parent)
1027
1288
  # assert parent is not None
1028
1289
  # dir = Dir._create(parent._id, path.name)
1029
- # Env.get().console_logger.info(f'Created directory {str(path)!r}.')
1290
+ # Env.get().console_logger.info(f'Created directory {path!r}.')
1030
1291
  # return dir
1031
1292
 
1032
1293
  if parents:
@@ -1045,10 +1306,10 @@ class Catalog:
1045
1306
  return existing
1046
1307
  assert parent is not None
1047
1308
  dir = Dir._create(parent._id, path.name)
1048
- Env.get().console_logger.info(f'Created directory {str(path)!r}.')
1309
+ Env.get().console_logger.info(f'Created directory {path!r}.')
1049
1310
  return dir
1050
1311
 
1051
- @_retry_loop(for_write=True)
1312
+ @retry_loop(for_write=True)
1052
1313
  def drop_dir(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
1053
1314
  _, _, schema_obj = self._prepare_dir_op(
1054
1315
  drop_dir_path=path.parent,
@@ -1057,7 +1318,7 @@ class Catalog:
1057
1318
  raise_if_not_exists=if_not_exists == IfNotExistsParam.ERROR and not force,
1058
1319
  )
1059
1320
  if schema_obj is None:
1060
- _logger.info(f'Directory {str(path)!r} does not exist; skipped drop_dir().')
1321
+ _logger.info(f'Directory {path!r} does not exist; skipped drop_dir().')
1061
1322
  return
1062
1323
  self._drop_dir(schema_obj._id, path, force=force)
1063
1324
 
@@ -1070,7 +1331,7 @@ class Catalog:
1070
1331
  q = sql.select(sql.func.count()).select_from(schema.Table).where(schema.Table.dir_id == dir_id)
1071
1332
  num_tbls = conn.execute(q).scalar()
1072
1333
  if num_subdirs + num_tbls > 0:
1073
- raise excs.Error(f'Directory {str(dir_path)!r} is not empty.')
1334
+ raise excs.Error(f'Directory {dir_path!r} is not empty.')
1074
1335
 
1075
1336
  # drop existing subdirs
1076
1337
  self._acquire_dir_xlock(dir_id=dir_id)
@@ -1088,7 +1349,7 @@ class Catalog:
1088
1349
 
1089
1350
  # self.drop_dir(dir_id)
1090
1351
  conn.execute(sql.delete(schema.Dir).where(schema.Dir.id == dir_id))
1091
- _logger.info(f'Removed directory {str(dir_path)!r}.')
1352
+ _logger.info(f'Removed directory {dir_path!r}.')
1092
1353
 
1093
1354
  def get_view_ids(self, tbl_id: UUID, for_update: bool = False) -> list[UUID]:
1094
1355
  """Return the ids of views that directly reference the given table"""
@@ -1098,19 +1359,31 @@ class Catalog:
1098
1359
  tbl_count = conn.execute(q).scalar()
1099
1360
  if tbl_count == 0:
1100
1361
  raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
1101
- q = sql.select(schema.Table.id).where(sql.text(f"md->'view_md'->'base_versions'->0->>0 = {tbl_id.hex!r}"))
1362
+ q = sql.select(schema.Table.id).where(schema.Table.md['view_md']['base_versions'][0][0].astext == tbl_id.hex)
1102
1363
  if for_update:
1103
1364
  q = q.with_for_update()
1104
1365
  result = [r[0] for r in conn.execute(q).all()]
1105
1366
  return result
1106
1367
 
1107
- def get_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
1368
+ def get_tbl_version(
1369
+ self,
1370
+ tbl_id: UUID,
1371
+ effective_version: Optional[int],
1372
+ check_pending_ops: Optional[bool] = None,
1373
+ validate_initialized: bool = False,
1374
+ ) -> Optional[TableVersion]:
1375
+ """
1376
+ Returns the TableVersion instance for the given table and version and updates the cache.
1377
+
1378
+ If present in the cache and the instance isn't validated, validates version and view_sn against the stored
1379
+ metadata.
1380
+ """
1108
1381
  # we need a transaction here, if we're not already in one; if this starts a new transaction,
1109
1382
  # the returned TableVersion instance will not be validated
1110
1383
  with self.begin_xact(for_write=False) as conn:
1111
1384
  tv = self._tbl_versions.get((tbl_id, effective_version))
1112
1385
  if tv is None:
1113
- tv = self._load_tbl_version(tbl_id, effective_version)
1386
+ tv = self._load_tbl_version(tbl_id, effective_version, check_pending_ops=check_pending_ops)
1114
1387
  elif not tv.is_validated:
1115
1388
  # only live instances are invalidated
1116
1389
  assert effective_version is None
@@ -1131,12 +1404,16 @@ class Catalog:
1131
1404
  f'(cached/current version: {tv.version}/{current_version}, '
1132
1405
  f'cached/current view_sn: {tv.tbl_md.view_sn}/{view_sn})'
1133
1406
  )
1134
- tv = self._load_tbl_version(tbl_id, None)
1407
+ tv = self._load_tbl_version(tbl_id, None, check_pending_ops=check_pending_ops)
1135
1408
  else:
1136
1409
  # the cached metadata is valid
1137
1410
  tv.is_validated = True
1138
1411
 
1139
- assert tv.is_validated
1412
+ assert tv.is_validated, f'{tbl_id}:{effective_version} not validated\n{tv.__dict__}\n{self._debug_str()}'
1413
+ if validate_initialized:
1414
+ assert tv.is_initialized, (
1415
+ f'{tbl_id}:{effective_version} not initialized\n{tv.__dict__}\n{self._debug_str()}'
1416
+ )
1140
1417
  return tv
1141
1418
 
1142
1419
  def remove_tbl_version(self, tbl_version: TableVersion) -> None:
@@ -1181,52 +1458,56 @@ class Catalog:
1181
1458
  row = conn.execute(q).one_or_none()
1182
1459
  return schema.Dir(**row._mapping) if row is not None else None
1183
1460
 
1184
- def _load_tbl(self, tbl_id: UUID) -> Optional[Table]:
1461
+ def _load_tbl(self, tbl_id: UUID) -> None:
1185
1462
  """Loads metadata for the table with the given id and caches it."""
1186
1463
  _logger.info(f'Loading table {tbl_id}')
1187
1464
  from .insertable_table import InsertableTable
1188
1465
  from .view import View
1189
1466
 
1190
1467
  conn = Env.get().conn
1468
+
1469
+ # check for pending ops
1470
+ q = sql.select(sql.func.count()).where(schema.PendingTableOp.tbl_id == tbl_id)
1471
+ has_pending_ops = conn.execute(q).scalar() > 0
1472
+ if has_pending_ops:
1473
+ raise PendingTableOpsError(tbl_id)
1474
+
1191
1475
  q = (
1192
1476
  sql.select(schema.Table, schema.TableSchemaVersion)
1193
1477
  .join(schema.TableSchemaVersion)
1194
1478
  .where(schema.Table.id == schema.TableSchemaVersion.tbl_id)
1195
- # Table.md['current_schema_version'] == TableSchemaVersion.schema_version
1196
1479
  .where(
1197
- sql.text(
1198
- f"({schema.Table.__table__}.md->>'current_schema_version')::int = "
1199
- f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
1200
- )
1480
+ schema.Table.md['current_schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version
1201
1481
  )
1202
1482
  .where(schema.Table.id == tbl_id)
1203
1483
  )
1204
1484
  row = conn.execute(q).one_or_none()
1205
1485
  if row is None:
1206
1486
  return None
1207
- tbl_record, schema_version_record = _unpack_row(row, [schema.Table, schema.TableSchemaVersion])
1487
+ tbl_record, _ = _unpack_row(row, [schema.Table, schema.TableSchemaVersion])
1208
1488
 
1209
1489
  tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
1210
1490
  view_md = tbl_md.view_md
1211
- if view_md is None:
1491
+ if view_md is None and not tbl_md.is_replica:
1212
1492
  # this is a base table
1213
1493
  if (tbl_id, None) not in self._tbl_versions:
1214
1494
  _ = self._load_tbl_version(tbl_id, None)
1215
1495
  tbl = InsertableTable(tbl_record.dir_id, TableVersionHandle(tbl_id, None))
1216
- self._tbls[tbl_id] = tbl
1217
- return tbl
1496
+ self._tbls[tbl_id, None] = tbl
1497
+ return
1218
1498
 
1219
1499
  # this is a view; determine the sequence of TableVersions to load
1220
1500
  tbl_version_path: list[tuple[UUID, Optional[int]]] = []
1221
- schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
1222
- pure_snapshot = view_md.is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
1223
- if pure_snapshot:
1501
+ if tbl_md.is_pure_snapshot:
1224
1502
  # this is a pure snapshot, without a physical table backing it; we only need the bases
1225
1503
  pass
1226
1504
  else:
1227
- effective_version = 0 if view_md.is_snapshot else None # snapshots only have version 0
1505
+ effective_version = (
1506
+ 0 if view_md is not None and view_md.is_snapshot else None
1507
+ ) # snapshots only have version 0
1228
1508
  tbl_version_path.append((tbl_id, effective_version))
1229
- tbl_version_path.extend((UUID(tbl_id), version) for tbl_id, version in view_md.base_versions)
1509
+ if view_md is not None:
1510
+ tbl_version_path.extend((UUID(tbl_id), version) for tbl_id, version in view_md.base_versions)
1230
1511
 
1231
1512
  # load TableVersions, starting at the root
1232
1513
  base_path: Optional[TableVersionPath] = None
@@ -1236,11 +1517,71 @@ class Catalog:
1236
1517
  _ = self._load_tbl_version(id, effective_version)
1237
1518
  view_path = TableVersionPath(TableVersionHandle(id, effective_version), base=base_path)
1238
1519
  base_path = view_path
1239
- view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=pure_snapshot)
1240
- self._tbls[tbl_id] = view
1241
- return view
1520
+ view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=tbl_md.is_pure_snapshot)
1521
+ self._tbls[tbl_id, None] = view
1522
+
1523
+ def _load_tbl_at_version(self, tbl_id: UUID, version: int) -> None:
1524
+ from .view import View
1525
+
1526
+ # Load the specified TableMd and TableVersionMd records from the db.
1527
+ conn = Env.get().conn
1528
+ q: sql.Executable = (
1529
+ sql.select(schema.Table, schema.TableVersion)
1530
+ .join(schema.TableVersion)
1531
+ .where(schema.Table.id == tbl_id)
1532
+ .where(schema.Table.id == schema.TableVersion.tbl_id)
1533
+ .where(schema.TableVersion.version == version)
1534
+ )
1535
+ row = conn.execute(q).one_or_none()
1536
+ if row is None:
1537
+ return None
1538
+ tbl_record, version_record = _unpack_row(row, [schema.Table, schema.TableVersion])
1539
+ tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
1540
+ version_md = schema.md_from_dict(schema.TableVersionMd, version_record.md)
1242
1541
 
1243
- @_retry_loop(for_write=False)
1542
+ # Reconstruct the TableVersionPath for the specified TableVersion. We do this by examining the created_at
1543
+ # timestamps of this table and all its ancestors.
1544
+ # TODO: Store the relevant TableVersionPaths in the database, so that we don't need to rely on timestamps
1545
+ # (which might be nondeterministic in the future).
1546
+
1547
+ # Build the list of ancestor versions, starting with the given table and traversing back to the base table.
1548
+ # For each proper ancestor, we use the version whose created_at timestamp equals or most nearly precedes the
1549
+ # given TableVersion's created_at timestamp.
1550
+ ancestors: list[tuple[UUID, Optional[int]]] = [(tbl_id, version)]
1551
+ if tbl_md.view_md is not None:
1552
+ for ancestor_id, _ in tbl_md.view_md.base_versions:
1553
+ q = (
1554
+ sql.select(schema.TableVersion)
1555
+ .where(schema.TableVersion.tbl_id == ancestor_id)
1556
+ .where(schema.TableVersion.md['created_at'].cast(sql.Float) <= version_md.created_at)
1557
+ .order_by(schema.TableVersion.md['created_at'].cast(sql.Float).desc())
1558
+ .limit(1)
1559
+ )
1560
+ row = conn.execute(q).one_or_none()
1561
+ if row is None:
1562
+ # This can happen if an ancestor version is garbage collected; it can also happen in
1563
+ # rare circumstances involving table versions created specifically with Pixeltable 0.4.3.
1564
+ _logger.info(f'Ancestor {ancestor_id} not found for table {tbl_id}:{version}')
1565
+ raise excs.Error('The specified table version is no longer valid and cannot be retrieved.')
1566
+ ancestor_version_record = _unpack_row(row, [schema.TableVersion])[0]
1567
+ ancestor_version_md = schema.md_from_dict(schema.TableVersionMd, ancestor_version_record.md)
1568
+ assert ancestor_version_md.created_at <= version_md.created_at
1569
+ ancestors.append((UUID(ancestor_id), ancestor_version_md.version))
1570
+
1571
+ # Force any ancestors to be loaded (base table first).
1572
+ for anc_id, anc_version in ancestors[::-1]:
1573
+ if (anc_id, anc_version) not in self._tbl_versions:
1574
+ _ = self._load_tbl_version(anc_id, anc_version)
1575
+
1576
+ # Now reconstruct the relevant TableVersionPath instance from the ancestor versions.
1577
+ tvp: Optional[TableVersionPath] = None
1578
+ for anc_id, anc_version in ancestors[::-1]:
1579
+ tvp = TableVersionPath(TableVersionHandle(anc_id, anc_version), base=tvp)
1580
+
1581
+ view = View(tbl_id, tbl_record.dir_id, tbl_md.name, tvp, snapshot_only=True)
1582
+ self._tbls[tbl_id, version] = view
1583
+
1584
+ @retry_loop(for_write=False)
1244
1585
  def collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
1245
1586
  """
1246
1587
  Returns the history of up to n versions of the table with the given UUID.
@@ -1258,8 +1599,7 @@ class Catalog:
1258
1599
  .select_from(schema.TableVersion)
1259
1600
  .join(
1260
1601
  schema.TableSchemaVersion,
1261
- sql.cast(schema.TableVersion.md['schema_version'], sql.Integer)
1262
- == schema.TableSchemaVersion.schema_version,
1602
+ schema.TableVersion.md['schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version,
1263
1603
  )
1264
1604
  .where(schema.TableVersion.tbl_id == tbl_id)
1265
1605
  .where(schema.TableSchemaVersion.tbl_id == tbl_id)
@@ -1301,13 +1641,9 @@ class Catalog:
1301
1641
  # JOIN TableVersion tv ON (tv.tbl_id = tbl_id AND tv.version = effective_version)
1302
1642
  # JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND tv.md.schema_version = tsv.schema_version)
1303
1643
  # WHERE t.id = tbl_id
1304
- q = q.where(sql.text(f"({schema.TableVersion.__table__}.md->>'version')::int = {effective_version}")).where(
1305
- sql.text(
1306
- (
1307
- f"({schema.TableVersion.__table__}.md->>'schema_version')::int = "
1308
- f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
1309
- )
1310
- )
1644
+ q = q.where(
1645
+ schema.TableVersion.md['version'].cast(sql.Integer) == effective_version,
1646
+ schema.TableVersion.md['schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version,
1311
1647
  )
1312
1648
  else:
1313
1649
  # we are loading the current version
@@ -1317,17 +1653,8 @@ class Catalog:
1317
1653
  # JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND t.current_schema_version = tsv.schema_version)
1318
1654
  # WHERE t.id = tbl_id
1319
1655
  q = q.where(
1320
- sql.text(
1321
- f"({schema.Table.__table__}.md->>'current_version')::int = "
1322
- f'{schema.TableVersion.__table__}.{schema.TableVersion.version.name}'
1323
- )
1324
- ).where(
1325
- sql.text(
1326
- (
1327
- f"({schema.Table.__table__}.md->>'current_schema_version')::int = "
1328
- f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
1329
- )
1330
- )
1656
+ schema.Table.md['current_version'].cast(sql.Integer) == schema.TableVersion.version,
1657
+ schema.Table.md['current_schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version,
1331
1658
  )
1332
1659
 
1333
1660
  row = conn.execute(q).one_or_none()
@@ -1350,6 +1677,7 @@ class Catalog:
1350
1677
  tbl_md: Optional[schema.TableMd],
1351
1678
  version_md: Optional[schema.TableVersionMd],
1352
1679
  schema_version_md: Optional[schema.TableSchemaVersionMd],
1680
+ pending_ops: Optional[list[TableOp]] = None,
1353
1681
  ) -> None:
1354
1682
  """
1355
1683
  Stores metadata to the DB.
@@ -1364,6 +1692,9 @@ class Catalog:
1364
1692
  If inserting `version_md` or `schema_version_md` would be a primary key violation, an exception will be raised.
1365
1693
  """
1366
1694
  assert self._in_write_xact
1695
+ assert version_md is None or version_md.created_at > 0.0
1696
+ assert pending_ops is None or len(pending_ops) > 0
1697
+ assert pending_ops is None or tbl_md is not None # if we write pending ops, we must also write new tbl_md
1367
1698
  session = Env.get().session
1368
1699
 
1369
1700
  # Construct and insert or update table record if requested.
@@ -1374,6 +1705,9 @@ class Catalog:
1374
1705
  assert tbl_md.current_schema_version == version_md.schema_version
1375
1706
  if schema_version_md is not None:
1376
1707
  assert tbl_md.current_schema_version == schema_version_md.schema_version
1708
+ if pending_ops is not None:
1709
+ tbl_md.has_pending_ops = True
1710
+
1377
1711
  if dir_id is not None:
1378
1712
  # We are inserting a record while creating a new table.
1379
1713
  tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_md))
@@ -1404,25 +1738,30 @@ class Catalog:
1404
1738
  tbl_id=tbl_id, schema_version=schema_version_md.schema_version, md=dataclasses.asdict(schema_version_md)
1405
1739
  )
1406
1740
  session.add(schema_version_record)
1407
- session.flush() # Inform SQLAlchemy that we want to write these changes to the DB.
1408
1741
 
1409
- def update_tbl_version_md(self, version_md: Optional[schema.TableVersionMd]) -> None:
1410
- """
1411
- Update the TableVersion.md field in the DB. Typically used to update the cascade row count status.
1742
+ # make sure we don't have any pending ops
1743
+ assert session.query(schema.PendingTableOp).filter(schema.PendingTableOp.tbl_id == tbl_id).count() == 0
1412
1744
 
1413
- Args:
1414
- version_md: TableVersionMd
1415
- """
1745
+ if pending_ops is not None:
1746
+ for op in pending_ops:
1747
+ op_record = schema.PendingTableOp(tbl_id=tbl_id, op_sn=op.op_sn, op=dataclasses.asdict(op))
1748
+ session.add(op_record)
1749
+
1750
+ session.flush() # Inform SQLAlchemy that we want to write these changes to the DB.
1751
+
1752
+ def store_update_status(self, tbl_id: UUID, version: int, status: UpdateStatus) -> None:
1753
+ """Update the TableVersion.md.update_status field"""
1416
1754
  assert self._in_write_xact
1417
- session = Env.get().session
1755
+ conn = Env.get().conn
1418
1756
 
1419
- session.execute(
1420
- sql.update(schema.TableVersion.__table__)
1421
- .values({schema.TableVersion.md: dataclasses.asdict(version_md)})
1422
- .where(schema.TableVersion.tbl_id == version_md.tbl_id, schema.TableVersion.version == version_md.version)
1757
+ stmt = (
1758
+ sql.update(schema.TableVersion)
1759
+ .where(schema.TableVersion.tbl_id == tbl_id, schema.TableVersion.version == version)
1760
+ .values(md=schema.TableVersion.md.op('||')({'update_status': dataclasses.asdict(status)}))
1423
1761
  )
1424
1762
 
1425
- session.flush() # Inform SQLAlchemy that we want to write these changes to the DB.
1763
+ res = conn.execute(stmt)
1764
+ assert res.rowcount == 1, res.rowcount
1426
1765
 
1427
1766
  def delete_tbl_md(self, tbl_id: UUID) -> None:
1428
1767
  """
@@ -1431,6 +1770,7 @@ class Catalog:
1431
1770
  conn = Env.get().conn
1432
1771
  conn.execute(sql.delete(schema.TableSchemaVersion.__table__).where(schema.TableSchemaVersion.tbl_id == tbl_id))
1433
1772
  conn.execute(sql.delete(schema.TableVersion.__table__).where(schema.TableVersion.tbl_id == tbl_id))
1773
+ conn.execute(sql.delete(schema.PendingTableOp.__table__).where(schema.PendingTableOp.tbl_id == tbl_id))
1434
1774
  conn.execute(sql.delete(schema.Table.__table__).where(schema.Table.id == tbl_id))
1435
1775
 
1436
1776
  def load_replica_md(self, tbl: Table) -> list[schema.FullTableMd]:
@@ -1461,13 +1801,32 @@ class Catalog:
1461
1801
 
1462
1802
  return md
1463
1803
 
1464
- def _load_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
1804
+ def _load_tbl_version(
1805
+ self, tbl_id: UUID, effective_version: Optional[int], check_pending_ops: bool = True
1806
+ ) -> Optional[TableVersion]:
1465
1807
  """Creates TableVersion instance from stored metadata and registers it in _tbl_versions."""
1466
- tbl_md, _, schema_version_md = self.load_tbl_md(tbl_id, effective_version)
1808
+ tbl_md, version_md, schema_version_md = self.load_tbl_md(tbl_id, effective_version)
1467
1809
  view_md = tbl_md.view_md
1468
1810
 
1469
1811
  conn = Env.get().conn
1470
1812
 
1813
+ if check_pending_ops:
1814
+ pending_ops_q = (
1815
+ sql.select(sql.func.count())
1816
+ .select_from(schema.Table)
1817
+ .join(schema.PendingTableOp)
1818
+ .where(schema.PendingTableOp.tbl_id == tbl_id)
1819
+ .where(schema.Table.id == tbl_id)
1820
+ )
1821
+ if effective_version is not None:
1822
+ # we only care about pending ops if the requested version is the current version
1823
+ pending_ops_q = pending_ops_q.where(
1824
+ sql.text(f"({schema.Table.__table__}.md->>'current_version')::int = {effective_version}")
1825
+ )
1826
+ has_pending_ops = conn.execute(pending_ops_q).scalar() > 0
1827
+ if has_pending_ops:
1828
+ raise PendingTableOpsError(tbl_id)
1829
+
1471
1830
  # load mutable view ids for mutable TableVersions
1472
1831
  mutable_view_ids: list[UUID] = []
1473
1832
  # If this is a replica, effective_version should not be None. We see this today, because
@@ -1475,24 +1834,30 @@ class Catalog:
1475
1834
  # This is presumably a source of bugs, because it ignores schema version changes (eg, column renames).
1476
1835
  # TODO: retarget the value_expr_dict when instantiating Columns for a particular TV instance.
1477
1836
  if effective_version is None and not tbl_md.is_replica:
1478
- q = sql.select(schema.Table.id).where(
1479
- sql.text(
1480
- f"md->'view_md'->'base_versions'->0->>0 = {tbl_id.hex!r} "
1481
- "AND md->'view_md'->'base_versions'->0->>1 IS NULL"
1482
- )
1837
+ q = (
1838
+ sql.select(schema.Table.id)
1839
+ .where(schema.Table.md['view_md']['base_versions'][0][0].astext == tbl_id.hex)
1840
+ .where(schema.Table.md['view_md']['base_versions'][0][1].astext == None)
1483
1841
  )
1484
1842
  mutable_view_ids = [r[0] for r in conn.execute(q).all()]
1843
+
1485
1844
  mutable_views = [TableVersionHandle(id, None) for id in mutable_view_ids]
1486
1845
 
1487
1846
  tbl_version: TableVersion
1488
1847
  if view_md is None:
1489
1848
  # this is a base table
1490
1849
  tbl_version = TableVersion(
1491
- tbl_id, tbl_md, effective_version, schema_version_md, mutable_views=mutable_views
1850
+ tbl_id, tbl_md, version_md, effective_version, schema_version_md, mutable_views=mutable_views
1492
1851
  )
1493
1852
  else:
1494
1853
  assert len(view_md.base_versions) > 0 # a view needs to have a base
1495
- pure_snapshot = view_md.is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
1854
+ # TODO: add TableVersionMd.is_pure_snapshot() and use that
1855
+ pure_snapshot = (
1856
+ view_md.is_snapshot
1857
+ and view_md.predicate is None
1858
+ and view_md.sample_clause is None
1859
+ and len(schema_version_md.columns) == 0
1860
+ )
1496
1861
  assert not pure_snapshot # a pure snapshot doesn't have a physical table backing it, no point in loading it
1497
1862
 
1498
1863
  base: TableVersionHandle
@@ -1506,29 +1871,22 @@ class Catalog:
1506
1871
  tbl_version = TableVersion(
1507
1872
  tbl_id,
1508
1873
  tbl_md,
1874
+ version_md,
1509
1875
  effective_version,
1510
1876
  schema_version_md,
1877
+ mutable_views,
1511
1878
  base_path=base_path,
1512
1879
  base=base,
1513
- mutable_views=mutable_views,
1514
1880
  )
1515
1881
 
1882
+ # register the instance before init()
1516
1883
  self._tbl_versions[tbl_id, effective_version] = tbl_version
1884
+ # register this instance as modified, so that it gets purged if the transaction fails, it may not be
1885
+ # fully initialized
1886
+ self._modified_tvs.add(tbl_version.handle)
1517
1887
  tbl_version.init()
1518
1888
  return tbl_version
1519
1889
 
1520
- def record_column_dependencies(self, tbl_version: TableVersion) -> None:
1521
- """Update self._column_dependencies. Only valid for non-snapshot versions."""
1522
- from pixeltable.exprs import Expr
1523
-
1524
- assert not tbl_version.is_snapshot
1525
- dependencies: dict[QColumnId, set[QColumnId]] = {}
1526
- for col in tbl_version.cols_by_id.values():
1527
- if col.value_expr_dict is None:
1528
- continue
1529
- dependencies[QColumnId(tbl_version.id, col.id)] = Expr.get_refd_column_ids(col.value_expr_dict)
1530
- self._column_dependencies[tbl_version.id] = dependencies
1531
-
1532
1890
  def _init_store(self) -> None:
1533
1891
  """One-time initialization of the stored catalog. Idempotent."""
1534
1892
  self.create_user(None)
@@ -1557,14 +1915,20 @@ class Catalog:
1557
1915
  obj, _, _ = self._prepare_dir_op(add_dir_path=path.parent, add_name=path.name)
1558
1916
 
1559
1917
  if if_exists == IfExistsParam.ERROR and obj is not None:
1560
- raise excs.Error(f'Path {str(path)!r} is an existing {type(obj)._display_name()}')
1918
+ raise excs.Error(f'Path {path!r} is an existing {obj._display_name()}')
1561
1919
  else:
1562
1920
  is_snapshot = isinstance(obj, View) and obj._tbl_version_path.is_snapshot()
1563
1921
  if obj is not None and (not isinstance(obj, expected_obj_type) or (expected_snapshot and not is_snapshot)):
1564
- obj_type_str = 'snapshot' if expected_snapshot else expected_obj_type._display_name()
1922
+ if expected_obj_type is Dir:
1923
+ obj_type_str = 'directory'
1924
+ elif expected_obj_type is InsertableTable:
1925
+ obj_type_str = 'table'
1926
+ elif expected_obj_type is View:
1927
+ obj_type_str = 'snapshot' if expected_snapshot else 'view'
1928
+ else:
1929
+ raise AssertionError()
1565
1930
  raise excs.Error(
1566
- f'Path {str(path)!r} already exists but is not a {obj_type_str}. '
1567
- f'Cannot {if_exists.name.lower()} it.'
1931
+ f'Path {path!r} already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.'
1568
1932
  )
1569
1933
 
1570
1934
  if obj is None:
@@ -1577,7 +1941,7 @@ class Catalog:
1577
1941
  dir_contents = self._get_dir_contents(obj._id)
1578
1942
  if len(dir_contents) > 0 and if_exists == IfExistsParam.REPLACE:
1579
1943
  raise excs.Error(
1580
- f'Directory {str(path)!r} already exists and is not empty. '
1944
+ f'Directory {path!r} already exists and is not empty. '
1581
1945
  'Use `if_exists="replace_force"` to replace it.'
1582
1946
  )
1583
1947
  self._drop_dir(obj._id, path, force=True)