pixeltable-0.4.3-py3-none-any.whl → pixeltable-0.4.4-py3-none-any.whl

This diff shows the changes between two publicly available versions of the package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

@@ -14,6 +14,8 @@ import psycopg
14
14
  import sqlalchemy as sql
15
15
 
16
16
  from pixeltable import exceptions as excs
17
+
18
+ # from pixeltable import exceptions as excs, UpdateStatus
17
19
  from pixeltable.env import Env
18
20
  from pixeltable.iterators import ComponentIterator
19
21
  from pixeltable.metadata import schema
@@ -28,6 +30,8 @@ from .table import Table
28
30
  from .table_version import TableVersion
29
31
  from .table_version_handle import TableVersionHandle
30
32
  from .table_version_path import TableVersionPath
33
+ from .tbl_ops import TableOp
34
+ from .update_status import UpdateStatus
31
35
  from .view import View
32
36
 
33
37
  if TYPE_CHECKING:
@@ -70,18 +74,35 @@ _MAX_RETRIES = -1
70
74
  T = TypeVar('T')
71
75
 
72
76
 
73
- def _retry_loop(*, for_write: bool) -> Callable[[Callable[..., T]], Callable[..., T]]:
77
+ def retry_loop(
78
+ *, tbl: Optional[TableVersionPath] = None, for_write: bool, lock_mutable_tree: bool = False
79
+ ) -> Callable[[Callable[..., T]], Callable[..., T]]:
74
80
  def decorator(op: Callable[..., T]) -> Callable[..., T]:
75
81
  @functools.wraps(op)
76
82
  def loop(*args: Any, **kwargs: Any) -> T:
83
+ cat = Catalog.get()
84
+ # retry_loop() is reentrant
85
+ if cat._in_retry_loop:
86
+ return op(*args, **kwargs)
87
+
77
88
  num_retries = 0
78
89
  while True:
90
+ cat._in_retry_loop = True
79
91
  try:
80
92
  # in order for retry to work, we need to make sure that there aren't any prior db updates
81
93
  # that are part of an ongoing transaction
82
94
  assert not Env.get().in_xact
83
- with Catalog.get().begin_xact(for_write=for_write, convert_db_excs=False):
95
+ with Catalog.get().begin_xact(
96
+ tbl=tbl,
97
+ for_write=for_write,
98
+ convert_db_excs=False,
99
+ lock_mutable_tree=lock_mutable_tree,
100
+ finalize_pending_ops=True,
101
+ ):
84
102
  return op(*args, **kwargs)
103
+ except PendingTableOpsError as e:
104
+ Env.get().console_logger.debug(f'retry_loop(): finalizing pending ops for {e.tbl_id}')
105
+ Catalog.get()._finalize_pending_ops(e.tbl_id)
85
106
  except sql.exc.DBAPIError as e:
86
107
  # TODO: what other exceptions should we be looking for?
87
108
  if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
@@ -97,16 +118,31 @@ def _retry_loop(*, for_write: bool) -> Callable[[Callable[..., T]], Callable[...
97
118
  # for informational/debugging purposes
98
119
  _logger.debug(f'retry_loop(): passing along {e}')
99
120
  raise
121
+ finally:
122
+ cat._in_retry_loop = False
100
123
 
101
124
  return loop
102
125
 
103
126
  return decorator
104
127
 
105
128
 
129
class PendingTableOpsError(Exception):
    """Signals that a table has pending ops that need to be finalized.

    Carries the id of the affected table in `tbl_id`, so that whoever catches the exception can finalize the
    pending ops for that table.
    """

    tbl_id: UUID  # id of the table that has pending ops

    def __init__(self, tbl_id: UUID) -> None:
        # pass tbl_id on explicitly, so that args == (tbl_id,) regardless of how the instance gets created
        super().__init__(tbl_id)
        self.tbl_id = tbl_id

    def __str__(self) -> str:
        # a bare UUID is not a useful message when this exception shows up in a log line
        return f'table {self.tbl_id} has pending operations'
134
+
135
+
106
136
  class Catalog:
107
137
  """The functional interface to getting access to catalog objects
108
138
 
109
- All interface functions must be called in the context of a transaction, started with Catalog.begin_xact().
139
+ All interface functions must be called in the context of a transaction, started with Catalog.begin_xact() or
140
+ via retry_loop().
141
+
142
+ When calling functions that involve Table or TableVersion instances, the catalog needs to get a chance to finalize
143
+ pending ops against those tables. To that end,
144
+ - use begin_xact(tbl) or begin_xact(tbl_id) if only accessing a single table
145
+ - use retry_loop() when accessing multiple tables (eg, pxt.ls())
110
146
 
111
147
  Caching and invalidation of metadata:
112
148
  - Catalog caches TableVersion instances in order to avoid excessive metadata loading
@@ -132,6 +168,8 @@ class Catalog:
132
168
  _tbls: dict[UUID, Table]
133
169
  _in_write_xact: bool # True if we're in a write transaction
134
170
  _x_locked_tbl_ids: set[UUID] # non-empty for write transactions
171
+ _modified_tvs: set[TableVersionHandle] # TableVersion instances modified in the current transaction
172
+ _in_retry_loop: bool
135
173
 
136
174
  # cached column dependencies
137
175
  # - key: table id, value: mapping from column id to its dependencies
@@ -164,6 +202,8 @@ class Catalog:
164
202
  self._tbls = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
165
203
  self._in_write_xact = False
166
204
  self._x_locked_tbl_ids = set()
205
+ self._modified_tvs = set()
206
+ self._in_retry_loop = False
167
207
  self._column_dependencies = {}
168
208
  self._column_dependents = None
169
209
  self._init_store()
@@ -214,9 +254,11 @@ class Catalog:
214
254
  self,
215
255
  *,
216
256
  tbl: Optional[TableVersionPath] = None,
257
+ tbl_id: Optional[UUID] = None,
217
258
  for_write: bool = False,
218
259
  lock_mutable_tree: bool = False,
219
260
  convert_db_excs: bool = True,
261
+ finalize_pending_ops: bool = True,
220
262
  ) -> Iterator[sql.Connection]:
221
263
  """
222
264
  Return a context manager that yields a connection to the database. Idempotent.
@@ -227,7 +269,7 @@ class Catalog:
227
269
  If tbl != None, follows this locking protocol:
228
270
  - validates/reloads the TableVersion instances of tbl's ancestors (in the hope that this reduces potential
229
271
  SerializationErrors later on)
230
- - if for_write == True, x-locks Table record (by updating Table.lock_dummy; see _acquire_tbl_xlock())
272
+ - if for_write == True, x-locks Table record (by updating Table.lock_dummy; see _acquire_tbl_lock())
231
273
  - if for_write == False, validates TableVersion instance
232
274
  - if lock_mutable_tree == True, also x-locks all mutable views of the table
233
275
  - this needs to be done in a retry loop, because Postgres can decide to abort the transaction
@@ -237,10 +279,14 @@ class Catalog:
237
279
 
238
280
  If convert_db_excs == True, converts DBAPIErrors into excs.Errors.
239
281
  """
282
+ assert tbl is None or tbl_id is None # at most one can be specified
240
283
  if Env.get().in_xact:
241
- if tbl is not None and for_write:
242
- # make sure that we requested the required table lock at the beginning of the transaction
243
- assert tbl.tbl_id in self._x_locked_tbl_ids, f'{tbl.tbl_id} not in {self._x_locked_tbl_ids}'
284
+ # make sure that we requested the required table lock at the beginning of the transaction
285
+ if for_write:
286
+ if tbl is not None:
287
+ assert tbl.tbl_id in self._x_locked_tbl_ids, f'{tbl.tbl_id} not in {self._x_locked_tbl_ids}'
288
+ elif tbl_id is not None:
289
+ assert tbl_id in self._x_locked_tbl_ids, f'{tbl_id} not in {self._x_locked_tbl_ids}'
244
290
  yield Env.get().conn
245
291
  return
246
292
 
@@ -252,33 +298,66 @@ class Catalog:
252
298
  # )
253
299
  # _logger.debug(f'begin_xact(): {tv_msg}')
254
300
  num_retries = 0
301
+ pending_ops_tbl_id: Optional[UUID] = None
302
+ has_exc = False # True if we exited the 'with ...begin_xact()' block with an exception
255
303
  while True:
304
+ if pending_ops_tbl_id is not None:
305
+ Env.get().console_logger.debug(f'begin_xact(): finalizing pending ops for {pending_ops_tbl_id}')
306
+ self._finalize_pending_ops(pending_ops_tbl_id)
307
+ pending_ops_tbl_id = None
308
+
256
309
  try:
257
- self._in_write_xact = False
310
+ self._in_write_xact = for_write
258
311
  self._x_locked_tbl_ids = set()
312
+ self._modified_tvs = set()
259
313
  self._column_dependents = None
314
+ has_exc = False
260
315
 
261
- with Env.get().begin_xact() as conn:
262
- if tbl is not None:
316
+ with Env.get().begin_xact(for_write=for_write) as conn:
317
+ if tbl is not None or tbl_id is not None:
263
318
  try:
264
- if not self._acquire_path_locks(
265
- tbl=tbl, for_write=for_write, lock_mutable_tree=lock_mutable_tree
266
- ):
267
- # this is a snapshot
268
- yield conn
269
- return
270
-
271
- if for_write:
272
- if lock_mutable_tree:
273
- self._x_locked_tbl_ids = self._get_mutable_tree(tbl.tbl_id)
319
+ target: Optional[TableVersionHandle] = None
320
+ if tbl is not None:
321
+ if self._acquire_path_locks(
322
+ tbl=tbl,
323
+ for_write=for_write,
324
+ lock_mutable_tree=lock_mutable_tree,
325
+ check_pending_ops=finalize_pending_ops,
326
+ ):
327
+ target = tbl.tbl_version
328
+ else:
329
+ target = self._acquire_tbl_lock(
330
+ tbl_id=tbl_id,
331
+ for_write=for_write,
332
+ lock_mutable_tree=lock_mutable_tree,
333
+ raise_if_not_exists=True,
334
+ check_pending_ops=finalize_pending_ops,
335
+ )
336
+
337
+ if target is None:
338
+ # didn't get the write lock
339
+ for_write = False
340
+ elif for_write:
341
+ # we know at this point that target is mutable because we got the X-lock
342
+ if lock_mutable_tree and not target.is_snapshot:
343
+ self._x_locked_tbl_ids = self._get_mutable_tree(target.id)
274
344
  self._compute_column_dependents(self._x_locked_tbl_ids)
275
345
  else:
276
- self._x_locked_tbl_ids = {tbl.tbl_id}
346
+ self._x_locked_tbl_ids = {target.id}
277
347
  if _logger.isEnabledFor(logging.DEBUG):
278
348
  # validate only when we don't see errors
279
349
  self.validate()
280
350
 
351
+ except PendingTableOpsError as e:
352
+ has_exc = True
353
+ if finalize_pending_ops:
354
+ # we remember which table id to finalize
355
+ pending_ops_tbl_id = e.tbl_id
356
+ # raise to abort the transaction
357
+ raise
358
+
281
359
  except sql.exc.DBAPIError as e:
360
+ has_exc = True
282
361
  if isinstance(
283
362
  e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)
284
363
  ) and (num_retries < _MAX_RETRIES or _MAX_RETRIES == -1):
@@ -289,11 +368,20 @@ class Catalog:
289
368
  else:
290
369
  raise
291
370
 
292
- self._in_write_xact = for_write
293
371
  yield conn
294
372
  return
295
373
 
374
+ except PendingTableOpsError:
375
+ has_exc = True
376
+ if pending_ops_tbl_id is not None:
377
+ # the next iteration of the loop will deal with pending ops for this table id
378
+ continue
379
+ else:
380
+ # we got this exception after getting the initial table locks and therefore need to abort
381
+ raise
382
+
296
383
  except sql.exc.DBAPIError as e:
384
+ has_exc = True
297
385
  # we got some db error during the actual operation (not just while trying to get locks on the metadata
298
386
  # records): we convert these into Errors, if asked to do so, and abort
299
387
  # TODO: what other concurrency-related exceptions should we expect?
@@ -301,12 +389,19 @@ class Catalog:
301
389
  # we always convert UndefinedTable exceptions (they can't be retried)
302
390
  if isinstance(e.orig, psycopg.errors.UndefinedTable):
303
391
  # the table got dropped in the middle of the table operation
304
- _logger.debug(f'Exception: undefined table ({tbl.tbl_name()}): Caught {type(e.orig)}: {e!r}')
392
+ tbl_name = tbl.tbl_name() if tbl is not None else str(tbl_id) if tbl_id is not None else '?'
393
+ _logger.debug(f'Exception: undefined table ({tbl_name}): Caught {type(e.orig)}: {e!r}')
305
394
  assert tbl is not None
306
- raise excs.Error(f'Table was dropped: {tbl.tbl_name()}') from None
395
+ raise excs.Error(f'Table was dropped: {tbl_name}') from None
307
396
  elif isinstance(e.orig, psycopg.errors.SerializationFailure) and convert_db_excs:
308
397
  # we still got a serialization error, despite getting x-locks at the beginning
309
- msg = f'{tbl.tbl_name()} ({tbl.tbl_id})' if tbl is not None else ''
398
+ msg: str
399
+ if tbl is not None:
400
+ msg = f'{tbl.tbl_name()} ({tbl.tbl_id})'
401
+ elif tbl_id is not None:
402
+ msg = f'{tbl_id}'
403
+ else:
404
+ msg = ''
310
405
  _logger.debug(f'Exception: serialization failure: {msg} ({e})')
311
406
  raise excs.Error(
312
407
  'That Pixeltable operation could not be completed because it conflicted with another '
@@ -316,6 +411,10 @@ class Catalog:
316
411
  else:
317
412
  raise
318
413
 
414
+ except:
415
+ has_exc = True
416
+ raise
417
+
319
418
  finally:
320
419
  self._in_write_xact = False
321
420
  self._x_locked_tbl_ids = set()
@@ -327,12 +426,24 @@ class Catalog:
327
426
  _logger.debug(f'invalidating table version {tv.id}:None (tv={id(tv):x})')
328
427
  tv.is_validated = False
329
428
 
429
+ if has_exc:
430
+ # purge all modified TableVersion instances, we can't guarantee they are still consistent with the
431
+ # stored metadata
432
+ for handle in self._modified_tvs:
433
+ self._clear_tv_cache(handle.id, handle.effective_version)
434
+ self._modified_tvs = set()
435
+
330
436
  @property
331
437
  def in_write_xact(self) -> bool:
332
438
  return self._in_write_xact
333
439
 
334
440
  def _acquire_path_locks(
335
- self, *, tbl: TableVersionPath, for_write: bool = False, lock_mutable_tree: bool = False
441
+ self,
442
+ *,
443
+ tbl: TableVersionPath,
444
+ for_write: bool = False,
445
+ lock_mutable_tree: bool = False,
446
+ check_pending_ops: Optional[bool] = None,
336
447
  ) -> bool:
337
448
  """
338
449
  Path locking protocol:
@@ -341,33 +452,49 @@ class Catalog:
341
452
  - refresh cached TableVersion of tbl or get X-lock, depending on for_write
342
453
  - if lock_mutable_tree, also X-lock all mutable views of tbl
343
454
 
344
- Returns False if trying to lock a pure snapshot with for_write == True
345
455
  Raises Error if tbl doesn't exist.
456
+ Return False if the lock couldn't be acquired (X-lock on a non-mutable table), True otherwise.
346
457
  """
347
- start_idx = 1 if for_write else 0
348
- for handle in tbl.get_tbl_versions()[start_idx::-1]:
349
- _ = self.get_tbl_version(handle.id, handle.effective_version)
458
+ path_handles = tbl.get_tbl_versions()
459
+ read_handles = path_handles[:0:-1] if for_write else path_handles[::-1]
460
+ for handle in read_handles:
461
+ # update cache
462
+ _ = self.get_tbl_version(handle.id, handle.effective_version, validate_initialized=True)
350
463
  if not for_write:
351
464
  return True # nothing left to lock
352
- return self._acquire_tbl_xlock(tbl_id=tbl.tbl_id, lock_mutable_tree=lock_mutable_tree, raise_if_not_exists=True)
465
+ handle = self._acquire_tbl_lock(
466
+ tbl_id=tbl.tbl_id,
467
+ for_write=True,
468
+ lock_mutable_tree=lock_mutable_tree,
469
+ raise_if_not_exists=True,
470
+ check_pending_ops=check_pending_ops,
471
+ )
472
+ # update cache
473
+ _ = self.get_tbl_version(path_handles[0].id, path_handles[0].effective_version, validate_initialized=True)
474
+ return handle is not None
353
475
 
354
- def _acquire_tbl_xlock(
476
+ def _acquire_tbl_lock(
355
477
  self,
356
478
  *,
479
+ for_write: bool,
357
480
  tbl_id: Optional[UUID] = None,
358
481
  dir_id: Optional[UUID] = None,
359
482
  tbl_name: Optional[str] = None,
360
483
  lock_mutable_tree: bool = False,
361
- raise_if_not_exists: bool = False,
362
- ) -> bool:
363
- """Force acquisition of an X-lock on a Table record via a blind update
484
+ raise_if_not_exists: bool = True,
485
+ check_pending_ops: Optional[bool] = None,
486
+ ) -> Optional[TableVersionHandle]:
487
+ """
488
+ For writes: force acquisition of an X-lock on a Table record via a blind update.
364
489
 
365
490
  Either tbl_id or dir_id/tbl_name need to be specified.
366
491
  Returns True if the table was locked, False if it was a snapshot or not found.
367
492
  If lock_mutable_tree, recursively locks all mutable views of the table.
368
493
 
369
- Returns False if the table is a snapshot or not found and !raise_if_not_exists.
494
+ Returns a handle to what was locked, None if the lock couldn't be acquired (eg, X-lock on a non-mutable table).
370
495
  """
496
+ assert (tbl_id is not None) != (dir_id is not None and tbl_name is not None)
497
+ assert (dir_id is None) == (tbl_name is None)
371
498
  where_clause: sql.ColumnElement
372
499
  if tbl_id is not None:
373
500
  where_clause = schema.Table.id == tbl_id
@@ -378,26 +505,130 @@ class Catalog:
378
505
  where_clause = sql.and_(where_clause, schema.Table.md['user'].astext == Env.get().user)
379
506
 
380
507
  conn = Env.get().conn
381
- row = conn.execute(sql.select(schema.Table).where(where_clause).with_for_update(nowait=True)).one_or_none()
508
+ q = sql.select(schema.Table).where(where_clause)
509
+ if for_write:
510
+ q = q.with_for_update(nowait=True)
511
+ row = conn.execute(q).one_or_none()
382
512
  if row is None:
383
513
  if raise_if_not_exists:
384
514
  raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
385
- return False # nothing to lock
386
- if row.md['view_md'] is not None and row.md['view_md']['is_snapshot']:
387
- return False # nothing to lock
388
- conn.execute(sql.update(schema.Table).values(lock_dummy=1).where(where_clause))
389
-
390
- if not lock_mutable_tree:
391
- return True
392
- # also lock mutable views
393
- tv = self.get_tbl_version(tbl_id, None)
394
- for view in tv.mutable_views:
395
- self._acquire_tbl_xlock(tbl_id=view.id, lock_mutable_tree=True, raise_if_not_exists=raise_if_not_exists)
396
- return True
515
+ return None # nothing to lock
516
+ tbl_md = schema.md_from_dict(schema.TableMd, row.md)
517
+ if for_write and tbl_md.is_mutable:
518
+ conn.execute(sql.update(schema.Table).values(lock_dummy=1).where(where_clause))
519
+
520
+ if check_pending_ops:
521
+ # check for pending ops after getting table lock
522
+ pending_ops_q = sql.select(sql.func.count()).where(schema.PendingTableOp.tbl_id == row.id)
523
+ has_pending_ops = conn.execute(pending_ops_q).scalar() > 0
524
+ if has_pending_ops:
525
+ raise PendingTableOpsError(row.id)
526
+
527
+ if for_write and not tbl_md.is_mutable:
528
+ return None # nothing to lock
529
+
530
+ effective_version = tbl_md.current_version if tbl_md.is_snapshot else None
531
+ if tbl_md.is_mutable and lock_mutable_tree:
532
+ # also lock mutable views
533
+ tv = self.get_tbl_version(tbl_id, effective_version, validate_initialized=True)
534
+ for view in tv.mutable_views:
535
+ self._acquire_tbl_lock(
536
+ for_write=for_write,
537
+ tbl_id=view.id,
538
+ lock_mutable_tree=lock_mutable_tree,
539
+ raise_if_not_exists=raise_if_not_exists,
540
+ check_pending_ops=check_pending_ops,
541
+ )
542
+ return TableVersionHandle(tbl_id, effective_version)
543
+
544
+ def _finalize_pending_ops(self, tbl_id: UUID) -> None:
545
+ """Finalizes all pending ops for the given table."""
546
+ num_retries = 0
547
+ while True:
548
+ try:
549
+ tbl_version: int
550
+ op: Optional[TableOp] = None
551
+ delete_next_op_stmt: sql.Delete
552
+ reset_has_pending_stmt: sql.Update
553
+ with self.begin_xact(
554
+ tbl_id=tbl_id, for_write=True, convert_db_excs=False, finalize_pending_ops=False
555
+ ) as conn:
556
+ q = (
557
+ sql.select(schema.Table.md, schema.PendingTableOp)
558
+ .select_from(schema.Table)
559
+ .join(schema.PendingTableOp)
560
+ .where(schema.Table.id == tbl_id)
561
+ .where(schema.PendingTableOp.tbl_id == tbl_id)
562
+ .order_by(schema.PendingTableOp.op_sn)
563
+ .limit(1)
564
+ .with_for_update()
565
+ )
566
+ row = conn.execute(q).one_or_none()
567
+ if row is None:
568
+ return
569
+ tbl_version = row.md.get('current_version')
570
+ op = schema.md_from_dict(TableOp, row.op)
571
+ delete_next_op_stmt = sql.delete(schema.PendingTableOp).where(
572
+ schema.PendingTableOp.tbl_id == tbl_id, schema.PendingTableOp.op_sn == row.op_sn
573
+ )
574
+ reset_has_pending_stmt = (
575
+ sql.update(schema.Table)
576
+ .where(schema.Table.id == tbl_id)
577
+ .values(md=schema.Table.md.op('||')({'has_pending_ops': False}))
578
+ )
579
+
580
+ if op.needs_xact:
581
+ tv = self.get_tbl_version(
582
+ tbl_id, tbl_version, check_pending_ops=False, validate_initialized=True
583
+ )
584
+ tv.exec_op(op)
585
+ conn.execute(delete_next_op_stmt)
586
+ if op.op_sn == op.num_ops - 1:
587
+ conn.execute(reset_has_pending_stmt)
588
+ continue
589
+
590
+ # this op runs outside of a transaction
591
+ tv = self.get_tbl_version(tbl_id, tbl_version, check_pending_ops=False, validate_initialized=True)
592
+ tv.exec_op(op)
593
+ with self.begin_xact(
594
+ tbl_id=tbl_id, for_write=True, convert_db_excs=False, finalize_pending_ops=False
595
+ ) as conn:
596
+ conn.execute(delete_next_op_stmt)
597
+ if op.op_sn == op.num_ops - 1:
598
+ conn.execute(reset_has_pending_stmt)
599
+
600
+ except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
601
+ # TODO: why are we still seeing these here, instead of them getting taken care of by the retry
602
+ # logic of begin_xact()?
603
+ if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
604
+ num_retries += 1
605
+ log_msg: str
606
+ if op is not None:
607
+ log_msg = f'finalize_pending_ops(): retrying ({num_retries}) op {op!s} after {type(e.orig)}'
608
+ else:
609
+ log_msg = f'finalize_pending_ops(): retrying ({num_retries}) after {type(e.orig)}'
610
+ Env.get().console_logger.debug(log_msg)
611
+ time.sleep(random.uniform(0.1, 0.5))
612
+ continue
613
+ else:
614
+ raise
615
+ except Exception as e:
616
+ Env.get().console_logger.debug(f'finalize_pending_ops(): caught {e}')
617
+ raise
618
+
619
+ num_retries = 0
620
+
621
+ def _debug_str(self) -> str:
622
+ tv_str = '\n'.join(str(k) for k in self._tbl_versions)
623
+ tbl_str = '\n'.join(str(k) for k in self._tbls)
624
+ return f'tbl_versions:\n{tv_str}\ntbls:\n{tbl_str}'
397
625
 
398
626
  def _get_mutable_tree(self, tbl_id: UUID) -> set[UUID]:
399
627
  """Returns ids of all tables that form the tree of mutable views starting at tbl_id; includes the root."""
400
- tv = self.get_tbl_version(tbl_id, None)
628
+ assert (tbl_id, None) in self._tbl_versions, (
629
+ f'({tbl_id}, None) not in {self._tbl_versions.keys()}\n{self._debug_str()}'
630
+ )
631
+ tv = self.get_tbl_version(tbl_id, None, validate_initialized=True)
401
632
  result: set[UUID] = {tv.id}
402
633
  for view in tv.mutable_views:
403
634
  result.update(self._get_mutable_tree(view.id))
@@ -408,7 +639,9 @@ class Catalog:
408
639
  assert self._column_dependents is None
409
640
  self._column_dependents = defaultdict(set)
410
641
  for tbl_id in mutable_tree:
411
- assert tbl_id in self._column_dependencies
642
+ assert tbl_id in self._column_dependencies, (
643
+ f'{tbl_id} not in {self._column_dependencies.keys()}\n{self._debug_str()}'
644
+ )
412
645
  for col, dependencies in self._column_dependencies[tbl_id].items():
413
646
  for dependency in dependencies:
414
647
  if dependency.tbl_id not in mutable_tree:
@@ -416,13 +649,25 @@ class Catalog:
416
649
  dependents = self._column_dependents[dependency]
417
650
  dependents.add(col)
418
651
 
652
def record_column_dependencies(self, tbl_version: TableVersion) -> None:
    """Recompute and store the column dependencies of tbl_version, which must be mutable.

    Replaces the entry for tbl_version.id in self._column_dependencies with a mapping from each column that has
    a value_expr_dict to the column ids returned by Expr.get_refd_column_ids() for that dict.
    """
    from pixeltable.exprs import Expr

    assert tbl_version.is_mutable
    tbl_id = tbl_version.id
    # columns without a value_expr_dict are left out of the mapping
    self._column_dependencies[tbl_id] = {
        QColumnId(tbl_id, col.id): Expr.get_refd_column_ids(col.value_expr_dict)
        for col in tbl_version.cols_by_id.values()
        if col.value_expr_dict is not None
    }
663
+
419
664
  def get_column_dependents(self, tbl_id: UUID, col_id: int) -> set[Column]:
420
665
  """Return all Columns that transitively depend on the given column."""
421
666
  assert self._column_dependents is not None
422
667
  dependents = self._column_dependents[QColumnId(tbl_id, col_id)]
423
668
  result: set[Column] = set()
424
669
  for dependent in dependents:
425
- tv = self.get_tbl_version(dependent.tbl_id, None)
670
+ tv = self.get_tbl_version(dependent.tbl_id, None, validate_initialized=True)
426
671
  col = tv.cols_by_id[dependent.col_id]
427
672
  result.add(col)
428
673
  return result
@@ -471,7 +716,7 @@ class Catalog:
471
716
  dir_entries: dict[str, Catalog.DirEntry]
472
717
  table: Optional[schema.Table]
473
718
 
474
- @_retry_loop(for_write=False)
719
+ @retry_loop(for_write=False)
475
720
  def get_dir_contents(self, dir_path: Path, recursive: bool = False) -> dict[str, DirEntry]:
476
721
  dir = self._get_schema_object(dir_path, expected=Dir, raise_if_not_exists=True)
477
722
  return self._get_dir_contents(dir._id, recursive=recursive)
@@ -498,7 +743,7 @@ class Catalog:
498
743
 
499
744
  return result
500
745
 
501
- @_retry_loop(for_write=True)
746
+ @retry_loop(for_write=True)
502
747
  def move(self, path: Path, new_path: Path) -> None:
503
748
  self._move(path, new_path)
504
749
 
@@ -540,6 +785,7 @@ class Catalog:
540
785
  - if both add and drop (= two directories are involved), lock the directories in a pre-determined order
541
786
  (in this case, by name) in order to prevent deadlocks between concurrent directory modifications
542
787
  """
788
+ assert drop_expected in (None, Table, Dir), drop_expected
543
789
  assert (add_dir_path is None) == (add_name is None)
544
790
  assert (drop_dir_path is None) == (drop_name is None)
545
791
  dir_paths: set[Path] = set()
@@ -553,7 +799,7 @@ class Catalog:
553
799
  for p in sorted(dir_paths):
554
800
  dir = self._get_dir(p, lock_dir=True)
555
801
  if dir is None:
556
- raise excs.Error(f'Directory {str(p)!r} does not exist.')
802
+ raise excs.Error(f'Directory {p!r} does not exist.')
557
803
  if p == add_dir_path:
558
804
  add_dir = dir
559
805
  if p == drop_dir_path:
@@ -564,19 +810,17 @@ class Catalog:
564
810
  add_obj = self._get_dir_entry(add_dir.id, add_name, lock_entry=True)
565
811
  if add_obj is not None and raise_if_exists:
566
812
  add_path = add_dir_path.append(add_name)
567
- raise excs.Error(f'Path {str(add_path)!r} already exists.')
813
+ raise excs.Error(f'Path {add_path!r} already exists.')
568
814
 
569
815
  drop_obj: Optional[SchemaObject] = None
570
816
  if drop_dir is not None:
571
817
  drop_path = drop_dir_path.append(drop_name)
572
818
  drop_obj = self._get_dir_entry(drop_dir.id, drop_name, lock_entry=True)
573
819
  if drop_obj is None and raise_if_not_exists:
574
- raise excs.Error(f'Path {str(drop_path)!r} does not exist.')
820
+ raise excs.Error(f'Path {drop_path!r} does not exist.')
575
821
  if drop_obj is not None and drop_expected is not None and not isinstance(drop_obj, drop_expected):
576
- raise excs.Error(
577
- f'{str(drop_path)!r} needs to be a {drop_expected._display_name()} '
578
- f'but is a {type(drop_obj)._display_name()}'
579
- )
822
+ expected_name = 'table' if drop_expected is Table else 'directory'
823
+ raise excs.Error(f'{drop_path!r} needs to be a {expected_name} but is a {drop_obj._display_name()}')
580
824
 
581
825
  add_dir_obj = Dir(add_dir.id, add_dir.parent_id, add_dir.md['name']) if add_dir is not None else None
582
826
  return add_obj, add_dir_obj, drop_obj
@@ -602,7 +846,7 @@ class Catalog:
602
846
 
603
847
  # check for table
604
848
  if lock_entry:
605
- self._acquire_tbl_xlock(dir_id=dir_id, tbl_name=name)
849
+ self._acquire_tbl_lock(for_write=True, dir_id=dir_id, raise_if_not_exists=False, tbl_name=name)
606
850
  q = sql.select(schema.Table.id).where(
607
851
  schema.Table.dir_id == dir_id,
608
852
  schema.Table.md['name'].astext == name,
@@ -633,12 +877,12 @@ class Catalog:
633
877
  - raise_if_not_exists is True and the path does not exist
634
878
  - expected is not None and the existing object has a different type
635
879
  """
880
+ assert expected in (None, Table, Dir), expected
881
+
636
882
  if path.is_root:
637
883
  # the root dir
638
884
  if expected is not None and expected is not Dir:
639
- raise excs.Error(
640
- f'{str(path)!r} needs to be a {expected._display_name()} but is a {Dir._display_name()}'
641
- )
885
+ raise excs.Error(f'{path!r} needs to be a table but is a dir')
642
886
  dir = self._get_dir(path, lock_dir=lock_obj)
643
887
  if dir is None:
644
888
  raise excs.Error(f'Unknown user: {Env.get().user}')
@@ -647,33 +891,32 @@ class Catalog:
647
891
  parent_path = path.parent
648
892
  parent_dir = self._get_dir(parent_path, lock_dir=lock_parent)
649
893
  if parent_dir is None:
650
- raise excs.Error(f'Directory {str(parent_path)!r} does not exist.')
894
+ raise excs.Error(f'Directory {parent_path!r} does not exist.')
651
895
  obj = self._get_dir_entry(parent_dir.id, path.name, lock_entry=lock_obj)
652
896
 
653
897
  if obj is None and raise_if_not_exists:
654
- raise excs.Error(f'Path {str(path)!r} does not exist.')
898
+ raise excs.Error(f'Path {path!r} does not exist.')
655
899
  elif obj is not None and raise_if_exists:
656
- raise excs.Error(f'Path {str(path)!r} is an existing {type(obj)._display_name()}.')
900
+ raise excs.Error(f'Path {path!r} is an existing {obj._display_name()}.')
657
901
  elif obj is not None and expected is not None and not isinstance(obj, expected):
658
- raise excs.Error(
659
- f'{str(path)!r} needs to be a {expected._display_name()} but is a {type(obj)._display_name()}.'
660
- )
902
+ expected_name = 'table' if expected is Table else 'directory'
903
+ raise excs.Error(f'{path!r} needs to be a {expected_name} but is a {obj._display_name()}.')
661
904
  return obj
662
905
 
663
906
  def get_table_by_id(self, tbl_id: UUID) -> Optional[Table]:
664
- if tbl_id not in self._tbls:
665
- tbl = self._load_tbl(tbl_id)
666
- if tbl is None:
667
- return None
668
- # # if this is a mutable table, we also need to have its mutable views loaded, in order to track column
669
- # # dependencies
670
- # tbl_version = tbl._tbl_version.get()
671
- # if tbl_version.is_mutable:
672
- # for v in tbl_version.mutable_views:
673
- # _ = self.get_table_by_id(v.id)
674
- return self._tbls[tbl_id]
675
-
676
- @_retry_loop(for_write=True)
907
+ """Must be executed inside a transaction. Might raise PendingTableOpsError."""
908
+ if tbl_id in self._tbls:
909
+ return self._tbls[tbl_id]
910
+ tbl = self._load_tbl(tbl_id)
911
+ # # if this is a mutable table, we also need to have its mutable views loaded, in order to track column
912
+ # # dependencies
913
+ # tbl_version = tbl._tbl_version.get()
914
+ # if tbl_version.is_mutable:
915
+ # for v in tbl_version.mutable_views:
916
+ # _ = self.get_table_by_id(v.id)
917
+ return tbl
918
+
919
+ @retry_loop(for_write=True)
677
920
  def create_table(
678
921
  self,
679
922
  path: Path,
@@ -706,7 +949,6 @@ class Catalog:
706
949
  self._tbls[tbl._id] = tbl
707
950
  return tbl
708
951
 
709
- @_retry_loop(for_write=True)
710
952
  def create_view(
711
953
  self,
712
954
  path: Path,
@@ -722,49 +964,68 @@ class Catalog:
722
964
  media_validation: MediaValidation,
723
965
  if_exists: IfExistsParam,
724
966
  ) -> Table:
725
- from pixeltable.utils.filecache import FileCache
967
+ @retry_loop(for_write=True)
968
+ def create_fn() -> UUID:
969
+ if not is_snapshot and base.is_mutable():
970
+ # this is a mutable view of a mutable base; X-lock the base and advance its view_sn before adding
971
+ # the view
972
+ self._acquire_tbl_lock(tbl_id=base.tbl_id, for_write=True)
973
+ base_tv = self.get_tbl_version(base.tbl_id, None, validate_initialized=True)
974
+ base_tv.tbl_md.view_sn += 1
975
+ result = Env.get().conn.execute(
976
+ sql.update(schema.Table)
977
+ .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
978
+ .where(schema.Table.id == base.tbl_id)
979
+ )
980
+ assert result.rowcount == 1, result.rowcount
726
981
 
727
- if not is_snapshot and not base.is_snapshot():
728
- # this is a mutable view of a mutable base; X-lock the base and advance its view_sn before adding the view
729
- self._acquire_tbl_xlock(tbl_id=base.tbl_id)
730
- base_tv = self.get_tbl_version(base.tbl_id, None)
731
- base_tv.tbl_md.view_sn += 1
732
- result = Env.get().conn.execute(
733
- sql.update(schema.Table)
734
- .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
735
- .where(schema.Table.id == base.tbl_id)
736
- )
737
- assert result.rowcount == 1, result.rowcount
982
+ existing = self._handle_path_collision(path, View, is_snapshot, if_exists)
983
+ if existing is not None:
984
+ assert isinstance(existing, View)
985
+ return existing._id
738
986
 
739
- existing = self._handle_path_collision(path, View, is_snapshot, if_exists)
740
- if existing is not None:
741
- assert isinstance(existing, View)
742
- return existing
743
-
744
- dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
745
- assert dir is not None
746
- if iterator is None:
747
- iterator_class, iterator_args = None, None
748
- else:
749
- iterator_class, iterator_args = iterator
750
- view = View._create(
751
- dir._id,
752
- path.name,
753
- base=base,
754
- select_list=select_list,
755
- additional_columns=additional_columns,
756
- predicate=where,
757
- sample_clause=sample_clause,
758
- is_snapshot=is_snapshot,
759
- iterator_cls=iterator_class,
760
- iterator_args=iterator_args,
761
- num_retained_versions=num_retained_versions,
762
- comment=comment,
763
- media_validation=media_validation,
764
- )
765
- FileCache.get().emit_eviction_warnings()
766
- self._tbls[view._id] = view
767
- return view
987
+ dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
988
+ assert dir is not None
989
+ if iterator is None:
990
+ iterator_class, iterator_args = None, None
991
+ else:
992
+ iterator_class, iterator_args = iterator
993
+ md, ops = View._create(
994
+ dir._id,
995
+ path.name,
996
+ base=base,
997
+ select_list=select_list,
998
+ additional_columns=additional_columns,
999
+ predicate=where,
1000
+ sample_clause=sample_clause,
1001
+ is_snapshot=is_snapshot,
1002
+ iterator_cls=iterator_class,
1003
+ iterator_args=iterator_args,
1004
+ num_retained_versions=num_retained_versions,
1005
+ comment=comment,
1006
+ media_validation=media_validation,
1007
+ )
1008
+ tbl_id = UUID(md.tbl_md.tbl_id)
1009
+ self.store_tbl_md(tbl_id, dir._id, md.tbl_md, md.version_md, md.schema_version_md, ops)
1010
+ return tbl_id
1011
+
1012
+ view_id = create_fn()
1013
+ if not is_snapshot and base.is_mutable():
1014
+ # invalidate base's TableVersion instance, so that it gets reloaded with the new mutable view
1015
+ self._clear_tv_cache(base.tbl_id, base.tbl_version.effective_version)
1016
+ # base_tv = self.get_tbl_version(base.tbl_id, base.tbl_version.effective_version, validate_initialized=True)
1017
+ # view_handle = TableVersionHandle(view_id, effective_version=None)
1018
+ # base_tv.mutable_views.add(view_handle)
1019
+
1020
+ # finalize pending ops
1021
+ with self.begin_xact(tbl_id=view_id, for_write=True, finalize_pending_ops=True):
1022
+ return self.get_table_by_id(view_id)
1023
+
1024
+ def _clear_tv_cache(self, tbl_id: UUID, effective_version: Optional[int]) -> None:
1025
+ if (tbl_id, effective_version) in self._tbl_versions:
1026
+ tv = self._tbl_versions[tbl_id, effective_version]
1027
+ tv.is_validated = False
1028
+ del self._tbl_versions[tbl_id, effective_version]
768
1029
 
769
1030
  def create_replica(self, path: Path, md: list[schema.FullTableMd]) -> None:
770
1031
  """
@@ -786,8 +1047,8 @@ class Catalog:
786
1047
  # Ensure that the system directory exists.
787
1048
  self._create_dir(Path('_system', allow_system_paths=True), if_exists=IfExistsParam.IGNORE, parents=False)
788
1049
 
789
- # Now check to see if this table UUID already exists in the catalog.
790
- existing = Catalog.get().get_table_by_id(tbl_id)
1050
+ # Now check to see if this table already exists in the catalog.
1051
+ existing = self.get_table_by_id(tbl_id)
791
1052
  if existing is not None:
792
1053
  existing_path = Path(existing._path(), allow_system_paths=True)
793
1054
  if existing_path != path:
@@ -808,7 +1069,7 @@ class Catalog:
808
1069
  # table being replicated.
809
1070
  for ancestor_md in md[:0:-1]:
810
1071
  ancestor_id = UUID(ancestor_md.tbl_md.tbl_id)
811
- replica = Catalog.get().get_table_by_id(ancestor_id)
1072
+ replica = self.get_table_by_id(ancestor_id)
812
1073
  replica_path: Path
813
1074
  if replica is None:
814
1075
  # We've never seen this table before. Create a new anonymous system table for it.
@@ -922,7 +1183,7 @@ class Catalog:
922
1183
  # It's a new version of a table that has a physical store, so we need to create a TableVersion instance.
923
1184
  TableVersion.create_replica(md)
924
1185
 
925
- @_retry_loop(for_write=False)
1186
+ @retry_loop(for_write=False)
926
1187
  def get_table(self, path: Path) -> Table:
927
1188
  obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
928
1189
  assert isinstance(obj, Table)
@@ -931,7 +1192,7 @@ class Catalog:
931
1192
  obj._tbl_version_path.clear_cached_md()
932
1193
  return obj
933
1194
 
934
- @_retry_loop(for_write=True)
1195
+ @retry_loop(for_write=True)
935
1196
  def drop_table(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
936
1197
  tbl = self._get_schema_object(
937
1198
  path,
@@ -941,7 +1202,7 @@ class Catalog:
941
1202
  lock_obj=False,
942
1203
  )
943
1204
  if tbl is None:
944
- _logger.info(f'Skipped table {str(path)!r} (does not exist).')
1205
+ _logger.info(f'Skipped table {path!r} (does not exist).')
945
1206
  return
946
1207
  assert isinstance(tbl, Table)
947
1208
 
@@ -949,7 +1210,7 @@ class Catalog:
949
1210
  # this is a mutable view of a mutable base;
950
1211
  # lock the base before the view, in order to avoid deadlocks with concurrent inserts/updates
951
1212
  base_id = tbl._tbl_version_path.base.tbl_id
952
- self._acquire_tbl_xlock(tbl_id=base_id, lock_mutable_tree=False)
1213
+ self._acquire_tbl_lock(tbl_id=base_id, for_write=True, lock_mutable_tree=False)
953
1214
 
954
1215
  self._drop_tbl(tbl, force=force, is_replace=False)
955
1216
 
@@ -964,7 +1225,7 @@ class Catalog:
964
1225
  in the same directory with the same name (which could lead to duplicate names if we get aborted)
965
1226
  """
966
1227
  self._acquire_dir_xlock(dir_id=tbl._dir_id)
967
- self._acquire_tbl_xlock(tbl_id=tbl._id, lock_mutable_tree=False)
1228
+ self._acquire_tbl_lock(tbl_id=tbl._id, for_write=True, lock_mutable_tree=False)
968
1229
 
969
1230
  view_ids = self.get_view_ids(tbl._id, for_update=True)
970
1231
  if len(view_ids) > 0:
@@ -988,8 +1249,9 @@ class Catalog:
988
1249
  # if this is a mutable view of a mutable base, advance the base's view_sn
989
1250
  if isinstance(tbl, View) and tbl._tbl_version_path.is_mutable() and tbl._tbl_version_path.base.is_mutable():
990
1251
  base_id = tbl._tbl_version_path.base.tbl_id
991
- base_tv = self.get_tbl_version(base_id, None)
1252
+ base_tv = self.get_tbl_version(base_id, None, validate_initialized=True)
992
1253
  base_tv.tbl_md.view_sn += 1
1254
+ self._modified_tvs.add(base_tv.handle)
993
1255
  result = Env.get().conn.execute(
994
1256
  sql.update(schema.Table.__table__)
995
1257
  .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
@@ -997,23 +1259,26 @@ class Catalog:
997
1259
  )
998
1260
  assert result.rowcount == 1, result.rowcount
999
1261
 
1262
+ if tbl._tbl_version is not None:
1263
+ # invalidate the TableVersion instance when we're done so that existing references to it can find out it
1264
+ # has been dropped
1265
+ self._modified_tvs.add(tbl._tbl_version)
1000
1266
  tv = tbl._tbl_version.get() if tbl._tbl_version is not None else None
1001
- if tv is not None:
1267
+ # if tv is not None:
1268
+ # tv = tbl._tbl_version.get()
1269
+ # # invalidate the TableVersion instance so that existing references to it can find out it has been dropped
1270
+ # tv.is_validated = False
1271
+ if tbl._tbl_version is not None:
1272
+ # drop the store table before deleting the Table record
1002
1273
  tv = tbl._tbl_version.get()
1003
- # invalidate the TableVersion instance so that existing references to it can find out it has been dropped
1004
- tv.is_validated = False
1274
+ tv.drop()
1005
1275
 
1006
1276
  self.delete_tbl_md(tbl._id)
1007
1277
  assert tbl._id in self._tbls
1008
1278
  del self._tbls[tbl._id]
1009
1279
  _logger.info(f'Dropped table `{tbl._path()}`.')
1010
1280
 
1011
- if tv is not None:
1012
- tv.drop()
1013
- assert (tv.id, tv.effective_version) in self._tbl_versions
1014
- del self._tbl_versions[tv.id, tv.effective_version]
1015
-
1016
- @_retry_loop(for_write=True)
1281
+ @retry_loop(for_write=True)
1017
1282
  def create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
1018
1283
  return self._create_dir(path, if_exists, parents)
1019
1284
 
@@ -1026,7 +1291,7 @@ class Catalog:
1026
1291
  # parent = self._get_schema_object(path.parent)
1027
1292
  # assert parent is not None
1028
1293
  # dir = Dir._create(parent._id, path.name)
1029
- # Env.get().console_logger.info(f'Created directory {str(path)!r}.')
1294
+ # Env.get().console_logger.info(f'Created directory {path!r}.')
1030
1295
  # return dir
1031
1296
 
1032
1297
  if parents:
@@ -1045,10 +1310,10 @@ class Catalog:
1045
1310
  return existing
1046
1311
  assert parent is not None
1047
1312
  dir = Dir._create(parent._id, path.name)
1048
- Env.get().console_logger.info(f'Created directory {str(path)!r}.')
1313
+ Env.get().console_logger.info(f'Created directory {path!r}.')
1049
1314
  return dir
1050
1315
 
1051
- @_retry_loop(for_write=True)
1316
+ @retry_loop(for_write=True)
1052
1317
  def drop_dir(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
1053
1318
  _, _, schema_obj = self._prepare_dir_op(
1054
1319
  drop_dir_path=path.parent,
@@ -1057,7 +1322,7 @@ class Catalog:
1057
1322
  raise_if_not_exists=if_not_exists == IfNotExistsParam.ERROR and not force,
1058
1323
  )
1059
1324
  if schema_obj is None:
1060
- _logger.info(f'Directory {str(path)!r} does not exist; skipped drop_dir().')
1325
+ _logger.info(f'Directory {path!r} does not exist; skipped drop_dir().')
1061
1326
  return
1062
1327
  self._drop_dir(schema_obj._id, path, force=force)
1063
1328
 
@@ -1070,7 +1335,7 @@ class Catalog:
1070
1335
  q = sql.select(sql.func.count()).select_from(schema.Table).where(schema.Table.dir_id == dir_id)
1071
1336
  num_tbls = conn.execute(q).scalar()
1072
1337
  if num_subdirs + num_tbls > 0:
1073
- raise excs.Error(f'Directory {str(dir_path)!r} is not empty.')
1338
+ raise excs.Error(f'Directory {dir_path!r} is not empty.')
1074
1339
 
1075
1340
  # drop existing subdirs
1076
1341
  self._acquire_dir_xlock(dir_id=dir_id)
@@ -1088,7 +1353,7 @@ class Catalog:
1088
1353
 
1089
1354
  # self.drop_dir(dir_id)
1090
1355
  conn.execute(sql.delete(schema.Dir).where(schema.Dir.id == dir_id))
1091
- _logger.info(f'Removed directory {str(dir_path)!r}.')
1356
+ _logger.info(f'Removed directory {dir_path!r}.')
1092
1357
 
1093
1358
  def get_view_ids(self, tbl_id: UUID, for_update: bool = False) -> list[UUID]:
1094
1359
  """Return the ids of views that directly reference the given table"""
@@ -1104,13 +1369,25 @@ class Catalog:
1104
1369
  result = [r[0] for r in conn.execute(q).all()]
1105
1370
  return result
1106
1371
 
1107
- def get_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
1372
+ def get_tbl_version(
1373
+ self,
1374
+ tbl_id: UUID,
1375
+ effective_version: Optional[int],
1376
+ check_pending_ops: Optional[bool] = None,
1377
+ validate_initialized: bool = False,
1378
+ ) -> Optional[TableVersion]:
1379
+ """
1380
+ Returns the TableVersion instance for the given table and version and updates the cache.
1381
+
1382
+ If present in the cache and the instance isn't validated, validates version and view_sn against the stored
1383
+ metadata.
1384
+ """
1108
1385
  # we need a transaction here, if we're not already in one; if this starts a new transaction,
1109
1386
  # the returned TableVersion instance will not be validated
1110
1387
  with self.begin_xact(for_write=False) as conn:
1111
1388
  tv = self._tbl_versions.get((tbl_id, effective_version))
1112
1389
  if tv is None:
1113
- tv = self._load_tbl_version(tbl_id, effective_version)
1390
+ tv = self._load_tbl_version(tbl_id, effective_version, check_pending_ops=check_pending_ops)
1114
1391
  elif not tv.is_validated:
1115
1392
  # only live instances are invalidated
1116
1393
  assert effective_version is None
@@ -1131,12 +1408,16 @@ class Catalog:
1131
1408
  f'(cached/current version: {tv.version}/{current_version}, '
1132
1409
  f'cached/current view_sn: {tv.tbl_md.view_sn}/{view_sn})'
1133
1410
  )
1134
- tv = self._load_tbl_version(tbl_id, None)
1411
+ tv = self._load_tbl_version(tbl_id, None, check_pending_ops=check_pending_ops)
1135
1412
  else:
1136
1413
  # the cached metadata is valid
1137
1414
  tv.is_validated = True
1138
1415
 
1139
- assert tv.is_validated
1416
+ assert tv.is_validated, f'{tbl_id}:{effective_version} not validated\n{tv.__dict__}\n{self._debug_str()}'
1417
+ if validate_initialized:
1418
+ assert tv.is_initialized, (
1419
+ f'{tbl_id}:{effective_version} not initialized\n{tv.__dict__}\n{self._debug_str()}'
1420
+ )
1140
1421
  return tv
1141
1422
 
1142
1423
  def remove_tbl_version(self, tbl_version: TableVersion) -> None:
@@ -1188,6 +1469,13 @@ class Catalog:
1188
1469
  from .view import View
1189
1470
 
1190
1471
  conn = Env.get().conn
1472
+
1473
+ # check for pending ops
1474
+ q = sql.select(sql.func.count()).where(schema.PendingTableOp.tbl_id == tbl_id)
1475
+ has_pending_ops = conn.execute(q).scalar() > 0
1476
+ if has_pending_ops:
1477
+ raise PendingTableOpsError(tbl_id)
1478
+
1191
1479
  q = (
1192
1480
  sql.select(schema.Table, schema.TableSchemaVersion)
1193
1481
  .join(schema.TableSchemaVersion)
@@ -1204,11 +1492,11 @@ class Catalog:
1204
1492
  row = conn.execute(q).one_or_none()
1205
1493
  if row is None:
1206
1494
  return None
1207
- tbl_record, schema_version_record = _unpack_row(row, [schema.Table, schema.TableSchemaVersion])
1495
+ tbl_record, _ = _unpack_row(row, [schema.Table, schema.TableSchemaVersion])
1208
1496
 
1209
1497
  tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
1210
1498
  view_md = tbl_md.view_md
1211
- if view_md is None:
1499
+ if view_md is None and not tbl_md.is_replica:
1212
1500
  # this is a base table
1213
1501
  if (tbl_id, None) not in self._tbl_versions:
1214
1502
  _ = self._load_tbl_version(tbl_id, None)
@@ -1218,15 +1506,16 @@ class Catalog:
1218
1506
 
1219
1507
  # this is a view; determine the sequence of TableVersions to load
1220
1508
  tbl_version_path: list[tuple[UUID, Optional[int]]] = []
1221
- schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
1222
- pure_snapshot = view_md.is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
1223
- if pure_snapshot:
1509
+ if tbl_md.is_pure_snapshot:
1224
1510
  # this is a pure snapshot, without a physical table backing it; we only need the bases
1225
1511
  pass
1226
1512
  else:
1227
- effective_version = 0 if view_md.is_snapshot else None # snapshots only have version 0
1513
+ effective_version = (
1514
+ 0 if view_md is not None and view_md.is_snapshot else None
1515
+ ) # snapshots only have version 0
1228
1516
  tbl_version_path.append((tbl_id, effective_version))
1229
- tbl_version_path.extend((UUID(tbl_id), version) for tbl_id, version in view_md.base_versions)
1517
+ if view_md is not None:
1518
+ tbl_version_path.extend((UUID(tbl_id), version) for tbl_id, version in view_md.base_versions)
1230
1519
 
1231
1520
  # load TableVersions, starting at the root
1232
1521
  base_path: Optional[TableVersionPath] = None
@@ -1236,11 +1525,11 @@ class Catalog:
1236
1525
  _ = self._load_tbl_version(id, effective_version)
1237
1526
  view_path = TableVersionPath(TableVersionHandle(id, effective_version), base=base_path)
1238
1527
  base_path = view_path
1239
- view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=pure_snapshot)
1528
+ view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=tbl_md.is_pure_snapshot)
1240
1529
  self._tbls[tbl_id] = view
1241
1530
  return view
1242
1531
 
1243
- @_retry_loop(for_write=False)
1532
+ @retry_loop(for_write=False)
1244
1533
  def collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
1245
1534
  """
1246
1535
  Returns the history of up to n versions of the table with the given UUID.
@@ -1350,6 +1639,7 @@ class Catalog:
1350
1639
  tbl_md: Optional[schema.TableMd],
1351
1640
  version_md: Optional[schema.TableVersionMd],
1352
1641
  schema_version_md: Optional[schema.TableSchemaVersionMd],
1642
+ pending_ops: Optional[list[TableOp]] = None,
1353
1643
  ) -> None:
1354
1644
  """
1355
1645
  Stores metadata to the DB.
@@ -1364,6 +1654,9 @@ class Catalog:
1364
1654
  If inserting `version_md` or `schema_version_md` would be a primary key violation, an exception will be raised.
1365
1655
  """
1366
1656
  assert self._in_write_xact
1657
+ assert version_md is None or version_md.created_at > 0.0
1658
+ assert pending_ops is None or len(pending_ops) > 0
1659
+ assert pending_ops is None or tbl_md is not None # if we write pending ops, we must also write new tbl_md
1367
1660
  session = Env.get().session
1368
1661
 
1369
1662
  # Construct and insert or update table record if requested.
@@ -1374,6 +1667,9 @@ class Catalog:
1374
1667
  assert tbl_md.current_schema_version == version_md.schema_version
1375
1668
  if schema_version_md is not None:
1376
1669
  assert tbl_md.current_schema_version == schema_version_md.schema_version
1670
+ if pending_ops is not None:
1671
+ tbl_md.has_pending_ops = True
1672
+
1377
1673
  if dir_id is not None:
1378
1674
  # We are inserting a record while creating a new table.
1379
1675
  tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_md))
@@ -1404,25 +1700,32 @@ class Catalog:
1404
1700
  tbl_id=tbl_id, schema_version=schema_version_md.schema_version, md=dataclasses.asdict(schema_version_md)
1405
1701
  )
1406
1702
  session.add(schema_version_record)
1407
- session.flush() # Inform SQLAlchemy that we want to write these changes to the DB.
1408
1703
 
1409
- def update_tbl_version_md(self, version_md: Optional[schema.TableVersionMd]) -> None:
1410
- """
1411
- Update the TableVersion.md field in the DB. Typically used to update the cascade row count status.
1704
+ # make sure we don't have any pending ops
1705
+ assert session.query(schema.PendingTableOp).filter(schema.PendingTableOp.tbl_id == tbl_id).count() == 0
1412
1706
 
1413
- Args:
1414
- version_md: TableVersionMd
1415
- """
1707
+ if pending_ops is not None:
1708
+ for op in pending_ops:
1709
+ op_record = schema.PendingTableOp(tbl_id=tbl_id, op_sn=op.op_sn, op=dataclasses.asdict(op))
1710
+ session.add(op_record)
1711
+
1712
+ session.flush() # Inform SQLAlchemy that we want to write these changes to the DB.
1713
+
1714
+ def store_update_status(self, tbl_id: UUID, version: int, status: UpdateStatus) -> None:
1715
+ """Update the TableVersion.md.update_status field"""
1416
1716
  assert self._in_write_xact
1417
- session = Env.get().session
1717
+ conn = Env.get().conn
1418
1718
 
1419
- session.execute(
1420
- sql.update(schema.TableVersion.__table__)
1421
- .values({schema.TableVersion.md: dataclasses.asdict(version_md)})
1422
- .where(schema.TableVersion.tbl_id == version_md.tbl_id, schema.TableVersion.version == version_md.version)
1719
+ stmt = (
1720
+ sql.update(schema.TableVersion)
1721
+ .where(schema.TableVersion.tbl_id == tbl_id, schema.TableVersion.version == version)
1722
+ .values(
1723
+ md=schema.TableVersion.md.op('||')({'additional_md': {'update_status': dataclasses.asdict(status)}})
1724
+ )
1423
1725
  )
1424
1726
 
1425
- session.flush() # Inform SQLAlchemy that we want to write these changes to the DB.
1727
+ res = conn.execute(stmt)
1728
+ assert res.rowcount == 1, res.rowcount
1426
1729
 
1427
1730
  def delete_tbl_md(self, tbl_id: UUID) -> None:
1428
1731
  """
@@ -1431,6 +1734,7 @@ class Catalog:
1431
1734
  conn = Env.get().conn
1432
1735
  conn.execute(sql.delete(schema.TableSchemaVersion.__table__).where(schema.TableSchemaVersion.tbl_id == tbl_id))
1433
1736
  conn.execute(sql.delete(schema.TableVersion.__table__).where(schema.TableVersion.tbl_id == tbl_id))
1737
+ conn.execute(sql.delete(schema.PendingTableOp.__table__).where(schema.PendingTableOp.tbl_id == tbl_id))
1434
1738
  conn.execute(sql.delete(schema.Table.__table__).where(schema.Table.id == tbl_id))
1435
1739
 
1436
1740
  def load_replica_md(self, tbl: Table) -> list[schema.FullTableMd]:
@@ -1461,13 +1765,32 @@ class Catalog:
1461
1765
 
1462
1766
  return md
1463
1767
 
1464
- def _load_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
1768
+ def _load_tbl_version(
1769
+ self, tbl_id: UUID, effective_version: Optional[int], check_pending_ops: bool = True
1770
+ ) -> Optional[TableVersion]:
1465
1771
  """Creates TableVersion instance from stored metadata and registers it in _tbl_versions."""
1466
- tbl_md, _, schema_version_md = self.load_tbl_md(tbl_id, effective_version)
1772
+ tbl_md, version_md, schema_version_md = self.load_tbl_md(tbl_id, effective_version)
1467
1773
  view_md = tbl_md.view_md
1468
1774
 
1469
1775
  conn = Env.get().conn
1470
1776
 
1777
+ if check_pending_ops:
1778
+ pending_ops_q = (
1779
+ sql.select(sql.func.count())
1780
+ .select_from(schema.Table)
1781
+ .join(schema.PendingTableOp)
1782
+ .where(schema.PendingTableOp.tbl_id == tbl_id)
1783
+ .where(schema.Table.id == tbl_id)
1784
+ )
1785
+ if effective_version is not None:
1786
+ # we only care about pending ops if the requested version is the current version
1787
+ pending_ops_q = pending_ops_q.where(
1788
+ sql.text(f"({schema.Table.__table__}.md->>'current_version')::int = {effective_version}")
1789
+ )
1790
+ has_pending_ops = conn.execute(pending_ops_q).scalar() > 0
1791
+ if has_pending_ops:
1792
+ raise PendingTableOpsError(tbl_id)
1793
+
1471
1794
  # load mutable view ids for mutable TableVersions
1472
1795
  mutable_view_ids: list[UUID] = []
1473
1796
  # If this is a replica, effective_version should not be None. We see this today, because
@@ -1482,17 +1805,24 @@ class Catalog:
1482
1805
  )
1483
1806
  )
1484
1807
  mutable_view_ids = [r[0] for r in conn.execute(q).all()]
1808
+
1485
1809
  mutable_views = [TableVersionHandle(id, None) for id in mutable_view_ids]
1486
1810
 
1487
1811
  tbl_version: TableVersion
1488
1812
  if view_md is None:
1489
1813
  # this is a base table
1490
1814
  tbl_version = TableVersion(
1491
- tbl_id, tbl_md, effective_version, schema_version_md, mutable_views=mutable_views
1815
+ tbl_id, tbl_md, version_md, effective_version, schema_version_md, mutable_views=mutable_views
1492
1816
  )
1493
1817
  else:
1494
1818
  assert len(view_md.base_versions) > 0 # a view needs to have a base
1495
- pure_snapshot = view_md.is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
1819
+ # TODO: add TableVersionMd.is_pure_snapshot() and use that
1820
+ pure_snapshot = (
1821
+ view_md.is_snapshot
1822
+ and view_md.predicate is None
1823
+ and view_md.sample_clause is None
1824
+ and len(schema_version_md.columns) == 0
1825
+ )
1496
1826
  assert not pure_snapshot # a pure snapshot doesn't have a physical table backing it, no point in loading it
1497
1827
 
1498
1828
  base: TableVersionHandle
@@ -1506,6 +1836,7 @@ class Catalog:
1506
1836
  tbl_version = TableVersion(
1507
1837
  tbl_id,
1508
1838
  tbl_md,
1839
+ version_md,
1509
1840
  effective_version,
1510
1841
  schema_version_md,
1511
1842
  base_path=base_path,
@@ -1513,22 +1844,14 @@ class Catalog:
1513
1844
  mutable_views=mutable_views,
1514
1845
  )
1515
1846
 
1847
+ # register the instance before init()
1516
1848
  self._tbl_versions[tbl_id, effective_version] = tbl_version
1849
+ # register this instance as modified, so that it gets purged if the transaction fails, it may not be
1850
+ # fully initialized
1851
+ self._modified_tvs.add(tbl_version.handle)
1517
1852
  tbl_version.init()
1518
1853
  return tbl_version
1519
1854
 
1520
- def record_column_dependencies(self, tbl_version: TableVersion) -> None:
1521
- """Update self._column_dependencies. Only valid for non-snapshot versions."""
1522
- from pixeltable.exprs import Expr
1523
-
1524
- assert not tbl_version.is_snapshot
1525
- dependencies: dict[QColumnId, set[QColumnId]] = {}
1526
- for col in tbl_version.cols_by_id.values():
1527
- if col.value_expr_dict is None:
1528
- continue
1529
- dependencies[QColumnId(tbl_version.id, col.id)] = Expr.get_refd_column_ids(col.value_expr_dict)
1530
- self._column_dependencies[tbl_version.id] = dependencies
1531
-
1532
1855
  def _init_store(self) -> None:
1533
1856
  """One-time initialization of the stored catalog. Idempotent."""
1534
1857
  self.create_user(None)
@@ -1557,14 +1880,20 @@ class Catalog:
1557
1880
  obj, _, _ = self._prepare_dir_op(add_dir_path=path.parent, add_name=path.name)
1558
1881
 
1559
1882
  if if_exists == IfExistsParam.ERROR and obj is not None:
1560
- raise excs.Error(f'Path {str(path)!r} is an existing {type(obj)._display_name()}')
1883
+ raise excs.Error(f'Path {path!r} is an existing {obj._display_name()}')
1561
1884
  else:
1562
1885
  is_snapshot = isinstance(obj, View) and obj._tbl_version_path.is_snapshot()
1563
1886
  if obj is not None and (not isinstance(obj, expected_obj_type) or (expected_snapshot and not is_snapshot)):
1564
- obj_type_str = 'snapshot' if expected_snapshot else expected_obj_type._display_name()
1887
+ if expected_obj_type is Dir:
1888
+ obj_type_str = 'directory'
1889
+ elif expected_obj_type is InsertableTable:
1890
+ obj_type_str = 'table'
1891
+ elif expected_obj_type is View:
1892
+ obj_type_str = 'snapshot' if expected_snapshot else 'view'
1893
+ else:
1894
+ raise AssertionError()
1565
1895
  raise excs.Error(
1566
- f'Path {str(path)!r} already exists but is not a {obj_type_str}. '
1567
- f'Cannot {if_exists.name.lower()} it.'
1896
+ f'Path {path!r} already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.'
1568
1897
  )
1569
1898
 
1570
1899
  if obj is None:
@@ -1577,7 +1906,7 @@ class Catalog:
1577
1906
  dir_contents = self._get_dir_contents(obj._id)
1578
1907
  if len(dir_contents) > 0 and if_exists == IfExistsParam.REPLACE:
1579
1908
  raise excs.Error(
1580
- f'Directory {str(path)!r} already exists and is not empty. '
1909
+ f'Directory {path!r} already exists and is not empty. '
1581
1910
  'Use `if_exists="replace_force"` to replace it.'
1582
1911
  )
1583
1912
  self._drop_dir(obj._id, path, force=True)