pixeltable 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (60)
  1. pixeltable/__init__.py +1 -0
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +3 -11
  4. pixeltable/catalog/catalog.py +575 -220
  5. pixeltable/catalog/column.py +22 -23
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +2 -148
  8. pixeltable/catalog/insertable_table.py +15 -13
  9. pixeltable/catalog/path.py +6 -0
  10. pixeltable/catalog/schema_object.py +9 -4
  11. pixeltable/catalog/table.py +96 -85
  12. pixeltable/catalog/table_version.py +257 -174
  13. pixeltable/catalog/table_version_path.py +1 -1
  14. pixeltable/catalog/tbl_ops.py +44 -0
  15. pixeltable/catalog/update_status.py +179 -0
  16. pixeltable/catalog/view.py +50 -56
  17. pixeltable/config.py +76 -12
  18. pixeltable/dataframe.py +19 -6
  19. pixeltable/env.py +50 -4
  20. pixeltable/exec/data_row_batch.py +3 -1
  21. pixeltable/exec/exec_node.py +7 -24
  22. pixeltable/exec/expr_eval/schedulers.py +134 -7
  23. pixeltable/exec/in_memory_data_node.py +6 -7
  24. pixeltable/exprs/column_property_ref.py +21 -9
  25. pixeltable/exprs/column_ref.py +7 -2
  26. pixeltable/exprs/function_call.py +2 -2
  27. pixeltable/exprs/row_builder.py +10 -9
  28. pixeltable/exprs/rowid_ref.py +0 -4
  29. pixeltable/func/function.py +3 -3
  30. pixeltable/functions/audio.py +36 -9
  31. pixeltable/functions/gemini.py +4 -4
  32. pixeltable/functions/openai.py +1 -2
  33. pixeltable/functions/video.py +59 -16
  34. pixeltable/globals.py +109 -24
  35. pixeltable/io/__init__.py +1 -1
  36. pixeltable/io/datarows.py +2 -1
  37. pixeltable/io/external_store.py +3 -55
  38. pixeltable/io/globals.py +4 -4
  39. pixeltable/io/hf_datasets.py +10 -2
  40. pixeltable/io/label_studio.py +16 -16
  41. pixeltable/io/pandas.py +1 -0
  42. pixeltable/io/table_data_conduit.py +12 -13
  43. pixeltable/iterators/audio.py +17 -8
  44. pixeltable/iterators/image.py +5 -2
  45. pixeltable/metadata/__init__.py +1 -1
  46. pixeltable/metadata/converters/convert_39.py +125 -0
  47. pixeltable/metadata/converters/util.py +3 -0
  48. pixeltable/metadata/notes.py +1 -0
  49. pixeltable/metadata/schema.py +50 -1
  50. pixeltable/plan.py +4 -0
  51. pixeltable/share/packager.py +20 -38
  52. pixeltable/store.py +40 -51
  53. pixeltable/type_system.py +2 -2
  54. pixeltable/utils/coroutine.py +6 -23
  55. pixeltable/utils/media_store.py +50 -0
  56. {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/METADATA +1 -1
  57. {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/RECORD +60 -57
  58. {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/LICENSE +0 -0
  59. {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/WHEEL +0 -0
  60. {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/entry_points.txt +0 -0
@@ -14,6 +14,8 @@ import psycopg
14
14
  import sqlalchemy as sql
15
15
 
16
16
  from pixeltable import exceptions as excs
17
+
18
+ # from pixeltable import exceptions as excs, UpdateStatus
17
19
  from pixeltable.env import Env
18
20
  from pixeltable.iterators import ComponentIterator
19
21
  from pixeltable.metadata import schema
@@ -28,6 +30,8 @@ from .table import Table
28
30
  from .table_version import TableVersion
29
31
  from .table_version_handle import TableVersionHandle
30
32
  from .table_version_path import TableVersionPath
33
+ from .tbl_ops import TableOp
34
+ from .update_status import UpdateStatus
31
35
  from .view import View
32
36
 
33
37
  if TYPE_CHECKING:
@@ -70,18 +74,35 @@ _MAX_RETRIES = -1
70
74
  T = TypeVar('T')
71
75
 
72
76
 
73
- def _retry_loop(*, for_write: bool) -> Callable[[Callable[..., T]], Callable[..., T]]:
77
+ def retry_loop(
78
+ *, tbl: Optional[TableVersionPath] = None, for_write: bool, lock_mutable_tree: bool = False
79
+ ) -> Callable[[Callable[..., T]], Callable[..., T]]:
74
80
  def decorator(op: Callable[..., T]) -> Callable[..., T]:
75
81
  @functools.wraps(op)
76
82
  def loop(*args: Any, **kwargs: Any) -> T:
83
+ cat = Catalog.get()
84
+ # retry_loop() is reentrant
85
+ if cat._in_retry_loop:
86
+ return op(*args, **kwargs)
87
+
77
88
  num_retries = 0
78
89
  while True:
90
+ cat._in_retry_loop = True
79
91
  try:
80
92
  # in order for retry to work, we need to make sure that there aren't any prior db updates
81
93
  # that are part of an ongoing transaction
82
94
  assert not Env.get().in_xact
83
- with Catalog.get().begin_xact(for_write=for_write, convert_db_excs=False):
95
+ with Catalog.get().begin_xact(
96
+ tbl=tbl,
97
+ for_write=for_write,
98
+ convert_db_excs=False,
99
+ lock_mutable_tree=lock_mutable_tree,
100
+ finalize_pending_ops=True,
101
+ ):
84
102
  return op(*args, **kwargs)
103
+ except PendingTableOpsError as e:
104
+ Env.get().console_logger.debug(f'retry_loop(): finalizing pending ops for {e.tbl_id}')
105
+ Catalog.get()._finalize_pending_ops(e.tbl_id)
85
106
  except sql.exc.DBAPIError as e:
86
107
  # TODO: what other exceptions should we be looking for?
87
108
  if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
@@ -97,16 +118,31 @@ def _retry_loop(*, for_write: bool) -> Callable[[Callable[..., T]], Callable[...
97
118
  # for informational/debugging purposes
98
119
  _logger.debug(f'retry_loop(): passing along {e}')
99
120
  raise
121
+ finally:
122
+ cat._in_retry_loop = False
100
123
 
101
124
  return loop
102
125
 
103
126
  return decorator
104
127
 
105
128
 
129
+ class PendingTableOpsError(Exception):
130
+ tbl_id: UUID
131
+
132
+ def __init__(self, tbl_id: UUID) -> None:
133
+ self.tbl_id = tbl_id
134
+
135
+
106
136
  class Catalog:
107
137
  """The functional interface to getting access to catalog objects
108
138
 
109
- All interface functions must be called in the context of a transaction, started with Catalog.begin_xact().
139
+ All interface functions must be called in the context of a transaction, started with Catalog.begin_xact() or
140
+ via retry_loop().
141
+
142
+ When calling functions that involve Table or TableVersion instances, the catalog needs to get a chance to finalize
143
+ pending ops against those tables. To that end,
144
+ - use begin_xact(tbl) or begin_xact(tbl_id) if only accessing a single table
145
+ - use retry_loop() when accessing multiple tables (eg, pxt.ls())
110
146
 
111
147
  Caching and invalidation of metadata:
112
148
  - Catalog caches TableVersion instances in order to avoid excessive metadata loading
@@ -132,6 +168,8 @@ class Catalog:
132
168
  _tbls: dict[UUID, Table]
133
169
  _in_write_xact: bool # True if we're in a write transaction
134
170
  _x_locked_tbl_ids: set[UUID] # non-empty for write transactions
171
+ _modified_tvs: set[TableVersionHandle] # TableVersion instances modified in the current transaction
172
+ _in_retry_loop: bool
135
173
 
136
174
  # cached column dependencies
137
175
  # - key: table id, value: mapping from column id to its dependencies
@@ -164,6 +202,8 @@ class Catalog:
164
202
  self._tbls = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
165
203
  self._in_write_xact = False
166
204
  self._x_locked_tbl_ids = set()
205
+ self._modified_tvs = set()
206
+ self._in_retry_loop = False
167
207
  self._column_dependencies = {}
168
208
  self._column_dependents = None
169
209
  self._init_store()
@@ -214,9 +254,11 @@ class Catalog:
214
254
  self,
215
255
  *,
216
256
  tbl: Optional[TableVersionPath] = None,
257
+ tbl_id: Optional[UUID] = None,
217
258
  for_write: bool = False,
218
259
  lock_mutable_tree: bool = False,
219
260
  convert_db_excs: bool = True,
261
+ finalize_pending_ops: bool = True,
220
262
  ) -> Iterator[sql.Connection]:
221
263
  """
222
264
  Return a context manager that yields a connection to the database. Idempotent.
@@ -227,7 +269,7 @@ class Catalog:
227
269
  If tbl != None, follows this locking protocol:
228
270
  - validates/reloads the TableVersion instances of tbl's ancestors (in the hope that this reduces potential
229
271
  SerializationErrors later on)
230
- - if for_write == True, x-locks Table record (by updating Table.lock_dummy; see _acquire_tbl_xlock())
272
+ - if for_write == True, x-locks Table record (by updating Table.lock_dummy; see _acquire_tbl_lock())
231
273
  - if for_write == False, validates TableVersion instance
232
274
  - if lock_mutable_tree == True, also x-locks all mutable views of the table
233
275
  - this needs to be done in a retry loop, because Postgres can decide to abort the transaction
@@ -237,10 +279,14 @@ class Catalog:
237
279
 
238
280
  If convert_db_excs == True, converts DBAPIErrors into excs.Errors.
239
281
  """
282
+ assert tbl is None or tbl_id is None # at most one can be specified
240
283
  if Env.get().in_xact:
241
- if tbl is not None and for_write:
242
- # make sure that we requested the required table lock at the beginning of the transaction
243
- assert tbl.tbl_id in self._x_locked_tbl_ids, f'{tbl.tbl_id} not in {self._x_locked_tbl_ids}'
284
+ # make sure that we requested the required table lock at the beginning of the transaction
285
+ if for_write:
286
+ if tbl is not None:
287
+ assert tbl.tbl_id in self._x_locked_tbl_ids, f'{tbl.tbl_id} not in {self._x_locked_tbl_ids}'
288
+ elif tbl_id is not None:
289
+ assert tbl_id in self._x_locked_tbl_ids, f'{tbl_id} not in {self._x_locked_tbl_ids}'
244
290
  yield Env.get().conn
245
291
  return
246
292
 
@@ -252,33 +298,66 @@ class Catalog:
252
298
  # )
253
299
  # _logger.debug(f'begin_xact(): {tv_msg}')
254
300
  num_retries = 0
301
+ pending_ops_tbl_id: Optional[UUID] = None
302
+ has_exc = False # True if we exited the 'with ...begin_xact()' block with an exception
255
303
  while True:
304
+ if pending_ops_tbl_id is not None:
305
+ Env.get().console_logger.debug(f'begin_xact(): finalizing pending ops for {pending_ops_tbl_id}')
306
+ self._finalize_pending_ops(pending_ops_tbl_id)
307
+ pending_ops_tbl_id = None
308
+
256
309
  try:
257
- self._in_write_xact = False
310
+ self._in_write_xact = for_write
258
311
  self._x_locked_tbl_ids = set()
312
+ self._modified_tvs = set()
259
313
  self._column_dependents = None
314
+ has_exc = False
260
315
 
261
- with Env.get().begin_xact() as conn:
262
- if tbl is not None:
316
+ with Env.get().begin_xact(for_write=for_write) as conn:
317
+ if tbl is not None or tbl_id is not None:
263
318
  try:
264
- if not self._acquire_path_locks(
265
- tbl=tbl, for_write=for_write, lock_mutable_tree=lock_mutable_tree
266
- ):
267
- # this is a snapshot
268
- yield conn
269
- return
270
-
271
- if for_write:
272
- if lock_mutable_tree:
273
- self._x_locked_tbl_ids = self._get_mutable_tree(tbl.tbl_id)
319
+ target: Optional[TableVersionHandle] = None
320
+ if tbl is not None:
321
+ if self._acquire_path_locks(
322
+ tbl=tbl,
323
+ for_write=for_write,
324
+ lock_mutable_tree=lock_mutable_tree,
325
+ check_pending_ops=finalize_pending_ops,
326
+ ):
327
+ target = tbl.tbl_version
328
+ else:
329
+ target = self._acquire_tbl_lock(
330
+ tbl_id=tbl_id,
331
+ for_write=for_write,
332
+ lock_mutable_tree=lock_mutable_tree,
333
+ raise_if_not_exists=True,
334
+ check_pending_ops=finalize_pending_ops,
335
+ )
336
+
337
+ if target is None:
338
+ # didn't get the write lock
339
+ for_write = False
340
+ elif for_write:
341
+ # we know at this point that target is mutable because we got the X-lock
342
+ if lock_mutable_tree and not target.is_snapshot:
343
+ self._x_locked_tbl_ids = self._get_mutable_tree(target.id)
274
344
  self._compute_column_dependents(self._x_locked_tbl_ids)
275
345
  else:
276
- self._x_locked_tbl_ids = {tbl.tbl_id}
346
+ self._x_locked_tbl_ids = {target.id}
277
347
  if _logger.isEnabledFor(logging.DEBUG):
278
348
  # validate only when we don't see errors
279
349
  self.validate()
280
350
 
351
+ except PendingTableOpsError as e:
352
+ has_exc = True
353
+ if finalize_pending_ops:
354
+ # we remember which table id to finalize
355
+ pending_ops_tbl_id = e.tbl_id
356
+ # raise to abort the transaction
357
+ raise
358
+
281
359
  except sql.exc.DBAPIError as e:
360
+ has_exc = True
282
361
  if isinstance(
283
362
  e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)
284
363
  ) and (num_retries < _MAX_RETRIES or _MAX_RETRIES == -1):
@@ -289,11 +368,20 @@ class Catalog:
289
368
  else:
290
369
  raise
291
370
 
292
- self._in_write_xact = for_write
293
371
  yield conn
294
372
  return
295
373
 
374
+ except PendingTableOpsError:
375
+ has_exc = True
376
+ if pending_ops_tbl_id is not None:
377
+ # the next iteration of the loop will deal with pending ops for this table id
378
+ continue
379
+ else:
380
+ # we got this exception after getting the initial table locks and therefore need to abort
381
+ raise
382
+
296
383
  except sql.exc.DBAPIError as e:
384
+ has_exc = True
297
385
  # we got some db error during the actual operation (not just while trying to get locks on the metadata
298
386
  # records): we convert these into Errors, if asked to do so, and abort
299
387
  # TODO: what other concurrency-related exceptions should we expect?
@@ -301,12 +389,19 @@ class Catalog:
301
389
  # we always convert UndefinedTable exceptions (they can't be retried)
302
390
  if isinstance(e.orig, psycopg.errors.UndefinedTable):
303
391
  # the table got dropped in the middle of the table operation
304
- _logger.debug(f'Exception: undefined table ({tbl.tbl_name()}): Caught {type(e.orig)}: {e!r}')
392
+ tbl_name = tbl.tbl_name() if tbl is not None else str(tbl_id) if tbl_id is not None else '?'
393
+ _logger.debug(f'Exception: undefined table ({tbl_name}): Caught {type(e.orig)}: {e!r}')
305
394
  assert tbl is not None
306
- raise excs.Error(f'Table was dropped: {tbl.tbl_name()}') from None
395
+ raise excs.Error(f'Table was dropped: {tbl_name}') from None
307
396
  elif isinstance(e.orig, psycopg.errors.SerializationFailure) and convert_db_excs:
308
397
  # we still got a serialization error, despite getting x-locks at the beginning
309
- msg = f'{tbl.tbl_name()} ({tbl.tbl_id})' if tbl is not None else ''
398
+ msg: str
399
+ if tbl is not None:
400
+ msg = f'{tbl.tbl_name()} ({tbl.tbl_id})'
401
+ elif tbl_id is not None:
402
+ msg = f'{tbl_id}'
403
+ else:
404
+ msg = ''
310
405
  _logger.debug(f'Exception: serialization failure: {msg} ({e})')
311
406
  raise excs.Error(
312
407
  'That Pixeltable operation could not be completed because it conflicted with another '
@@ -316,6 +411,10 @@ class Catalog:
316
411
  else:
317
412
  raise
318
413
 
414
+ except:
415
+ has_exc = True
416
+ raise
417
+
319
418
  finally:
320
419
  self._in_write_xact = False
321
420
  self._x_locked_tbl_ids = set()
@@ -327,12 +426,24 @@ class Catalog:
327
426
  _logger.debug(f'invalidating table version {tv.id}:None (tv={id(tv):x})')
328
427
  tv.is_validated = False
329
428
 
429
+ if has_exc:
430
+ # purge all modified TableVersion instances, we can't guarantee they are still consistent with the
431
+ # stored metadata
432
+ for handle in self._modified_tvs:
433
+ self._clear_tv_cache(handle.id, handle.effective_version)
434
+ self._modified_tvs = set()
435
+
330
436
  @property
331
437
  def in_write_xact(self) -> bool:
332
438
  return self._in_write_xact
333
439
 
334
440
  def _acquire_path_locks(
335
- self, *, tbl: TableVersionPath, for_write: bool = False, lock_mutable_tree: bool = False
441
+ self,
442
+ *,
443
+ tbl: TableVersionPath,
444
+ for_write: bool = False,
445
+ lock_mutable_tree: bool = False,
446
+ check_pending_ops: Optional[bool] = None,
336
447
  ) -> bool:
337
448
  """
338
449
  Path locking protocol:
@@ -341,33 +452,49 @@ class Catalog:
341
452
  - refresh cached TableVersion of tbl or get X-lock, depending on for_write
342
453
  - if lock_mutable_tree, also X-lock all mutable views of tbl
343
454
 
344
- Returns False if trying to lock a pure snapshot with for_write == True
345
455
  Raises Error if tbl doesn't exist.
456
+ Return False if the lock couldn't be acquired (X-lock on a non-mutable table), True otherwise.
346
457
  """
347
- start_idx = 1 if for_write else 0
348
- for handle in tbl.get_tbl_versions()[start_idx::-1]:
349
- _ = self.get_tbl_version(handle.id, handle.effective_version)
458
+ path_handles = tbl.get_tbl_versions()
459
+ read_handles = path_handles[:0:-1] if for_write else path_handles[::-1]
460
+ for handle in read_handles:
461
+ # update cache
462
+ _ = self.get_tbl_version(handle.id, handle.effective_version, validate_initialized=True)
350
463
  if not for_write:
351
464
  return True # nothing left to lock
352
- return self._acquire_tbl_xlock(tbl_id=tbl.tbl_id, lock_mutable_tree=lock_mutable_tree, raise_if_not_exists=True)
465
+ handle = self._acquire_tbl_lock(
466
+ tbl_id=tbl.tbl_id,
467
+ for_write=True,
468
+ lock_mutable_tree=lock_mutable_tree,
469
+ raise_if_not_exists=True,
470
+ check_pending_ops=check_pending_ops,
471
+ )
472
+ # update cache
473
+ _ = self.get_tbl_version(path_handles[0].id, path_handles[0].effective_version, validate_initialized=True)
474
+ return handle is not None
353
475
 
354
- def _acquire_tbl_xlock(
476
+ def _acquire_tbl_lock(
355
477
  self,
356
478
  *,
479
+ for_write: bool,
357
480
  tbl_id: Optional[UUID] = None,
358
481
  dir_id: Optional[UUID] = None,
359
482
  tbl_name: Optional[str] = None,
360
483
  lock_mutable_tree: bool = False,
361
- raise_if_not_exists: bool = False,
362
- ) -> bool:
363
- """Force acquisition of an X-lock on a Table record via a blind update
484
+ raise_if_not_exists: bool = True,
485
+ check_pending_ops: Optional[bool] = None,
486
+ ) -> Optional[TableVersionHandle]:
487
+ """
488
+ For writes: force acquisition of an X-lock on a Table record via a blind update.
364
489
 
365
490
  Either tbl_id or dir_id/tbl_name need to be specified.
366
491
  Returns True if the table was locked, False if it was a snapshot or not found.
367
492
  If lock_mutable_tree, recursively locks all mutable views of the table.
368
493
 
369
- Returns False if the table is a snapshot or not found and !raise_if_not_exists.
494
+ Returns a handle to what was locked, None if the lock couldn't be acquired (eg, X-lock on a non-mutable table).
370
495
  """
496
+ assert (tbl_id is not None) != (dir_id is not None and tbl_name is not None)
497
+ assert (dir_id is None) == (tbl_name is None)
371
498
  where_clause: sql.ColumnElement
372
499
  if tbl_id is not None:
373
500
  where_clause = schema.Table.id == tbl_id
@@ -378,26 +505,130 @@ class Catalog:
378
505
  where_clause = sql.and_(where_clause, schema.Table.md['user'].astext == Env.get().user)
379
506
 
380
507
  conn = Env.get().conn
381
- row = conn.execute(sql.select(schema.Table).where(where_clause).with_for_update(nowait=True)).one_or_none()
508
+ q = sql.select(schema.Table).where(where_clause)
509
+ if for_write:
510
+ q = q.with_for_update(nowait=True)
511
+ row = conn.execute(q).one_or_none()
382
512
  if row is None:
383
513
  if raise_if_not_exists:
384
514
  raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
385
- return False # nothing to lock
386
- if row.md['view_md'] is not None and row.md['view_md']['is_snapshot']:
387
- return False # nothing to lock
388
- conn.execute(sql.update(schema.Table).values(lock_dummy=1).where(where_clause))
389
-
390
- if not lock_mutable_tree:
391
- return True
392
- # also lock mutable views
393
- tv = self.get_tbl_version(tbl_id, None)
394
- for view in tv.mutable_views:
395
- self._acquire_tbl_xlock(tbl_id=view.id, lock_mutable_tree=True, raise_if_not_exists=raise_if_not_exists)
396
- return True
515
+ return None # nothing to lock
516
+ tbl_md = schema.md_from_dict(schema.TableMd, row.md)
517
+ if for_write and tbl_md.is_mutable:
518
+ conn.execute(sql.update(schema.Table).values(lock_dummy=1).where(where_clause))
519
+
520
+ if check_pending_ops:
521
+ # check for pending ops after getting table lock
522
+ pending_ops_q = sql.select(sql.func.count()).where(schema.PendingTableOp.tbl_id == row.id)
523
+ has_pending_ops = conn.execute(pending_ops_q).scalar() > 0
524
+ if has_pending_ops:
525
+ raise PendingTableOpsError(row.id)
526
+
527
+ if for_write and not tbl_md.is_mutable:
528
+ return None # nothing to lock
529
+
530
+ effective_version = tbl_md.current_version if tbl_md.is_snapshot else None
531
+ if tbl_md.is_mutable and lock_mutable_tree:
532
+ # also lock mutable views
533
+ tv = self.get_tbl_version(tbl_id, effective_version, validate_initialized=True)
534
+ for view in tv.mutable_views:
535
+ self._acquire_tbl_lock(
536
+ for_write=for_write,
537
+ tbl_id=view.id,
538
+ lock_mutable_tree=lock_mutable_tree,
539
+ raise_if_not_exists=raise_if_not_exists,
540
+ check_pending_ops=check_pending_ops,
541
+ )
542
+ return TableVersionHandle(tbl_id, effective_version)
543
+
544
+ def _finalize_pending_ops(self, tbl_id: UUID) -> None:
545
+ """Finalizes all pending ops for the given table."""
546
+ num_retries = 0
547
+ while True:
548
+ try:
549
+ tbl_version: int
550
+ op: Optional[TableOp] = None
551
+ delete_next_op_stmt: sql.Delete
552
+ reset_has_pending_stmt: sql.Update
553
+ with self.begin_xact(
554
+ tbl_id=tbl_id, for_write=True, convert_db_excs=False, finalize_pending_ops=False
555
+ ) as conn:
556
+ q = (
557
+ sql.select(schema.Table.md, schema.PendingTableOp)
558
+ .select_from(schema.Table)
559
+ .join(schema.PendingTableOp)
560
+ .where(schema.Table.id == tbl_id)
561
+ .where(schema.PendingTableOp.tbl_id == tbl_id)
562
+ .order_by(schema.PendingTableOp.op_sn)
563
+ .limit(1)
564
+ .with_for_update()
565
+ )
566
+ row = conn.execute(q).one_or_none()
567
+ if row is None:
568
+ return
569
+ tbl_version = row.md.get('current_version')
570
+ op = schema.md_from_dict(TableOp, row.op)
571
+ delete_next_op_stmt = sql.delete(schema.PendingTableOp).where(
572
+ schema.PendingTableOp.tbl_id == tbl_id, schema.PendingTableOp.op_sn == row.op_sn
573
+ )
574
+ reset_has_pending_stmt = (
575
+ sql.update(schema.Table)
576
+ .where(schema.Table.id == tbl_id)
577
+ .values(md=schema.Table.md.op('||')({'has_pending_ops': False}))
578
+ )
579
+
580
+ if op.needs_xact:
581
+ tv = self.get_tbl_version(
582
+ tbl_id, tbl_version, check_pending_ops=False, validate_initialized=True
583
+ )
584
+ tv.exec_op(op)
585
+ conn.execute(delete_next_op_stmt)
586
+ if op.op_sn == op.num_ops - 1:
587
+ conn.execute(reset_has_pending_stmt)
588
+ continue
589
+
590
+ # this op runs outside of a transaction
591
+ tv = self.get_tbl_version(tbl_id, tbl_version, check_pending_ops=False, validate_initialized=True)
592
+ tv.exec_op(op)
593
+ with self.begin_xact(
594
+ tbl_id=tbl_id, for_write=True, convert_db_excs=False, finalize_pending_ops=False
595
+ ) as conn:
596
+ conn.execute(delete_next_op_stmt)
597
+ if op.op_sn == op.num_ops - 1:
598
+ conn.execute(reset_has_pending_stmt)
599
+
600
+ except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
601
+ # TODO: why are we still seeing these here, instead of them getting taken care of by the retry
602
+ # logic of begin_xact()?
603
+ if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
604
+ num_retries += 1
605
+ log_msg: str
606
+ if op is not None:
607
+ log_msg = f'finalize_pending_ops(): retrying ({num_retries}) op {op!s} after {type(e.orig)}'
608
+ else:
609
+ log_msg = f'finalize_pending_ops(): retrying ({num_retries}) after {type(e.orig)}'
610
+ Env.get().console_logger.debug(log_msg)
611
+ time.sleep(random.uniform(0.1, 0.5))
612
+ continue
613
+ else:
614
+ raise
615
+ except Exception as e:
616
+ Env.get().console_logger.debug(f'finalize_pending_ops(): caught {e}')
617
+ raise
618
+
619
+ num_retries = 0
620
+
621
+ def _debug_str(self) -> str:
622
+ tv_str = '\n'.join(str(k) for k in self._tbl_versions)
623
+ tbl_str = '\n'.join(str(k) for k in self._tbls)
624
+ return f'tbl_versions:\n{tv_str}\ntbls:\n{tbl_str}'
397
625
 
398
626
  def _get_mutable_tree(self, tbl_id: UUID) -> set[UUID]:
399
627
  """Returns ids of all tables that form the tree of mutable views starting at tbl_id; includes the root."""
400
- tv = self.get_tbl_version(tbl_id, None)
628
+ assert (tbl_id, None) in self._tbl_versions, (
629
+ f'({tbl_id}, None) not in {self._tbl_versions.keys()}\n{self._debug_str()}'
630
+ )
631
+ tv = self.get_tbl_version(tbl_id, None, validate_initialized=True)
401
632
  result: set[UUID] = {tv.id}
402
633
  for view in tv.mutable_views:
403
634
  result.update(self._get_mutable_tree(view.id))
@@ -408,7 +639,9 @@ class Catalog:
408
639
  assert self._column_dependents is None
409
640
  self._column_dependents = defaultdict(set)
410
641
  for tbl_id in mutable_tree:
411
- assert tbl_id in self._column_dependencies
642
+ assert tbl_id in self._column_dependencies, (
643
+ f'{tbl_id} not in {self._column_dependencies.keys()}\n{self._debug_str()}'
644
+ )
412
645
  for col, dependencies in self._column_dependencies[tbl_id].items():
413
646
  for dependency in dependencies:
414
647
  if dependency.tbl_id not in mutable_tree:
@@ -416,13 +649,25 @@ class Catalog:
416
649
  dependents = self._column_dependents[dependency]
417
650
  dependents.add(col)
418
651
 
652
+ def record_column_dependencies(self, tbl_version: TableVersion) -> None:
653
+ """Update self._column_dependencies. Only valid for mutable versions."""
654
+ from pixeltable.exprs import Expr
655
+
656
+ assert tbl_version.is_mutable
657
+ dependencies: dict[QColumnId, set[QColumnId]] = {}
658
+ for col in tbl_version.cols_by_id.values():
659
+ if col.value_expr_dict is None:
660
+ continue
661
+ dependencies[QColumnId(tbl_version.id, col.id)] = Expr.get_refd_column_ids(col.value_expr_dict)
662
+ self._column_dependencies[tbl_version.id] = dependencies
663
+
419
664
  def get_column_dependents(self, tbl_id: UUID, col_id: int) -> set[Column]:
420
665
  """Return all Columns that transitively depend on the given column."""
421
666
  assert self._column_dependents is not None
422
667
  dependents = self._column_dependents[QColumnId(tbl_id, col_id)]
423
668
  result: set[Column] = set()
424
669
  for dependent in dependents:
425
- tv = self.get_tbl_version(dependent.tbl_id, None)
670
+ tv = self.get_tbl_version(dependent.tbl_id, None, validate_initialized=True)
426
671
  col = tv.cols_by_id[dependent.col_id]
427
672
  result.add(col)
428
673
  return result
@@ -471,7 +716,7 @@ class Catalog:
471
716
  dir_entries: dict[str, Catalog.DirEntry]
472
717
  table: Optional[schema.Table]
473
718
 
474
- @_retry_loop(for_write=False)
719
+ @retry_loop(for_write=False)
475
720
  def get_dir_contents(self, dir_path: Path, recursive: bool = False) -> dict[str, DirEntry]:
476
721
  dir = self._get_schema_object(dir_path, expected=Dir, raise_if_not_exists=True)
477
722
  return self._get_dir_contents(dir._id, recursive=recursive)
@@ -498,7 +743,7 @@ class Catalog:
498
743
 
499
744
  return result
500
745
 
501
- @_retry_loop(for_write=True)
746
+ @retry_loop(for_write=True)
502
747
  def move(self, path: Path, new_path: Path) -> None:
503
748
  self._move(path, new_path)
504
749
 
@@ -540,6 +785,7 @@ class Catalog:
540
785
  - if both add and drop (= two directories are involved), lock the directories in a pre-determined order
541
786
  (in this case, by name) in order to prevent deadlocks between concurrent directory modifications
542
787
  """
788
+ assert drop_expected in (None, Table, Dir), drop_expected
543
789
  assert (add_dir_path is None) == (add_name is None)
544
790
  assert (drop_dir_path is None) == (drop_name is None)
545
791
  dir_paths: set[Path] = set()
@@ -553,7 +799,7 @@ class Catalog:
553
799
  for p in sorted(dir_paths):
554
800
  dir = self._get_dir(p, lock_dir=True)
555
801
  if dir is None:
556
- raise excs.Error(f'Directory {str(p)!r} does not exist.')
802
+ raise excs.Error(f'Directory {p!r} does not exist.')
557
803
  if p == add_dir_path:
558
804
  add_dir = dir
559
805
  if p == drop_dir_path:
@@ -564,19 +810,17 @@ class Catalog:
564
810
  add_obj = self._get_dir_entry(add_dir.id, add_name, lock_entry=True)
565
811
  if add_obj is not None and raise_if_exists:
566
812
  add_path = add_dir_path.append(add_name)
567
- raise excs.Error(f'Path {str(add_path)!r} already exists.')
813
+ raise excs.Error(f'Path {add_path!r} already exists.')
568
814
 
569
815
  drop_obj: Optional[SchemaObject] = None
570
816
  if drop_dir is not None:
571
817
  drop_path = drop_dir_path.append(drop_name)
572
818
  drop_obj = self._get_dir_entry(drop_dir.id, drop_name, lock_entry=True)
573
819
  if drop_obj is None and raise_if_not_exists:
574
- raise excs.Error(f'Path {str(drop_path)!r} does not exist.')
820
+ raise excs.Error(f'Path {drop_path!r} does not exist.')
575
821
  if drop_obj is not None and drop_expected is not None and not isinstance(drop_obj, drop_expected):
576
- raise excs.Error(
577
- f'{str(drop_path)!r} needs to be a {drop_expected._display_name()} '
578
- f'but is a {type(drop_obj)._display_name()}'
579
- )
822
+ expected_name = 'table' if drop_expected is Table else 'directory'
823
+ raise excs.Error(f'{drop_path!r} needs to be a {expected_name} but is a {drop_obj._display_name()}')
580
824
 
581
825
  add_dir_obj = Dir(add_dir.id, add_dir.parent_id, add_dir.md['name']) if add_dir is not None else None
582
826
  return add_obj, add_dir_obj, drop_obj
@@ -602,7 +846,7 @@ class Catalog:
602
846
 
603
847
  # check for table
604
848
  if lock_entry:
605
- self._acquire_tbl_xlock(dir_id=dir_id, tbl_name=name)
849
+ self._acquire_tbl_lock(for_write=True, dir_id=dir_id, raise_if_not_exists=False, tbl_name=name)
606
850
  q = sql.select(schema.Table.id).where(
607
851
  schema.Table.dir_id == dir_id,
608
852
  schema.Table.md['name'].astext == name,
@@ -633,12 +877,12 @@ class Catalog:
633
877
  - raise_if_not_exists is True and the path does not exist
634
878
  - expected is not None and the existing object has a different type
635
879
  """
880
+ assert expected in (None, Table, Dir), expected
881
+
636
882
  if path.is_root:
637
883
  # the root dir
638
884
  if expected is not None and expected is not Dir:
639
- raise excs.Error(
640
- f'{str(path)!r} needs to be a {expected._display_name()} but is a {Dir._display_name()}'
641
- )
885
+ raise excs.Error(f'{path!r} needs to be a table but is a dir')
642
886
  dir = self._get_dir(path, lock_dir=lock_obj)
643
887
  if dir is None:
644
888
  raise excs.Error(f'Unknown user: {Env.get().user}')
@@ -647,33 +891,32 @@ class Catalog:
647
891
  parent_path = path.parent
648
892
  parent_dir = self._get_dir(parent_path, lock_dir=lock_parent)
649
893
  if parent_dir is None:
650
- raise excs.Error(f'Directory {str(parent_path)!r} does not exist.')
894
+ raise excs.Error(f'Directory {parent_path!r} does not exist.')
651
895
  obj = self._get_dir_entry(parent_dir.id, path.name, lock_entry=lock_obj)
652
896
 
653
897
  if obj is None and raise_if_not_exists:
654
- raise excs.Error(f'Path {str(path)!r} does not exist.')
898
+ raise excs.Error(f'Path {path!r} does not exist.')
655
899
  elif obj is not None and raise_if_exists:
656
- raise excs.Error(f'Path {str(path)!r} is an existing {type(obj)._display_name()}.')
900
+ raise excs.Error(f'Path {path!r} is an existing {obj._display_name()}.')
657
901
  elif obj is not None and expected is not None and not isinstance(obj, expected):
658
- raise excs.Error(
659
- f'{str(path)!r} needs to be a {expected._display_name()} but is a {type(obj)._display_name()}.'
660
- )
902
+ expected_name = 'table' if expected is Table else 'directory'
903
+ raise excs.Error(f'{path!r} needs to be a {expected_name} but is a {obj._display_name()}.')
661
904
  return obj
662
905
 
663
906
  def get_table_by_id(self, tbl_id: UUID) -> Optional[Table]:
664
- if tbl_id not in self._tbls:
665
- tbl = self._load_tbl(tbl_id)
666
- if tbl is None:
667
- return None
668
- # # if this is a mutable table, we also need to have its mutable views loaded, in order to track column
669
- # # dependencies
670
- # tbl_version = tbl._tbl_version.get()
671
- # if tbl_version.is_mutable:
672
- # for v in tbl_version.mutable_views:
673
- # _ = self.get_table_by_id(v.id)
674
- return self._tbls[tbl_id]
675
-
676
- @_retry_loop(for_write=True)
907
+ """Must be executed inside a transaction. Might raise PendingTableOpsError."""
908
+ if tbl_id in self._tbls:
909
+ return self._tbls[tbl_id]
910
+ tbl = self._load_tbl(tbl_id)
911
+ # # if this is a mutable table, we also need to have its mutable views loaded, in order to track column
912
+ # # dependencies
913
+ # tbl_version = tbl._tbl_version.get()
914
+ # if tbl_version.is_mutable:
915
+ # for v in tbl_version.mutable_views:
916
+ # _ = self.get_table_by_id(v.id)
917
+ return tbl
918
+
919
+ @retry_loop(for_write=True)
677
920
  def create_table(
678
921
  self,
679
922
  path: Path,
@@ -706,7 +949,6 @@ class Catalog:
706
949
  self._tbls[tbl._id] = tbl
707
950
  return tbl
708
951
 
709
- @_retry_loop(for_write=True)
710
952
  def create_view(
711
953
  self,
712
954
  path: Path,
@@ -722,102 +964,112 @@ class Catalog:
722
964
  media_validation: MediaValidation,
723
965
  if_exists: IfExistsParam,
724
966
  ) -> Table:
725
- from pixeltable.utils.filecache import FileCache
726
-
727
- if not is_snapshot and not base.is_snapshot():
728
- # this is a mutable view of a mutable base; X-lock the base and advance its view_sn before adding the view
729
- self._acquire_tbl_xlock(tbl_id=base.tbl_id)
730
- base_tv = self.get_tbl_version(base.tbl_id, None)
731
- base_tv.tbl_md.view_sn += 1
732
- result = Env.get().conn.execute(
733
- sql.update(schema.Table)
734
- .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
735
- .where(schema.Table.id == base.tbl_id)
736
- )
737
- assert result.rowcount == 1, result.rowcount
967
+ @retry_loop(for_write=True)
968
+ def create_fn() -> UUID:
969
+ if not is_snapshot and base.is_mutable():
970
+ # this is a mutable view of a mutable base; X-lock the base and advance its view_sn before adding
971
+ # the view
972
+ self._acquire_tbl_lock(tbl_id=base.tbl_id, for_write=True)
973
+ base_tv = self.get_tbl_version(base.tbl_id, None, validate_initialized=True)
974
+ base_tv.tbl_md.view_sn += 1
975
+ result = Env.get().conn.execute(
976
+ sql.update(schema.Table)
977
+ .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
978
+ .where(schema.Table.id == base.tbl_id)
979
+ )
980
+ assert result.rowcount == 1, result.rowcount
738
981
 
739
- existing = self._handle_path_collision(path, View, is_snapshot, if_exists)
740
- if existing is not None:
741
- assert isinstance(existing, View)
742
- return existing
982
+ existing = self._handle_path_collision(path, View, is_snapshot, if_exists)
983
+ if existing is not None:
984
+ assert isinstance(existing, View)
985
+ return existing._id
743
986
 
744
- dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
745
- assert dir is not None
746
- if iterator is None:
747
- iterator_class, iterator_args = None, None
748
- else:
749
- iterator_class, iterator_args = iterator
750
- view = View._create(
751
- dir._id,
752
- path.name,
753
- base=base,
754
- select_list=select_list,
755
- additional_columns=additional_columns,
756
- predicate=where,
757
- sample_clause=sample_clause,
758
- is_snapshot=is_snapshot,
759
- iterator_cls=iterator_class,
760
- iterator_args=iterator_args,
761
- num_retained_versions=num_retained_versions,
762
- comment=comment,
763
- media_validation=media_validation,
764
- )
765
- FileCache.get().emit_eviction_warnings()
766
- self._tbls[view._id] = view
767
- return view
987
+ dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
988
+ assert dir is not None
989
+ if iterator is None:
990
+ iterator_class, iterator_args = None, None
991
+ else:
992
+ iterator_class, iterator_args = iterator
993
+ md, ops = View._create(
994
+ dir._id,
995
+ path.name,
996
+ base=base,
997
+ select_list=select_list,
998
+ additional_columns=additional_columns,
999
+ predicate=where,
1000
+ sample_clause=sample_clause,
1001
+ is_snapshot=is_snapshot,
1002
+ iterator_cls=iterator_class,
1003
+ iterator_args=iterator_args,
1004
+ num_retained_versions=num_retained_versions,
1005
+ comment=comment,
1006
+ media_validation=media_validation,
1007
+ )
1008
+ tbl_id = UUID(md.tbl_md.tbl_id)
1009
+ self.store_tbl_md(tbl_id, dir._id, md.tbl_md, md.version_md, md.schema_version_md, ops)
1010
+ return tbl_id
1011
+
1012
+ view_id = create_fn()
1013
+ if not is_snapshot and base.is_mutable():
1014
+ # invalidate base's TableVersion instance, so that it gets reloaded with the new mutable view
1015
+ self._clear_tv_cache(base.tbl_id, base.tbl_version.effective_version)
1016
+ # base_tv = self.get_tbl_version(base.tbl_id, base.tbl_version.effective_version, validate_initialized=True)
1017
+ # view_handle = TableVersionHandle(view_id, effective_version=None)
1018
+ # base_tv.mutable_views.add(view_handle)
1019
+
1020
+ # finalize pending ops
1021
+ with self.begin_xact(tbl_id=view_id, for_write=True, finalize_pending_ops=True):
1022
+ return self.get_table_by_id(view_id)
1023
+
1024
+ def _clear_tv_cache(self, tbl_id: UUID, effective_version: Optional[int]) -> None:
1025
+ if (tbl_id, effective_version) in self._tbl_versions:
1026
+ tv = self._tbl_versions[tbl_id, effective_version]
1027
+ tv.is_validated = False
1028
+ del self._tbl_versions[tbl_id, effective_version]
768
1029
 
769
- @_retry_loop(for_write=True)
770
- def create_replica(
771
- self, path: Path, md: list[schema.FullTableMd], if_exists: IfExistsParam = IfExistsParam.ERROR
772
- ) -> None:
1030
+ def create_replica(self, path: Path, md: list[schema.FullTableMd]) -> None:
773
1031
  """
774
1032
  Creates table, table_version, and table_schema_version records for a replica with the given metadata.
775
1033
  The metadata should be presented in standard "ancestor order", with the table being replicated at
776
1034
  list position 0 and the (root) base table at list position -1.
777
-
778
- TODO: create_replica() also needs to create the store tables and populate them in order to make
779
- replica creation atomic.
780
1035
  """
1036
+ assert Env.get().in_xact
1037
+
781
1038
  tbl_id = UUID(md[0].tbl_md.tbl_id)
782
1039
 
783
- # First handle path collisions (if_exists='ignore' or 'replace' or etc).
784
- existing = self._handle_path_collision(path, View, False, if_exists)
785
- if existing is not None:
786
- if existing._id != tbl_id:
787
- raise excs.Error(
788
- f"An attempt was made to create a replica table at {path!r} with if_exists='ignore', "
789
- 'but a different table already exists at that location.'
790
- )
791
- assert isinstance(existing, View)
792
- return
1040
+ existing = self._handle_path_collision(path, Table, False, if_exists=IfExistsParam.IGNORE) # type: ignore[type-abstract]
1041
+ if existing is not None and existing._id != tbl_id:
1042
+ raise excs.Error(
1043
+ f'An attempt was made to create a replica table at {path!r}, '
1044
+ 'but a different table already exists at that location.'
1045
+ )
793
1046
 
794
1047
  # Ensure that the system directory exists.
795
1048
  self._create_dir(Path('_system', allow_system_paths=True), if_exists=IfExistsParam.IGNORE, parents=False)
796
1049
 
797
1050
  # Now check to see if this table already exists in the catalog.
798
- existing = Catalog.get().get_table_by_id(tbl_id)
1051
+ existing = self.get_table_by_id(tbl_id)
799
1052
  if existing is not None:
800
1053
  existing_path = Path(existing._path(), allow_system_paths=True)
801
- # It does exist. If it's a non-system table, that's an error: it's already been replicated.
802
- if not existing_path.is_system_path:
803
- raise excs.Error(
804
- f'That table has already been replicated as {existing._path()!r}. \n'
805
- f'Drop the existing replica if you wish to re-create it.'
806
- )
807
- # If it's a system table, then this means it was created at some point as the ancestor of some other
808
- # table (a snapshot-over-snapshot scenario). In that case, we simply move it to the new (named) location.
809
- self._move(existing_path, path)
810
-
811
- # Now store the metadata for this replica. In the case where the table already exists (and was just moved
812
- # into a named location), this will be a no-op, but it still serves to validate that the newly received
813
- # metadata is identical to what's in the catalog.
814
- self.__store_replica_md(path, md[0])
1054
+ if existing_path != path:
1055
+ # It does exist, under a different path from the specified one.
1056
+ if not existing_path.is_system_path:
1057
+ raise excs.Error(
1058
+ f'That table has already been replicated as {existing_path!r}.\n'
1059
+ f'Drop the existing replica if you wish to re-create it.'
1060
+ )
1061
+ # If it's a system table, then this means it was created at some point as the ancestor of some other
1062
+ # table (a snapshot-over-snapshot scenario). In that case, we simply move it to the new (named)
1063
+ # location.
1064
+ self._move(existing_path, path)
815
1065
 
816
- # Now store the metadata for all of this table's proper ancestors. If one or more proper ancestors
1066
+ # Now store the metadata for this replica's proper ancestors. If one or more proper ancestors
817
1067
  # do not yet exist in the store, they will be created as anonymous system tables.
818
- for ancestor_md in md[1:]:
1068
+ # We instantiate the ancestors starting with the base table and ending with the immediate parent of the
1069
+ # table being replicated.
1070
+ for ancestor_md in md[:0:-1]:
819
1071
  ancestor_id = UUID(ancestor_md.tbl_md.tbl_id)
820
- replica = Catalog.get().get_table_by_id(ancestor_id)
1072
+ replica = self.get_table_by_id(ancestor_id)
821
1073
  replica_path: Path
822
1074
  if replica is None:
823
1075
  # We've never seen this table before. Create a new anonymous system table for it.
@@ -828,12 +1080,22 @@ class Catalog:
828
1080
  # that was directly replicated by the user at some point). In either case, use the existing path.
829
1081
  replica_path = Path(replica._path(), allow_system_paths=True)
830
1082
 
831
- # Store the metadata; it could be a new version (in which case a new record will be created) or a
832
- # known version (in which case the newly received metadata will be validated as identical).
1083
+ # Store the metadata; it could be a new version (in which case a new record will be created), or a known
1084
+ # version (in which case the newly received metadata will be validated as identical).
1085
+ # If it's a new version, this will result in a new TableVersion record being created.
833
1086
  self.__store_replica_md(replica_path, ancestor_md)
834
1087
 
835
- # don't create TableVersion instances at this point, they would be superseded by calls to TV.create_replica()
836
- # in TableRestorer.restore()
1088
+ # Now we must clear cached metadata for the ancestor table, to force the next table operation to pick up
1089
+ # the new TableVersion instance. This is necessary because computed columns of descendant tables might
1090
+ # reference columns of the ancestor table that only exist in the new version.
1091
+ replica = Catalog.get().get_table_by_id(ancestor_id)
1092
+ assert replica is not None # If it didn't exist before, it must have been created by now.
1093
+ replica._tbl_version_path.clear_cached_md()
1094
+
1095
+ # Finally, store the metadata for the table being replicated; as before, it could be a new version or a known
1096
+ # version. If it's a new version, then a TableVersion record will be created, unless the table being replicated
1097
+ # is a pure snapshot.
1098
+ self.__store_replica_md(path, md[0])
837
1099
 
838
1100
  def __store_replica_md(self, path: Path, md: schema.FullTableMd) -> None:
839
1101
  _logger.info(f'Creating replica table at {path!r} with ID: {md.tbl_md.tbl_id}')
@@ -917,13 +1179,20 @@ class Catalog:
917
1179
 
918
1180
  self.store_tbl_md(UUID(tbl_id), None, new_tbl_md, new_version_md, new_schema_version_md)
919
1181
 
920
- @_retry_loop(for_write=False)
1182
+ if new_version_md is not None and not md.is_pure_snapshot:
1183
+ # It's a new version of a table that has a physical store, so we need to create a TableVersion instance.
1184
+ TableVersion.create_replica(md)
1185
+
1186
+ @retry_loop(for_write=False)
921
1187
  def get_table(self, path: Path) -> Table:
922
1188
  obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
923
1189
  assert isinstance(obj, Table)
1190
+ # We need to clear cached metadata from tbl_version_path, in case the schema has been changed
1191
+ # by another process.
1192
+ obj._tbl_version_path.clear_cached_md()
924
1193
  return obj
925
1194
 
926
- @_retry_loop(for_write=True)
1195
+ @retry_loop(for_write=True)
927
1196
  def drop_table(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
928
1197
  tbl = self._get_schema_object(
929
1198
  path,
@@ -933,7 +1202,7 @@ class Catalog:
933
1202
  lock_obj=False,
934
1203
  )
935
1204
  if tbl is None:
936
- _logger.info(f'Skipped table {str(path)!r} (does not exist).')
1205
+ _logger.info(f'Skipped table {path!r} (does not exist).')
937
1206
  return
938
1207
  assert isinstance(tbl, Table)
939
1208
 
@@ -941,7 +1210,7 @@ class Catalog:
941
1210
  # this is a mutable view of a mutable base;
942
1211
  # lock the base before the view, in order to avoid deadlocks with concurrent inserts/updates
943
1212
  base_id = tbl._tbl_version_path.base.tbl_id
944
- self._acquire_tbl_xlock(tbl_id=base_id, lock_mutable_tree=False)
1213
+ self._acquire_tbl_lock(tbl_id=base_id, for_write=True, lock_mutable_tree=False)
945
1214
 
946
1215
  self._drop_tbl(tbl, force=force, is_replace=False)
947
1216
 
@@ -956,7 +1225,7 @@ class Catalog:
956
1225
  in the same directory with the same name (which could lead to duplicate names if we get aborted)
957
1226
  """
958
1227
  self._acquire_dir_xlock(dir_id=tbl._dir_id)
959
- self._acquire_tbl_xlock(tbl_id=tbl._id, lock_mutable_tree=False)
1228
+ self._acquire_tbl_lock(tbl_id=tbl._id, for_write=True, lock_mutable_tree=False)
960
1229
 
961
1230
  view_ids = self.get_view_ids(tbl._id, for_update=True)
962
1231
  if len(view_ids) > 0:
@@ -980,8 +1249,9 @@ class Catalog:
980
1249
  # if this is a mutable view of a mutable base, advance the base's view_sn
981
1250
  if isinstance(tbl, View) and tbl._tbl_version_path.is_mutable() and tbl._tbl_version_path.base.is_mutable():
982
1251
  base_id = tbl._tbl_version_path.base.tbl_id
983
- base_tv = self.get_tbl_version(base_id, None)
1252
+ base_tv = self.get_tbl_version(base_id, None, validate_initialized=True)
984
1253
  base_tv.tbl_md.view_sn += 1
1254
+ self._modified_tvs.add(base_tv.handle)
985
1255
  result = Env.get().conn.execute(
986
1256
  sql.update(schema.Table.__table__)
987
1257
  .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
@@ -989,23 +1259,26 @@ class Catalog:
989
1259
  )
990
1260
  assert result.rowcount == 1, result.rowcount
991
1261
 
1262
+ if tbl._tbl_version is not None:
1263
+ # invalidate the TableVersion instance when we're done so that existing references to it can find out it
1264
+ # has been dropped
1265
+ self._modified_tvs.add(tbl._tbl_version)
992
1266
  tv = tbl._tbl_version.get() if tbl._tbl_version is not None else None
993
- if tv is not None:
1267
+ # if tv is not None:
1268
+ # tv = tbl._tbl_version.get()
1269
+ # # invalidate the TableVersion instance so that existing references to it can find out it has been dropped
1270
+ # tv.is_validated = False
1271
+ if tbl._tbl_version is not None:
1272
+ # drop the store table before deleting the Table record
994
1273
  tv = tbl._tbl_version.get()
995
- # invalidate the TableVersion instance so that existing references to it can find out it has been dropped
996
- tv.is_validated = False
1274
+ tv.drop()
997
1275
 
998
1276
  self.delete_tbl_md(tbl._id)
999
1277
  assert tbl._id in self._tbls
1000
1278
  del self._tbls[tbl._id]
1001
1279
  _logger.info(f'Dropped table `{tbl._path()}`.')
1002
1280
 
1003
- if tv is not None:
1004
- tv.drop()
1005
- assert (tv.id, tv.effective_version) in self._tbl_versions
1006
- del self._tbl_versions[tv.id, tv.effective_version]
1007
-
1008
- @_retry_loop(for_write=True)
1281
+ @retry_loop(for_write=True)
1009
1282
  def create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
1010
1283
  return self._create_dir(path, if_exists, parents)
1011
1284
 
@@ -1018,7 +1291,7 @@ class Catalog:
1018
1291
  # parent = self._get_schema_object(path.parent)
1019
1292
  # assert parent is not None
1020
1293
  # dir = Dir._create(parent._id, path.name)
1021
- # Env.get().console_logger.info(f'Created directory {str(path)!r}.')
1294
+ # Env.get().console_logger.info(f'Created directory {path!r}.')
1022
1295
  # return dir
1023
1296
 
1024
1297
  if parents:
@@ -1037,10 +1310,10 @@ class Catalog:
1037
1310
  return existing
1038
1311
  assert parent is not None
1039
1312
  dir = Dir._create(parent._id, path.name)
1040
- Env.get().console_logger.info(f'Created directory {str(path)!r}.')
1313
+ Env.get().console_logger.info(f'Created directory {path!r}.')
1041
1314
  return dir
1042
1315
 
1043
- @_retry_loop(for_write=True)
1316
+ @retry_loop(for_write=True)
1044
1317
  def drop_dir(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
1045
1318
  _, _, schema_obj = self._prepare_dir_op(
1046
1319
  drop_dir_path=path.parent,
@@ -1049,7 +1322,7 @@ class Catalog:
1049
1322
  raise_if_not_exists=if_not_exists == IfNotExistsParam.ERROR and not force,
1050
1323
  )
1051
1324
  if schema_obj is None:
1052
- _logger.info(f'Directory {str(path)!r} does not exist; skipped drop_dir().')
1325
+ _logger.info(f'Directory {path!r} does not exist; skipped drop_dir().')
1053
1326
  return
1054
1327
  self._drop_dir(schema_obj._id, path, force=force)
1055
1328
 
@@ -1062,7 +1335,7 @@ class Catalog:
1062
1335
  q = sql.select(sql.func.count()).select_from(schema.Table).where(schema.Table.dir_id == dir_id)
1063
1336
  num_tbls = conn.execute(q).scalar()
1064
1337
  if num_subdirs + num_tbls > 0:
1065
- raise excs.Error(f'Directory {str(dir_path)!r} is not empty.')
1338
+ raise excs.Error(f'Directory {dir_path!r} is not empty.')
1066
1339
 
1067
1340
  # drop existing subdirs
1068
1341
  self._acquire_dir_xlock(dir_id=dir_id)
@@ -1080,7 +1353,7 @@ class Catalog:
1080
1353
 
1081
1354
  # self.drop_dir(dir_id)
1082
1355
  conn.execute(sql.delete(schema.Dir).where(schema.Dir.id == dir_id))
1083
- _logger.info(f'Removed directory {str(dir_path)!r}.')
1356
+ _logger.info(f'Removed directory {dir_path!r}.')
1084
1357
 
1085
1358
  def get_view_ids(self, tbl_id: UUID, for_update: bool = False) -> list[UUID]:
1086
1359
  """Return the ids of views that directly reference the given table"""
@@ -1096,13 +1369,25 @@ class Catalog:
1096
1369
  result = [r[0] for r in conn.execute(q).all()]
1097
1370
  return result
1098
1371
 
1099
- def get_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
1372
+ def get_tbl_version(
1373
+ self,
1374
+ tbl_id: UUID,
1375
+ effective_version: Optional[int],
1376
+ check_pending_ops: Optional[bool] = None,
1377
+ validate_initialized: bool = False,
1378
+ ) -> Optional[TableVersion]:
1379
+ """
1380
+ Returns the TableVersion instance for the given table and version and updates the cache.
1381
+
1382
+ If present in the cache and the instance isn't validated, validates version and view_sn against the stored
1383
+ metadata.
1384
+ """
1100
1385
  # we need a transaction here, if we're not already in one; if this starts a new transaction,
1101
1386
  # the returned TableVersion instance will not be validated
1102
1387
  with self.begin_xact(for_write=False) as conn:
1103
1388
  tv = self._tbl_versions.get((tbl_id, effective_version))
1104
1389
  if tv is None:
1105
- tv = self._load_tbl_version(tbl_id, effective_version)
1390
+ tv = self._load_tbl_version(tbl_id, effective_version, check_pending_ops=check_pending_ops)
1106
1391
  elif not tv.is_validated:
1107
1392
  # only live instances are invalidated
1108
1393
  assert effective_version is None
@@ -1123,12 +1408,16 @@ class Catalog:
1123
1408
  f'(cached/current version: {tv.version}/{current_version}, '
1124
1409
  f'cached/current view_sn: {tv.tbl_md.view_sn}/{view_sn})'
1125
1410
  )
1126
- tv = self._load_tbl_version(tbl_id, None)
1411
+ tv = self._load_tbl_version(tbl_id, None, check_pending_ops=check_pending_ops)
1127
1412
  else:
1128
1413
  # the cached metadata is valid
1129
1414
  tv.is_validated = True
1130
1415
 
1131
- assert tv.is_validated
1416
+ assert tv.is_validated, f'{tbl_id}:{effective_version} not validated\n{tv.__dict__}\n{self._debug_str()}'
1417
+ if validate_initialized:
1418
+ assert tv.is_initialized, (
1419
+ f'{tbl_id}:{effective_version} not initialized\n{tv.__dict__}\n{self._debug_str()}'
1420
+ )
1132
1421
  return tv
1133
1422
 
1134
1423
  def remove_tbl_version(self, tbl_version: TableVersion) -> None:
@@ -1180,6 +1469,13 @@ class Catalog:
1180
1469
  from .view import View
1181
1470
 
1182
1471
  conn = Env.get().conn
1472
+
1473
+ # check for pending ops
1474
+ q = sql.select(sql.func.count()).where(schema.PendingTableOp.tbl_id == tbl_id)
1475
+ has_pending_ops = conn.execute(q).scalar() > 0
1476
+ if has_pending_ops:
1477
+ raise PendingTableOpsError(tbl_id)
1478
+
1183
1479
  q = (
1184
1480
  sql.select(schema.Table, schema.TableSchemaVersion)
1185
1481
  .join(schema.TableSchemaVersion)
@@ -1196,11 +1492,11 @@ class Catalog:
1196
1492
  row = conn.execute(q).one_or_none()
1197
1493
  if row is None:
1198
1494
  return None
1199
- tbl_record, schema_version_record = _unpack_row(row, [schema.Table, schema.TableSchemaVersion])
1495
+ tbl_record, _ = _unpack_row(row, [schema.Table, schema.TableSchemaVersion])
1200
1496
 
1201
1497
  tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
1202
1498
  view_md = tbl_md.view_md
1203
- if view_md is None:
1499
+ if view_md is None and not tbl_md.is_replica:
1204
1500
  # this is a base table
1205
1501
  if (tbl_id, None) not in self._tbl_versions:
1206
1502
  _ = self._load_tbl_version(tbl_id, None)
@@ -1210,15 +1506,16 @@ class Catalog:
1210
1506
 
1211
1507
  # this is a view; determine the sequence of TableVersions to load
1212
1508
  tbl_version_path: list[tuple[UUID, Optional[int]]] = []
1213
- schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
1214
- pure_snapshot = view_md.is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
1215
- if pure_snapshot:
1509
+ if tbl_md.is_pure_snapshot:
1216
1510
  # this is a pure snapshot, without a physical table backing it; we only need the bases
1217
1511
  pass
1218
1512
  else:
1219
- effective_version = 0 if view_md.is_snapshot else None # snapshots only have version 0
1513
+ effective_version = (
1514
+ 0 if view_md is not None and view_md.is_snapshot else None
1515
+ ) # snapshots only have version 0
1220
1516
  tbl_version_path.append((tbl_id, effective_version))
1221
- tbl_version_path.extend((UUID(tbl_id), version) for tbl_id, version in view_md.base_versions)
1517
+ if view_md is not None:
1518
+ tbl_version_path.extend((UUID(tbl_id), version) for tbl_id, version in view_md.base_versions)
1222
1519
 
1223
1520
  # load TableVersions, starting at the root
1224
1521
  base_path: Optional[TableVersionPath] = None
@@ -1228,11 +1525,11 @@ class Catalog:
1228
1525
  _ = self._load_tbl_version(id, effective_version)
1229
1526
  view_path = TableVersionPath(TableVersionHandle(id, effective_version), base=base_path)
1230
1527
  base_path = view_path
1231
- view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=pure_snapshot)
1528
+ view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=tbl_md.is_pure_snapshot)
1232
1529
  self._tbls[tbl_id] = view
1233
1530
  return view
1234
1531
 
1235
- @_retry_loop(for_write=False)
1532
+ @retry_loop(for_write=False)
1236
1533
  def collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
1237
1534
  """
1238
1535
  Returns the history of up to n versions of the table with the given UUID.
@@ -1342,6 +1639,7 @@ class Catalog:
1342
1639
  tbl_md: Optional[schema.TableMd],
1343
1640
  version_md: Optional[schema.TableVersionMd],
1344
1641
  schema_version_md: Optional[schema.TableSchemaVersionMd],
1642
+ pending_ops: Optional[list[TableOp]] = None,
1345
1643
  ) -> None:
1346
1644
  """
1347
1645
  Stores metadata to the DB.
@@ -1356,6 +1654,9 @@ class Catalog:
1356
1654
  If inserting `version_md` or `schema_version_md` would be a primary key violation, an exception will be raised.
1357
1655
  """
1358
1656
  assert self._in_write_xact
1657
+ assert version_md is None or version_md.created_at > 0.0
1658
+ assert pending_ops is None or len(pending_ops) > 0
1659
+ assert pending_ops is None or tbl_md is not None # if we write pending ops, we must also write new tbl_md
1359
1660
  session = Env.get().session
1360
1661
 
1361
1662
  # Construct and insert or update table record if requested.
@@ -1366,6 +1667,9 @@ class Catalog:
1366
1667
  assert tbl_md.current_schema_version == version_md.schema_version
1367
1668
  if schema_version_md is not None:
1368
1669
  assert tbl_md.current_schema_version == schema_version_md.schema_version
1670
+ if pending_ops is not None:
1671
+ tbl_md.has_pending_ops = True
1672
+
1369
1673
  if dir_id is not None:
1370
1674
  # We are inserting a record while creating a new table.
1371
1675
  tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_md))
@@ -1396,8 +1700,33 @@ class Catalog:
1396
1700
  tbl_id=tbl_id, schema_version=schema_version_md.schema_version, md=dataclasses.asdict(schema_version_md)
1397
1701
  )
1398
1702
  session.add(schema_version_record)
1703
+
1704
+ # make sure we don't have any pending ops
1705
+ assert session.query(schema.PendingTableOp).filter(schema.PendingTableOp.tbl_id == tbl_id).count() == 0
1706
+
1707
+ if pending_ops is not None:
1708
+ for op in pending_ops:
1709
+ op_record = schema.PendingTableOp(tbl_id=tbl_id, op_sn=op.op_sn, op=dataclasses.asdict(op))
1710
+ session.add(op_record)
1711
+
1399
1712
  session.flush() # Inform SQLAlchemy that we want to write these changes to the DB.
1400
1713
 
1714
+ def store_update_status(self, tbl_id: UUID, version: int, status: UpdateStatus) -> None:
1715
+ """Update the TableVersion.md.update_status field"""
1716
+ assert self._in_write_xact
1717
+ conn = Env.get().conn
1718
+
1719
+ stmt = (
1720
+ sql.update(schema.TableVersion)
1721
+ .where(schema.TableVersion.tbl_id == tbl_id, schema.TableVersion.version == version)
1722
+ .values(
1723
+ md=schema.TableVersion.md.op('||')({'additional_md': {'update_status': dataclasses.asdict(status)}})
1724
+ )
1725
+ )
1726
+
1727
+ res = conn.execute(stmt)
1728
+ assert res.rowcount == 1, res.rowcount
1729
+
1401
1730
  def delete_tbl_md(self, tbl_id: UUID) -> None:
1402
1731
  """
1403
1732
  Deletes all table metadata from the store for the given table UUID.
@@ -1405,6 +1734,7 @@ class Catalog:
1405
1734
  conn = Env.get().conn
1406
1735
  conn.execute(sql.delete(schema.TableSchemaVersion.__table__).where(schema.TableSchemaVersion.tbl_id == tbl_id))
1407
1736
  conn.execute(sql.delete(schema.TableVersion.__table__).where(schema.TableVersion.tbl_id == tbl_id))
1737
+ conn.execute(sql.delete(schema.PendingTableOp.__table__).where(schema.PendingTableOp.tbl_id == tbl_id))
1408
1738
  conn.execute(sql.delete(schema.Table.__table__).where(schema.Table.id == tbl_id))
1409
1739
 
1410
1740
  def load_replica_md(self, tbl: Table) -> list[schema.FullTableMd]:
@@ -1435,13 +1765,32 @@ class Catalog:
1435
1765
 
1436
1766
  return md
1437
1767
 
1438
- def _load_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
1768
+ def _load_tbl_version(
1769
+ self, tbl_id: UUID, effective_version: Optional[int], check_pending_ops: bool = True
1770
+ ) -> Optional[TableVersion]:
1439
1771
  """Creates TableVersion instance from stored metadata and registers it in _tbl_versions."""
1440
- tbl_md, _, schema_version_md = self.load_tbl_md(tbl_id, effective_version)
1772
+ tbl_md, version_md, schema_version_md = self.load_tbl_md(tbl_id, effective_version)
1441
1773
  view_md = tbl_md.view_md
1442
1774
 
1443
1775
  conn = Env.get().conn
1444
1776
 
1777
+ if check_pending_ops:
1778
+ pending_ops_q = (
1779
+ sql.select(sql.func.count())
1780
+ .select_from(schema.Table)
1781
+ .join(schema.PendingTableOp)
1782
+ .where(schema.PendingTableOp.tbl_id == tbl_id)
1783
+ .where(schema.Table.id == tbl_id)
1784
+ )
1785
+ if effective_version is not None:
1786
+ # we only care about pending ops if the requested version is the current version
1787
+ pending_ops_q = pending_ops_q.where(
1788
+ sql.text(f"({schema.Table.__table__}.md->>'current_version')::int = {effective_version}")
1789
+ )
1790
+ has_pending_ops = conn.execute(pending_ops_q).scalar() > 0
1791
+ if has_pending_ops:
1792
+ raise PendingTableOpsError(tbl_id)
1793
+
1445
1794
  # load mutable view ids for mutable TableVersions
1446
1795
  mutable_view_ids: list[UUID] = []
1447
1796
  # If this is a replica, effective_version should not be None. We see this today, because
@@ -1456,17 +1805,24 @@ class Catalog:
1456
1805
  )
1457
1806
  )
1458
1807
  mutable_view_ids = [r[0] for r in conn.execute(q).all()]
1808
+
1459
1809
  mutable_views = [TableVersionHandle(id, None) for id in mutable_view_ids]
1460
1810
 
1461
1811
  tbl_version: TableVersion
1462
1812
  if view_md is None:
1463
1813
  # this is a base table
1464
1814
  tbl_version = TableVersion(
1465
- tbl_id, tbl_md, effective_version, schema_version_md, mutable_views=mutable_views
1815
+ tbl_id, tbl_md, version_md, effective_version, schema_version_md, mutable_views=mutable_views
1466
1816
  )
1467
1817
  else:
1468
1818
  assert len(view_md.base_versions) > 0 # a view needs to have a base
1469
- pure_snapshot = view_md.is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
1819
+ # TODO: add TableVersionMd.is_pure_snapshot() and use that
1820
+ pure_snapshot = (
1821
+ view_md.is_snapshot
1822
+ and view_md.predicate is None
1823
+ and view_md.sample_clause is None
1824
+ and len(schema_version_md.columns) == 0
1825
+ )
1470
1826
  assert not pure_snapshot # a pure snapshot doesn't have a physical table backing it, no point in loading it
1471
1827
 
1472
1828
  base: TableVersionHandle
@@ -1480,6 +1836,7 @@ class Catalog:
1480
1836
  tbl_version = TableVersion(
1481
1837
  tbl_id,
1482
1838
  tbl_md,
1839
+ version_md,
1483
1840
  effective_version,
1484
1841
  schema_version_md,
1485
1842
  base_path=base_path,
@@ -1487,22 +1844,14 @@ class Catalog:
1487
1844
  mutable_views=mutable_views,
1488
1845
  )
1489
1846
 
1847
+ # register the instance before init()
1490
1848
  self._tbl_versions[tbl_id, effective_version] = tbl_version
1849
+ # register this instance as modified, so that it gets purged if the transaction fails, it may not be
1850
+ # fully initialized
1851
+ self._modified_tvs.add(tbl_version.handle)
1491
1852
  tbl_version.init()
1492
1853
  return tbl_version
1493
1854
 
1494
- def record_column_dependencies(self, tbl_version: TableVersion) -> None:
1495
- """Update self._column_dependencies. Only valid for non-snapshot versions."""
1496
- from pixeltable.exprs import Expr
1497
-
1498
- assert not tbl_version.is_snapshot
1499
- dependencies: dict[QColumnId, set[QColumnId]] = {}
1500
- for col in tbl_version.cols_by_id.values():
1501
- if col.value_expr_dict is None:
1502
- continue
1503
- dependencies[QColumnId(tbl_version.id, col.id)] = Expr.get_refd_column_ids(col.value_expr_dict)
1504
- self._column_dependencies[tbl_version.id] = dependencies
1505
-
1506
1855
  def _init_store(self) -> None:
1507
1856
  """One-time initialization of the stored catalog. Idempotent."""
1508
1857
  self.create_user(None)
@@ -1531,14 +1880,20 @@ class Catalog:
1531
1880
  obj, _, _ = self._prepare_dir_op(add_dir_path=path.parent, add_name=path.name)
1532
1881
 
1533
1882
  if if_exists == IfExistsParam.ERROR and obj is not None:
1534
- raise excs.Error(f'Path {str(path)!r} is an existing {type(obj)._display_name()}')
1883
+ raise excs.Error(f'Path {path!r} is an existing {obj._display_name()}')
1535
1884
  else:
1536
1885
  is_snapshot = isinstance(obj, View) and obj._tbl_version_path.is_snapshot()
1537
1886
  if obj is not None and (not isinstance(obj, expected_obj_type) or (expected_snapshot and not is_snapshot)):
1538
- obj_type_str = 'snapshot' if expected_snapshot else expected_obj_type._display_name()
1887
+ if expected_obj_type is Dir:
1888
+ obj_type_str = 'directory'
1889
+ elif expected_obj_type is InsertableTable:
1890
+ obj_type_str = 'table'
1891
+ elif expected_obj_type is View:
1892
+ obj_type_str = 'snapshot' if expected_snapshot else 'view'
1893
+ else:
1894
+ raise AssertionError()
1539
1895
  raise excs.Error(
1540
- f'Path {str(path)!r} already exists but is not a {obj_type_str}. '
1541
- f'Cannot {if_exists.name.lower()} it.'
1896
+ f'Path {path!r} already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.'
1542
1897
  )
1543
1898
 
1544
1899
  if obj is None:
@@ -1551,7 +1906,7 @@ class Catalog:
1551
1906
  dir_contents = self._get_dir_contents(obj._id)
1552
1907
  if len(dir_contents) > 0 and if_exists == IfExistsParam.REPLACE:
1553
1908
  raise excs.Error(
1554
- f'Directory {str(path)!r} already exists and is not empty. '
1909
+ f'Directory {path!r} already exists and is not empty. '
1555
1910
  'Use `if_exists="replace_force"` to replace it.'
1556
1911
  )
1557
1912
  self._drop_dir(obj._id, path, force=True)