pixeltable 0.4.3__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/catalog.py +526 -197
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/insertable_table.py +9 -9
- pixeltable/catalog/schema_object.py +9 -4
- pixeltable/catalog/table.py +45 -53
- pixeltable/catalog/table_version.py +214 -155
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/tbl_ops.py +44 -0
- pixeltable/catalog/view.py +47 -60
- pixeltable/dataframe.py +18 -5
- pixeltable/env.py +21 -4
- pixeltable/exec/data_row_batch.py +3 -1
- pixeltable/exec/in_memory_data_node.py +6 -7
- pixeltable/exprs/column_ref.py +2 -1
- pixeltable/functions/gemini.py +4 -4
- pixeltable/functions/openai.py +1 -2
- pixeltable/functions/video.py +2 -6
- pixeltable/globals.py +50 -25
- pixeltable/io/datarows.py +2 -1
- pixeltable/io/pandas.py +1 -0
- pixeltable/io/table_data_conduit.py +12 -13
- pixeltable/iterators/audio.py +17 -8
- pixeltable/iterators/image.py +5 -2
- pixeltable/metadata/schema.py +38 -1
- pixeltable/store.py +22 -1
- pixeltable/utils/media_store.py +11 -0
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.4.dist-info}/METADATA +1 -1
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.4.dist-info}/RECORD +33 -32
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.4.dist-info}/LICENSE +0 -0
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.4.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.4.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/catalog.py
CHANGED
|
@@ -14,6 +14,8 @@ import psycopg
|
|
|
14
14
|
import sqlalchemy as sql
|
|
15
15
|
|
|
16
16
|
from pixeltable import exceptions as excs
|
|
17
|
+
|
|
18
|
+
# from pixeltable import exceptions as excs, UpdateStatus
|
|
17
19
|
from pixeltable.env import Env
|
|
18
20
|
from pixeltable.iterators import ComponentIterator
|
|
19
21
|
from pixeltable.metadata import schema
|
|
@@ -28,6 +30,8 @@ from .table import Table
|
|
|
28
30
|
from .table_version import TableVersion
|
|
29
31
|
from .table_version_handle import TableVersionHandle
|
|
30
32
|
from .table_version_path import TableVersionPath
|
|
33
|
+
from .tbl_ops import TableOp
|
|
34
|
+
from .update_status import UpdateStatus
|
|
31
35
|
from .view import View
|
|
32
36
|
|
|
33
37
|
if TYPE_CHECKING:
|
|
@@ -70,18 +74,35 @@ _MAX_RETRIES = -1
|
|
|
70
74
|
T = TypeVar('T')
|
|
71
75
|
|
|
72
76
|
|
|
73
|
-
def
|
|
77
|
+
def retry_loop(
|
|
78
|
+
*, tbl: Optional[TableVersionPath] = None, for_write: bool, lock_mutable_tree: bool = False
|
|
79
|
+
) -> Callable[[Callable[..., T]], Callable[..., T]]:
|
|
74
80
|
def decorator(op: Callable[..., T]) -> Callable[..., T]:
|
|
75
81
|
@functools.wraps(op)
|
|
76
82
|
def loop(*args: Any, **kwargs: Any) -> T:
|
|
83
|
+
cat = Catalog.get()
|
|
84
|
+
# retry_loop() is reentrant
|
|
85
|
+
if cat._in_retry_loop:
|
|
86
|
+
return op(*args, **kwargs)
|
|
87
|
+
|
|
77
88
|
num_retries = 0
|
|
78
89
|
while True:
|
|
90
|
+
cat._in_retry_loop = True
|
|
79
91
|
try:
|
|
80
92
|
# in order for retry to work, we need to make sure that there aren't any prior db updates
|
|
81
93
|
# that are part of an ongoing transaction
|
|
82
94
|
assert not Env.get().in_xact
|
|
83
|
-
with Catalog.get().begin_xact(
|
|
95
|
+
with Catalog.get().begin_xact(
|
|
96
|
+
tbl=tbl,
|
|
97
|
+
for_write=for_write,
|
|
98
|
+
convert_db_excs=False,
|
|
99
|
+
lock_mutable_tree=lock_mutable_tree,
|
|
100
|
+
finalize_pending_ops=True,
|
|
101
|
+
):
|
|
84
102
|
return op(*args, **kwargs)
|
|
103
|
+
except PendingTableOpsError as e:
|
|
104
|
+
Env.get().console_logger.debug(f'retry_loop(): finalizing pending ops for {e.tbl_id}')
|
|
105
|
+
Catalog.get()._finalize_pending_ops(e.tbl_id)
|
|
85
106
|
except sql.exc.DBAPIError as e:
|
|
86
107
|
# TODO: what other exceptions should we be looking for?
|
|
87
108
|
if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
|
|
@@ -97,16 +118,31 @@ def _retry_loop(*, for_write: bool) -> Callable[[Callable[..., T]], Callable[...
|
|
|
97
118
|
# for informational/debugging purposes
|
|
98
119
|
_logger.debug(f'retry_loop(): passing along {e}')
|
|
99
120
|
raise
|
|
121
|
+
finally:
|
|
122
|
+
cat._in_retry_loop = False
|
|
100
123
|
|
|
101
124
|
return loop
|
|
102
125
|
|
|
103
126
|
return decorator
|
|
104
127
|
|
|
105
128
|
|
|
129
|
+
class PendingTableOpsError(Exception):
|
|
130
|
+
tbl_id: UUID
|
|
131
|
+
|
|
132
|
+
def __init__(self, tbl_id: UUID) -> None:
|
|
133
|
+
self.tbl_id = tbl_id
|
|
134
|
+
|
|
135
|
+
|
|
106
136
|
class Catalog:
|
|
107
137
|
"""The functional interface to getting access to catalog objects
|
|
108
138
|
|
|
109
|
-
All interface functions must be called in the context of a transaction, started with Catalog.begin_xact()
|
|
139
|
+
All interface functions must be called in the context of a transaction, started with Catalog.begin_xact() or
|
|
140
|
+
via retry_loop().
|
|
141
|
+
|
|
142
|
+
When calling functions that involve Table or TableVersion instances, the catalog needs to get a chance to finalize
|
|
143
|
+
pending ops against those tables. To that end,
|
|
144
|
+
- use begin_xact(tbl) or begin_xact(tbl_id) if only accessing a single table
|
|
145
|
+
- use retry_loop() when accessing multiple tables (eg, pxt.ls())
|
|
110
146
|
|
|
111
147
|
Caching and invalidation of metadata:
|
|
112
148
|
- Catalog caches TableVersion instances in order to avoid excessive metadata loading
|
|
@@ -132,6 +168,8 @@ class Catalog:
|
|
|
132
168
|
_tbls: dict[UUID, Table]
|
|
133
169
|
_in_write_xact: bool # True if we're in a write transaction
|
|
134
170
|
_x_locked_tbl_ids: set[UUID] # non-empty for write transactions
|
|
171
|
+
_modified_tvs: set[TableVersionHandle] # TableVersion instances modified in the current transaction
|
|
172
|
+
_in_retry_loop: bool
|
|
135
173
|
|
|
136
174
|
# cached column dependencies
|
|
137
175
|
# - key: table id, value: mapping from column id to its dependencies
|
|
@@ -164,6 +202,8 @@ class Catalog:
|
|
|
164
202
|
self._tbls = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
|
|
165
203
|
self._in_write_xact = False
|
|
166
204
|
self._x_locked_tbl_ids = set()
|
|
205
|
+
self._modified_tvs = set()
|
|
206
|
+
self._in_retry_loop = False
|
|
167
207
|
self._column_dependencies = {}
|
|
168
208
|
self._column_dependents = None
|
|
169
209
|
self._init_store()
|
|
@@ -214,9 +254,11 @@ class Catalog:
|
|
|
214
254
|
self,
|
|
215
255
|
*,
|
|
216
256
|
tbl: Optional[TableVersionPath] = None,
|
|
257
|
+
tbl_id: Optional[UUID] = None,
|
|
217
258
|
for_write: bool = False,
|
|
218
259
|
lock_mutable_tree: bool = False,
|
|
219
260
|
convert_db_excs: bool = True,
|
|
261
|
+
finalize_pending_ops: bool = True,
|
|
220
262
|
) -> Iterator[sql.Connection]:
|
|
221
263
|
"""
|
|
222
264
|
Return a context manager that yields a connection to the database. Idempotent.
|
|
@@ -227,7 +269,7 @@ class Catalog:
|
|
|
227
269
|
If tbl != None, follows this locking protocol:
|
|
228
270
|
- validates/reloads the TableVersion instances of tbl's ancestors (in the hope that this reduces potential
|
|
229
271
|
SerializationErrors later on)
|
|
230
|
-
- if for_write == True, x-locks Table record (by updating Table.lock_dummy; see
|
|
272
|
+
- if for_write == True, x-locks Table record (by updating Table.lock_dummy; see _acquire_tbl_lock())
|
|
231
273
|
- if for_write == False, validates TableVersion instance
|
|
232
274
|
- if lock_mutable_tree == True, also x-locks all mutable views of the table
|
|
233
275
|
- this needs to be done in a retry loop, because Postgres can decide to abort the transaction
|
|
@@ -237,10 +279,14 @@ class Catalog:
|
|
|
237
279
|
|
|
238
280
|
If convert_db_excs == True, converts DBAPIErrors into excs.Errors.
|
|
239
281
|
"""
|
|
282
|
+
assert tbl is None or tbl_id is None # at most one can be specified
|
|
240
283
|
if Env.get().in_xact:
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
284
|
+
# make sure that we requested the required table lock at the beginning of the transaction
|
|
285
|
+
if for_write:
|
|
286
|
+
if tbl is not None:
|
|
287
|
+
assert tbl.tbl_id in self._x_locked_tbl_ids, f'{tbl.tbl_id} not in {self._x_locked_tbl_ids}'
|
|
288
|
+
elif tbl_id is not None:
|
|
289
|
+
assert tbl_id in self._x_locked_tbl_ids, f'{tbl_id} not in {self._x_locked_tbl_ids}'
|
|
244
290
|
yield Env.get().conn
|
|
245
291
|
return
|
|
246
292
|
|
|
@@ -252,33 +298,66 @@ class Catalog:
|
|
|
252
298
|
# )
|
|
253
299
|
# _logger.debug(f'begin_xact(): {tv_msg}')
|
|
254
300
|
num_retries = 0
|
|
301
|
+
pending_ops_tbl_id: Optional[UUID] = None
|
|
302
|
+
has_exc = False # True if we exited the 'with ...begin_xact()' block with an exception
|
|
255
303
|
while True:
|
|
304
|
+
if pending_ops_tbl_id is not None:
|
|
305
|
+
Env.get().console_logger.debug(f'begin_xact(): finalizing pending ops for {pending_ops_tbl_id}')
|
|
306
|
+
self._finalize_pending_ops(pending_ops_tbl_id)
|
|
307
|
+
pending_ops_tbl_id = None
|
|
308
|
+
|
|
256
309
|
try:
|
|
257
|
-
self._in_write_xact =
|
|
310
|
+
self._in_write_xact = for_write
|
|
258
311
|
self._x_locked_tbl_ids = set()
|
|
312
|
+
self._modified_tvs = set()
|
|
259
313
|
self._column_dependents = None
|
|
314
|
+
has_exc = False
|
|
260
315
|
|
|
261
|
-
with Env.get().begin_xact() as conn:
|
|
262
|
-
if tbl is not None:
|
|
316
|
+
with Env.get().begin_xact(for_write=for_write) as conn:
|
|
317
|
+
if tbl is not None or tbl_id is not None:
|
|
263
318
|
try:
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
319
|
+
target: Optional[TableVersionHandle] = None
|
|
320
|
+
if tbl is not None:
|
|
321
|
+
if self._acquire_path_locks(
|
|
322
|
+
tbl=tbl,
|
|
323
|
+
for_write=for_write,
|
|
324
|
+
lock_mutable_tree=lock_mutable_tree,
|
|
325
|
+
check_pending_ops=finalize_pending_ops,
|
|
326
|
+
):
|
|
327
|
+
target = tbl.tbl_version
|
|
328
|
+
else:
|
|
329
|
+
target = self._acquire_tbl_lock(
|
|
330
|
+
tbl_id=tbl_id,
|
|
331
|
+
for_write=for_write,
|
|
332
|
+
lock_mutable_tree=lock_mutable_tree,
|
|
333
|
+
raise_if_not_exists=True,
|
|
334
|
+
check_pending_ops=finalize_pending_ops,
|
|
335
|
+
)
|
|
336
|
+
|
|
337
|
+
if target is None:
|
|
338
|
+
# didn't get the write lock
|
|
339
|
+
for_write = False
|
|
340
|
+
elif for_write:
|
|
341
|
+
# we know at this point that target is mutable because we got the X-lock
|
|
342
|
+
if lock_mutable_tree and not target.is_snapshot:
|
|
343
|
+
self._x_locked_tbl_ids = self._get_mutable_tree(target.id)
|
|
274
344
|
self._compute_column_dependents(self._x_locked_tbl_ids)
|
|
275
345
|
else:
|
|
276
|
-
self._x_locked_tbl_ids = {
|
|
346
|
+
self._x_locked_tbl_ids = {target.id}
|
|
277
347
|
if _logger.isEnabledFor(logging.DEBUG):
|
|
278
348
|
# validate only when we don't see errors
|
|
279
349
|
self.validate()
|
|
280
350
|
|
|
351
|
+
except PendingTableOpsError as e:
|
|
352
|
+
has_exc = True
|
|
353
|
+
if finalize_pending_ops:
|
|
354
|
+
# we remember which table id to finalize
|
|
355
|
+
pending_ops_tbl_id = e.tbl_id
|
|
356
|
+
# raise to abort the transaction
|
|
357
|
+
raise
|
|
358
|
+
|
|
281
359
|
except sql.exc.DBAPIError as e:
|
|
360
|
+
has_exc = True
|
|
282
361
|
if isinstance(
|
|
283
362
|
e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)
|
|
284
363
|
) and (num_retries < _MAX_RETRIES or _MAX_RETRIES == -1):
|
|
@@ -289,11 +368,20 @@ class Catalog:
|
|
|
289
368
|
else:
|
|
290
369
|
raise
|
|
291
370
|
|
|
292
|
-
self._in_write_xact = for_write
|
|
293
371
|
yield conn
|
|
294
372
|
return
|
|
295
373
|
|
|
374
|
+
except PendingTableOpsError:
|
|
375
|
+
has_exc = True
|
|
376
|
+
if pending_ops_tbl_id is not None:
|
|
377
|
+
# the next iteration of the loop will deal with pending ops for this table id
|
|
378
|
+
continue
|
|
379
|
+
else:
|
|
380
|
+
# we got this exception after getting the initial table locks and therefore need to abort
|
|
381
|
+
raise
|
|
382
|
+
|
|
296
383
|
except sql.exc.DBAPIError as e:
|
|
384
|
+
has_exc = True
|
|
297
385
|
# we got some db error during the actual operation (not just while trying to get locks on the metadata
|
|
298
386
|
# records): we convert these into Errors, if asked to do so, and abort
|
|
299
387
|
# TODO: what other concurrency-related exceptions should we expect?
|
|
@@ -301,12 +389,19 @@ class Catalog:
|
|
|
301
389
|
# we always convert UndefinedTable exceptions (they can't be retried)
|
|
302
390
|
if isinstance(e.orig, psycopg.errors.UndefinedTable):
|
|
303
391
|
# the table got dropped in the middle of the table operation
|
|
304
|
-
|
|
392
|
+
tbl_name = tbl.tbl_name() if tbl is not None else str(tbl_id) if tbl_id is not None else '?'
|
|
393
|
+
_logger.debug(f'Exception: undefined table ({tbl_name}): Caught {type(e.orig)}: {e!r}')
|
|
305
394
|
assert tbl is not None
|
|
306
|
-
raise excs.Error(f'Table was dropped: {
|
|
395
|
+
raise excs.Error(f'Table was dropped: {tbl_name}') from None
|
|
307
396
|
elif isinstance(e.orig, psycopg.errors.SerializationFailure) and convert_db_excs:
|
|
308
397
|
# we still got a serialization error, despite getting x-locks at the beginning
|
|
309
|
-
msg
|
|
398
|
+
msg: str
|
|
399
|
+
if tbl is not None:
|
|
400
|
+
msg = f'{tbl.tbl_name()} ({tbl.tbl_id})'
|
|
401
|
+
elif tbl_id is not None:
|
|
402
|
+
msg = f'{tbl_id}'
|
|
403
|
+
else:
|
|
404
|
+
msg = ''
|
|
310
405
|
_logger.debug(f'Exception: serialization failure: {msg} ({e})')
|
|
311
406
|
raise excs.Error(
|
|
312
407
|
'That Pixeltable operation could not be completed because it conflicted with another '
|
|
@@ -316,6 +411,10 @@ class Catalog:
|
|
|
316
411
|
else:
|
|
317
412
|
raise
|
|
318
413
|
|
|
414
|
+
except:
|
|
415
|
+
has_exc = True
|
|
416
|
+
raise
|
|
417
|
+
|
|
319
418
|
finally:
|
|
320
419
|
self._in_write_xact = False
|
|
321
420
|
self._x_locked_tbl_ids = set()
|
|
@@ -327,12 +426,24 @@ class Catalog:
|
|
|
327
426
|
_logger.debug(f'invalidating table version {tv.id}:None (tv={id(tv):x})')
|
|
328
427
|
tv.is_validated = False
|
|
329
428
|
|
|
429
|
+
if has_exc:
|
|
430
|
+
# purge all modified TableVersion instances, we can't guarantee they are still consistent with the
|
|
431
|
+
# stored metadata
|
|
432
|
+
for handle in self._modified_tvs:
|
|
433
|
+
self._clear_tv_cache(handle.id, handle.effective_version)
|
|
434
|
+
self._modified_tvs = set()
|
|
435
|
+
|
|
330
436
|
@property
|
|
331
437
|
def in_write_xact(self) -> bool:
|
|
332
438
|
return self._in_write_xact
|
|
333
439
|
|
|
334
440
|
def _acquire_path_locks(
|
|
335
|
-
self,
|
|
441
|
+
self,
|
|
442
|
+
*,
|
|
443
|
+
tbl: TableVersionPath,
|
|
444
|
+
for_write: bool = False,
|
|
445
|
+
lock_mutable_tree: bool = False,
|
|
446
|
+
check_pending_ops: Optional[bool] = None,
|
|
336
447
|
) -> bool:
|
|
337
448
|
"""
|
|
338
449
|
Path locking protocol:
|
|
@@ -341,33 +452,49 @@ class Catalog:
|
|
|
341
452
|
- refresh cached TableVersion of tbl or get X-lock, depending on for_write
|
|
342
453
|
- if lock_mutable_tree, also X-lock all mutable views of tbl
|
|
343
454
|
|
|
344
|
-
Returns False if trying to lock a pure snapshot with for_write == True
|
|
345
455
|
Raises Error if tbl doesn't exist.
|
|
456
|
+
Return False if the lock couldn't be acquired (X-lock on a non-mutable table), True otherwise.
|
|
346
457
|
"""
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
458
|
+
path_handles = tbl.get_tbl_versions()
|
|
459
|
+
read_handles = path_handles[:0:-1] if for_write else path_handles[::-1]
|
|
460
|
+
for handle in read_handles:
|
|
461
|
+
# update cache
|
|
462
|
+
_ = self.get_tbl_version(handle.id, handle.effective_version, validate_initialized=True)
|
|
350
463
|
if not for_write:
|
|
351
464
|
return True # nothing left to lock
|
|
352
|
-
|
|
465
|
+
handle = self._acquire_tbl_lock(
|
|
466
|
+
tbl_id=tbl.tbl_id,
|
|
467
|
+
for_write=True,
|
|
468
|
+
lock_mutable_tree=lock_mutable_tree,
|
|
469
|
+
raise_if_not_exists=True,
|
|
470
|
+
check_pending_ops=check_pending_ops,
|
|
471
|
+
)
|
|
472
|
+
# update cache
|
|
473
|
+
_ = self.get_tbl_version(path_handles[0].id, path_handles[0].effective_version, validate_initialized=True)
|
|
474
|
+
return handle is not None
|
|
353
475
|
|
|
354
|
-
def
|
|
476
|
+
def _acquire_tbl_lock(
|
|
355
477
|
self,
|
|
356
478
|
*,
|
|
479
|
+
for_write: bool,
|
|
357
480
|
tbl_id: Optional[UUID] = None,
|
|
358
481
|
dir_id: Optional[UUID] = None,
|
|
359
482
|
tbl_name: Optional[str] = None,
|
|
360
483
|
lock_mutable_tree: bool = False,
|
|
361
|
-
raise_if_not_exists: bool =
|
|
362
|
-
|
|
363
|
-
|
|
484
|
+
raise_if_not_exists: bool = True,
|
|
485
|
+
check_pending_ops: Optional[bool] = None,
|
|
486
|
+
) -> Optional[TableVersionHandle]:
|
|
487
|
+
"""
|
|
488
|
+
For writes: force acquisition of an X-lock on a Table record via a blind update.
|
|
364
489
|
|
|
365
490
|
Either tbl_id or dir_id/tbl_name need to be specified.
|
|
366
491
|
Returns True if the table was locked, False if it was a snapshot or not found.
|
|
367
492
|
If lock_mutable_tree, recursively locks all mutable views of the table.
|
|
368
493
|
|
|
369
|
-
Returns
|
|
494
|
+
Returns a handle to what was locked, None if the lock couldn't be acquired (eg, X-lock on a non-mutable table).
|
|
370
495
|
"""
|
|
496
|
+
assert (tbl_id is not None) != (dir_id is not None and tbl_name is not None)
|
|
497
|
+
assert (dir_id is None) == (tbl_name is None)
|
|
371
498
|
where_clause: sql.ColumnElement
|
|
372
499
|
if tbl_id is not None:
|
|
373
500
|
where_clause = schema.Table.id == tbl_id
|
|
@@ -378,26 +505,130 @@ class Catalog:
|
|
|
378
505
|
where_clause = sql.and_(where_clause, schema.Table.md['user'].astext == Env.get().user)
|
|
379
506
|
|
|
380
507
|
conn = Env.get().conn
|
|
381
|
-
|
|
508
|
+
q = sql.select(schema.Table).where(where_clause)
|
|
509
|
+
if for_write:
|
|
510
|
+
q = q.with_for_update(nowait=True)
|
|
511
|
+
row = conn.execute(q).one_or_none()
|
|
382
512
|
if row is None:
|
|
383
513
|
if raise_if_not_exists:
|
|
384
514
|
raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
|
|
385
|
-
return
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
if
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
515
|
+
return None # nothing to lock
|
|
516
|
+
tbl_md = schema.md_from_dict(schema.TableMd, row.md)
|
|
517
|
+
if for_write and tbl_md.is_mutable:
|
|
518
|
+
conn.execute(sql.update(schema.Table).values(lock_dummy=1).where(where_clause))
|
|
519
|
+
|
|
520
|
+
if check_pending_ops:
|
|
521
|
+
# check for pending ops after getting table lock
|
|
522
|
+
pending_ops_q = sql.select(sql.func.count()).where(schema.PendingTableOp.tbl_id == row.id)
|
|
523
|
+
has_pending_ops = conn.execute(pending_ops_q).scalar() > 0
|
|
524
|
+
if has_pending_ops:
|
|
525
|
+
raise PendingTableOpsError(row.id)
|
|
526
|
+
|
|
527
|
+
if for_write and not tbl_md.is_mutable:
|
|
528
|
+
return None # nothing to lock
|
|
529
|
+
|
|
530
|
+
effective_version = tbl_md.current_version if tbl_md.is_snapshot else None
|
|
531
|
+
if tbl_md.is_mutable and lock_mutable_tree:
|
|
532
|
+
# also lock mutable views
|
|
533
|
+
tv = self.get_tbl_version(tbl_id, effective_version, validate_initialized=True)
|
|
534
|
+
for view in tv.mutable_views:
|
|
535
|
+
self._acquire_tbl_lock(
|
|
536
|
+
for_write=for_write,
|
|
537
|
+
tbl_id=view.id,
|
|
538
|
+
lock_mutable_tree=lock_mutable_tree,
|
|
539
|
+
raise_if_not_exists=raise_if_not_exists,
|
|
540
|
+
check_pending_ops=check_pending_ops,
|
|
541
|
+
)
|
|
542
|
+
return TableVersionHandle(tbl_id, effective_version)
|
|
543
|
+
|
|
544
|
+
def _finalize_pending_ops(self, tbl_id: UUID) -> None:
|
|
545
|
+
"""Finalizes all pending ops for the given table."""
|
|
546
|
+
num_retries = 0
|
|
547
|
+
while True:
|
|
548
|
+
try:
|
|
549
|
+
tbl_version: int
|
|
550
|
+
op: Optional[TableOp] = None
|
|
551
|
+
delete_next_op_stmt: sql.Delete
|
|
552
|
+
reset_has_pending_stmt: sql.Update
|
|
553
|
+
with self.begin_xact(
|
|
554
|
+
tbl_id=tbl_id, for_write=True, convert_db_excs=False, finalize_pending_ops=False
|
|
555
|
+
) as conn:
|
|
556
|
+
q = (
|
|
557
|
+
sql.select(schema.Table.md, schema.PendingTableOp)
|
|
558
|
+
.select_from(schema.Table)
|
|
559
|
+
.join(schema.PendingTableOp)
|
|
560
|
+
.where(schema.Table.id == tbl_id)
|
|
561
|
+
.where(schema.PendingTableOp.tbl_id == tbl_id)
|
|
562
|
+
.order_by(schema.PendingTableOp.op_sn)
|
|
563
|
+
.limit(1)
|
|
564
|
+
.with_for_update()
|
|
565
|
+
)
|
|
566
|
+
row = conn.execute(q).one_or_none()
|
|
567
|
+
if row is None:
|
|
568
|
+
return
|
|
569
|
+
tbl_version = row.md.get('current_version')
|
|
570
|
+
op = schema.md_from_dict(TableOp, row.op)
|
|
571
|
+
delete_next_op_stmt = sql.delete(schema.PendingTableOp).where(
|
|
572
|
+
schema.PendingTableOp.tbl_id == tbl_id, schema.PendingTableOp.op_sn == row.op_sn
|
|
573
|
+
)
|
|
574
|
+
reset_has_pending_stmt = (
|
|
575
|
+
sql.update(schema.Table)
|
|
576
|
+
.where(schema.Table.id == tbl_id)
|
|
577
|
+
.values(md=schema.Table.md.op('||')({'has_pending_ops': False}))
|
|
578
|
+
)
|
|
579
|
+
|
|
580
|
+
if op.needs_xact:
|
|
581
|
+
tv = self.get_tbl_version(
|
|
582
|
+
tbl_id, tbl_version, check_pending_ops=False, validate_initialized=True
|
|
583
|
+
)
|
|
584
|
+
tv.exec_op(op)
|
|
585
|
+
conn.execute(delete_next_op_stmt)
|
|
586
|
+
if op.op_sn == op.num_ops - 1:
|
|
587
|
+
conn.execute(reset_has_pending_stmt)
|
|
588
|
+
continue
|
|
589
|
+
|
|
590
|
+
# this op runs outside of a transaction
|
|
591
|
+
tv = self.get_tbl_version(tbl_id, tbl_version, check_pending_ops=False, validate_initialized=True)
|
|
592
|
+
tv.exec_op(op)
|
|
593
|
+
with self.begin_xact(
|
|
594
|
+
tbl_id=tbl_id, for_write=True, convert_db_excs=False, finalize_pending_ops=False
|
|
595
|
+
) as conn:
|
|
596
|
+
conn.execute(delete_next_op_stmt)
|
|
597
|
+
if op.op_sn == op.num_ops - 1:
|
|
598
|
+
conn.execute(reset_has_pending_stmt)
|
|
599
|
+
|
|
600
|
+
except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
|
|
601
|
+
# TODO: why are we still seeing these here, instead of them getting taken care of by the retry
|
|
602
|
+
# logic of begin_xact()?
|
|
603
|
+
if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
|
|
604
|
+
num_retries += 1
|
|
605
|
+
log_msg: str
|
|
606
|
+
if op is not None:
|
|
607
|
+
log_msg = f'finalize_pending_ops(): retrying ({num_retries}) op {op!s} after {type(e.orig)}'
|
|
608
|
+
else:
|
|
609
|
+
log_msg = f'finalize_pending_ops(): retrying ({num_retries}) after {type(e.orig)}'
|
|
610
|
+
Env.get().console_logger.debug(log_msg)
|
|
611
|
+
time.sleep(random.uniform(0.1, 0.5))
|
|
612
|
+
continue
|
|
613
|
+
else:
|
|
614
|
+
raise
|
|
615
|
+
except Exception as e:
|
|
616
|
+
Env.get().console_logger.debug(f'finalize_pending_ops(): caught {e}')
|
|
617
|
+
raise
|
|
618
|
+
|
|
619
|
+
num_retries = 0
|
|
620
|
+
|
|
621
|
+
def _debug_str(self) -> str:
|
|
622
|
+
tv_str = '\n'.join(str(k) for k in self._tbl_versions)
|
|
623
|
+
tbl_str = '\n'.join(str(k) for k in self._tbls)
|
|
624
|
+
return f'tbl_versions:\n{tv_str}\ntbls:\n{tbl_str}'
|
|
397
625
|
|
|
398
626
|
def _get_mutable_tree(self, tbl_id: UUID) -> set[UUID]:
|
|
399
627
|
"""Returns ids of all tables that form the tree of mutable views starting at tbl_id; includes the root."""
|
|
400
|
-
|
|
628
|
+
assert (tbl_id, None) in self._tbl_versions, (
|
|
629
|
+
f'({tbl_id}, None) not in {self._tbl_versions.keys()}\n{self._debug_str()}'
|
|
630
|
+
)
|
|
631
|
+
tv = self.get_tbl_version(tbl_id, None, validate_initialized=True)
|
|
401
632
|
result: set[UUID] = {tv.id}
|
|
402
633
|
for view in tv.mutable_views:
|
|
403
634
|
result.update(self._get_mutable_tree(view.id))
|
|
@@ -408,7 +639,9 @@ class Catalog:
|
|
|
408
639
|
assert self._column_dependents is None
|
|
409
640
|
self._column_dependents = defaultdict(set)
|
|
410
641
|
for tbl_id in mutable_tree:
|
|
411
|
-
assert tbl_id in self._column_dependencies
|
|
642
|
+
assert tbl_id in self._column_dependencies, (
|
|
643
|
+
f'{tbl_id} not in {self._column_dependencies.keys()}\n{self._debug_str()}'
|
|
644
|
+
)
|
|
412
645
|
for col, dependencies in self._column_dependencies[tbl_id].items():
|
|
413
646
|
for dependency in dependencies:
|
|
414
647
|
if dependency.tbl_id not in mutable_tree:
|
|
@@ -416,13 +649,25 @@ class Catalog:
|
|
|
416
649
|
dependents = self._column_dependents[dependency]
|
|
417
650
|
dependents.add(col)
|
|
418
651
|
|
|
652
|
+
def record_column_dependencies(self, tbl_version: TableVersion) -> None:
|
|
653
|
+
"""Update self._column_dependencies. Only valid for mutable versions."""
|
|
654
|
+
from pixeltable.exprs import Expr
|
|
655
|
+
|
|
656
|
+
assert tbl_version.is_mutable
|
|
657
|
+
dependencies: dict[QColumnId, set[QColumnId]] = {}
|
|
658
|
+
for col in tbl_version.cols_by_id.values():
|
|
659
|
+
if col.value_expr_dict is None:
|
|
660
|
+
continue
|
|
661
|
+
dependencies[QColumnId(tbl_version.id, col.id)] = Expr.get_refd_column_ids(col.value_expr_dict)
|
|
662
|
+
self._column_dependencies[tbl_version.id] = dependencies
|
|
663
|
+
|
|
419
664
|
def get_column_dependents(self, tbl_id: UUID, col_id: int) -> set[Column]:
|
|
420
665
|
"""Return all Columns that transitively depend on the given column."""
|
|
421
666
|
assert self._column_dependents is not None
|
|
422
667
|
dependents = self._column_dependents[QColumnId(tbl_id, col_id)]
|
|
423
668
|
result: set[Column] = set()
|
|
424
669
|
for dependent in dependents:
|
|
425
|
-
tv = self.get_tbl_version(dependent.tbl_id, None)
|
|
670
|
+
tv = self.get_tbl_version(dependent.tbl_id, None, validate_initialized=True)
|
|
426
671
|
col = tv.cols_by_id[dependent.col_id]
|
|
427
672
|
result.add(col)
|
|
428
673
|
return result
|
|
@@ -471,7 +716,7 @@ class Catalog:
|
|
|
471
716
|
dir_entries: dict[str, Catalog.DirEntry]
|
|
472
717
|
table: Optional[schema.Table]
|
|
473
718
|
|
|
474
|
-
@
|
|
719
|
+
@retry_loop(for_write=False)
|
|
475
720
|
def get_dir_contents(self, dir_path: Path, recursive: bool = False) -> dict[str, DirEntry]:
|
|
476
721
|
dir = self._get_schema_object(dir_path, expected=Dir, raise_if_not_exists=True)
|
|
477
722
|
return self._get_dir_contents(dir._id, recursive=recursive)
|
|
@@ -498,7 +743,7 @@ class Catalog:
|
|
|
498
743
|
|
|
499
744
|
return result
|
|
500
745
|
|
|
501
|
-
@
|
|
746
|
+
@retry_loop(for_write=True)
|
|
502
747
|
def move(self, path: Path, new_path: Path) -> None:
|
|
503
748
|
self._move(path, new_path)
|
|
504
749
|
|
|
@@ -540,6 +785,7 @@ class Catalog:
|
|
|
540
785
|
- if both add and drop (= two directories are involved), lock the directories in a pre-determined order
|
|
541
786
|
(in this case, by name) in order to prevent deadlocks between concurrent directory modifications
|
|
542
787
|
"""
|
|
788
|
+
assert drop_expected in (None, Table, Dir), drop_expected
|
|
543
789
|
assert (add_dir_path is None) == (add_name is None)
|
|
544
790
|
assert (drop_dir_path is None) == (drop_name is None)
|
|
545
791
|
dir_paths: set[Path] = set()
|
|
@@ -553,7 +799,7 @@ class Catalog:
|
|
|
553
799
|
for p in sorted(dir_paths):
|
|
554
800
|
dir = self._get_dir(p, lock_dir=True)
|
|
555
801
|
if dir is None:
|
|
556
|
-
raise excs.Error(f'Directory {
|
|
802
|
+
raise excs.Error(f'Directory {p!r} does not exist.')
|
|
557
803
|
if p == add_dir_path:
|
|
558
804
|
add_dir = dir
|
|
559
805
|
if p == drop_dir_path:
|
|
@@ -564,19 +810,17 @@ class Catalog:
|
|
|
564
810
|
add_obj = self._get_dir_entry(add_dir.id, add_name, lock_entry=True)
|
|
565
811
|
if add_obj is not None and raise_if_exists:
|
|
566
812
|
add_path = add_dir_path.append(add_name)
|
|
567
|
-
raise excs.Error(f'Path {
|
|
813
|
+
raise excs.Error(f'Path {add_path!r} already exists.')
|
|
568
814
|
|
|
569
815
|
drop_obj: Optional[SchemaObject] = None
|
|
570
816
|
if drop_dir is not None:
|
|
571
817
|
drop_path = drop_dir_path.append(drop_name)
|
|
572
818
|
drop_obj = self._get_dir_entry(drop_dir.id, drop_name, lock_entry=True)
|
|
573
819
|
if drop_obj is None and raise_if_not_exists:
|
|
574
|
-
raise excs.Error(f'Path {
|
|
820
|
+
raise excs.Error(f'Path {drop_path!r} does not exist.')
|
|
575
821
|
if drop_obj is not None and drop_expected is not None and not isinstance(drop_obj, drop_expected):
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
f'but is a {type(drop_obj)._display_name()}'
|
|
579
|
-
)
|
|
822
|
+
expected_name = 'table' if drop_expected is Table else 'directory'
|
|
823
|
+
raise excs.Error(f'{drop_path!r} needs to be a {expected_name} but is a {drop_obj._display_name()}')
|
|
580
824
|
|
|
581
825
|
add_dir_obj = Dir(add_dir.id, add_dir.parent_id, add_dir.md['name']) if add_dir is not None else None
|
|
582
826
|
return add_obj, add_dir_obj, drop_obj
|
|
@@ -602,7 +846,7 @@ class Catalog:
|
|
|
602
846
|
|
|
603
847
|
# check for table
|
|
604
848
|
if lock_entry:
|
|
605
|
-
self.
|
|
849
|
+
self._acquire_tbl_lock(for_write=True, dir_id=dir_id, raise_if_not_exists=False, tbl_name=name)
|
|
606
850
|
q = sql.select(schema.Table.id).where(
|
|
607
851
|
schema.Table.dir_id == dir_id,
|
|
608
852
|
schema.Table.md['name'].astext == name,
|
|
@@ -633,12 +877,12 @@ class Catalog:
|
|
|
633
877
|
- raise_if_not_exists is True and the path does not exist
|
|
634
878
|
- expected is not None and the existing object has a different type
|
|
635
879
|
"""
|
|
880
|
+
assert expected in (None, Table, Dir), expected
|
|
881
|
+
|
|
636
882
|
if path.is_root:
|
|
637
883
|
# the root dir
|
|
638
884
|
if expected is not None and expected is not Dir:
|
|
639
|
-
raise excs.Error(
|
|
640
|
-
f'{str(path)!r} needs to be a {expected._display_name()} but is a {Dir._display_name()}'
|
|
641
|
-
)
|
|
885
|
+
raise excs.Error(f'{path!r} needs to be a table but is a dir')
|
|
642
886
|
dir = self._get_dir(path, lock_dir=lock_obj)
|
|
643
887
|
if dir is None:
|
|
644
888
|
raise excs.Error(f'Unknown user: {Env.get().user}')
|
|
@@ -647,33 +891,32 @@ class Catalog:
|
|
|
647
891
|
parent_path = path.parent
|
|
648
892
|
parent_dir = self._get_dir(parent_path, lock_dir=lock_parent)
|
|
649
893
|
if parent_dir is None:
|
|
650
|
-
raise excs.Error(f'Directory {
|
|
894
|
+
raise excs.Error(f'Directory {parent_path!r} does not exist.')
|
|
651
895
|
obj = self._get_dir_entry(parent_dir.id, path.name, lock_entry=lock_obj)
|
|
652
896
|
|
|
653
897
|
if obj is None and raise_if_not_exists:
|
|
654
|
-
raise excs.Error(f'Path {
|
|
898
|
+
raise excs.Error(f'Path {path!r} does not exist.')
|
|
655
899
|
elif obj is not None and raise_if_exists:
|
|
656
|
-
raise excs.Error(f'Path {
|
|
900
|
+
raise excs.Error(f'Path {path!r} is an existing {obj._display_name()}.')
|
|
657
901
|
elif obj is not None and expected is not None and not isinstance(obj, expected):
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
)
|
|
902
|
+
expected_name = 'table' if expected is Table else 'directory'
|
|
903
|
+
raise excs.Error(f'{path!r} needs to be a {expected_name} but is a {obj._display_name()}.')
|
|
661
904
|
return obj
|
|
662
905
|
|
|
663
906
|
def get_table_by_id(self, tbl_id: UUID) -> Optional[Table]:
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
return
|
|
675
|
-
|
|
676
|
-
@
|
|
907
|
+
"""Must be executed inside a transaction. Might raise PendingTableOpsError."""
|
|
908
|
+
if tbl_id in self._tbls:
|
|
909
|
+
return self._tbls[tbl_id]
|
|
910
|
+
tbl = self._load_tbl(tbl_id)
|
|
911
|
+
# # if this is a mutable table, we also need to have its mutable views loaded, in order to track column
|
|
912
|
+
# # dependencies
|
|
913
|
+
# tbl_version = tbl._tbl_version.get()
|
|
914
|
+
# if tbl_version.is_mutable:
|
|
915
|
+
# for v in tbl_version.mutable_views:
|
|
916
|
+
# _ = self.get_table_by_id(v.id)
|
|
917
|
+
return tbl
|
|
918
|
+
|
|
919
|
+
@retry_loop(for_write=True)
|
|
677
920
|
def create_table(
|
|
678
921
|
self,
|
|
679
922
|
path: Path,
|
|
@@ -706,7 +949,6 @@ class Catalog:
|
|
|
706
949
|
self._tbls[tbl._id] = tbl
|
|
707
950
|
return tbl
|
|
708
951
|
|
|
709
|
-
@_retry_loop(for_write=True)
|
|
710
952
|
def create_view(
|
|
711
953
|
self,
|
|
712
954
|
path: Path,
|
|
@@ -722,49 +964,68 @@ class Catalog:
|
|
|
722
964
|
media_validation: MediaValidation,
|
|
723
965
|
if_exists: IfExistsParam,
|
|
724
966
|
) -> Table:
|
|
725
|
-
|
|
967
|
+
@retry_loop(for_write=True)
|
|
968
|
+
def create_fn() -> UUID:
|
|
969
|
+
if not is_snapshot and base.is_mutable():
|
|
970
|
+
# this is a mutable view of a mutable base; X-lock the base and advance its view_sn before adding
|
|
971
|
+
# the view
|
|
972
|
+
self._acquire_tbl_lock(tbl_id=base.tbl_id, for_write=True)
|
|
973
|
+
base_tv = self.get_tbl_version(base.tbl_id, None, validate_initialized=True)
|
|
974
|
+
base_tv.tbl_md.view_sn += 1
|
|
975
|
+
result = Env.get().conn.execute(
|
|
976
|
+
sql.update(schema.Table)
|
|
977
|
+
.values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
|
|
978
|
+
.where(schema.Table.id == base.tbl_id)
|
|
979
|
+
)
|
|
980
|
+
assert result.rowcount == 1, result.rowcount
|
|
726
981
|
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
base_tv.tbl_md.view_sn += 1
|
|
732
|
-
result = Env.get().conn.execute(
|
|
733
|
-
sql.update(schema.Table)
|
|
734
|
-
.values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
|
|
735
|
-
.where(schema.Table.id == base.tbl_id)
|
|
736
|
-
)
|
|
737
|
-
assert result.rowcount == 1, result.rowcount
|
|
982
|
+
existing = self._handle_path_collision(path, View, is_snapshot, if_exists)
|
|
983
|
+
if existing is not None:
|
|
984
|
+
assert isinstance(existing, View)
|
|
985
|
+
return existing._id
|
|
738
986
|
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
)
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
987
|
+
dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
|
|
988
|
+
assert dir is not None
|
|
989
|
+
if iterator is None:
|
|
990
|
+
iterator_class, iterator_args = None, None
|
|
991
|
+
else:
|
|
992
|
+
iterator_class, iterator_args = iterator
|
|
993
|
+
md, ops = View._create(
|
|
994
|
+
dir._id,
|
|
995
|
+
path.name,
|
|
996
|
+
base=base,
|
|
997
|
+
select_list=select_list,
|
|
998
|
+
additional_columns=additional_columns,
|
|
999
|
+
predicate=where,
|
|
1000
|
+
sample_clause=sample_clause,
|
|
1001
|
+
is_snapshot=is_snapshot,
|
|
1002
|
+
iterator_cls=iterator_class,
|
|
1003
|
+
iterator_args=iterator_args,
|
|
1004
|
+
num_retained_versions=num_retained_versions,
|
|
1005
|
+
comment=comment,
|
|
1006
|
+
media_validation=media_validation,
|
|
1007
|
+
)
|
|
1008
|
+
tbl_id = UUID(md.tbl_md.tbl_id)
|
|
1009
|
+
self.store_tbl_md(tbl_id, dir._id, md.tbl_md, md.version_md, md.schema_version_md, ops)
|
|
1010
|
+
return tbl_id
|
|
1011
|
+
|
|
1012
|
+
view_id = create_fn()
|
|
1013
|
+
if not is_snapshot and base.is_mutable():
|
|
1014
|
+
# invalidate base's TableVersion instance, so that it gets reloaded with the new mutable view
|
|
1015
|
+
self._clear_tv_cache(base.tbl_id, base.tbl_version.effective_version)
|
|
1016
|
+
# base_tv = self.get_tbl_version(base.tbl_id, base.tbl_version.effective_version, validate_initialized=True)
|
|
1017
|
+
# view_handle = TableVersionHandle(view_id, effective_version=None)
|
|
1018
|
+
# base_tv.mutable_views.add(view_handle)
|
|
1019
|
+
|
|
1020
|
+
# finalize pending ops
|
|
1021
|
+
with self.begin_xact(tbl_id=view_id, for_write=True, finalize_pending_ops=True):
|
|
1022
|
+
return self.get_table_by_id(view_id)
|
|
1023
|
+
|
|
1024
|
+
def _clear_tv_cache(self, tbl_id: UUID, effective_version: Optional[int]) -> None:
|
|
1025
|
+
if (tbl_id, effective_version) in self._tbl_versions:
|
|
1026
|
+
tv = self._tbl_versions[tbl_id, effective_version]
|
|
1027
|
+
tv.is_validated = False
|
|
1028
|
+
del self._tbl_versions[tbl_id, effective_version]
|
|
768
1029
|
|
|
769
1030
|
def create_replica(self, path: Path, md: list[schema.FullTableMd]) -> None:
|
|
770
1031
|
"""
|
|
@@ -786,8 +1047,8 @@ class Catalog:
|
|
|
786
1047
|
# Ensure that the system directory exists.
|
|
787
1048
|
self._create_dir(Path('_system', allow_system_paths=True), if_exists=IfExistsParam.IGNORE, parents=False)
|
|
788
1049
|
|
|
789
|
-
# Now check to see if this table
|
|
790
|
-
existing =
|
|
1050
|
+
# Now check to see if this table already exists in the catalog.
|
|
1051
|
+
existing = self.get_table_by_id(tbl_id)
|
|
791
1052
|
if existing is not None:
|
|
792
1053
|
existing_path = Path(existing._path(), allow_system_paths=True)
|
|
793
1054
|
if existing_path != path:
|
|
@@ -808,7 +1069,7 @@ class Catalog:
|
|
|
808
1069
|
# table being replicated.
|
|
809
1070
|
for ancestor_md in md[:0:-1]:
|
|
810
1071
|
ancestor_id = UUID(ancestor_md.tbl_md.tbl_id)
|
|
811
|
-
replica =
|
|
1072
|
+
replica = self.get_table_by_id(ancestor_id)
|
|
812
1073
|
replica_path: Path
|
|
813
1074
|
if replica is None:
|
|
814
1075
|
# We've never seen this table before. Create a new anonymous system table for it.
|
|
@@ -922,7 +1183,7 @@ class Catalog:
|
|
|
922
1183
|
# It's a new version of a table that has a physical store, so we need to create a TableVersion instance.
|
|
923
1184
|
TableVersion.create_replica(md)
|
|
924
1185
|
|
|
925
|
-
@
|
|
1186
|
+
@retry_loop(for_write=False)
|
|
926
1187
|
def get_table(self, path: Path) -> Table:
|
|
927
1188
|
obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
|
|
928
1189
|
assert isinstance(obj, Table)
|
|
@@ -931,7 +1192,7 @@ class Catalog:
|
|
|
931
1192
|
obj._tbl_version_path.clear_cached_md()
|
|
932
1193
|
return obj
|
|
933
1194
|
|
|
934
|
-
@
|
|
1195
|
+
@retry_loop(for_write=True)
|
|
935
1196
|
def drop_table(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
|
|
936
1197
|
tbl = self._get_schema_object(
|
|
937
1198
|
path,
|
|
@@ -941,7 +1202,7 @@ class Catalog:
|
|
|
941
1202
|
lock_obj=False,
|
|
942
1203
|
)
|
|
943
1204
|
if tbl is None:
|
|
944
|
-
_logger.info(f'Skipped table {
|
|
1205
|
+
_logger.info(f'Skipped table {path!r} (does not exist).')
|
|
945
1206
|
return
|
|
946
1207
|
assert isinstance(tbl, Table)
|
|
947
1208
|
|
|
@@ -949,7 +1210,7 @@ class Catalog:
|
|
|
949
1210
|
# this is a mutable view of a mutable base;
|
|
950
1211
|
# lock the base before the view, in order to avoid deadlocks with concurrent inserts/updates
|
|
951
1212
|
base_id = tbl._tbl_version_path.base.tbl_id
|
|
952
|
-
self.
|
|
1213
|
+
self._acquire_tbl_lock(tbl_id=base_id, for_write=True, lock_mutable_tree=False)
|
|
953
1214
|
|
|
954
1215
|
self._drop_tbl(tbl, force=force, is_replace=False)
|
|
955
1216
|
|
|
@@ -964,7 +1225,7 @@ class Catalog:
|
|
|
964
1225
|
in the same directory with the same name (which could lead to duplicate names if we get aborted)
|
|
965
1226
|
"""
|
|
966
1227
|
self._acquire_dir_xlock(dir_id=tbl._dir_id)
|
|
967
|
-
self.
|
|
1228
|
+
self._acquire_tbl_lock(tbl_id=tbl._id, for_write=True, lock_mutable_tree=False)
|
|
968
1229
|
|
|
969
1230
|
view_ids = self.get_view_ids(tbl._id, for_update=True)
|
|
970
1231
|
if len(view_ids) > 0:
|
|
@@ -988,8 +1249,9 @@ class Catalog:
|
|
|
988
1249
|
# if this is a mutable view of a mutable base, advance the base's view_sn
|
|
989
1250
|
if isinstance(tbl, View) and tbl._tbl_version_path.is_mutable() and tbl._tbl_version_path.base.is_mutable():
|
|
990
1251
|
base_id = tbl._tbl_version_path.base.tbl_id
|
|
991
|
-
base_tv = self.get_tbl_version(base_id, None)
|
|
1252
|
+
base_tv = self.get_tbl_version(base_id, None, validate_initialized=True)
|
|
992
1253
|
base_tv.tbl_md.view_sn += 1
|
|
1254
|
+
self._modified_tvs.add(base_tv.handle)
|
|
993
1255
|
result = Env.get().conn.execute(
|
|
994
1256
|
sql.update(schema.Table.__table__)
|
|
995
1257
|
.values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
|
|
@@ -997,23 +1259,26 @@ class Catalog:
|
|
|
997
1259
|
)
|
|
998
1260
|
assert result.rowcount == 1, result.rowcount
|
|
999
1261
|
|
|
1262
|
+
if tbl._tbl_version is not None:
|
|
1263
|
+
# invalidate the TableVersion instance when we're done so that existing references to it can find out it
|
|
1264
|
+
# has been dropped
|
|
1265
|
+
self._modified_tvs.add(tbl._tbl_version)
|
|
1000
1266
|
tv = tbl._tbl_version.get() if tbl._tbl_version is not None else None
|
|
1001
|
-
if tv is not None:
|
|
1267
|
+
# if tv is not None:
|
|
1268
|
+
# tv = tbl._tbl_version.get()
|
|
1269
|
+
# # invalidate the TableVersion instance so that existing references to it can find out it has been dropped
|
|
1270
|
+
# tv.is_validated = False
|
|
1271
|
+
if tbl._tbl_version is not None:
|
|
1272
|
+
# drop the store table before deleting the Table record
|
|
1002
1273
|
tv = tbl._tbl_version.get()
|
|
1003
|
-
|
|
1004
|
-
tv.is_validated = False
|
|
1274
|
+
tv.drop()
|
|
1005
1275
|
|
|
1006
1276
|
self.delete_tbl_md(tbl._id)
|
|
1007
1277
|
assert tbl._id in self._tbls
|
|
1008
1278
|
del self._tbls[tbl._id]
|
|
1009
1279
|
_logger.info(f'Dropped table `{tbl._path()}`.')
|
|
1010
1280
|
|
|
1011
|
-
|
|
1012
|
-
tv.drop()
|
|
1013
|
-
assert (tv.id, tv.effective_version) in self._tbl_versions
|
|
1014
|
-
del self._tbl_versions[tv.id, tv.effective_version]
|
|
1015
|
-
|
|
1016
|
-
@_retry_loop(for_write=True)
|
|
1281
|
+
@retry_loop(for_write=True)
|
|
1017
1282
|
def create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
|
|
1018
1283
|
return self._create_dir(path, if_exists, parents)
|
|
1019
1284
|
|
|
@@ -1026,7 +1291,7 @@ class Catalog:
|
|
|
1026
1291
|
# parent = self._get_schema_object(path.parent)
|
|
1027
1292
|
# assert parent is not None
|
|
1028
1293
|
# dir = Dir._create(parent._id, path.name)
|
|
1029
|
-
# Env.get().console_logger.info(f'Created directory {
|
|
1294
|
+
# Env.get().console_logger.info(f'Created directory {path!r}.')
|
|
1030
1295
|
# return dir
|
|
1031
1296
|
|
|
1032
1297
|
if parents:
|
|
@@ -1045,10 +1310,10 @@ class Catalog:
|
|
|
1045
1310
|
return existing
|
|
1046
1311
|
assert parent is not None
|
|
1047
1312
|
dir = Dir._create(parent._id, path.name)
|
|
1048
|
-
Env.get().console_logger.info(f'Created directory {
|
|
1313
|
+
Env.get().console_logger.info(f'Created directory {path!r}.')
|
|
1049
1314
|
return dir
|
|
1050
1315
|
|
|
1051
|
-
@
|
|
1316
|
+
@retry_loop(for_write=True)
|
|
1052
1317
|
def drop_dir(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
|
|
1053
1318
|
_, _, schema_obj = self._prepare_dir_op(
|
|
1054
1319
|
drop_dir_path=path.parent,
|
|
@@ -1057,7 +1322,7 @@ class Catalog:
|
|
|
1057
1322
|
raise_if_not_exists=if_not_exists == IfNotExistsParam.ERROR and not force,
|
|
1058
1323
|
)
|
|
1059
1324
|
if schema_obj is None:
|
|
1060
|
-
_logger.info(f'Directory {
|
|
1325
|
+
_logger.info(f'Directory {path!r} does not exist; skipped drop_dir().')
|
|
1061
1326
|
return
|
|
1062
1327
|
self._drop_dir(schema_obj._id, path, force=force)
|
|
1063
1328
|
|
|
@@ -1070,7 +1335,7 @@ class Catalog:
|
|
|
1070
1335
|
q = sql.select(sql.func.count()).select_from(schema.Table).where(schema.Table.dir_id == dir_id)
|
|
1071
1336
|
num_tbls = conn.execute(q).scalar()
|
|
1072
1337
|
if num_subdirs + num_tbls > 0:
|
|
1073
|
-
raise excs.Error(f'Directory {
|
|
1338
|
+
raise excs.Error(f'Directory {dir_path!r} is not empty.')
|
|
1074
1339
|
|
|
1075
1340
|
# drop existing subdirs
|
|
1076
1341
|
self._acquire_dir_xlock(dir_id=dir_id)
|
|
@@ -1088,7 +1353,7 @@ class Catalog:
|
|
|
1088
1353
|
|
|
1089
1354
|
# self.drop_dir(dir_id)
|
|
1090
1355
|
conn.execute(sql.delete(schema.Dir).where(schema.Dir.id == dir_id))
|
|
1091
|
-
_logger.info(f'Removed directory {
|
|
1356
|
+
_logger.info(f'Removed directory {dir_path!r}.')
|
|
1092
1357
|
|
|
1093
1358
|
def get_view_ids(self, tbl_id: UUID, for_update: bool = False) -> list[UUID]:
|
|
1094
1359
|
"""Return the ids of views that directly reference the given table"""
|
|
@@ -1104,13 +1369,25 @@ class Catalog:
|
|
|
1104
1369
|
result = [r[0] for r in conn.execute(q).all()]
|
|
1105
1370
|
return result
|
|
1106
1371
|
|
|
1107
|
-
def get_tbl_version(
|
|
1372
|
+
def get_tbl_version(
|
|
1373
|
+
self,
|
|
1374
|
+
tbl_id: UUID,
|
|
1375
|
+
effective_version: Optional[int],
|
|
1376
|
+
check_pending_ops: Optional[bool] = None,
|
|
1377
|
+
validate_initialized: bool = False,
|
|
1378
|
+
) -> Optional[TableVersion]:
|
|
1379
|
+
"""
|
|
1380
|
+
Returns the TableVersion instance for the given table and version and updates the cache.
|
|
1381
|
+
|
|
1382
|
+
If present in the cache and the instance isn't validated, validates version and view_sn against the stored
|
|
1383
|
+
metadata.
|
|
1384
|
+
"""
|
|
1108
1385
|
# we need a transaction here, if we're not already in one; if this starts a new transaction,
|
|
1109
1386
|
# the returned TableVersion instance will not be validated
|
|
1110
1387
|
with self.begin_xact(for_write=False) as conn:
|
|
1111
1388
|
tv = self._tbl_versions.get((tbl_id, effective_version))
|
|
1112
1389
|
if tv is None:
|
|
1113
|
-
tv = self._load_tbl_version(tbl_id, effective_version)
|
|
1390
|
+
tv = self._load_tbl_version(tbl_id, effective_version, check_pending_ops=check_pending_ops)
|
|
1114
1391
|
elif not tv.is_validated:
|
|
1115
1392
|
# only live instances are invalidated
|
|
1116
1393
|
assert effective_version is None
|
|
@@ -1131,12 +1408,16 @@ class Catalog:
|
|
|
1131
1408
|
f'(cached/current version: {tv.version}/{current_version}, '
|
|
1132
1409
|
f'cached/current view_sn: {tv.tbl_md.view_sn}/{view_sn})'
|
|
1133
1410
|
)
|
|
1134
|
-
tv = self._load_tbl_version(tbl_id, None)
|
|
1411
|
+
tv = self._load_tbl_version(tbl_id, None, check_pending_ops=check_pending_ops)
|
|
1135
1412
|
else:
|
|
1136
1413
|
# the cached metadata is valid
|
|
1137
1414
|
tv.is_validated = True
|
|
1138
1415
|
|
|
1139
|
-
assert tv.is_validated
|
|
1416
|
+
assert tv.is_validated, f'{tbl_id}:{effective_version} not validated\n{tv.__dict__}\n{self._debug_str()}'
|
|
1417
|
+
if validate_initialized:
|
|
1418
|
+
assert tv.is_initialized, (
|
|
1419
|
+
f'{tbl_id}:{effective_version} not initialized\n{tv.__dict__}\n{self._debug_str()}'
|
|
1420
|
+
)
|
|
1140
1421
|
return tv
|
|
1141
1422
|
|
|
1142
1423
|
def remove_tbl_version(self, tbl_version: TableVersion) -> None:
|
|
@@ -1188,6 +1469,13 @@ class Catalog:
|
|
|
1188
1469
|
from .view import View
|
|
1189
1470
|
|
|
1190
1471
|
conn = Env.get().conn
|
|
1472
|
+
|
|
1473
|
+
# check for pending ops
|
|
1474
|
+
q = sql.select(sql.func.count()).where(schema.PendingTableOp.tbl_id == tbl_id)
|
|
1475
|
+
has_pending_ops = conn.execute(q).scalar() > 0
|
|
1476
|
+
if has_pending_ops:
|
|
1477
|
+
raise PendingTableOpsError(tbl_id)
|
|
1478
|
+
|
|
1191
1479
|
q = (
|
|
1192
1480
|
sql.select(schema.Table, schema.TableSchemaVersion)
|
|
1193
1481
|
.join(schema.TableSchemaVersion)
|
|
@@ -1204,11 +1492,11 @@ class Catalog:
|
|
|
1204
1492
|
row = conn.execute(q).one_or_none()
|
|
1205
1493
|
if row is None:
|
|
1206
1494
|
return None
|
|
1207
|
-
tbl_record,
|
|
1495
|
+
tbl_record, _ = _unpack_row(row, [schema.Table, schema.TableSchemaVersion])
|
|
1208
1496
|
|
|
1209
1497
|
tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
|
|
1210
1498
|
view_md = tbl_md.view_md
|
|
1211
|
-
if view_md is None:
|
|
1499
|
+
if view_md is None and not tbl_md.is_replica:
|
|
1212
1500
|
# this is a base table
|
|
1213
1501
|
if (tbl_id, None) not in self._tbl_versions:
|
|
1214
1502
|
_ = self._load_tbl_version(tbl_id, None)
|
|
@@ -1218,15 +1506,16 @@ class Catalog:
|
|
|
1218
1506
|
|
|
1219
1507
|
# this is a view; determine the sequence of TableVersions to load
|
|
1220
1508
|
tbl_version_path: list[tuple[UUID, Optional[int]]] = []
|
|
1221
|
-
|
|
1222
|
-
pure_snapshot = view_md.is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
|
|
1223
|
-
if pure_snapshot:
|
|
1509
|
+
if tbl_md.is_pure_snapshot:
|
|
1224
1510
|
# this is a pure snapshot, without a physical table backing it; we only need the bases
|
|
1225
1511
|
pass
|
|
1226
1512
|
else:
|
|
1227
|
-
effective_version =
|
|
1513
|
+
effective_version = (
|
|
1514
|
+
0 if view_md is not None and view_md.is_snapshot else None
|
|
1515
|
+
) # snapshots only have version 0
|
|
1228
1516
|
tbl_version_path.append((tbl_id, effective_version))
|
|
1229
|
-
|
|
1517
|
+
if view_md is not None:
|
|
1518
|
+
tbl_version_path.extend((UUID(tbl_id), version) for tbl_id, version in view_md.base_versions)
|
|
1230
1519
|
|
|
1231
1520
|
# load TableVersions, starting at the root
|
|
1232
1521
|
base_path: Optional[TableVersionPath] = None
|
|
@@ -1236,11 +1525,11 @@ class Catalog:
|
|
|
1236
1525
|
_ = self._load_tbl_version(id, effective_version)
|
|
1237
1526
|
view_path = TableVersionPath(TableVersionHandle(id, effective_version), base=base_path)
|
|
1238
1527
|
base_path = view_path
|
|
1239
|
-
view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=
|
|
1528
|
+
view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=tbl_md.is_pure_snapshot)
|
|
1240
1529
|
self._tbls[tbl_id] = view
|
|
1241
1530
|
return view
|
|
1242
1531
|
|
|
1243
|
-
@
|
|
1532
|
+
@retry_loop(for_write=False)
|
|
1244
1533
|
def collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
|
|
1245
1534
|
"""
|
|
1246
1535
|
Returns the history of up to n versions of the table with the given UUID.
|
|
@@ -1350,6 +1639,7 @@ class Catalog:
|
|
|
1350
1639
|
tbl_md: Optional[schema.TableMd],
|
|
1351
1640
|
version_md: Optional[schema.TableVersionMd],
|
|
1352
1641
|
schema_version_md: Optional[schema.TableSchemaVersionMd],
|
|
1642
|
+
pending_ops: Optional[list[TableOp]] = None,
|
|
1353
1643
|
) -> None:
|
|
1354
1644
|
"""
|
|
1355
1645
|
Stores metadata to the DB.
|
|
@@ -1364,6 +1654,9 @@ class Catalog:
|
|
|
1364
1654
|
If inserting `version_md` or `schema_version_md` would be a primary key violation, an exception will be raised.
|
|
1365
1655
|
"""
|
|
1366
1656
|
assert self._in_write_xact
|
|
1657
|
+
assert version_md is None or version_md.created_at > 0.0
|
|
1658
|
+
assert pending_ops is None or len(pending_ops) > 0
|
|
1659
|
+
assert pending_ops is None or tbl_md is not None # if we write pending ops, we must also write new tbl_md
|
|
1367
1660
|
session = Env.get().session
|
|
1368
1661
|
|
|
1369
1662
|
# Construct and insert or update table record if requested.
|
|
@@ -1374,6 +1667,9 @@ class Catalog:
|
|
|
1374
1667
|
assert tbl_md.current_schema_version == version_md.schema_version
|
|
1375
1668
|
if schema_version_md is not None:
|
|
1376
1669
|
assert tbl_md.current_schema_version == schema_version_md.schema_version
|
|
1670
|
+
if pending_ops is not None:
|
|
1671
|
+
tbl_md.has_pending_ops = True
|
|
1672
|
+
|
|
1377
1673
|
if dir_id is not None:
|
|
1378
1674
|
# We are inserting a record while creating a new table.
|
|
1379
1675
|
tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_md))
|
|
@@ -1404,25 +1700,32 @@ class Catalog:
|
|
|
1404
1700
|
tbl_id=tbl_id, schema_version=schema_version_md.schema_version, md=dataclasses.asdict(schema_version_md)
|
|
1405
1701
|
)
|
|
1406
1702
|
session.add(schema_version_record)
|
|
1407
|
-
session.flush() # Inform SQLAlchemy that we want to write these changes to the DB.
|
|
1408
1703
|
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
Update the TableVersion.md field in the DB. Typically used to update the cascade row count status.
|
|
1704
|
+
# make sure we don't have any pending ops
|
|
1705
|
+
assert session.query(schema.PendingTableOp).filter(schema.PendingTableOp.tbl_id == tbl_id).count() == 0
|
|
1412
1706
|
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
|
|
1707
|
+
if pending_ops is not None:
|
|
1708
|
+
for op in pending_ops:
|
|
1709
|
+
op_record = schema.PendingTableOp(tbl_id=tbl_id, op_sn=op.op_sn, op=dataclasses.asdict(op))
|
|
1710
|
+
session.add(op_record)
|
|
1711
|
+
|
|
1712
|
+
session.flush() # Inform SQLAlchemy that we want to write these changes to the DB.
|
|
1713
|
+
|
|
1714
|
+
def store_update_status(self, tbl_id: UUID, version: int, status: UpdateStatus) -> None:
|
|
1715
|
+
"""Update the TableVersion.md.update_status field"""
|
|
1416
1716
|
assert self._in_write_xact
|
|
1417
|
-
|
|
1717
|
+
conn = Env.get().conn
|
|
1418
1718
|
|
|
1419
|
-
|
|
1420
|
-
sql.update(schema.TableVersion
|
|
1421
|
-
.
|
|
1422
|
-
.
|
|
1719
|
+
stmt = (
|
|
1720
|
+
sql.update(schema.TableVersion)
|
|
1721
|
+
.where(schema.TableVersion.tbl_id == tbl_id, schema.TableVersion.version == version)
|
|
1722
|
+
.values(
|
|
1723
|
+
md=schema.TableVersion.md.op('||')({'additional_md': {'update_status': dataclasses.asdict(status)}})
|
|
1724
|
+
)
|
|
1423
1725
|
)
|
|
1424
1726
|
|
|
1425
|
-
|
|
1727
|
+
res = conn.execute(stmt)
|
|
1728
|
+
assert res.rowcount == 1, res.rowcount
|
|
1426
1729
|
|
|
1427
1730
|
def delete_tbl_md(self, tbl_id: UUID) -> None:
|
|
1428
1731
|
"""
|
|
@@ -1431,6 +1734,7 @@ class Catalog:
|
|
|
1431
1734
|
conn = Env.get().conn
|
|
1432
1735
|
conn.execute(sql.delete(schema.TableSchemaVersion.__table__).where(schema.TableSchemaVersion.tbl_id == tbl_id))
|
|
1433
1736
|
conn.execute(sql.delete(schema.TableVersion.__table__).where(schema.TableVersion.tbl_id == tbl_id))
|
|
1737
|
+
conn.execute(sql.delete(schema.PendingTableOp.__table__).where(schema.PendingTableOp.tbl_id == tbl_id))
|
|
1434
1738
|
conn.execute(sql.delete(schema.Table.__table__).where(schema.Table.id == tbl_id))
|
|
1435
1739
|
|
|
1436
1740
|
def load_replica_md(self, tbl: Table) -> list[schema.FullTableMd]:
|
|
@@ -1461,13 +1765,32 @@ class Catalog:
|
|
|
1461
1765
|
|
|
1462
1766
|
return md
|
|
1463
1767
|
|
|
1464
|
-
def _load_tbl_version(
|
|
1768
|
+
def _load_tbl_version(
|
|
1769
|
+
self, tbl_id: UUID, effective_version: Optional[int], check_pending_ops: bool = True
|
|
1770
|
+
) -> Optional[TableVersion]:
|
|
1465
1771
|
"""Creates TableVersion instance from stored metadata and registers it in _tbl_versions."""
|
|
1466
|
-
tbl_md,
|
|
1772
|
+
tbl_md, version_md, schema_version_md = self.load_tbl_md(tbl_id, effective_version)
|
|
1467
1773
|
view_md = tbl_md.view_md
|
|
1468
1774
|
|
|
1469
1775
|
conn = Env.get().conn
|
|
1470
1776
|
|
|
1777
|
+
if check_pending_ops:
|
|
1778
|
+
pending_ops_q = (
|
|
1779
|
+
sql.select(sql.func.count())
|
|
1780
|
+
.select_from(schema.Table)
|
|
1781
|
+
.join(schema.PendingTableOp)
|
|
1782
|
+
.where(schema.PendingTableOp.tbl_id == tbl_id)
|
|
1783
|
+
.where(schema.Table.id == tbl_id)
|
|
1784
|
+
)
|
|
1785
|
+
if effective_version is not None:
|
|
1786
|
+
# we only care about pending ops if the requested version is the current version
|
|
1787
|
+
pending_ops_q = pending_ops_q.where(
|
|
1788
|
+
sql.text(f"({schema.Table.__table__}.md->>'current_version')::int = {effective_version}")
|
|
1789
|
+
)
|
|
1790
|
+
has_pending_ops = conn.execute(pending_ops_q).scalar() > 0
|
|
1791
|
+
if has_pending_ops:
|
|
1792
|
+
raise PendingTableOpsError(tbl_id)
|
|
1793
|
+
|
|
1471
1794
|
# load mutable view ids for mutable TableVersions
|
|
1472
1795
|
mutable_view_ids: list[UUID] = []
|
|
1473
1796
|
# If this is a replica, effective_version should not be None. We see this today, because
|
|
@@ -1482,17 +1805,24 @@ class Catalog:
|
|
|
1482
1805
|
)
|
|
1483
1806
|
)
|
|
1484
1807
|
mutable_view_ids = [r[0] for r in conn.execute(q).all()]
|
|
1808
|
+
|
|
1485
1809
|
mutable_views = [TableVersionHandle(id, None) for id in mutable_view_ids]
|
|
1486
1810
|
|
|
1487
1811
|
tbl_version: TableVersion
|
|
1488
1812
|
if view_md is None:
|
|
1489
1813
|
# this is a base table
|
|
1490
1814
|
tbl_version = TableVersion(
|
|
1491
|
-
tbl_id, tbl_md, effective_version, schema_version_md, mutable_views=mutable_views
|
|
1815
|
+
tbl_id, tbl_md, version_md, effective_version, schema_version_md, mutable_views=mutable_views
|
|
1492
1816
|
)
|
|
1493
1817
|
else:
|
|
1494
1818
|
assert len(view_md.base_versions) > 0 # a view needs to have a base
|
|
1495
|
-
|
|
1819
|
+
# TODO: add TableVersionMd.is_pure_snapshot() and use that
|
|
1820
|
+
pure_snapshot = (
|
|
1821
|
+
view_md.is_snapshot
|
|
1822
|
+
and view_md.predicate is None
|
|
1823
|
+
and view_md.sample_clause is None
|
|
1824
|
+
and len(schema_version_md.columns) == 0
|
|
1825
|
+
)
|
|
1496
1826
|
assert not pure_snapshot # a pure snapshot doesn't have a physical table backing it, no point in loading it
|
|
1497
1827
|
|
|
1498
1828
|
base: TableVersionHandle
|
|
@@ -1506,6 +1836,7 @@ class Catalog:
|
|
|
1506
1836
|
tbl_version = TableVersion(
|
|
1507
1837
|
tbl_id,
|
|
1508
1838
|
tbl_md,
|
|
1839
|
+
version_md,
|
|
1509
1840
|
effective_version,
|
|
1510
1841
|
schema_version_md,
|
|
1511
1842
|
base_path=base_path,
|
|
@@ -1513,22 +1844,14 @@ class Catalog:
|
|
|
1513
1844
|
mutable_views=mutable_views,
|
|
1514
1845
|
)
|
|
1515
1846
|
|
|
1847
|
+
# register the instance before init()
|
|
1516
1848
|
self._tbl_versions[tbl_id, effective_version] = tbl_version
|
|
1849
|
+
# register this instance as modified, so that it gets purged if the transaction fails, it may not be
|
|
1850
|
+
# fully initialized
|
|
1851
|
+
self._modified_tvs.add(tbl_version.handle)
|
|
1517
1852
|
tbl_version.init()
|
|
1518
1853
|
return tbl_version
|
|
1519
1854
|
|
|
1520
|
-
def record_column_dependencies(self, tbl_version: TableVersion) -> None:
|
|
1521
|
-
"""Update self._column_dependencies. Only valid for non-snapshot versions."""
|
|
1522
|
-
from pixeltable.exprs import Expr
|
|
1523
|
-
|
|
1524
|
-
assert not tbl_version.is_snapshot
|
|
1525
|
-
dependencies: dict[QColumnId, set[QColumnId]] = {}
|
|
1526
|
-
for col in tbl_version.cols_by_id.values():
|
|
1527
|
-
if col.value_expr_dict is None:
|
|
1528
|
-
continue
|
|
1529
|
-
dependencies[QColumnId(tbl_version.id, col.id)] = Expr.get_refd_column_ids(col.value_expr_dict)
|
|
1530
|
-
self._column_dependencies[tbl_version.id] = dependencies
|
|
1531
|
-
|
|
1532
1855
|
def _init_store(self) -> None:
|
|
1533
1856
|
"""One-time initialization of the stored catalog. Idempotent."""
|
|
1534
1857
|
self.create_user(None)
|
|
@@ -1557,14 +1880,20 @@ class Catalog:
|
|
|
1557
1880
|
obj, _, _ = self._prepare_dir_op(add_dir_path=path.parent, add_name=path.name)
|
|
1558
1881
|
|
|
1559
1882
|
if if_exists == IfExistsParam.ERROR and obj is not None:
|
|
1560
|
-
raise excs.Error(f'Path {
|
|
1883
|
+
raise excs.Error(f'Path {path!r} is an existing {obj._display_name()}')
|
|
1561
1884
|
else:
|
|
1562
1885
|
is_snapshot = isinstance(obj, View) and obj._tbl_version_path.is_snapshot()
|
|
1563
1886
|
if obj is not None and (not isinstance(obj, expected_obj_type) or (expected_snapshot and not is_snapshot)):
|
|
1564
|
-
|
|
1887
|
+
if expected_obj_type is Dir:
|
|
1888
|
+
obj_type_str = 'directory'
|
|
1889
|
+
elif expected_obj_type is InsertableTable:
|
|
1890
|
+
obj_type_str = 'table'
|
|
1891
|
+
elif expected_obj_type is View:
|
|
1892
|
+
obj_type_str = 'snapshot' if expected_snapshot else 'view'
|
|
1893
|
+
else:
|
|
1894
|
+
raise AssertionError()
|
|
1565
1895
|
raise excs.Error(
|
|
1566
|
-
f'Path {
|
|
1567
|
-
f'Cannot {if_exists.name.lower()} it.'
|
|
1896
|
+
f'Path {path!r} already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.'
|
|
1568
1897
|
)
|
|
1569
1898
|
|
|
1570
1899
|
if obj is None:
|
|
@@ -1577,7 +1906,7 @@ class Catalog:
|
|
|
1577
1906
|
dir_contents = self._get_dir_contents(obj._id)
|
|
1578
1907
|
if len(dir_contents) > 0 and if_exists == IfExistsParam.REPLACE:
|
|
1579
1908
|
raise excs.Error(
|
|
1580
|
-
f'Directory {
|
|
1909
|
+
f'Directory {path!r} already exists and is not empty. '
|
|
1581
1910
|
'Use `if_exists="replace_force"` to replace it.'
|
|
1582
1911
|
)
|
|
1583
1912
|
self._drop_dir(obj._id, path, force=True)
|