pixeltable 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -0
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +3 -11
- pixeltable/catalog/catalog.py +575 -220
- pixeltable/catalog/column.py +22 -23
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +2 -148
- pixeltable/catalog/insertable_table.py +15 -13
- pixeltable/catalog/path.py +6 -0
- pixeltable/catalog/schema_object.py +9 -4
- pixeltable/catalog/table.py +96 -85
- pixeltable/catalog/table_version.py +257 -174
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/tbl_ops.py +44 -0
- pixeltable/catalog/update_status.py +179 -0
- pixeltable/catalog/view.py +50 -56
- pixeltable/config.py +76 -12
- pixeltable/dataframe.py +19 -6
- pixeltable/env.py +50 -4
- pixeltable/exec/data_row_batch.py +3 -1
- pixeltable/exec/exec_node.py +7 -24
- pixeltable/exec/expr_eval/schedulers.py +134 -7
- pixeltable/exec/in_memory_data_node.py +6 -7
- pixeltable/exprs/column_property_ref.py +21 -9
- pixeltable/exprs/column_ref.py +7 -2
- pixeltable/exprs/function_call.py +2 -2
- pixeltable/exprs/row_builder.py +10 -9
- pixeltable/exprs/rowid_ref.py +0 -4
- pixeltable/func/function.py +3 -3
- pixeltable/functions/audio.py +36 -9
- pixeltable/functions/gemini.py +4 -4
- pixeltable/functions/openai.py +1 -2
- pixeltable/functions/video.py +59 -16
- pixeltable/globals.py +109 -24
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/datarows.py +2 -1
- pixeltable/io/external_store.py +3 -55
- pixeltable/io/globals.py +4 -4
- pixeltable/io/hf_datasets.py +10 -2
- pixeltable/io/label_studio.py +16 -16
- pixeltable/io/pandas.py +1 -0
- pixeltable/io/table_data_conduit.py +12 -13
- pixeltable/iterators/audio.py +17 -8
- pixeltable/iterators/image.py +5 -2
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_39.py +125 -0
- pixeltable/metadata/converters/util.py +3 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +50 -1
- pixeltable/plan.py +4 -0
- pixeltable/share/packager.py +20 -38
- pixeltable/store.py +40 -51
- pixeltable/type_system.py +2 -2
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/media_store.py +50 -0
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/METADATA +1 -1
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/RECORD +60 -57
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/LICENSE +0 -0
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.2.dist-info → pixeltable-0.4.4.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/catalog.py
CHANGED
|
@@ -14,6 +14,8 @@ import psycopg
|
|
|
14
14
|
import sqlalchemy as sql
|
|
15
15
|
|
|
16
16
|
from pixeltable import exceptions as excs
|
|
17
|
+
|
|
18
|
+
# from pixeltable import exceptions as excs, UpdateStatus
|
|
17
19
|
from pixeltable.env import Env
|
|
18
20
|
from pixeltable.iterators import ComponentIterator
|
|
19
21
|
from pixeltable.metadata import schema
|
|
@@ -28,6 +30,8 @@ from .table import Table
|
|
|
28
30
|
from .table_version import TableVersion
|
|
29
31
|
from .table_version_handle import TableVersionHandle
|
|
30
32
|
from .table_version_path import TableVersionPath
|
|
33
|
+
from .tbl_ops import TableOp
|
|
34
|
+
from .update_status import UpdateStatus
|
|
31
35
|
from .view import View
|
|
32
36
|
|
|
33
37
|
if TYPE_CHECKING:
|
|
@@ -70,18 +74,35 @@ _MAX_RETRIES = -1
|
|
|
70
74
|
T = TypeVar('T')
|
|
71
75
|
|
|
72
76
|
|
|
73
|
-
def
|
|
77
|
+
def retry_loop(
|
|
78
|
+
*, tbl: Optional[TableVersionPath] = None, for_write: bool, lock_mutable_tree: bool = False
|
|
79
|
+
) -> Callable[[Callable[..., T]], Callable[..., T]]:
|
|
74
80
|
def decorator(op: Callable[..., T]) -> Callable[..., T]:
|
|
75
81
|
@functools.wraps(op)
|
|
76
82
|
def loop(*args: Any, **kwargs: Any) -> T:
|
|
83
|
+
cat = Catalog.get()
|
|
84
|
+
# retry_loop() is reentrant
|
|
85
|
+
if cat._in_retry_loop:
|
|
86
|
+
return op(*args, **kwargs)
|
|
87
|
+
|
|
77
88
|
num_retries = 0
|
|
78
89
|
while True:
|
|
90
|
+
cat._in_retry_loop = True
|
|
79
91
|
try:
|
|
80
92
|
# in order for retry to work, we need to make sure that there aren't any prior db updates
|
|
81
93
|
# that are part of an ongoing transaction
|
|
82
94
|
assert not Env.get().in_xact
|
|
83
|
-
with Catalog.get().begin_xact(
|
|
95
|
+
with Catalog.get().begin_xact(
|
|
96
|
+
tbl=tbl,
|
|
97
|
+
for_write=for_write,
|
|
98
|
+
convert_db_excs=False,
|
|
99
|
+
lock_mutable_tree=lock_mutable_tree,
|
|
100
|
+
finalize_pending_ops=True,
|
|
101
|
+
):
|
|
84
102
|
return op(*args, **kwargs)
|
|
103
|
+
except PendingTableOpsError as e:
|
|
104
|
+
Env.get().console_logger.debug(f'retry_loop(): finalizing pending ops for {e.tbl_id}')
|
|
105
|
+
Catalog.get()._finalize_pending_ops(e.tbl_id)
|
|
85
106
|
except sql.exc.DBAPIError as e:
|
|
86
107
|
# TODO: what other exceptions should we be looking for?
|
|
87
108
|
if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
|
|
@@ -97,16 +118,31 @@ def _retry_loop(*, for_write: bool) -> Callable[[Callable[..., T]], Callable[...
|
|
|
97
118
|
# for informational/debugging purposes
|
|
98
119
|
_logger.debug(f'retry_loop(): passing along {e}')
|
|
99
120
|
raise
|
|
121
|
+
finally:
|
|
122
|
+
cat._in_retry_loop = False
|
|
100
123
|
|
|
101
124
|
return loop
|
|
102
125
|
|
|
103
126
|
return decorator
|
|
104
127
|
|
|
105
128
|
|
|
129
|
+
class PendingTableOpsError(Exception):
|
|
130
|
+
tbl_id: UUID
|
|
131
|
+
|
|
132
|
+
def __init__(self, tbl_id: UUID) -> None:
|
|
133
|
+
self.tbl_id = tbl_id
|
|
134
|
+
|
|
135
|
+
|
|
106
136
|
class Catalog:
|
|
107
137
|
"""The functional interface to getting access to catalog objects
|
|
108
138
|
|
|
109
|
-
All interface functions must be called in the context of a transaction, started with Catalog.begin_xact()
|
|
139
|
+
All interface functions must be called in the context of a transaction, started with Catalog.begin_xact() or
|
|
140
|
+
via retry_loop().
|
|
141
|
+
|
|
142
|
+
When calling functions that involve Table or TableVersion instances, the catalog needs to get a chance to finalize
|
|
143
|
+
pending ops against those tables. To that end,
|
|
144
|
+
- use begin_xact(tbl) or begin_xact(tbl_id) if only accessing a single table
|
|
145
|
+
- use retry_loop() when accessing multiple tables (eg, pxt.ls())
|
|
110
146
|
|
|
111
147
|
Caching and invalidation of metadata:
|
|
112
148
|
- Catalog caches TableVersion instances in order to avoid excessive metadata loading
|
|
@@ -132,6 +168,8 @@ class Catalog:
|
|
|
132
168
|
_tbls: dict[UUID, Table]
|
|
133
169
|
_in_write_xact: bool # True if we're in a write transaction
|
|
134
170
|
_x_locked_tbl_ids: set[UUID] # non-empty for write transactions
|
|
171
|
+
_modified_tvs: set[TableVersionHandle] # TableVersion instances modified in the current transaction
|
|
172
|
+
_in_retry_loop: bool
|
|
135
173
|
|
|
136
174
|
# cached column dependencies
|
|
137
175
|
# - key: table id, value: mapping from column id to its dependencies
|
|
@@ -164,6 +202,8 @@ class Catalog:
|
|
|
164
202
|
self._tbls = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
|
|
165
203
|
self._in_write_xact = False
|
|
166
204
|
self._x_locked_tbl_ids = set()
|
|
205
|
+
self._modified_tvs = set()
|
|
206
|
+
self._in_retry_loop = False
|
|
167
207
|
self._column_dependencies = {}
|
|
168
208
|
self._column_dependents = None
|
|
169
209
|
self._init_store()
|
|
@@ -214,9 +254,11 @@ class Catalog:
|
|
|
214
254
|
self,
|
|
215
255
|
*,
|
|
216
256
|
tbl: Optional[TableVersionPath] = None,
|
|
257
|
+
tbl_id: Optional[UUID] = None,
|
|
217
258
|
for_write: bool = False,
|
|
218
259
|
lock_mutable_tree: bool = False,
|
|
219
260
|
convert_db_excs: bool = True,
|
|
261
|
+
finalize_pending_ops: bool = True,
|
|
220
262
|
) -> Iterator[sql.Connection]:
|
|
221
263
|
"""
|
|
222
264
|
Return a context manager that yields a connection to the database. Idempotent.
|
|
@@ -227,7 +269,7 @@ class Catalog:
|
|
|
227
269
|
If tbl != None, follows this locking protocol:
|
|
228
270
|
- validates/reloads the TableVersion instances of tbl's ancestors (in the hope that this reduces potential
|
|
229
271
|
SerializationErrors later on)
|
|
230
|
-
- if for_write == True, x-locks Table record (by updating Table.lock_dummy; see
|
|
272
|
+
- if for_write == True, x-locks Table record (by updating Table.lock_dummy; see _acquire_tbl_lock())
|
|
231
273
|
- if for_write == False, validates TableVersion instance
|
|
232
274
|
- if lock_mutable_tree == True, also x-locks all mutable views of the table
|
|
233
275
|
- this needs to be done in a retry loop, because Postgres can decide to abort the transaction
|
|
@@ -237,10 +279,14 @@ class Catalog:
|
|
|
237
279
|
|
|
238
280
|
If convert_db_excs == True, converts DBAPIErrors into excs.Errors.
|
|
239
281
|
"""
|
|
282
|
+
assert tbl is None or tbl_id is None # at most one can be specified
|
|
240
283
|
if Env.get().in_xact:
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
284
|
+
# make sure that we requested the required table lock at the beginning of the transaction
|
|
285
|
+
if for_write:
|
|
286
|
+
if tbl is not None:
|
|
287
|
+
assert tbl.tbl_id in self._x_locked_tbl_ids, f'{tbl.tbl_id} not in {self._x_locked_tbl_ids}'
|
|
288
|
+
elif tbl_id is not None:
|
|
289
|
+
assert tbl_id in self._x_locked_tbl_ids, f'{tbl_id} not in {self._x_locked_tbl_ids}'
|
|
244
290
|
yield Env.get().conn
|
|
245
291
|
return
|
|
246
292
|
|
|
@@ -252,33 +298,66 @@ class Catalog:
|
|
|
252
298
|
# )
|
|
253
299
|
# _logger.debug(f'begin_xact(): {tv_msg}')
|
|
254
300
|
num_retries = 0
|
|
301
|
+
pending_ops_tbl_id: Optional[UUID] = None
|
|
302
|
+
has_exc = False # True if we exited the 'with ...begin_xact()' block with an exception
|
|
255
303
|
while True:
|
|
304
|
+
if pending_ops_tbl_id is not None:
|
|
305
|
+
Env.get().console_logger.debug(f'begin_xact(): finalizing pending ops for {pending_ops_tbl_id}')
|
|
306
|
+
self._finalize_pending_ops(pending_ops_tbl_id)
|
|
307
|
+
pending_ops_tbl_id = None
|
|
308
|
+
|
|
256
309
|
try:
|
|
257
|
-
self._in_write_xact =
|
|
310
|
+
self._in_write_xact = for_write
|
|
258
311
|
self._x_locked_tbl_ids = set()
|
|
312
|
+
self._modified_tvs = set()
|
|
259
313
|
self._column_dependents = None
|
|
314
|
+
has_exc = False
|
|
260
315
|
|
|
261
|
-
with Env.get().begin_xact() as conn:
|
|
262
|
-
if tbl is not None:
|
|
316
|
+
with Env.get().begin_xact(for_write=for_write) as conn:
|
|
317
|
+
if tbl is not None or tbl_id is not None:
|
|
263
318
|
try:
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
319
|
+
target: Optional[TableVersionHandle] = None
|
|
320
|
+
if tbl is not None:
|
|
321
|
+
if self._acquire_path_locks(
|
|
322
|
+
tbl=tbl,
|
|
323
|
+
for_write=for_write,
|
|
324
|
+
lock_mutable_tree=lock_mutable_tree,
|
|
325
|
+
check_pending_ops=finalize_pending_ops,
|
|
326
|
+
):
|
|
327
|
+
target = tbl.tbl_version
|
|
328
|
+
else:
|
|
329
|
+
target = self._acquire_tbl_lock(
|
|
330
|
+
tbl_id=tbl_id,
|
|
331
|
+
for_write=for_write,
|
|
332
|
+
lock_mutable_tree=lock_mutable_tree,
|
|
333
|
+
raise_if_not_exists=True,
|
|
334
|
+
check_pending_ops=finalize_pending_ops,
|
|
335
|
+
)
|
|
336
|
+
|
|
337
|
+
if target is None:
|
|
338
|
+
# didn't get the write lock
|
|
339
|
+
for_write = False
|
|
340
|
+
elif for_write:
|
|
341
|
+
# we know at this point that target is mutable because we got the X-lock
|
|
342
|
+
if lock_mutable_tree and not target.is_snapshot:
|
|
343
|
+
self._x_locked_tbl_ids = self._get_mutable_tree(target.id)
|
|
274
344
|
self._compute_column_dependents(self._x_locked_tbl_ids)
|
|
275
345
|
else:
|
|
276
|
-
self._x_locked_tbl_ids = {
|
|
346
|
+
self._x_locked_tbl_ids = {target.id}
|
|
277
347
|
if _logger.isEnabledFor(logging.DEBUG):
|
|
278
348
|
# validate only when we don't see errors
|
|
279
349
|
self.validate()
|
|
280
350
|
|
|
351
|
+
except PendingTableOpsError as e:
|
|
352
|
+
has_exc = True
|
|
353
|
+
if finalize_pending_ops:
|
|
354
|
+
# we remember which table id to finalize
|
|
355
|
+
pending_ops_tbl_id = e.tbl_id
|
|
356
|
+
# raise to abort the transaction
|
|
357
|
+
raise
|
|
358
|
+
|
|
281
359
|
except sql.exc.DBAPIError as e:
|
|
360
|
+
has_exc = True
|
|
282
361
|
if isinstance(
|
|
283
362
|
e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)
|
|
284
363
|
) and (num_retries < _MAX_RETRIES or _MAX_RETRIES == -1):
|
|
@@ -289,11 +368,20 @@ class Catalog:
|
|
|
289
368
|
else:
|
|
290
369
|
raise
|
|
291
370
|
|
|
292
|
-
self._in_write_xact = for_write
|
|
293
371
|
yield conn
|
|
294
372
|
return
|
|
295
373
|
|
|
374
|
+
except PendingTableOpsError:
|
|
375
|
+
has_exc = True
|
|
376
|
+
if pending_ops_tbl_id is not None:
|
|
377
|
+
# the next iteration of the loop will deal with pending ops for this table id
|
|
378
|
+
continue
|
|
379
|
+
else:
|
|
380
|
+
# we got this exception after getting the initial table locks and therefore need to abort
|
|
381
|
+
raise
|
|
382
|
+
|
|
296
383
|
except sql.exc.DBAPIError as e:
|
|
384
|
+
has_exc = True
|
|
297
385
|
# we got some db error during the actual operation (not just while trying to get locks on the metadata
|
|
298
386
|
# records): we convert these into Errors, if asked to do so, and abort
|
|
299
387
|
# TODO: what other concurrency-related exceptions should we expect?
|
|
@@ -301,12 +389,19 @@ class Catalog:
|
|
|
301
389
|
# we always convert UndefinedTable exceptions (they can't be retried)
|
|
302
390
|
if isinstance(e.orig, psycopg.errors.UndefinedTable):
|
|
303
391
|
# the table got dropped in the middle of the table operation
|
|
304
|
-
|
|
392
|
+
tbl_name = tbl.tbl_name() if tbl is not None else str(tbl_id) if tbl_id is not None else '?'
|
|
393
|
+
_logger.debug(f'Exception: undefined table ({tbl_name}): Caught {type(e.orig)}: {e!r}')
|
|
305
394
|
assert tbl is not None
|
|
306
|
-
raise excs.Error(f'Table was dropped: {
|
|
395
|
+
raise excs.Error(f'Table was dropped: {tbl_name}') from None
|
|
307
396
|
elif isinstance(e.orig, psycopg.errors.SerializationFailure) and convert_db_excs:
|
|
308
397
|
# we still got a serialization error, despite getting x-locks at the beginning
|
|
309
|
-
msg
|
|
398
|
+
msg: str
|
|
399
|
+
if tbl is not None:
|
|
400
|
+
msg = f'{tbl.tbl_name()} ({tbl.tbl_id})'
|
|
401
|
+
elif tbl_id is not None:
|
|
402
|
+
msg = f'{tbl_id}'
|
|
403
|
+
else:
|
|
404
|
+
msg = ''
|
|
310
405
|
_logger.debug(f'Exception: serialization failure: {msg} ({e})')
|
|
311
406
|
raise excs.Error(
|
|
312
407
|
'That Pixeltable operation could not be completed because it conflicted with another '
|
|
@@ -316,6 +411,10 @@ class Catalog:
|
|
|
316
411
|
else:
|
|
317
412
|
raise
|
|
318
413
|
|
|
414
|
+
except:
|
|
415
|
+
has_exc = True
|
|
416
|
+
raise
|
|
417
|
+
|
|
319
418
|
finally:
|
|
320
419
|
self._in_write_xact = False
|
|
321
420
|
self._x_locked_tbl_ids = set()
|
|
@@ -327,12 +426,24 @@ class Catalog:
|
|
|
327
426
|
_logger.debug(f'invalidating table version {tv.id}:None (tv={id(tv):x})')
|
|
328
427
|
tv.is_validated = False
|
|
329
428
|
|
|
429
|
+
if has_exc:
|
|
430
|
+
# purge all modified TableVersion instances, we can't guarantee they are still consistent with the
|
|
431
|
+
# stored metadata
|
|
432
|
+
for handle in self._modified_tvs:
|
|
433
|
+
self._clear_tv_cache(handle.id, handle.effective_version)
|
|
434
|
+
self._modified_tvs = set()
|
|
435
|
+
|
|
330
436
|
@property
|
|
331
437
|
def in_write_xact(self) -> bool:
|
|
332
438
|
return self._in_write_xact
|
|
333
439
|
|
|
334
440
|
def _acquire_path_locks(
|
|
335
|
-
self,
|
|
441
|
+
self,
|
|
442
|
+
*,
|
|
443
|
+
tbl: TableVersionPath,
|
|
444
|
+
for_write: bool = False,
|
|
445
|
+
lock_mutable_tree: bool = False,
|
|
446
|
+
check_pending_ops: Optional[bool] = None,
|
|
336
447
|
) -> bool:
|
|
337
448
|
"""
|
|
338
449
|
Path locking protocol:
|
|
@@ -341,33 +452,49 @@ class Catalog:
|
|
|
341
452
|
- refresh cached TableVersion of tbl or get X-lock, depending on for_write
|
|
342
453
|
- if lock_mutable_tree, also X-lock all mutable views of tbl
|
|
343
454
|
|
|
344
|
-
Returns False if trying to lock a pure snapshot with for_write == True
|
|
345
455
|
Raises Error if tbl doesn't exist.
|
|
456
|
+
Return False if the lock couldn't be acquired (X-lock on a non-mutable table), True otherwise.
|
|
346
457
|
"""
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
458
|
+
path_handles = tbl.get_tbl_versions()
|
|
459
|
+
read_handles = path_handles[:0:-1] if for_write else path_handles[::-1]
|
|
460
|
+
for handle in read_handles:
|
|
461
|
+
# update cache
|
|
462
|
+
_ = self.get_tbl_version(handle.id, handle.effective_version, validate_initialized=True)
|
|
350
463
|
if not for_write:
|
|
351
464
|
return True # nothing left to lock
|
|
352
|
-
|
|
465
|
+
handle = self._acquire_tbl_lock(
|
|
466
|
+
tbl_id=tbl.tbl_id,
|
|
467
|
+
for_write=True,
|
|
468
|
+
lock_mutable_tree=lock_mutable_tree,
|
|
469
|
+
raise_if_not_exists=True,
|
|
470
|
+
check_pending_ops=check_pending_ops,
|
|
471
|
+
)
|
|
472
|
+
# update cache
|
|
473
|
+
_ = self.get_tbl_version(path_handles[0].id, path_handles[0].effective_version, validate_initialized=True)
|
|
474
|
+
return handle is not None
|
|
353
475
|
|
|
354
|
-
def
|
|
476
|
+
def _acquire_tbl_lock(
|
|
355
477
|
self,
|
|
356
478
|
*,
|
|
479
|
+
for_write: bool,
|
|
357
480
|
tbl_id: Optional[UUID] = None,
|
|
358
481
|
dir_id: Optional[UUID] = None,
|
|
359
482
|
tbl_name: Optional[str] = None,
|
|
360
483
|
lock_mutable_tree: bool = False,
|
|
361
|
-
raise_if_not_exists: bool =
|
|
362
|
-
|
|
363
|
-
|
|
484
|
+
raise_if_not_exists: bool = True,
|
|
485
|
+
check_pending_ops: Optional[bool] = None,
|
|
486
|
+
) -> Optional[TableVersionHandle]:
|
|
487
|
+
"""
|
|
488
|
+
For writes: force acquisition of an X-lock on a Table record via a blind update.
|
|
364
489
|
|
|
365
490
|
Either tbl_id or dir_id/tbl_name need to be specified.
|
|
366
491
|
When for_write is True and the table is not mutable (e.g., a snapshot), returns None; also returns None if the table was not found and raise_if_not_exists is False.
|
|
367
492
|
If lock_mutable_tree, recursively locks all mutable views of the table.
|
|
368
493
|
|
|
369
|
-
Returns
|
|
494
|
+
Returns a handle to what was locked, None if the lock couldn't be acquired (eg, X-lock on a non-mutable table).
|
|
370
495
|
"""
|
|
496
|
+
assert (tbl_id is not None) != (dir_id is not None and tbl_name is not None)
|
|
497
|
+
assert (dir_id is None) == (tbl_name is None)
|
|
371
498
|
where_clause: sql.ColumnElement
|
|
372
499
|
if tbl_id is not None:
|
|
373
500
|
where_clause = schema.Table.id == tbl_id
|
|
@@ -378,26 +505,130 @@ class Catalog:
|
|
|
378
505
|
where_clause = sql.and_(where_clause, schema.Table.md['user'].astext == Env.get().user)
|
|
379
506
|
|
|
380
507
|
conn = Env.get().conn
|
|
381
|
-
|
|
508
|
+
q = sql.select(schema.Table).where(where_clause)
|
|
509
|
+
if for_write:
|
|
510
|
+
q = q.with_for_update(nowait=True)
|
|
511
|
+
row = conn.execute(q).one_or_none()
|
|
382
512
|
if row is None:
|
|
383
513
|
if raise_if_not_exists:
|
|
384
514
|
raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
|
|
385
|
-
return
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
if
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
515
|
+
return None # nothing to lock
|
|
516
|
+
tbl_md = schema.md_from_dict(schema.TableMd, row.md)
|
|
517
|
+
if for_write and tbl_md.is_mutable:
|
|
518
|
+
conn.execute(sql.update(schema.Table).values(lock_dummy=1).where(where_clause))
|
|
519
|
+
|
|
520
|
+
if check_pending_ops:
|
|
521
|
+
# check for pending ops after getting table lock
|
|
522
|
+
pending_ops_q = sql.select(sql.func.count()).where(schema.PendingTableOp.tbl_id == row.id)
|
|
523
|
+
has_pending_ops = conn.execute(pending_ops_q).scalar() > 0
|
|
524
|
+
if has_pending_ops:
|
|
525
|
+
raise PendingTableOpsError(row.id)
|
|
526
|
+
|
|
527
|
+
if for_write and not tbl_md.is_mutable:
|
|
528
|
+
return None # nothing to lock
|
|
529
|
+
|
|
530
|
+
effective_version = tbl_md.current_version if tbl_md.is_snapshot else None
|
|
531
|
+
if tbl_md.is_mutable and lock_mutable_tree:
|
|
532
|
+
# also lock mutable views
|
|
533
|
+
tv = self.get_tbl_version(tbl_id, effective_version, validate_initialized=True)
|
|
534
|
+
for view in tv.mutable_views:
|
|
535
|
+
self._acquire_tbl_lock(
|
|
536
|
+
for_write=for_write,
|
|
537
|
+
tbl_id=view.id,
|
|
538
|
+
lock_mutable_tree=lock_mutable_tree,
|
|
539
|
+
raise_if_not_exists=raise_if_not_exists,
|
|
540
|
+
check_pending_ops=check_pending_ops,
|
|
541
|
+
)
|
|
542
|
+
return TableVersionHandle(tbl_id, effective_version)
|
|
543
|
+
|
|
544
|
+
def _finalize_pending_ops(self, tbl_id: UUID) -> None:
|
|
545
|
+
"""Finalizes all pending ops for the given table."""
|
|
546
|
+
num_retries = 0
|
|
547
|
+
while True:
|
|
548
|
+
try:
|
|
549
|
+
tbl_version: int
|
|
550
|
+
op: Optional[TableOp] = None
|
|
551
|
+
delete_next_op_stmt: sql.Delete
|
|
552
|
+
reset_has_pending_stmt: sql.Update
|
|
553
|
+
with self.begin_xact(
|
|
554
|
+
tbl_id=tbl_id, for_write=True, convert_db_excs=False, finalize_pending_ops=False
|
|
555
|
+
) as conn:
|
|
556
|
+
q = (
|
|
557
|
+
sql.select(schema.Table.md, schema.PendingTableOp)
|
|
558
|
+
.select_from(schema.Table)
|
|
559
|
+
.join(schema.PendingTableOp)
|
|
560
|
+
.where(schema.Table.id == tbl_id)
|
|
561
|
+
.where(schema.PendingTableOp.tbl_id == tbl_id)
|
|
562
|
+
.order_by(schema.PendingTableOp.op_sn)
|
|
563
|
+
.limit(1)
|
|
564
|
+
.with_for_update()
|
|
565
|
+
)
|
|
566
|
+
row = conn.execute(q).one_or_none()
|
|
567
|
+
if row is None:
|
|
568
|
+
return
|
|
569
|
+
tbl_version = row.md.get('current_version')
|
|
570
|
+
op = schema.md_from_dict(TableOp, row.op)
|
|
571
|
+
delete_next_op_stmt = sql.delete(schema.PendingTableOp).where(
|
|
572
|
+
schema.PendingTableOp.tbl_id == tbl_id, schema.PendingTableOp.op_sn == row.op_sn
|
|
573
|
+
)
|
|
574
|
+
reset_has_pending_stmt = (
|
|
575
|
+
sql.update(schema.Table)
|
|
576
|
+
.where(schema.Table.id == tbl_id)
|
|
577
|
+
.values(md=schema.Table.md.op('||')({'has_pending_ops': False}))
|
|
578
|
+
)
|
|
579
|
+
|
|
580
|
+
if op.needs_xact:
|
|
581
|
+
tv = self.get_tbl_version(
|
|
582
|
+
tbl_id, tbl_version, check_pending_ops=False, validate_initialized=True
|
|
583
|
+
)
|
|
584
|
+
tv.exec_op(op)
|
|
585
|
+
conn.execute(delete_next_op_stmt)
|
|
586
|
+
if op.op_sn == op.num_ops - 1:
|
|
587
|
+
conn.execute(reset_has_pending_stmt)
|
|
588
|
+
continue
|
|
589
|
+
|
|
590
|
+
# this op runs outside of a transaction
|
|
591
|
+
tv = self.get_tbl_version(tbl_id, tbl_version, check_pending_ops=False, validate_initialized=True)
|
|
592
|
+
tv.exec_op(op)
|
|
593
|
+
with self.begin_xact(
|
|
594
|
+
tbl_id=tbl_id, for_write=True, convert_db_excs=False, finalize_pending_ops=False
|
|
595
|
+
) as conn:
|
|
596
|
+
conn.execute(delete_next_op_stmt)
|
|
597
|
+
if op.op_sn == op.num_ops - 1:
|
|
598
|
+
conn.execute(reset_has_pending_stmt)
|
|
599
|
+
|
|
600
|
+
except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
|
|
601
|
+
# TODO: why are we still seeing these here, instead of them getting taken care of by the retry
|
|
602
|
+
# logic of begin_xact()?
|
|
603
|
+
if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
|
|
604
|
+
num_retries += 1
|
|
605
|
+
log_msg: str
|
|
606
|
+
if op is not None:
|
|
607
|
+
log_msg = f'finalize_pending_ops(): retrying ({num_retries}) op {op!s} after {type(e.orig)}'
|
|
608
|
+
else:
|
|
609
|
+
log_msg = f'finalize_pending_ops(): retrying ({num_retries}) after {type(e.orig)}'
|
|
610
|
+
Env.get().console_logger.debug(log_msg)
|
|
611
|
+
time.sleep(random.uniform(0.1, 0.5))
|
|
612
|
+
continue
|
|
613
|
+
else:
|
|
614
|
+
raise
|
|
615
|
+
except Exception as e:
|
|
616
|
+
Env.get().console_logger.debug(f'finalize_pending_ops(): caught {e}')
|
|
617
|
+
raise
|
|
618
|
+
|
|
619
|
+
num_retries = 0
|
|
620
|
+
|
|
621
|
+
def _debug_str(self) -> str:
|
|
622
|
+
tv_str = '\n'.join(str(k) for k in self._tbl_versions)
|
|
623
|
+
tbl_str = '\n'.join(str(k) for k in self._tbls)
|
|
624
|
+
return f'tbl_versions:\n{tv_str}\ntbls:\n{tbl_str}'
|
|
397
625
|
|
|
398
626
|
def _get_mutable_tree(self, tbl_id: UUID) -> set[UUID]:
|
|
399
627
|
"""Returns ids of all tables that form the tree of mutable views starting at tbl_id; includes the root."""
|
|
400
|
-
|
|
628
|
+
assert (tbl_id, None) in self._tbl_versions, (
|
|
629
|
+
f'({tbl_id}, None) not in {self._tbl_versions.keys()}\n{self._debug_str()}'
|
|
630
|
+
)
|
|
631
|
+
tv = self.get_tbl_version(tbl_id, None, validate_initialized=True)
|
|
401
632
|
result: set[UUID] = {tv.id}
|
|
402
633
|
for view in tv.mutable_views:
|
|
403
634
|
result.update(self._get_mutable_tree(view.id))
|
|
@@ -408,7 +639,9 @@ class Catalog:
|
|
|
408
639
|
assert self._column_dependents is None
|
|
409
640
|
self._column_dependents = defaultdict(set)
|
|
410
641
|
for tbl_id in mutable_tree:
|
|
411
|
-
assert tbl_id in self._column_dependencies
|
|
642
|
+
assert tbl_id in self._column_dependencies, (
|
|
643
|
+
f'{tbl_id} not in {self._column_dependencies.keys()}\n{self._debug_str()}'
|
|
644
|
+
)
|
|
412
645
|
for col, dependencies in self._column_dependencies[tbl_id].items():
|
|
413
646
|
for dependency in dependencies:
|
|
414
647
|
if dependency.tbl_id not in mutable_tree:
|
|
@@ -416,13 +649,25 @@ class Catalog:
|
|
|
416
649
|
dependents = self._column_dependents[dependency]
|
|
417
650
|
dependents.add(col)
|
|
418
651
|
|
|
652
|
+
def record_column_dependencies(self, tbl_version: TableVersion) -> None:
|
|
653
|
+
"""Update self._column_dependencies. Only valid for mutable versions."""
|
|
654
|
+
from pixeltable.exprs import Expr
|
|
655
|
+
|
|
656
|
+
assert tbl_version.is_mutable
|
|
657
|
+
dependencies: dict[QColumnId, set[QColumnId]] = {}
|
|
658
|
+
for col in tbl_version.cols_by_id.values():
|
|
659
|
+
if col.value_expr_dict is None:
|
|
660
|
+
continue
|
|
661
|
+
dependencies[QColumnId(tbl_version.id, col.id)] = Expr.get_refd_column_ids(col.value_expr_dict)
|
|
662
|
+
self._column_dependencies[tbl_version.id] = dependencies
|
|
663
|
+
|
|
419
664
|
def get_column_dependents(self, tbl_id: UUID, col_id: int) -> set[Column]:
|
|
420
665
|
"""Return all Columns that transitively depend on the given column."""
|
|
421
666
|
assert self._column_dependents is not None
|
|
422
667
|
dependents = self._column_dependents[QColumnId(tbl_id, col_id)]
|
|
423
668
|
result: set[Column] = set()
|
|
424
669
|
for dependent in dependents:
|
|
425
|
-
tv = self.get_tbl_version(dependent.tbl_id, None)
|
|
670
|
+
tv = self.get_tbl_version(dependent.tbl_id, None, validate_initialized=True)
|
|
426
671
|
col = tv.cols_by_id[dependent.col_id]
|
|
427
672
|
result.add(col)
|
|
428
673
|
return result
|
|
@@ -471,7 +716,7 @@ class Catalog:
|
|
|
471
716
|
dir_entries: dict[str, Catalog.DirEntry]
|
|
472
717
|
table: Optional[schema.Table]
|
|
473
718
|
|
|
474
|
-
@
|
|
719
|
+
@retry_loop(for_write=False)
|
|
475
720
|
def get_dir_contents(self, dir_path: Path, recursive: bool = False) -> dict[str, DirEntry]:
|
|
476
721
|
dir = self._get_schema_object(dir_path, expected=Dir, raise_if_not_exists=True)
|
|
477
722
|
return self._get_dir_contents(dir._id, recursive=recursive)
|
|
@@ -498,7 +743,7 @@ class Catalog:
|
|
|
498
743
|
|
|
499
744
|
return result
|
|
500
745
|
|
|
501
|
-
@
|
|
746
|
+
@retry_loop(for_write=True)
|
|
502
747
|
def move(self, path: Path, new_path: Path) -> None:
|
|
503
748
|
self._move(path, new_path)
|
|
504
749
|
|
|
@@ -540,6 +785,7 @@ class Catalog:
|
|
|
540
785
|
- if both add and drop (= two directories are involved), lock the directories in a pre-determined order
|
|
541
786
|
(in this case, by name) in order to prevent deadlocks between concurrent directory modifications
|
|
542
787
|
"""
|
|
788
|
+
assert drop_expected in (None, Table, Dir), drop_expected
|
|
543
789
|
assert (add_dir_path is None) == (add_name is None)
|
|
544
790
|
assert (drop_dir_path is None) == (drop_name is None)
|
|
545
791
|
dir_paths: set[Path] = set()
|
|
@@ -553,7 +799,7 @@ class Catalog:
|
|
|
553
799
|
for p in sorted(dir_paths):
|
|
554
800
|
dir = self._get_dir(p, lock_dir=True)
|
|
555
801
|
if dir is None:
|
|
556
|
-
raise excs.Error(f'Directory {
|
|
802
|
+
raise excs.Error(f'Directory {p!r} does not exist.')
|
|
557
803
|
if p == add_dir_path:
|
|
558
804
|
add_dir = dir
|
|
559
805
|
if p == drop_dir_path:
|
|
@@ -564,19 +810,17 @@ class Catalog:
|
|
|
564
810
|
add_obj = self._get_dir_entry(add_dir.id, add_name, lock_entry=True)
|
|
565
811
|
if add_obj is not None and raise_if_exists:
|
|
566
812
|
add_path = add_dir_path.append(add_name)
|
|
567
|
-
raise excs.Error(f'Path {
|
|
813
|
+
raise excs.Error(f'Path {add_path!r} already exists.')
|
|
568
814
|
|
|
569
815
|
drop_obj: Optional[SchemaObject] = None
|
|
570
816
|
if drop_dir is not None:
|
|
571
817
|
drop_path = drop_dir_path.append(drop_name)
|
|
572
818
|
drop_obj = self._get_dir_entry(drop_dir.id, drop_name, lock_entry=True)
|
|
573
819
|
if drop_obj is None and raise_if_not_exists:
|
|
574
|
-
raise excs.Error(f'Path {
|
|
820
|
+
raise excs.Error(f'Path {drop_path!r} does not exist.')
|
|
575
821
|
if drop_obj is not None and drop_expected is not None and not isinstance(drop_obj, drop_expected):
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
f'but is a {type(drop_obj)._display_name()}'
|
|
579
|
-
)
|
|
822
|
+
expected_name = 'table' if drop_expected is Table else 'directory'
|
|
823
|
+
raise excs.Error(f'{drop_path!r} needs to be a {expected_name} but is a {drop_obj._display_name()}')
|
|
580
824
|
|
|
581
825
|
add_dir_obj = Dir(add_dir.id, add_dir.parent_id, add_dir.md['name']) if add_dir is not None else None
|
|
582
826
|
return add_obj, add_dir_obj, drop_obj
|
|
@@ -602,7 +846,7 @@ class Catalog:
|
|
|
602
846
|
|
|
603
847
|
# check for table
|
|
604
848
|
if lock_entry:
|
|
605
|
-
self.
|
|
849
|
+
self._acquire_tbl_lock(for_write=True, dir_id=dir_id, raise_if_not_exists=False, tbl_name=name)
|
|
606
850
|
q = sql.select(schema.Table.id).where(
|
|
607
851
|
schema.Table.dir_id == dir_id,
|
|
608
852
|
schema.Table.md['name'].astext == name,
|
|
@@ -633,12 +877,12 @@ class Catalog:
|
|
|
633
877
|
- raise_if_not_exists is True and the path does not exist
|
|
634
878
|
- expected is not None and the existing object has a different type
|
|
635
879
|
"""
|
|
880
|
+
assert expected in (None, Table, Dir), expected
|
|
881
|
+
|
|
636
882
|
if path.is_root:
|
|
637
883
|
# the root dir
|
|
638
884
|
if expected is not None and expected is not Dir:
|
|
639
|
-
raise excs.Error(
|
|
640
|
-
f'{str(path)!r} needs to be a {expected._display_name()} but is a {Dir._display_name()}'
|
|
641
|
-
)
|
|
885
|
+
raise excs.Error(f'{path!r} needs to be a table but is a dir')
|
|
642
886
|
dir = self._get_dir(path, lock_dir=lock_obj)
|
|
643
887
|
if dir is None:
|
|
644
888
|
raise excs.Error(f'Unknown user: {Env.get().user}')
|
|
@@ -647,33 +891,32 @@ class Catalog:
|
|
|
647
891
|
parent_path = path.parent
|
|
648
892
|
parent_dir = self._get_dir(parent_path, lock_dir=lock_parent)
|
|
649
893
|
if parent_dir is None:
|
|
650
|
-
raise excs.Error(f'Directory {
|
|
894
|
+
raise excs.Error(f'Directory {parent_path!r} does not exist.')
|
|
651
895
|
obj = self._get_dir_entry(parent_dir.id, path.name, lock_entry=lock_obj)
|
|
652
896
|
|
|
653
897
|
if obj is None and raise_if_not_exists:
|
|
654
|
-
raise excs.Error(f'Path {
|
|
898
|
+
raise excs.Error(f'Path {path!r} does not exist.')
|
|
655
899
|
elif obj is not None and raise_if_exists:
|
|
656
|
-
raise excs.Error(f'Path {
|
|
900
|
+
raise excs.Error(f'Path {path!r} is an existing {obj._display_name()}.')
|
|
657
901
|
elif obj is not None and expected is not None and not isinstance(obj, expected):
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
)
|
|
902
|
+
expected_name = 'table' if expected is Table else 'directory'
|
|
903
|
+
raise excs.Error(f'{path!r} needs to be a {expected_name} but is a {obj._display_name()}.')
|
|
661
904
|
return obj
|
|
662
905
|
|
|
663
906
|
def get_table_by_id(self, tbl_id: UUID) -> Optional[Table]:
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
return
|
|
675
|
-
|
|
676
|
-
@
|
|
907
|
+
"""Must be executed inside a transaction. Might raise PendingTableOpsError."""
|
|
908
|
+
if tbl_id in self._tbls:
|
|
909
|
+
return self._tbls[tbl_id]
|
|
910
|
+
tbl = self._load_tbl(tbl_id)
|
|
911
|
+
# # if this is a mutable table, we also need to have its mutable views loaded, in order to track column
|
|
912
|
+
# # dependencies
|
|
913
|
+
# tbl_version = tbl._tbl_version.get()
|
|
914
|
+
# if tbl_version.is_mutable:
|
|
915
|
+
# for v in tbl_version.mutable_views:
|
|
916
|
+
# _ = self.get_table_by_id(v.id)
|
|
917
|
+
return tbl
|
|
918
|
+
|
|
919
|
+
@retry_loop(for_write=True)
|
|
677
920
|
def create_table(
|
|
678
921
|
self,
|
|
679
922
|
path: Path,
|
|
@@ -706,7 +949,6 @@ class Catalog:
|
|
|
706
949
|
self._tbls[tbl._id] = tbl
|
|
707
950
|
return tbl
|
|
708
951
|
|
|
709
|
-
@_retry_loop(for_write=True)
|
|
710
952
|
def create_view(
|
|
711
953
|
self,
|
|
712
954
|
path: Path,
|
|
@@ -722,102 +964,112 @@ class Catalog:
|
|
|
722
964
|
media_validation: MediaValidation,
|
|
723
965
|
if_exists: IfExistsParam,
|
|
724
966
|
) -> Table:
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
967
|
+
@retry_loop(for_write=True)
|
|
968
|
+
def create_fn() -> UUID:
|
|
969
|
+
if not is_snapshot and base.is_mutable():
|
|
970
|
+
# this is a mutable view of a mutable base; X-lock the base and advance its view_sn before adding
|
|
971
|
+
# the view
|
|
972
|
+
self._acquire_tbl_lock(tbl_id=base.tbl_id, for_write=True)
|
|
973
|
+
base_tv = self.get_tbl_version(base.tbl_id, None, validate_initialized=True)
|
|
974
|
+
base_tv.tbl_md.view_sn += 1
|
|
975
|
+
result = Env.get().conn.execute(
|
|
976
|
+
sql.update(schema.Table)
|
|
977
|
+
.values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
|
|
978
|
+
.where(schema.Table.id == base.tbl_id)
|
|
979
|
+
)
|
|
980
|
+
assert result.rowcount == 1, result.rowcount
|
|
738
981
|
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
982
|
+
existing = self._handle_path_collision(path, View, is_snapshot, if_exists)
|
|
983
|
+
if existing is not None:
|
|
984
|
+
assert isinstance(existing, View)
|
|
985
|
+
return existing._id
|
|
743
986
|
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
987
|
+
dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
|
|
988
|
+
assert dir is not None
|
|
989
|
+
if iterator is None:
|
|
990
|
+
iterator_class, iterator_args = None, None
|
|
991
|
+
else:
|
|
992
|
+
iterator_class, iterator_args = iterator
|
|
993
|
+
md, ops = View._create(
|
|
994
|
+
dir._id,
|
|
995
|
+
path.name,
|
|
996
|
+
base=base,
|
|
997
|
+
select_list=select_list,
|
|
998
|
+
additional_columns=additional_columns,
|
|
999
|
+
predicate=where,
|
|
1000
|
+
sample_clause=sample_clause,
|
|
1001
|
+
is_snapshot=is_snapshot,
|
|
1002
|
+
iterator_cls=iterator_class,
|
|
1003
|
+
iterator_args=iterator_args,
|
|
1004
|
+
num_retained_versions=num_retained_versions,
|
|
1005
|
+
comment=comment,
|
|
1006
|
+
media_validation=media_validation,
|
|
1007
|
+
)
|
|
1008
|
+
tbl_id = UUID(md.tbl_md.tbl_id)
|
|
1009
|
+
self.store_tbl_md(tbl_id, dir._id, md.tbl_md, md.version_md, md.schema_version_md, ops)
|
|
1010
|
+
return tbl_id
|
|
1011
|
+
|
|
1012
|
+
view_id = create_fn()
|
|
1013
|
+
if not is_snapshot and base.is_mutable():
|
|
1014
|
+
# invalidate base's TableVersion instance, so that it gets reloaded with the new mutable view
|
|
1015
|
+
self._clear_tv_cache(base.tbl_id, base.tbl_version.effective_version)
|
|
1016
|
+
# base_tv = self.get_tbl_version(base.tbl_id, base.tbl_version.effective_version, validate_initialized=True)
|
|
1017
|
+
# view_handle = TableVersionHandle(view_id, effective_version=None)
|
|
1018
|
+
# base_tv.mutable_views.add(view_handle)
|
|
1019
|
+
|
|
1020
|
+
# finalize pending ops
|
|
1021
|
+
with self.begin_xact(tbl_id=view_id, for_write=True, finalize_pending_ops=True):
|
|
1022
|
+
return self.get_table_by_id(view_id)
|
|
1023
|
+
|
|
1024
|
+
def _clear_tv_cache(self, tbl_id: UUID, effective_version: Optional[int]) -> None:
|
|
1025
|
+
if (tbl_id, effective_version) in self._tbl_versions:
|
|
1026
|
+
tv = self._tbl_versions[tbl_id, effective_version]
|
|
1027
|
+
tv.is_validated = False
|
|
1028
|
+
del self._tbl_versions[tbl_id, effective_version]
|
|
768
1029
|
|
|
769
|
-
|
|
770
|
-
def create_replica(
|
|
771
|
-
self, path: Path, md: list[schema.FullTableMd], if_exists: IfExistsParam = IfExistsParam.ERROR
|
|
772
|
-
) -> None:
|
|
1030
|
+
def create_replica(self, path: Path, md: list[schema.FullTableMd]) -> None:
|
|
773
1031
|
"""
|
|
774
1032
|
Creates table, table_version, and table_schema_version records for a replica with the given metadata.
|
|
775
1033
|
The metadata should be presented in standard "ancestor order", with the table being replicated at
|
|
776
1034
|
list position 0 and the (root) base table at list position -1.
|
|
777
|
-
|
|
778
|
-
TODO: create_replica() also needs to create the store tables and populate them in order to make
|
|
779
|
-
replica creation atomic.
|
|
780
1035
|
"""
|
|
1036
|
+
assert Env.get().in_xact
|
|
1037
|
+
|
|
781
1038
|
tbl_id = UUID(md[0].tbl_md.tbl_id)
|
|
782
1039
|
|
|
783
|
-
|
|
784
|
-
existing
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
'but a different table already exists at that location.'
|
|
790
|
-
)
|
|
791
|
-
assert isinstance(existing, View)
|
|
792
|
-
return
|
|
1040
|
+
existing = self._handle_path_collision(path, Table, False, if_exists=IfExistsParam.IGNORE) # type: ignore[type-abstract]
|
|
1041
|
+
if existing is not None and existing._id != tbl_id:
|
|
1042
|
+
raise excs.Error(
|
|
1043
|
+
f'An attempt was made to create a replica table at {path!r}, '
|
|
1044
|
+
'but a different table already exists at that location.'
|
|
1045
|
+
)
|
|
793
1046
|
|
|
794
1047
|
# Ensure that the system directory exists.
|
|
795
1048
|
self._create_dir(Path('_system', allow_system_paths=True), if_exists=IfExistsParam.IGNORE, parents=False)
|
|
796
1049
|
|
|
797
1050
|
# Now check to see if this table already exists in the catalog.
|
|
798
|
-
existing =
|
|
1051
|
+
existing = self.get_table_by_id(tbl_id)
|
|
799
1052
|
if existing is not None:
|
|
800
1053
|
existing_path = Path(existing._path(), allow_system_paths=True)
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
# into a named location), this will be a no-op, but it still serves to validate that the newly received
|
|
813
|
-
# metadata is identical to what's in the catalog.
|
|
814
|
-
self.__store_replica_md(path, md[0])
|
|
1054
|
+
if existing_path != path:
|
|
1055
|
+
# It does exist, under a different path from the specified one.
|
|
1056
|
+
if not existing_path.is_system_path:
|
|
1057
|
+
raise excs.Error(
|
|
1058
|
+
f'That table has already been replicated as {existing_path!r}.\n'
|
|
1059
|
+
f'Drop the existing replica if you wish to re-create it.'
|
|
1060
|
+
)
|
|
1061
|
+
# If it's a system table, then this means it was created at some point as the ancestor of some other
|
|
1062
|
+
# table (a snapshot-over-snapshot scenario). In that case, we simply move it to the new (named)
|
|
1063
|
+
# location.
|
|
1064
|
+
self._move(existing_path, path)
|
|
815
1065
|
|
|
816
|
-
# Now store the metadata for
|
|
1066
|
+
# Now store the metadata for this replica's proper ancestors. If one or more proper ancestors
|
|
817
1067
|
# do not yet exist in the store, they will be created as anonymous system tables.
|
|
818
|
-
|
|
1068
|
+
# We instantiate the ancestors starting with the base table and ending with the immediate parent of the
|
|
1069
|
+
# table being replicated.
|
|
1070
|
+
for ancestor_md in md[:0:-1]:
|
|
819
1071
|
ancestor_id = UUID(ancestor_md.tbl_md.tbl_id)
|
|
820
|
-
replica =
|
|
1072
|
+
replica = self.get_table_by_id(ancestor_id)
|
|
821
1073
|
replica_path: Path
|
|
822
1074
|
if replica is None:
|
|
823
1075
|
# We've never seen this table before. Create a new anonymous system table for it.
|
|
@@ -828,12 +1080,22 @@ class Catalog:
|
|
|
828
1080
|
# that was directly replicated by the user at some point). In either case, use the existing path.
|
|
829
1081
|
replica_path = Path(replica._path(), allow_system_paths=True)
|
|
830
1082
|
|
|
831
|
-
# Store the metadata; it could be a new version (in which case a new record will be created) or a
|
|
832
|
-
#
|
|
1083
|
+
# Store the metadata; it could be a new version (in which case a new record will be created), or a known
|
|
1084
|
+
# version (in which case the newly received metadata will be validated as identical).
|
|
1085
|
+
# If it's a new version, this will result in a new TableVersion record being created.
|
|
833
1086
|
self.__store_replica_md(replica_path, ancestor_md)
|
|
834
1087
|
|
|
835
|
-
|
|
836
|
-
|
|
1088
|
+
# Now we must clear cached metadata for the ancestor table, to force the next table operation to pick up
|
|
1089
|
+
# the new TableVersion instance. This is necessary because computed columns of descendant tables might
|
|
1090
|
+
# reference columns of the ancestor table that only exist in the new version.
|
|
1091
|
+
replica = Catalog.get().get_table_by_id(ancestor_id)
|
|
1092
|
+
assert replica is not None # If it didn't exist before, it must have been created by now.
|
|
1093
|
+
replica._tbl_version_path.clear_cached_md()
|
|
1094
|
+
|
|
1095
|
+
# Finally, store the metadata for the table being replicated; as before, it could be a new version or a known
|
|
1096
|
+
# version. If it's a new version, then a TableVersion record will be created, unless the table being replicated
|
|
1097
|
+
# is a pure snapshot.
|
|
1098
|
+
self.__store_replica_md(path, md[0])
|
|
837
1099
|
|
|
838
1100
|
def __store_replica_md(self, path: Path, md: schema.FullTableMd) -> None:
|
|
839
1101
|
_logger.info(f'Creating replica table at {path!r} with ID: {md.tbl_md.tbl_id}')
|
|
@@ -917,13 +1179,20 @@ class Catalog:
|
|
|
917
1179
|
|
|
918
1180
|
self.store_tbl_md(UUID(tbl_id), None, new_tbl_md, new_version_md, new_schema_version_md)
|
|
919
1181
|
|
|
920
|
-
|
|
1182
|
+
if new_version_md is not None and not md.is_pure_snapshot:
|
|
1183
|
+
# It's a new version of a table that has a physical store, so we need to create a TableVersion instance.
|
|
1184
|
+
TableVersion.create_replica(md)
|
|
1185
|
+
|
|
1186
|
+
@retry_loop(for_write=False)
|
|
921
1187
|
def get_table(self, path: Path) -> Table:
|
|
922
1188
|
obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
|
|
923
1189
|
assert isinstance(obj, Table)
|
|
1190
|
+
# We need to clear cached metadata from tbl_version_path, in case the schema has been changed
|
|
1191
|
+
# by another process.
|
|
1192
|
+
obj._tbl_version_path.clear_cached_md()
|
|
924
1193
|
return obj
|
|
925
1194
|
|
|
926
|
-
@
|
|
1195
|
+
@retry_loop(for_write=True)
|
|
927
1196
|
def drop_table(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
|
|
928
1197
|
tbl = self._get_schema_object(
|
|
929
1198
|
path,
|
|
@@ -933,7 +1202,7 @@ class Catalog:
|
|
|
933
1202
|
lock_obj=False,
|
|
934
1203
|
)
|
|
935
1204
|
if tbl is None:
|
|
936
|
-
_logger.info(f'Skipped table {
|
|
1205
|
+
_logger.info(f'Skipped table {path!r} (does not exist).')
|
|
937
1206
|
return
|
|
938
1207
|
assert isinstance(tbl, Table)
|
|
939
1208
|
|
|
@@ -941,7 +1210,7 @@ class Catalog:
|
|
|
941
1210
|
# this is a mutable view of a mutable base;
|
|
942
1211
|
# lock the base before the view, in order to avoid deadlocks with concurrent inserts/updates
|
|
943
1212
|
base_id = tbl._tbl_version_path.base.tbl_id
|
|
944
|
-
self.
|
|
1213
|
+
self._acquire_tbl_lock(tbl_id=base_id, for_write=True, lock_mutable_tree=False)
|
|
945
1214
|
|
|
946
1215
|
self._drop_tbl(tbl, force=force, is_replace=False)
|
|
947
1216
|
|
|
@@ -956,7 +1225,7 @@ class Catalog:
|
|
|
956
1225
|
in the same directory with the same name (which could lead to duplicate names if we get aborted)
|
|
957
1226
|
"""
|
|
958
1227
|
self._acquire_dir_xlock(dir_id=tbl._dir_id)
|
|
959
|
-
self.
|
|
1228
|
+
self._acquire_tbl_lock(tbl_id=tbl._id, for_write=True, lock_mutable_tree=False)
|
|
960
1229
|
|
|
961
1230
|
view_ids = self.get_view_ids(tbl._id, for_update=True)
|
|
962
1231
|
if len(view_ids) > 0:
|
|
@@ -980,8 +1249,9 @@ class Catalog:
|
|
|
980
1249
|
# if this is a mutable view of a mutable base, advance the base's view_sn
|
|
981
1250
|
if isinstance(tbl, View) and tbl._tbl_version_path.is_mutable() and tbl._tbl_version_path.base.is_mutable():
|
|
982
1251
|
base_id = tbl._tbl_version_path.base.tbl_id
|
|
983
|
-
base_tv = self.get_tbl_version(base_id, None)
|
|
1252
|
+
base_tv = self.get_tbl_version(base_id, None, validate_initialized=True)
|
|
984
1253
|
base_tv.tbl_md.view_sn += 1
|
|
1254
|
+
self._modified_tvs.add(base_tv.handle)
|
|
985
1255
|
result = Env.get().conn.execute(
|
|
986
1256
|
sql.update(schema.Table.__table__)
|
|
987
1257
|
.values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
|
|
@@ -989,23 +1259,26 @@ class Catalog:
|
|
|
989
1259
|
)
|
|
990
1260
|
assert result.rowcount == 1, result.rowcount
|
|
991
1261
|
|
|
1262
|
+
if tbl._tbl_version is not None:
|
|
1263
|
+
# invalidate the TableVersion instance when we're done so that existing references to it can find out it
|
|
1264
|
+
# has been dropped
|
|
1265
|
+
self._modified_tvs.add(tbl._tbl_version)
|
|
992
1266
|
tv = tbl._tbl_version.get() if tbl._tbl_version is not None else None
|
|
993
|
-
if tv is not None:
|
|
1267
|
+
# if tv is not None:
|
|
1268
|
+
# tv = tbl._tbl_version.get()
|
|
1269
|
+
# # invalidate the TableVersion instance so that existing references to it can find out it has been dropped
|
|
1270
|
+
# tv.is_validated = False
|
|
1271
|
+
if tbl._tbl_version is not None:
|
|
1272
|
+
# drop the store table before deleting the Table record
|
|
994
1273
|
tv = tbl._tbl_version.get()
|
|
995
|
-
|
|
996
|
-
tv.is_validated = False
|
|
1274
|
+
tv.drop()
|
|
997
1275
|
|
|
998
1276
|
self.delete_tbl_md(tbl._id)
|
|
999
1277
|
assert tbl._id in self._tbls
|
|
1000
1278
|
del self._tbls[tbl._id]
|
|
1001
1279
|
_logger.info(f'Dropped table `{tbl._path()}`.')
|
|
1002
1280
|
|
|
1003
|
-
|
|
1004
|
-
tv.drop()
|
|
1005
|
-
assert (tv.id, tv.effective_version) in self._tbl_versions
|
|
1006
|
-
del self._tbl_versions[tv.id, tv.effective_version]
|
|
1007
|
-
|
|
1008
|
-
@_retry_loop(for_write=True)
|
|
1281
|
+
@retry_loop(for_write=True)
|
|
1009
1282
|
def create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
|
|
1010
1283
|
return self._create_dir(path, if_exists, parents)
|
|
1011
1284
|
|
|
@@ -1018,7 +1291,7 @@ class Catalog:
|
|
|
1018
1291
|
# parent = self._get_schema_object(path.parent)
|
|
1019
1292
|
# assert parent is not None
|
|
1020
1293
|
# dir = Dir._create(parent._id, path.name)
|
|
1021
|
-
# Env.get().console_logger.info(f'Created directory {
|
|
1294
|
+
# Env.get().console_logger.info(f'Created directory {path!r}.')
|
|
1022
1295
|
# return dir
|
|
1023
1296
|
|
|
1024
1297
|
if parents:
|
|
@@ -1037,10 +1310,10 @@ class Catalog:
|
|
|
1037
1310
|
return existing
|
|
1038
1311
|
assert parent is not None
|
|
1039
1312
|
dir = Dir._create(parent._id, path.name)
|
|
1040
|
-
Env.get().console_logger.info(f'Created directory {
|
|
1313
|
+
Env.get().console_logger.info(f'Created directory {path!r}.')
|
|
1041
1314
|
return dir
|
|
1042
1315
|
|
|
1043
|
-
@
|
|
1316
|
+
@retry_loop(for_write=True)
|
|
1044
1317
|
def drop_dir(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
|
|
1045
1318
|
_, _, schema_obj = self._prepare_dir_op(
|
|
1046
1319
|
drop_dir_path=path.parent,
|
|
@@ -1049,7 +1322,7 @@ class Catalog:
|
|
|
1049
1322
|
raise_if_not_exists=if_not_exists == IfNotExistsParam.ERROR and not force,
|
|
1050
1323
|
)
|
|
1051
1324
|
if schema_obj is None:
|
|
1052
|
-
_logger.info(f'Directory {
|
|
1325
|
+
_logger.info(f'Directory {path!r} does not exist; skipped drop_dir().')
|
|
1053
1326
|
return
|
|
1054
1327
|
self._drop_dir(schema_obj._id, path, force=force)
|
|
1055
1328
|
|
|
@@ -1062,7 +1335,7 @@ class Catalog:
|
|
|
1062
1335
|
q = sql.select(sql.func.count()).select_from(schema.Table).where(schema.Table.dir_id == dir_id)
|
|
1063
1336
|
num_tbls = conn.execute(q).scalar()
|
|
1064
1337
|
if num_subdirs + num_tbls > 0:
|
|
1065
|
-
raise excs.Error(f'Directory {
|
|
1338
|
+
raise excs.Error(f'Directory {dir_path!r} is not empty.')
|
|
1066
1339
|
|
|
1067
1340
|
# drop existing subdirs
|
|
1068
1341
|
self._acquire_dir_xlock(dir_id=dir_id)
|
|
@@ -1080,7 +1353,7 @@ class Catalog:
|
|
|
1080
1353
|
|
|
1081
1354
|
# self.drop_dir(dir_id)
|
|
1082
1355
|
conn.execute(sql.delete(schema.Dir).where(schema.Dir.id == dir_id))
|
|
1083
|
-
_logger.info(f'Removed directory {
|
|
1356
|
+
_logger.info(f'Removed directory {dir_path!r}.')
|
|
1084
1357
|
|
|
1085
1358
|
def get_view_ids(self, tbl_id: UUID, for_update: bool = False) -> list[UUID]:
|
|
1086
1359
|
"""Return the ids of views that directly reference the given table"""
|
|
@@ -1096,13 +1369,25 @@ class Catalog:
|
|
|
1096
1369
|
result = [r[0] for r in conn.execute(q).all()]
|
|
1097
1370
|
return result
|
|
1098
1371
|
|
|
1099
|
-
def get_tbl_version(
|
|
1372
|
+
def get_tbl_version(
|
|
1373
|
+
self,
|
|
1374
|
+
tbl_id: UUID,
|
|
1375
|
+
effective_version: Optional[int],
|
|
1376
|
+
check_pending_ops: Optional[bool] = None,
|
|
1377
|
+
validate_initialized: bool = False,
|
|
1378
|
+
) -> Optional[TableVersion]:
|
|
1379
|
+
"""
|
|
1380
|
+
Returns the TableVersion instance for the given table and version and updates the cache.
|
|
1381
|
+
|
|
1382
|
+
If present in the cache and the instance isn't validated, validates version and view_sn against the stored
|
|
1383
|
+
metadata.
|
|
1384
|
+
"""
|
|
1100
1385
|
# we need a transaction here, if we're not already in one; if this starts a new transaction,
|
|
1101
1386
|
# the returned TableVersion instance will not be validated
|
|
1102
1387
|
with self.begin_xact(for_write=False) as conn:
|
|
1103
1388
|
tv = self._tbl_versions.get((tbl_id, effective_version))
|
|
1104
1389
|
if tv is None:
|
|
1105
|
-
tv = self._load_tbl_version(tbl_id, effective_version)
|
|
1390
|
+
tv = self._load_tbl_version(tbl_id, effective_version, check_pending_ops=check_pending_ops)
|
|
1106
1391
|
elif not tv.is_validated:
|
|
1107
1392
|
# only live instances are invalidated
|
|
1108
1393
|
assert effective_version is None
|
|
@@ -1123,12 +1408,16 @@ class Catalog:
|
|
|
1123
1408
|
f'(cached/current version: {tv.version}/{current_version}, '
|
|
1124
1409
|
f'cached/current view_sn: {tv.tbl_md.view_sn}/{view_sn})'
|
|
1125
1410
|
)
|
|
1126
|
-
tv = self._load_tbl_version(tbl_id, None)
|
|
1411
|
+
tv = self._load_tbl_version(tbl_id, None, check_pending_ops=check_pending_ops)
|
|
1127
1412
|
else:
|
|
1128
1413
|
# the cached metadata is valid
|
|
1129
1414
|
tv.is_validated = True
|
|
1130
1415
|
|
|
1131
|
-
assert tv.is_validated
|
|
1416
|
+
assert tv.is_validated, f'{tbl_id}:{effective_version} not validated\n{tv.__dict__}\n{self._debug_str()}'
|
|
1417
|
+
if validate_initialized:
|
|
1418
|
+
assert tv.is_initialized, (
|
|
1419
|
+
f'{tbl_id}:{effective_version} not initialized\n{tv.__dict__}\n{self._debug_str()}'
|
|
1420
|
+
)
|
|
1132
1421
|
return tv
|
|
1133
1422
|
|
|
1134
1423
|
def remove_tbl_version(self, tbl_version: TableVersion) -> None:
|
|
@@ -1180,6 +1469,13 @@ class Catalog:
|
|
|
1180
1469
|
from .view import View
|
|
1181
1470
|
|
|
1182
1471
|
conn = Env.get().conn
|
|
1472
|
+
|
|
1473
|
+
# check for pending ops
|
|
1474
|
+
q = sql.select(sql.func.count()).where(schema.PendingTableOp.tbl_id == tbl_id)
|
|
1475
|
+
has_pending_ops = conn.execute(q).scalar() > 0
|
|
1476
|
+
if has_pending_ops:
|
|
1477
|
+
raise PendingTableOpsError(tbl_id)
|
|
1478
|
+
|
|
1183
1479
|
q = (
|
|
1184
1480
|
sql.select(schema.Table, schema.TableSchemaVersion)
|
|
1185
1481
|
.join(schema.TableSchemaVersion)
|
|
@@ -1196,11 +1492,11 @@ class Catalog:
|
|
|
1196
1492
|
row = conn.execute(q).one_or_none()
|
|
1197
1493
|
if row is None:
|
|
1198
1494
|
return None
|
|
1199
|
-
tbl_record,
|
|
1495
|
+
tbl_record, _ = _unpack_row(row, [schema.Table, schema.TableSchemaVersion])
|
|
1200
1496
|
|
|
1201
1497
|
tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
|
|
1202
1498
|
view_md = tbl_md.view_md
|
|
1203
|
-
if view_md is None:
|
|
1499
|
+
if view_md is None and not tbl_md.is_replica:
|
|
1204
1500
|
# this is a base table
|
|
1205
1501
|
if (tbl_id, None) not in self._tbl_versions:
|
|
1206
1502
|
_ = self._load_tbl_version(tbl_id, None)
|
|
@@ -1210,15 +1506,16 @@ class Catalog:
|
|
|
1210
1506
|
|
|
1211
1507
|
# this is a view; determine the sequence of TableVersions to load
|
|
1212
1508
|
tbl_version_path: list[tuple[UUID, Optional[int]]] = []
|
|
1213
|
-
|
|
1214
|
-
pure_snapshot = view_md.is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
|
|
1215
|
-
if pure_snapshot:
|
|
1509
|
+
if tbl_md.is_pure_snapshot:
|
|
1216
1510
|
# this is a pure snapshot, without a physical table backing it; we only need the bases
|
|
1217
1511
|
pass
|
|
1218
1512
|
else:
|
|
1219
|
-
effective_version =
|
|
1513
|
+
effective_version = (
|
|
1514
|
+
0 if view_md is not None and view_md.is_snapshot else None
|
|
1515
|
+
) # snapshots only have version 0
|
|
1220
1516
|
tbl_version_path.append((tbl_id, effective_version))
|
|
1221
|
-
|
|
1517
|
+
if view_md is not None:
|
|
1518
|
+
tbl_version_path.extend((UUID(tbl_id), version) for tbl_id, version in view_md.base_versions)
|
|
1222
1519
|
|
|
1223
1520
|
# load TableVersions, starting at the root
|
|
1224
1521
|
base_path: Optional[TableVersionPath] = None
|
|
@@ -1228,11 +1525,11 @@ class Catalog:
|
|
|
1228
1525
|
_ = self._load_tbl_version(id, effective_version)
|
|
1229
1526
|
view_path = TableVersionPath(TableVersionHandle(id, effective_version), base=base_path)
|
|
1230
1527
|
base_path = view_path
|
|
1231
|
-
view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=
|
|
1528
|
+
view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=tbl_md.is_pure_snapshot)
|
|
1232
1529
|
self._tbls[tbl_id] = view
|
|
1233
1530
|
return view
|
|
1234
1531
|
|
|
1235
|
-
@
|
|
1532
|
+
@retry_loop(for_write=False)
|
|
1236
1533
|
def collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
|
|
1237
1534
|
"""
|
|
1238
1535
|
Returns the history of up to n versions of the table with the given UUID.
|
|
@@ -1342,6 +1639,7 @@ class Catalog:
|
|
|
1342
1639
|
tbl_md: Optional[schema.TableMd],
|
|
1343
1640
|
version_md: Optional[schema.TableVersionMd],
|
|
1344
1641
|
schema_version_md: Optional[schema.TableSchemaVersionMd],
|
|
1642
|
+
pending_ops: Optional[list[TableOp]] = None,
|
|
1345
1643
|
) -> None:
|
|
1346
1644
|
"""
|
|
1347
1645
|
Stores metadata to the DB.
|
|
@@ -1356,6 +1654,9 @@ class Catalog:
|
|
|
1356
1654
|
If inserting `version_md` or `schema_version_md` would be a primary key violation, an exception will be raised.
|
|
1357
1655
|
"""
|
|
1358
1656
|
assert self._in_write_xact
|
|
1657
|
+
assert version_md is None or version_md.created_at > 0.0
|
|
1658
|
+
assert pending_ops is None or len(pending_ops) > 0
|
|
1659
|
+
assert pending_ops is None or tbl_md is not None # if we write pending ops, we must also write new tbl_md
|
|
1359
1660
|
session = Env.get().session
|
|
1360
1661
|
|
|
1361
1662
|
# Construct and insert or update table record if requested.
|
|
@@ -1366,6 +1667,9 @@ class Catalog:
|
|
|
1366
1667
|
assert tbl_md.current_schema_version == version_md.schema_version
|
|
1367
1668
|
if schema_version_md is not None:
|
|
1368
1669
|
assert tbl_md.current_schema_version == schema_version_md.schema_version
|
|
1670
|
+
if pending_ops is not None:
|
|
1671
|
+
tbl_md.has_pending_ops = True
|
|
1672
|
+
|
|
1369
1673
|
if dir_id is not None:
|
|
1370
1674
|
# We are inserting a record while creating a new table.
|
|
1371
1675
|
tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_md))
|
|
@@ -1396,8 +1700,33 @@ class Catalog:
|
|
|
1396
1700
|
tbl_id=tbl_id, schema_version=schema_version_md.schema_version, md=dataclasses.asdict(schema_version_md)
|
|
1397
1701
|
)
|
|
1398
1702
|
session.add(schema_version_record)
|
|
1703
|
+
|
|
1704
|
+
# make sure we don't have any pending ops
|
|
1705
|
+
assert session.query(schema.PendingTableOp).filter(schema.PendingTableOp.tbl_id == tbl_id).count() == 0
|
|
1706
|
+
|
|
1707
|
+
if pending_ops is not None:
|
|
1708
|
+
for op in pending_ops:
|
|
1709
|
+
op_record = schema.PendingTableOp(tbl_id=tbl_id, op_sn=op.op_sn, op=dataclasses.asdict(op))
|
|
1710
|
+
session.add(op_record)
|
|
1711
|
+
|
|
1399
1712
|
session.flush() # Inform SQLAlchemy that we want to write these changes to the DB.
|
|
1400
1713
|
|
|
1714
|
+
def store_update_status(self, tbl_id: UUID, version: int, status: UpdateStatus) -> None:
|
|
1715
|
+
"""Update the TableVersion.md.update_status field"""
|
|
1716
|
+
assert self._in_write_xact
|
|
1717
|
+
conn = Env.get().conn
|
|
1718
|
+
|
|
1719
|
+
stmt = (
|
|
1720
|
+
sql.update(schema.TableVersion)
|
|
1721
|
+
.where(schema.TableVersion.tbl_id == tbl_id, schema.TableVersion.version == version)
|
|
1722
|
+
.values(
|
|
1723
|
+
md=schema.TableVersion.md.op('||')({'additional_md': {'update_status': dataclasses.asdict(status)}})
|
|
1724
|
+
)
|
|
1725
|
+
)
|
|
1726
|
+
|
|
1727
|
+
res = conn.execute(stmt)
|
|
1728
|
+
assert res.rowcount == 1, res.rowcount
|
|
1729
|
+
|
|
1401
1730
|
def delete_tbl_md(self, tbl_id: UUID) -> None:
|
|
1402
1731
|
"""
|
|
1403
1732
|
Deletes all table metadata from the store for the given table UUID.
|
|
@@ -1405,6 +1734,7 @@ class Catalog:
|
|
|
1405
1734
|
conn = Env.get().conn
|
|
1406
1735
|
conn.execute(sql.delete(schema.TableSchemaVersion.__table__).where(schema.TableSchemaVersion.tbl_id == tbl_id))
|
|
1407
1736
|
conn.execute(sql.delete(schema.TableVersion.__table__).where(schema.TableVersion.tbl_id == tbl_id))
|
|
1737
|
+
conn.execute(sql.delete(schema.PendingTableOp.__table__).where(schema.PendingTableOp.tbl_id == tbl_id))
|
|
1408
1738
|
conn.execute(sql.delete(schema.Table.__table__).where(schema.Table.id == tbl_id))
|
|
1409
1739
|
|
|
1410
1740
|
def load_replica_md(self, tbl: Table) -> list[schema.FullTableMd]:
|
|
@@ -1435,13 +1765,32 @@ class Catalog:
|
|
|
1435
1765
|
|
|
1436
1766
|
return md
|
|
1437
1767
|
|
|
1438
|
-
def _load_tbl_version(
|
|
1768
|
+
def _load_tbl_version(
|
|
1769
|
+
self, tbl_id: UUID, effective_version: Optional[int], check_pending_ops: bool = True
|
|
1770
|
+
) -> Optional[TableVersion]:
|
|
1439
1771
|
"""Creates TableVersion instance from stored metadata and registers it in _tbl_versions."""
|
|
1440
|
-
tbl_md,
|
|
1772
|
+
tbl_md, version_md, schema_version_md = self.load_tbl_md(tbl_id, effective_version)
|
|
1441
1773
|
view_md = tbl_md.view_md
|
|
1442
1774
|
|
|
1443
1775
|
conn = Env.get().conn
|
|
1444
1776
|
|
|
1777
|
+
if check_pending_ops:
|
|
1778
|
+
pending_ops_q = (
|
|
1779
|
+
sql.select(sql.func.count())
|
|
1780
|
+
.select_from(schema.Table)
|
|
1781
|
+
.join(schema.PendingTableOp)
|
|
1782
|
+
.where(schema.PendingTableOp.tbl_id == tbl_id)
|
|
1783
|
+
.where(schema.Table.id == tbl_id)
|
|
1784
|
+
)
|
|
1785
|
+
if effective_version is not None:
|
|
1786
|
+
# we only care about pending ops if the requested version is the current version
|
|
1787
|
+
pending_ops_q = pending_ops_q.where(
|
|
1788
|
+
sql.text(f"({schema.Table.__table__}.md->>'current_version')::int = {effective_version}")
|
|
1789
|
+
)
|
|
1790
|
+
has_pending_ops = conn.execute(pending_ops_q).scalar() > 0
|
|
1791
|
+
if has_pending_ops:
|
|
1792
|
+
raise PendingTableOpsError(tbl_id)
|
|
1793
|
+
|
|
1445
1794
|
# load mutable view ids for mutable TableVersions
|
|
1446
1795
|
mutable_view_ids: list[UUID] = []
|
|
1447
1796
|
# If this is a replica, effective_version should not be None. We see this today, because
|
|
@@ -1456,17 +1805,24 @@ class Catalog:
|
|
|
1456
1805
|
)
|
|
1457
1806
|
)
|
|
1458
1807
|
mutable_view_ids = [r[0] for r in conn.execute(q).all()]
|
|
1808
|
+
|
|
1459
1809
|
mutable_views = [TableVersionHandle(id, None) for id in mutable_view_ids]
|
|
1460
1810
|
|
|
1461
1811
|
tbl_version: TableVersion
|
|
1462
1812
|
if view_md is None:
|
|
1463
1813
|
# this is a base table
|
|
1464
1814
|
tbl_version = TableVersion(
|
|
1465
|
-
tbl_id, tbl_md, effective_version, schema_version_md, mutable_views=mutable_views
|
|
1815
|
+
tbl_id, tbl_md, version_md, effective_version, schema_version_md, mutable_views=mutable_views
|
|
1466
1816
|
)
|
|
1467
1817
|
else:
|
|
1468
1818
|
assert len(view_md.base_versions) > 0 # a view needs to have a base
|
|
1469
|
-
|
|
1819
|
+
# TODO: add TableVersionMd.is_pure_snapshot() and use that
|
|
1820
|
+
pure_snapshot = (
|
|
1821
|
+
view_md.is_snapshot
|
|
1822
|
+
and view_md.predicate is None
|
|
1823
|
+
and view_md.sample_clause is None
|
|
1824
|
+
and len(schema_version_md.columns) == 0
|
|
1825
|
+
)
|
|
1470
1826
|
assert not pure_snapshot # a pure snapshot doesn't have a physical table backing it, no point in loading it
|
|
1471
1827
|
|
|
1472
1828
|
base: TableVersionHandle
|
|
@@ -1480,6 +1836,7 @@ class Catalog:
|
|
|
1480
1836
|
tbl_version = TableVersion(
|
|
1481
1837
|
tbl_id,
|
|
1482
1838
|
tbl_md,
|
|
1839
|
+
version_md,
|
|
1483
1840
|
effective_version,
|
|
1484
1841
|
schema_version_md,
|
|
1485
1842
|
base_path=base_path,
|
|
@@ -1487,22 +1844,14 @@ class Catalog:
|
|
|
1487
1844
|
mutable_views=mutable_views,
|
|
1488
1845
|
)
|
|
1489
1846
|
|
|
1847
|
+
# register the instance before init()
|
|
1490
1848
|
self._tbl_versions[tbl_id, effective_version] = tbl_version
|
|
1849
|
+
# register this instance as modified, so that it gets purged if the transaction fails, it may not be
|
|
1850
|
+
# fully initialized
|
|
1851
|
+
self._modified_tvs.add(tbl_version.handle)
|
|
1491
1852
|
tbl_version.init()
|
|
1492
1853
|
return tbl_version
|
|
1493
1854
|
|
|
1494
|
-
def record_column_dependencies(self, tbl_version: TableVersion) -> None:
|
|
1495
|
-
"""Update self._column_dependencies. Only valid for non-snapshot versions."""
|
|
1496
|
-
from pixeltable.exprs import Expr
|
|
1497
|
-
|
|
1498
|
-
assert not tbl_version.is_snapshot
|
|
1499
|
-
dependencies: dict[QColumnId, set[QColumnId]] = {}
|
|
1500
|
-
for col in tbl_version.cols_by_id.values():
|
|
1501
|
-
if col.value_expr_dict is None:
|
|
1502
|
-
continue
|
|
1503
|
-
dependencies[QColumnId(tbl_version.id, col.id)] = Expr.get_refd_column_ids(col.value_expr_dict)
|
|
1504
|
-
self._column_dependencies[tbl_version.id] = dependencies
|
|
1505
|
-
|
|
1506
1855
|
def _init_store(self) -> None:
|
|
1507
1856
|
"""One-time initialization of the stored catalog. Idempotent."""
|
|
1508
1857
|
self.create_user(None)
|
|
@@ -1531,14 +1880,20 @@ class Catalog:
|
|
|
1531
1880
|
obj, _, _ = self._prepare_dir_op(add_dir_path=path.parent, add_name=path.name)
|
|
1532
1881
|
|
|
1533
1882
|
if if_exists == IfExistsParam.ERROR and obj is not None:
|
|
1534
|
-
raise excs.Error(f'Path {
|
|
1883
|
+
raise excs.Error(f'Path {path!r} is an existing {obj._display_name()}')
|
|
1535
1884
|
else:
|
|
1536
1885
|
is_snapshot = isinstance(obj, View) and obj._tbl_version_path.is_snapshot()
|
|
1537
1886
|
if obj is not None and (not isinstance(obj, expected_obj_type) or (expected_snapshot and not is_snapshot)):
|
|
1538
|
-
|
|
1887
|
+
if expected_obj_type is Dir:
|
|
1888
|
+
obj_type_str = 'directory'
|
|
1889
|
+
elif expected_obj_type is InsertableTable:
|
|
1890
|
+
obj_type_str = 'table'
|
|
1891
|
+
elif expected_obj_type is View:
|
|
1892
|
+
obj_type_str = 'snapshot' if expected_snapshot else 'view'
|
|
1893
|
+
else:
|
|
1894
|
+
raise AssertionError()
|
|
1539
1895
|
raise excs.Error(
|
|
1540
|
-
f'Path {
|
|
1541
|
-
f'Cannot {if_exists.name.lower()} it.'
|
|
1896
|
+
f'Path {path!r} already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.'
|
|
1542
1897
|
)
|
|
1543
1898
|
|
|
1544
1899
|
if obj is None:
|
|
@@ -1551,7 +1906,7 @@ class Catalog:
|
|
|
1551
1906
|
dir_contents = self._get_dir_contents(obj._id)
|
|
1552
1907
|
if len(dir_contents) > 0 and if_exists == IfExistsParam.REPLACE:
|
|
1553
1908
|
raise excs.Error(
|
|
1554
|
-
f'Directory {
|
|
1909
|
+
f'Directory {path!r} already exists and is not empty. '
|
|
1555
1910
|
'Use `if_exists="replace_force"` to replace it.'
|
|
1556
1911
|
)
|
|
1557
1912
|
self._drop_dir(obj._id, path, force=True)
|