pixeltable 0.4.3__py3-none-any.whl → 0.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/catalog.py +619 -255
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/insertable_table.py +9 -9
- pixeltable/catalog/path.py +59 -20
- pixeltable/catalog/schema_object.py +10 -4
- pixeltable/catalog/table.py +51 -53
- pixeltable/catalog/table_version.py +216 -156
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/tbl_ops.py +44 -0
- pixeltable/catalog/view.py +63 -65
- pixeltable/config.py +12 -4
- pixeltable/dataframe.py +75 -6
- pixeltable/env.py +46 -17
- pixeltable/exec/aggregation_node.py +1 -1
- pixeltable/exec/cache_prefetch_node.py +2 -6
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +10 -51
- pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
- pixeltable/exec/in_memory_data_node.py +17 -16
- pixeltable/exec/sql_node.py +6 -7
- pixeltable/exprs/column_ref.py +2 -1
- pixeltable/exprs/data_row.py +13 -13
- pixeltable/exprs/row_builder.py +16 -4
- pixeltable/exprs/string_op.py +1 -1
- pixeltable/func/expr_template_function.py +1 -4
- pixeltable/functions/date.py +1 -1
- pixeltable/functions/gemini.py +4 -4
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/openai.py +9 -6
- pixeltable/functions/timestamp.py +6 -6
- pixeltable/functions/video.py +2 -6
- pixeltable/globals.py +62 -33
- pixeltable/io/datarows.py +2 -1
- pixeltable/io/pandas.py +1 -0
- pixeltable/io/table_data_conduit.py +12 -13
- pixeltable/iterators/audio.py +17 -8
- pixeltable/iterators/image.py +5 -2
- pixeltable/metadata/schema.py +39 -2
- pixeltable/plan.py +5 -14
- pixeltable/share/packager.py +13 -13
- pixeltable/store.py +31 -7
- pixeltable/type_system.py +2 -1
- pixeltable/utils/filecache.py +1 -1
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/media_store.py +90 -34
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/METADATA +1 -1
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/RECORD +52 -51
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/LICENSE +0 -0
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/catalog.py
CHANGED
|
@@ -14,6 +14,8 @@ import psycopg
|
|
|
14
14
|
import sqlalchemy as sql
|
|
15
15
|
|
|
16
16
|
from pixeltable import exceptions as excs
|
|
17
|
+
|
|
18
|
+
# from pixeltable import exceptions as excs, UpdateStatus
|
|
17
19
|
from pixeltable.env import Env
|
|
18
20
|
from pixeltable.iterators import ComponentIterator
|
|
19
21
|
from pixeltable.metadata import schema
|
|
@@ -28,6 +30,8 @@ from .table import Table
|
|
|
28
30
|
from .table_version import TableVersion
|
|
29
31
|
from .table_version_handle import TableVersionHandle
|
|
30
32
|
from .table_version_path import TableVersionPath
|
|
33
|
+
from .tbl_ops import TableOp
|
|
34
|
+
from .update_status import UpdateStatus
|
|
31
35
|
from .view import View
|
|
32
36
|
|
|
33
37
|
if TYPE_CHECKING:
|
|
@@ -70,18 +74,35 @@ _MAX_RETRIES = -1
|
|
|
70
74
|
T = TypeVar('T')
|
|
71
75
|
|
|
72
76
|
|
|
73
|
-
def
|
|
77
|
+
def retry_loop(
|
|
78
|
+
*, tbl: Optional[TableVersionPath] = None, for_write: bool, lock_mutable_tree: bool = False
|
|
79
|
+
) -> Callable[[Callable[..., T]], Callable[..., T]]:
|
|
74
80
|
def decorator(op: Callable[..., T]) -> Callable[..., T]:
|
|
75
81
|
@functools.wraps(op)
|
|
76
82
|
def loop(*args: Any, **kwargs: Any) -> T:
|
|
83
|
+
cat = Catalog.get()
|
|
84
|
+
# retry_loop() is reentrant
|
|
85
|
+
if cat._in_retry_loop:
|
|
86
|
+
return op(*args, **kwargs)
|
|
87
|
+
|
|
77
88
|
num_retries = 0
|
|
78
89
|
while True:
|
|
90
|
+
cat._in_retry_loop = True
|
|
79
91
|
try:
|
|
80
92
|
# in order for retry to work, we need to make sure that there aren't any prior db updates
|
|
81
93
|
# that are part of an ongoing transaction
|
|
82
94
|
assert not Env.get().in_xact
|
|
83
|
-
with Catalog.get().begin_xact(
|
|
95
|
+
with Catalog.get().begin_xact(
|
|
96
|
+
tbl=tbl,
|
|
97
|
+
for_write=for_write,
|
|
98
|
+
convert_db_excs=False,
|
|
99
|
+
lock_mutable_tree=lock_mutable_tree,
|
|
100
|
+
finalize_pending_ops=True,
|
|
101
|
+
):
|
|
84
102
|
return op(*args, **kwargs)
|
|
103
|
+
except PendingTableOpsError as e:
|
|
104
|
+
Env.get().console_logger.debug(f'retry_loop(): finalizing pending ops for {e.tbl_id}')
|
|
105
|
+
Catalog.get()._finalize_pending_ops(e.tbl_id)
|
|
85
106
|
except sql.exc.DBAPIError as e:
|
|
86
107
|
# TODO: what other exceptions should we be looking for?
|
|
87
108
|
if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
|
|
@@ -97,16 +118,31 @@ def _retry_loop(*, for_write: bool) -> Callable[[Callable[..., T]], Callable[...
|
|
|
97
118
|
# for informational/debugging purposes
|
|
98
119
|
_logger.debug(f'retry_loop(): passing along {e}')
|
|
99
120
|
raise
|
|
121
|
+
finally:
|
|
122
|
+
cat._in_retry_loop = False
|
|
100
123
|
|
|
101
124
|
return loop
|
|
102
125
|
|
|
103
126
|
return decorator
|
|
104
127
|
|
|
105
128
|
|
|
129
|
+
class PendingTableOpsError(Exception):
|
|
130
|
+
tbl_id: UUID
|
|
131
|
+
|
|
132
|
+
def __init__(self, tbl_id: UUID) -> None:
|
|
133
|
+
self.tbl_id = tbl_id
|
|
134
|
+
|
|
135
|
+
|
|
106
136
|
class Catalog:
|
|
107
137
|
"""The functional interface to getting access to catalog objects
|
|
108
138
|
|
|
109
|
-
All interface functions must be called in the context of a transaction, started with Catalog.begin_xact()
|
|
139
|
+
All interface functions must be called in the context of a transaction, started with Catalog.begin_xact() or
|
|
140
|
+
via retry_loop().
|
|
141
|
+
|
|
142
|
+
When calling functions that involve Table or TableVersion instances, the catalog needs to get a chance to finalize
|
|
143
|
+
pending ops against those tables. To that end,
|
|
144
|
+
- use begin_xact(tbl) or begin_xact(tbl_id) if only accessing a single table
|
|
145
|
+
- use retry_loop() when accessing multiple tables (eg, pxt.ls())
|
|
110
146
|
|
|
111
147
|
Caching and invalidation of metadata:
|
|
112
148
|
- Catalog caches TableVersion instances in order to avoid excessive metadata loading
|
|
@@ -129,9 +165,11 @@ class Catalog:
|
|
|
129
165
|
# - mutable version of a table: version == None (even though TableVersion.version is set correctly)
|
|
130
166
|
# - snapshot versions: records the version of the snapshot
|
|
131
167
|
_tbl_versions: dict[tuple[UUID, Optional[int]], TableVersion]
|
|
132
|
-
_tbls: dict[UUID, Table]
|
|
168
|
+
_tbls: dict[tuple[UUID, Optional[int]], Table]
|
|
133
169
|
_in_write_xact: bool # True if we're in a write transaction
|
|
134
170
|
_x_locked_tbl_ids: set[UUID] # non-empty for write transactions
|
|
171
|
+
_modified_tvs: set[TableVersionHandle] # TableVersion instances modified in the current transaction
|
|
172
|
+
_in_retry_loop: bool
|
|
135
173
|
|
|
136
174
|
# cached column dependencies
|
|
137
175
|
# - key: table id, value: mapping from column id to its dependencies
|
|
@@ -164,6 +202,8 @@ class Catalog:
|
|
|
164
202
|
self._tbls = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
|
|
165
203
|
self._in_write_xact = False
|
|
166
204
|
self._x_locked_tbl_ids = set()
|
|
205
|
+
self._modified_tvs = set()
|
|
206
|
+
self._in_retry_loop = False
|
|
167
207
|
self._column_dependencies = {}
|
|
168
208
|
self._column_dependents = None
|
|
169
209
|
self._init_store()
|
|
@@ -214,9 +254,11 @@ class Catalog:
|
|
|
214
254
|
self,
|
|
215
255
|
*,
|
|
216
256
|
tbl: Optional[TableVersionPath] = None,
|
|
257
|
+
tbl_id: Optional[UUID] = None,
|
|
217
258
|
for_write: bool = False,
|
|
218
259
|
lock_mutable_tree: bool = False,
|
|
219
260
|
convert_db_excs: bool = True,
|
|
261
|
+
finalize_pending_ops: bool = True,
|
|
220
262
|
) -> Iterator[sql.Connection]:
|
|
221
263
|
"""
|
|
222
264
|
Return a context manager that yields a connection to the database. Idempotent.
|
|
@@ -227,7 +269,7 @@ class Catalog:
|
|
|
227
269
|
If tbl != None, follows this locking protocol:
|
|
228
270
|
- validates/reloads the TableVersion instances of tbl's ancestors (in the hope that this reduces potential
|
|
229
271
|
SerializationErrors later on)
|
|
230
|
-
- if for_write == True, x-locks Table record (by updating Table.lock_dummy; see
|
|
272
|
+
- if for_write == True, x-locks Table record (by updating Table.lock_dummy; see _acquire_tbl_lock())
|
|
231
273
|
- if for_write == False, validates TableVersion instance
|
|
232
274
|
- if lock_mutable_tree == True, also x-locks all mutable views of the table
|
|
233
275
|
- this needs to be done in a retry loop, because Postgres can decide to abort the transaction
|
|
@@ -237,10 +279,14 @@ class Catalog:
|
|
|
237
279
|
|
|
238
280
|
If convert_db_excs == True, converts DBAPIErrors into excs.Errors.
|
|
239
281
|
"""
|
|
282
|
+
assert tbl is None or tbl_id is None # at most one can be specified
|
|
240
283
|
if Env.get().in_xact:
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
284
|
+
# make sure that we requested the required table lock at the beginning of the transaction
|
|
285
|
+
if for_write:
|
|
286
|
+
if tbl is not None:
|
|
287
|
+
assert tbl.tbl_id in self._x_locked_tbl_ids, f'{tbl.tbl_id} not in {self._x_locked_tbl_ids}'
|
|
288
|
+
elif tbl_id is not None:
|
|
289
|
+
assert tbl_id in self._x_locked_tbl_ids, f'{tbl_id} not in {self._x_locked_tbl_ids}'
|
|
244
290
|
yield Env.get().conn
|
|
245
291
|
return
|
|
246
292
|
|
|
@@ -252,33 +298,66 @@ class Catalog:
|
|
|
252
298
|
# )
|
|
253
299
|
# _logger.debug(f'begin_xact(): {tv_msg}')
|
|
254
300
|
num_retries = 0
|
|
301
|
+
pending_ops_tbl_id: Optional[UUID] = None
|
|
302
|
+
has_exc = False # True if we exited the 'with ...begin_xact()' block with an exception
|
|
255
303
|
while True:
|
|
304
|
+
if pending_ops_tbl_id is not None:
|
|
305
|
+
Env.get().console_logger.debug(f'begin_xact(): finalizing pending ops for {pending_ops_tbl_id}')
|
|
306
|
+
self._finalize_pending_ops(pending_ops_tbl_id)
|
|
307
|
+
pending_ops_tbl_id = None
|
|
308
|
+
|
|
256
309
|
try:
|
|
257
|
-
self._in_write_xact =
|
|
310
|
+
self._in_write_xact = for_write
|
|
258
311
|
self._x_locked_tbl_ids = set()
|
|
312
|
+
self._modified_tvs = set()
|
|
259
313
|
self._column_dependents = None
|
|
314
|
+
has_exc = False
|
|
260
315
|
|
|
261
|
-
with Env.get().begin_xact() as conn:
|
|
262
|
-
if tbl is not None:
|
|
316
|
+
with Env.get().begin_xact(for_write=for_write) as conn:
|
|
317
|
+
if tbl is not None or tbl_id is not None:
|
|
263
318
|
try:
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
319
|
+
target: Optional[TableVersionHandle] = None
|
|
320
|
+
if tbl is not None:
|
|
321
|
+
if self._acquire_path_locks(
|
|
322
|
+
tbl=tbl,
|
|
323
|
+
for_write=for_write,
|
|
324
|
+
lock_mutable_tree=lock_mutable_tree,
|
|
325
|
+
check_pending_ops=finalize_pending_ops,
|
|
326
|
+
):
|
|
327
|
+
target = tbl.tbl_version
|
|
328
|
+
else:
|
|
329
|
+
target = self._acquire_tbl_lock(
|
|
330
|
+
tbl_id=tbl_id,
|
|
331
|
+
for_write=for_write,
|
|
332
|
+
lock_mutable_tree=lock_mutable_tree,
|
|
333
|
+
raise_if_not_exists=True,
|
|
334
|
+
check_pending_ops=finalize_pending_ops,
|
|
335
|
+
)
|
|
336
|
+
|
|
337
|
+
if target is None:
|
|
338
|
+
# didn't get the write lock
|
|
339
|
+
for_write = False
|
|
340
|
+
elif for_write:
|
|
341
|
+
# we know at this point that target is mutable because we got the X-lock
|
|
342
|
+
if lock_mutable_tree and not target.is_snapshot:
|
|
343
|
+
self._x_locked_tbl_ids = self._get_mutable_tree(target.id)
|
|
274
344
|
self._compute_column_dependents(self._x_locked_tbl_ids)
|
|
275
345
|
else:
|
|
276
|
-
self._x_locked_tbl_ids = {
|
|
346
|
+
self._x_locked_tbl_ids = {target.id}
|
|
277
347
|
if _logger.isEnabledFor(logging.DEBUG):
|
|
278
348
|
# validate only when we don't see errors
|
|
279
349
|
self.validate()
|
|
280
350
|
|
|
351
|
+
except PendingTableOpsError as e:
|
|
352
|
+
has_exc = True
|
|
353
|
+
if finalize_pending_ops:
|
|
354
|
+
# we remember which table id to finalize
|
|
355
|
+
pending_ops_tbl_id = e.tbl_id
|
|
356
|
+
# raise to abort the transaction
|
|
357
|
+
raise
|
|
358
|
+
|
|
281
359
|
except sql.exc.DBAPIError as e:
|
|
360
|
+
has_exc = True
|
|
282
361
|
if isinstance(
|
|
283
362
|
e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)
|
|
284
363
|
) and (num_retries < _MAX_RETRIES or _MAX_RETRIES == -1):
|
|
@@ -289,11 +368,20 @@ class Catalog:
|
|
|
289
368
|
else:
|
|
290
369
|
raise
|
|
291
370
|
|
|
292
|
-
self._in_write_xact = for_write
|
|
293
371
|
yield conn
|
|
294
372
|
return
|
|
295
373
|
|
|
374
|
+
except PendingTableOpsError:
|
|
375
|
+
has_exc = True
|
|
376
|
+
if pending_ops_tbl_id is not None:
|
|
377
|
+
# the next iteration of the loop will deal with pending ops for this table id
|
|
378
|
+
continue
|
|
379
|
+
else:
|
|
380
|
+
# we got this exception after getting the initial table locks and therefore need to abort
|
|
381
|
+
raise
|
|
382
|
+
|
|
296
383
|
except sql.exc.DBAPIError as e:
|
|
384
|
+
has_exc = True
|
|
297
385
|
# we got some db error during the actual operation (not just while trying to get locks on the metadata
|
|
298
386
|
# records): we convert these into Errors, if asked to do so, and abort
|
|
299
387
|
# TODO: what other concurrency-related exceptions should we expect?
|
|
@@ -301,12 +389,19 @@ class Catalog:
|
|
|
301
389
|
# we always convert UndefinedTable exceptions (they can't be retried)
|
|
302
390
|
if isinstance(e.orig, psycopg.errors.UndefinedTable):
|
|
303
391
|
# the table got dropped in the middle of the table operation
|
|
304
|
-
|
|
392
|
+
tbl_name = tbl.tbl_name() if tbl is not None else str(tbl_id) if tbl_id is not None else '?'
|
|
393
|
+
_logger.debug(f'Exception: undefined table ({tbl_name}): Caught {type(e.orig)}: {e!r}')
|
|
305
394
|
assert tbl is not None
|
|
306
|
-
raise excs.Error(f'Table was dropped: {
|
|
395
|
+
raise excs.Error(f'Table was dropped: {tbl_name}') from None
|
|
307
396
|
elif isinstance(e.orig, psycopg.errors.SerializationFailure) and convert_db_excs:
|
|
308
397
|
# we still got a serialization error, despite getting x-locks at the beginning
|
|
309
|
-
msg
|
|
398
|
+
msg: str
|
|
399
|
+
if tbl is not None:
|
|
400
|
+
msg = f'{tbl.tbl_name()} ({tbl.tbl_id})'
|
|
401
|
+
elif tbl_id is not None:
|
|
402
|
+
msg = f'{tbl_id}'
|
|
403
|
+
else:
|
|
404
|
+
msg = ''
|
|
310
405
|
_logger.debug(f'Exception: serialization failure: {msg} ({e})')
|
|
311
406
|
raise excs.Error(
|
|
312
407
|
'That Pixeltable operation could not be completed because it conflicted with another '
|
|
@@ -316,6 +411,10 @@ class Catalog:
|
|
|
316
411
|
else:
|
|
317
412
|
raise
|
|
318
413
|
|
|
414
|
+
except:
|
|
415
|
+
has_exc = True
|
|
416
|
+
raise
|
|
417
|
+
|
|
319
418
|
finally:
|
|
320
419
|
self._in_write_xact = False
|
|
321
420
|
self._x_locked_tbl_ids = set()
|
|
@@ -327,12 +426,24 @@ class Catalog:
|
|
|
327
426
|
_logger.debug(f'invalidating table version {tv.id}:None (tv={id(tv):x})')
|
|
328
427
|
tv.is_validated = False
|
|
329
428
|
|
|
429
|
+
if has_exc:
|
|
430
|
+
# purge all modified TableVersion instances, we can't guarantee they are still consistent with the
|
|
431
|
+
# stored metadata
|
|
432
|
+
for handle in self._modified_tvs:
|
|
433
|
+
self._clear_tv_cache(handle.id, handle.effective_version)
|
|
434
|
+
self._modified_tvs = set()
|
|
435
|
+
|
|
330
436
|
@property
|
|
331
437
|
def in_write_xact(self) -> bool:
|
|
332
438
|
return self._in_write_xact
|
|
333
439
|
|
|
334
440
|
def _acquire_path_locks(
|
|
335
|
-
self,
|
|
441
|
+
self,
|
|
442
|
+
*,
|
|
443
|
+
tbl: TableVersionPath,
|
|
444
|
+
for_write: bool = False,
|
|
445
|
+
lock_mutable_tree: bool = False,
|
|
446
|
+
check_pending_ops: Optional[bool] = None,
|
|
336
447
|
) -> bool:
|
|
337
448
|
"""
|
|
338
449
|
Path locking protocol:
|
|
@@ -341,33 +452,49 @@ class Catalog:
|
|
|
341
452
|
- refresh cached TableVersion of tbl or get X-lock, depending on for_write
|
|
342
453
|
- if lock_mutable_tree, also X-lock all mutable views of tbl
|
|
343
454
|
|
|
344
|
-
Returns False if trying to lock a pure snapshot with for_write == True
|
|
345
455
|
Raises Error if tbl doesn't exist.
|
|
456
|
+
Return False if the lock couldn't be acquired (X-lock on a non-mutable table), True otherwise.
|
|
346
457
|
"""
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
458
|
+
path_handles = tbl.get_tbl_versions()
|
|
459
|
+
read_handles = path_handles[:0:-1] if for_write else path_handles[::-1]
|
|
460
|
+
for handle in read_handles:
|
|
461
|
+
# update cache
|
|
462
|
+
_ = self.get_tbl_version(handle.id, handle.effective_version, validate_initialized=True)
|
|
350
463
|
if not for_write:
|
|
351
464
|
return True # nothing left to lock
|
|
352
|
-
|
|
465
|
+
handle = self._acquire_tbl_lock(
|
|
466
|
+
tbl_id=tbl.tbl_id,
|
|
467
|
+
for_write=True,
|
|
468
|
+
lock_mutable_tree=lock_mutable_tree,
|
|
469
|
+
raise_if_not_exists=True,
|
|
470
|
+
check_pending_ops=check_pending_ops,
|
|
471
|
+
)
|
|
472
|
+
# update cache
|
|
473
|
+
_ = self.get_tbl_version(path_handles[0].id, path_handles[0].effective_version, validate_initialized=True)
|
|
474
|
+
return handle is not None
|
|
353
475
|
|
|
354
|
-
def
|
|
476
|
+
def _acquire_tbl_lock(
|
|
355
477
|
self,
|
|
356
478
|
*,
|
|
479
|
+
for_write: bool,
|
|
357
480
|
tbl_id: Optional[UUID] = None,
|
|
358
481
|
dir_id: Optional[UUID] = None,
|
|
359
482
|
tbl_name: Optional[str] = None,
|
|
360
483
|
lock_mutable_tree: bool = False,
|
|
361
|
-
raise_if_not_exists: bool =
|
|
362
|
-
|
|
363
|
-
|
|
484
|
+
raise_if_not_exists: bool = True,
|
|
485
|
+
check_pending_ops: Optional[bool] = None,
|
|
486
|
+
) -> Optional[TableVersionHandle]:
|
|
487
|
+
"""
|
|
488
|
+
For writes: force acquisition of an X-lock on a Table record via a blind update.
|
|
364
489
|
|
|
365
490
|
Either tbl_id or dir_id/tbl_name need to be specified.
|
|
366
491
|
Returns True if the table was locked, False if it was a snapshot or not found.
|
|
367
492
|
If lock_mutable_tree, recursively locks all mutable views of the table.
|
|
368
493
|
|
|
369
|
-
Returns
|
|
494
|
+
Returns a handle to what was locked, None if the lock couldn't be acquired (eg, X-lock on a non-mutable table).
|
|
370
495
|
"""
|
|
496
|
+
assert (tbl_id is not None) != (dir_id is not None and tbl_name is not None)
|
|
497
|
+
assert (dir_id is None) == (tbl_name is None)
|
|
371
498
|
where_clause: sql.ColumnElement
|
|
372
499
|
if tbl_id is not None:
|
|
373
500
|
where_clause = schema.Table.id == tbl_id
|
|
@@ -378,26 +505,130 @@ class Catalog:
|
|
|
378
505
|
where_clause = sql.and_(where_clause, schema.Table.md['user'].astext == Env.get().user)
|
|
379
506
|
|
|
380
507
|
conn = Env.get().conn
|
|
381
|
-
|
|
508
|
+
q = sql.select(schema.Table).where(where_clause)
|
|
509
|
+
if for_write:
|
|
510
|
+
q = q.with_for_update(nowait=True)
|
|
511
|
+
row = conn.execute(q).one_or_none()
|
|
382
512
|
if row is None:
|
|
383
513
|
if raise_if_not_exists:
|
|
384
514
|
raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
|
|
385
|
-
return
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
if
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
515
|
+
return None # nothing to lock
|
|
516
|
+
tbl_md = schema.md_from_dict(schema.TableMd, row.md)
|
|
517
|
+
if for_write and tbl_md.is_mutable:
|
|
518
|
+
conn.execute(sql.update(schema.Table).values(lock_dummy=1).where(where_clause))
|
|
519
|
+
|
|
520
|
+
if check_pending_ops:
|
|
521
|
+
# check for pending ops after getting table lock
|
|
522
|
+
pending_ops_q = sql.select(sql.func.count()).where(schema.PendingTableOp.tbl_id == row.id)
|
|
523
|
+
has_pending_ops = conn.execute(pending_ops_q).scalar() > 0
|
|
524
|
+
if has_pending_ops:
|
|
525
|
+
raise PendingTableOpsError(row.id)
|
|
526
|
+
|
|
527
|
+
if for_write and not tbl_md.is_mutable:
|
|
528
|
+
return None # nothing to lock
|
|
529
|
+
|
|
530
|
+
effective_version = tbl_md.current_version if tbl_md.is_snapshot else None
|
|
531
|
+
if tbl_md.is_mutable and lock_mutable_tree:
|
|
532
|
+
# also lock mutable views
|
|
533
|
+
tv = self.get_tbl_version(tbl_id, effective_version, validate_initialized=True)
|
|
534
|
+
for view in tv.mutable_views:
|
|
535
|
+
self._acquire_tbl_lock(
|
|
536
|
+
for_write=for_write,
|
|
537
|
+
tbl_id=view.id,
|
|
538
|
+
lock_mutable_tree=lock_mutable_tree,
|
|
539
|
+
raise_if_not_exists=raise_if_not_exists,
|
|
540
|
+
check_pending_ops=check_pending_ops,
|
|
541
|
+
)
|
|
542
|
+
return TableVersionHandle(tbl_id, effective_version)
|
|
543
|
+
|
|
544
|
+
def _finalize_pending_ops(self, tbl_id: UUID) -> None:
|
|
545
|
+
"""Finalizes all pending ops for the given table."""
|
|
546
|
+
num_retries = 0
|
|
547
|
+
while True:
|
|
548
|
+
try:
|
|
549
|
+
tbl_version: int
|
|
550
|
+
op: Optional[TableOp] = None
|
|
551
|
+
delete_next_op_stmt: sql.Delete
|
|
552
|
+
reset_has_pending_stmt: sql.Update
|
|
553
|
+
with self.begin_xact(
|
|
554
|
+
tbl_id=tbl_id, for_write=True, convert_db_excs=False, finalize_pending_ops=False
|
|
555
|
+
) as conn:
|
|
556
|
+
q = (
|
|
557
|
+
sql.select(schema.Table.md, schema.PendingTableOp)
|
|
558
|
+
.select_from(schema.Table)
|
|
559
|
+
.join(schema.PendingTableOp)
|
|
560
|
+
.where(schema.Table.id == tbl_id)
|
|
561
|
+
.where(schema.PendingTableOp.tbl_id == tbl_id)
|
|
562
|
+
.order_by(schema.PendingTableOp.op_sn)
|
|
563
|
+
.limit(1)
|
|
564
|
+
.with_for_update()
|
|
565
|
+
)
|
|
566
|
+
row = conn.execute(q).one_or_none()
|
|
567
|
+
if row is None:
|
|
568
|
+
return
|
|
569
|
+
tbl_version = row.md.get('current_version')
|
|
570
|
+
op = schema.md_from_dict(TableOp, row.op)
|
|
571
|
+
delete_next_op_stmt = sql.delete(schema.PendingTableOp).where(
|
|
572
|
+
schema.PendingTableOp.tbl_id == tbl_id, schema.PendingTableOp.op_sn == row.op_sn
|
|
573
|
+
)
|
|
574
|
+
reset_has_pending_stmt = (
|
|
575
|
+
sql.update(schema.Table)
|
|
576
|
+
.where(schema.Table.id == tbl_id)
|
|
577
|
+
.values(md=schema.Table.md.op('||')({'has_pending_ops': False}))
|
|
578
|
+
)
|
|
579
|
+
|
|
580
|
+
if op.needs_xact:
|
|
581
|
+
tv = self.get_tbl_version(
|
|
582
|
+
tbl_id, tbl_version, check_pending_ops=False, validate_initialized=True
|
|
583
|
+
)
|
|
584
|
+
tv.exec_op(op)
|
|
585
|
+
conn.execute(delete_next_op_stmt)
|
|
586
|
+
if op.op_sn == op.num_ops - 1:
|
|
587
|
+
conn.execute(reset_has_pending_stmt)
|
|
588
|
+
continue
|
|
589
|
+
|
|
590
|
+
# this op runs outside of a transaction
|
|
591
|
+
tv = self.get_tbl_version(tbl_id, tbl_version, check_pending_ops=False, validate_initialized=True)
|
|
592
|
+
tv.exec_op(op)
|
|
593
|
+
with self.begin_xact(
|
|
594
|
+
tbl_id=tbl_id, for_write=True, convert_db_excs=False, finalize_pending_ops=False
|
|
595
|
+
) as conn:
|
|
596
|
+
conn.execute(delete_next_op_stmt)
|
|
597
|
+
if op.op_sn == op.num_ops - 1:
|
|
598
|
+
conn.execute(reset_has_pending_stmt)
|
|
599
|
+
|
|
600
|
+
except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
|
|
601
|
+
# TODO: why are we still seeing these here, instead of them getting taken care of by the retry
|
|
602
|
+
# logic of begin_xact()?
|
|
603
|
+
if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
|
|
604
|
+
num_retries += 1
|
|
605
|
+
log_msg: str
|
|
606
|
+
if op is not None:
|
|
607
|
+
log_msg = f'finalize_pending_ops(): retrying ({num_retries}) op {op!s} after {type(e.orig)}'
|
|
608
|
+
else:
|
|
609
|
+
log_msg = f'finalize_pending_ops(): retrying ({num_retries}) after {type(e.orig)}'
|
|
610
|
+
Env.get().console_logger.debug(log_msg)
|
|
611
|
+
time.sleep(random.uniform(0.1, 0.5))
|
|
612
|
+
continue
|
|
613
|
+
else:
|
|
614
|
+
raise
|
|
615
|
+
except Exception as e:
|
|
616
|
+
Env.get().console_logger.debug(f'finalize_pending_ops(): caught {e}')
|
|
617
|
+
raise
|
|
618
|
+
|
|
619
|
+
num_retries = 0
|
|
620
|
+
|
|
621
|
+
def _debug_str(self) -> str:
|
|
622
|
+
tv_str = '\n'.join(str(k) for k in self._tbl_versions)
|
|
623
|
+
tbl_str = '\n'.join(str(k) for k in self._tbls)
|
|
624
|
+
return f'tbl_versions:\n{tv_str}\ntbls:\n{tbl_str}'
|
|
397
625
|
|
|
398
626
|
def _get_mutable_tree(self, tbl_id: UUID) -> set[UUID]:
|
|
399
627
|
"""Returns ids of all tables that form the tree of mutable views starting at tbl_id; includes the root."""
|
|
400
|
-
|
|
628
|
+
assert (tbl_id, None) in self._tbl_versions, (
|
|
629
|
+
f'({tbl_id}, None) not in {self._tbl_versions.keys()}\n{self._debug_str()}'
|
|
630
|
+
)
|
|
631
|
+
tv = self.get_tbl_version(tbl_id, None, validate_initialized=True)
|
|
401
632
|
result: set[UUID] = {tv.id}
|
|
402
633
|
for view in tv.mutable_views:
|
|
403
634
|
result.update(self._get_mutable_tree(view.id))
|
|
@@ -408,7 +639,9 @@ class Catalog:
|
|
|
408
639
|
assert self._column_dependents is None
|
|
409
640
|
self._column_dependents = defaultdict(set)
|
|
410
641
|
for tbl_id in mutable_tree:
|
|
411
|
-
assert tbl_id in self._column_dependencies
|
|
642
|
+
assert tbl_id in self._column_dependencies, (
|
|
643
|
+
f'{tbl_id} not in {self._column_dependencies.keys()}\n{self._debug_str()}'
|
|
644
|
+
)
|
|
412
645
|
for col, dependencies in self._column_dependencies[tbl_id].items():
|
|
413
646
|
for dependency in dependencies:
|
|
414
647
|
if dependency.tbl_id not in mutable_tree:
|
|
@@ -416,13 +649,25 @@ class Catalog:
|
|
|
416
649
|
dependents = self._column_dependents[dependency]
|
|
417
650
|
dependents.add(col)
|
|
418
651
|
|
|
652
|
+
def record_column_dependencies(self, tbl_version: TableVersion) -> None:
|
|
653
|
+
"""Update self._column_dependencies. Only valid for mutable versions."""
|
|
654
|
+
from pixeltable.exprs import Expr
|
|
655
|
+
|
|
656
|
+
assert tbl_version.is_mutable
|
|
657
|
+
dependencies: dict[QColumnId, set[QColumnId]] = {}
|
|
658
|
+
for col in tbl_version.cols_by_id.values():
|
|
659
|
+
if col.value_expr_dict is None:
|
|
660
|
+
continue
|
|
661
|
+
dependencies[QColumnId(tbl_version.id, col.id)] = Expr.get_refd_column_ids(col.value_expr_dict)
|
|
662
|
+
self._column_dependencies[tbl_version.id] = dependencies
|
|
663
|
+
|
|
419
664
|
def get_column_dependents(self, tbl_id: UUID, col_id: int) -> set[Column]:
|
|
420
665
|
"""Return all Columns that transitively depend on the given column."""
|
|
421
666
|
assert self._column_dependents is not None
|
|
422
667
|
dependents = self._column_dependents[QColumnId(tbl_id, col_id)]
|
|
423
668
|
result: set[Column] = set()
|
|
424
669
|
for dependent in dependents:
|
|
425
|
-
tv = self.get_tbl_version(dependent.tbl_id, None)
|
|
670
|
+
tv = self.get_tbl_version(dependent.tbl_id, None, validate_initialized=True)
|
|
426
671
|
col = tv.cols_by_id[dependent.col_id]
|
|
427
672
|
result.add(col)
|
|
428
673
|
return result
|
|
@@ -453,6 +698,7 @@ class Catalog:
|
|
|
453
698
|
|
|
454
699
|
def get_dir_path(self, dir_id: UUID) -> Path:
|
|
455
700
|
"""Return path for directory with given id"""
|
|
701
|
+
assert isinstance(dir_id, UUID)
|
|
456
702
|
conn = Env.get().conn
|
|
457
703
|
names: list[str] = []
|
|
458
704
|
while True:
|
|
@@ -463,7 +709,7 @@ class Catalog:
|
|
|
463
709
|
break
|
|
464
710
|
names.insert(0, dir.md['name'])
|
|
465
711
|
dir_id = dir.parent_id
|
|
466
|
-
return Path('.'.join(names),
|
|
712
|
+
return Path.parse('.'.join(names), allow_empty_path=True, allow_system_path=True)
|
|
467
713
|
|
|
468
714
|
@dataclasses.dataclass
|
|
469
715
|
class DirEntry:
|
|
@@ -471,7 +717,7 @@ class Catalog:
|
|
|
471
717
|
dir_entries: dict[str, Catalog.DirEntry]
|
|
472
718
|
table: Optional[schema.Table]
|
|
473
719
|
|
|
474
|
-
@
|
|
720
|
+
@retry_loop(for_write=False)
|
|
475
721
|
def get_dir_contents(self, dir_path: Path, recursive: bool = False) -> dict[str, DirEntry]:
|
|
476
722
|
dir = self._get_schema_object(dir_path, expected=Dir, raise_if_not_exists=True)
|
|
477
723
|
return self._get_dir_contents(dir._id, recursive=recursive)
|
|
@@ -498,7 +744,7 @@ class Catalog:
|
|
|
498
744
|
|
|
499
745
|
return result
|
|
500
746
|
|
|
501
|
-
@
|
|
747
|
+
@retry_loop(for_write=True)
|
|
502
748
|
def move(self, path: Path, new_path: Path) -> None:
|
|
503
749
|
self._move(path, new_path)
|
|
504
750
|
|
|
@@ -540,6 +786,7 @@ class Catalog:
|
|
|
540
786
|
- if both add and drop (= two directories are involved), lock the directories in a pre-determined order
|
|
541
787
|
(in this case, by name) in order to prevent deadlocks between concurrent directory modifications
|
|
542
788
|
"""
|
|
789
|
+
assert drop_expected in (None, Table, Dir), drop_expected
|
|
543
790
|
assert (add_dir_path is None) == (add_name is None)
|
|
544
791
|
assert (drop_dir_path is None) == (drop_name is None)
|
|
545
792
|
dir_paths: set[Path] = set()
|
|
@@ -553,7 +800,7 @@ class Catalog:
|
|
|
553
800
|
for p in sorted(dir_paths):
|
|
554
801
|
dir = self._get_dir(p, lock_dir=True)
|
|
555
802
|
if dir is None:
|
|
556
|
-
raise excs.Error(f'Directory {
|
|
803
|
+
raise excs.Error(f'Directory {p!r} does not exist.')
|
|
557
804
|
if p == add_dir_path:
|
|
558
805
|
add_dir = dir
|
|
559
806
|
if p == drop_dir_path:
|
|
@@ -564,24 +811,24 @@ class Catalog:
|
|
|
564
811
|
add_obj = self._get_dir_entry(add_dir.id, add_name, lock_entry=True)
|
|
565
812
|
if add_obj is not None and raise_if_exists:
|
|
566
813
|
add_path = add_dir_path.append(add_name)
|
|
567
|
-
raise excs.Error(f'Path {
|
|
814
|
+
raise excs.Error(f'Path {add_path!r} already exists.')
|
|
568
815
|
|
|
569
816
|
drop_obj: Optional[SchemaObject] = None
|
|
570
817
|
if drop_dir is not None:
|
|
571
818
|
drop_path = drop_dir_path.append(drop_name)
|
|
572
819
|
drop_obj = self._get_dir_entry(drop_dir.id, drop_name, lock_entry=True)
|
|
573
820
|
if drop_obj is None and raise_if_not_exists:
|
|
574
|
-
raise excs.Error(f'Path {
|
|
821
|
+
raise excs.Error(f'Path {drop_path!r} does not exist.')
|
|
575
822
|
if drop_obj is not None and drop_expected is not None and not isinstance(drop_obj, drop_expected):
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
f'but is a {type(drop_obj)._display_name()}'
|
|
579
|
-
)
|
|
823
|
+
expected_name = 'table' if drop_expected is Table else 'directory'
|
|
824
|
+
raise excs.Error(f'{drop_path!r} needs to be a {expected_name} but is a {drop_obj._display_name()}')
|
|
580
825
|
|
|
581
826
|
add_dir_obj = Dir(add_dir.id, add_dir.parent_id, add_dir.md['name']) if add_dir is not None else None
|
|
582
827
|
return add_obj, add_dir_obj, drop_obj
|
|
583
828
|
|
|
584
|
-
def _get_dir_entry(
|
|
829
|
+
def _get_dir_entry(
|
|
830
|
+
self, dir_id: UUID, name: str, version: Optional[int] = None, lock_entry: bool = False
|
|
831
|
+
) -> Optional[SchemaObject]:
|
|
585
832
|
user = Env.get().user
|
|
586
833
|
conn = Env.get().conn
|
|
587
834
|
|
|
@@ -602,7 +849,7 @@ class Catalog:
|
|
|
602
849
|
|
|
603
850
|
# check for table
|
|
604
851
|
if lock_entry:
|
|
605
|
-
self.
|
|
852
|
+
self._acquire_tbl_lock(for_write=True, dir_id=dir_id, raise_if_not_exists=False, tbl_name=name)
|
|
606
853
|
q = sql.select(schema.Table.id).where(
|
|
607
854
|
schema.Table.dir_id == dir_id,
|
|
608
855
|
schema.Table.md['name'].astext == name,
|
|
@@ -610,9 +857,7 @@ class Catalog:
|
|
|
610
857
|
)
|
|
611
858
|
tbl_id = conn.execute(q).scalar_one_or_none()
|
|
612
859
|
if tbl_id is not None:
|
|
613
|
-
|
|
614
|
-
_ = self._load_tbl(tbl_id)
|
|
615
|
-
return self._tbls[tbl_id]
|
|
860
|
+
return self.get_table_by_id(tbl_id, version)
|
|
616
861
|
|
|
617
862
|
return None
|
|
618
863
|
|
|
@@ -628,17 +873,17 @@ class Catalog:
|
|
|
628
873
|
"""Return the schema object at the given path, or None if it doesn't exist.
|
|
629
874
|
|
|
630
875
|
Raises Error if
|
|
631
|
-
- the parent directory doesn't exist
|
|
876
|
+
- the parent directory doesn't exist
|
|
632
877
|
- raise_if_exists is True and the path exists
|
|
633
878
|
- raise_if_not_exists is True and the path does not exist
|
|
634
879
|
- expected is not None and the existing object has a different type
|
|
635
880
|
"""
|
|
881
|
+
assert expected in (None, Table, Dir), expected
|
|
882
|
+
|
|
636
883
|
if path.is_root:
|
|
637
884
|
# the root dir
|
|
638
885
|
if expected is not None and expected is not Dir:
|
|
639
|
-
raise excs.Error(
|
|
640
|
-
f'{str(path)!r} needs to be a {expected._display_name()} but is a {Dir._display_name()}'
|
|
641
|
-
)
|
|
886
|
+
raise excs.Error(f'{path!r} needs to be a table but is a dir')
|
|
642
887
|
dir = self._get_dir(path, lock_dir=lock_obj)
|
|
643
888
|
if dir is None:
|
|
644
889
|
raise excs.Error(f'Unknown user: {Env.get().user}')
|
|
@@ -647,33 +892,28 @@ class Catalog:
|
|
|
647
892
|
parent_path = path.parent
|
|
648
893
|
parent_dir = self._get_dir(parent_path, lock_dir=lock_parent)
|
|
649
894
|
if parent_dir is None:
|
|
650
|
-
raise excs.Error(f'Directory {
|
|
651
|
-
obj = self._get_dir_entry(parent_dir.id, path.name, lock_entry=lock_obj)
|
|
895
|
+
raise excs.Error(f'Directory {parent_path!r} does not exist.')
|
|
896
|
+
obj = self._get_dir_entry(parent_dir.id, path.name, path.version, lock_entry=lock_obj)
|
|
652
897
|
|
|
653
898
|
if obj is None and raise_if_not_exists:
|
|
654
|
-
raise excs.Error(f'Path {
|
|
899
|
+
raise excs.Error(f'Path {path!r} does not exist.')
|
|
655
900
|
elif obj is not None and raise_if_exists:
|
|
656
|
-
raise excs.Error(f'Path {
|
|
901
|
+
raise excs.Error(f'Path {path!r} is an existing {obj._display_name()}.')
|
|
657
902
|
elif obj is not None and expected is not None and not isinstance(obj, expected):
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
)
|
|
903
|
+
expected_name = 'table' if expected is Table else 'directory'
|
|
904
|
+
raise excs.Error(f'{path!r} needs to be a {expected_name} but is a {obj._display_name()}.')
|
|
661
905
|
return obj
|
|
662
906
|
|
|
663
|
-
def get_table_by_id(self, tbl_id: UUID) -> Optional[Table]:
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
if
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
# _ = self.get_table_by_id(v.id)
|
|
674
|
-
return self._tbls[tbl_id]
|
|
675
|
-
|
|
676
|
-
@_retry_loop(for_write=True)
|
|
907
|
+
def get_table_by_id(self, tbl_id: UUID, version: Optional[int] = None) -> Optional[Table]:
|
|
908
|
+
"""Must be executed inside a transaction. Might raise PendingTableOpsError."""
|
|
909
|
+
if (tbl_id, version) not in self._tbls:
|
|
910
|
+
if version is None:
|
|
911
|
+
self._load_tbl(tbl_id)
|
|
912
|
+
else:
|
|
913
|
+
self._load_tbl_at_version(tbl_id, version)
|
|
914
|
+
return self._tbls.get((tbl_id, version))
|
|
915
|
+
|
|
916
|
+
@retry_loop(for_write=True)
|
|
677
917
|
def create_table(
|
|
678
918
|
self,
|
|
679
919
|
path: Path,
|
|
@@ -703,10 +943,9 @@ class Catalog:
|
|
|
703
943
|
comment=comment,
|
|
704
944
|
media_validation=media_validation,
|
|
705
945
|
)
|
|
706
|
-
self._tbls[tbl._id] = tbl
|
|
946
|
+
self._tbls[tbl._id, None] = tbl
|
|
707
947
|
return tbl
|
|
708
948
|
|
|
709
|
-
@_retry_loop(for_write=True)
|
|
710
949
|
def create_view(
|
|
711
950
|
self,
|
|
712
951
|
path: Path,
|
|
@@ -722,49 +961,68 @@ class Catalog:
|
|
|
722
961
|
media_validation: MediaValidation,
|
|
723
962
|
if_exists: IfExistsParam,
|
|
724
963
|
) -> Table:
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
964
|
+
@retry_loop(for_write=True)
|
|
965
|
+
def create_fn() -> UUID:
|
|
966
|
+
if not is_snapshot and base.is_mutable():
|
|
967
|
+
# this is a mutable view of a mutable base; X-lock the base and advance its view_sn before adding
|
|
968
|
+
# the view
|
|
969
|
+
self._acquire_tbl_lock(tbl_id=base.tbl_id, for_write=True)
|
|
970
|
+
base_tv = self.get_tbl_version(base.tbl_id, None, validate_initialized=True)
|
|
971
|
+
base_tv.tbl_md.view_sn += 1
|
|
972
|
+
result = Env.get().conn.execute(
|
|
973
|
+
sql.update(schema.Table)
|
|
974
|
+
.values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
|
|
975
|
+
.where(schema.Table.id == base.tbl_id)
|
|
976
|
+
)
|
|
977
|
+
assert result.rowcount == 1, result.rowcount
|
|
738
978
|
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
979
|
+
existing = self._handle_path_collision(path, View, is_snapshot, if_exists)
|
|
980
|
+
if existing is not None:
|
|
981
|
+
assert isinstance(existing, View)
|
|
982
|
+
return existing._id
|
|
743
983
|
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
984
|
+
dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
|
|
985
|
+
assert dir is not None
|
|
986
|
+
if iterator is None:
|
|
987
|
+
iterator_class, iterator_args = None, None
|
|
988
|
+
else:
|
|
989
|
+
iterator_class, iterator_args = iterator
|
|
990
|
+
md, ops = View._create(
|
|
991
|
+
dir._id,
|
|
992
|
+
path.name,
|
|
993
|
+
base=base,
|
|
994
|
+
select_list=select_list,
|
|
995
|
+
additional_columns=additional_columns,
|
|
996
|
+
predicate=where,
|
|
997
|
+
sample_clause=sample_clause,
|
|
998
|
+
is_snapshot=is_snapshot,
|
|
999
|
+
iterator_cls=iterator_class,
|
|
1000
|
+
iterator_args=iterator_args,
|
|
1001
|
+
num_retained_versions=num_retained_versions,
|
|
1002
|
+
comment=comment,
|
|
1003
|
+
media_validation=media_validation,
|
|
1004
|
+
)
|
|
1005
|
+
tbl_id = UUID(md.tbl_md.tbl_id)
|
|
1006
|
+
self.store_tbl_md(tbl_id, dir._id, md.tbl_md, md.version_md, md.schema_version_md, ops)
|
|
1007
|
+
return tbl_id
|
|
1008
|
+
|
|
1009
|
+
view_id = create_fn()
|
|
1010
|
+
if not is_snapshot and base.is_mutable():
|
|
1011
|
+
# invalidate base's TableVersion instance, so that it gets reloaded with the new mutable view
|
|
1012
|
+
self._clear_tv_cache(base.tbl_id, base.tbl_version.effective_version)
|
|
1013
|
+
# base_tv = self.get_tbl_version(base.tbl_id, base.tbl_version.effective_version, validate_initialized=True)
|
|
1014
|
+
# view_handle = TableVersionHandle(view_id, effective_version=None)
|
|
1015
|
+
# base_tv.mutable_views.add(view_handle)
|
|
1016
|
+
|
|
1017
|
+
# finalize pending ops
|
|
1018
|
+
with self.begin_xact(tbl_id=view_id, for_write=True, finalize_pending_ops=True):
|
|
1019
|
+
return self.get_table_by_id(view_id)
|
|
1020
|
+
|
|
1021
|
+
def _clear_tv_cache(self, tbl_id: UUID, effective_version: Optional[int]) -> None:
|
|
1022
|
+
if (tbl_id, effective_version) in self._tbl_versions:
|
|
1023
|
+
tv = self._tbl_versions[tbl_id, effective_version]
|
|
1024
|
+
tv.is_validated = False
|
|
1025
|
+
del self._tbl_versions[tbl_id, effective_version]
|
|
768
1026
|
|
|
769
1027
|
def create_replica(self, path: Path, md: list[schema.FullTableMd]) -> None:
|
|
770
1028
|
"""
|
|
@@ -784,12 +1042,12 @@ class Catalog:
|
|
|
784
1042
|
)
|
|
785
1043
|
|
|
786
1044
|
# Ensure that the system directory exists.
|
|
787
|
-
self._create_dir(Path('_system',
|
|
1045
|
+
self._create_dir(Path.parse('_system', allow_system_path=True), if_exists=IfExistsParam.IGNORE, parents=False)
|
|
788
1046
|
|
|
789
|
-
# Now check to see if this table
|
|
790
|
-
existing =
|
|
1047
|
+
# Now check to see if this table already exists in the catalog.
|
|
1048
|
+
existing = self.get_table_by_id(tbl_id)
|
|
791
1049
|
if existing is not None:
|
|
792
|
-
existing_path = Path(existing._path(),
|
|
1050
|
+
existing_path = Path.parse(existing._path(), allow_system_path=True)
|
|
793
1051
|
if existing_path != path:
|
|
794
1052
|
# It does exist, under a different path from the specified one.
|
|
795
1053
|
if not existing_path.is_system_path:
|
|
@@ -808,16 +1066,16 @@ class Catalog:
|
|
|
808
1066
|
# table being replicated.
|
|
809
1067
|
for ancestor_md in md[:0:-1]:
|
|
810
1068
|
ancestor_id = UUID(ancestor_md.tbl_md.tbl_id)
|
|
811
|
-
replica =
|
|
1069
|
+
replica = self.get_table_by_id(ancestor_id)
|
|
812
1070
|
replica_path: Path
|
|
813
1071
|
if replica is None:
|
|
814
1072
|
# We've never seen this table before. Create a new anonymous system table for it.
|
|
815
|
-
replica_path = Path(f'_system.replica_{ancestor_id.hex}',
|
|
1073
|
+
replica_path = Path.parse(f'_system.replica_{ancestor_id.hex}', allow_system_path=True)
|
|
816
1074
|
else:
|
|
817
1075
|
# The table already exists in the catalog. The existing path might be a system path (if the table
|
|
818
1076
|
# was created as an anonymous base table of some other table), or it might not (if it's a snapshot
|
|
819
1077
|
# that was directly replicated by the user at some point). In either case, use the existing path.
|
|
820
|
-
replica_path = Path(replica._path(),
|
|
1078
|
+
replica_path = Path.parse(replica._path(), allow_system_path=True)
|
|
821
1079
|
|
|
822
1080
|
# Store the metadata; it could be a new version (in which case a new record will be created), or a known
|
|
823
1081
|
# version (in which case the newly received metadata will be validated as identical).
|
|
@@ -877,7 +1135,7 @@ class Catalog:
|
|
|
877
1135
|
q = (
|
|
878
1136
|
sql.select(schema.TableVersion.md)
|
|
879
1137
|
.where(schema.TableVersion.tbl_id == tbl_id)
|
|
880
|
-
.where(
|
|
1138
|
+
.where(schema.TableVersion.md['version'].cast(sql.Integer) == md.version_md.version)
|
|
881
1139
|
)
|
|
882
1140
|
existing_version_md_row = conn.execute(q).one_or_none()
|
|
883
1141
|
if existing_version_md_row is None:
|
|
@@ -896,10 +1154,7 @@ class Catalog:
|
|
|
896
1154
|
sql.select(schema.TableSchemaVersion.md)
|
|
897
1155
|
.where(schema.TableSchemaVersion.tbl_id == tbl_id)
|
|
898
1156
|
.where(
|
|
899
|
-
sql.
|
|
900
|
-
f"({schema.TableSchemaVersion.__table__}.md->>'schema_version')::int = "
|
|
901
|
-
f'{md.schema_version_md.schema_version}'
|
|
902
|
-
)
|
|
1157
|
+
schema.TableSchemaVersion.md['schema_version'].cast(sql.Integer) == md.schema_version_md.schema_version
|
|
903
1158
|
)
|
|
904
1159
|
)
|
|
905
1160
|
existing_schema_version_md_row = conn.execute(q).one_or_none()
|
|
@@ -922,7 +1177,7 @@ class Catalog:
|
|
|
922
1177
|
# It's a new version of a table that has a physical store, so we need to create a TableVersion instance.
|
|
923
1178
|
TableVersion.create_replica(md)
|
|
924
1179
|
|
|
925
|
-
@
|
|
1180
|
+
@retry_loop(for_write=False)
|
|
926
1181
|
def get_table(self, path: Path) -> Table:
|
|
927
1182
|
obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
|
|
928
1183
|
assert isinstance(obj, Table)
|
|
@@ -931,7 +1186,7 @@ class Catalog:
|
|
|
931
1186
|
obj._tbl_version_path.clear_cached_md()
|
|
932
1187
|
return obj
|
|
933
1188
|
|
|
934
|
-
@
|
|
1189
|
+
@retry_loop(for_write=True)
|
|
935
1190
|
def drop_table(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
|
|
936
1191
|
tbl = self._get_schema_object(
|
|
937
1192
|
path,
|
|
@@ -941,7 +1196,7 @@ class Catalog:
|
|
|
941
1196
|
lock_obj=False,
|
|
942
1197
|
)
|
|
943
1198
|
if tbl is None:
|
|
944
|
-
_logger.info(f'Skipped table {
|
|
1199
|
+
_logger.info(f'Skipped table {path!r} (does not exist).')
|
|
945
1200
|
return
|
|
946
1201
|
assert isinstance(tbl, Table)
|
|
947
1202
|
|
|
@@ -949,7 +1204,7 @@ class Catalog:
|
|
|
949
1204
|
# this is a mutable view of a mutable base;
|
|
950
1205
|
# lock the base before the view, in order to avoid deadlocks with concurrent inserts/updates
|
|
951
1206
|
base_id = tbl._tbl_version_path.base.tbl_id
|
|
952
|
-
self.
|
|
1207
|
+
self._acquire_tbl_lock(tbl_id=base_id, for_write=True, lock_mutable_tree=False)
|
|
953
1208
|
|
|
954
1209
|
self._drop_tbl(tbl, force=force, is_replace=False)
|
|
955
1210
|
|
|
@@ -964,7 +1219,7 @@ class Catalog:
|
|
|
964
1219
|
in the same directory with the same name (which could lead to duplicate names if we get aborted)
|
|
965
1220
|
"""
|
|
966
1221
|
self._acquire_dir_xlock(dir_id=tbl._dir_id)
|
|
967
|
-
self.
|
|
1222
|
+
self._acquire_tbl_lock(tbl_id=tbl._id, for_write=True, lock_mutable_tree=False)
|
|
968
1223
|
|
|
969
1224
|
view_ids = self.get_view_ids(tbl._id, for_update=True)
|
|
970
1225
|
if len(view_ids) > 0:
|
|
@@ -988,8 +1243,9 @@ class Catalog:
|
|
|
988
1243
|
# if this is a mutable view of a mutable base, advance the base's view_sn
|
|
989
1244
|
if isinstance(tbl, View) and tbl._tbl_version_path.is_mutable() and tbl._tbl_version_path.base.is_mutable():
|
|
990
1245
|
base_id = tbl._tbl_version_path.base.tbl_id
|
|
991
|
-
base_tv = self.get_tbl_version(base_id, None)
|
|
1246
|
+
base_tv = self.get_tbl_version(base_id, None, validate_initialized=True)
|
|
992
1247
|
base_tv.tbl_md.view_sn += 1
|
|
1248
|
+
self._modified_tvs.add(base_tv.handle)
|
|
993
1249
|
result = Env.get().conn.execute(
|
|
994
1250
|
sql.update(schema.Table.__table__)
|
|
995
1251
|
.values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
|
|
@@ -997,23 +1253,28 @@ class Catalog:
|
|
|
997
1253
|
)
|
|
998
1254
|
assert result.rowcount == 1, result.rowcount
|
|
999
1255
|
|
|
1256
|
+
if tbl._tbl_version is not None:
|
|
1257
|
+
# invalidate the TableVersion instance when we're done so that existing references to it can find out it
|
|
1258
|
+
# has been dropped
|
|
1259
|
+
self._modified_tvs.add(tbl._tbl_version)
|
|
1000
1260
|
tv = tbl._tbl_version.get() if tbl._tbl_version is not None else None
|
|
1001
|
-
if tv is not None:
|
|
1261
|
+
# if tv is not None:
|
|
1262
|
+
# tv = tbl._tbl_version.get()
|
|
1263
|
+
# # invalidate the TableVersion instance so that existing references to it can find out it has been dropped
|
|
1264
|
+
# tv.is_validated = False
|
|
1265
|
+
if tbl._tbl_version is not None:
|
|
1266
|
+
# drop the store table before deleting the Table record
|
|
1002
1267
|
tv = tbl._tbl_version.get()
|
|
1003
|
-
|
|
1004
|
-
tv.is_validated = False
|
|
1268
|
+
tv.drop()
|
|
1005
1269
|
|
|
1006
1270
|
self.delete_tbl_md(tbl._id)
|
|
1007
|
-
assert tbl._id in self._tbls
|
|
1008
|
-
|
|
1271
|
+
assert (tbl._id, None) in self._tbls
|
|
1272
|
+
versions = [k[1] for k in self._tbls if k[0] == tbl._id]
|
|
1273
|
+
for version in versions:
|
|
1274
|
+
del self._tbls[tbl._id, version]
|
|
1009
1275
|
_logger.info(f'Dropped table `{tbl._path()}`.')
|
|
1010
1276
|
|
|
1011
|
-
|
|
1012
|
-
tv.drop()
|
|
1013
|
-
assert (tv.id, tv.effective_version) in self._tbl_versions
|
|
1014
|
-
del self._tbl_versions[tv.id, tv.effective_version]
|
|
1015
|
-
|
|
1016
|
-
@_retry_loop(for_write=True)
|
|
1277
|
+
@retry_loop(for_write=True)
|
|
1017
1278
|
def create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
|
|
1018
1279
|
return self._create_dir(path, if_exists, parents)
|
|
1019
1280
|
|
|
@@ -1026,7 +1287,7 @@ class Catalog:
|
|
|
1026
1287
|
# parent = self._get_schema_object(path.parent)
|
|
1027
1288
|
# assert parent is not None
|
|
1028
1289
|
# dir = Dir._create(parent._id, path.name)
|
|
1029
|
-
# Env.get().console_logger.info(f'Created directory {
|
|
1290
|
+
# Env.get().console_logger.info(f'Created directory {path!r}.')
|
|
1030
1291
|
# return dir
|
|
1031
1292
|
|
|
1032
1293
|
if parents:
|
|
@@ -1045,10 +1306,10 @@ class Catalog:
|
|
|
1045
1306
|
return existing
|
|
1046
1307
|
assert parent is not None
|
|
1047
1308
|
dir = Dir._create(parent._id, path.name)
|
|
1048
|
-
Env.get().console_logger.info(f'Created directory {
|
|
1309
|
+
Env.get().console_logger.info(f'Created directory {path!r}.')
|
|
1049
1310
|
return dir
|
|
1050
1311
|
|
|
1051
|
-
@
|
|
1312
|
+
@retry_loop(for_write=True)
|
|
1052
1313
|
def drop_dir(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
|
|
1053
1314
|
_, _, schema_obj = self._prepare_dir_op(
|
|
1054
1315
|
drop_dir_path=path.parent,
|
|
@@ -1057,7 +1318,7 @@ class Catalog:
|
|
|
1057
1318
|
raise_if_not_exists=if_not_exists == IfNotExistsParam.ERROR and not force,
|
|
1058
1319
|
)
|
|
1059
1320
|
if schema_obj is None:
|
|
1060
|
-
_logger.info(f'Directory {
|
|
1321
|
+
_logger.info(f'Directory {path!r} does not exist; skipped drop_dir().')
|
|
1061
1322
|
return
|
|
1062
1323
|
self._drop_dir(schema_obj._id, path, force=force)
|
|
1063
1324
|
|
|
@@ -1070,7 +1331,7 @@ class Catalog:
|
|
|
1070
1331
|
q = sql.select(sql.func.count()).select_from(schema.Table).where(schema.Table.dir_id == dir_id)
|
|
1071
1332
|
num_tbls = conn.execute(q).scalar()
|
|
1072
1333
|
if num_subdirs + num_tbls > 0:
|
|
1073
|
-
raise excs.Error(f'Directory {
|
|
1334
|
+
raise excs.Error(f'Directory {dir_path!r} is not empty.')
|
|
1074
1335
|
|
|
1075
1336
|
# drop existing subdirs
|
|
1076
1337
|
self._acquire_dir_xlock(dir_id=dir_id)
|
|
@@ -1088,7 +1349,7 @@ class Catalog:
|
|
|
1088
1349
|
|
|
1089
1350
|
# self.drop_dir(dir_id)
|
|
1090
1351
|
conn.execute(sql.delete(schema.Dir).where(schema.Dir.id == dir_id))
|
|
1091
|
-
_logger.info(f'Removed directory {
|
|
1352
|
+
_logger.info(f'Removed directory {dir_path!r}.')
|
|
1092
1353
|
|
|
1093
1354
|
def get_view_ids(self, tbl_id: UUID, for_update: bool = False) -> list[UUID]:
|
|
1094
1355
|
"""Return the ids of views that directly reference the given table"""
|
|
@@ -1098,19 +1359,31 @@ class Catalog:
|
|
|
1098
1359
|
tbl_count = conn.execute(q).scalar()
|
|
1099
1360
|
if tbl_count == 0:
|
|
1100
1361
|
raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
|
|
1101
|
-
q = sql.select(schema.Table.id).where(
|
|
1362
|
+
q = sql.select(schema.Table.id).where(schema.Table.md['view_md']['base_versions'][0][0].astext == tbl_id.hex)
|
|
1102
1363
|
if for_update:
|
|
1103
1364
|
q = q.with_for_update()
|
|
1104
1365
|
result = [r[0] for r in conn.execute(q).all()]
|
|
1105
1366
|
return result
|
|
1106
1367
|
|
|
1107
|
-
def get_tbl_version(
|
|
1368
|
+
def get_tbl_version(
|
|
1369
|
+
self,
|
|
1370
|
+
tbl_id: UUID,
|
|
1371
|
+
effective_version: Optional[int],
|
|
1372
|
+
check_pending_ops: Optional[bool] = None,
|
|
1373
|
+
validate_initialized: bool = False,
|
|
1374
|
+
) -> Optional[TableVersion]:
|
|
1375
|
+
"""
|
|
1376
|
+
Returns the TableVersion instance for the given table and version and updates the cache.
|
|
1377
|
+
|
|
1378
|
+
If present in the cache and the instance isn't validated, validates version and view_sn against the stored
|
|
1379
|
+
metadata.
|
|
1380
|
+
"""
|
|
1108
1381
|
# we need a transaction here, if we're not already in one; if this starts a new transaction,
|
|
1109
1382
|
# the returned TableVersion instance will not be validated
|
|
1110
1383
|
with self.begin_xact(for_write=False) as conn:
|
|
1111
1384
|
tv = self._tbl_versions.get((tbl_id, effective_version))
|
|
1112
1385
|
if tv is None:
|
|
1113
|
-
tv = self._load_tbl_version(tbl_id, effective_version)
|
|
1386
|
+
tv = self._load_tbl_version(tbl_id, effective_version, check_pending_ops=check_pending_ops)
|
|
1114
1387
|
elif not tv.is_validated:
|
|
1115
1388
|
# only live instances are invalidated
|
|
1116
1389
|
assert effective_version is None
|
|
@@ -1131,12 +1404,16 @@ class Catalog:
|
|
|
1131
1404
|
f'(cached/current version: {tv.version}/{current_version}, '
|
|
1132
1405
|
f'cached/current view_sn: {tv.tbl_md.view_sn}/{view_sn})'
|
|
1133
1406
|
)
|
|
1134
|
-
tv = self._load_tbl_version(tbl_id, None)
|
|
1407
|
+
tv = self._load_tbl_version(tbl_id, None, check_pending_ops=check_pending_ops)
|
|
1135
1408
|
else:
|
|
1136
1409
|
# the cached metadata is valid
|
|
1137
1410
|
tv.is_validated = True
|
|
1138
1411
|
|
|
1139
|
-
assert tv.is_validated
|
|
1412
|
+
assert tv.is_validated, f'{tbl_id}:{effective_version} not validated\n{tv.__dict__}\n{self._debug_str()}'
|
|
1413
|
+
if validate_initialized:
|
|
1414
|
+
assert tv.is_initialized, (
|
|
1415
|
+
f'{tbl_id}:{effective_version} not initialized\n{tv.__dict__}\n{self._debug_str()}'
|
|
1416
|
+
)
|
|
1140
1417
|
return tv
|
|
1141
1418
|
|
|
1142
1419
|
def remove_tbl_version(self, tbl_version: TableVersion) -> None:
|
|
@@ -1181,52 +1458,56 @@ class Catalog:
|
|
|
1181
1458
|
row = conn.execute(q).one_or_none()
|
|
1182
1459
|
return schema.Dir(**row._mapping) if row is not None else None
|
|
1183
1460
|
|
|
1184
|
-
def _load_tbl(self, tbl_id: UUID) ->
|
|
1461
|
+
def _load_tbl(self, tbl_id: UUID) -> None:
|
|
1185
1462
|
"""Loads metadata for the table with the given id and caches it."""
|
|
1186
1463
|
_logger.info(f'Loading table {tbl_id}')
|
|
1187
1464
|
from .insertable_table import InsertableTable
|
|
1188
1465
|
from .view import View
|
|
1189
1466
|
|
|
1190
1467
|
conn = Env.get().conn
|
|
1468
|
+
|
|
1469
|
+
# check for pending ops
|
|
1470
|
+
q = sql.select(sql.func.count()).where(schema.PendingTableOp.tbl_id == tbl_id)
|
|
1471
|
+
has_pending_ops = conn.execute(q).scalar() > 0
|
|
1472
|
+
if has_pending_ops:
|
|
1473
|
+
raise PendingTableOpsError(tbl_id)
|
|
1474
|
+
|
|
1191
1475
|
q = (
|
|
1192
1476
|
sql.select(schema.Table, schema.TableSchemaVersion)
|
|
1193
1477
|
.join(schema.TableSchemaVersion)
|
|
1194
1478
|
.where(schema.Table.id == schema.TableSchemaVersion.tbl_id)
|
|
1195
|
-
# Table.md['current_schema_version'] == TableSchemaVersion.schema_version
|
|
1196
1479
|
.where(
|
|
1197
|
-
sql.
|
|
1198
|
-
f"({schema.Table.__table__}.md->>'current_schema_version')::int = "
|
|
1199
|
-
f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
|
|
1200
|
-
)
|
|
1480
|
+
schema.Table.md['current_schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version
|
|
1201
1481
|
)
|
|
1202
1482
|
.where(schema.Table.id == tbl_id)
|
|
1203
1483
|
)
|
|
1204
1484
|
row = conn.execute(q).one_or_none()
|
|
1205
1485
|
if row is None:
|
|
1206
1486
|
return None
|
|
1207
|
-
tbl_record,
|
|
1487
|
+
tbl_record, _ = _unpack_row(row, [schema.Table, schema.TableSchemaVersion])
|
|
1208
1488
|
|
|
1209
1489
|
tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
|
|
1210
1490
|
view_md = tbl_md.view_md
|
|
1211
|
-
if view_md is None:
|
|
1491
|
+
if view_md is None and not tbl_md.is_replica:
|
|
1212
1492
|
# this is a base table
|
|
1213
1493
|
if (tbl_id, None) not in self._tbl_versions:
|
|
1214
1494
|
_ = self._load_tbl_version(tbl_id, None)
|
|
1215
1495
|
tbl = InsertableTable(tbl_record.dir_id, TableVersionHandle(tbl_id, None))
|
|
1216
|
-
self._tbls[tbl_id] = tbl
|
|
1217
|
-
return
|
|
1496
|
+
self._tbls[tbl_id, None] = tbl
|
|
1497
|
+
return
|
|
1218
1498
|
|
|
1219
1499
|
# this is a view; determine the sequence of TableVersions to load
|
|
1220
1500
|
tbl_version_path: list[tuple[UUID, Optional[int]]] = []
|
|
1221
|
-
|
|
1222
|
-
pure_snapshot = view_md.is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
|
|
1223
|
-
if pure_snapshot:
|
|
1501
|
+
if tbl_md.is_pure_snapshot:
|
|
1224
1502
|
# this is a pure snapshot, without a physical table backing it; we only need the bases
|
|
1225
1503
|
pass
|
|
1226
1504
|
else:
|
|
1227
|
-
effective_version =
|
|
1505
|
+
effective_version = (
|
|
1506
|
+
0 if view_md is not None and view_md.is_snapshot else None
|
|
1507
|
+
) # snapshots only have version 0
|
|
1228
1508
|
tbl_version_path.append((tbl_id, effective_version))
|
|
1229
|
-
|
|
1509
|
+
if view_md is not None:
|
|
1510
|
+
tbl_version_path.extend((UUID(tbl_id), version) for tbl_id, version in view_md.base_versions)
|
|
1230
1511
|
|
|
1231
1512
|
# load TableVersions, starting at the root
|
|
1232
1513
|
base_path: Optional[TableVersionPath] = None
|
|
@@ -1236,11 +1517,71 @@ class Catalog:
|
|
|
1236
1517
|
_ = self._load_tbl_version(id, effective_version)
|
|
1237
1518
|
view_path = TableVersionPath(TableVersionHandle(id, effective_version), base=base_path)
|
|
1238
1519
|
base_path = view_path
|
|
1239
|
-
view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=
|
|
1240
|
-
self._tbls[tbl_id] = view
|
|
1241
|
-
|
|
1520
|
+
view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=tbl_md.is_pure_snapshot)
|
|
1521
|
+
self._tbls[tbl_id, None] = view
|
|
1522
|
+
|
|
1523
|
+
def _load_tbl_at_version(self, tbl_id: UUID, version: int) -> None:
|
|
1524
|
+
from .view import View
|
|
1525
|
+
|
|
1526
|
+
# Load the specified TableMd and TableVersionMd records from the db.
|
|
1527
|
+
conn = Env.get().conn
|
|
1528
|
+
q: sql.Executable = (
|
|
1529
|
+
sql.select(schema.Table, schema.TableVersion)
|
|
1530
|
+
.join(schema.TableVersion)
|
|
1531
|
+
.where(schema.Table.id == tbl_id)
|
|
1532
|
+
.where(schema.Table.id == schema.TableVersion.tbl_id)
|
|
1533
|
+
.where(schema.TableVersion.version == version)
|
|
1534
|
+
)
|
|
1535
|
+
row = conn.execute(q).one_or_none()
|
|
1536
|
+
if row is None:
|
|
1537
|
+
return None
|
|
1538
|
+
tbl_record, version_record = _unpack_row(row, [schema.Table, schema.TableVersion])
|
|
1539
|
+
tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
|
|
1540
|
+
version_md = schema.md_from_dict(schema.TableVersionMd, version_record.md)
|
|
1242
1541
|
|
|
1243
|
-
|
|
1542
|
+
# Reconstruct the TableVersionPath for the specified TableVersion. We do this by examining the created_at
|
|
1543
|
+
# timestamps of this table and all its ancestors.
|
|
1544
|
+
# TODO: Store the relevant TableVersionPaths in the database, so that we don't need to rely on timestamps
|
|
1545
|
+
# (which might be nondeterministic in the future).
|
|
1546
|
+
|
|
1547
|
+
# Build the list of ancestor versions, starting with the given table and traversing back to the base table.
|
|
1548
|
+
# For each proper ancestor, we use the version whose created_at timestamp equals or most nearly precedes the
|
|
1549
|
+
# given TableVersion's created_at timestamp.
|
|
1550
|
+
ancestors: list[tuple[UUID, Optional[int]]] = [(tbl_id, version)]
|
|
1551
|
+
if tbl_md.view_md is not None:
|
|
1552
|
+
for ancestor_id, _ in tbl_md.view_md.base_versions:
|
|
1553
|
+
q = (
|
|
1554
|
+
sql.select(schema.TableVersion)
|
|
1555
|
+
.where(schema.TableVersion.tbl_id == ancestor_id)
|
|
1556
|
+
.where(schema.TableVersion.md['created_at'].cast(sql.Float) <= version_md.created_at)
|
|
1557
|
+
.order_by(schema.TableVersion.md['created_at'].cast(sql.Float).desc())
|
|
1558
|
+
.limit(1)
|
|
1559
|
+
)
|
|
1560
|
+
row = conn.execute(q).one_or_none()
|
|
1561
|
+
if row is None:
|
|
1562
|
+
# This can happen if an ancestor version is garbage collected; it can also happen in
|
|
1563
|
+
# rare circumstances involving table versions created specifically with Pixeltable 0.4.3.
|
|
1564
|
+
_logger.info(f'Ancestor {ancestor_id} not found for table {tbl_id}:{version}')
|
|
1565
|
+
raise excs.Error('The specified table version is no longer valid and cannot be retrieved.')
|
|
1566
|
+
ancestor_version_record = _unpack_row(row, [schema.TableVersion])[0]
|
|
1567
|
+
ancestor_version_md = schema.md_from_dict(schema.TableVersionMd, ancestor_version_record.md)
|
|
1568
|
+
assert ancestor_version_md.created_at <= version_md.created_at
|
|
1569
|
+
ancestors.append((UUID(ancestor_id), ancestor_version_md.version))
|
|
1570
|
+
|
|
1571
|
+
# Force any ancestors to be loaded (base table first).
|
|
1572
|
+
for anc_id, anc_version in ancestors[::-1]:
|
|
1573
|
+
if (anc_id, anc_version) not in self._tbl_versions:
|
|
1574
|
+
_ = self._load_tbl_version(anc_id, anc_version)
|
|
1575
|
+
|
|
1576
|
+
# Now reconstruct the relevant TableVersionPath instance from the ancestor versions.
|
|
1577
|
+
tvp: Optional[TableVersionPath] = None
|
|
1578
|
+
for anc_id, anc_version in ancestors[::-1]:
|
|
1579
|
+
tvp = TableVersionPath(TableVersionHandle(anc_id, anc_version), base=tvp)
|
|
1580
|
+
|
|
1581
|
+
view = View(tbl_id, tbl_record.dir_id, tbl_md.name, tvp, snapshot_only=True)
|
|
1582
|
+
self._tbls[tbl_id, version] = view
|
|
1583
|
+
|
|
1584
|
+
@retry_loop(for_write=False)
|
|
1244
1585
|
def collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
|
|
1245
1586
|
"""
|
|
1246
1587
|
Returns the history of up to n versions of the table with the given UUID.
|
|
@@ -1258,8 +1599,7 @@ class Catalog:
|
|
|
1258
1599
|
.select_from(schema.TableVersion)
|
|
1259
1600
|
.join(
|
|
1260
1601
|
schema.TableSchemaVersion,
|
|
1261
|
-
|
|
1262
|
-
== schema.TableSchemaVersion.schema_version,
|
|
1602
|
+
schema.TableVersion.md['schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version,
|
|
1263
1603
|
)
|
|
1264
1604
|
.where(schema.TableVersion.tbl_id == tbl_id)
|
|
1265
1605
|
.where(schema.TableSchemaVersion.tbl_id == tbl_id)
|
|
@@ -1301,13 +1641,9 @@ class Catalog:
|
|
|
1301
1641
|
# JOIN TableVersion tv ON (tv.tbl_id = tbl_id AND tv.version = effective_version)
|
|
1302
1642
|
# JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND tv.md.schema_version = tsv.schema_version)
|
|
1303
1643
|
# WHERE t.id = tbl_id
|
|
1304
|
-
q = q.where(
|
|
1305
|
-
sql.
|
|
1306
|
-
|
|
1307
|
-
f"({schema.TableVersion.__table__}.md->>'schema_version')::int = "
|
|
1308
|
-
f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
|
|
1309
|
-
)
|
|
1310
|
-
)
|
|
1644
|
+
q = q.where(
|
|
1645
|
+
schema.TableVersion.md['version'].cast(sql.Integer) == effective_version,
|
|
1646
|
+
schema.TableVersion.md['schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version,
|
|
1311
1647
|
)
|
|
1312
1648
|
else:
|
|
1313
1649
|
# we are loading the current version
|
|
@@ -1317,17 +1653,8 @@ class Catalog:
|
|
|
1317
1653
|
# JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND t.current_schema_version = tsv.schema_version)
|
|
1318
1654
|
# WHERE t.id = tbl_id
|
|
1319
1655
|
q = q.where(
|
|
1320
|
-
sql.
|
|
1321
|
-
|
|
1322
|
-
f'{schema.TableVersion.__table__}.{schema.TableVersion.version.name}'
|
|
1323
|
-
)
|
|
1324
|
-
).where(
|
|
1325
|
-
sql.text(
|
|
1326
|
-
(
|
|
1327
|
-
f"({schema.Table.__table__}.md->>'current_schema_version')::int = "
|
|
1328
|
-
f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
|
|
1329
|
-
)
|
|
1330
|
-
)
|
|
1656
|
+
schema.Table.md['current_version'].cast(sql.Integer) == schema.TableVersion.version,
|
|
1657
|
+
schema.Table.md['current_schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version,
|
|
1331
1658
|
)
|
|
1332
1659
|
|
|
1333
1660
|
row = conn.execute(q).one_or_none()
|
|
@@ -1350,6 +1677,7 @@ class Catalog:
|
|
|
1350
1677
|
tbl_md: Optional[schema.TableMd],
|
|
1351
1678
|
version_md: Optional[schema.TableVersionMd],
|
|
1352
1679
|
schema_version_md: Optional[schema.TableSchemaVersionMd],
|
|
1680
|
+
pending_ops: Optional[list[TableOp]] = None,
|
|
1353
1681
|
) -> None:
|
|
1354
1682
|
"""
|
|
1355
1683
|
Stores metadata to the DB.
|
|
@@ -1364,6 +1692,9 @@ class Catalog:
|
|
|
1364
1692
|
If inserting `version_md` or `schema_version_md` would be a primary key violation, an exception will be raised.
|
|
1365
1693
|
"""
|
|
1366
1694
|
assert self._in_write_xact
|
|
1695
|
+
assert version_md is None or version_md.created_at > 0.0
|
|
1696
|
+
assert pending_ops is None or len(pending_ops) > 0
|
|
1697
|
+
assert pending_ops is None or tbl_md is not None # if we write pending ops, we must also write new tbl_md
|
|
1367
1698
|
session = Env.get().session
|
|
1368
1699
|
|
|
1369
1700
|
# Construct and insert or update table record if requested.
|
|
@@ -1374,6 +1705,9 @@ class Catalog:
|
|
|
1374
1705
|
assert tbl_md.current_schema_version == version_md.schema_version
|
|
1375
1706
|
if schema_version_md is not None:
|
|
1376
1707
|
assert tbl_md.current_schema_version == schema_version_md.schema_version
|
|
1708
|
+
if pending_ops is not None:
|
|
1709
|
+
tbl_md.has_pending_ops = True
|
|
1710
|
+
|
|
1377
1711
|
if dir_id is not None:
|
|
1378
1712
|
# We are inserting a record while creating a new table.
|
|
1379
1713
|
tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_md))
|
|
@@ -1404,25 +1738,30 @@ class Catalog:
|
|
|
1404
1738
|
tbl_id=tbl_id, schema_version=schema_version_md.schema_version, md=dataclasses.asdict(schema_version_md)
|
|
1405
1739
|
)
|
|
1406
1740
|
session.add(schema_version_record)
|
|
1407
|
-
session.flush() # Inform SQLAlchemy that we want to write these changes to the DB.
|
|
1408
1741
|
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
Update the TableVersion.md field in the DB. Typically used to update the cascade row count status.
|
|
1742
|
+
# make sure we don't have any pending ops
|
|
1743
|
+
assert session.query(schema.PendingTableOp).filter(schema.PendingTableOp.tbl_id == tbl_id).count() == 0
|
|
1412
1744
|
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
|
|
1745
|
+
if pending_ops is not None:
|
|
1746
|
+
for op in pending_ops:
|
|
1747
|
+
op_record = schema.PendingTableOp(tbl_id=tbl_id, op_sn=op.op_sn, op=dataclasses.asdict(op))
|
|
1748
|
+
session.add(op_record)
|
|
1749
|
+
|
|
1750
|
+
session.flush() # Inform SQLAlchemy that we want to write these changes to the DB.
|
|
1751
|
+
|
|
1752
|
+
def store_update_status(self, tbl_id: UUID, version: int, status: UpdateStatus) -> None:
|
|
1753
|
+
"""Update the TableVersion.md.update_status field"""
|
|
1416
1754
|
assert self._in_write_xact
|
|
1417
|
-
|
|
1755
|
+
conn = Env.get().conn
|
|
1418
1756
|
|
|
1419
|
-
|
|
1420
|
-
sql.update(schema.TableVersion
|
|
1421
|
-
.
|
|
1422
|
-
.
|
|
1757
|
+
stmt = (
|
|
1758
|
+
sql.update(schema.TableVersion)
|
|
1759
|
+
.where(schema.TableVersion.tbl_id == tbl_id, schema.TableVersion.version == version)
|
|
1760
|
+
.values(md=schema.TableVersion.md.op('||')({'update_status': dataclasses.asdict(status)}))
|
|
1423
1761
|
)
|
|
1424
1762
|
|
|
1425
|
-
|
|
1763
|
+
res = conn.execute(stmt)
|
|
1764
|
+
assert res.rowcount == 1, res.rowcount
|
|
1426
1765
|
|
|
1427
1766
|
def delete_tbl_md(self, tbl_id: UUID) -> None:
|
|
1428
1767
|
"""
|
|
@@ -1431,6 +1770,7 @@ class Catalog:
|
|
|
1431
1770
|
conn = Env.get().conn
|
|
1432
1771
|
conn.execute(sql.delete(schema.TableSchemaVersion.__table__).where(schema.TableSchemaVersion.tbl_id == tbl_id))
|
|
1433
1772
|
conn.execute(sql.delete(schema.TableVersion.__table__).where(schema.TableVersion.tbl_id == tbl_id))
|
|
1773
|
+
conn.execute(sql.delete(schema.PendingTableOp.__table__).where(schema.PendingTableOp.tbl_id == tbl_id))
|
|
1434
1774
|
conn.execute(sql.delete(schema.Table.__table__).where(schema.Table.id == tbl_id))
|
|
1435
1775
|
|
|
1436
1776
|
def load_replica_md(self, tbl: Table) -> list[schema.FullTableMd]:
|
|
@@ -1461,13 +1801,32 @@ class Catalog:
|
|
|
1461
1801
|
|
|
1462
1802
|
return md
|
|
1463
1803
|
|
|
1464
|
-
def _load_tbl_version(
|
|
1804
|
+
def _load_tbl_version(
|
|
1805
|
+
self, tbl_id: UUID, effective_version: Optional[int], check_pending_ops: bool = True
|
|
1806
|
+
) -> Optional[TableVersion]:
|
|
1465
1807
|
"""Creates TableVersion instance from stored metadata and registers it in _tbl_versions."""
|
|
1466
|
-
tbl_md,
|
|
1808
|
+
tbl_md, version_md, schema_version_md = self.load_tbl_md(tbl_id, effective_version)
|
|
1467
1809
|
view_md = tbl_md.view_md
|
|
1468
1810
|
|
|
1469
1811
|
conn = Env.get().conn
|
|
1470
1812
|
|
|
1813
|
+
if check_pending_ops:
|
|
1814
|
+
pending_ops_q = (
|
|
1815
|
+
sql.select(sql.func.count())
|
|
1816
|
+
.select_from(schema.Table)
|
|
1817
|
+
.join(schema.PendingTableOp)
|
|
1818
|
+
.where(schema.PendingTableOp.tbl_id == tbl_id)
|
|
1819
|
+
.where(schema.Table.id == tbl_id)
|
|
1820
|
+
)
|
|
1821
|
+
if effective_version is not None:
|
|
1822
|
+
# we only care about pending ops if the requested version is the current version
|
|
1823
|
+
pending_ops_q = pending_ops_q.where(
|
|
1824
|
+
sql.text(f"({schema.Table.__table__}.md->>'current_version')::int = {effective_version}")
|
|
1825
|
+
)
|
|
1826
|
+
has_pending_ops = conn.execute(pending_ops_q).scalar() > 0
|
|
1827
|
+
if has_pending_ops:
|
|
1828
|
+
raise PendingTableOpsError(tbl_id)
|
|
1829
|
+
|
|
1471
1830
|
# load mutable view ids for mutable TableVersions
|
|
1472
1831
|
mutable_view_ids: list[UUID] = []
|
|
1473
1832
|
# If this is a replica, effective_version should not be None. We see this today, because
|
|
@@ -1475,24 +1834,30 @@ class Catalog:
|
|
|
1475
1834
|
# This is presumably a source of bugs, because it ignores schema version changes (eg, column renames).
|
|
1476
1835
|
# TODO: retarget the value_expr_dict when instantiating Columns for a particular TV instance.
|
|
1477
1836
|
if effective_version is None and not tbl_md.is_replica:
|
|
1478
|
-
q =
|
|
1479
|
-
sql.
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
)
|
|
1837
|
+
q = (
|
|
1838
|
+
sql.select(schema.Table.id)
|
|
1839
|
+
.where(schema.Table.md['view_md']['base_versions'][0][0].astext == tbl_id.hex)
|
|
1840
|
+
.where(schema.Table.md['view_md']['base_versions'][0][1].astext == None)
|
|
1483
1841
|
)
|
|
1484
1842
|
mutable_view_ids = [r[0] for r in conn.execute(q).all()]
|
|
1843
|
+
|
|
1485
1844
|
mutable_views = [TableVersionHandle(id, None) for id in mutable_view_ids]
|
|
1486
1845
|
|
|
1487
1846
|
tbl_version: TableVersion
|
|
1488
1847
|
if view_md is None:
|
|
1489
1848
|
# this is a base table
|
|
1490
1849
|
tbl_version = TableVersion(
|
|
1491
|
-
tbl_id, tbl_md, effective_version, schema_version_md, mutable_views=mutable_views
|
|
1850
|
+
tbl_id, tbl_md, version_md, effective_version, schema_version_md, mutable_views=mutable_views
|
|
1492
1851
|
)
|
|
1493
1852
|
else:
|
|
1494
1853
|
assert len(view_md.base_versions) > 0 # a view needs to have a base
|
|
1495
|
-
|
|
1854
|
+
# TODO: add TableVersionMd.is_pure_snapshot() and use that
|
|
1855
|
+
pure_snapshot = (
|
|
1856
|
+
view_md.is_snapshot
|
|
1857
|
+
and view_md.predicate is None
|
|
1858
|
+
and view_md.sample_clause is None
|
|
1859
|
+
and len(schema_version_md.columns) == 0
|
|
1860
|
+
)
|
|
1496
1861
|
assert not pure_snapshot # a pure snapshot doesn't have a physical table backing it, no point in loading it
|
|
1497
1862
|
|
|
1498
1863
|
base: TableVersionHandle
|
|
@@ -1506,29 +1871,22 @@ class Catalog:
|
|
|
1506
1871
|
tbl_version = TableVersion(
|
|
1507
1872
|
tbl_id,
|
|
1508
1873
|
tbl_md,
|
|
1874
|
+
version_md,
|
|
1509
1875
|
effective_version,
|
|
1510
1876
|
schema_version_md,
|
|
1877
|
+
mutable_views,
|
|
1511
1878
|
base_path=base_path,
|
|
1512
1879
|
base=base,
|
|
1513
|
-
mutable_views=mutable_views,
|
|
1514
1880
|
)
|
|
1515
1881
|
|
|
1882
|
+
# register the instance before init()
|
|
1516
1883
|
self._tbl_versions[tbl_id, effective_version] = tbl_version
|
|
1884
|
+
# register this instance as modified, so that it gets purged if the transaction fails, it may not be
|
|
1885
|
+
# fully initialized
|
|
1886
|
+
self._modified_tvs.add(tbl_version.handle)
|
|
1517
1887
|
tbl_version.init()
|
|
1518
1888
|
return tbl_version
|
|
1519
1889
|
|
|
1520
|
-
def record_column_dependencies(self, tbl_version: TableVersion) -> None:
|
|
1521
|
-
"""Update self._column_dependencies. Only valid for non-snapshot versions."""
|
|
1522
|
-
from pixeltable.exprs import Expr
|
|
1523
|
-
|
|
1524
|
-
assert not tbl_version.is_snapshot
|
|
1525
|
-
dependencies: dict[QColumnId, set[QColumnId]] = {}
|
|
1526
|
-
for col in tbl_version.cols_by_id.values():
|
|
1527
|
-
if col.value_expr_dict is None:
|
|
1528
|
-
continue
|
|
1529
|
-
dependencies[QColumnId(tbl_version.id, col.id)] = Expr.get_refd_column_ids(col.value_expr_dict)
|
|
1530
|
-
self._column_dependencies[tbl_version.id] = dependencies
|
|
1531
|
-
|
|
1532
1890
|
def _init_store(self) -> None:
|
|
1533
1891
|
"""One-time initialization of the stored catalog. Idempotent."""
|
|
1534
1892
|
self.create_user(None)
|
|
@@ -1557,14 +1915,20 @@ class Catalog:
|
|
|
1557
1915
|
obj, _, _ = self._prepare_dir_op(add_dir_path=path.parent, add_name=path.name)
|
|
1558
1916
|
|
|
1559
1917
|
if if_exists == IfExistsParam.ERROR and obj is not None:
|
|
1560
|
-
raise excs.Error(f'Path {
|
|
1918
|
+
raise excs.Error(f'Path {path!r} is an existing {obj._display_name()}')
|
|
1561
1919
|
else:
|
|
1562
1920
|
is_snapshot = isinstance(obj, View) and obj._tbl_version_path.is_snapshot()
|
|
1563
1921
|
if obj is not None and (not isinstance(obj, expected_obj_type) or (expected_snapshot and not is_snapshot)):
|
|
1564
|
-
|
|
1922
|
+
if expected_obj_type is Dir:
|
|
1923
|
+
obj_type_str = 'directory'
|
|
1924
|
+
elif expected_obj_type is InsertableTable:
|
|
1925
|
+
obj_type_str = 'table'
|
|
1926
|
+
elif expected_obj_type is View:
|
|
1927
|
+
obj_type_str = 'snapshot' if expected_snapshot else 'view'
|
|
1928
|
+
else:
|
|
1929
|
+
raise AssertionError()
|
|
1565
1930
|
raise excs.Error(
|
|
1566
|
-
f'Path {
|
|
1567
|
-
f'Cannot {if_exists.name.lower()} it.'
|
|
1931
|
+
f'Path {path!r} already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.'
|
|
1568
1932
|
)
|
|
1569
1933
|
|
|
1570
1934
|
if obj is None:
|
|
@@ -1577,7 +1941,7 @@ class Catalog:
|
|
|
1577
1941
|
dir_contents = self._get_dir_contents(obj._id)
|
|
1578
1942
|
if len(dir_contents) > 0 and if_exists == IfExistsParam.REPLACE:
|
|
1579
1943
|
raise excs.Error(
|
|
1580
|
-
f'Directory {
|
|
1944
|
+
f'Directory {path!r} already exists and is not empty. '
|
|
1581
1945
|
'Use `if_exists="replace_force"` to replace it.'
|
|
1582
1946
|
)
|
|
1583
1947
|
self._drop_dir(obj._id, path, force=True)
|