pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +11 -2
- pixeltable/catalog/catalog.py +407 -119
- pixeltable/catalog/column.py +38 -26
- pixeltable/catalog/globals.py +130 -15
- pixeltable/catalog/insertable_table.py +10 -9
- pixeltable/catalog/schema_object.py +6 -0
- pixeltable/catalog/table.py +245 -119
- pixeltable/catalog/table_version.py +142 -116
- pixeltable/catalog/table_version_handle.py +30 -2
- pixeltable/catalog/table_version_path.py +28 -4
- pixeltable/catalog/view.py +14 -20
- pixeltable/config.py +4 -0
- pixeltable/dataframe.py +10 -9
- pixeltable/env.py +5 -11
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/exec_node.py +2 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
- pixeltable/exec/sql_node.py +47 -30
- pixeltable/exprs/column_property_ref.py +2 -10
- pixeltable/exprs/column_ref.py +24 -21
- pixeltable/exprs/data_row.py +9 -0
- pixeltable/exprs/expr.py +4 -4
- pixeltable/exprs/row_builder.py +44 -13
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +4 -2
- pixeltable/func/tools.py +12 -2
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +8 -6
- pixeltable/functions/mistralai.py +2 -13
- pixeltable/functions/openai.py +1 -6
- pixeltable/functions/replicate.py +2 -2
- pixeltable/functions/util.py +6 -1
- pixeltable/globals.py +0 -2
- pixeltable/io/external_store.py +81 -54
- pixeltable/io/globals.py +1 -1
- pixeltable/io/label_studio.py +49 -45
- pixeltable/io/table_data_conduit.py +1 -1
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/notes.py +2 -0
- pixeltable/metadata/schema.py +5 -0
- pixeltable/metadata/utils.py +78 -0
- pixeltable/plan.py +59 -139
- pixeltable/share/packager.py +2 -2
- pixeltable/store.py +114 -103
- pixeltable/type_system.py +30 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/METADATA +1 -1
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/RECORD +57 -53
- pixeltable/utils/sample.py +0 -25
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/LICENSE +0 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/catalog.py
CHANGED
|
@@ -5,6 +5,7 @@ import functools
|
|
|
5
5
|
import logging
|
|
6
6
|
import random
|
|
7
7
|
import time
|
|
8
|
+
from collections import defaultdict
|
|
8
9
|
from contextlib import contextmanager
|
|
9
10
|
from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, TypeVar
|
|
10
11
|
from uuid import UUID
|
|
@@ -17,10 +18,9 @@ from pixeltable.env import Env
|
|
|
17
18
|
from pixeltable.iterators import ComponentIterator
|
|
18
19
|
from pixeltable.metadata import schema
|
|
19
20
|
|
|
20
|
-
|
|
21
|
-
from pixeltable.plan import SampleClause
|
|
21
|
+
from .column import Column
|
|
22
22
|
from .dir import Dir
|
|
23
|
-
from .globals import IfExistsParam, IfNotExistsParam, MediaValidation
|
|
23
|
+
from .globals import IfExistsParam, IfNotExistsParam, MediaValidation, QColumnId
|
|
24
24
|
from .insertable_table import InsertableTable
|
|
25
25
|
from .path import Path
|
|
26
26
|
from .schema_object import SchemaObject
|
|
@@ -31,6 +31,8 @@ from .table_version_path import TableVersionPath
|
|
|
31
31
|
from .view import View
|
|
32
32
|
|
|
33
33
|
if TYPE_CHECKING:
|
|
34
|
+
from pixeltable.plan import SampleClause
|
|
35
|
+
|
|
34
36
|
from .. import DataFrame, exprs
|
|
35
37
|
|
|
36
38
|
|
|
@@ -60,9 +62,10 @@ def _unpack_row(
|
|
|
60
62
|
return result
|
|
61
63
|
|
|
62
64
|
|
|
65
|
+
# -1: unlimited
|
|
63
66
|
# for now, we don't limit the number of retries, because we haven't seen situations where the actual number of retries
|
|
64
67
|
# grows uncontrollably
|
|
65
|
-
_MAX_RETRIES =
|
|
68
|
+
_MAX_RETRIES = -1
|
|
66
69
|
|
|
67
70
|
T = TypeVar('T')
|
|
68
71
|
|
|
@@ -71,25 +74,29 @@ def _retry_loop(*, for_write: bool) -> Callable[[Callable[..., T]], Callable[...
|
|
|
71
74
|
def decorator(op: Callable[..., T]) -> Callable[..., T]:
|
|
72
75
|
@functools.wraps(op)
|
|
73
76
|
def loop(*args: Any, **kwargs: Any) -> T:
|
|
74
|
-
|
|
77
|
+
num_retries = 0
|
|
75
78
|
while True:
|
|
76
79
|
try:
|
|
77
80
|
# in order for retry to work, we need to make sure that there aren't any prior db updates
|
|
78
81
|
# that are part of an ongoing transaction
|
|
79
82
|
assert not Env.get().in_xact
|
|
80
|
-
with Catalog.get().begin_xact(for_write=for_write):
|
|
83
|
+
with Catalog.get().begin_xact(for_write=for_write, convert_db_excs=False):
|
|
81
84
|
return op(*args, **kwargs)
|
|
82
85
|
except sql.exc.DBAPIError as e:
|
|
83
86
|
# TODO: what other exceptions should we be looking for?
|
|
84
|
-
if isinstance(e.orig, psycopg.errors.SerializationFailure):
|
|
85
|
-
if
|
|
86
|
-
|
|
87
|
-
_logger.debug(f'
|
|
87
|
+
if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
|
|
88
|
+
if num_retries < _MAX_RETRIES or _MAX_RETRIES == -1:
|
|
89
|
+
num_retries += 1
|
|
90
|
+
_logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
|
|
88
91
|
time.sleep(random.uniform(0.1, 0.5))
|
|
89
92
|
else:
|
|
90
93
|
raise excs.Error(f'Serialization retry limit ({_MAX_RETRIES}) exceeded') from e
|
|
91
94
|
else:
|
|
92
95
|
raise
|
|
96
|
+
except Exception as e:
|
|
97
|
+
# for informational/debugging purposes
|
|
98
|
+
_logger.debug(f'retry_loop(): passing along {e}')
|
|
99
|
+
raise
|
|
93
100
|
|
|
94
101
|
return loop
|
|
95
102
|
|
|
@@ -109,8 +116,8 @@ class Catalog:
|
|
|
109
116
|
duplicate references to that table in the From clause (ie, incorrect Cartesian products)
|
|
110
117
|
- in order to allow multiple concurrent Python processes to perform updates (data and/or schema) against a shared
|
|
111
118
|
Pixeltable instance, Catalog needs to reload metadata from the store when there are changes
|
|
112
|
-
- concurrent changes are detected by comparing TableVersion.version with the stored current version
|
|
113
|
-
(TableMd.current_version)
|
|
119
|
+
- concurrent changes are detected by comparing TableVersion.version/view_sn with the stored current version
|
|
120
|
+
(TableMd.current_version/view_sn)
|
|
114
121
|
- cached live TableVersion instances (those with effective_version == None) are validated against the stored
|
|
115
122
|
metadata on transaction boundaries; this is recorded in TableVersion.is_validated
|
|
116
123
|
- metadata validation is only needed for live TableVersion instances (snapshot instances are immutable)
|
|
@@ -118,13 +125,22 @@ class Catalog:
|
|
|
118
125
|
|
|
119
126
|
_instance: Optional[Catalog] = None
|
|
120
127
|
|
|
121
|
-
# key: [id, version]
|
|
128
|
+
# cached TableVersion instances; key: [id, version]
|
|
122
129
|
# - mutable version of a table: version == None (even though TableVersion.version is set correctly)
|
|
123
130
|
# - snapshot versions: records the version of the snapshot
|
|
124
131
|
_tbl_versions: dict[tuple[UUID, Optional[int]], TableVersion]
|
|
125
132
|
_tbls: dict[UUID, Table]
|
|
126
133
|
_in_write_xact: bool # True if we're in a write transaction
|
|
127
|
-
|
|
134
|
+
_x_locked_tbl_ids: set[UUID] # non-empty for write transactions
|
|
135
|
+
|
|
136
|
+
# cached column dependencies
|
|
137
|
+
# - key: table id, value: mapping from column id to its dependencies
|
|
138
|
+
# - only maintained for dependencies between non-snapshot table versions
|
|
139
|
+
# - can contain stale entries (stemming from invalidated TV instances)
|
|
140
|
+
_column_dependencies: dict[UUID, dict[QColumnId, set[QColumnId]]]
|
|
141
|
+
|
|
142
|
+
# column dependents are recomputed at the beginning of every write transaction and only reflect the locked tree
|
|
143
|
+
_column_dependents: Optional[dict[QColumnId, set[QColumnId]]]
|
|
128
144
|
|
|
129
145
|
@classmethod
|
|
130
146
|
def get(cls) -> Catalog:
|
|
@@ -147,9 +163,14 @@ class Catalog:
|
|
|
147
163
|
self._tbl_versions = {}
|
|
148
164
|
self._tbls = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
|
|
149
165
|
self._in_write_xact = False
|
|
150
|
-
self.
|
|
166
|
+
self._x_locked_tbl_ids = set()
|
|
167
|
+
self._column_dependencies = {}
|
|
168
|
+
self._column_dependents = None
|
|
151
169
|
self._init_store()
|
|
152
170
|
|
|
171
|
+
def _dropped_tbl_error_msg(self, tbl_id: UUID) -> str:
|
|
172
|
+
return f'Table was dropped (no record found for {tbl_id})'
|
|
173
|
+
|
|
153
174
|
def validate(self) -> None:
|
|
154
175
|
"""Validate structural consistency of cached metadata"""
|
|
155
176
|
for (tbl_id, effective_version), tbl_version in self._tbl_versions.items():
|
|
@@ -164,13 +185,24 @@ class Catalog:
|
|
|
164
185
|
f'snapshot_id={tbl_version.id} mutable_views={tbl_version.mutable_views}'
|
|
165
186
|
)
|
|
166
187
|
|
|
167
|
-
if tbl_version.is_view and tbl_version.is_mutable:
|
|
188
|
+
if tbl_version.is_view and tbl_version.is_mutable and tbl_version.is_validated:
|
|
168
189
|
# make sure this mutable view is recorded in a mutable base
|
|
169
190
|
base = tbl_version.base
|
|
170
191
|
assert base is not None
|
|
171
192
|
if base.effective_version is None:
|
|
172
193
|
assert (base.id, None) in self._tbl_versions
|
|
173
|
-
|
|
194
|
+
base_tv = self._tbl_versions[base.id, None]
|
|
195
|
+
if not base_tv.is_validated:
|
|
196
|
+
continue
|
|
197
|
+
mutable_view_ids = ', '.join(str(tv.id) for tv in self._tbl_versions[base.id, None].mutable_views)
|
|
198
|
+
mutable_view_names = ', '.join(
|
|
199
|
+
tv._tbl_version.name
|
|
200
|
+
for tv in self._tbl_versions[base.id, None].mutable_views
|
|
201
|
+
if tv._tbl_version is not None
|
|
202
|
+
)
|
|
203
|
+
assert TableVersionHandle.create(tbl_version) in self._tbl_versions[base.id, None].mutable_views, (
|
|
204
|
+
f'{tbl_version.name} ({tbl_version.id}) missing in {mutable_view_ids} ({mutable_view_names})'
|
|
205
|
+
)
|
|
174
206
|
|
|
175
207
|
if len(tbl_version.mutable_views) > 0:
|
|
176
208
|
# make sure we also loaded mutable view metadata, which is needed to detect column dependencies
|
|
@@ -178,24 +210,37 @@ class Catalog:
|
|
|
178
210
|
assert v.effective_version is None, f'{v.id}:{v.effective_version}'
|
|
179
211
|
|
|
180
212
|
@contextmanager
|
|
181
|
-
def begin_xact(
|
|
213
|
+
def begin_xact(
|
|
214
|
+
self,
|
|
215
|
+
*,
|
|
216
|
+
tbl: Optional[TableVersionPath] = None,
|
|
217
|
+
for_write: bool = False,
|
|
218
|
+
lock_mutable_tree: bool = False,
|
|
219
|
+
convert_db_excs: bool = True,
|
|
220
|
+
) -> Iterator[sql.Connection]:
|
|
182
221
|
"""
|
|
183
222
|
Return a context manager that yields a connection to the database. Idempotent.
|
|
184
223
|
|
|
185
224
|
It is mandatory to call this method, not Env.begin_xact(), if the transaction accesses any table data
|
|
186
225
|
or metadata.
|
|
187
226
|
|
|
188
|
-
|
|
189
|
-
-
|
|
227
|
+
If tbl != None, follows this locking protocol:
|
|
228
|
+
- validates/reloads the TableVersion instances of tbl's ancestors (in the hope that this reduces potential
|
|
229
|
+
SerializationFailures later on)
|
|
230
|
+
- if for_write == True, x-locks Table record (by updating Table.lock_dummy; see _acquire_tbl_xlock())
|
|
231
|
+
- if for_write == False, validates TableVersion instance
|
|
232
|
+
- if lock_mutable_tree == True, also x-locks all mutable views of the table
|
|
190
233
|
- this needs to be done in a retry loop, because Postgres can decide to abort the transaction
|
|
191
234
|
(SerializationFailure, LockNotAvailable)
|
|
192
235
|
- for that reason, we do all lock acquisition prior to doing any real work (eg, compute column values),
|
|
193
|
-
to minimize
|
|
236
|
+
to minimize the probability of losing that work due to a forced abort
|
|
237
|
+
|
|
238
|
+
If convert_db_excs == True, converts DBAPIErrors into excs.Errors.
|
|
194
239
|
"""
|
|
195
240
|
if Env.get().in_xact:
|
|
196
|
-
if
|
|
241
|
+
if tbl is not None and for_write:
|
|
197
242
|
# make sure that we requested the required table lock at the beginning of the transaction
|
|
198
|
-
assert tbl_id
|
|
243
|
+
assert tbl.tbl_id in self._x_locked_tbl_ids, f'{tbl.tbl_id} not in {self._x_locked_tbl_ids}'
|
|
199
244
|
yield Env.get().conn
|
|
200
245
|
return
|
|
201
246
|
|
|
@@ -209,30 +254,72 @@ class Catalog:
|
|
|
209
254
|
num_retries = 0
|
|
210
255
|
while True:
|
|
211
256
|
try:
|
|
257
|
+
self._in_write_xact = False
|
|
258
|
+
self._x_locked_tbl_ids = set()
|
|
259
|
+
self._column_dependents = None
|
|
260
|
+
|
|
212
261
|
with Env.get().begin_xact() as conn:
|
|
213
|
-
if
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
262
|
+
if tbl is not None:
|
|
263
|
+
try:
|
|
264
|
+
if not self._acquire_path_locks(
|
|
265
|
+
tbl=tbl, for_write=for_write, lock_mutable_tree=lock_mutable_tree
|
|
266
|
+
):
|
|
267
|
+
# this is a snapshot
|
|
268
|
+
yield conn
|
|
269
|
+
return
|
|
270
|
+
|
|
271
|
+
if for_write:
|
|
272
|
+
if lock_mutable_tree:
|
|
273
|
+
self._x_locked_tbl_ids = self._get_mutable_tree(tbl.tbl_id)
|
|
274
|
+
self._compute_column_dependents(self._x_locked_tbl_ids)
|
|
275
|
+
else:
|
|
276
|
+
self._x_locked_tbl_ids = {tbl.tbl_id}
|
|
277
|
+
if _logger.isEnabledFor(logging.DEBUG):
|
|
278
|
+
# validate only when we don't see errors
|
|
279
|
+
self.validate()
|
|
280
|
+
|
|
281
|
+
except sql.exc.DBAPIError as e:
|
|
282
|
+
if isinstance(
|
|
283
|
+
e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)
|
|
284
|
+
) and (num_retries < _MAX_RETRIES or _MAX_RETRIES == -1):
|
|
285
|
+
num_retries += 1
|
|
286
|
+
_logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
|
|
287
|
+
time.sleep(random.uniform(0.1, 0.5))
|
|
288
|
+
continue
|
|
289
|
+
else:
|
|
290
|
+
raise
|
|
220
291
|
|
|
221
292
|
self._in_write_xact = for_write
|
|
222
293
|
yield conn
|
|
223
294
|
return
|
|
295
|
+
|
|
224
296
|
except sql.exc.DBAPIError as e:
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
297
|
+
# we got some db error during the actual operation (not just while trying to get locks on the metadata
|
|
298
|
+
# records): we convert these into Errors, if asked to do so, and abort
|
|
299
|
+
# TODO: what other concurrency-related exceptions should we expect?
|
|
300
|
+
|
|
301
|
+
# we always convert UndefinedTable exceptions (they can't be retried)
|
|
302
|
+
if isinstance(e.orig, psycopg.errors.UndefinedTable):
|
|
303
|
+
# the table got dropped in the middle of the table operation
|
|
304
|
+
_logger.debug(f'Exception: undefined table ({tbl.tbl_name()}): Caught {type(e.orig)}: {e!r}')
|
|
305
|
+
assert tbl is not None
|
|
306
|
+
raise excs.Error(f'Table was dropped: {tbl.tbl_name()}') from None
|
|
307
|
+
elif isinstance(e.orig, psycopg.errors.SerializationFailure) and convert_db_excs:
|
|
308
|
+
# we still got a serialization error, despite getting x-locks at the beginning
|
|
309
|
+
msg = f'{tbl.tbl_name()} ({tbl.tbl_id})' if tbl is not None else ''
|
|
310
|
+
_logger.debug(f'Exception: serialization failure: {msg} ({e})')
|
|
311
|
+
raise excs.Error(
|
|
312
|
+
'That Pixeltable operation could not be completed because it conflicted with another '
|
|
313
|
+
'operation that was run on a different process.\n'
|
|
314
|
+
'Please re-run the operation.'
|
|
315
|
+
) from None
|
|
231
316
|
else:
|
|
232
317
|
raise
|
|
318
|
+
|
|
233
319
|
finally:
|
|
234
320
|
self._in_write_xact = False
|
|
235
|
-
self.
|
|
321
|
+
self._x_locked_tbl_ids = set()
|
|
322
|
+
self._column_dependents = None
|
|
236
323
|
|
|
237
324
|
# invalidate cached current TableVersion instances
|
|
238
325
|
for tv in self._tbl_versions.values():
|
|
@@ -240,20 +327,117 @@ class Catalog:
|
|
|
240
327
|
_logger.debug(f'invalidating table version {tv.id}:None (tv={id(tv):x})')
|
|
241
328
|
tv.is_validated = False
|
|
242
329
|
|
|
243
|
-
if _logger.isEnabledFor(logging.DEBUG):
|
|
244
|
-
self.validate()
|
|
245
|
-
|
|
246
330
|
@property
|
|
247
331
|
def in_write_xact(self) -> bool:
|
|
248
332
|
return self._in_write_xact
|
|
249
333
|
|
|
250
|
-
def
|
|
334
|
+
def _acquire_path_locks(
|
|
335
|
+
self, *, tbl: TableVersionPath, for_write: bool = False, lock_mutable_tree: bool = False
|
|
336
|
+
) -> bool:
|
|
337
|
+
"""
|
|
338
|
+
Path locking protocol:
|
|
339
|
+
- refresh cached TableVersions of ancestors (we need those even during inserts, for computed columns that
|
|
340
|
+
reference the base tables)
|
|
341
|
+
- refresh cached TableVersion of tbl or get X-lock, depending on for_write
|
|
342
|
+
- if lock_mutable_tree, also X-lock all mutable views of tbl
|
|
343
|
+
|
|
344
|
+
Returns False if trying to lock a pure snapshot with for_write == True
|
|
345
|
+
Raises Error if tbl doesn't exist.
|
|
346
|
+
"""
|
|
347
|
+
start_idx = 1 if for_write else 0
|
|
348
|
+
for handle in tbl.get_tbl_versions()[start_idx::-1]:
|
|
349
|
+
_ = self.get_tbl_version(handle.id, handle.effective_version)
|
|
350
|
+
if not for_write:
|
|
351
|
+
return True # nothing left to lock
|
|
352
|
+
return self._acquire_tbl_xlock(tbl_id=tbl.tbl_id, lock_mutable_tree=lock_mutable_tree, raise_if_not_exists=True)
|
|
353
|
+
|
|
354
|
+
def _acquire_tbl_xlock(
|
|
355
|
+
self,
|
|
356
|
+
*,
|
|
357
|
+
tbl_id: Optional[UUID] = None,
|
|
358
|
+
dir_id: Optional[UUID] = None,
|
|
359
|
+
tbl_name: Optional[str] = None,
|
|
360
|
+
lock_mutable_tree: bool = False,
|
|
361
|
+
raise_if_not_exists: bool = False,
|
|
362
|
+
) -> bool:
|
|
363
|
+
"""Force acquisition of an X-lock on a Table record via a blind update
|
|
364
|
+
|
|
365
|
+
Either tbl_id or dir_id/tbl_name need to be specified.
|
|
366
|
+
Returns True if the table was locked, False if it was a snapshot or not found.
|
|
367
|
+
If lock_mutable_tree, recursively locks all mutable views of the table.
|
|
368
|
+
|
|
369
|
+
Returns False if the table is a snapshot, or if it was not found and raise_if_not_exists is False (raises Error if it was not found and raise_if_not_exists is True).
|
|
370
|
+
"""
|
|
371
|
+
where_clause: sql.ColumnElement
|
|
372
|
+
if tbl_id is not None:
|
|
373
|
+
where_clause = schema.Table.id == tbl_id
|
|
374
|
+
else:
|
|
375
|
+
where_clause = sql.and_(schema.Table.dir_id == dir_id, schema.Table.md['name'].astext == tbl_name)
|
|
376
|
+
user = Env.get().user
|
|
377
|
+
if user is not None:
|
|
378
|
+
where_clause = sql.and_(where_clause, schema.Table.md['user'].astext == Env.get().user)
|
|
379
|
+
|
|
380
|
+
conn = Env.get().conn
|
|
381
|
+
row = conn.execute(sql.select(schema.Table).where(where_clause).with_for_update(nowait=True)).one_or_none()
|
|
382
|
+
if row is None:
|
|
383
|
+
if raise_if_not_exists:
|
|
384
|
+
raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
|
|
385
|
+
return False # nothing to lock
|
|
386
|
+
if row.md['view_md'] is not None and row.md['view_md']['is_snapshot']:
|
|
387
|
+
return False # nothing to lock
|
|
388
|
+
conn.execute(sql.update(schema.Table).values(lock_dummy=1).where(where_clause))
|
|
389
|
+
|
|
390
|
+
if not lock_mutable_tree:
|
|
391
|
+
return True
|
|
392
|
+
# also lock mutable views
|
|
393
|
+
tv = self.get_tbl_version(tbl_id, None)
|
|
394
|
+
for view in tv.mutable_views:
|
|
395
|
+
self._acquire_tbl_xlock(tbl_id=view.id, lock_mutable_tree=True, raise_if_not_exists=raise_if_not_exists)
|
|
396
|
+
return True
|
|
397
|
+
|
|
398
|
+
def _get_mutable_tree(self, tbl_id: UUID) -> set[UUID]:
|
|
399
|
+
"""Returns ids of all tables that form the tree of mutable views starting at tbl_id; includes the root."""
|
|
400
|
+
tv = self.get_tbl_version(tbl_id, None)
|
|
401
|
+
result: set[UUID] = {tv.id}
|
|
402
|
+
for view in tv.mutable_views:
|
|
403
|
+
result.update(self._get_mutable_tree(view.id))
|
|
404
|
+
return result
|
|
405
|
+
|
|
406
|
+
def _compute_column_dependents(self, mutable_tree: set[UUID]) -> None:
|
|
407
|
+
"""Populate self._column_dependents for all tables in mutable_tree"""
|
|
408
|
+
assert self._column_dependents is None
|
|
409
|
+
self._column_dependents = defaultdict(set)
|
|
410
|
+
for tbl_id in mutable_tree:
|
|
411
|
+
assert tbl_id in self._column_dependencies
|
|
412
|
+
for col, dependencies in self._column_dependencies[tbl_id].items():
|
|
413
|
+
for dependency in dependencies:
|
|
414
|
+
if dependency.tbl_id not in mutable_tree:
|
|
415
|
+
continue
|
|
416
|
+
dependents = self._column_dependents[dependency]
|
|
417
|
+
dependents.add(col)
|
|
418
|
+
|
|
419
|
+
def get_column_dependents(self, tbl_id: UUID, col_id: int) -> set[Column]:
|
|
420
|
+
"""Return all Columns that transitively depend on the given column."""
|
|
421
|
+
assert self._column_dependents is not None
|
|
422
|
+
dependents = self._column_dependents[QColumnId(tbl_id, col_id)]
|
|
423
|
+
result: set[Column] = set()
|
|
424
|
+
for dependent in dependents:
|
|
425
|
+
tv = self.get_tbl_version(dependent.tbl_id, None)
|
|
426
|
+
col = tv.cols_by_id[dependent.col_id]
|
|
427
|
+
result.add(col)
|
|
428
|
+
return result
|
|
429
|
+
|
|
430
|
+
def _acquire_dir_xlock(
|
|
431
|
+
self, *, parent_id: Optional[UUID] = None, dir_id: Optional[UUID] = None, dir_name: Optional[str] = None
|
|
432
|
+
) -> None:
|
|
251
433
|
"""Force acquisition of an X-lock on a Dir record via a blind update.
|
|
252
434
|
|
|
253
435
|
If dir_id is present, then all other conditions are ignored.
|
|
254
436
|
Note that (parent_id==None) is a valid where condition.
|
|
255
437
|
If dir_id is not specified, the user from the environment is added to the directory filters.
|
|
256
438
|
"""
|
|
439
|
+
assert (dir_name is None) != (dir_id is None)
|
|
440
|
+
assert not (parent_id is not None and dir_name is None)
|
|
257
441
|
user = Env.get().user
|
|
258
442
|
assert self._in_write_xact
|
|
259
443
|
q = sql.update(schema.Dir).values(lock_dummy=1)
|
|
@@ -367,7 +551,7 @@ class Catalog:
|
|
|
367
551
|
add_dir: Optional[schema.Dir] = None
|
|
368
552
|
drop_dir: Optional[schema.Dir] = None
|
|
369
553
|
for p in sorted(dir_paths):
|
|
370
|
-
dir = self._get_dir(p,
|
|
554
|
+
dir = self._get_dir(p, lock_dir=True)
|
|
371
555
|
if dir is None:
|
|
372
556
|
raise excs.Error(f'Directory {str(p)!r} does not exist.')
|
|
373
557
|
if p == add_dir_path:
|
|
@@ -377,7 +561,7 @@ class Catalog:
|
|
|
377
561
|
|
|
378
562
|
add_obj: Optional[SchemaObject] = None
|
|
379
563
|
if add_dir is not None:
|
|
380
|
-
add_obj = self._get_dir_entry(add_dir.id, add_name,
|
|
564
|
+
add_obj = self._get_dir_entry(add_dir.id, add_name, lock_entry=True)
|
|
381
565
|
if add_obj is not None and raise_if_exists:
|
|
382
566
|
add_path = add_dir_path.append(add_name)
|
|
383
567
|
raise excs.Error(f'Path {str(add_path)!r} already exists.')
|
|
@@ -385,7 +569,7 @@ class Catalog:
|
|
|
385
569
|
drop_obj: Optional[SchemaObject] = None
|
|
386
570
|
if drop_dir is not None:
|
|
387
571
|
drop_path = drop_dir_path.append(drop_name)
|
|
388
|
-
drop_obj = self._get_dir_entry(drop_dir.id, drop_name,
|
|
572
|
+
drop_obj = self._get_dir_entry(drop_dir.id, drop_name, lock_entry=True)
|
|
389
573
|
if drop_obj is None and raise_if_not_exists:
|
|
390
574
|
raise excs.Error(f'Path {str(drop_path)!r} does not exist.')
|
|
391
575
|
if drop_obj is not None and drop_expected is not None and not isinstance(drop_obj, drop_expected):
|
|
@@ -397,13 +581,13 @@ class Catalog:
|
|
|
397
581
|
add_dir_obj = Dir(add_dir.id, add_dir.parent_id, add_dir.md['name']) if add_dir is not None else None
|
|
398
582
|
return add_obj, add_dir_obj, drop_obj
|
|
399
583
|
|
|
400
|
-
def _get_dir_entry(self, dir_id: UUID, name: str,
|
|
584
|
+
def _get_dir_entry(self, dir_id: UUID, name: str, lock_entry: bool = False) -> Optional[SchemaObject]:
|
|
401
585
|
user = Env.get().user
|
|
402
586
|
conn = Env.get().conn
|
|
403
587
|
|
|
404
588
|
# check for subdirectory
|
|
405
|
-
if
|
|
406
|
-
self._acquire_dir_xlock(dir_id, None, name)
|
|
589
|
+
if lock_entry:
|
|
590
|
+
self._acquire_dir_xlock(parent_id=dir_id, dir_id=None, dir_name=name)
|
|
407
591
|
q = sql.select(schema.Dir).where(
|
|
408
592
|
schema.Dir.parent_id == dir_id, schema.Dir.md['name'].astext == name, schema.Dir.md['user'].astext == user
|
|
409
593
|
)
|
|
@@ -417,13 +601,13 @@ class Catalog:
|
|
|
417
601
|
return Dir(dir_record.id, dir_record.parent_id, name)
|
|
418
602
|
|
|
419
603
|
# check for table
|
|
604
|
+
if lock_entry:
|
|
605
|
+
self._acquire_tbl_xlock(dir_id=dir_id, tbl_name=name)
|
|
420
606
|
q = sql.select(schema.Table.id).where(
|
|
421
607
|
schema.Table.dir_id == dir_id,
|
|
422
608
|
schema.Table.md['name'].astext == name,
|
|
423
609
|
schema.Table.md['user'].astext == user,
|
|
424
610
|
)
|
|
425
|
-
if for_update:
|
|
426
|
-
q = q.with_for_update()
|
|
427
611
|
tbl_id = conn.execute(q).scalar_one_or_none()
|
|
428
612
|
if tbl_id is not None:
|
|
429
613
|
if tbl_id not in self._tbls:
|
|
@@ -438,7 +622,8 @@ class Catalog:
|
|
|
438
622
|
expected: Optional[type[SchemaObject]] = None,
|
|
439
623
|
raise_if_exists: bool = False,
|
|
440
624
|
raise_if_not_exists: bool = False,
|
|
441
|
-
|
|
625
|
+
lock_parent: bool = False,
|
|
626
|
+
lock_obj: bool = False,
|
|
442
627
|
) -> Optional[SchemaObject]:
|
|
443
628
|
"""Return the schema object at the given path, or None if it doesn't exist.
|
|
444
629
|
|
|
@@ -454,16 +639,16 @@ class Catalog:
|
|
|
454
639
|
raise excs.Error(
|
|
455
640
|
f'{str(path)!r} needs to be a {expected._display_name()} but is a {Dir._display_name()}'
|
|
456
641
|
)
|
|
457
|
-
dir = self._get_dir(path,
|
|
642
|
+
dir = self._get_dir(path, lock_dir=lock_obj)
|
|
458
643
|
if dir is None:
|
|
459
644
|
raise excs.Error(f'Unknown user: {Env.get().user}')
|
|
460
645
|
return Dir(dir.id, dir.parent_id, dir.md['name'])
|
|
461
646
|
|
|
462
647
|
parent_path = path.parent
|
|
463
|
-
parent_dir = self._get_dir(parent_path,
|
|
648
|
+
parent_dir = self._get_dir(parent_path, lock_dir=lock_parent)
|
|
464
649
|
if parent_dir is None:
|
|
465
650
|
raise excs.Error(f'Directory {str(parent_path)!r} does not exist.')
|
|
466
|
-
obj = self._get_dir_entry(parent_dir.id, path.name,
|
|
651
|
+
obj = self._get_dir_entry(parent_dir.id, path.name, lock_entry=lock_obj)
|
|
467
652
|
|
|
468
653
|
if obj is None and raise_if_not_exists:
|
|
469
654
|
raise excs.Error(f'Path {str(path)!r} does not exist.')
|
|
@@ -480,12 +665,12 @@ class Catalog:
|
|
|
480
665
|
tbl = self._load_tbl(tbl_id)
|
|
481
666
|
if tbl is None:
|
|
482
667
|
return None
|
|
483
|
-
# if this is a mutable table, we also need to have its mutable views loaded, in order to track column
|
|
484
|
-
# dependencies
|
|
485
|
-
tbl_version = tbl._tbl_version.get()
|
|
486
|
-
if tbl_version.is_mutable:
|
|
487
|
-
|
|
488
|
-
|
|
668
|
+
# # if this is a mutable table, we also need to have its mutable views loaded, in order to track column
|
|
669
|
+
# # dependencies
|
|
670
|
+
# tbl_version = tbl._tbl_version.get()
|
|
671
|
+
# if tbl_version.is_mutable:
|
|
672
|
+
# for v in tbl_version.mutable_views:
|
|
673
|
+
# _ = self.get_table_by_id(v.id)
|
|
489
674
|
return self._tbls[tbl_id]
|
|
490
675
|
|
|
491
676
|
@_retry_loop(for_write=True)
|
|
@@ -539,6 +724,18 @@ class Catalog:
|
|
|
539
724
|
) -> Table:
|
|
540
725
|
from pixeltable.utils.filecache import FileCache
|
|
541
726
|
|
|
727
|
+
if not is_snapshot and not base.is_snapshot():
|
|
728
|
+
# this is a mutable view of a mutable base; X-lock the base and advance its view_sn before adding the view
|
|
729
|
+
self._acquire_tbl_xlock(tbl_id=base.tbl_id)
|
|
730
|
+
base_tv = self.get_tbl_version(base.tbl_id, None)
|
|
731
|
+
base_tv.tbl_md.view_sn += 1
|
|
732
|
+
result = Env.get().conn.execute(
|
|
733
|
+
sql.update(schema.Table)
|
|
734
|
+
.values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
|
|
735
|
+
.where(schema.Table.id == base.tbl_id)
|
|
736
|
+
)
|
|
737
|
+
assert result.rowcount == 1, result.rowcount
|
|
738
|
+
|
|
542
739
|
existing = self._handle_path_collision(path, View, is_snapshot, if_exists)
|
|
543
740
|
if existing is not None:
|
|
544
741
|
assert isinstance(existing, View)
|
|
@@ -718,38 +915,35 @@ class Catalog:
|
|
|
718
915
|
'This is likely due to data corruption in the replicated table.'
|
|
719
916
|
)
|
|
720
917
|
|
|
721
|
-
self.store_tbl_md(UUID(tbl_id), new_tbl_md, new_version_md, new_schema_version_md)
|
|
918
|
+
self.store_tbl_md(UUID(tbl_id), None, new_tbl_md, new_version_md, new_schema_version_md)
|
|
722
919
|
|
|
723
920
|
@_retry_loop(for_write=False)
|
|
724
921
|
def get_table(self, path: Path) -> Table:
|
|
725
|
-
obj = self._get_table(path)
|
|
726
|
-
return obj
|
|
727
|
-
|
|
728
|
-
def _get_table(self, path: Path) -> Table:
|
|
729
922
|
obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
|
|
730
923
|
assert isinstance(obj, Table)
|
|
731
|
-
tbl_version = obj._tbl_version.get()
|
|
732
|
-
# TODO: instead of calling this here, move the logic into TableVersion.init(), which is called after
|
|
733
|
-
# registering the instance in _tbl_versions
|
|
734
|
-
tbl_version.ensure_md_loaded()
|
|
735
|
-
# if this table has mutable views, we need to load those as well, in order to record column dependencies
|
|
736
|
-
for v in tbl_version.mutable_views:
|
|
737
|
-
self.get_table_by_id(v.id)
|
|
738
924
|
return obj
|
|
739
925
|
|
|
740
926
|
@_retry_loop(for_write=True)
|
|
741
927
|
def drop_table(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
drop_expected=Table,
|
|
928
|
+
tbl = self._get_schema_object(
|
|
929
|
+
path,
|
|
930
|
+
expected=Table,
|
|
746
931
|
raise_if_not_exists=if_not_exists == IfNotExistsParam.ERROR and not force,
|
|
932
|
+
lock_parent=True,
|
|
933
|
+
lock_obj=False,
|
|
747
934
|
)
|
|
748
|
-
if
|
|
935
|
+
if tbl is None:
|
|
749
936
|
_logger.info(f'Skipped table {str(path)!r} (does not exist).')
|
|
750
937
|
return
|
|
751
|
-
assert isinstance(
|
|
752
|
-
|
|
938
|
+
assert isinstance(tbl, Table)
|
|
939
|
+
|
|
940
|
+
if isinstance(tbl, View) and tbl._tbl_version_path.is_mutable() and tbl._tbl_version_path.base.is_mutable():
|
|
941
|
+
# this is a mutable view of a mutable base;
|
|
942
|
+
# lock the base before the view, in order to avoid deadlocks with concurrent inserts/updates
|
|
943
|
+
base_id = tbl._tbl_version_path.base.tbl_id
|
|
944
|
+
self._acquire_tbl_xlock(tbl_id=base_id, lock_mutable_tree=False)
|
|
945
|
+
|
|
946
|
+
self._drop_tbl(tbl, force=force, is_replace=False)
|
|
753
947
|
|
|
754
948
|
def _drop_tbl(self, tbl: Table, force: bool, is_replace: bool) -> None:
|
|
755
949
|
"""
|
|
@@ -759,8 +953,11 @@ class Catalog:
|
|
|
759
953
|
- X-lock base before X-locking any view
|
|
760
954
|
- deadlock-free with respect to TableVersion.insert() (insert propagation also proceeds top-down)
|
|
761
955
|
- X-locks parent dir prior to calling TableVersion.drop(): prevent concurrent creation of another SchemaObject
|
|
762
|
-
in the same directory with the same name (which could lead to duplicate names if we get
|
|
956
|
+
in the same directory with the same name (which could lead to duplicate names if we get aborted)
|
|
763
957
|
"""
|
|
958
|
+
self._acquire_dir_xlock(dir_id=tbl._dir_id)
|
|
959
|
+
self._acquire_tbl_xlock(tbl_id=tbl._id, lock_mutable_tree=False)
|
|
960
|
+
|
|
764
961
|
view_ids = self.get_view_ids(tbl._id, for_update=True)
|
|
765
962
|
if len(view_ids) > 0:
|
|
766
963
|
if not force:
|
|
@@ -780,12 +977,34 @@ class Catalog:
|
|
|
780
977
|
view = self.get_table_by_id(view_id)
|
|
781
978
|
self._drop_tbl(view, force=force, is_replace=is_replace)
|
|
782
979
|
|
|
783
|
-
|
|
784
|
-
tbl.
|
|
980
|
+
# if this is a mutable view of a mutable base, advance the base's view_sn
|
|
981
|
+
if isinstance(tbl, View) and tbl._tbl_version_path.is_mutable() and tbl._tbl_version_path.base.is_mutable():
|
|
982
|
+
base_id = tbl._tbl_version_path.base.tbl_id
|
|
983
|
+
base_tv = self.get_tbl_version(base_id, None)
|
|
984
|
+
base_tv.tbl_md.view_sn += 1
|
|
985
|
+
result = Env.get().conn.execute(
|
|
986
|
+
sql.update(schema.Table.__table__)
|
|
987
|
+
.values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
|
|
988
|
+
.where(schema.Table.id == base_id)
|
|
989
|
+
)
|
|
990
|
+
assert result.rowcount == 1, result.rowcount
|
|
991
|
+
|
|
992
|
+
tv = tbl._tbl_version.get() if tbl._tbl_version is not None else None
|
|
993
|
+
if tv is not None:
|
|
994
|
+
tv = tbl._tbl_version.get()
|
|
995
|
+
# invalidate the TableVersion instance so that existing references to it can find out it has been dropped
|
|
996
|
+
tv.is_validated = False
|
|
997
|
+
|
|
998
|
+
self.delete_tbl_md(tbl._id)
|
|
785
999
|
assert tbl._id in self._tbls
|
|
786
1000
|
del self._tbls[tbl._id]
|
|
787
1001
|
_logger.info(f'Dropped table `{tbl._path()}`.')
|
|
788
1002
|
|
|
1003
|
+
if tv is not None:
|
|
1004
|
+
tv.drop()
|
|
1005
|
+
assert (tv.id, tv.effective_version) in self._tbl_versions
|
|
1006
|
+
del self._tbl_versions[tv.id, tv.effective_version]
|
|
1007
|
+
|
|
789
1008
|
@_retry_loop(for_write=True)
def create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
    """
    Public entry point for directory creation.

    All arguments are forwarded unchanged to `_create_dir()`, which does the actual work; this wrapper only
    adds the `_retry_loop(for_write=True)` decorator around that call.

    Args:
        path: passed through to `_create_dir()` as its first argument.
        if_exists: passed through to `_create_dir()` as its second argument.
        parents: passed through to `_create_dir()` as its third argument.

    Returns:
        Whatever `_create_dir()` returns for these arguments.
    """
    created_dir = self._create_dir(path, if_exists, parents)
    return created_dir
|
|
@@ -846,7 +1065,7 @@ class Catalog:
|
|
|
846
1065
|
raise excs.Error(f'Directory {str(dir_path)!r} is not empty.')
|
|
847
1066
|
|
|
848
1067
|
# drop existing subdirs
|
|
849
|
-
self._acquire_dir_xlock(dir_id
|
|
1068
|
+
self._acquire_dir_xlock(dir_id=dir_id)
|
|
850
1069
|
dir_q = sql.select(schema.Dir).where(schema.Dir.parent_id == dir_id)
|
|
851
1070
|
for row in conn.execute(dir_q).all():
|
|
852
1071
|
self._drop_dir(row.id, dir_path.append(row.md['name']), force=True)
|
|
@@ -866,6 +1085,11 @@ class Catalog:
|
|
|
866
1085
|
def get_view_ids(self, tbl_id: UUID, for_update: bool = False) -> list[UUID]:
|
|
867
1086
|
"""Return the ids of views that directly reference the given table"""
|
|
868
1087
|
conn = Env.get().conn
|
|
1088
|
+
# check whether this table still exists
|
|
1089
|
+
q = sql.select(sql.func.count()).select_from(schema.Table).where(schema.Table.id == tbl_id)
|
|
1090
|
+
tbl_count = conn.execute(q).scalar()
|
|
1091
|
+
if tbl_count == 0:
|
|
1092
|
+
raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
|
|
869
1093
|
q = sql.select(schema.Table.id).where(sql.text(f"md->'view_md'->'base_versions'->0->>0 = {tbl_id.hex!r}"))
|
|
870
1094
|
if for_update:
|
|
871
1095
|
q = q.with_for_update()
|
|
@@ -875,27 +1099,29 @@ class Catalog:
|
|
|
875
1099
|
def get_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
|
|
876
1100
|
# we need a transaction here, if we're not already in one; if this starts a new transaction,
|
|
877
1101
|
# the returned TableVersion instance will not be validated
|
|
878
|
-
with self.begin_xact(
|
|
1102
|
+
with self.begin_xact(for_write=False) as conn:
|
|
879
1103
|
tv = self._tbl_versions.get((tbl_id, effective_version))
|
|
880
1104
|
if tv is None:
|
|
881
1105
|
tv = self._load_tbl_version(tbl_id, effective_version)
|
|
882
1106
|
elif not tv.is_validated:
|
|
883
1107
|
# only live instances are invalidated
|
|
884
1108
|
assert effective_version is None
|
|
885
|
-
# we validate live instances by comparing our cached
|
|
1109
|
+
# we validate live instances by comparing our cached TableMd.current_version/view_sn to what's stored
|
|
886
1110
|
# _logger.debug(f'validating metadata for table {tbl_id}:{tv.version} ({id(tv):x})')
|
|
887
1111
|
q = sql.select(schema.Table.md).where(schema.Table.id == tbl_id)
|
|
888
|
-
row = conn.execute(q).
|
|
889
|
-
|
|
1112
|
+
row = conn.execute(q).one_or_none()
|
|
1113
|
+
if row is None:
|
|
1114
|
+
raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
|
|
1115
|
+
current_version, view_sn = row.md['current_version'], row.md['view_sn']
|
|
890
1116
|
|
|
891
1117
|
# the stored version can be behind TableVersion.version, because we don't roll back the in-memory
|
|
892
1118
|
# metadata changes after a failed update operation
|
|
893
|
-
if current_version != tv.version:
|
|
1119
|
+
if current_version != tv.version or view_sn != tv.tbl_md.view_sn:
|
|
894
1120
|
# the cached metadata is invalid
|
|
895
1121
|
_logger.debug(
|
|
896
1122
|
f'reloading metadata for table {tbl_id} '
|
|
897
|
-
f'(cached version: {tv.version}
|
|
898
|
-
|
|
1123
|
+
f'(cached/current version: {tv.version}/{current_version}, '
|
|
1124
|
+
f'cached/current view_sn: {tv.tbl_md.view_sn}/{view_sn})'
|
|
899
1125
|
)
|
|
900
1126
|
tv = self._load_tbl_version(tbl_id, None)
|
|
901
1127
|
else:
|
|
@@ -913,7 +1139,7 @@ class Catalog:
|
|
|
913
1139
|
"""Return the Dir with the given id, or None if it doesn't exist"""
|
|
914
1140
|
conn = Env.get().conn
|
|
915
1141
|
if for_update:
|
|
916
|
-
self._acquire_dir_xlock(
|
|
1142
|
+
self._acquire_dir_xlock(dir_id=dir_id)
|
|
917
1143
|
q = sql.select(schema.Dir).where(schema.Dir.id == dir_id)
|
|
918
1144
|
row = conn.execute(q).one_or_none()
|
|
919
1145
|
if row is None:
|
|
@@ -921,24 +1147,24 @@ class Catalog:
|
|
|
921
1147
|
dir_record = schema.Dir(**row._mapping)
|
|
922
1148
|
return Dir(dir_record.id, dir_record.parent_id, dir_record.md['name'])
|
|
923
1149
|
|
|
924
|
-
def _get_dir(self, path: Path,
|
|
1150
|
+
def _get_dir(self, path: Path, lock_dir: bool = False) -> Optional[schema.Dir]:
|
|
925
1151
|
"""
|
|
926
|
-
|
|
1152
|
+
lock_dir: if True, X-locks target (but not the ancestors)
|
|
927
1153
|
"""
|
|
928
1154
|
user = Env.get().user
|
|
929
1155
|
conn = Env.get().conn
|
|
930
1156
|
if path.is_root:
|
|
931
|
-
if
|
|
932
|
-
self._acquire_dir_xlock(
|
|
1157
|
+
if lock_dir:
|
|
1158
|
+
self._acquire_dir_xlock(dir_name='')
|
|
933
1159
|
q = sql.select(schema.Dir).where(schema.Dir.parent_id.is_(None), schema.Dir.md['user'].astext == user)
|
|
934
1160
|
row = conn.execute(q).one_or_none()
|
|
935
1161
|
return schema.Dir(**row._mapping) if row is not None else None
|
|
936
1162
|
else:
|
|
937
|
-
parent_dir = self._get_dir(path.parent,
|
|
1163
|
+
parent_dir = self._get_dir(path.parent, lock_dir=False)
|
|
938
1164
|
if parent_dir is None:
|
|
939
1165
|
return None
|
|
940
|
-
if
|
|
941
|
-
self._acquire_dir_xlock(parent_id=parent_dir.id,
|
|
1166
|
+
if lock_dir:
|
|
1167
|
+
self._acquire_dir_xlock(parent_id=parent_dir.id, dir_name=path.name)
|
|
942
1168
|
q = sql.select(schema.Dir).where(
|
|
943
1169
|
schema.Dir.parent_id == parent_dir.id,
|
|
944
1170
|
schema.Dir.md['name'].astext == path.name,
|
|
@@ -1006,6 +1232,43 @@ class Catalog:
|
|
|
1006
1232
|
self._tbls[tbl_id] = view
|
|
1007
1233
|
return view
|
|
1008
1234
|
|
|
1235
|
+
@_retry_loop(for_write=False)
def collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
    """
    Collect version metadata for the table with the given UUID, newest version first.

    Every stored TableVersion record of the table is paired with the TableSchemaVersion record whose
    schema_version equals the 'schema_version' entry of the version record's metadata.

    Args:
        tbl_id: UUID of the table whose history is collected.
        n: if not None, the maximum number of versions to return; if None, no limit is applied.

    Returns:
        A list of FullTableMd instances in descending order of version number. The first positional field of
        each instance is None; the remaining two are the TableVersionMd and TableSchemaVersionMd built from
        the stored records.
    """
    version_cls = schema.TableVersion
    schema_version_cls = schema.TableSchemaVersion

    # the schema version is stored inside the version record's JSON metadata, so it needs a cast to be
    # comparable with the integer column of the schema version record
    same_schema_version = (
        sql.cast(version_cls.md['schema_version'], sql.Integer) == schema_version_cls.schema_version
    )
    stmt = (
        sql.select(version_cls, schema_version_cls)
        .select_from(version_cls)
        .join(schema_version_cls, same_schema_version)
        .where(version_cls.tbl_id == tbl_id)
        .where(schema_version_cls.tbl_id == tbl_id)
        .order_by(version_cls.version.desc())
    )
    if n is not None:
        stmt = stmt.limit(n)

    history: list[schema.FullTableMd] = []
    for record in Env.get().session.execute(stmt).fetchall():
        version_md = schema.md_from_dict(schema.TableVersionMd, record.TableVersion.md)
        schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, record.TableSchemaVersion.md)
        history.append(schema.FullTableMd(None, version_md, schema_version_md))
    return history
|
|
1271
|
+
|
|
1009
1272
|
def load_tbl_md(self, tbl_id: UUID, effective_version: Optional[int]) -> schema.FullTableMd:
|
|
1010
1273
|
"""
|
|
1011
1274
|
Loads metadata from the store for a given table UUID and version.
|
|
@@ -1060,7 +1323,8 @@ class Catalog:
|
|
|
1060
1323
|
)
|
|
1061
1324
|
|
|
1062
1325
|
row = conn.execute(q).one_or_none()
|
|
1063
|
-
|
|
1326
|
+
if row is None:
|
|
1327
|
+
raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
|
|
1064
1328
|
tbl_record, version_record, schema_version_record = _unpack_row(
|
|
1065
1329
|
row, [schema.Table, schema.TableVersion, schema.TableSchemaVersion]
|
|
1066
1330
|
)
|
|
@@ -1074,19 +1338,27 @@ class Catalog:
|
|
|
1074
1338
|
def store_tbl_md(
|
|
1075
1339
|
self,
|
|
1076
1340
|
tbl_id: UUID,
|
|
1341
|
+
dir_id: Optional[UUID],
|
|
1077
1342
|
tbl_md: Optional[schema.TableMd],
|
|
1078
1343
|
version_md: Optional[schema.TableVersionMd],
|
|
1079
1344
|
schema_version_md: Optional[schema.TableSchemaVersionMd],
|
|
1080
1345
|
) -> None:
|
|
1081
1346
|
"""
|
|
1082
|
-
Stores metadata to the DB.
|
|
1083
|
-
|
|
1347
|
+
Stores metadata to the DB.
|
|
1348
|
+
|
|
1349
|
+
Args:
|
|
1350
|
+
tbl_id: UUID of the table to store metadata for.
|
|
1351
|
+
dir_id: If specified, the tbl_md will be added to the given directory; if None, the table must already exist
|
|
1352
|
+
tbl_md: If specified, `tbl_md` will be inserted, or updated (only one such record can exist per UUID)
|
|
1353
|
+
version_md: inserted as a new record if present
|
|
1354
|
+
schema_version_md: will be inserted as a new record if present
|
|
1084
1355
|
|
|
1085
1356
|
If inserting `version_md` or `schema_version_md` would be a primary key violation, an exception will be raised.
|
|
1086
1357
|
"""
|
|
1087
|
-
conn = Env.get().conn
|
|
1088
1358
|
assert self._in_write_xact
|
|
1359
|
+
session = Env.get().session
|
|
1089
1360
|
|
|
1361
|
+
# Construct and insert or update table record if requested.
|
|
1090
1362
|
if tbl_md is not None:
|
|
1091
1363
|
assert tbl_md.tbl_id == str(tbl_id)
|
|
1092
1364
|
if version_md is not None:
|
|
@@ -1094,32 +1366,37 @@ class Catalog:
|
|
|
1094
1366
|
assert tbl_md.current_schema_version == version_md.schema_version
|
|
1095
1367
|
if schema_version_md is not None:
|
|
1096
1368
|
assert tbl_md.current_schema_version == schema_version_md.schema_version
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
.
|
|
1101
|
-
|
|
1102
|
-
|
|
1369
|
+
if dir_id is not None:
|
|
1370
|
+
# We are inserting a record while creating a new table.
|
|
1371
|
+
tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_md))
|
|
1372
|
+
session.add(tbl_record)
|
|
1373
|
+
else:
|
|
1374
|
+
# Update the existing table record.
|
|
1375
|
+
result = session.execute(
|
|
1376
|
+
sql.update(schema.Table.__table__)
|
|
1377
|
+
.values({schema.Table.md: dataclasses.asdict(tbl_md)})
|
|
1378
|
+
.where(schema.Table.id == tbl_id)
|
|
1379
|
+
)
|
|
1380
|
+
assert result.rowcount == 1, result.rowcount
|
|
1103
1381
|
|
|
1382
|
+
# Construct and insert new table version record if requested.
|
|
1104
1383
|
if version_md is not None:
|
|
1105
1384
|
assert version_md.tbl_id == str(tbl_id)
|
|
1106
1385
|
if schema_version_md is not None:
|
|
1107
1386
|
assert version_md.schema_version == schema_version_md.schema_version
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
tbl_id=tbl_id, version=version_md.version, md=dataclasses.asdict(version_md)
|
|
1111
|
-
)
|
|
1387
|
+
tbl_version_record = schema.TableVersion(
|
|
1388
|
+
tbl_id=tbl_id, version=version_md.version, md=dataclasses.asdict(version_md)
|
|
1112
1389
|
)
|
|
1390
|
+
session.add(tbl_version_record)
|
|
1113
1391
|
|
|
1392
|
+
# Construct and insert a new schema version record if requested.
|
|
1114
1393
|
if schema_version_md is not None:
|
|
1115
1394
|
assert schema_version_md.tbl_id == str(tbl_id)
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
tbl_id=tbl_id,
|
|
1119
|
-
schema_version=schema_version_md.schema_version,
|
|
1120
|
-
md=dataclasses.asdict(schema_version_md),
|
|
1121
|
-
)
|
|
1395
|
+
schema_version_record = schema.TableSchemaVersion(
|
|
1396
|
+
tbl_id=tbl_id, schema_version=schema_version_md.schema_version, md=dataclasses.asdict(schema_version_md)
|
|
1122
1397
|
)
|
|
1398
|
+
session.add(schema_version_record)
|
|
1399
|
+
session.flush() # Inform SQLAlchemy that we want to write these changes to the DB.
|
|
1123
1400
|
|
|
1124
1401
|
def delete_tbl_md(self, tbl_id: UUID) -> None:
|
|
1125
1402
|
"""
|
|
@@ -1143,7 +1420,7 @@ class Catalog:
|
|
|
1143
1420
|
|
|
1144
1421
|
# If `tbl` is a named pure snapshot, we're not quite done, since the snapshot metadata won't appear in the
|
|
1145
1422
|
# TableVersionPath. We need to prepend it separately.
|
|
1146
|
-
if tbl
|
|
1423
|
+
if isinstance(tbl, View) and tbl._snapshot_only:
|
|
1147
1424
|
snapshot_md = self.load_tbl_md(tbl._id, 0)
|
|
1148
1425
|
md = [snapshot_md, *md]
|
|
1149
1426
|
|
|
@@ -1212,9 +1489,20 @@ class Catalog:
|
|
|
1212
1489
|
|
|
1213
1490
|
self._tbl_versions[tbl_id, effective_version] = tbl_version
|
|
1214
1491
|
tbl_version.init()
|
|
1215
|
-
|
|
1216
1492
|
return tbl_version
|
|
1217
1493
|
|
|
1494
|
+
def record_column_dependencies(self, tbl_version: TableVersion) -> None:
    """
    Rebuild the entry of self._column_dependencies for the given table version.

    For every column of `tbl_version` that has a `value_expr_dict`, the ids of the columns referenced by that
    expression (as reported by `Expr.get_refd_column_ids()`) are recorded under the column's qualified id.
    Columns without a `value_expr_dict` get no entry. Any previous entry for this table id is replaced.

    Must not be called for snapshot versions (asserted).
    """
    from pixeltable.exprs import Expr

    assert not tbl_version.is_snapshot
    tbl_id = tbl_version.id
    cols_with_expr = (c for c in tbl_version.cols_by_id.values() if c.value_expr_dict is not None)
    col_deps: dict[QColumnId, set[QColumnId]] = {
        QColumnId(tbl_id, c.id): Expr.get_refd_column_ids(c.value_expr_dict) for c in cols_with_expr
    }
    self._column_dependencies[tbl_id] = col_deps
|
|
1505
|
+
|
|
1218
1506
|
def _init_store(self) -> None:
|
|
1219
1507
|
"""One-time initialization of the stored catalog. Idempotent."""
|
|
1220
1508
|
self.create_user(None)
|