pixeltable 0.3.15__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +9 -1
- pixeltable/catalog/catalog.py +559 -134
- pixeltable/catalog/column.py +36 -32
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +12 -0
- pixeltable/catalog/insertable_table.py +30 -25
- pixeltable/catalog/schema_object.py +9 -6
- pixeltable/catalog/table.py +334 -267
- pixeltable/catalog/table_version.py +358 -241
- pixeltable/catalog/table_version_handle.py +18 -2
- pixeltable/catalog/table_version_path.py +86 -16
- pixeltable/catalog/view.py +47 -23
- pixeltable/dataframe.py +198 -19
- pixeltable/env.py +6 -4
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +1 -1
- pixeltable/exec/exec_node.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +4 -1
- pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
- pixeltable/exec/in_memory_data_node.py +1 -1
- pixeltable/exec/sql_node.py +188 -22
- pixeltable/exprs/column_property_ref.py +16 -6
- pixeltable/exprs/column_ref.py +33 -11
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/data_row.py +5 -3
- pixeltable/exprs/expr.py +11 -4
- pixeltable/exprs/literal.py +2 -0
- pixeltable/exprs/row_builder.py +4 -6
- pixeltable/exprs/rowid_ref.py +8 -0
- pixeltable/exprs/similarity_expr.py +1 -0
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +5 -3
- pixeltable/func/tools.py +12 -2
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +19 -45
- pixeltable/functions/deepseek.py +19 -38
- pixeltable/functions/fireworks.py +9 -18
- pixeltable/functions/gemini.py +2 -3
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/llama_cpp.py +6 -6
- pixeltable/functions/mistralai.py +16 -53
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +82 -165
- pixeltable/functions/string.py +212 -58
- pixeltable/functions/together.py +22 -80
- pixeltable/globals.py +10 -4
- pixeltable/index/base.py +5 -0
- pixeltable/index/btree.py +5 -0
- pixeltable/index/embedding_index.py +5 -0
- pixeltable/io/external_store.py +10 -31
- pixeltable/io/label_studio.py +5 -5
- pixeltable/io/parquet.py +2 -2
- pixeltable/io/table_data_conduit.py +1 -32
- pixeltable/metadata/__init__.py +11 -2
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_30.py +6 -11
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/util.py +3 -9
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +13 -1
- pixeltable/plan.py +135 -12
- pixeltable/share/packager.py +138 -14
- pixeltable/share/publish.py +2 -2
- pixeltable/store.py +19 -13
- pixeltable/type_system.py +30 -0
- pixeltable/utils/dbms.py +1 -1
- pixeltable/utils/formatter.py +64 -42
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/METADATA +2 -1
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/RECORD +78 -73
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/catalog.py
CHANGED
|
@@ -3,8 +3,11 @@ from __future__ import annotations
|
|
|
3
3
|
import dataclasses
|
|
4
4
|
import functools
|
|
5
5
|
import logging
|
|
6
|
+
import random
|
|
6
7
|
import time
|
|
7
|
-
from
|
|
8
|
+
from collections import defaultdict
|
|
9
|
+
from contextlib import contextmanager
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, TypeVar
|
|
8
11
|
from uuid import UUID
|
|
9
12
|
|
|
10
13
|
import psycopg
|
|
@@ -15,8 +18,9 @@ from pixeltable.env import Env
|
|
|
15
18
|
from pixeltable.iterators import ComponentIterator
|
|
16
19
|
from pixeltable.metadata import schema
|
|
17
20
|
|
|
21
|
+
from .column import Column
|
|
18
22
|
from .dir import Dir
|
|
19
|
-
from .globals import IfExistsParam, IfNotExistsParam, MediaValidation
|
|
23
|
+
from .globals import IfExistsParam, IfNotExistsParam, MediaValidation, QColumnId
|
|
20
24
|
from .insertable_table import InsertableTable
|
|
21
25
|
from .path import Path
|
|
22
26
|
from .schema_object import SchemaObject
|
|
@@ -27,6 +31,8 @@ from .table_version_path import TableVersionPath
|
|
|
27
31
|
from .view import View
|
|
28
32
|
|
|
29
33
|
if TYPE_CHECKING:
|
|
34
|
+
from pixeltable.plan import SampleClause
|
|
35
|
+
|
|
30
36
|
from .. import DataFrame, exprs
|
|
31
37
|
|
|
32
38
|
|
|
@@ -56,49 +62,85 @@ def _unpack_row(
|
|
|
56
62
|
return result
|
|
57
63
|
|
|
58
64
|
|
|
59
|
-
|
|
65
|
+
# -1: unlimited
|
|
66
|
+
# for now, we don't limit the number of retries, because we haven't seen situations where the actual number of retries
|
|
67
|
+
# grows uncontrollably
|
|
68
|
+
_MAX_RETRIES = -1
|
|
69
|
+
|
|
60
70
|
T = TypeVar('T')
|
|
61
71
|
|
|
62
72
|
|
|
63
|
-
def _retry_loop(
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
73
|
+
def _retry_loop(*, for_write: bool) -> Callable[[Callable[..., T]], Callable[..., T]]:
|
|
74
|
+
def decorator(op: Callable[..., T]) -> Callable[..., T]:
|
|
75
|
+
@functools.wraps(op)
|
|
76
|
+
def loop(*args: Any, **kwargs: Any) -> T:
|
|
77
|
+
num_retries = 0
|
|
78
|
+
while True:
|
|
79
|
+
try:
|
|
80
|
+
# in order for retry to work, we need to make sure that there aren't any prior db updates
|
|
81
|
+
# that are part of an ongoing transaction
|
|
82
|
+
assert not Env.get().in_xact
|
|
83
|
+
with Catalog.get().begin_xact(for_write=for_write, convert_db_excs=False):
|
|
84
|
+
return op(*args, **kwargs)
|
|
85
|
+
except sql.exc.DBAPIError as e:
|
|
86
|
+
# TODO: what other exceptions should we be looking for?
|
|
87
|
+
if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
|
|
88
|
+
if num_retries < _MAX_RETRIES or _MAX_RETRIES == -1:
|
|
89
|
+
num_retries += 1
|
|
90
|
+
_logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
|
|
91
|
+
time.sleep(random.uniform(0.1, 0.5))
|
|
92
|
+
else:
|
|
93
|
+
raise excs.Error(f'Serialization retry limit ({_MAX_RETRIES}) exceeded') from e
|
|
81
94
|
else:
|
|
82
|
-
raise
|
|
83
|
-
|
|
95
|
+
raise
|
|
96
|
+
except Exception as e:
|
|
97
|
+
# for informational/debugging purposes
|
|
98
|
+
_logger.debug(f'retry_loop(): passing along {e}')
|
|
84
99
|
raise
|
|
85
100
|
|
|
86
|
-
|
|
101
|
+
return loop
|
|
102
|
+
|
|
103
|
+
return decorator
|
|
87
104
|
|
|
88
105
|
|
|
89
106
|
class Catalog:
|
|
90
107
|
"""The functional interface to getting access to catalog objects
|
|
91
108
|
|
|
92
|
-
All interface functions must be called in the context of a transaction, started with
|
|
109
|
+
All interface functions must be called in the context of a transaction, started with Catalog.begin_xact().
|
|
110
|
+
|
|
111
|
+
Caching and invalidation of metadata:
|
|
112
|
+
- Catalog caches TableVersion instances in order to avoid excessive metadata loading
|
|
113
|
+
- for any specific table version (ie, combination of id and effective version) there can be only a single
|
|
114
|
+
TableVersion instance in circulation; the reason is that each TV instance has its own store_tbl.sa_tbl, and
|
|
115
|
+
mixing multiple instances of sqlalchemy Table objects in the same query (for the same underlying table) leads to
|
|
116
|
+
duplicate references to that table in the From clause (ie, incorrect Cartesian products)
|
|
117
|
+
- in order to allow multiple concurrent Python processes to perform updates (data and/or schema) against a shared
|
|
118
|
+
Pixeltable instance, Catalog needs to reload metadata from the store when there are changes
|
|
119
|
+
- concurrent changes are detected by comparing TableVersion.version/view_sn with the stored current version
|
|
120
|
+
(TableMd.current_version/view_sn)
|
|
121
|
+
- cached live TableVersion instances (those with effective_version == None) are validated against the stored
|
|
122
|
+
metadata on transaction boundaries; this is recorded in TableVersion.is_validated
|
|
123
|
+
- metadata validation is only needed for live TableVersion instances (snapshot instances are immutable)
|
|
93
124
|
"""
|
|
94
125
|
|
|
95
126
|
_instance: Optional[Catalog] = None
|
|
96
127
|
|
|
97
|
-
# key: [id, version]
|
|
128
|
+
# cached TableVersion instances; key: [id, version]
|
|
98
129
|
# - mutable version of a table: version == None (even though TableVersion.version is set correctly)
|
|
99
130
|
# - snapshot versions: records the version of the snapshot
|
|
100
131
|
_tbl_versions: dict[tuple[UUID, Optional[int]], TableVersion]
|
|
101
132
|
_tbls: dict[UUID, Table]
|
|
133
|
+
_in_write_xact: bool # True if we're in a write transaction
|
|
134
|
+
_x_locked_tbl_ids: set[UUID] # non-empty for write transactions
|
|
135
|
+
|
|
136
|
+
# cached column dependencies
|
|
137
|
+
# - key: table id, value: mapping from column id to its dependencies
|
|
138
|
+
# - only maintained for dependencies between non-snapshot table versions
|
|
139
|
+
# - can contain stale entries (stemming from invalidated TV instances)
|
|
140
|
+
_column_dependencies: dict[UUID, dict[QColumnId, set[QColumnId]]]
|
|
141
|
+
|
|
142
|
+
# column dependents are recomputed at the beginning of every write transaction and only reflect the locked tree
|
|
143
|
+
_column_dependents: Optional[dict[QColumnId, set[QColumnId]]]
|
|
102
144
|
|
|
103
145
|
@classmethod
|
|
104
146
|
def get(cls) -> Catalog:
|
|
@@ -109,22 +151,291 @@ class Catalog:
|
|
|
109
151
|
@classmethod
|
|
110
152
|
def clear(cls) -> None:
|
|
111
153
|
"""Remove the instance. Used for testing."""
|
|
154
|
+
# invalidate all existing instances to force reloading of metadata
|
|
155
|
+
for tbl_version in cls._instance._tbl_versions.values():
|
|
156
|
+
# _logger.debug(
|
|
157
|
+
# f'Invalidating table version {tbl_version.id}:{tbl_version.effective_version} ({id(tbl_version):x})'
|
|
158
|
+
# )
|
|
159
|
+
tbl_version.is_validated = False
|
|
112
160
|
cls._instance = None
|
|
113
161
|
|
|
114
162
|
def __init__(self) -> None:
|
|
115
163
|
self._tbl_versions = {}
|
|
116
164
|
self._tbls = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
|
|
165
|
+
self._in_write_xact = False
|
|
166
|
+
self._x_locked_tbl_ids = set()
|
|
167
|
+
self._column_dependencies = {}
|
|
168
|
+
self._column_dependents = None
|
|
117
169
|
self._init_store()
|
|
118
170
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
171
|
+
def _dropped_tbl_error_msg(self, tbl_id: UUID) -> str:
|
|
172
|
+
return f'Table was dropped (no record found for {tbl_id})'
|
|
173
|
+
|
|
174
|
+
def validate(self) -> None:
|
|
175
|
+
"""Validate structural consistency of cached metadata"""
|
|
176
|
+
for (tbl_id, effective_version), tbl_version in self._tbl_versions.items():
|
|
177
|
+
assert tbl_id == tbl_version.id, f'{tbl_id} != {tbl_version.id}'
|
|
178
|
+
assert tbl_version.effective_version == tbl_version.version or tbl_version.effective_version is None, (
|
|
179
|
+
f'{tbl_version.effective_version} != {tbl_version.version} for id {tbl_id}'
|
|
180
|
+
)
|
|
181
|
+
assert effective_version == tbl_version.effective_version, (
|
|
182
|
+
f'{effective_version} != {tbl_version.effective_version} for id {tbl_id}'
|
|
183
|
+
)
|
|
184
|
+
assert len(tbl_version.mutable_views) == 0 or tbl_version.is_mutable, (
|
|
185
|
+
f'snapshot_id={tbl_version.id} mutable_views={tbl_version.mutable_views}'
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
if tbl_version.is_view and tbl_version.is_mutable and tbl_version.is_validated:
|
|
189
|
+
# make sure this mutable view is recorded in a mutable base
|
|
190
|
+
base = tbl_version.base
|
|
191
|
+
assert base is not None
|
|
192
|
+
if base.effective_version is None:
|
|
193
|
+
assert (base.id, None) in self._tbl_versions
|
|
194
|
+
base_tv = self._tbl_versions[base.id, None]
|
|
195
|
+
if not base_tv.is_validated:
|
|
196
|
+
continue
|
|
197
|
+
mutable_view_ids = ', '.join(str(tv.id) for tv in self._tbl_versions[base.id, None].mutable_views)
|
|
198
|
+
mutable_view_names = ', '.join(
|
|
199
|
+
tv._tbl_version.name
|
|
200
|
+
for tv in self._tbl_versions[base.id, None].mutable_views
|
|
201
|
+
if tv._tbl_version is not None
|
|
202
|
+
)
|
|
203
|
+
assert TableVersionHandle.create(tbl_version) in self._tbl_versions[base.id, None].mutable_views, (
|
|
204
|
+
f'{tbl_version.name} ({tbl_version.id}) missing in {mutable_view_ids} ({mutable_view_names})'
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
if len(tbl_version.mutable_views) > 0:
|
|
208
|
+
# make sure we also loaded mutable view metadata, which is needed to detect column dependencies
|
|
209
|
+
for v in tbl_version.mutable_views:
|
|
210
|
+
assert v.effective_version is None, f'{v.id}:{v.effective_version}'
|
|
211
|
+
|
|
212
|
+
@contextmanager
|
|
213
|
+
def begin_xact(
|
|
214
|
+
self,
|
|
215
|
+
*,
|
|
216
|
+
tbl: Optional[TableVersionPath] = None,
|
|
217
|
+
for_write: bool = False,
|
|
218
|
+
lock_mutable_tree: bool = False,
|
|
219
|
+
convert_db_excs: bool = True,
|
|
220
|
+
) -> Iterator[sql.Connection]:
|
|
221
|
+
"""
|
|
222
|
+
Return a context manager that yields a connection to the database. Idempotent.
|
|
223
|
+
|
|
224
|
+
It is mandatory to call this method, not Env.begin_xact(), if the transaction accesses any table data
|
|
225
|
+
or metadata.
|
|
226
|
+
|
|
227
|
+
If tbl != None, follows this locking protocol:
|
|
228
|
+
- validates/reloads the TableVersion instances of tbl's ancestors (in the hope that this reduces potential
|
|
229
|
+
SerializationErrors later on)
|
|
230
|
+
- if for_write == True, x-locks Table record (by updating Table.lock_dummy; see _acquire_tbl_xlock())
|
|
231
|
+
- if for_write == False, validates TableVersion instance
|
|
232
|
+
- if lock_mutable_tree == True, also x-locks all mutable views of the table
|
|
233
|
+
- this needs to be done in a retry loop, because Postgres can decide to abort the transaction
|
|
234
|
+
(SerializationFailure, LockNotAvailable)
|
|
235
|
+
- for that reason, we do all lock acquisition prior to doing any real work (eg, compute column values),
|
|
236
|
+
to minimize the probability of losing that work due to a forced abort
|
|
237
|
+
|
|
238
|
+
If convert_db_excs == True, converts DBAPIErrors into excs.Errors.
|
|
239
|
+
"""
|
|
240
|
+
if Env.get().in_xact:
|
|
241
|
+
if tbl is not None and for_write:
|
|
242
|
+
# make sure that we requested the required table lock at the beginning of the transaction
|
|
243
|
+
assert tbl.tbl_id in self._x_locked_tbl_ids, f'{tbl.tbl_id} not in {self._x_locked_tbl_ids}'
|
|
244
|
+
yield Env.get().conn
|
|
245
|
+
return
|
|
246
|
+
|
|
247
|
+
# tv_msg = '\n'.join(
|
|
248
|
+
# [
|
|
249
|
+
# f'{tv.id}:{tv.effective_version} : tv={id(tv):x} sa_tbl={id(tv.store_tbl.sa_tbl):x}'
|
|
250
|
+
# for tv in self._tbl_versions.values()
|
|
251
|
+
# ]
|
|
252
|
+
# )
|
|
253
|
+
# _logger.debug(f'begin_xact(): {tv_msg}')
|
|
254
|
+
num_retries = 0
|
|
255
|
+
while True:
|
|
256
|
+
try:
|
|
257
|
+
self._in_write_xact = False
|
|
258
|
+
self._x_locked_tbl_ids = set()
|
|
259
|
+
self._column_dependents = None
|
|
260
|
+
|
|
261
|
+
with Env.get().begin_xact() as conn:
|
|
262
|
+
if tbl is not None:
|
|
263
|
+
try:
|
|
264
|
+
if not self._acquire_path_locks(
|
|
265
|
+
tbl=tbl, for_write=for_write, lock_mutable_tree=lock_mutable_tree
|
|
266
|
+
):
|
|
267
|
+
# this is a snapshot
|
|
268
|
+
yield conn
|
|
269
|
+
return
|
|
270
|
+
|
|
271
|
+
if for_write:
|
|
272
|
+
if lock_mutable_tree:
|
|
273
|
+
self._x_locked_tbl_ids = self._get_mutable_tree(tbl.tbl_id)
|
|
274
|
+
self._compute_column_dependents(self._x_locked_tbl_ids)
|
|
275
|
+
else:
|
|
276
|
+
self._x_locked_tbl_ids = {tbl.tbl_id}
|
|
277
|
+
if _logger.isEnabledFor(logging.DEBUG):
|
|
278
|
+
# validate only when we don't see errors
|
|
279
|
+
self.validate()
|
|
280
|
+
|
|
281
|
+
except sql.exc.DBAPIError as e:
|
|
282
|
+
if isinstance(
|
|
283
|
+
e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)
|
|
284
|
+
) and (num_retries < _MAX_RETRIES or _MAX_RETRIES == -1):
|
|
285
|
+
num_retries += 1
|
|
286
|
+
_logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
|
|
287
|
+
time.sleep(random.uniform(0.1, 0.5))
|
|
288
|
+
continue
|
|
289
|
+
else:
|
|
290
|
+
raise
|
|
291
|
+
|
|
292
|
+
self._in_write_xact = for_write
|
|
293
|
+
yield conn
|
|
294
|
+
return
|
|
295
|
+
|
|
296
|
+
except sql.exc.DBAPIError as e:
|
|
297
|
+
# we got some db error during the actual operation (not just while trying to get locks on the metadata
|
|
298
|
+
# records): we convert these into Errors, if asked to do so, and abort
|
|
299
|
+
# TODO: what other concurrency-related exceptions should we expect?
|
|
300
|
+
|
|
301
|
+
# we always convert UndefinedTable exceptions (they can't be retried)
|
|
302
|
+
if isinstance(e.orig, psycopg.errors.UndefinedTable):
|
|
303
|
+
# the table got dropped in the middle of the table operation
|
|
304
|
+
_logger.debug(f'Exception: undefined table ({tbl.tbl_name()}): Caught {type(e.orig)}: {e!r}')
|
|
305
|
+
assert tbl is not None
|
|
306
|
+
raise excs.Error(f'Table was dropped: {tbl.tbl_name()}') from None
|
|
307
|
+
elif isinstance(e.orig, psycopg.errors.SerializationFailure) and convert_db_excs:
|
|
308
|
+
# we still got a serialization error, despite getting x-locks at the beginning
|
|
309
|
+
msg = f'{tbl.tbl_name()} ({tbl.tbl_id})' if tbl is not None else ''
|
|
310
|
+
_logger.debug(f'Exception: serialization failure: {msg} ({e})')
|
|
311
|
+
raise excs.Error('Serialization failure. Please re-run the operation.') from None
|
|
312
|
+
else:
|
|
313
|
+
raise
|
|
314
|
+
|
|
315
|
+
finally:
|
|
316
|
+
self._in_write_xact = False
|
|
317
|
+
self._x_locked_tbl_ids = set()
|
|
318
|
+
self._column_dependents = None
|
|
319
|
+
|
|
320
|
+
# invalidate cached current TableVersion instances
|
|
321
|
+
for tv in self._tbl_versions.values():
|
|
322
|
+
if tv.effective_version is None:
|
|
323
|
+
_logger.debug(f'invalidating table version {tv.id}:None (tv={id(tv):x})')
|
|
324
|
+
tv.is_validated = False
|
|
325
|
+
|
|
326
|
+
@property
|
|
327
|
+
def in_write_xact(self) -> bool:
|
|
328
|
+
return self._in_write_xact
|
|
329
|
+
|
|
330
|
+
def _acquire_path_locks(
|
|
331
|
+
self, *, tbl: TableVersionPath, for_write: bool = False, lock_mutable_tree: bool = False
|
|
332
|
+
) -> bool:
|
|
333
|
+
"""
|
|
334
|
+
Path locking protocol:
|
|
335
|
+
- refresh cached TableVersions of ancestors (we need those even during inserts, for computed columns that
|
|
336
|
+
reference the base tables)
|
|
337
|
+
- refresh cached TableVersion of tbl or get X-lock, depending on for_write
|
|
338
|
+
- if lock_mutable_tree, also X-lock all mutable views of tbl
|
|
339
|
+
|
|
340
|
+
Returns False if trying to lock a pure snapshot with for_write == True
|
|
341
|
+
Raises Error if tbl doesn't exist.
|
|
342
|
+
"""
|
|
343
|
+
start_idx = 1 if for_write else 0
|
|
344
|
+
for handle in tbl.get_tbl_versions()[start_idx::-1]:
|
|
345
|
+
_ = self.get_tbl_version(handle.id, handle.effective_version)
|
|
346
|
+
if not for_write:
|
|
347
|
+
return True # nothing left to lock
|
|
348
|
+
return self._acquire_tbl_xlock(tbl_id=tbl.tbl_id, lock_mutable_tree=lock_mutable_tree, raise_if_not_exists=True)
|
|
349
|
+
|
|
350
|
+
def _acquire_tbl_xlock(
|
|
351
|
+
self,
|
|
352
|
+
*,
|
|
353
|
+
tbl_id: Optional[UUID] = None,
|
|
354
|
+
dir_id: Optional[UUID] = None,
|
|
355
|
+
tbl_name: Optional[str] = None,
|
|
356
|
+
lock_mutable_tree: bool = False,
|
|
357
|
+
raise_if_not_exists: bool = False,
|
|
358
|
+
) -> bool:
|
|
359
|
+
"""Force acquisition of an X-lock on a Table record via a blind update
|
|
360
|
+
|
|
361
|
+
Either tbl_id or dir_id/tbl_name need to be specified.
|
|
362
|
+
Returns True if the table was locked, False if it was a snapshot or not found.
|
|
363
|
+
If lock_mutable_tree, recursively locks all mutable views of the table.
|
|
364
|
+
|
|
365
|
+
Returns False if the table is a snapshot or not found and !raise_if_not_exists.
|
|
366
|
+
"""
|
|
367
|
+
where_clause: sql.ColumnElement
|
|
368
|
+
if tbl_id is not None:
|
|
369
|
+
where_clause = schema.Table.id == tbl_id
|
|
370
|
+
else:
|
|
371
|
+
where_clause = sql.and_(schema.Table.dir_id == dir_id, schema.Table.md['name'].astext == tbl_name)
|
|
372
|
+
user = Env.get().user
|
|
373
|
+
if user is not None:
|
|
374
|
+
where_clause = sql.and_(where_clause, schema.Table.md['user'].astext == Env.get().user)
|
|
375
|
+
|
|
376
|
+
conn = Env.get().conn
|
|
377
|
+
row = conn.execute(sql.select(schema.Table).where(where_clause).with_for_update(nowait=True)).one_or_none()
|
|
378
|
+
if row is None:
|
|
379
|
+
if raise_if_not_exists:
|
|
380
|
+
raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
|
|
381
|
+
return False # nothing to lock
|
|
382
|
+
if row.md['view_md'] is not None and row.md['view_md']['is_snapshot']:
|
|
383
|
+
return False # nothing to lock
|
|
384
|
+
conn.execute(sql.update(schema.Table).values(lock_dummy=1).where(where_clause))
|
|
385
|
+
|
|
386
|
+
if not lock_mutable_tree:
|
|
387
|
+
return True
|
|
388
|
+
# also lock mutable views
|
|
389
|
+
tv = self.get_tbl_version(tbl_id, None)
|
|
390
|
+
for view in tv.mutable_views:
|
|
391
|
+
self._acquire_tbl_xlock(tbl_id=view.id, lock_mutable_tree=True, raise_if_not_exists=raise_if_not_exists)
|
|
392
|
+
return True
|
|
393
|
+
|
|
394
|
+
def _get_mutable_tree(self, tbl_id: UUID) -> set[UUID]:
|
|
395
|
+
"""Returns ids of all tables that form the tree of mutable views starting at tbl_id; includes the root."""
|
|
396
|
+
tv = self.get_tbl_version(tbl_id, None)
|
|
397
|
+
result: set[UUID] = {tv.id}
|
|
398
|
+
for view in tv.mutable_views:
|
|
399
|
+
result.update(self._get_mutable_tree(view.id))
|
|
400
|
+
return result
|
|
401
|
+
|
|
402
|
+
def _compute_column_dependents(self, mutable_tree: set[UUID]) -> None:
|
|
403
|
+
"""Populate self._column_dependents for all tables in mutable_tree"""
|
|
404
|
+
assert self._column_dependents is None
|
|
405
|
+
self._column_dependents = defaultdict(set)
|
|
406
|
+
for tbl_id in mutable_tree:
|
|
407
|
+
assert tbl_id in self._column_dependencies
|
|
408
|
+
for col, dependencies in self._column_dependencies[tbl_id].items():
|
|
409
|
+
for dependency in dependencies:
|
|
410
|
+
if dependency.tbl_id not in mutable_tree:
|
|
411
|
+
continue
|
|
412
|
+
dependents = self._column_dependents[dependency]
|
|
413
|
+
dependents.add(col)
|
|
414
|
+
|
|
415
|
+
def get_column_dependents(self, tbl_id: UUID, col_id: int) -> set[Column]:
|
|
416
|
+
"""Return all Columns that transitively depend on the given column."""
|
|
417
|
+
assert self._column_dependents is not None
|
|
418
|
+
dependents = self._column_dependents[QColumnId(tbl_id, col_id)]
|
|
419
|
+
result: set[Column] = set()
|
|
420
|
+
for dependent in dependents:
|
|
421
|
+
tv = self.get_tbl_version(dependent.tbl_id, None)
|
|
422
|
+
col = tv.cols_by_id[dependent.col_id]
|
|
423
|
+
result.add(col)
|
|
424
|
+
return result
|
|
425
|
+
|
|
426
|
+
def _acquire_dir_xlock(
|
|
427
|
+
self, *, parent_id: Optional[UUID] = None, dir_id: Optional[UUID] = None, dir_name: Optional[str] = None
|
|
428
|
+
) -> None:
|
|
429
|
+
"""Force acquisition of an X-lock on a Dir record via a blind update.
|
|
430
|
+
|
|
122
431
|
If dir_id is present, then all other conditions are ignored.
|
|
123
432
|
Note that (parent_id==None) is a valid where condition.
|
|
124
433
|
If dir_id is not specified, the user from the environment is added to the directory filters.
|
|
125
434
|
"""
|
|
435
|
+
assert (dir_name is None) != (dir_id is None)
|
|
436
|
+
assert not (parent_id is not None and dir_name is None)
|
|
126
437
|
user = Env.get().user
|
|
127
|
-
|
|
438
|
+
assert self._in_write_xact
|
|
128
439
|
q = sql.update(schema.Dir).values(lock_dummy=1)
|
|
129
440
|
if dir_id is not None:
|
|
130
441
|
q = q.where(schema.Dir.id == dir_id)
|
|
@@ -134,7 +445,7 @@ class Catalog:
|
|
|
134
445
|
q = q.where(schema.Dir.md['name'].astext == dir_name)
|
|
135
446
|
if user is not None:
|
|
136
447
|
q = q.where(schema.Dir.md['user'].astext == user)
|
|
137
|
-
conn.execute(q)
|
|
448
|
+
Env.get().conn.execute(q)
|
|
138
449
|
|
|
139
450
|
def get_dir_path(self, dir_id: UUID) -> Path:
|
|
140
451
|
"""Return path for directory with given id"""
|
|
@@ -156,7 +467,7 @@ class Catalog:
|
|
|
156
467
|
dir_entries: dict[str, Catalog.DirEntry]
|
|
157
468
|
table: Optional[schema.Table]
|
|
158
469
|
|
|
159
|
-
@_retry_loop
|
|
470
|
+
@_retry_loop(for_write=False)
|
|
160
471
|
def get_dir_contents(self, dir_path: Path, recursive: bool = False) -> dict[str, DirEntry]:
|
|
161
472
|
dir = self._get_schema_object(dir_path, expected=Dir, raise_if_not_exists=True)
|
|
162
473
|
return self._get_dir_contents(dir._id, recursive=recursive)
|
|
@@ -183,7 +494,7 @@ class Catalog:
|
|
|
183
494
|
|
|
184
495
|
return result
|
|
185
496
|
|
|
186
|
-
@_retry_loop
|
|
497
|
+
@_retry_loop(for_write=True)
|
|
187
498
|
def move(self, path: Path, new_path: Path) -> None:
|
|
188
499
|
self._move(path, new_path)
|
|
189
500
|
|
|
@@ -236,7 +547,7 @@ class Catalog:
|
|
|
236
547
|
add_dir: Optional[schema.Dir] = None
|
|
237
548
|
drop_dir: Optional[schema.Dir] = None
|
|
238
549
|
for p in sorted(dir_paths):
|
|
239
|
-
dir = self._get_dir(p,
|
|
550
|
+
dir = self._get_dir(p, lock_dir=True)
|
|
240
551
|
if dir is None:
|
|
241
552
|
raise excs.Error(f'Directory {str(p)!r} does not exist.')
|
|
242
553
|
if p == add_dir_path:
|
|
@@ -246,7 +557,7 @@ class Catalog:
|
|
|
246
557
|
|
|
247
558
|
add_obj: Optional[SchemaObject] = None
|
|
248
559
|
if add_dir is not None:
|
|
249
|
-
add_obj = self._get_dir_entry(add_dir.id, add_name,
|
|
560
|
+
add_obj = self._get_dir_entry(add_dir.id, add_name, lock_entry=True)
|
|
250
561
|
if add_obj is not None and raise_if_exists:
|
|
251
562
|
add_path = add_dir_path.append(add_name)
|
|
252
563
|
raise excs.Error(f'Path {str(add_path)!r} already exists.')
|
|
@@ -254,7 +565,7 @@ class Catalog:
|
|
|
254
565
|
drop_obj: Optional[SchemaObject] = None
|
|
255
566
|
if drop_dir is not None:
|
|
256
567
|
drop_path = drop_dir_path.append(drop_name)
|
|
257
|
-
drop_obj = self._get_dir_entry(drop_dir.id, drop_name,
|
|
568
|
+
drop_obj = self._get_dir_entry(drop_dir.id, drop_name, lock_entry=True)
|
|
258
569
|
if drop_obj is None and raise_if_not_exists:
|
|
259
570
|
raise excs.Error(f'Path {str(drop_path)!r} does not exist.')
|
|
260
571
|
if drop_obj is not None and drop_expected is not None and not isinstance(drop_obj, drop_expected):
|
|
@@ -266,13 +577,13 @@ class Catalog:
|
|
|
266
577
|
add_dir_obj = Dir(add_dir.id, add_dir.parent_id, add_dir.md['name']) if add_dir is not None else None
|
|
267
578
|
return add_obj, add_dir_obj, drop_obj
|
|
268
579
|
|
|
269
|
-
def _get_dir_entry(self, dir_id: UUID, name: str,
|
|
580
|
+
def _get_dir_entry(self, dir_id: UUID, name: str, lock_entry: bool = False) -> Optional[SchemaObject]:
|
|
270
581
|
user = Env.get().user
|
|
271
582
|
conn = Env.get().conn
|
|
272
583
|
|
|
273
584
|
# check for subdirectory
|
|
274
|
-
if
|
|
275
|
-
self.
|
|
585
|
+
if lock_entry:
|
|
586
|
+
self._acquire_dir_xlock(parent_id=dir_id, dir_id=None, dir_name=name)
|
|
276
587
|
q = sql.select(schema.Dir).where(
|
|
277
588
|
schema.Dir.parent_id == dir_id, schema.Dir.md['name'].astext == name, schema.Dir.md['user'].astext == user
|
|
278
589
|
)
|
|
@@ -286,17 +597,17 @@ class Catalog:
|
|
|
286
597
|
return Dir(dir_record.id, dir_record.parent_id, name)
|
|
287
598
|
|
|
288
599
|
# check for table
|
|
600
|
+
if lock_entry:
|
|
601
|
+
self._acquire_tbl_xlock(dir_id=dir_id, tbl_name=name)
|
|
289
602
|
q = sql.select(schema.Table.id).where(
|
|
290
603
|
schema.Table.dir_id == dir_id,
|
|
291
604
|
schema.Table.md['name'].astext == name,
|
|
292
605
|
schema.Table.md['user'].astext == user,
|
|
293
606
|
)
|
|
294
|
-
if for_update:
|
|
295
|
-
q = q.with_for_update()
|
|
296
607
|
tbl_id = conn.execute(q).scalar_one_or_none()
|
|
297
608
|
if tbl_id is not None:
|
|
298
609
|
if tbl_id not in self._tbls:
|
|
299
|
-
|
|
610
|
+
_ = self._load_tbl(tbl_id)
|
|
300
611
|
return self._tbls[tbl_id]
|
|
301
612
|
|
|
302
613
|
return None
|
|
@@ -307,7 +618,8 @@ class Catalog:
|
|
|
307
618
|
expected: Optional[type[SchemaObject]] = None,
|
|
308
619
|
raise_if_exists: bool = False,
|
|
309
620
|
raise_if_not_exists: bool = False,
|
|
310
|
-
|
|
621
|
+
lock_parent: bool = False,
|
|
622
|
+
lock_obj: bool = False,
|
|
311
623
|
) -> Optional[SchemaObject]:
|
|
312
624
|
"""Return the schema object at the given path, or None if it doesn't exist.
|
|
313
625
|
|
|
@@ -323,16 +635,16 @@ class Catalog:
|
|
|
323
635
|
raise excs.Error(
|
|
324
636
|
f'{str(path)!r} needs to be a {expected._display_name()} but is a {Dir._display_name()}'
|
|
325
637
|
)
|
|
326
|
-
dir = self._get_dir(path,
|
|
638
|
+
dir = self._get_dir(path, lock_dir=lock_obj)
|
|
327
639
|
if dir is None:
|
|
328
640
|
raise excs.Error(f'Unknown user: {Env.get().user}')
|
|
329
641
|
return Dir(dir.id, dir.parent_id, dir.md['name'])
|
|
330
642
|
|
|
331
643
|
parent_path = path.parent
|
|
332
|
-
parent_dir = self._get_dir(parent_path,
|
|
644
|
+
parent_dir = self._get_dir(parent_path, lock_dir=lock_parent)
|
|
333
645
|
if parent_dir is None:
|
|
334
646
|
raise excs.Error(f'Directory {str(parent_path)!r} does not exist.')
|
|
335
|
-
obj = self._get_dir_entry(parent_dir.id, path.name,
|
|
647
|
+
obj = self._get_dir_entry(parent_dir.id, path.name, lock_entry=lock_obj)
|
|
336
648
|
|
|
337
649
|
if obj is None and raise_if_not_exists:
|
|
338
650
|
raise excs.Error(f'Path {str(path)!r} does not exist.')
|
|
@@ -349,10 +661,15 @@ class Catalog:
|
|
|
349
661
|
tbl = self._load_tbl(tbl_id)
|
|
350
662
|
if tbl is None:
|
|
351
663
|
return None
|
|
352
|
-
|
|
664
|
+
# # if this is a mutable table, we also need to have its mutable views loaded, in order to track column
|
|
665
|
+
# # dependencies
|
|
666
|
+
# tbl_version = tbl._tbl_version.get()
|
|
667
|
+
# if tbl_version.is_mutable:
|
|
668
|
+
# for v in tbl_version.mutable_views:
|
|
669
|
+
# _ = self.get_table_by_id(v.id)
|
|
353
670
|
return self._tbls[tbl_id]
|
|
354
671
|
|
|
355
|
-
@_retry_loop
|
|
672
|
+
@_retry_loop(for_write=True)
|
|
356
673
|
def create_table(
|
|
357
674
|
self,
|
|
358
675
|
path: Path,
|
|
@@ -385,13 +702,14 @@ class Catalog:
|
|
|
385
702
|
self._tbls[tbl._id] = tbl
|
|
386
703
|
return tbl
|
|
387
704
|
|
|
388
|
-
@_retry_loop
|
|
705
|
+
@_retry_loop(for_write=True)
|
|
389
706
|
def create_view(
|
|
390
707
|
self,
|
|
391
708
|
path: Path,
|
|
392
709
|
base: TableVersionPath,
|
|
393
710
|
select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]],
|
|
394
711
|
where: Optional[exprs.Expr],
|
|
712
|
+
sample_clause: Optional['SampleClause'],
|
|
395
713
|
additional_columns: Optional[dict[str, Any]],
|
|
396
714
|
is_snapshot: bool,
|
|
397
715
|
iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]],
|
|
@@ -402,6 +720,18 @@ class Catalog:
|
|
|
402
720
|
) -> Table:
|
|
403
721
|
from pixeltable.utils.filecache import FileCache
|
|
404
722
|
|
|
723
|
+
if not is_snapshot and not base.is_snapshot():
|
|
724
|
+
# this is a mutable view of a mutable base; X-lock the base and advance its view_sn before adding the view
|
|
725
|
+
self._acquire_tbl_xlock(tbl_id=base.tbl_id)
|
|
726
|
+
base_tv = self.get_tbl_version(base.tbl_id, None)
|
|
727
|
+
base_tv.tbl_md.view_sn += 1
|
|
728
|
+
result = Env.get().conn.execute(
|
|
729
|
+
sql.update(schema.Table)
|
|
730
|
+
.values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
|
|
731
|
+
.where(schema.Table.id == base.tbl_id)
|
|
732
|
+
)
|
|
733
|
+
assert result.rowcount == 1, result.rowcount
|
|
734
|
+
|
|
405
735
|
existing = self._handle_path_collision(path, View, is_snapshot, if_exists)
|
|
406
736
|
if existing is not None:
|
|
407
737
|
assert isinstance(existing, View)
|
|
@@ -420,6 +750,7 @@ class Catalog:
|
|
|
420
750
|
select_list=select_list,
|
|
421
751
|
additional_columns=additional_columns,
|
|
422
752
|
predicate=where,
|
|
753
|
+
sample_clause=sample_clause,
|
|
423
754
|
is_snapshot=is_snapshot,
|
|
424
755
|
iterator_cls=iterator_class,
|
|
425
756
|
iterator_args=iterator_args,
|
|
@@ -431,14 +762,17 @@ class Catalog:
|
|
|
431
762
|
self._tbls[view._id] = view
|
|
432
763
|
return view
|
|
433
764
|
|
|
434
|
-
@_retry_loop
|
|
765
|
+
@_retry_loop(for_write=True)
|
|
435
766
|
def create_replica(
|
|
436
767
|
self, path: Path, md: list[schema.FullTableMd], if_exists: IfExistsParam = IfExistsParam.ERROR
|
|
437
|
-
) ->
|
|
768
|
+
) -> None:
|
|
438
769
|
"""
|
|
439
770
|
Creates table, table_version, and table_schema_version records for a replica with the given metadata.
|
|
440
771
|
The metadata should be presented in standard "ancestor order", with the table being replicated at
|
|
441
772
|
list position 0 and the (root) base table at list position -1.
|
|
773
|
+
|
|
774
|
+
TODO: create_replica() also needs to create the store tables and populate them in order to make
|
|
775
|
+
replica creation atomic.
|
|
442
776
|
"""
|
|
443
777
|
tbl_id = UUID(md[0].tbl_md.tbl_id)
|
|
444
778
|
|
|
@@ -451,20 +785,19 @@ class Catalog:
|
|
|
451
785
|
'but a different table already exists at that location.'
|
|
452
786
|
)
|
|
453
787
|
assert isinstance(existing, View)
|
|
454
|
-
return
|
|
788
|
+
return
|
|
455
789
|
|
|
456
790
|
# Ensure that the system directory exists.
|
|
457
791
|
self._create_dir(Path('_system', allow_system_paths=True), if_exists=IfExistsParam.IGNORE, parents=False)
|
|
458
792
|
|
|
459
793
|
# Now check to see if this table already exists in the catalog.
|
|
460
|
-
# TODO: Handle concurrency in create_replica()
|
|
461
794
|
existing = Catalog.get().get_table_by_id(tbl_id)
|
|
462
795
|
if existing is not None:
|
|
463
|
-
existing_path = Path(existing._path, allow_system_paths=True)
|
|
796
|
+
existing_path = Path(existing._path(), allow_system_paths=True)
|
|
464
797
|
# It does exist. If it's a non-system table, that's an error: it's already been replicated.
|
|
465
798
|
if not existing_path.is_system_path:
|
|
466
799
|
raise excs.Error(
|
|
467
|
-
f'That table has already been replicated as {existing._path!r}. \n'
|
|
800
|
+
f'That table has already been replicated as {existing._path()!r}. \n'
|
|
468
801
|
f'Drop the existing replica if you wish to re-create it.'
|
|
469
802
|
)
|
|
470
803
|
# If it's a system table, then this means it was created at some point as the ancestor of some other
|
|
@@ -489,22 +822,20 @@ class Catalog:
|
|
|
489
822
|
# The table already exists in the catalog. The existing path might be a system path (if the table
|
|
490
823
|
# was created as an anonymous base table of some other table), or it might not (if it's a snapshot
|
|
491
824
|
# that was directly replicated by the user at some point). In either case, use the existing path.
|
|
492
|
-
replica_path = Path(replica._path, allow_system_paths=True)
|
|
825
|
+
replica_path = Path(replica._path(), allow_system_paths=True)
|
|
493
826
|
|
|
494
827
|
# Store the metadata; it could be a new version (in which case a new record will be created) or a
|
|
495
828
|
# known version (in which case the newly received metadata will be validated as identical).
|
|
496
829
|
self.__store_replica_md(replica_path, ancestor_md)
|
|
497
830
|
|
|
498
|
-
#
|
|
499
|
-
#
|
|
500
|
-
self._tbls[tbl_id] = self._load_tbl(tbl_id)
|
|
501
|
-
return self._tbls[tbl_id]
|
|
831
|
+
# don't create TableVersion instances at this point, they would be superseded by calls to TV.create_replica()
|
|
832
|
+
# in TableRestorer.restore()
|
|
502
833
|
|
|
503
834
|
def __store_replica_md(self, path: Path, md: schema.FullTableMd) -> None:
|
|
504
835
|
_logger.info(f'Creating replica table at {path!r} with ID: {md.tbl_md.tbl_id}')
|
|
505
|
-
# TODO: Handle concurrency
|
|
506
836
|
dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
|
|
507
837
|
assert dir is not None
|
|
838
|
+
assert self._in_write_xact
|
|
508
839
|
|
|
509
840
|
conn = Env.get().conn
|
|
510
841
|
tbl_id = md.tbl_md.tbl_id
|
|
@@ -582,26 +913,33 @@ class Catalog:
|
|
|
582
913
|
|
|
583
914
|
self.store_tbl_md(UUID(tbl_id), new_tbl_md, new_version_md, new_schema_version_md)
|
|
584
915
|
|
|
585
|
-
@_retry_loop
|
|
916
|
+
@_retry_loop(for_write=False)
|
|
586
917
|
def get_table(self, path: Path) -> Table:
|
|
587
918
|
obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
|
|
588
919
|
assert isinstance(obj, Table)
|
|
589
|
-
obj._tbl_version.get().ensure_md_loaded()
|
|
590
920
|
return obj
|
|
591
921
|
|
|
592
|
-
@_retry_loop
|
|
922
|
+
@_retry_loop(for_write=True)
|
|
593
923
|
def drop_table(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
drop_expected=Table,
|
|
924
|
+
tbl = self._get_schema_object(
|
|
925
|
+
path,
|
|
926
|
+
expected=Table,
|
|
598
927
|
raise_if_not_exists=if_not_exists == IfNotExistsParam.ERROR and not force,
|
|
928
|
+
lock_parent=True,
|
|
929
|
+
lock_obj=False,
|
|
599
930
|
)
|
|
600
|
-
if
|
|
931
|
+
if tbl is None:
|
|
601
932
|
_logger.info(f'Skipped table {str(path)!r} (does not exist).')
|
|
602
933
|
return
|
|
603
|
-
assert isinstance(
|
|
604
|
-
|
|
934
|
+
assert isinstance(tbl, Table)
|
|
935
|
+
|
|
936
|
+
if isinstance(tbl, View) and tbl._tbl_version_path.is_mutable() and tbl._tbl_version_path.base.is_mutable():
|
|
937
|
+
# this is a mutable view of a mutable base;
|
|
938
|
+
# lock the base before the view, in order to avoid deadlocks with concurrent inserts/updates
|
|
939
|
+
base_id = tbl._tbl_version_path.base.tbl_id
|
|
940
|
+
self._acquire_tbl_xlock(tbl_id=base_id, lock_mutable_tree=False)
|
|
941
|
+
|
|
942
|
+
self._drop_tbl(tbl, force=force, is_replace=False)
|
|
605
943
|
|
|
606
944
|
def _drop_tbl(self, tbl: Table, force: bool, is_replace: bool) -> None:
|
|
607
945
|
"""
|
|
@@ -611,8 +949,11 @@ class Catalog:
|
|
|
611
949
|
- X-lock base before X-locking any view
|
|
612
950
|
- deadlock-free wrt TableVersion.insert() (insert propagation also proceeds top-down)
|
|
613
951
|
- X-locks parent dir prior to calling TableVersion.drop(): prevent concurrent creation of another SchemaObject
|
|
614
|
-
in the same directory with the same name (which could lead to duplicate names if we get
|
|
952
|
+
in the same directory with the same name (which could lead to duplicate names if we get aborted)
|
|
615
953
|
"""
|
|
954
|
+
self._acquire_dir_xlock(dir_id=tbl._dir_id)
|
|
955
|
+
self._acquire_tbl_xlock(tbl_id=tbl._id, lock_mutable_tree=False)
|
|
956
|
+
|
|
616
957
|
view_ids = self.get_view_ids(tbl._id, for_update=True)
|
|
617
958
|
if len(view_ids) > 0:
|
|
618
959
|
if not force:
|
|
@@ -621,24 +962,46 @@ class Catalog:
|
|
|
621
962
|
msg: str
|
|
622
963
|
if is_replace:
|
|
623
964
|
msg = (
|
|
624
|
-
f'{obj_type_str} {tbl._path} already exists and has dependents. '
|
|
965
|
+
f'{obj_type_str} {tbl._path()} already exists and has dependents. '
|
|
625
966
|
"Use `if_exists='replace_force'` to replace it."
|
|
626
967
|
)
|
|
627
968
|
else:
|
|
628
|
-
msg = f'{obj_type_str} {tbl._path} has dependents.'
|
|
969
|
+
msg = f'{obj_type_str} {tbl._path()} has dependents.'
|
|
629
970
|
raise excs.Error(msg)
|
|
630
971
|
|
|
631
972
|
for view_id in view_ids:
|
|
632
973
|
view = self.get_table_by_id(view_id)
|
|
633
974
|
self._drop_tbl(view, force=force, is_replace=is_replace)
|
|
634
975
|
|
|
635
|
-
|
|
636
|
-
tbl.
|
|
976
|
+
# if this is a mutable view of a mutable base, advance the base's view_sn
|
|
977
|
+
if isinstance(tbl, View) and tbl._tbl_version_path.is_mutable() and tbl._tbl_version_path.base.is_mutable():
|
|
978
|
+
base_id = tbl._tbl_version_path.base.tbl_id
|
|
979
|
+
base_tv = self.get_tbl_version(base_id, None)
|
|
980
|
+
base_tv.tbl_md.view_sn += 1
|
|
981
|
+
result = Env.get().conn.execute(
|
|
982
|
+
sql.update(schema.Table.__table__)
|
|
983
|
+
.values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
|
|
984
|
+
.where(schema.Table.id == base_id)
|
|
985
|
+
)
|
|
986
|
+
assert result.rowcount == 1, result.rowcount
|
|
987
|
+
|
|
988
|
+
tv = tbl._tbl_version.get() if tbl._tbl_version is not None else None
|
|
989
|
+
if tv is not None:
|
|
990
|
+
tv = tbl._tbl_version.get()
|
|
991
|
+
# invalidate the TableVersion instance so that existing references to it can find out it has been dropped
|
|
992
|
+
tv.is_validated = False
|
|
993
|
+
|
|
994
|
+
self.delete_tbl_md(tbl._id)
|
|
637
995
|
assert tbl._id in self._tbls
|
|
638
996
|
del self._tbls[tbl._id]
|
|
639
|
-
_logger.info(f'Dropped table `{tbl._path}`.')
|
|
997
|
+
_logger.info(f'Dropped table `{tbl._path()}`.')
|
|
998
|
+
|
|
999
|
+
if tv is not None:
|
|
1000
|
+
tv.drop()
|
|
1001
|
+
assert (tv.id, tv.effective_version) in self._tbl_versions
|
|
1002
|
+
del self._tbl_versions[tv.id, tv.effective_version]
|
|
640
1003
|
|
|
641
|
-
@_retry_loop
|
|
1004
|
+
@_retry_loop(for_write=True)
|
|
642
1005
|
def create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
|
|
643
1006
|
return self._create_dir(path, if_exists, parents)
|
|
644
1007
|
|
|
@@ -673,7 +1036,7 @@ class Catalog:
|
|
|
673
1036
|
Env.get().console_logger.info(f'Created directory {str(path)!r}.')
|
|
674
1037
|
return dir
|
|
675
1038
|
|
|
676
|
-
@_retry_loop
|
|
1039
|
+
@_retry_loop(for_write=True)
|
|
677
1040
|
def drop_dir(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
|
|
678
1041
|
_, _, schema_obj = self._prepare_dir_op(
|
|
679
1042
|
drop_dir_path=path.parent,
|
|
@@ -698,7 +1061,7 @@ class Catalog:
|
|
|
698
1061
|
raise excs.Error(f'Directory {str(dir_path)!r} is not empty.')
|
|
699
1062
|
|
|
700
1063
|
# drop existing subdirs
|
|
701
|
-
self.
|
|
1064
|
+
self._acquire_dir_xlock(dir_id=dir_id)
|
|
702
1065
|
dir_q = sql.select(schema.Dir).where(schema.Dir.parent_id == dir_id)
|
|
703
1066
|
for row in conn.execute(dir_q).all():
|
|
704
1067
|
self._drop_dir(row.id, dir_path.append(row.md['name']), force=True)
|
|
@@ -718,6 +1081,11 @@ class Catalog:
|
|
|
718
1081
|
def get_view_ids(self, tbl_id: UUID, for_update: bool = False) -> list[UUID]:
|
|
719
1082
|
"""Return the ids of views that directly reference the given table"""
|
|
720
1083
|
conn = Env.get().conn
|
|
1084
|
+
# check whether this table still exists
|
|
1085
|
+
q = sql.select(sql.func.count()).select_from(schema.Table).where(schema.Table.id == tbl_id)
|
|
1086
|
+
tbl_count = conn.execute(q).scalar()
|
|
1087
|
+
if tbl_count == 0:
|
|
1088
|
+
raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
|
|
721
1089
|
q = sql.select(schema.Table.id).where(sql.text(f"md->'view_md'->'base_versions'->0->>0 = {tbl_id.hex!r}"))
|
|
722
1090
|
if for_update:
|
|
723
1091
|
q = q.with_for_update()
|
|
@@ -725,17 +1093,39 @@ class Catalog:
|
|
|
725
1093
|
return result
|
|
726
1094
|
|
|
727
1095
|
def get_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
1096
|
+
# we need a transaction here, if we're not already in one; if this starts a new transaction,
|
|
1097
|
+
# the returned TableVersion instance will not be validated
|
|
1098
|
+
with self.begin_xact(for_write=False) as conn:
|
|
1099
|
+
tv = self._tbl_versions.get((tbl_id, effective_version))
|
|
1100
|
+
if tv is None:
|
|
1101
|
+
tv = self._load_tbl_version(tbl_id, effective_version)
|
|
1102
|
+
elif not tv.is_validated:
|
|
1103
|
+
# only live instances are invalidated
|
|
1104
|
+
assert effective_version is None
|
|
1105
|
+
# we validate live instances by comparing our cached TableMd.current_version/view_sn to what's stored
|
|
1106
|
+
# _logger.debug(f'validating metadata for table {tbl_id}:{tv.version} ({id(tv):x})')
|
|
1107
|
+
q = sql.select(schema.Table.md).where(schema.Table.id == tbl_id)
|
|
1108
|
+
row = conn.execute(q).one_or_none()
|
|
1109
|
+
if row is None:
|
|
1110
|
+
raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
|
|
1111
|
+
current_version, view_sn = row.md['current_version'], row.md['view_sn']
|
|
1112
|
+
|
|
1113
|
+
# the stored version can be behind TableVersion.version, because we don't roll back the in-memory
|
|
1114
|
+
# metadata changes after a failed update operation
|
|
1115
|
+
if current_version != tv.version or view_sn != tv.tbl_md.view_sn:
|
|
1116
|
+
# the cached metadata is invalid
|
|
1117
|
+
_logger.debug(
|
|
1118
|
+
f'reloading metadata for table {tbl_id} '
|
|
1119
|
+
f'(cached/current version: {tv.version}/{current_version}, '
|
|
1120
|
+
f'cached/current view_sn: {tv.tbl_md.view_sn}/{view_sn})'
|
|
1121
|
+
)
|
|
1122
|
+
tv = self._load_tbl_version(tbl_id, None)
|
|
1123
|
+
else:
|
|
1124
|
+
# the cached metadata is valid
|
|
1125
|
+
tv.is_validated = True
|
|
1126
|
+
|
|
1127
|
+
assert tv.is_validated
|
|
1128
|
+
return tv
|
|
739
1129
|
|
|
740
1130
|
def remove_tbl_version(self, tbl_version: TableVersion) -> None:
|
|
741
1131
|
assert (tbl_version.id, tbl_version.effective_version) in self._tbl_versions
|
|
@@ -745,7 +1135,7 @@ class Catalog:
|
|
|
745
1135
|
"""Return the Dir with the given id, or None if it doesn't exist"""
|
|
746
1136
|
conn = Env.get().conn
|
|
747
1137
|
if for_update:
|
|
748
|
-
self.
|
|
1138
|
+
self._acquire_dir_xlock(dir_id=dir_id)
|
|
749
1139
|
q = sql.select(schema.Dir).where(schema.Dir.id == dir_id)
|
|
750
1140
|
row = conn.execute(q).one_or_none()
|
|
751
1141
|
if row is None:
|
|
@@ -753,24 +1143,24 @@ class Catalog:
|
|
|
753
1143
|
dir_record = schema.Dir(**row._mapping)
|
|
754
1144
|
return Dir(dir_record.id, dir_record.parent_id, dir_record.md['name'])
|
|
755
1145
|
|
|
756
|
-
def _get_dir(self, path: Path,
|
|
1146
|
+
def _get_dir(self, path: Path, lock_dir: bool = False) -> Optional[schema.Dir]:
|
|
757
1147
|
"""
|
|
758
|
-
|
|
1148
|
+
lock_dir: if True, X-locks target (but not the ancestors)
|
|
759
1149
|
"""
|
|
760
1150
|
user = Env.get().user
|
|
761
1151
|
conn = Env.get().conn
|
|
762
1152
|
if path.is_root:
|
|
763
|
-
if
|
|
764
|
-
self.
|
|
1153
|
+
if lock_dir:
|
|
1154
|
+
self._acquire_dir_xlock(dir_name='')
|
|
765
1155
|
q = sql.select(schema.Dir).where(schema.Dir.parent_id.is_(None), schema.Dir.md['user'].astext == user)
|
|
766
1156
|
row = conn.execute(q).one_or_none()
|
|
767
1157
|
return schema.Dir(**row._mapping) if row is not None else None
|
|
768
1158
|
else:
|
|
769
|
-
parent_dir = self._get_dir(path.parent,
|
|
1159
|
+
parent_dir = self._get_dir(path.parent, lock_dir=False)
|
|
770
1160
|
if parent_dir is None:
|
|
771
1161
|
return None
|
|
772
|
-
if
|
|
773
|
-
self.
|
|
1162
|
+
if lock_dir:
|
|
1163
|
+
self._acquire_dir_xlock(parent_id=parent_dir.id, dir_name=path.name)
|
|
774
1164
|
q = sql.select(schema.Dir).where(
|
|
775
1165
|
schema.Dir.parent_id == parent_dir.id,
|
|
776
1166
|
schema.Dir.md['name'].astext == path.name,
|
|
@@ -780,6 +1170,7 @@ class Catalog:
|
|
|
780
1170
|
return schema.Dir(**row._mapping) if row is not None else None
|
|
781
1171
|
|
|
782
1172
|
def _load_tbl(self, tbl_id: UUID) -> Optional[Table]:
|
|
1173
|
+
"""Loads metadata for the table with the given id and caches it."""
|
|
783
1174
|
_logger.info(f'Loading table {tbl_id}')
|
|
784
1175
|
from .insertable_table import InsertableTable
|
|
785
1176
|
from .view import View
|
|
@@ -808,8 +1199,9 @@ class Catalog:
|
|
|
808
1199
|
if view_md is None:
|
|
809
1200
|
# this is a base table
|
|
810
1201
|
if (tbl_id, None) not in self._tbl_versions:
|
|
811
|
-
|
|
1202
|
+
_ = self._load_tbl_version(tbl_id, None)
|
|
812
1203
|
tbl = InsertableTable(tbl_record.dir_id, TableVersionHandle(tbl_id, None))
|
|
1204
|
+
self._tbls[tbl_id] = tbl
|
|
813
1205
|
return tbl
|
|
814
1206
|
|
|
815
1207
|
# this is a view; determine the sequence of TableVersions to load
|
|
@@ -829,18 +1221,18 @@ class Catalog:
|
|
|
829
1221
|
view_path: Optional[TableVersionPath] = None
|
|
830
1222
|
for id, effective_version in tbl_version_path[::-1]:
|
|
831
1223
|
if (id, effective_version) not in self._tbl_versions:
|
|
832
|
-
|
|
1224
|
+
_ = self._load_tbl_version(id, effective_version)
|
|
833
1225
|
view_path = TableVersionPath(TableVersionHandle(id, effective_version), base=base_path)
|
|
834
1226
|
base_path = view_path
|
|
835
1227
|
view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=pure_snapshot)
|
|
836
|
-
|
|
1228
|
+
self._tbls[tbl_id] = view
|
|
837
1229
|
return view
|
|
838
1230
|
|
|
839
1231
|
def load_tbl_md(self, tbl_id: UUID, effective_version: Optional[int]) -> schema.FullTableMd:
|
|
840
1232
|
"""
|
|
841
1233
|
Loads metadata from the store for a given table UUID and version.
|
|
842
1234
|
"""
|
|
843
|
-
_logger.info(f'Loading metadata for table version: {tbl_id}:{effective_version}')
|
|
1235
|
+
# _logger.info(f'Loading metadata for table version: {tbl_id}:{effective_version}')
|
|
844
1236
|
conn = Env.get().conn
|
|
845
1237
|
|
|
846
1238
|
q = (
|
|
@@ -890,7 +1282,8 @@ class Catalog:
|
|
|
890
1282
|
)
|
|
891
1283
|
|
|
892
1284
|
row = conn.execute(q).one_or_none()
|
|
893
|
-
|
|
1285
|
+
if row is None:
|
|
1286
|
+
raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
|
|
894
1287
|
tbl_record, version_record, schema_version_record = _unpack_row(
|
|
895
1288
|
row, [schema.Table, schema.TableVersion, schema.TableSchemaVersion]
|
|
896
1289
|
)
|
|
@@ -915,8 +1308,15 @@ class Catalog:
|
|
|
915
1308
|
If inserting `version_md` or `schema_version_md` would be a primary key violation, an exception will be raised.
|
|
916
1309
|
"""
|
|
917
1310
|
conn = Env.get().conn
|
|
1311
|
+
assert self._in_write_xact
|
|
918
1312
|
|
|
919
1313
|
if tbl_md is not None:
|
|
1314
|
+
assert tbl_md.tbl_id == str(tbl_id)
|
|
1315
|
+
if version_md is not None:
|
|
1316
|
+
assert tbl_md.current_version == version_md.version
|
|
1317
|
+
assert tbl_md.current_schema_version == version_md.schema_version
|
|
1318
|
+
if schema_version_md is not None:
|
|
1319
|
+
assert tbl_md.current_schema_version == schema_version_md.schema_version
|
|
920
1320
|
result = conn.execute(
|
|
921
1321
|
sql.update(schema.Table.__table__)
|
|
922
1322
|
.values({schema.Table.md: dataclasses.asdict(tbl_md)})
|
|
@@ -925,6 +1325,9 @@ class Catalog:
|
|
|
925
1325
|
assert result.rowcount == 1, result.rowcount
|
|
926
1326
|
|
|
927
1327
|
if version_md is not None:
|
|
1328
|
+
assert version_md.tbl_id == str(tbl_id)
|
|
1329
|
+
if schema_version_md is not None:
|
|
1330
|
+
assert version_md.schema_version == schema_version_md.schema_version
|
|
928
1331
|
conn.execute(
|
|
929
1332
|
sql.insert(schema.TableVersion.__table__).values(
|
|
930
1333
|
tbl_id=tbl_id, version=version_md.version, md=dataclasses.asdict(version_md)
|
|
@@ -932,6 +1335,7 @@ class Catalog:
|
|
|
932
1335
|
)
|
|
933
1336
|
|
|
934
1337
|
if schema_version_md is not None:
|
|
1338
|
+
assert schema_version_md.tbl_id == str(tbl_id)
|
|
935
1339
|
conn.execute(
|
|
936
1340
|
sql.insert(schema.TableSchemaVersion.__table__).values(
|
|
937
1341
|
tbl_id=tbl_id,
|
|
@@ -962,7 +1366,7 @@ class Catalog:
|
|
|
962
1366
|
|
|
963
1367
|
# If `tbl` is a named pure snapshot, we're not quite done, since the snapshot metadata won't appear in the
|
|
964
1368
|
# TableVersionPath. We need to prepend it separately.
|
|
965
|
-
if tbl
|
|
1369
|
+
if isinstance(tbl, View) and tbl._snapshot_only:
|
|
966
1370
|
snapshot_md = self.load_tbl_md(tbl._id, 0)
|
|
967
1371
|
md = [snapshot_md, *md]
|
|
968
1372
|
|
|
@@ -978,52 +1382,73 @@ class Catalog:
|
|
|
978
1382
|
return md
|
|
979
1383
|
|
|
980
1384
|
def _load_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
|
|
1385
|
+
"""Creates TableVersion instance from stored metadata and registers it in _tbl_versions."""
|
|
981
1386
|
tbl_md, _, schema_version_md = self.load_tbl_md(tbl_id, effective_version)
|
|
982
1387
|
view_md = tbl_md.view_md
|
|
983
1388
|
|
|
984
|
-
_logger.info(f'Loading table version: {tbl_id}:{effective_version}')
|
|
985
1389
|
conn = Env.get().conn
|
|
986
1390
|
|
|
987
|
-
# load mutable view ids
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
1391
|
+
# load mutable view ids for mutable TableVersions
|
|
1392
|
+
mutable_view_ids: list[UUID] = []
|
|
1393
|
+
# If this is a replica, effective_version should not be None. We do see None for replicas today, because
|
|
1394
|
+
# the replica's TV instance's Column instances contain value_expr_dicts that reference the live version.
|
|
1395
|
+
# This is presumably a source of bugs, because it ignores schema version changes (eg, column renames).
|
|
1396
|
+
# TODO: retarget the value_expr_dict when instantiating Columns for a particular TV instance.
|
|
1397
|
+
if effective_version is None and not tbl_md.is_replica:
|
|
1398
|
+
q = sql.select(schema.Table.id).where(
|
|
1399
|
+
sql.text(
|
|
1400
|
+
f"md->'view_md'->'base_versions'->0->>0 = {tbl_id.hex!r} "
|
|
1401
|
+
"AND md->'view_md'->'base_versions'->0->>1 IS NULL"
|
|
1402
|
+
)
|
|
992
1403
|
)
|
|
993
|
-
|
|
994
|
-
mutable_view_ids = [r[0] for r in conn.execute(q).all()]
|
|
1404
|
+
mutable_view_ids = [r[0] for r in conn.execute(q).all()]
|
|
995
1405
|
mutable_views = [TableVersionHandle(id, None) for id in mutable_view_ids]
|
|
996
1406
|
|
|
1407
|
+
tbl_version: TableVersion
|
|
997
1408
|
if view_md is None:
|
|
998
1409
|
# this is a base table
|
|
999
1410
|
tbl_version = TableVersion(
|
|
1000
1411
|
tbl_id, tbl_md, effective_version, schema_version_md, mutable_views=mutable_views
|
|
1001
1412
|
)
|
|
1002
|
-
|
|
1413
|
+
else:
|
|
1414
|
+
assert len(view_md.base_versions) > 0 # a view needs to have a base
|
|
1415
|
+
pure_snapshot = view_md.is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
|
|
1416
|
+
assert not pure_snapshot # a pure snapshot doesn't have a physical table backing it, no point in loading it
|
|
1417
|
+
|
|
1418
|
+
base: TableVersionHandle
|
|
1419
|
+
base_path: Optional[TableVersionPath] = None # needed for live view
|
|
1420
|
+
if view_md.is_snapshot:
|
|
1421
|
+
base = TableVersionHandle(UUID(view_md.base_versions[0][0]), view_md.base_versions[0][1])
|
|
1422
|
+
else:
|
|
1423
|
+
base_path = TableVersionPath.from_md(tbl_md.view_md.base_versions)
|
|
1424
|
+
base = base_path.tbl_version
|
|
1003
1425
|
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1426
|
+
tbl_version = TableVersion(
|
|
1427
|
+
tbl_id,
|
|
1428
|
+
tbl_md,
|
|
1429
|
+
effective_version,
|
|
1430
|
+
schema_version_md,
|
|
1431
|
+
base_path=base_path,
|
|
1432
|
+
base=base,
|
|
1433
|
+
mutable_views=mutable_views,
|
|
1434
|
+
)
|
|
1007
1435
|
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
if view_md.is_snapshot:
|
|
1011
|
-
base = TableVersionHandle(UUID(view_md.base_versions[0][0]), view_md.base_versions[0][1])
|
|
1012
|
-
else:
|
|
1013
|
-
base_path = TableVersionPath.from_md(tbl_md.view_md.base_versions)
|
|
1014
|
-
base = base_path.tbl_version
|
|
1015
|
-
|
|
1016
|
-
tbl_version = TableVersion(
|
|
1017
|
-
tbl_id,
|
|
1018
|
-
tbl_md,
|
|
1019
|
-
effective_version,
|
|
1020
|
-
schema_version_md,
|
|
1021
|
-
base_path=base_path,
|
|
1022
|
-
base=base,
|
|
1023
|
-
mutable_views=mutable_views,
|
|
1024
|
-
)
|
|
1436
|
+
self._tbl_versions[tbl_id, effective_version] = tbl_version
|
|
1437
|
+
tbl_version.init()
|
|
1025
1438
|
return tbl_version
|
|
1026
1439
|
|
|
1440
|
+
def record_column_dependencies(self, tbl_version: TableVersion) -> None:
|
|
1441
|
+
"""Update self._column_dependencies. Only valid for non-snapshot versions."""
|
|
1442
|
+
from pixeltable.exprs import Expr
|
|
1443
|
+
|
|
1444
|
+
assert not tbl_version.is_snapshot
|
|
1445
|
+
dependencies: dict[QColumnId, set[QColumnId]] = {}
|
|
1446
|
+
for col in tbl_version.cols_by_id.values():
|
|
1447
|
+
if col.value_expr_dict is None:
|
|
1448
|
+
continue
|
|
1449
|
+
dependencies[QColumnId(tbl_version.id, col.id)] = Expr.get_refd_column_ids(col.value_expr_dict)
|
|
1450
|
+
self._column_dependencies[tbl_version.id] = dependencies
|
|
1451
|
+
|
|
1027
1452
|
def _init_store(self) -> None:
|
|
1028
1453
|
"""One-time initialization of the stored catalog. Idempotent."""
|
|
1029
1454
|
self.create_user(None)
|