pixeltable 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +2 -1
- pixeltable/catalog/catalog.py +370 -93
- pixeltable/catalog/column.py +6 -4
- pixeltable/catalog/dir.py +5 -5
- pixeltable/catalog/globals.py +14 -16
- pixeltable/catalog/insertable_table.py +6 -8
- pixeltable/catalog/path.py +14 -7
- pixeltable/catalog/table.py +72 -62
- pixeltable/catalog/table_version.py +137 -107
- pixeltable/catalog/table_version_handle.py +3 -0
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/view.py +10 -14
- pixeltable/dataframe.py +5 -3
- pixeltable/env.py +108 -42
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/aggregation_node.py +6 -8
- pixeltable/exec/cache_prefetch_node.py +4 -7
- pixeltable/exec/component_iteration_node.py +1 -3
- pixeltable/exec/data_row_batch.py +1 -2
- pixeltable/exec/exec_context.py +1 -1
- pixeltable/exec/exec_node.py +1 -2
- pixeltable/exec/expr_eval/__init__.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +137 -20
- pixeltable/exec/expr_eval/expr_eval_node.py +43 -64
- pixeltable/exec/expr_eval/globals.py +68 -7
- pixeltable/exec/expr_eval/schedulers.py +25 -23
- pixeltable/exec/in_memory_data_node.py +8 -6
- pixeltable/exec/row_update_node.py +3 -4
- pixeltable/exec/sql_node.py +16 -18
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/column_property_ref.py +1 -1
- pixeltable/exprs/column_ref.py +3 -3
- pixeltable/exprs/compound_predicate.py +1 -1
- pixeltable/exprs/data_row.py +17 -1
- pixeltable/exprs/expr.py +12 -12
- pixeltable/exprs/function_call.py +34 -2
- pixeltable/exprs/json_mapper.py +95 -48
- pixeltable/exprs/json_path.py +4 -9
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +33 -6
- pixeltable/exprs/similarity_expr.py +1 -1
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/ext/__init__.py +1 -1
- pixeltable/ext/functions/__init__.py +1 -1
- pixeltable/ext/functions/whisperx.py +1 -1
- pixeltable/ext/functions/yolox.py +1 -1
- pixeltable/func/__init__.py +1 -1
- pixeltable/func/aggregate_function.py +2 -2
- pixeltable/func/callable_function.py +3 -6
- pixeltable/func/expr_template_function.py +24 -4
- pixeltable/func/function.py +7 -9
- pixeltable/func/function_registry.py +1 -1
- pixeltable/func/query_template_function.py +87 -4
- pixeltable/func/signature.py +1 -1
- pixeltable/func/tools.py +1 -1
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/anthropic.py +2 -2
- pixeltable/functions/audio.py +1 -1
- pixeltable/functions/deepseek.py +1 -1
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/globals.py +6 -6
- pixeltable/functions/huggingface.py +1 -1
- pixeltable/functions/image.py +1 -1
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +2 -2
- pixeltable/functions/replicate.py +1 -1
- pixeltable/functions/string.py +1 -1
- pixeltable/functions/timestamp.py +1 -1
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/vision.py +2 -2
- pixeltable/globals.py +7 -2
- pixeltable/index/embedding_index.py +12 -1
- pixeltable/io/__init__.py +5 -3
- pixeltable/io/fiftyone.py +6 -7
- pixeltable/io/label_studio.py +21 -20
- pixeltable/io/pandas.py +6 -5
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/metadata/__init__.py +6 -4
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_29.py +1 -1
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +26 -1
- pixeltable/plan.py +2 -3
- pixeltable/share/packager.py +8 -24
- pixeltable/share/publish.py +20 -9
- pixeltable/store.py +9 -6
- pixeltable/type_system.py +19 -7
- pixeltable/utils/console_output.py +3 -2
- pixeltable/utils/coroutine.py +3 -3
- pixeltable/utils/dbms.py +66 -0
- pixeltable/utils/documents.py +61 -67
- pixeltable/utils/exception_handler.py +59 -0
- pixeltable/utils/filecache.py +1 -1
- pixeltable/utils/http_server.py +3 -2
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +1 -1
- pixeltable-0.3.12.dist-info/METADATA +436 -0
- pixeltable-0.3.12.dist-info/RECORD +183 -0
- pixeltable/catalog/path_dict.py +0 -169
- pixeltable-0.3.10.dist-info/METADATA +0 -382
- pixeltable-0.3.10.dist-info/RECORD +0 -179
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/catalog.py
CHANGED
|
@@ -10,10 +10,10 @@ from uuid import UUID
|
|
|
10
10
|
import psycopg
|
|
11
11
|
import sqlalchemy as sql
|
|
12
12
|
|
|
13
|
-
import
|
|
14
|
-
import pixeltable.metadata.schema as schema
|
|
13
|
+
from pixeltable import exceptions as excs
|
|
15
14
|
from pixeltable.env import Env
|
|
16
15
|
from pixeltable.iterators import ComponentIterator
|
|
16
|
+
from pixeltable.metadata import schema
|
|
17
17
|
|
|
18
18
|
from .dir import Dir
|
|
19
19
|
from .globals import IfExistsParam, IfNotExistsParam, MediaValidation
|
|
@@ -33,16 +33,6 @@ if TYPE_CHECKING:
|
|
|
33
33
|
_logger = logging.getLogger('pixeltable')
|
|
34
34
|
|
|
35
35
|
|
|
36
|
-
def _lock_str(for_update: bool) -> str:
|
|
37
|
-
return 'X' if for_update else 'S'
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
# TODO: remove once the concurrent update behavior has been debugged
|
|
41
|
-
# def _debug_print(for_update: bool, msg: str) -> None:
|
|
42
|
-
# return
|
|
43
|
-
# print(f'{datetime.datetime.now()}: {_lock_str(for_update)}: {msg}')
|
|
44
|
-
|
|
45
|
-
|
|
46
36
|
def _unpack_row(
|
|
47
37
|
row: Optional[sql.engine.Row], entities: list[type[sql.orm.decl_api.DeclarativeBase]]
|
|
48
38
|
) -> Optional[list[Any]]:
|
|
@@ -79,14 +69,17 @@ def _retry_loop(op: Callable[..., T]) -> Callable[..., T]:
|
|
|
79
69
|
# in order for retry to work, we need to make sure that there aren't any prior db updates
|
|
80
70
|
# that are part of an ongoing transaction
|
|
81
71
|
assert not Env.get().in_xact()
|
|
82
|
-
with Env.get().begin_xact()
|
|
72
|
+
with Env.get().begin_xact():
|
|
83
73
|
return op(*args, **kwargs)
|
|
84
74
|
except sql.exc.DBAPIError as e:
|
|
85
|
-
if isinstance(e.orig, psycopg.errors.SerializationFailure)
|
|
86
|
-
num_remaining_retries
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
75
|
+
if isinstance(e.orig, psycopg.errors.SerializationFailure):
|
|
76
|
+
if num_remaining_retries > 0:
|
|
77
|
+
num_remaining_retries -= 1
|
|
78
|
+
# print(f'serialization failure:\n{e}')
|
|
79
|
+
# print('retrying ************************************************************')
|
|
80
|
+
time.sleep(1)
|
|
81
|
+
else:
|
|
82
|
+
raise excs.Error(f'Serialization retry limit ({_MAX_RETRIES}) exceeded') from e
|
|
90
83
|
else:
|
|
91
84
|
raise
|
|
92
85
|
|
|
@@ -123,20 +116,39 @@ class Catalog:
|
|
|
123
116
|
self._tbls = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
|
|
124
117
|
self._init_store()
|
|
125
118
|
|
|
119
|
+
@classmethod
|
|
120
|
+
def _lock_dir(cls, parent_id: Optional[UUID], dir_id: Optional[UUID], dir_name: Optional[str]) -> None:
|
|
121
|
+
"""Update directory record(s) to sequentialize thread access. Lock is released when transaction commits.
|
|
122
|
+
If dir_id is present, then all other conditions are ignored.
|
|
123
|
+
Note that (parent_id==None) is a valid where condition.
|
|
124
|
+
If dir_id is not specified, the user from the environment is added to the directory filters.
|
|
125
|
+
"""
|
|
126
|
+
user = Env.get().user
|
|
127
|
+
conn = Env.get().conn
|
|
128
|
+
q = sql.update(schema.Dir).values(lock_dummy=1)
|
|
129
|
+
if dir_id is not None:
|
|
130
|
+
q = q.where(schema.Dir.id == dir_id)
|
|
131
|
+
else:
|
|
132
|
+
q = q.where(schema.Dir.parent_id == parent_id)
|
|
133
|
+
if dir_name is not None:
|
|
134
|
+
q = q.where(schema.Dir.md['name'].astext == dir_name)
|
|
135
|
+
if user is not None:
|
|
136
|
+
q = q.where(schema.Dir.md['user'].astext == user)
|
|
137
|
+
conn.execute(q)
|
|
138
|
+
|
|
126
139
|
def get_dir_path(self, dir_id: UUID) -> Path:
|
|
127
140
|
"""Return path for directory with given id"""
|
|
128
141
|
conn = Env.get().conn
|
|
129
142
|
names: list[str] = []
|
|
130
143
|
while True:
|
|
131
144
|
q = sql.select(schema.Dir).where(schema.Dir.id == dir_id)
|
|
132
|
-
# _debug_print(for_update=False, msg=f'dir id={dir_id}')
|
|
133
145
|
row = conn.execute(q).one()
|
|
134
146
|
dir = schema.Dir(**row._mapping)
|
|
135
147
|
if dir.md['name'] == '':
|
|
136
148
|
break
|
|
137
149
|
names.insert(0, dir.md['name'])
|
|
138
150
|
dir_id = dir.parent_id
|
|
139
|
-
return Path('.'.join(names), empty_is_valid=True)
|
|
151
|
+
return Path('.'.join(names), empty_is_valid=True, allow_system_paths=True)
|
|
140
152
|
|
|
141
153
|
@dataclasses.dataclass
|
|
142
154
|
class DirEntry:
|
|
@@ -155,7 +167,6 @@ class Catalog:
|
|
|
155
167
|
result: dict[str, Catalog.DirEntry] = {}
|
|
156
168
|
|
|
157
169
|
q = sql.select(schema.Dir).where(schema.Dir.parent_id == dir_id)
|
|
158
|
-
# _debug_print(for_update=False, msg=f'dirs parent_id={dir_id}')
|
|
159
170
|
rows = conn.execute(q).all()
|
|
160
171
|
for row in rows:
|
|
161
172
|
dir = schema.Dir(**row._mapping)
|
|
@@ -165,7 +176,6 @@ class Catalog:
|
|
|
165
176
|
result[dir.md['name']] = self.DirEntry(dir=dir, dir_entries=dir_contents, table=None)
|
|
166
177
|
|
|
167
178
|
q = sql.select(schema.Table).where(schema.Table.dir_id == dir_id)
|
|
168
|
-
# _debug_print(for_update=False, msg=f'tbls parent_id={dir_id}')
|
|
169
179
|
rows = conn.execute(q).all()
|
|
170
180
|
for row in rows:
|
|
171
181
|
tbl = schema.Table(**row._mapping)
|
|
@@ -175,6 +185,9 @@ class Catalog:
|
|
|
175
185
|
|
|
176
186
|
@_retry_loop
|
|
177
187
|
def move(self, path: Path, new_path: Path) -> None:
|
|
188
|
+
self._move(path, new_path)
|
|
189
|
+
|
|
190
|
+
def _move(self, path: Path, new_path: Path) -> None:
|
|
178
191
|
_, dest_dir, src_obj = self._prepare_dir_op(
|
|
179
192
|
add_dir_path=new_path.parent,
|
|
180
193
|
add_name=new_path.name,
|
|
@@ -222,10 +235,10 @@ class Catalog:
|
|
|
222
235
|
|
|
223
236
|
add_dir: Optional[schema.Dir] = None
|
|
224
237
|
drop_dir: Optional[schema.Dir] = None
|
|
225
|
-
for p in sorted(
|
|
238
|
+
for p in sorted(dir_paths):
|
|
226
239
|
dir = self._get_dir(p, for_update=True)
|
|
227
240
|
if dir is None:
|
|
228
|
-
raise excs.Error(f'Directory {str(p)!r} does not exist')
|
|
241
|
+
raise excs.Error(f'Directory {str(p)!r} does not exist.')
|
|
229
242
|
if p == add_dir_path:
|
|
230
243
|
add_dir = dir
|
|
231
244
|
if p == drop_dir_path:
|
|
@@ -236,14 +249,14 @@ class Catalog:
|
|
|
236
249
|
add_obj = self._get_dir_entry(add_dir.id, add_name, for_update=True)
|
|
237
250
|
if add_obj is not None and raise_if_exists:
|
|
238
251
|
add_path = add_dir_path.append(add_name)
|
|
239
|
-
raise excs.Error(f'Path {str(add_path)!r} already exists')
|
|
252
|
+
raise excs.Error(f'Path {str(add_path)!r} already exists.')
|
|
240
253
|
|
|
241
254
|
drop_obj: Optional[SchemaObject] = None
|
|
242
255
|
if drop_dir is not None:
|
|
243
256
|
drop_path = drop_dir_path.append(drop_name)
|
|
244
257
|
drop_obj = self._get_dir_entry(drop_dir.id, drop_name, for_update=True)
|
|
245
258
|
if drop_obj is None and raise_if_not_exists:
|
|
246
|
-
raise excs.Error(f'Path {str(drop_path)!r} does not exist')
|
|
259
|
+
raise excs.Error(f'Path {str(drop_path)!r} does not exist.')
|
|
247
260
|
if drop_obj is not None and drop_expected is not None and not isinstance(drop_obj, drop_expected):
|
|
248
261
|
raise excs.Error(
|
|
249
262
|
f'{str(drop_path)!r} needs to be a {drop_expected._display_name()} '
|
|
@@ -254,32 +267,35 @@ class Catalog:
|
|
|
254
267
|
return add_obj, add_dir_obj, drop_obj
|
|
255
268
|
|
|
256
269
|
def _get_dir_entry(self, dir_id: UUID, name: str, for_update: bool = False) -> Optional[SchemaObject]:
|
|
270
|
+
user = Env.get().user
|
|
257
271
|
conn = Env.get().conn
|
|
258
272
|
|
|
259
273
|
# check for subdirectory
|
|
260
|
-
q = sql.select(schema.Dir).where(schema.Dir.parent_id == dir_id, schema.Dir.md['name'].astext == name)
|
|
261
274
|
if for_update:
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
# dir_record = schema.Dir(**row._mapping)
|
|
267
|
-
# return Dir(dir_record.id, dir_record.parent_id, name)
|
|
275
|
+
self._lock_dir(dir_id, None, name)
|
|
276
|
+
q = sql.select(schema.Dir).where(
|
|
277
|
+
schema.Dir.parent_id == dir_id, schema.Dir.md['name'].astext == name, schema.Dir.md['user'].astext == user
|
|
278
|
+
)
|
|
268
279
|
rows = conn.execute(q).all()
|
|
280
|
+
# The condition below can occur if there is a synchronization failure across multiple processes
|
|
281
|
+
# It indicates database inconsistency.
|
|
269
282
|
if len(rows) > 1:
|
|
270
|
-
|
|
283
|
+
raise AssertionError(rows)
|
|
271
284
|
if len(rows) == 1:
|
|
272
285
|
dir_record = schema.Dir(**rows[0]._mapping)
|
|
273
286
|
return Dir(dir_record.id, dir_record.parent_id, name)
|
|
274
287
|
|
|
275
288
|
# check for table
|
|
276
|
-
q = sql.select(schema.Table.id).where(
|
|
289
|
+
q = sql.select(schema.Table.id).where(
|
|
290
|
+
schema.Table.dir_id == dir_id,
|
|
291
|
+
schema.Table.md['name'].astext == name,
|
|
292
|
+
schema.Table.md['user'].astext == user,
|
|
293
|
+
)
|
|
277
294
|
if for_update:
|
|
278
295
|
q = q.with_for_update()
|
|
279
|
-
# _debug_print(for_update, f'table name={name!r} parent={dir_id}')
|
|
280
296
|
tbl_id = conn.execute(q).scalar_one_or_none()
|
|
281
297
|
if tbl_id is not None:
|
|
282
|
-
if not
|
|
298
|
+
if tbl_id not in self._tbls:
|
|
283
299
|
self._tbls[tbl_id] = self._load_tbl(tbl_id)
|
|
284
300
|
return self._tbls[tbl_id]
|
|
285
301
|
|
|
@@ -304,26 +320,32 @@ class Catalog:
|
|
|
304
320
|
if path.is_root:
|
|
305
321
|
# the root dir
|
|
306
322
|
if expected is not None and expected is not Dir:
|
|
307
|
-
raise excs.Error(
|
|
323
|
+
raise excs.Error(
|
|
324
|
+
f'{str(path)!r} needs to be a {expected._display_name()} but is a {Dir._display_name()}'
|
|
325
|
+
)
|
|
308
326
|
dir = self._get_dir(path, for_update=for_update)
|
|
327
|
+
if dir is None:
|
|
328
|
+
raise excs.Error(f'Unknown user: {Env.get().user}')
|
|
309
329
|
return Dir(dir.id, dir.parent_id, dir.md['name'])
|
|
310
330
|
|
|
311
331
|
parent_path = path.parent
|
|
312
332
|
parent_dir = self._get_dir(parent_path, for_update=False)
|
|
313
333
|
if parent_dir is None:
|
|
314
|
-
raise excs.Error(f'Directory {parent_path!r} does not exist')
|
|
334
|
+
raise excs.Error(f'Directory {str(parent_path)!r} does not exist.')
|
|
315
335
|
obj = self._get_dir_entry(parent_dir.id, path.name, for_update=for_update)
|
|
316
336
|
|
|
317
337
|
if obj is None and raise_if_not_exists:
|
|
318
|
-
raise excs.Error(f'Path {path!r} does not exist')
|
|
338
|
+
raise excs.Error(f'Path {str(path)!r} does not exist.')
|
|
319
339
|
elif obj is not None and raise_if_exists:
|
|
320
|
-
raise excs.Error(f'Path {path!r} is an existing {type(obj)._display_name()}')
|
|
340
|
+
raise excs.Error(f'Path {str(path)!r} is an existing {type(obj)._display_name()}.')
|
|
321
341
|
elif obj is not None and expected is not None and not isinstance(obj, expected):
|
|
322
|
-
raise excs.Error(
|
|
342
|
+
raise excs.Error(
|
|
343
|
+
f'{str(path)!r} needs to be a {expected._display_name()} but is a {type(obj)._display_name()}.'
|
|
344
|
+
)
|
|
323
345
|
return obj
|
|
324
346
|
|
|
325
347
|
def get_table_by_id(self, tbl_id: UUID) -> Optional[Table]:
|
|
326
|
-
if not
|
|
348
|
+
if tbl_id not in self._tbls:
|
|
327
349
|
tbl = self._load_tbl(tbl_id)
|
|
328
350
|
if tbl is None:
|
|
329
351
|
return None
|
|
@@ -409,6 +431,155 @@ class Catalog:
|
|
|
409
431
|
self._tbls[view._id] = view
|
|
410
432
|
return view
|
|
411
433
|
|
|
434
|
+
@_retry_loop
|
|
435
|
+
def create_replica(self, path: Path, md: list[schema.FullTableMd], if_exists: IfExistsParam) -> Table:
|
|
436
|
+
"""
|
|
437
|
+
Creates table, table_version, and table_schema_version records for a replica with the given metadata.
|
|
438
|
+
The metadata should be presented in standard "ancestor order", with the table being replicated at
|
|
439
|
+
list position 0 and the (root) base table at list position -1.
|
|
440
|
+
"""
|
|
441
|
+
tbl_id = UUID(md[0].tbl_md.tbl_id)
|
|
442
|
+
|
|
443
|
+
# First handle path collisions (if_exists='ignore' or 'replace' or etc).
|
|
444
|
+
existing = self._handle_path_collision(path, View, False, if_exists)
|
|
445
|
+
if existing is not None:
|
|
446
|
+
if existing._id != tbl_id:
|
|
447
|
+
raise excs.Error(
|
|
448
|
+
f"An attempt was made to create a replica table at {path!r} with if_exists='ignore', "
|
|
449
|
+
'but a different table already exists at that location.'
|
|
450
|
+
)
|
|
451
|
+
assert isinstance(existing, View)
|
|
452
|
+
return existing
|
|
453
|
+
|
|
454
|
+
# Ensure that the system directory exists.
|
|
455
|
+
self._create_dir(Path('_system', allow_system_paths=True), if_exists=IfExistsParam.IGNORE, parents=False)
|
|
456
|
+
|
|
457
|
+
# Now check to see if this table already exists in the catalog.
|
|
458
|
+
# TODO: Handle concurrency in create_replica()
|
|
459
|
+
existing = Catalog.get().get_table_by_id(tbl_id)
|
|
460
|
+
if existing is not None:
|
|
461
|
+
existing_path = Path(existing._path(), allow_system_paths=True)
|
|
462
|
+
# It does exist. If it's a non-system table, that's an error: it's already been replicated.
|
|
463
|
+
if not existing_path.is_system_path:
|
|
464
|
+
raise excs.Error(
|
|
465
|
+
f'That table has already been replicated as {existing._path()!r}. \n'
|
|
466
|
+
f'Drop the existing replica if you wish to re-create it.'
|
|
467
|
+
)
|
|
468
|
+
# If it's a system table, then this means it was created at some point as the ancestor of some other
|
|
469
|
+
# table (a snapshot-over-snapshot scenario). In that case, we simply move it to the new (named) location.
|
|
470
|
+
self._move(existing_path, path)
|
|
471
|
+
|
|
472
|
+
# Now store the metadata for this replica. In the case where the table already exists (and was just moved
|
|
473
|
+
# into a named location), this will be a no-op, but it still serves to validate that the newly received
|
|
474
|
+
# metadata is identical to what's in the catalog.
|
|
475
|
+
self.__store_replica_md(path, md[0])
|
|
476
|
+
|
|
477
|
+
# Now store the metadata for all of this table's proper ancestors. If one or more proper ancestors
|
|
478
|
+
# do not yet exist in the store, they will be created as anonymous system tables.
|
|
479
|
+
for ancestor_md in md[1:]:
|
|
480
|
+
ancestor_id = UUID(ancestor_md.tbl_md.tbl_id)
|
|
481
|
+
replica = Catalog.get().get_table_by_id(ancestor_id)
|
|
482
|
+
replica_path: Path
|
|
483
|
+
if replica is None:
|
|
484
|
+
# We've never seen this table before. Create a new anonymous system table for it.
|
|
485
|
+
replica_path = Path(f'_system.replica_{ancestor_id.hex}', allow_system_paths=True)
|
|
486
|
+
else:
|
|
487
|
+
# The table already exists in the catalog. The existing path might be a system path (if the table
|
|
488
|
+
# was created as an anonymous base table of some other table), or it might not (if it's a snapshot
|
|
489
|
+
# that was directly replicated by the user at some point). In either case, use the existing path.
|
|
490
|
+
replica_path = Path(replica._path(), allow_system_paths=True)
|
|
491
|
+
|
|
492
|
+
# Store the metadata; it could be a new version (in which case a new record will be created) or a
|
|
493
|
+
# known version (in which case the newly received metadata will be validated as identical).
|
|
494
|
+
self.__store_replica_md(replica_path, ancestor_md)
|
|
495
|
+
|
|
496
|
+
# Update the catalog (as a final step, after all DB operations completed successfully).
|
|
497
|
+
# Only the table being replicated is actually made visible in the catalog.
|
|
498
|
+
self._tbls[tbl_id] = self._load_tbl(tbl_id)
|
|
499
|
+
return self._tbls[tbl_id]
|
|
500
|
+
|
|
501
|
+
def __store_replica_md(self, path: Path, md: schema.FullTableMd) -> None:
|
|
502
|
+
_logger.info(f'Creating replica table at {path!r} with ID: {md.tbl_md.tbl_id}')
|
|
503
|
+
# TODO: Handle concurrency
|
|
504
|
+
dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
|
|
505
|
+
assert dir is not None
|
|
506
|
+
|
|
507
|
+
conn = Env.get().conn
|
|
508
|
+
tbl_id = md.tbl_md.tbl_id
|
|
509
|
+
|
|
510
|
+
new_tbl_md: Optional[schema.TableMd] = None
|
|
511
|
+
new_version_md: Optional[schema.TableVersionMd] = None
|
|
512
|
+
new_schema_version_md: Optional[schema.TableSchemaVersionMd] = None
|
|
513
|
+
|
|
514
|
+
# We need to ensure that the table metadata in the catalog always reflects the latest observed version of
|
|
515
|
+
# this table. (In particular, if this is a base table, then its table metadata need to be consistent
|
|
516
|
+
# with the latest version of this table having a replicated view somewhere in the catalog.)
|
|
517
|
+
q: sql.Executable = sql.select(schema.Table.md).where(schema.Table.id == tbl_id)
|
|
518
|
+
existing_md_row = conn.execute(q).one_or_none()
|
|
519
|
+
|
|
520
|
+
if existing_md_row is None:
|
|
521
|
+
# No existing table, so create a new record.
|
|
522
|
+
q = sql.insert(schema.Table.__table__).values(
|
|
523
|
+
id=tbl_id,
|
|
524
|
+
dir_id=dir._id,
|
|
525
|
+
md=dataclasses.asdict(
|
|
526
|
+
dataclasses.replace(md.tbl_md, name=path.name, user=Env.get().user, is_replica=True)
|
|
527
|
+
),
|
|
528
|
+
)
|
|
529
|
+
conn.execute(q)
|
|
530
|
+
else:
|
|
531
|
+
assert existing_md_row.md['is_replica']
|
|
532
|
+
if md.tbl_md.current_version > existing_md_row.md['current_version']:
|
|
533
|
+
# New metadata is more recent than the metadata currently stored in the DB; we'll update the record
|
|
534
|
+
# in place in the DB.
|
|
535
|
+
new_tbl_md = dataclasses.replace(md.tbl_md, name=path.name, user=Env.get().user, is_replica=True)
|
|
536
|
+
|
|
537
|
+
# Now see if a TableVersion record already exists in the DB for this table version. If not, insert it. If
|
|
538
|
+
# it already exists, check that the existing record is identical to the new one.
|
|
539
|
+
q = (
|
|
540
|
+
sql.select(schema.TableVersion.md)
|
|
541
|
+
.where(schema.TableVersion.tbl_id == tbl_id)
|
|
542
|
+
.where(sql.text(f"({schema.TableVersion.__table__}.md->>'version')::int = {md.version_md.version}"))
|
|
543
|
+
)
|
|
544
|
+
existing_version_md_row = conn.execute(q).one_or_none()
|
|
545
|
+
if existing_version_md_row is None:
|
|
546
|
+
new_version_md = md.version_md
|
|
547
|
+
else:
|
|
548
|
+
existing_version_md = schema.md_from_dict(schema.TableVersionMd, existing_version_md_row.md)
|
|
549
|
+
if existing_version_md != md.version_md:
|
|
550
|
+
raise excs.Error(
|
|
551
|
+
f'The version metadata for the replica {path!r}:{md.version_md.version} is inconsistent with '
|
|
552
|
+
'the metadata recorded from a prior replica.\n'
|
|
553
|
+
'This is likely due to data corruption in the replicated table.'
|
|
554
|
+
)
|
|
555
|
+
|
|
556
|
+
# Do the same thing for TableSchemaVersion.
|
|
557
|
+
q = (
|
|
558
|
+
sql.select(schema.TableSchemaVersion.md)
|
|
559
|
+
.where(schema.TableSchemaVersion.tbl_id == tbl_id)
|
|
560
|
+
.where(
|
|
561
|
+
sql.text(
|
|
562
|
+
f"({schema.TableSchemaVersion.__table__}.md->>'schema_version')::int = "
|
|
563
|
+
f'{md.schema_version_md.schema_version}'
|
|
564
|
+
)
|
|
565
|
+
)
|
|
566
|
+
)
|
|
567
|
+
existing_schema_version_md_row = conn.execute(q).one_or_none()
|
|
568
|
+
if existing_schema_version_md_row is None:
|
|
569
|
+
new_schema_version_md = md.schema_version_md
|
|
570
|
+
else:
|
|
571
|
+
existing_schema_version_md = schema.md_from_dict(
|
|
572
|
+
schema.TableSchemaVersionMd, existing_schema_version_md_row.md
|
|
573
|
+
)
|
|
574
|
+
if existing_schema_version_md != md.schema_version_md:
|
|
575
|
+
raise excs.Error(
|
|
576
|
+
f'The schema version metadata for the replica {path!r}:{md.schema_version_md.schema_version} '
|
|
577
|
+
'is inconsistent with the metadata recorded from a prior replica.\n'
|
|
578
|
+
'This is likely due to data corruption in the replicated table.'
|
|
579
|
+
)
|
|
580
|
+
|
|
581
|
+
self.store_tbl_md(UUID(tbl_id), new_tbl_md, new_version_md, new_schema_version_md)
|
|
582
|
+
|
|
412
583
|
@_retry_loop
|
|
413
584
|
def get_table(self, path: Path) -> Table:
|
|
414
585
|
obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
|
|
@@ -467,6 +638,9 @@ class Catalog:
|
|
|
467
638
|
|
|
468
639
|
@_retry_loop
|
|
469
640
|
def create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
|
|
641
|
+
return self._create_dir(path, if_exists, parents)
|
|
642
|
+
|
|
643
|
+
def _create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
|
|
470
644
|
# existing = self._handle_path_collision(path, Dir, False, if_exists)
|
|
471
645
|
# if existing is not None:
|
|
472
646
|
# assert isinstance(existing, Dir)
|
|
@@ -475,7 +649,7 @@ class Catalog:
|
|
|
475
649
|
# parent = self._get_schema_object(path.parent)
|
|
476
650
|
# assert parent is not None
|
|
477
651
|
# dir = Dir._create(parent._id, path.name)
|
|
478
|
-
# Env.get().console_logger.info(f'Created directory {path!r}.')
|
|
652
|
+
# Env.get().console_logger.info(f'Created directory {str(path)!r}.')
|
|
479
653
|
# return dir
|
|
480
654
|
|
|
481
655
|
if parents:
|
|
@@ -519,10 +693,11 @@ class Catalog:
|
|
|
519
693
|
q = sql.select(sql.func.count()).select_from(schema.Table).where(schema.Table.dir_id == dir_id)
|
|
520
694
|
num_tbls = conn.execute(q).scalar()
|
|
521
695
|
if num_subdirs + num_tbls > 0:
|
|
522
|
-
raise excs.Error(f'Directory {dir_path!r} is not empty.')
|
|
696
|
+
raise excs.Error(f'Directory {str(dir_path)!r} is not empty.')
|
|
523
697
|
|
|
524
698
|
# drop existing subdirs
|
|
525
|
-
|
|
699
|
+
self._lock_dir(dir_id, None, None)
|
|
700
|
+
dir_q = sql.select(schema.Dir).where(schema.Dir.parent_id == dir_id)
|
|
526
701
|
for row in conn.execute(dir_q).all():
|
|
527
702
|
self._drop_dir(row.id, dir_path.append(row.md['name']), force=True)
|
|
528
703
|
|
|
@@ -535,7 +710,6 @@ class Catalog:
|
|
|
535
710
|
self._drop_tbl(tbl, force=True, is_replace=False)
|
|
536
711
|
|
|
537
712
|
# self.drop_dir(dir_id)
|
|
538
|
-
# _debug_print(for_update=True, msg=f'drop dir id={dir_id}')
|
|
539
713
|
conn.execute(sql.delete(schema.Dir).where(schema.Dir.id == dir_id))
|
|
540
714
|
_logger.info(f'Removed directory {str(dir_path)!r}.')
|
|
541
715
|
|
|
@@ -545,18 +719,17 @@ class Catalog:
|
|
|
545
719
|
q = sql.select(schema.Table.id).where(sql.text(f"md->'view_md'->'base_versions'->0->>0 = {tbl_id.hex!r}"))
|
|
546
720
|
if for_update:
|
|
547
721
|
q = q.with_for_update()
|
|
548
|
-
# _debug_print(for_update=False, msg=f'views of tbl id={tbl_id}')
|
|
549
722
|
result = [r[0] for r in conn.execute(q).all()]
|
|
550
723
|
return result
|
|
551
724
|
|
|
552
725
|
def get_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
|
|
553
726
|
if (tbl_id, effective_version) not in self._tbl_versions:
|
|
554
|
-
self._tbl_versions[
|
|
555
|
-
return self._tbl_versions[
|
|
727
|
+
self._tbl_versions[tbl_id, effective_version] = self._load_tbl_version(tbl_id, effective_version)
|
|
728
|
+
return self._tbl_versions[tbl_id, effective_version]
|
|
556
729
|
|
|
557
730
|
def add_tbl_version(self, tbl_version: TableVersion) -> None:
|
|
558
731
|
"""Explicitly add a TableVersion"""
|
|
559
|
-
self._tbl_versions[
|
|
732
|
+
self._tbl_versions[tbl_version.id, tbl_version.effective_version] = tbl_version
|
|
560
733
|
# if this is a mutable view, also record it in the base
|
|
561
734
|
if tbl_version.is_view and tbl_version.effective_version is None:
|
|
562
735
|
base = tbl_version.base.get()
|
|
@@ -564,15 +737,14 @@ class Catalog:
|
|
|
564
737
|
|
|
565
738
|
def remove_tbl_version(self, tbl_version: TableVersion) -> None:
|
|
566
739
|
assert (tbl_version.id, tbl_version.effective_version) in self._tbl_versions
|
|
567
|
-
del self._tbl_versions[
|
|
740
|
+
del self._tbl_versions[tbl_version.id, tbl_version.effective_version]
|
|
568
741
|
|
|
569
742
|
def get_dir(self, dir_id: UUID, for_update: bool = False) -> Optional[Dir]:
|
|
570
743
|
"""Return the Dir with the given id, or None if it doesn't exist"""
|
|
571
744
|
conn = Env.get().conn
|
|
572
|
-
q = sql.select(schema.Dir).where(schema.Dir.id == dir_id)
|
|
573
745
|
if for_update:
|
|
574
|
-
|
|
575
|
-
|
|
746
|
+
self._lock_dir(None, dir_id, None)
|
|
747
|
+
q = sql.select(schema.Dir).where(schema.Dir.id == dir_id)
|
|
576
748
|
row = conn.execute(q).one_or_none()
|
|
577
749
|
if row is None:
|
|
578
750
|
return None
|
|
@@ -581,28 +753,27 @@ class Catalog:
|
|
|
581
753
|
|
|
582
754
|
def _get_dir(self, path: Path, for_update: bool = False) -> Optional[schema.Dir]:
|
|
583
755
|
"""
|
|
584
|
-
Locking protocol:
|
|
585
|
-
- S locks on all ancestors
|
|
586
|
-
- X lock on dir if for_update == True, otherwise also an S lock
|
|
756
|
+
Locking protocol: X locks on all ancestors
|
|
587
757
|
"""
|
|
758
|
+
user = Env.get().user
|
|
588
759
|
conn = Env.get().conn
|
|
589
760
|
if path.is_root:
|
|
590
|
-
q = sql.select(schema.Dir).where(schema.Dir.parent_id.is_(None))
|
|
591
761
|
if for_update:
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
row = conn.execute(q).
|
|
595
|
-
return schema.Dir(**row._mapping)
|
|
762
|
+
self._lock_dir(parent_id=None, dir_id=None, dir_name='')
|
|
763
|
+
q = sql.select(schema.Dir).where(schema.Dir.parent_id.is_(None), schema.Dir.md['user'].astext == user)
|
|
764
|
+
row = conn.execute(q).one_or_none()
|
|
765
|
+
return schema.Dir(**row._mapping) if row is not None else None
|
|
596
766
|
else:
|
|
597
767
|
parent_dir = self._get_dir(path.parent, for_update=False)
|
|
598
768
|
if parent_dir is None:
|
|
599
769
|
return None
|
|
770
|
+
if for_update:
|
|
771
|
+
self._lock_dir(parent_id=parent_dir.id, dir_id=None, dir_name=path.name)
|
|
600
772
|
q = sql.select(schema.Dir).where(
|
|
601
|
-
schema.Dir.parent_id == parent_dir.id,
|
|
773
|
+
schema.Dir.parent_id == parent_dir.id,
|
|
774
|
+
schema.Dir.md['name'].astext == path.name,
|
|
775
|
+
schema.Dir.md['user'].astext == user,
|
|
602
776
|
)
|
|
603
|
-
if for_update:
|
|
604
|
-
q = q.with_for_update()
|
|
605
|
-
# _debug_print(for_update, f'dir {str(path)}')
|
|
606
777
|
row = conn.execute(q).one_or_none()
|
|
607
778
|
return schema.Dir(**row._mapping) if row is not None else None
|
|
608
779
|
|
|
@@ -625,7 +796,6 @@ class Catalog:
|
|
|
625
796
|
)
|
|
626
797
|
.where(schema.Table.id == tbl_id)
|
|
627
798
|
)
|
|
628
|
-
# _debug_print(for_update=False, msg=f'load table id={tbl_id!r}')
|
|
629
799
|
row = conn.execute(q).one_or_none()
|
|
630
800
|
if row is None:
|
|
631
801
|
return None
|
|
@@ -636,7 +806,7 @@ class Catalog:
|
|
|
636
806
|
if view_md is None:
|
|
637
807
|
# this is a base table
|
|
638
808
|
if (tbl_id, None) not in self._tbl_versions:
|
|
639
|
-
self._tbl_versions[
|
|
809
|
+
self._tbl_versions[tbl_id, None] = self._load_tbl_version(tbl_id, None)
|
|
640
810
|
tbl = InsertableTable(tbl_record.dir_id, TableVersionHandle(tbl_id, None))
|
|
641
811
|
return tbl
|
|
642
812
|
|
|
@@ -657,20 +827,26 @@ class Catalog:
|
|
|
657
827
|
view_path: Optional[TableVersionPath] = None
|
|
658
828
|
for id, effective_version in tbl_version_path[::-1]:
|
|
659
829
|
if (id, effective_version) not in self._tbl_versions:
|
|
660
|
-
self._tbl_versions[
|
|
830
|
+
self._tbl_versions[id, effective_version] = self._load_tbl_version(id, effective_version)
|
|
661
831
|
view_path = TableVersionPath(TableVersionHandle(id, effective_version), base=base_path)
|
|
662
832
|
base_path = view_path
|
|
663
833
|
view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=pure_snapshot)
|
|
664
834
|
# TODO: also load mutable views
|
|
665
835
|
return view
|
|
666
836
|
|
|
667
|
-
def
|
|
668
|
-
|
|
837
|
+
def load_tbl_md(self, tbl_id: UUID, effective_version: Optional[int]) -> schema.FullTableMd:
|
|
838
|
+
"""
|
|
839
|
+
Loads metadata from the store for a given table UUID and version.
|
|
840
|
+
"""
|
|
841
|
+
_logger.info(f'Loading metadata for table version: {tbl_id}:{effective_version}')
|
|
669
842
|
conn = Env.get().conn
|
|
843
|
+
|
|
670
844
|
q = (
|
|
671
|
-
sql.select(schema.Table, schema.TableSchemaVersion)
|
|
845
|
+
sql.select(schema.Table, schema.TableVersion, schema.TableSchemaVersion)
|
|
672
846
|
.select_from(schema.Table)
|
|
673
847
|
.where(schema.Table.id == tbl_id)
|
|
848
|
+
.join(schema.TableVersion)
|
|
849
|
+
.where(schema.TableVersion.tbl_id == tbl_id)
|
|
674
850
|
.join(schema.TableSchemaVersion)
|
|
675
851
|
.where(schema.TableSchemaVersion.tbl_id == tbl_id)
|
|
676
852
|
)
|
|
@@ -682,16 +858,11 @@ class Catalog:
|
|
|
682
858
|
# JOIN TableVersion tv ON (tv.tbl_id = tbl_id AND tv.version = effective_version)
|
|
683
859
|
# JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND tv.md.schema_version = tsv.schema_version)
|
|
684
860
|
# WHERE t.id = tbl_id
|
|
685
|
-
q = (
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
sql.text(
|
|
691
|
-
(
|
|
692
|
-
f"({schema.TableVersion.__table__}.md->>'schema_version')::int = "
|
|
693
|
-
f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
|
|
694
|
-
)
|
|
861
|
+
q = q.where(sql.text(f"({schema.TableVersion.__table__}.md->>'version')::int = {effective_version}")).where(
|
|
862
|
+
sql.text(
|
|
863
|
+
(
|
|
864
|
+
f"({schema.TableVersion.__table__}.md->>'schema_version')::int = "
|
|
865
|
+
f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
|
|
695
866
|
)
|
|
696
867
|
)
|
|
697
868
|
)
|
|
@@ -699,9 +870,15 @@ class Catalog:
|
|
|
699
870
|
# we are loading the current version
|
|
700
871
|
# SELECT *
|
|
701
872
|
# FROM Table t
|
|
873
|
+
# JOIN TableVersion tv ON (tv.tbl_id = tbl_id AND t.current_version = tv.version)
|
|
702
874
|
# JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND t.current_schema_version = tsv.schema_version)
|
|
703
875
|
# WHERE t.id = tbl_id
|
|
704
876
|
q = q.where(
|
|
877
|
+
sql.text(
|
|
878
|
+
f"({schema.Table.__table__}.md->>'current_version')::int = "
|
|
879
|
+
f'{schema.TableVersion.__table__}.{schema.TableVersion.version.name}'
|
|
880
|
+
)
|
|
881
|
+
).where(
|
|
705
882
|
sql.text(
|
|
706
883
|
(
|
|
707
884
|
f"({schema.Table.__table__}.md->>'current_schema_version')::int = "
|
|
@@ -711,11 +888,100 @@ class Catalog:
|
|
|
711
888
|
)
|
|
712
889
|
|
|
713
890
|
row = conn.execute(q).one_or_none()
|
|
714
|
-
|
|
891
|
+
assert row is not None, f'Table record not found: {tbl_id}:{effective_version}'
|
|
892
|
+
tbl_record, version_record, schema_version_record = _unpack_row(
|
|
893
|
+
row, [schema.Table, schema.TableVersion, schema.TableSchemaVersion]
|
|
894
|
+
)
|
|
895
|
+
assert tbl_record.id == tbl_id
|
|
715
896
|
tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
|
|
897
|
+
version_md = schema.md_from_dict(schema.TableVersionMd, version_record.md)
|
|
716
898
|
schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
|
|
899
|
+
|
|
900
|
+
return schema.FullTableMd(tbl_md, version_md, schema_version_md)
|
|
901
|
+
|
|
902
|
+
def store_tbl_md(
|
|
903
|
+
self,
|
|
904
|
+
tbl_id: UUID,
|
|
905
|
+
tbl_md: Optional[schema.TableMd],
|
|
906
|
+
version_md: Optional[schema.TableVersionMd],
|
|
907
|
+
schema_version_md: Optional[schema.TableSchemaVersionMd],
|
|
908
|
+
) -> None:
|
|
909
|
+
"""
|
|
910
|
+
Stores metadata to the DB. If specified, `tbl_md` will be updated in place (only one such record can exist
|
|
911
|
+
per UUID); `version_md` and `schema_version_md` will be inserted as new records.
|
|
912
|
+
|
|
913
|
+
If inserting `version_md` or `schema_version_md` would be a primary key violation, an exception will be raised.
|
|
914
|
+
"""
|
|
915
|
+
conn = Env.get().conn
|
|
916
|
+
|
|
917
|
+
if tbl_md is not None:
|
|
918
|
+
result = conn.execute(
|
|
919
|
+
sql.update(schema.Table.__table__)
|
|
920
|
+
.values({schema.Table.md: dataclasses.asdict(tbl_md)})
|
|
921
|
+
.where(schema.Table.id == tbl_id)
|
|
922
|
+
)
|
|
923
|
+
assert result.rowcount == 1, result.rowcount
|
|
924
|
+
|
|
925
|
+
if version_md is not None:
|
|
926
|
+
conn.execute(
|
|
927
|
+
sql.insert(schema.TableVersion.__table__).values(
|
|
928
|
+
tbl_id=tbl_id, version=version_md.version, md=dataclasses.asdict(version_md)
|
|
929
|
+
)
|
|
930
|
+
)
|
|
931
|
+
|
|
932
|
+
if schema_version_md is not None:
|
|
933
|
+
conn.execute(
|
|
934
|
+
sql.insert(schema.TableSchemaVersion.__table__).values(
|
|
935
|
+
tbl_id=tbl_id,
|
|
936
|
+
schema_version=schema_version_md.schema_version,
|
|
937
|
+
md=dataclasses.asdict(schema_version_md),
|
|
938
|
+
)
|
|
939
|
+
)
|
|
940
|
+
|
|
941
|
+
def delete_tbl_md(self, tbl_id: UUID) -> None:
|
|
942
|
+
"""
|
|
943
|
+
Deletes all table metadata from the store for the given table UUID.
|
|
944
|
+
"""
|
|
945
|
+
conn = Env.get().conn
|
|
946
|
+
conn.execute(sql.delete(schema.TableSchemaVersion.__table__).where(schema.TableSchemaVersion.tbl_id == tbl_id))
|
|
947
|
+
conn.execute(sql.delete(schema.TableVersion.__table__).where(schema.TableVersion.tbl_id == tbl_id))
|
|
948
|
+
conn.execute(sql.delete(schema.Table.__table__).where(schema.Table.id == tbl_id))
|
|
949
|
+
|
|
950
|
+
def load_replica_md(self, tbl: Table) -> list[schema.FullTableMd]:
|
|
951
|
+
"""
|
|
952
|
+
Load metadata for the given table along with all its ancestors. The values of TableMd.current_version and
|
|
953
|
+
TableMd.current_schema_version will be adjusted to ensure that the metadata represent a valid (internally
|
|
954
|
+
consistent) table state.
|
|
955
|
+
"""
|
|
956
|
+
# TODO: First acquire X-locks for all relevant metadata entries
|
|
957
|
+
|
|
958
|
+
# Load metadata for every table in the TableVersionPath for `tbl`.
|
|
959
|
+
md = [self.load_tbl_md(tv.id, tv.effective_version) for tv in tbl._tbl_version_path.get_tbl_versions()]
|
|
960
|
+
|
|
961
|
+
# If `tbl` is a named pure snapshot, we're not quite done, since the snapshot metadata won't appear in the
|
|
962
|
+
# TableVersionPath. We need to prepend it separately.
|
|
963
|
+
if tbl._id != tbl._tbl_version.id:
|
|
964
|
+
snapshot_md = self.load_tbl_md(tbl._id, 0)
|
|
965
|
+
md = [snapshot_md, *md]
|
|
966
|
+
|
|
967
|
+
for ancestor_md in md[1:]:
|
|
968
|
+
# For replica metadata, we guarantee that the current_version and current_schema_version of TableMd
|
|
969
|
+
# match the corresponding values in TableVersionMd and TableSchemaVersionMd. This is to ensure that,
|
|
970
|
+
# when the metadata is later stored in the catalog of a different Pixeltable instance, the values of
|
|
971
|
+
# current_version and current_schema_version will always point to versions that are known to the
|
|
972
|
+
# destination catalog.
|
|
973
|
+
ancestor_md.tbl_md.current_version = ancestor_md.version_md.version
|
|
974
|
+
ancestor_md.tbl_md.current_schema_version = ancestor_md.schema_version_md.schema_version
|
|
975
|
+
|
|
976
|
+
return md
|
|
977
|
+
|
|
978
|
+
def _load_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
|
|
979
|
+
tbl_md, _, schema_version_md = self.load_tbl_md(tbl_id, effective_version)
|
|
717
980
|
view_md = tbl_md.view_md
|
|
718
981
|
|
|
982
|
+
_logger.info(f'Loading table version: {tbl_id}:{effective_version}')
|
|
983
|
+
conn = Env.get().conn
|
|
984
|
+
|
|
719
985
|
# load mutable view ids
|
|
720
986
|
q = sql.select(schema.Table.id).where(
|
|
721
987
|
sql.text(
|
|
@@ -729,7 +995,7 @@ class Catalog:
|
|
|
729
995
|
if view_md is None:
|
|
730
996
|
# this is a base table
|
|
731
997
|
tbl_version = TableVersion(
|
|
732
|
-
|
|
998
|
+
tbl_id, tbl_md, effective_version, schema_version_md, mutable_views=mutable_views
|
|
733
999
|
)
|
|
734
1000
|
return tbl_version
|
|
735
1001
|
|
|
@@ -746,7 +1012,7 @@ class Catalog:
|
|
|
746
1012
|
base = base_path.tbl_version
|
|
747
1013
|
|
|
748
1014
|
tbl_version = TableVersion(
|
|
749
|
-
|
|
1015
|
+
tbl_id,
|
|
750
1016
|
tbl_md,
|
|
751
1017
|
effective_version,
|
|
752
1018
|
schema_version_md,
|
|
@@ -758,16 +1024,25 @@ class Catalog:
|
|
|
758
1024
|
|
|
759
1025
|
def _init_store(self) -> None:
|
|
760
1026
|
"""One-time initialization of the stored catalog. Idempotent."""
|
|
1027
|
+
self.create_user(None)
|
|
1028
|
+
_logger.info('Initialized catalog.')
|
|
1029
|
+
|
|
1030
|
+
def create_user(self, user: Optional[str]) -> None:
|
|
1031
|
+
"""
|
|
1032
|
+
Creates a catalog record (root directory) for the specified user, if one does not already exist.
|
|
1033
|
+
"""
|
|
761
1034
|
with Env.get().begin_xact():
|
|
762
1035
|
session = Env.get().session
|
|
763
|
-
if
|
|
1036
|
+
# See if there are any directories in the catalog matching the specified user.
|
|
1037
|
+
if session.query(schema.Dir).where(schema.Dir.md['user'].astext == user).count() > 0:
|
|
1038
|
+
# At least one such directory exists; no need to create a new one.
|
|
764
1039
|
return
|
|
765
|
-
|
|
766
|
-
dir_md = schema.DirMd(name='', user=
|
|
1040
|
+
|
|
1041
|
+
dir_md = schema.DirMd(name='', user=user, additional_md={})
|
|
767
1042
|
dir_record = schema.Dir(parent_id=None, md=dataclasses.asdict(dir_md))
|
|
768
1043
|
session.add(dir_record)
|
|
769
1044
|
session.flush()
|
|
770
|
-
_logger.info(f'
|
|
1045
|
+
_logger.info(f'Added root directory record for user: {user!r}')
|
|
771
1046
|
|
|
772
1047
|
def _handle_path_collision(
|
|
773
1048
|
self, path: Path, expected_obj_type: type[SchemaObject], expected_snapshot: bool, if_exists: IfExistsParam
|
|
@@ -775,13 +1050,14 @@ class Catalog:
|
|
|
775
1050
|
obj, _, _ = self._prepare_dir_op(add_dir_path=path.parent, add_name=path.name)
|
|
776
1051
|
|
|
777
1052
|
if if_exists == IfExistsParam.ERROR and obj is not None:
|
|
778
|
-
raise excs.Error(f'Path {path!r} is an existing {type(obj)._display_name()}')
|
|
1053
|
+
raise excs.Error(f'Path {str(path)!r} is an existing {type(obj)._display_name()}')
|
|
779
1054
|
else:
|
|
780
1055
|
is_snapshot = isinstance(obj, View) and obj._tbl_version_path.is_snapshot()
|
|
781
1056
|
if obj is not None and (not isinstance(obj, expected_obj_type) or (expected_snapshot and not is_snapshot)):
|
|
782
1057
|
obj_type_str = 'snapshot' if expected_snapshot else expected_obj_type._display_name()
|
|
783
1058
|
raise excs.Error(
|
|
784
|
-
f'Path {path!r} already exists but is not a {obj_type_str}.
|
|
1059
|
+
f'Path {str(path)!r} already exists but is not a {obj_type_str}. '
|
|
1060
|
+
f'Cannot {if_exists.name.lower()} it.'
|
|
785
1061
|
)
|
|
786
1062
|
|
|
787
1063
|
if obj is None:
|
|
@@ -794,7 +1070,8 @@ class Catalog:
|
|
|
794
1070
|
dir_contents = self._get_dir_contents(obj._id)
|
|
795
1071
|
if len(dir_contents) > 0 and if_exists == IfExistsParam.REPLACE:
|
|
796
1072
|
raise excs.Error(
|
|
797
|
-
f'Directory {path!r} already exists and is not empty.
|
|
1073
|
+
f'Directory {str(path)!r} already exists and is not empty. '
|
|
1074
|
+
'Use `if_exists="replace_force"` to replace it.'
|
|
798
1075
|
)
|
|
799
1076
|
self._drop_dir(obj._id, path, force=True)
|
|
800
1077
|
else:
|