pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/catalog/catalog.py
CHANGED
|
@@ -1,160 +1,2435 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import dataclasses
|
|
4
|
+
import functools
|
|
4
5
|
import logging
|
|
5
|
-
|
|
6
|
+
import random
|
|
7
|
+
import time
|
|
8
|
+
from collections import defaultdict
|
|
9
|
+
from contextlib import contextmanager
|
|
10
|
+
from enum import Enum
|
|
11
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterator, TypeVar
|
|
6
12
|
from uuid import UUID
|
|
7
13
|
|
|
14
|
+
import psycopg
|
|
8
15
|
import sqlalchemy as sql
|
|
9
|
-
import sqlalchemy.
|
|
16
|
+
import sqlalchemy.exc as sql_exc
|
|
10
17
|
|
|
11
|
-
from
|
|
12
|
-
from .
|
|
18
|
+
from pixeltable import exceptions as excs
|
|
19
|
+
from pixeltable.env import Env
|
|
20
|
+
from pixeltable.iterators import ComponentIterator
|
|
21
|
+
from pixeltable.metadata import schema
|
|
22
|
+
from pixeltable.utils.exception_handler import run_cleanup
|
|
23
|
+
|
|
24
|
+
from .column import Column
|
|
25
|
+
from .dir import Dir
|
|
26
|
+
from .globals import IfExistsParam, IfNotExistsParam, MediaValidation, QColumnId
|
|
27
|
+
from .insertable_table import InsertableTable
|
|
28
|
+
from .path import Path
|
|
29
|
+
from .schema_object import SchemaObject
|
|
13
30
|
from .table import Table
|
|
14
|
-
from .
|
|
31
|
+
from .table_version import TableVersion, TableVersionKey, TableVersionMd
|
|
32
|
+
from .table_version_handle import TableVersionHandle
|
|
33
|
+
from .table_version_path import TableVersionPath
|
|
34
|
+
from .tbl_ops import TableOp
|
|
35
|
+
from .update_status import UpdateStatus
|
|
36
|
+
from .view import View
|
|
37
|
+
|
|
38
|
+
if TYPE_CHECKING:
|
|
39
|
+
from pixeltable.plan import SampleClause
|
|
40
|
+
|
|
41
|
+
from .. import exprs
|
|
15
42
|
|
|
16
|
-
import pixeltable.env as env
|
|
17
|
-
import pixeltable.metadata.schema as schema
|
|
18
43
|
|
|
19
44
|
_logger = logging.getLogger('pixeltable')
|
|
20
45
|
|
|
46
|
+
|
|
47
|
+
def _unpack_row(row: sql.engine.Row | None, entities: list[type[sql.orm.decl_api.DeclarativeBase]]) -> list[Any] | None:
|
|
48
|
+
"""Convert a Row result into a list of entity instances.
|
|
49
|
+
|
|
50
|
+
Assumes that the query contains a select() of exactly those entities.
|
|
51
|
+
"""
|
|
52
|
+
if row is None:
|
|
53
|
+
return None
|
|
54
|
+
|
|
55
|
+
result: list[sql.orm.decl_api.DeclarativeBase] = []
|
|
56
|
+
column_offset = 0
|
|
57
|
+
|
|
58
|
+
for entity in entities:
|
|
59
|
+
num_cols = len(entity.__table__.columns)
|
|
60
|
+
data = {name: row[column_offset + i] for i, name in enumerate(entity.__table__.columns.keys())}
|
|
61
|
+
inst = entity(**data)
|
|
62
|
+
result.append(inst)
|
|
63
|
+
column_offset += num_cols
|
|
64
|
+
|
|
65
|
+
return result
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def md_dict_factory(data: list[tuple[str, Any]]) -> dict:
|
|
69
|
+
"""Use this to serialize TableMd instances with asdict()"""
|
|
70
|
+
# serialize enums to their values
|
|
71
|
+
return {k: v.value if isinstance(v, Enum) else v for k, v in data}
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# -1: unlimited
|
|
75
|
+
# for now, we don't limit the number of retries, because we haven't seen situations where the actual number of retries
|
|
76
|
+
# grows uncontrollably
|
|
77
|
+
_MAX_RETRIES = -1
|
|
78
|
+
|
|
79
|
+
T = TypeVar('T')
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def retry_loop(
|
|
83
|
+
*, tbl: TableVersionPath | None = None, for_write: bool, lock_mutable_tree: bool = False
|
|
84
|
+
) -> Callable[[Callable[..., T]], Callable[..., T]]:
|
|
85
|
+
def decorator(op: Callable[..., T]) -> Callable[..., T]:
|
|
86
|
+
@functools.wraps(op)
|
|
87
|
+
def loop(*args: Any, **kwargs: Any) -> T:
|
|
88
|
+
cat = Catalog.get()
|
|
89
|
+
# retry_loop() is reentrant
|
|
90
|
+
if cat._in_retry_loop:
|
|
91
|
+
return op(*args, **kwargs)
|
|
92
|
+
|
|
93
|
+
num_retries = 0
|
|
94
|
+
while True:
|
|
95
|
+
cat._in_retry_loop = True
|
|
96
|
+
try:
|
|
97
|
+
# in order for retry to work, we need to make sure that there aren't any prior db updates
|
|
98
|
+
# that are part of an ongoing transaction
|
|
99
|
+
assert not Env.get().in_xact
|
|
100
|
+
with Catalog.get().begin_xact(
|
|
101
|
+
tbl=tbl,
|
|
102
|
+
for_write=for_write,
|
|
103
|
+
convert_db_excs=False,
|
|
104
|
+
lock_mutable_tree=lock_mutable_tree,
|
|
105
|
+
finalize_pending_ops=True,
|
|
106
|
+
):
|
|
107
|
+
return op(*args, **kwargs)
|
|
108
|
+
except PendingTableOpsError as e:
|
|
109
|
+
Env.get().console_logger.debug(f'retry_loop(): finalizing pending ops for {e.tbl_id}')
|
|
110
|
+
Catalog.get()._finalize_pending_ops(e.tbl_id)
|
|
111
|
+
except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
|
|
112
|
+
# TODO: what other exceptions should we be looking for?
|
|
113
|
+
if isinstance(
|
|
114
|
+
# TODO: Investigate whether DeadlockDetected points to a bug in our locking protocol,
|
|
115
|
+
# which is supposed to be deadlock-free.
|
|
116
|
+
e.orig,
|
|
117
|
+
(
|
|
118
|
+
psycopg.errors.SerializationFailure,
|
|
119
|
+
psycopg.errors.LockNotAvailable,
|
|
120
|
+
psycopg.errors.DeadlockDetected,
|
|
121
|
+
),
|
|
122
|
+
):
|
|
123
|
+
if num_retries < _MAX_RETRIES or _MAX_RETRIES == -1:
|
|
124
|
+
num_retries += 1
|
|
125
|
+
_logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
|
|
126
|
+
time.sleep(random.uniform(0.1, 0.5))
|
|
127
|
+
else:
|
|
128
|
+
raise excs.Error(f'Serialization retry limit ({_MAX_RETRIES}) exceeded') from e
|
|
129
|
+
else:
|
|
130
|
+
raise
|
|
131
|
+
except Exception as e:
|
|
132
|
+
# for informational/debugging purposes
|
|
133
|
+
_logger.debug(f'retry_loop(): passing along {e}')
|
|
134
|
+
raise
|
|
135
|
+
finally:
|
|
136
|
+
cat._in_retry_loop = False
|
|
137
|
+
|
|
138
|
+
return loop
|
|
139
|
+
|
|
140
|
+
return decorator
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class PendingTableOpsError(Exception):
|
|
144
|
+
tbl_id: UUID
|
|
145
|
+
|
|
146
|
+
def __init__(self, tbl_id: UUID) -> None:
|
|
147
|
+
self.tbl_id = tbl_id
|
|
148
|
+
|
|
149
|
+
|
|
21
150
|
class Catalog:
|
|
22
|
-
"""
|
|
23
|
-
|
|
151
|
+
"""The functional interface to getting access to catalog objects
|
|
152
|
+
|
|
153
|
+
All interface functions must be called in the context of a transaction, started with Catalog.begin_xact() or
|
|
154
|
+
via retry_loop().
|
|
155
|
+
|
|
156
|
+
When calling functions that involve Table or TableVersion instances, the catalog needs to get a chance to finalize
|
|
157
|
+
pending ops against those tables. To that end,
|
|
158
|
+
- use begin_xact(tbl) or begin_xact(tbl_id) if only accessing a single table
|
|
159
|
+
- use retry_loop() when accessing multiple tables (eg, pxt.ls())
|
|
160
|
+
|
|
161
|
+
Caching and invalidation of metadata:
|
|
162
|
+
- Catalog caches TableVersion instances in order to avoid excessive metadata loading
|
|
163
|
+
- for any specific table version (ie, combination of id and effective version) there can be only a single
|
|
164
|
+
Tableversion instance in circulation; the reason is that each TV instance has its own store_tbl.sa_tbl, and
|
|
165
|
+
mixing multiple instances of sqlalchemy Table objects in the same query (for the same underlying table) leads to
|
|
166
|
+
duplicate references to that table in the From clause (ie, incorrect Cartesian products)
|
|
167
|
+
- in order to allow multiple concurrent Python processes to perform updates (data and/or schema) against a shared
|
|
168
|
+
Pixeltable instance, Catalog needs to reload metadata from the store when there are changes
|
|
169
|
+
- concurrent changes are detected by comparing TableVersion.version/view_sn with the stored current version
|
|
170
|
+
(TableMd.current_version/view_sn)
|
|
171
|
+
- cached live TableVersion instances (those with effective_version == None) are validated against the stored
|
|
172
|
+
metadata on transaction boundaries; this is recorded in TableVersion.is_validated
|
|
173
|
+
- metadata validation is only needed for live TableVersion instances (snapshot instances are immutable)
|
|
174
|
+
"""
|
|
175
|
+
|
|
176
|
+
_instance: Catalog | None = None
|
|
177
|
+
|
|
178
|
+
# cached TableVersion instances; key: [id, version, anchor_tbl_id]
|
|
179
|
+
# - mutable version of a table: version == None (even though TableVersion.version is set correctly)
|
|
180
|
+
# - snapshot versions: records the version of the snapshot
|
|
181
|
+
# - anchored versions: records the tbl_id of the anchor table (used when the table is a replica)
|
|
182
|
+
_tbl_versions: dict[TableVersionKey, TableVersion]
|
|
183
|
+
_tbls: dict[tuple[UUID, int | None], Table]
|
|
184
|
+
_in_write_xact: bool # True if we're in a write transaction
|
|
185
|
+
_x_locked_tbl_ids: set[UUID] # non-empty for write transactions
|
|
186
|
+
_modified_tvs: set[TableVersionHandle] # TableVersion instances modified in the current transaction
|
|
187
|
+
_roll_forward_ids: set[UUID] # ids of Tables that have pending TableOps
|
|
188
|
+
_undo_actions: list[Callable[[], None]]
|
|
189
|
+
_in_retry_loop: bool
|
|
190
|
+
|
|
191
|
+
# cached column dependencies
|
|
192
|
+
# - key: table id, value: mapping from column id to its dependencies
|
|
193
|
+
# - only maintained for dependencies between non-snapshot table versions
|
|
194
|
+
# - can contain stale entries (stemming from invalidated TV instances)
|
|
195
|
+
_column_dependencies: dict[UUID, dict[QColumnId, set[QColumnId]]]
|
|
196
|
+
|
|
197
|
+
# column dependents are recomputed at the beginning of every write transaction and only reflect the locked tree
|
|
198
|
+
_column_dependents: dict[QColumnId, set[QColumnId]] | None
|
|
24
199
|
|
|
25
200
|
@classmethod
|
|
26
201
|
def get(cls) -> Catalog:
|
|
27
202
|
if cls._instance is None:
|
|
28
203
|
cls._instance = cls()
|
|
29
|
-
with orm.Session(env.Env.get().engine, future=True) as session:
|
|
30
|
-
cls._instance._load_table_versions(session)
|
|
31
|
-
#cls._instance._load_functions(session)
|
|
32
204
|
return cls._instance
|
|
33
205
|
|
|
34
206
|
@classmethod
|
|
35
207
|
def clear(cls) -> None:
|
|
36
208
|
"""Remove the instance. Used for testing."""
|
|
209
|
+
if cls._instance is not None:
|
|
210
|
+
# invalidate all existing instances to force reloading of metadata
|
|
211
|
+
for tbl_version in cls._instance._tbl_versions.values():
|
|
212
|
+
tbl_version.is_validated = False
|
|
37
213
|
cls._instance = None
|
|
38
214
|
|
|
39
215
|
def __init__(self) -> None:
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
self.
|
|
216
|
+
self._tbl_versions = {}
|
|
217
|
+
self._tbls = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
|
|
218
|
+
self._in_write_xact = False
|
|
219
|
+
self._x_locked_tbl_ids = set()
|
|
220
|
+
self._modified_tvs = set()
|
|
221
|
+
self._roll_forward_ids = set()
|
|
222
|
+
self._undo_actions = []
|
|
223
|
+
self._in_retry_loop = False
|
|
224
|
+
self._column_dependencies = {}
|
|
225
|
+
self._column_dependents = None
|
|
226
|
+
self._init_store()
|
|
44
227
|
|
|
45
|
-
|
|
46
|
-
self
|
|
228
|
+
def _active_tbl_clause(
|
|
229
|
+
self, *, tbl_id: UUID | None = None, dir_id: UUID | None = None, tbl_name: str | None = None
|
|
230
|
+
) -> sql.ColumnElement[bool]:
|
|
231
|
+
"""Create a clause that filters out dropped tables in addition to the specified conditions."""
|
|
232
|
+
# avoid tables that are in the process of getting dropped
|
|
233
|
+
clause = sql.func.coalesce(schema.Table.md['pending_stmt'].astext, '-1') != str(
|
|
234
|
+
schema.TableStatement.DROP_TABLE.value
|
|
235
|
+
)
|
|
236
|
+
if tbl_id is not None:
|
|
237
|
+
clause = sql.and_(schema.Table.id == tbl_id, clause)
|
|
238
|
+
if dir_id is not None:
|
|
239
|
+
clause = sql.and_(schema.Table.dir_id == dir_id, clause)
|
|
240
|
+
if tbl_name is not None:
|
|
241
|
+
clause = sql.and_(schema.Table.md['name'].astext == tbl_name, clause)
|
|
242
|
+
return clause
|
|
47
243
|
|
|
48
|
-
|
|
49
|
-
|
|
244
|
+
def _dropped_tbl_error_msg(self, tbl_id: UUID) -> str:
|
|
245
|
+
return f'Table was dropped (no record found for {tbl_id})'
|
|
246
|
+
|
|
247
|
+
def validate(self) -> None:
|
|
248
|
+
"""Validate structural consistency of cached metadata"""
|
|
249
|
+
for (tbl_id, effective_version, anchor_tbl_id), tbl_version in self._tbl_versions.items():
|
|
250
|
+
assert tbl_id == tbl_version.id, f'{tbl_id} != {tbl_version.id}'
|
|
251
|
+
assert effective_version is None or anchor_tbl_id is None
|
|
252
|
+
assert tbl_version.effective_version == tbl_version.version or tbl_version.effective_version is None, (
|
|
253
|
+
f'{tbl_version.effective_version} != {tbl_version.version} for id {tbl_id}'
|
|
254
|
+
)
|
|
255
|
+
assert effective_version == tbl_version.effective_version, (
|
|
256
|
+
f'{effective_version} != {tbl_version.effective_version} for id {tbl_id}'
|
|
257
|
+
)
|
|
258
|
+
assert len(tbl_version.mutable_views) == 0 or tbl_version.is_mutable, (
|
|
259
|
+
f'snapshot_id={tbl_version.id} mutable_views={tbl_version.mutable_views}'
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
assert anchor_tbl_id is None or tbl_version.is_replica
|
|
263
|
+
|
|
264
|
+
if tbl_version.is_view and tbl_version.is_mutable and tbl_version.is_validated:
|
|
265
|
+
# make sure this mutable view is recorded in a mutable base
|
|
266
|
+
base = tbl_version.base
|
|
267
|
+
assert base is not None
|
|
268
|
+
if base.effective_version is None:
|
|
269
|
+
key = TableVersionKey(base.id, None, None)
|
|
270
|
+
assert key in self._tbl_versions
|
|
271
|
+
base_tv = self._tbl_versions[key]
|
|
272
|
+
if not base_tv.is_validated:
|
|
273
|
+
continue
|
|
274
|
+
mutable_view_ids = ', '.join(str(tv.id) for tv in self._tbl_versions[key].mutable_views)
|
|
275
|
+
mutable_view_names = ', '.join(
|
|
276
|
+
tv._tbl_version.name
|
|
277
|
+
for tv in self._tbl_versions[key].mutable_views
|
|
278
|
+
if tv._tbl_version is not None
|
|
279
|
+
)
|
|
280
|
+
assert tbl_version.handle in self._tbl_versions[key].mutable_views, (
|
|
281
|
+
f'{tbl_version.name} ({tbl_version.id}) missing in {mutable_view_ids} ({mutable_view_names})'
|
|
282
|
+
)
|
|
283
|
+
|
|
284
|
+
if len(tbl_version.mutable_views) > 0:
|
|
285
|
+
# make sure we also loaded mutable view metadata, which is needed to detect column dependencies
|
|
286
|
+
for v in tbl_version.mutable_views:
|
|
287
|
+
assert v.effective_version is None, f'{v.id}:{v.effective_version}'
|
|
288
|
+
|
|
289
|
+
def mark_modified_tvs(self, *handle: TableVersionHandle) -> None:
|
|
290
|
+
"""Record that the given TableVersion instances were modified in the current transaction"""
|
|
291
|
+
assert Env.get().in_xact
|
|
292
|
+
self._modified_tvs.update(handle)
|
|
293
|
+
|
|
294
|
+
@contextmanager
|
|
295
|
+
def begin_xact(
|
|
296
|
+
self,
|
|
297
|
+
*,
|
|
298
|
+
tbl: TableVersionPath | None = None,
|
|
299
|
+
tbl_id: UUID | None = None,
|
|
300
|
+
for_write: bool = False,
|
|
301
|
+
lock_mutable_tree: bool = False,
|
|
302
|
+
convert_db_excs: bool = True,
|
|
303
|
+
finalize_pending_ops: bool = True,
|
|
304
|
+
) -> Iterator[sql.Connection]:
|
|
305
|
+
"""
|
|
306
|
+
Return a context manager that yields a connection to the database. Idempotent.
|
|
307
|
+
|
|
308
|
+
It is mandatory to call this method, not Env.begin_xact(), if the transaction accesses any table data
|
|
309
|
+
or metadata.
|
|
310
|
+
|
|
311
|
+
If tbl != None, follows this locking protocol:
|
|
312
|
+
- validates/reloads the TableVersion instances of tbl's ancestors (in the hope that this reduces potential
|
|
313
|
+
SerializationErrors later on)
|
|
314
|
+
- if for_write == True, x-locks Table record (by updating Table.lock_dummy; see _acquire_tbl_lock())
|
|
315
|
+
- if for_write == False, validates TableVersion instance
|
|
316
|
+
- if lock_mutable_tree == True, also x-locks all mutable views of the table
|
|
317
|
+
- this needs to be done in a retry loop, because Postgres can decide to abort the transaction
|
|
318
|
+
(SerializationFailure, LockNotAvailable)
|
|
319
|
+
- for that reason, we do all lock acquisition prior to doing any real work (eg, compute column values),
|
|
320
|
+
to minimize the probability of losing that work due to a forced abort
|
|
321
|
+
|
|
322
|
+
If convert_db_excs == True, converts DBAPIErrors into excs.Errors.
|
|
323
|
+
"""
|
|
324
|
+
assert tbl is None or tbl_id is None # at most one can be specified
|
|
325
|
+
if Env.get().in_xact:
|
|
326
|
+
# make sure that we requested the required table lock at the beginning of the transaction
|
|
327
|
+
if for_write:
|
|
328
|
+
if tbl is not None:
|
|
329
|
+
assert tbl.tbl_id in self._x_locked_tbl_ids, f'{tbl.tbl_id} not in {self._x_locked_tbl_ids}'
|
|
330
|
+
elif tbl_id is not None:
|
|
331
|
+
assert tbl_id in self._x_locked_tbl_ids, f'{tbl_id} not in {self._x_locked_tbl_ids}'
|
|
332
|
+
yield Env.get().conn
|
|
333
|
+
return
|
|
334
|
+
|
|
335
|
+
# tv_msg = '\n'.join(
|
|
336
|
+
# [
|
|
337
|
+
# f'{tv.id}:{tv.effective_version} : tv={id(tv):x} sa_tbl={id(tv.store_tbl.sa_tbl):x}'
|
|
338
|
+
# for tv in self._tbl_versions.values()
|
|
339
|
+
# ]
|
|
340
|
+
# )
|
|
341
|
+
# _logger.debug(f'begin_xact(): {tv_msg}')
|
|
342
|
+
num_retries = 0
|
|
343
|
+
pending_ops_tbl_id: UUID | None = None
|
|
344
|
+
has_exc = False # True if we exited the 'with ...begin_xact()' block with an exception
|
|
345
|
+
while True:
|
|
346
|
+
if pending_ops_tbl_id is not None:
|
|
347
|
+
Env.get().console_logger.debug(f'begin_xact(): finalizing pending ops for {pending_ops_tbl_id}')
|
|
348
|
+
self._finalize_pending_ops(pending_ops_tbl_id)
|
|
349
|
+
pending_ops_tbl_id = None
|
|
350
|
+
|
|
351
|
+
try:
|
|
352
|
+
self._in_write_xact = for_write
|
|
353
|
+
self._x_locked_tbl_ids = set()
|
|
354
|
+
self._modified_tvs = set()
|
|
355
|
+
self._column_dependents = None
|
|
356
|
+
has_exc = False
|
|
357
|
+
|
|
358
|
+
assert not self._undo_actions
|
|
359
|
+
with Env.get().begin_xact(for_write=for_write) as conn:
|
|
360
|
+
if tbl is not None or tbl_id is not None:
|
|
361
|
+
try:
|
|
362
|
+
target: TableVersionHandle | None = None
|
|
363
|
+
if tbl is not None:
|
|
364
|
+
if self._acquire_path_locks(
|
|
365
|
+
tbl=tbl,
|
|
366
|
+
for_write=for_write,
|
|
367
|
+
lock_mutable_tree=lock_mutable_tree,
|
|
368
|
+
check_pending_ops=finalize_pending_ops,
|
|
369
|
+
):
|
|
370
|
+
target = tbl.tbl_version
|
|
371
|
+
else:
|
|
372
|
+
target = self._acquire_tbl_lock(
|
|
373
|
+
tbl_id=tbl_id,
|
|
374
|
+
for_write=for_write,
|
|
375
|
+
lock_mutable_tree=lock_mutable_tree,
|
|
376
|
+
raise_if_not_exists=True,
|
|
377
|
+
check_pending_ops=finalize_pending_ops,
|
|
378
|
+
)
|
|
379
|
+
|
|
380
|
+
if target is None:
|
|
381
|
+
# didn't get the write lock
|
|
382
|
+
for_write = False
|
|
383
|
+
elif for_write:
|
|
384
|
+
# we know at this point that target is mutable because we got the X-lock
|
|
385
|
+
if lock_mutable_tree and not target.is_snapshot:
|
|
386
|
+
self._x_locked_tbl_ids = self._get_mutable_tree(target.id)
|
|
387
|
+
self._compute_column_dependents(self._x_locked_tbl_ids)
|
|
388
|
+
else:
|
|
389
|
+
self._x_locked_tbl_ids = {target.id}
|
|
390
|
+
if _logger.isEnabledFor(logging.DEBUG):
|
|
391
|
+
# validate only when we don't see errors
|
|
392
|
+
self.validate()
|
|
393
|
+
|
|
394
|
+
except PendingTableOpsError as e:
|
|
395
|
+
has_exc = True
|
|
396
|
+
if finalize_pending_ops:
|
|
397
|
+
# we remember which table id to finalize
|
|
398
|
+
pending_ops_tbl_id = e.tbl_id
|
|
399
|
+
# raise to abort the transaction
|
|
400
|
+
raise
|
|
401
|
+
|
|
402
|
+
except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
|
|
403
|
+
has_exc = True
|
|
404
|
+
if isinstance(
|
|
405
|
+
e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)
|
|
406
|
+
) and (num_retries < _MAX_RETRIES or _MAX_RETRIES == -1):
|
|
407
|
+
num_retries += 1
|
|
408
|
+
_logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
|
|
409
|
+
time.sleep(random.uniform(0.1, 0.5))
|
|
410
|
+
assert not self._undo_actions # We should not have any undo actions at this point
|
|
411
|
+
continue
|
|
412
|
+
else:
|
|
413
|
+
raise
|
|
414
|
+
|
|
415
|
+
assert not self._undo_actions
|
|
416
|
+
yield conn
|
|
417
|
+
return
|
|
418
|
+
|
|
419
|
+
except PendingTableOpsError:
|
|
420
|
+
has_exc = True
|
|
421
|
+
if pending_ops_tbl_id is not None:
|
|
422
|
+
# the next iteration of the loop will deal with pending ops for this table id
|
|
423
|
+
continue
|
|
424
|
+
else:
|
|
425
|
+
# we got this exception after getting the initial table locks and therefore need to abort
|
|
426
|
+
raise
|
|
427
|
+
|
|
428
|
+
except (sql_exc.DBAPIError, sql_exc.OperationalError, sql_exc.InternalError) as e:
|
|
429
|
+
has_exc = True
|
|
430
|
+
self.convert_sql_exc(e, tbl_id, tbl.tbl_version if tbl is not None else None, convert_db_excs)
|
|
431
|
+
raise # re-raise the error if it didn't convert to a pxt.Error
|
|
432
|
+
|
|
433
|
+
except (Exception, KeyboardInterrupt) as e:
|
|
434
|
+
has_exc = True
|
|
435
|
+
_logger.debug(f'Caught {e.__class__}')
|
|
436
|
+
raise
|
|
437
|
+
|
|
438
|
+
finally:
|
|
439
|
+
self._in_write_xact = False
|
|
440
|
+
self._x_locked_tbl_ids.clear()
|
|
441
|
+
self._column_dependents = None
|
|
442
|
+
|
|
443
|
+
# invalidate cached current TableVersion instances
|
|
444
|
+
for tv in self._tbl_versions.values():
|
|
445
|
+
if tv.effective_version is None:
|
|
446
|
+
_logger.debug(f'invalidating table version {tv} (0x{id(tv):x})')
|
|
447
|
+
tv.is_validated = False
|
|
448
|
+
|
|
449
|
+
if has_exc:
|
|
450
|
+
# Execute undo actions in reverse order (LIFO)
|
|
451
|
+
for hook in reversed(self._undo_actions):
|
|
452
|
+
run_cleanup(hook, raise_error=False)
|
|
453
|
+
# purge all modified TableVersion instances; we can't guarantee they are still consistent with the
|
|
454
|
+
# stored metadata
|
|
455
|
+
for handle in self._modified_tvs:
|
|
456
|
+
self._clear_tv_cache(handle.key)
|
|
457
|
+
# Clear potentially corrupted cached metadata
|
|
458
|
+
if tbl is not None:
|
|
459
|
+
tbl.clear_cached_md()
|
|
460
|
+
|
|
461
|
+
self._undo_actions.clear()
|
|
462
|
+
self._modified_tvs.clear()
|
|
463
|
+
|
|
464
|
+
def register_undo_action(self, func: Callable[[], None]) -> Callable[[], None]:
    """Queue `func` as an undo action for the write transaction that is currently in progress.

    Undo actions run only if the transaction fails with an exception, most recently registered first (LIFO).
    An undo action should not raise; if it does, the exception is logged and ignored.

    Must be called while a write transaction is active. Returns `func` itself.
    """
    assert self.in_write_xact
    undo_stack = self._undo_actions
    undo_stack.append(func)
    return func
|
|
476
|
+
|
|
477
|
+
def convert_sql_exc(
    self,
    e: sql_exc.StatementError,
    tbl_id: UUID | None = None,
    tbl: TableVersionHandle | None = None,
    convert_db_excs: bool = True,
) -> None:
    """Convert a db error that occurred during the actual operation into a pxt.Error, if appropriate.

    Args:
        e: the SQLAlchemy exception; the decision is based on its `orig` attribute
        tbl_id: id of the affected table; only used for the debug log when `tbl` is None
        tbl: handle of the affected table, if known
        convert_db_excs: if True, concurrency-related errors are converted as well

    Raises:
        excs.Error: for UndefinedTable (whenever `tbl` is given), and for SerializationFailure,
            InFailedSqlTransaction, DuplicateColumn and DeadlockDetected when `convert_db_excs` is True.

    Returns normally if no conversion applies.
    """
    db_exc = e.orig

    # UndefinedTable is always converted (it can't be retried): the table got dropped in the middle of the
    # operation
    if tbl is not None and isinstance(db_exc, psycopg.errors.UndefinedTable):
        tbl_name = tbl.get().name
        _logger.debug(f'Exception: undefined table {tbl_name!r}: Caught {type(db_exc)}: {e!r}')
        raise excs.Error(f'Table was dropped: {tbl_name}') from None

    # TODO: Investigate whether DeadlockDetected points to a bug in our locking protocol,
    # which is supposed to be deadlock-free.
    concurrency_errors = (
        psycopg.errors.SerializationFailure,  # serialization error despite getting x-locks
        psycopg.errors.InFailedSqlTransaction,  # can happen after tx fails for another reason
        psycopg.errors.DuplicateColumn,  # if a different process added a column concurrently
        psycopg.errors.DeadlockDetected,  # locking protocol contention
    )
    if not (convert_db_excs and isinstance(db_exc, concurrency_errors)):
        return

    # describe the affected table for the debug log
    if tbl is not None:
        msg = f'{tbl.get().name} ({tbl.id})'
    elif tbl_id is not None:
        msg = f'{tbl_id}'
    else:
        msg = ''
    _logger.debug(f'Exception: {db_exc.__class__}: {msg} ({e})')

    # Suppress the underlying SQL exception unless DEBUG is enabled
    cause = e if _logger.isEnabledFor(logging.DEBUG) else None
    raise excs.Error(
        'That Pixeltable operation could not be completed because it conflicted with another '
        'operation that was run on a different process.\n'
        'Please re-run the operation.'
    ) from cause
|
|
522
|
+
|
|
523
|
+
@property
def in_write_xact(self) -> bool:
    """Current value of the `_in_write_xact` flag, ie, whether a write transaction is in progress."""
    return self._in_write_xact
|
|
526
|
+
|
|
527
|
+
def _acquire_path_locks(
|
|
528
|
+
self,
|
|
529
|
+
*,
|
|
530
|
+
tbl: TableVersionPath,
|
|
531
|
+
for_write: bool = False,
|
|
532
|
+
lock_mutable_tree: bool = False,
|
|
533
|
+
check_pending_ops: bool = True,
|
|
534
|
+
) -> bool:
|
|
535
|
+
"""
|
|
536
|
+
Path locking protocol:
|
|
537
|
+
- refresh cached TableVersions of ancestors (we need those even during inserts, for computed columns that
|
|
538
|
+
reference the base tables)
|
|
539
|
+
- refresh cached TableVersion of tbl or get X-lock, depending on for_write
|
|
540
|
+
- if lock_mutable_tree, also X-lock all mutable views of tbl
|
|
541
|
+
|
|
542
|
+
Raises Error if tbl doesn't exist.
|
|
543
|
+
Return False if the lock couldn't be acquired (X-lock on a non-mutable table), True otherwise.
|
|
544
|
+
"""
|
|
545
|
+
path_handles = tbl.get_tbl_versions()
|
|
546
|
+
read_handles = path_handles[:0:-1] if for_write else path_handles[::-1]
|
|
547
|
+
for handle in read_handles:
|
|
548
|
+
# update cache
|
|
549
|
+
_ = self.get_tbl_version(handle.key, validate_initialized=True)
|
|
550
|
+
if not for_write:
|
|
551
|
+
return True # nothing left to lock
|
|
552
|
+
handle = self._acquire_tbl_lock(
|
|
553
|
+
tbl_id=tbl.tbl_id,
|
|
554
|
+
for_write=True,
|
|
555
|
+
lock_mutable_tree=lock_mutable_tree,
|
|
556
|
+
raise_if_not_exists=True,
|
|
557
|
+
check_pending_ops=check_pending_ops,
|
|
558
|
+
)
|
|
559
|
+
# update cache
|
|
560
|
+
_ = self.get_tbl_version(path_handles[0].key, validate_initialized=True)
|
|
561
|
+
return handle is not None
|
|
562
|
+
|
|
563
|
+
def _acquire_tbl_lock(
    self,
    *,
    for_write: bool,
    tbl_id: UUID | None = None,
    dir_id: UUID | None = None,
    tbl_name: str | None = None,
    lock_mutable_tree: bool = False,
    raise_if_not_exists: bool = True,
    check_pending_ops: bool = True,
) -> TableVersionHandle | None:
    """
    Look up a Table record and, for writes, force acquisition of an X-lock on it via a blind update.

    Either tbl_id or dir_id/tbl_name need to be specified.
    If the environment has a user, the lookup is restricted to that user's tables.
    If lock_mutable_tree, recursively locks all mutable views of the table.

    Raises Error if the record doesn't exist and raise_if_not_exists is True.
    Raises PendingTableOpsError if check_pending_ops is True and the table has pending ops.

    Returns a handle to what was locked, None if the record wasn't found (and raise_if_not_exists is False)
    or the lock couldn't be acquired (eg, X-lock on a non-mutable table).
    """
    assert (tbl_id is not None) != (dir_id is not None and tbl_name is not None)
    assert (dir_id is None) == (tbl_name is None)
    where_clause: sql.ColumnElement
    if tbl_id is not None:
        where_clause = schema.Table.id == tbl_id
    else:
        where_clause = sql.and_(schema.Table.dir_id == dir_id, schema.Table.md['name'].astext == tbl_name)
    user = Env.get().user
    if user is not None:
        where_clause = sql.and_(where_clause, schema.Table.md['user'].astext == user)

    conn = Env.get().conn
    q = sql.select(schema.Table).where(where_clause)
    if for_write:
        # fail immediately instead of blocking if another transaction holds the row lock
        q = q.with_for_update(nowait=True)
    row = conn.execute(q).one_or_none()
    if row is None:
        if raise_if_not_exists:
            raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
        return None  # nothing to lock
    tbl_md = schema.md_from_dict(schema.TableMd, row.md)
    if for_write and tbl_md.is_mutable:
        # blind update of the record
        conn.execute(sql.update(schema.Table).values(lock_dummy=1).where(where_clause))

    if check_pending_ops:
        # check for pending ops after getting table lock
        pending_ops_q = sql.select(sql.func.count()).where(schema.PendingTableOp.tbl_id == row.id)
        has_pending_ops = conn.execute(pending_ops_q).scalar() > 0
        if has_pending_ops:
            raise PendingTableOpsError(row.id)

    # TODO: properly handle concurrency for replicas with live views (once they are supported)
    if for_write and not tbl_md.is_mutable:
        return None  # nothing to lock

    # use the id of the record we found: tbl_id is None when the lookup was done via dir_id/tbl_name
    key = TableVersionKey(row.id, tbl_md.current_version if tbl_md.is_snapshot else None, None)
    if tbl_md.is_mutable and lock_mutable_tree:
        # also lock mutable views
        tv = self.get_tbl_version(key, validate_initialized=True)
        for view in tv.mutable_views:
            self._acquire_tbl_lock(
                for_write=for_write,
                tbl_id=view.id,
                lock_mutable_tree=lock_mutable_tree,
                raise_if_not_exists=raise_if_not_exists,
                check_pending_ops=check_pending_ops,
            )
    return TableVersionHandle(key)
|
|
631
|
+
|
|
632
|
+
def _roll_forward(self) -> None:
    """Finalize the pending ops of every table in self._roll_forward_ids.

    After a table's ops are finalized, its cache entry for key (tbl_id, None, None) is cleared.
    """
    for pending_tbl_id in self._roll_forward_ids:
        self._finalize_pending_ops(pending_tbl_id)
        # TODO: handle replicas
        stale_key = TableVersionKey(pending_tbl_id, None, None)
        self._clear_tv_cache(stale_key)
|
|
638
|
+
|
|
639
|
+
def _finalize_pending_ops(self, tbl_id: UUID) -> None:
    """Finalizes all pending ops for the given table.

    Each iteration of the loop fetches the pending op with the lowest op_sn, executes it and deletes its
    PendingTableOp record. After the last op of a sequence (op_sn == num_ops - 1) the table's md is reset
    (tbl_state LIVE, pending_stmt None) and the function returns. It also returns when no pending op is found.

    SerializationFailure and LockNotAvailable errors are retried after a short random sleep; every other
    exception propagates to the caller.
    """
    # only used for log messages
    num_retries = 0
    while True:
        try:
            tbl_version: int
            op: TableOp | None = None
            delete_next_op_stmt: sql.Delete
            reset_state_stmt: sql.Update
            with self.begin_xact(
                tbl_id=tbl_id, for_write=True, convert_db_excs=False, finalize_pending_ops=False
            ) as conn:
                # select the next pending op (lowest op_sn) together with the table's md, FOR UPDATE
                q = (
                    sql.select(schema.Table.md, schema.PendingTableOp)
                    .select_from(schema.Table)
                    .join(schema.PendingTableOp)
                    .where(schema.Table.id == tbl_id)
                    .where(schema.PendingTableOp.tbl_id == tbl_id)
                    .order_by(schema.PendingTableOp.op_sn)
                    .limit(1)
                    .with_for_update()
                )
                row = conn.execute(q).one_or_none()
                if row is None:
                    # no pending ops: nothing to do
                    return
                view_md = row.md.get('view_md')
                is_snapshot = False if view_md is None else view_md.get('is_snapshot')
                assert is_snapshot is not None
                # snapshots are looked up by their current_version, everything else with version None
                tbl_version = row.md.get('current_version') if is_snapshot else None
                op = schema.md_from_dict(TableOp, row.op)
                # the statements are only built here; they are executed once the op has run
                delete_next_op_stmt = sql.delete(schema.PendingTableOp).where(
                    schema.PendingTableOp.tbl_id == tbl_id, schema.PendingTableOp.op_sn == row.op_sn
                )
                # merges the two keys into the existing md via the '||' operator
                reset_state_stmt = (
                    sql.update(schema.Table)
                    .where(schema.Table.id == tbl_id)
                    .values(
                        md=schema.Table.md.op('||')(
                            {'tbl_state': schema.TableState.LIVE.value, 'pending_stmt': None}
                        )
                    )
                )
                _logger.debug(f'finalize_pending_ops({tbl_id}): finalizing op {op!s}')

                if op.needs_xact:
                    # this op runs inside the transaction opened above
                    if op.delete_table_md_op is not None:
                        self.delete_tbl_md(tbl_id)
                    else:
                        tv = self.get_tbl_version(
                            TableVersionKey(tbl_id, tbl_version, None),
                            check_pending_ops=False,
                            validate_initialized=True,
                        )
                        # TODO: The above TableVersionKey instance will need to be updated if we see a replica here.
                        # For now, just assert that we don't.
                        assert not tv.is_replica
                        tv.exec_op(op)

                    conn.execute(delete_next_op_stmt)
                    if op.op_sn == op.num_ops - 1:
                        # that was the last op of the sequence
                        conn.execute(reset_state_stmt)
                        return
                    # move on to the next op; note that this skips the reset of num_retries below
                    continue

            # this op runs outside of a transaction
            tv = self.get_tbl_version(
                TableVersionKey(tbl_id, tbl_version, None), check_pending_ops=False, validate_initialized=True
            )
            tv.exec_op(op)
            # remove the op's record in a separate transaction
            with self.begin_xact(
                tbl_id=tbl_id, for_write=True, convert_db_excs=False, finalize_pending_ops=False
            ) as conn:
                conn.execute(delete_next_op_stmt)
                if op.op_sn == op.num_ops - 1:
                    conn.execute(reset_state_stmt)
                    return

        except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
            # TODO: why are we still seeing these here, instead of them getting taken care of by the retry
            # logic of begin_xact()?
            if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
                num_retries += 1
                log_msg: str
                if op is not None:
                    log_msg = f'finalize_pending_ops(): retrying ({num_retries}) op {op!s} after {type(e.orig)}'
                else:
                    log_msg = f'finalize_pending_ops(): retrying ({num_retries}) after {type(e.orig)}'
                Env.get().console_logger.debug(log_msg)
                # randomized back-off (0.1s - 0.5s) before the next attempt
                time.sleep(random.uniform(0.1, 0.5))
                continue
            else:
                raise
        except Exception as e:
            Env.get().console_logger.debug(f'finalize_pending_ops(): caught {e}')
            raise

        # only reached after an op was executed outside of a transaction and more ops remain
        num_retries = 0
|
|
736
|
+
|
|
737
|
+
def _debug_str(self) -> str:
|
|
738
|
+
tv_str = '\n'.join(str(k) for k in self._tbl_versions)
|
|
739
|
+
tbl_str = '\n'.join(str(k) for k in self._tbls)
|
|
740
|
+
return f'tbl_versions:\n{tv_str}\ntbls:\n{tbl_str}'
|
|
741
|
+
|
|
742
|
+
def _get_mutable_tree(self, tbl_id: UUID) -> set[UUID]:
    """Collect the ids of tbl_id and of all tables reachable from it via mutable_views (recursively).

    The key (tbl_id, None, None) must already be present in self._tbl_versions, and the table must not be a
    replica.
    """
    root_key = TableVersionKey(tbl_id, None, None)
    assert root_key in self._tbl_versions, f'{root_key} not in {self._tbl_versions.keys()}\n{self._debug_str()}'
    root = self.get_tbl_version(root_key, validate_initialized=True)
    assert not root.is_replica
    tree: set[UUID] = {root.id}
    for child in root.mutable_views:
        tree |= self._get_mutable_tree(child.id)
    return tree
|
|
752
|
+
|
|
753
|
+
def _compute_column_dependents(self, mutable_tree: set[UUID]) -> None:
|
|
754
|
+
"""Populate self._column_dependents for all tables in mutable_tree"""
|
|
755
|
+
assert self._column_dependents is None
|
|
756
|
+
self._column_dependents = defaultdict(set)
|
|
757
|
+
for tbl_id in mutable_tree:
|
|
758
|
+
assert tbl_id in self._column_dependencies, (
|
|
759
|
+
f'{tbl_id} not in {self._column_dependencies.keys()}\n{self._debug_str()}'
|
|
760
|
+
)
|
|
761
|
+
for col, dependencies in self._column_dependencies[tbl_id].items():
|
|
762
|
+
for dependency in dependencies:
|
|
763
|
+
if dependency.tbl_id not in mutable_tree:
|
|
764
|
+
continue
|
|
765
|
+
dependents = self._column_dependents[dependency]
|
|
766
|
+
dependents.add(col)
|
|
767
|
+
|
|
768
|
+
def record_column_dependencies(self, tbl_version: TableVersion) -> None:
    """Replace the entry for tbl_version in self._column_dependencies. Only valid for mutable versions.

    The entry maps each column that has a value_expr_dict to the column ids returned by
    Expr.get_refd_column_ids() for that dict; columns without a value_expr_dict are left out.
    """
    from pixeltable.exprs import Expr

    assert tbl_version.is_mutable
    self._column_dependencies[tbl_version.id] = {
        QColumnId(tbl_version.id, col.id): Expr.get_refd_column_ids(col.value_expr_dict)
        for col in tbl_version.cols_by_id.values()
        if col.value_expr_dict is not None
    }
|
|
779
|
+
|
|
780
|
+
def get_column_dependents(self, tbl_id: UUID, col_id: int) -> set[Column]:
    """Return the Columns recorded in self._column_dependents as depending on the given column.

    Requires self._column_dependents to be populated. Each dependent is resolved against the TableVersion
    with key (tbl_id, None, None) of its table.
    NOTE(review): whether indirect (transitive) dependents are included depends on what was recorded in
    self._column_dependencies; confirm against Expr.get_refd_column_ids().
    """
    assert self._column_dependents is not None
    dependent_ids = self._column_dependents[QColumnId(tbl_id, col_id)]
    return {
        self.get_tbl_version(TableVersionKey(d.tbl_id, None, None), validate_initialized=True).cols_by_id[d.col_id]
        for d in dependent_ids
    }
|
|
790
|
+
|
|
791
|
+
def _acquire_dir_xlock(
    self, *, parent_id: UUID | None = None, dir_id: UUID | None = None, dir_name: str | None = None
) -> None:
    """Force acquisition of an X-lock on a Dir record via a blind update (of lock_dummy).

    Exactly one of dir_id and dir_name must be given.
    With dir_id, the record is selected by id alone and all other conditions are ignored.
    Otherwise the record is selected by parent_id (parent_id == None is a valid condition) and dir_name, plus
    the user from the environment, if there is one.

    Must be called inside a write transaction.
    """
    assert (dir_name is None) != (dir_id is None)
    assert parent_id is None or dir_name is not None
    user = Env.get().user
    assert self._in_write_xact

    if dir_id is not None:
        conditions = [schema.Dir.id == dir_id]
    else:
        conditions = [schema.Dir.parent_id == parent_id]
        if dir_name is not None:
            conditions.append(schema.Dir.md['name'].astext == dir_name)
        if user is not None:
            conditions.append(schema.Dir.md['user'].astext == user)

    stmt = sql.update(schema.Dir).values(lock_dummy=1)
    for cond in conditions:
        stmt = stmt.where(cond)
    Env.get().conn.execute(stmt)
|
|
814
|
+
|
|
815
|
+
def get_dir_path(self, dir_id: UUID) -> Path:
    """Return path for directory with given id.

    Follows parent_id links upwards, one query per level, until it reaches a directory whose name is ''.
    """
    assert isinstance(dir_id, UUID)
    conn = Env.get().conn
    names_leaf_first: list[str] = []
    current_id = dir_id
    while True:
        stmt = sql.select(schema.Dir).where(schema.Dir.id == current_id)
        record = schema.Dir(**conn.execute(stmt).one()._mapping)
        name = record.md['name']
        if name == '':
            break
        names_leaf_first.append(name)
        current_id = record.parent_id
    return Path.parse('.'.join(reversed(names_leaf_first)), allow_empty_path=True, allow_system_path=True)
|
|
829
|
+
|
|
830
|
+
@dataclasses.dataclass
class DirEntry:
    """One entry of a directory listing, as produced by _get_dir_contents(): a subdirectory or a table."""

    # Dir record if this entry is a subdirectory; None for table entries
    dir: schema.Dir | None
    # contents of the subdirectory, keyed by entry name; only populated for recursive listings and always
    # empty for table entries
    dir_entries: dict[str, Catalog.DirEntry]
    # Table record if this entry is a table; None for subdirectory entries
    table: schema.Table | None
|
|
835
|
+
|
|
836
|
+
@retry_loop(for_write=False)
def get_dir_contents(self, dir_path: Path, recursive: bool = False) -> dict[str, DirEntry]:
    """Return the entries of the directory at dir_path, keyed by name.

    If recursive, the entries of subdirectories carry their own contents. Raises Error if dir_path doesn't
    exist or isn't a directory.
    """
    target_dir = self._get_schema_object(dir_path, expected=Dir, raise_if_not_exists=True)
    return self._get_dir_contents(target_dir._id, recursive=recursive)
|
|
840
|
+
|
|
841
|
+
def _get_dir_contents(self, dir_id: UUID, recursive: bool = False) -> dict[str, DirEntry]:
    """Returns a dict mapping the entry names to DirEntry objects.

    Subdirectories are added first, then the tables matched by _active_tbl_clause(). If recursive, each
    subdirectory entry also contains the (recursive) contents of that subdirectory.
    """
    conn = Env.get().conn
    entries: dict[str, Catalog.DirEntry] = {}

    # subdirectories
    subdir_rows = conn.execute(sql.select(schema.Dir).where(schema.Dir.parent_id == dir_id)).all()
    for subdir_row in subdir_rows:
        subdir = schema.Dir(**subdir_row._mapping)
        children = self._get_dir_contents(subdir.id, recursive=True) if recursive else {}
        entries[subdir.md['name']] = self.DirEntry(dir=subdir, dir_entries=children, table=None)

    # tables
    tbl_rows = conn.execute(sql.select(schema.Table).where(self._active_tbl_clause(dir_id=dir_id))).all()
    for tbl_row in tbl_rows:
        tbl_record = schema.Table(**tbl_row._mapping)
        entries[tbl_record.md['name']] = self.DirEntry(dir=None, dir_entries={}, table=tbl_record)

    return entries
|
|
862
|
+
|
|
863
|
+
@retry_loop(for_write=True)
def move(self, path: Path, new_path: Path, if_exists: IfExistsParam, if_not_exists: IfNotExistsParam) -> None:
    """Move the schema object at path to new_path; delegates to _move()."""
    self._move(path=path, new_path=new_path, if_exists=if_exists, if_not_exists=if_not_exists)
|
|
866
|
+
|
|
867
|
+
def _move(self, path: Path, new_path: Path, if_exists: IfExistsParam, if_not_exists: IfNotExistsParam) -> None:
    """Move the schema object at path to new_path.

    Validation and locking are done by _prepare_dir_op(), which raises Error if the destination exists and
    if_exists is ERROR, or if the source doesn't exist and if_not_exists is ERROR. In the remaining
    'already exists' / 'doesn't exist' cases this is a no-op.
    """
    existing_dest, dest_dir, src = self._prepare_dir_op(
        add_dir_path=new_path.parent,
        add_name=new_path.name,
        drop_dir_path=path.parent,
        drop_name=path.name,
        raise_if_exists=(if_exists == IfExistsParam.ERROR),
        raise_if_not_exists=(if_not_exists == IfNotExistsParam.ERROR),
    )
    assert existing_dest is None or if_exists == IfExistsParam.IGNORE
    assert src is not None or if_not_exists == IfNotExistsParam.IGNORE

    # existing_dest is not None: `if_exists='ignore'` and the destination already exists
    # src is None: `if_not_exists='ignore'` and the source doesn't exist
    if existing_dest is not None or src is None:
        return
    src._move(new_path.name, dest_dir._id)
|
|
883
|
+
|
|
884
|
+
def _prepare_dir_op(
    self,
    add_dir_path: Path | None = None,
    add_name: str | None = None,
    drop_dir_path: Path | None = None,
    drop_name: str | None = None,
    drop_expected: type[SchemaObject] | None = None,
    raise_if_exists: bool = False,
    raise_if_not_exists: bool = False,
) -> tuple[SchemaObject | None, Dir | None, SchemaObject | None]:
    """
    Validates paths and acquires locks needed for a directory operation, ie, add/drop/rename (add + drop) of a
    directory entry (a table or a directory):
    - adding an entry (<add_dir_path>.<add_name>)
    - dropping an entry (<drop_dir_path>.<drop_name>)

    Returns: (existing SchemaObject of add path, Dir of add path, existing SchemaObject of drop path)

    Raises Error if
    - one of the directories doesn't exist
    - raise_if_exists is True and the add path exists
    - raise_if_not_exists is True and the drop path doesn't exist
    - drop_expected is given and the object at the drop path has a different type

    Locking protocol:
    - X locks on the immediate parent directories of the added/dropped entries; this prevents concurrent
      modifications of the parent
    - lock parent before child
    - if both add and drop (= two directories are involved), lock the directories in a pre-determined order
      (sorted paths) in order to prevent deadlocks between concurrent directory modifications
    """
    assert drop_expected in (None, Table, Dir), drop_expected
    assert (add_dir_path is None) == (add_name is None)
    assert (drop_dir_path is None) == (drop_name is None)

    # lock the parent directories (each one only once) in sorted order
    involved_paths = {p for p in (add_dir_path, drop_dir_path) if p is not None}
    locked_dirs: dict[Path, schema.Dir] = {}
    for p in sorted(involved_paths):
        dir_record = self._get_dir(p, lock_dir=True)
        if dir_record is None:
            # Dir does not exist; raise an appropriate error.
            if add_dir_path is not None or add_name is not None:
                raise excs.Error(f'Directory {p!r} does not exist. Create it first with:\npxt.create_dir({p!r})')
            raise excs.Error(f'Directory {p!r} does not exist.')
        locked_dirs[p] = dir_record
    add_dir: schema.Dir | None = locked_dirs.get(add_dir_path)
    drop_dir: schema.Dir | None = locked_dirs.get(drop_dir_path)

    # then lock the entries themselves
    add_obj: SchemaObject | None = None
    if add_dir is not None:
        add_obj = self._get_dir_entry(add_dir.id, add_name, lock_entry=True)
        if raise_if_exists and add_obj is not None:
            add_path = add_dir_path.append(add_name)
            raise excs.Error(f'Path {add_path!r} already exists.')

    drop_obj: SchemaObject | None = None
    if drop_dir is not None:
        drop_path = drop_dir_path.append(drop_name)
        drop_obj = self._get_dir_entry(drop_dir.id, drop_name, lock_entry=True)
        if raise_if_not_exists and drop_obj is None:
            raise excs.Error(f'Path {drop_path!r} does not exist.')
        if drop_obj is not None and drop_expected is not None and not isinstance(drop_obj, drop_expected):
            expected_name = 'table' if drop_expected is Table else 'directory'
            raise excs.Error(f'{drop_path!r} needs to be a {expected_name} but is a {drop_obj._display_name()}')

    if add_dir is None:
        return add_obj, None, drop_obj
    return add_obj, Dir(add_dir.id, add_dir.parent_id, add_dir.md['name']), drop_obj
|
|
954
|
+
|
|
955
|
+
def _get_dir_entry(
    self, dir_id: UUID, name: str, version: int | None = None, lock_entry: bool = False
) -> SchemaObject | None:
    """Return the entry called `name` in directory dir_id, or None if there is no such entry.

    Subdirectories are checked before tables. A subdirectory is returned as a Dir, a table as whatever
    get_table_by_id() returns for it (version is passed through). If lock_entry, the subdirectory/table
    record is locked before it is looked up.
    """
    user = Env.get().user
    conn = Env.get().conn

    # check for subdirectory
    if lock_entry:
        self._acquire_dir_xlock(parent_id=dir_id, dir_id=None, dir_name=name)
    dir_q = sql.select(schema.Dir).where(
        schema.Dir.parent_id == dir_id, schema.Dir.md['name'].astext == name, schema.Dir.md['user'].astext == user
    )
    dir_rows = conn.execute(dir_q).all()
    # More than one match can occur if there is a synchronization failure across multiple processes
    # It indicates database inconsistency.
    if len(dir_rows) > 1:
        raise AssertionError(dir_rows)
    if dir_rows:
        found_dir = schema.Dir(**dir_rows[0]._mapping)
        return Dir(found_dir.id, found_dir.parent_id, name)

    # check for table
    if lock_entry:
        self._acquire_tbl_lock(for_write=True, dir_id=dir_id, raise_if_not_exists=False, tbl_name=name)
    tbl_q = sql.select(schema.Table.id).where(
        self._active_tbl_clause(dir_id=dir_id, tbl_name=name), schema.Table.md['user'].astext == user
    )
    tbl_ids = conn.execute(tbl_q).scalars().all()
    assert len(tbl_ids) <= 1, name
    if not tbl_ids:
        return None
    return self.get_table_by_id(tbl_ids[0], version)
|
|
988
|
+
|
|
989
|
+
def _get_schema_object(
    self,
    path: Path,
    expected: type[SchemaObject] | None = None,
    raise_if_exists: bool = False,
    raise_if_not_exists: bool = False,
    lock_parent: bool = False,
    lock_obj: bool = False,
) -> SchemaObject | None:
    """Look up the schema object at the given path; returns None if it doesn't exist.

    lock_parent/lock_obj request locks on the parent directory/the object itself during the lookup.

    Raises Error if
    - the parent directory doesn't exist
    - raise_if_exists is True and the path exists
    - raise_if_not_exists is True and the path does not exist
    - expected is not None and the existing object has a different type
    - path is the root and the root directory can't be found (reported as an unknown user)
    """
    assert expected in (None, Table, Dir), expected

    if path.is_root:
        # the root dir
        if expected is not None and expected is not Dir:
            raise excs.Error(f'{path!r} needs to be a table but is a dir')
        root = self._get_dir(path, lock_dir=lock_obj)
        if root is None:
            raise excs.Error(f'Unknown user: {Env.get().user}')
        return Dir(root.id, root.parent_id, root.md['name'])

    parent_path = path.parent
    parent_dir = self._get_dir(parent_path, lock_dir=lock_parent)
    if parent_dir is None:
        raise excs.Error(f'Directory {parent_path!r} does not exist.')
    obj = self._get_dir_entry(parent_dir.id, path.name, path.version, lock_entry=lock_obj)

    if obj is None:
        if raise_if_not_exists:
            raise excs.Error(f'Path {path!r} does not exist.')
        return None
    if raise_if_exists:
        raise excs.Error(f'Path {path!r} is an existing {obj._display_name()}.')
    if expected is not None and not isinstance(obj, expected):
        expected_name = 'table' if expected is Table else 'directory'
        raise excs.Error(f'{path!r} needs to be a {expected_name} but is a {obj._display_name()}.')
    return obj
|
|
1031
|
+
|
|
1032
|
+
def get_table_by_id(
    self, tbl_id: UUID, version: int | None = None, ignore_if_dropped: bool = False
) -> Table | None:
    """Return the Table for (tbl_id, version), loading it if it isn't in self._tbls yet.

    Must be executed inside a transaction. Might raise PendingTableOpsError.
    ignore_if_dropped is only used when loading the table without a version (passed on as ignore_pending_drop).
    """
    cache_key = (tbl_id, version)
    if cache_key in self._tbls:
        return self._tbls.get(cache_key)
    if version is None:
        return self._load_tbl(tbl_id, ignore_pending_drop=ignore_if_dropped)
    return self._load_tbl_at_version(tbl_id, version)
|
|
1042
|
+
|
|
1043
|
+
def create_table(
    self,
    path: Path,
    schema: dict[str, Any],
    if_exists: IfExistsParam,
    primary_key: list[str] | None,
    num_retained_versions: int,
    comment: str,
    media_validation: MediaValidation,
    create_default_idxs: bool,
) -> tuple[Table, bool]:
    """
    Creates a new InsertableTable at the given path.

    If `if_exists == IfExistsParam.IGNORE` and a table `t` already exists at the given path, returns `t, False`.

    Otherwise, creates a new table `t` and returns `t, True` (or raises an exception if the operation fails).

    The table metadata is written together with its pending ops (marked as a pending CREATE_TABLE statement);
    the ops are then finalized via _roll_forward() before the table is loaded and returned.
    The 'Created table' log messages are only emitted when a table was actually created.
    """

    @retry_loop(for_write=True)
    def create_fn() -> tuple[UUID, bool]:
        # the `schema` parameter shadows the metadata schema module, hence the explicit import
        import pixeltable.metadata.schema

        existing = self._handle_path_collision(path, InsertableTable, False, if_exists)
        if existing is not None:
            assert isinstance(existing, Table)
            return existing._id, False

        dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
        assert dir is not None

        md, ops = InsertableTable._create(
            path.name,
            schema,
            primary_key=primary_key,
            num_retained_versions=num_retained_versions,
            comment=comment,
            media_validation=media_validation,
            create_default_idxs=create_default_idxs,
        )
        tbl_id = UUID(md.tbl_md.tbl_id)
        md.tbl_md.pending_stmt = pixeltable.metadata.schema.TableStatement.CREATE_TABLE
        self.write_tbl_md(tbl_id, dir._id, md.tbl_md, md.version_md, md.schema_version_md, ops)
        return tbl_id, True

    self._roll_forward_ids.clear()
    tbl_id, is_created = create_fn()
    self._roll_forward()
    with self.begin_xact(tbl_id=tbl_id, for_write=True):
        tbl = self.get_table_by_id(tbl_id)
        if is_created:
            # don't claim a creation when an existing table was returned (if_exists == IGNORE)
            _logger.info(f'Created table {tbl._name!r}, id={tbl._id}')
            Env.get().console_logger.info(f'Created table {tbl._name!r}.')
        return tbl, is_created
|
|
1096
|
+
|
|
1097
|
+
def create_view(
    self,
    path: Path,
    base: TableVersionPath,
    select_list: list[tuple[exprs.Expr, str | None]] | None,
    where: exprs.Expr | None,
    sample_clause: 'SampleClause' | None,
    additional_columns: dict[str, Any] | None,
    is_snapshot: bool,
    create_default_idxs: bool,
    iterator: tuple[type[ComponentIterator], dict[str, Any]] | None,
    num_retained_versions: int,
    comment: str,
    media_validation: MediaValidation,
    if_exists: IfExistsParam,
) -> Table:
    """
    Create a view (or snapshot) of `base` at `path` and return the corresponding Table.

    The catalog records are written by the nested `create_fn()`, which runs under `@retry_loop(for_write=True)`.
    Afterwards the pending ops are rolled forward and the Table is loaded in its own write transaction.
    If `_handle_path_collision()` returns an existing view for `path`, that view is returned and nothing new
    is created.
    """

    def is_mutable_view_of_mutable_base() -> bool:
        return not is_snapshot and base.is_mutable()

    @retry_loop(for_write=True)
    def create_fn() -> UUID:
        if is_mutable_view_of_mutable_base():
            # X-lock the base and advance its view_sn before adding the view
            self._acquire_tbl_lock(tbl_id=base.tbl_id, for_write=True)
            base_tv = self.get_tbl_version(TableVersionKey(base.tbl_id, None, None), validate_initialized=True)
            base_tv.tbl_md.view_sn += 1
            update_result = Env.get().conn.execute(
                sql.update(schema.Table)
                .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md, dict_factory=md_dict_factory)})
                .where(schema.Table.id == base.tbl_id)
            )
            assert update_result.rowcount == 1, update_result.rowcount

        colliding_view = self._handle_path_collision(path, View, is_snapshot, if_exists, base=base)
        if colliding_view is not None:
            assert isinstance(colliding_view, View)
            return colliding_view._id

        parent_dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
        assert parent_dir is not None
        iterator_class, iterator_args = (None, None) if iterator is None else iterator
        view_md, pending_ops = View._create(
            parent_dir._id,
            path.name,
            base=base,
            select_list=select_list,
            additional_columns=additional_columns,
            predicate=where,
            sample_clause=sample_clause,
            is_snapshot=is_snapshot,
            create_default_idxs=create_default_idxs,
            iterator_cls=iterator_class,
            iterator_args=iterator_args,
            num_retained_versions=num_retained_versions,
            comment=comment,
            media_validation=media_validation,
        )
        new_view_id = UUID(view_md.tbl_md.tbl_id)
        # mark the statement as pending; the ops are executed later by _roll_forward()
        view_md.tbl_md.pending_stmt = schema.TableStatement.CREATE_VIEW
        self.write_tbl_md(
            new_view_id, parent_dir._id, view_md.tbl_md, view_md.version_md, view_md.schema_version_md, pending_ops
        )
        return new_view_id

    self._roll_forward_ids.clear()
    view_id = create_fn()
    if is_mutable_view_of_mutable_base():
        # invalidate base's TableVersion instance, so that it gets reloaded with the new mutable view
        self._clear_tv_cache(base.tbl_version.key)

    self._roll_forward()
    with self.begin_xact(tbl_id=view_id, for_write=True):
        return self.get_table_by_id(view_id)
|
|
1172
|
+
|
|
1173
|
+
def _clear_tv_cache(self, key: TableVersionKey) -> None:
    """Drop the cached TableVersion for `key`, if there is one, after flagging it as not validated."""
    if key not in self._tbl_versions:
        return
    cached_tv = self._tbl_versions[key]
    # holders of a reference to the evicted instance can see that it is no longer validated
    cached_tv.is_validated = False
    del self._tbl_versions[key]
|
|
1178
|
+
|
|
1179
|
+
def create_replica(self, path: Path, md: list[TableVersionMd], create_store_tbls: bool = True) -> None:
    """
    Create the table, table_version, and table_schema_version records for a replica described by `md`.

    `md` must be in standard "ancestor order": the table being replicated at position 0 and the (root) base
    table at position -1. Must be called inside a write transaction.

    Raises:
        excs.Error: if a different table already exists at `path`, or if the table has already been replicated
            under a different (non-system) path.
    """
    assert self.in_write_xact

    # Lock every table of the ancestor hierarchy that might already exist, base table first.
    for ancestor_md in reversed(md):
        self._acquire_tbl_lock(for_write=True, tbl_id=UUID(ancestor_md.tbl_md.tbl_id), raise_if_not_exists=False)

    tbl_id = UUID(md[0].tbl_md.tbl_id)

    colliding_tbl = self._handle_path_collision(path, Table, False, if_exists=IfExistsParam.IGNORE)  # type: ignore[type-abstract]
    if colliding_tbl is not None and colliding_tbl._id != tbl_id:
        raise excs.Error(
            f'An attempt was made to create a replica table at {path!r}, '
            'but a different table already exists at that location.'
        )

    # Ensure that the system directory exists.
    self.__ensure_system_dir_exists()

    # Check whether this table is already known to the catalog under some other user-visible path.
    known_tbl = self.get_table_by_id(tbl_id)
    if known_tbl is not None:
        known_path = Path.parse(known_tbl._path(), allow_system_path=True)
        if known_path != path and not known_path.is_system_path:
            raise excs.Error(
                f'That table has already been replicated as {known_path!r}.\n'
                f'Drop the existing replica if you wish to re-create it.'
            )

    # Store the metadata of the replica's proper ancestors, from the base table down to the immediate parent of
    # the table being replicated. Ancestors that do not yet exist in the store are created as anonymous system
    # tables.
    for ancestor_md in reversed(md[1:]):
        ancestor_id = UUID(ancestor_md.tbl_md.tbl_id)
        ancestor_tbl = self.get_table_by_id(ancestor_id)
        # Unknown table: give it a new anonymous system path. Known table: keep its existing path, which may be
        # a system path (anonymous base of some other table) or a user path (a snapshot the user replicated
        # directly at some point).
        ancestor_path: Path = (
            Path.parse(f'_system.replica_{ancestor_id.hex}', allow_system_path=True)
            if ancestor_tbl is None
            else Path.parse(ancestor_tbl._path(), allow_system_path=True)
        )

        # This is either a new version (a new record gets created) or a known version (the received metadata is
        # validated against the stored metadata).
        self.__store_replica_md(ancestor_path, ancestor_md)

        # Clear the ancestor's cached metadata so that the next table operation picks up the new TableVersion
        # instance: computed columns of descendant tables might reference ancestor columns that only exist in
        # the new version.
        ancestor_tbl = self.get_table_by_id(ancestor_id)
        if ancestor_tbl is not None:
            ancestor_tbl._tbl_version_path.clear_cached_md()

    # Store the metadata of the table being replicated; again this is either a new or a known version.
    # A new version results in a TableVersion record, unless the table being replicated is a pure snapshot.
    self.__store_replica_md(path, md[0], create_store_tbls)

    # The table may already have existed as an anonymous system table that was hidden when we checked above and
    # only became visible through this import. In that case it needs to be moved to the requested path. This
    # has to happen last, because the table needs a non-fragment table version before it can be instantiated
    # as a schema object.
    replicated_tbl = self.get_table_by_id(tbl_id)
    assert replicated_tbl is not None
    current_path = Path.parse(replicated_tbl._path(), allow_system_path=True)
    if current_path != path:
        assert current_path.is_system_path
        self._move(current_path, path, IfExistsParam.ERROR, IfNotExistsParam.ERROR)
|
|
1260
|
+
|
|
1261
|
+
def __ensure_system_dir_exists(self) -> Dir:
    """Return the `_system` directory, creating it via `_create_dir()` with `if_exists=IGNORE`."""
    return self._create_dir(
        Path.parse('_system', allow_system_path=True), if_exists=IfExistsParam.IGNORE, parents=False
    )
|
|
1264
|
+
|
|
1265
|
+
def __store_replica_md(self, path: Path, md: TableVersionMd, create_store_tbl: bool = True) -> None:
    """
    Record the metadata of one replica table version in the catalog.

    Inserts a Table record if none exists for the table id, and otherwise hands the table metadata to
    `write_tbl_md()` only when the incoming `current_version` is higher than the stored one. Version and
    schema version metadata are written when no stored record exists for that version; existing records are
    compared against the incoming metadata instead.

    Raises:
        excs.Error: if the table exists locally as a non-replica, or if stored version / schema version
            metadata disagree with the incoming metadata.
    """
    _logger.info(f'Creating replica table at {path!r} with ID: {md.tbl_md.tbl_id}')
    parent_dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
    assert parent_dir is not None
    assert self._in_write_xact

    conn = Env.get().conn
    tbl_id = md.tbl_md.tbl_id

    # These stay None unless this call has something to write for the respective record.
    new_tbl_md: schema.TableMd | None = None
    new_version_md: schema.VersionMd | None = None
    new_schema_version_md: schema.SchemaVersionMd | None = None
    is_new_tbl_version: bool = False

    # The table metadata in the catalog must always reflect the latest observed version of this table.
    # (In particular, if this is a base table, its table metadata need to be consistent with the latest version
    # of this table having a replicated view somewhere in the catalog.)
    # TODO: handle concurrent drop() of an existing replica; if we just ignore that Table record here, we can end
    #     up with a duplicate key violation; in principle, we should wait for the concurrent drop() to finish
    tbl_md_q: sql.Executable = sql.select(schema.Table.md).where(schema.Table.id == tbl_id)
    existing_md_row = conn.execute(tbl_md_q).one_or_none()

    # Stamp the incoming metadata with the given name, the current user, and the is_replica flag.
    replica_tbl_md = dataclasses.replace(md.tbl_md, name=path.name, user=Env.get().user, is_replica=True)
    md = dataclasses.replace(md, tbl_md=replica_tbl_md)

    if existing_md_row is None:
        # No existing table, so create a new record.
        insert_stmt = sql.insert(schema.Table.__table__).values(
            id=tbl_id, dir_id=parent_dir._id, md=dataclasses.asdict(md.tbl_md, dict_factory=md_dict_factory)
        )
        conn.execute(insert_stmt)
    elif not existing_md_row.md['is_replica']:
        raise excs.Error(
            'An attempt was made to replicate a view whose base table already exists in the local catalog '
            'in its original form.\n'
            'If this is intentional, you must first drop the existing base table:\n'
            f' pxt.drop_table({str(path)!r})'
        )
    elif md.tbl_md.current_version > existing_md_row.md['current_version']:
        # The incoming metadata is more recent than what is stored; the record gets updated in place.
        new_tbl_md = md.tbl_md

    # Look for a stored TableVersion record for this version: insert it if missing, validate it otherwise.
    version_q = (
        sql.select(schema.TableVersion.md)
        .where(schema.TableVersion.tbl_id == tbl_id)
        .where(schema.TableVersion.md['version'].cast(sql.Integer) == md.version_md.version)
    )
    existing_version_md_row = conn.execute(version_q).one_or_none()
    if existing_version_md_row is None:
        new_version_md = md.version_md
        is_new_tbl_version = True
    else:
        existing_version_md = schema.md_from_dict(schema.VersionMd, existing_version_md_row.md)
        # is_fragment and additional_md are allowed to differ, so they are aligned before comparing.
        comparable_version_md = dataclasses.replace(
            existing_version_md,
            is_fragment=md.version_md.is_fragment,
            additional_md=md.version_md.additional_md,
        )
        if comparable_version_md != md.version_md:
            raise excs.Error(
                f'The version metadata for the replica {path!r}:{md.version_md.version} is inconsistent with '
                'the metadata recorded from a prior replica.\n'
                'This is likely due to data corruption in the replicated table.'
            )
        if existing_version_md.is_fragment and not md.version_md.is_fragment:
            # The stored version is a fragment and a complete copy of the same version is being imported;
            # rewrite the record so that is_fragment becomes False in the DB.
            new_version_md = md.version_md

    # Same procedure for TableSchemaVersion.
    schema_version_q = (
        sql.select(schema.TableSchemaVersion.md)
        .where(schema.TableSchemaVersion.tbl_id == tbl_id)
        .where(
            schema.TableSchemaVersion.md['schema_version'].cast(sql.Integer) == md.schema_version_md.schema_version
        )
    )
    existing_schema_version_md_row = conn.execute(schema_version_q).one_or_none()
    if existing_schema_version_md_row is None:
        new_schema_version_md = md.schema_version_md
    else:
        existing_schema_version_md = schema.md_from_dict(schema.SchemaVersionMd, existing_schema_version_md_row.md)
        # Here the stored and the incoming metadata have to be identical.
        if existing_schema_version_md != md.schema_version_md:
            raise excs.Error(
                f'The schema version metadata for the replica {path!r}:{md.schema_version_md.schema_version} '
                'is inconsistent with the metadata recorded from a prior replica.\n'
                'This is likely due to data corruption in the replicated table.'
            )

    self.write_tbl_md(UUID(tbl_id), None, new_tbl_md, new_version_md, new_schema_version_md)

    if is_new_tbl_version and not md.is_pure_snapshot:
        # It's a new version of a table that has a physical store, so we need to create a TableVersion instance.
        TableVersion.create_replica(md, create_store_tbl)
|
|
1368
|
+
|
|
1369
|
+
def get_additional_md(self, tbl_id: UUID) -> dict[str, Any]:
    """Return the additional_md field of the given table. Must be called inside a transaction."""
    assert Env.get().in_xact
    additional_md_q = sql.select(schema.Table.additional_md).where(self._active_tbl_clause(tbl_id=tbl_id))
    # TODO: handle concurrent drop()
    # one() raises unless the query matches exactly one row
    row = Env.get().conn.execute(additional_md_q).one()
    additional_md = row[0]
    assert isinstance(additional_md, dict)
    return additional_md
|
|
1378
|
+
|
|
1379
|
+
def update_additional_md(self, tbl_id: UUID, additional_md: dict[str, Any]) -> None:
    """
    Merge `additional_md` into the additional_md field of the given table.

    The merge is a JSON dictionary merge (`||`) of the stored value with the new one, giving preference to the
    new values. Must be called inside a write transaction.
    """
    assert self._in_write_xact
    conn = Env.get().conn
    merged_md = schema.Table.additional_md.op('||')(additional_md)
    update_stmt = (
        sql.update(schema.Table)
        .where(schema.Table.id == str(tbl_id))
        .values({schema.Table.additional_md: merged_md})
    )
    update_result = conn.execute(update_stmt)
    assert update_result.rowcount == 1, update_result.rowcount
|
|
1393
|
+
|
|
1394
|
+
@retry_loop(for_write=False)
def get_table(self, path: Path, if_not_exists: IfNotExistsParam) -> Table | None:
    """
    Look up the table at `path`.

    Returns None when no object is found and `if_not_exists` is not `IfNotExistsParam.ERROR`.
    """
    must_exist = if_not_exists == IfNotExistsParam.ERROR
    tbl = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=must_exist)
    if tbl is None:
        _logger.info(f'Skipped table {path!r} (does not exist).')
        return None

    assert isinstance(tbl, Table)
    # Clear cached metadata from tbl_version_path, in case the schema has been changed by another process.
    tbl._tbl_version_path.clear_cached_md()
    return tbl
|
|
1408
|
+
|
|
1409
|
+
def drop_table(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
    """
    Drop the table at `path`.

    A missing table is only an error when `if_not_exists` is `IfNotExistsParam.ERROR` and `force` is False;
    otherwise the call logs a message and does nothing. The drop itself is delegated to `_drop_tbl()`, and
    pending ops are rolled forward afterwards.
    """
    must_exist = if_not_exists == IfNotExistsParam.ERROR and not force

    @retry_loop(for_write=True)
    def drop_fn() -> None:
        tbl = self._get_schema_object(
            path, expected=Table, raise_if_not_exists=must_exist, lock_parent=True, lock_obj=False
        )
        if tbl is None:
            _logger.info(f'Skipped table {path!r} (does not exist).')
            return
        assert isinstance(tbl, Table)

        if isinstance(tbl, View) and tbl._tbl_version_path.is_mutable() and tbl._tbl_version_path.base.is_mutable():
            # mutable view of a mutable base: lock the base before the view, in order to avoid deadlocks with
            # concurrent inserts/updates
            self._acquire_tbl_lock(
                tbl_id=tbl._tbl_version_path.base.tbl_id, for_write=True, lock_mutable_tree=False
            )

        self._drop_tbl(tbl, force=force, is_replace=False)

    self._roll_forward_ids.clear()
    drop_fn()
    self._roll_forward()
|
|
1435
|
+
|
|
1436
|
+
def _drop_tbl(self, tbl: Table | TableVersionPath, force: bool, is_replace: bool) -> None:
    """
    Drop the table (and recursively its views, if force == True).

    `tbl` can be an instance of `Table` for a user table, or `TableVersionPath` for a hidden (system) table.

    A replica that still has dependent views and is dropped without `force` is not removed: it is moved into
    the system directory as `replica_<table id hex>` and its catalog records are left in place. When a replica
    is actually dropped and its base table is hidden (or cannot be loaded) and has no other dependents, the
    base table is dropped as well.

    Raises:
        excs.Error: if the table has dependent views, is not a replica, and `force` is False.

    Locking protocol:
    - X-lock base before X-locking any view
    - deadlock-free wrt to TableVersion.insert() (insert propagation also proceeds top-down)
    - X-locks parent dir prior to calling TableVersion.drop(): prevent concurrent creation of another SchemaObject
      in the same directory with the same name (which could lead to duplicate names if we get aborted)
    """
    is_pure_snapshot: bool
    if isinstance(tbl, TableVersionPath):
        # hidden (system) table: there is no Table instance to work with
        tvp = tbl
        tbl_id = tvp.tbl_id
        tbl = None
        is_pure_snapshot = False
    else:
        tvp = tbl._tbl_version_path
        tbl_id = tbl._id
        is_pure_snapshot = tbl._tbl_version is None

    if tbl is not None:
        self._acquire_dir_xlock(dir_id=tbl._dir_id)
    # NOTE(review): nesting reconstructed from a flattened source; the table lock is taken for hidden tables too.
    self._acquire_tbl_lock(tbl_id=tbl_id, for_write=True, lock_mutable_tree=False)

    view_ids = self.get_view_ids(tbl_id, for_update=True)
    is_replica = tvp.is_replica()
    do_drop = True

    _logger.debug(f'Preparing to drop table {tbl_id} (force={force!r}, is_replica={is_replica}).')

    if len(view_ids) > 0:
        if force:
            # recursively drop views first
            for view_id in view_ids:
                view = self.get_table_by_id(view_id, ignore_if_dropped=True)
                if view is not None:
                    self._drop_tbl(view, force=force, is_replace=is_replace)

        elif is_replica:
            # Dropping a replica with dependents and no 'force': just rename it to be a hidden table;
            # the actual table will not be dropped.
            assert tbl is not None  # can only occur for a user table
            system_dir = self.__ensure_system_dir_exists()
            new_name = f'replica_{tbl_id.hex}'
            _logger.debug(f'{tbl._path()!r} is a replica with dependents; renaming to {new_name!r}.')
            tbl._move(new_name, system_dir._id)
            do_drop = False  # don't actually clear the catalog for this table

        else:
            # It has dependents but is not a replica and no 'force', so it's an error to drop it.
            assert tbl is not None  # can only occur for a user table
            msg: str
            if is_replace:
                msg = (
                    f'{tbl._display_str()} already exists and has dependents. '
                    "Use `if_exists='replace_force'` to replace it."
                )
            else:
                msg = f'{tbl._display_str()} has dependents.'
            raise excs.Error(msg)

    # if this is a mutable view of a mutable base, advance the base's view_sn
    if isinstance(tbl, View) and tvp.is_mutable() and tvp.base.is_mutable():
        base_id = tvp.base.tbl_id
        base_tv = self.get_tbl_version(TableVersionKey(base_id, None, None), validate_initialized=True)
        base_tv.tbl_md.view_sn += 1
        result = Env.get().conn.execute(
            sql.update(schema.Table.__table__)
            .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md, dict_factory=md_dict_factory)})
            .where(schema.Table.id == base_id)
        )
        assert result.rowcount == 1, result.rowcount
        # force reload of base TV instance in order to make its state consistent with the stored metadata
        self._clear_tv_cache(base_tv.key)

    if do_drop:
        if is_pure_snapshot:
            # there is no physical table, but we still need to delete the Table record; we can do that right now
            # as part of the current transaction
            self.delete_tbl_md(tbl_id)
        else:
            # invalidate the TableVersion instance when we're done so that existing references to it can find out it
            # has been dropped
            self.mark_modified_tvs(tvp.tbl_version)

            # write TableOps to execute the drop, plus the updated Table record
            tv = tvp.tbl_version.get()
            tv.tbl_md.pending_stmt = schema.TableStatement.DROP_TABLE
            drop_ops = tv.drop()
            self.write_tbl_md(
                tv.id,
                dir_id=None,
                tbl_md=tv.tbl_md,
                version_md=None,
                schema_version_md=None,
                pending_ops=drop_ops,
                remove_from_dir=True,
            )

        # NOTE(review): nesting reconstructed from a flattened source; assumed to run for every actual drop
        # (both branches above) but not for a replica that was only renamed -- confirm against the repository.
        tvp.clear_cached_md()

    assert (
        is_replica
        or (tbl_id, None) in self._tbls  # non-replica tables must have an entry with effective_version=None
    )

    # Remove visible Table references (we do this even for a replica that was just renamed).
    # The keys are collected first so that self._tbls is not modified while it is being iterated.
    versions = [version for cached_id, version in self._tbls if cached_id == tbl_id]
    for version in versions:
        del self._tbls[tbl_id, version]

    _logger.info(f'Dropped table {tbl_id if tbl is None else repr(tbl._path())}.')

    if (
        is_replica  # if this is a replica,
        and do_drop  # and it was actually dropped (not just renamed),
        and tvp.base is not None  # and it has a base table,
    ):
        base_tbl = self.get_table_by_id(tvp.base.tbl_id)
        base_tbl_path = None if base_tbl is None else Path.parse(base_tbl._path(), allow_system_path=True)
        if (
            (base_tbl_path is None or base_tbl_path.is_system_path)  # and the base table is hidden,
            and len(self.get_view_ids(tvp.base.tbl_id, for_update=True)) == 0  # and has no other dependents,
        ):
            # then drop the base table as well (possibly recursively).
            _logger.debug(f'Dropping hidden base table {tvp.base.tbl_id} of dropped replica {tbl_id}.')
            # we just dropped the anchor on `tvp.base`; we need to clear the anchor so that we can actually
            # load the TableVersion instance in order to drop it
            self._drop_tbl(tvp.base.anchor_to(None), force=False, is_replace=False)
|
|
1571
|
+
|
|
1572
|
+
@retry_loop(for_write=True)
def create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
    """Create the directory at `path` by delegating to `_create_dir()` under `@retry_loop(for_write=True)`."""
    created_dir = self._create_dir(path, if_exists=if_exists, parents=parents)
    return created_dir
|
|
1575
|
+
|
|
1576
|
+
def _create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
    """
    Create the directory at `path` and return it.

    With `parents=True`, every ancestor directory of `path` that does not exist yet is created first.
    If `_handle_path_collision()` returns an existing object for `path`, that directory is returned and
    nothing is created.
    """
    parent: SchemaObject | None
    if parents:
        # start walking down from the root, creating whatever ancestor is missing
        parent = None
        for ancestor in path.ancestors():
            ancestor_obj = self._get_schema_object(ancestor, expected=Dir)
            assert ancestor_obj is not None or parent is not None
            if ancestor_obj is None:
                parent = Dir._create(parent._id, ancestor.name)
            else:
                parent = ancestor_obj
    else:
        parent = self._get_schema_object(path.parent)

    colliding_dir = self._handle_path_collision(path, Dir, False, if_exists)
    if colliding_dir is not None:
        assert isinstance(colliding_dir, Dir)
        return colliding_dir

    assert parent is not None
    new_dir = Dir._create(parent._id, path.name)
    Env.get().console_logger.info(f'Created directory {path!r}.')
    return new_dir
|
|
1606
|
+
|
|
1607
|
+
def drop_dir(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
    """
    Drop the directory at `path`.

    A missing directory is only an error when `if_not_exists` is `IfNotExistsParam.ERROR` and `force` is
    False; otherwise the call logs a message and does nothing. The removal itself is delegated to
    `_drop_dir()`, and pending ops are rolled forward afterwards.
    """
    must_exist = if_not_exists == IfNotExistsParam.ERROR and not force

    @retry_loop(for_write=True)
    def drop_fn() -> None:
        # only the third element of the returned tuple (the object to drop) is needed here
        _, _, dir_obj = self._prepare_dir_op(
            drop_dir_path=path.parent, drop_name=path.name, drop_expected=Dir, raise_if_not_exists=must_exist
        )
        if dir_obj is None:
            _logger.info(f'Directory {path!r} does not exist; skipped drop_dir().')
            return
        self._drop_dir(dir_obj._id, path, force=force)

    self._roll_forward_ids.clear()
    drop_fn()
    self._roll_forward()
|
|
1624
|
+
|
|
1625
|
+
def _drop_dir(self, dir_id: UUID, dir_path: Path, force: bool = False) -> None:
    """
    Remove the directory `dir_id`, including its subdirectories and tables.

    Without `force`, the directory must not contain any subdirectory or active table. Nested directories are
    always removed with `force=True`, and tables are dropped with `force=True` as well.

    Raises:
        excs.Error: if `force` is False and the directory is not empty.
    """
    conn = Env.get().conn
    if not force:
        # check for existing entries
        subdir_count_q = sql.select(sql.func.count()).select_from(schema.Dir).where(schema.Dir.parent_id == dir_id)
        num_subdirs = conn.execute(subdir_count_q).scalar()
        tbl_count_q = (
            sql.select(sql.func.count()).select_from(schema.Table).where(self._active_tbl_clause(dir_id=dir_id))
        )
        num_tbls = conn.execute(tbl_count_q).scalar()
        if num_subdirs + num_tbls > 0:
            raise excs.Error(f'Directory {dir_path!r} is not empty.')

    # drop existing subdirs
    self._acquire_dir_xlock(dir_id=dir_id)
    subdirs_q = sql.select(schema.Dir).where(schema.Dir.parent_id == dir_id)
    for subdir_row in conn.execute(subdirs_q).all():
        self._drop_dir(subdir_row.id, dir_path.append(subdir_row.md['name']), force=True)

    # drop existing tables
    tbls_q = sql.select(schema.Table).where(self._active_tbl_clause(dir_id=dir_id)).with_for_update()
    for tbl_row in conn.execute(tbls_q).all():
        tbl = self.get_table_by_id(tbl_row.id, ignore_if_dropped=True)
        # a None here means the table is already gone, e.g. a view of a base we dropped earlier in this loop
        if tbl is not None:
            self._drop_tbl(tbl, force=True, is_replace=False)

    conn.execute(sql.delete(schema.Dir).where(schema.Dir.id == dir_id))
    _logger.info(f'Removed directory {dir_path!r}.')
|
|
1653
|
+
|
|
1654
|
+
def get_view_ids(self, tbl_id: UUID, for_update: bool = False) -> list[UUID]:
    """
    Return the ids of views that directly reference the given table.

    With `for_update=True`, the query selecting the view records is issued with `with_for_update()`.

    Raises:
        excs.Error: if there is no active Table record for `tbl_id`.
    """
    conn = Env.get().conn

    # check whether this table still exists
    exists_q = sql.select(sql.func.count()).select_from(schema.Table).where(self._active_tbl_clause(tbl_id=tbl_id))
    if conn.execute(exists_q).scalar() == 0:
        raise excs.Error(self._dropped_tbl_error_msg(tbl_id))

    # a view is matched on the first element of the first entry of its view_md.base_versions
    direct_base_id = schema.Table.md['view_md']['base_versions'][0][0].astext
    views_q = sql.select(schema.Table.id).where(direct_base_id == tbl_id.hex).where(self._active_tbl_clause())
    if for_update:
        views_q = views_q.with_for_update()
    return [row[0] for row in conn.execute(views_q).all()]
|
|
1671
|
+
|
|
1672
|
+
def get_tbl_version(
    self, key: TableVersionKey, *, check_pending_ops: bool = True, validate_initialized: bool = False
) -> TableVersion | None:
    """
    Returns the TableVersion instance for the given table and version and updates the cache.

    If present in the cache and the instance isn't validated, validates version and view_sn against the stored
    metadata.

    Args:
        key: identifies the requested instance (table id, effective version, anchor table id).
        check_pending_ops: forwarded to `_load_tbl_version()`; if True, validation of a cached instance also
            restricts the metadata lookup to records matching `_active_tbl_clause()`.
        validate_initialized: if True, additionally asserts that the returned instance is initialized.

    Returns:
        The cached (and now validated) or freshly loaded TableVersion.

    Raises:
        excs.Error: if the stored metadata for the table cannot be found while validating a cached instance.
    """
    # we need a transaction here, if we're not already in one; if this starts a new transaction,
    # the returned TableVersion instance will not be validated
    with self.begin_xact(for_write=False) as conn:
        tv = self._tbl_versions.get(key)
        if tv is None:
            tv = self._load_tbl_version(key, check_pending_ops=check_pending_ops)
        elif not tv.is_validated:
            # only live instances are invalidated
            assert key.effective_version is None
            where_clause: sql.ColumnElement[bool]
            if check_pending_ops:
                # if we don't want to see pending ops, we also don't want to see dropped tables
                where_clause = self._active_tbl_clause(tbl_id=key.tbl_id)
            else:
                where_clause = schema.Table.id == key.tbl_id

            # fetched once: this row serves both as the existence check and as the source of the stored
            # current_version/view_sn for the non-replica comparison below
            q = sql.select(schema.Table.md).where(where_clause)
            row = conn.execute(q).one_or_none()
            if row is None:
                raise excs.Error(self._dropped_tbl_error_msg(key.tbl_id))

            reload = False

            if tv.anchor_tbl_id is None:
                # live non-replica table; compare our cached TableMd.current_version/view_sn to what's stored
                current_version, view_sn = row.md['current_version'], row.md['view_sn']
                if current_version != tv.version or view_sn != tv.tbl_md.view_sn:
                    _logger.debug(
                        f'reloading metadata for live table {key.tbl_id} '
                        f'(cached/current version: {tv.version}/{current_version}, '
                        f'cached/current view_sn: {tv.tbl_md.view_sn}/{view_sn})'
                    )
                    reload = True

            else:
                # live replica table; use the anchored version
                anchor_tbl_version_md = self.head_version_md(tv.anchor_tbl_id)
                assert anchor_tbl_version_md is not None
                q = sql.select(schema.TableVersion.md)
                if check_pending_ops:
                    q = q.join(schema.Table, schema.Table.id == schema.TableVersion.tbl_id).where(
                        self._active_tbl_clause(tbl_id=key.tbl_id)
                    )
                # most recent version created at or before the anchor's head version
                q = (
                    q.where(schema.TableVersion.tbl_id == key.tbl_id)
                    .where(schema.TableVersion.md['created_at'].cast(sql.Float) <= anchor_tbl_version_md.created_at)
                    .order_by(schema.TableVersion.md['created_at'].cast(sql.Float).desc())
                    .limit(1)
                )
                row = conn.execute(q).one_or_none()
                if row is None:
                    raise excs.Error(self._dropped_tbl_error_msg(key.tbl_id))
                version = row.md['version']
                if version != tv.version:  # TODO: How will view_sn work for replicas?
                    _logger.debug(
                        f'reloading metadata for replica table {key.tbl_id} (anchor {key.anchor_tbl_id}) '
                        f'(cached/anchored version: {tv.version}/{version})'
                    )
                    reload = True

            # the stored version can be behind TableVersion.version, because we don't roll back the in-memory
            # metadata changes after a failed update operation
            if reload:
                # the cached metadata is invalid
                tv = self._load_tbl_version(key, check_pending_ops=check_pending_ops)
            else:
                # the cached metadata is valid
                tv.is_validated = True

        assert tv.anchor_tbl_id == key.anchor_tbl_id
        assert tv.is_validated, f'{key} not validated\n{tv.__dict__}\n{self._debug_str()}'
        if validate_initialized:
            assert tv.is_initialized, f'{key} not initialized\n{tv.__dict__}\n{self._debug_str()}'
        return tv
|
|
1759
|
+
|
|
1760
|
+
def remove_tbl_version(self, key: TableVersionKey) -> None:
    """Evict the TableVersion cached under `key`; the key must currently be present in the cache."""
    assert isinstance(key, TableVersionKey)
    cache = self._tbl_versions
    assert key in cache
    cache.pop(key)
|
|
1764
|
+
|
|
1765
|
+
def get_dir(self, dir_id: UUID, for_update: bool = False) -> Dir | None:
    """
    Fetch the directory with the given id.

    Args:
        dir_id: id of the directory record.
        for_update: if True, `_acquire_dir_xlock(dir_id=dir_id)` is called before the record is read.

    Returns:
        A Dir built from the stored record (id, parent id, name), or None if there is no such record.
    """
    conn = Env.get().conn
    if for_update:
        self._acquire_dir_xlock(dir_id=dir_id)
    row = conn.execute(sql.select(schema.Dir).where(schema.Dir.id == dir_id)).one_or_none()
    if row is not None:
        record = schema.Dir(**row._mapping)
        return Dir(record.id, record.parent_id, record.md['name'])
    return None
|
|
1776
|
+
|
|
1777
|
+
def _get_dir(self, path: Path, lock_dir: bool = False) -> schema.Dir | None:
    """
    Resolve `path` to its stored directory record for the current user (`Env.get().user`).

    Args:
        path: directory path to resolve; ancestors are resolved recursively.
        lock_dir: if True, X-locks the target directory; ancestors are always resolved without locking.

    Returns:
        The schema.Dir record, or None if the directory or one of its ancestors is not found.
    """
    user = Env.get().user
    conn = Env.get().conn

    if not path.is_root:
        # resolve the parent first; it is never locked
        parent_dir = self._get_dir(path.parent, lock_dir=False)
        if parent_dir is None:
            return None
        if lock_dir:
            self._acquire_dir_xlock(parent_id=parent_dir.id, dir_name=path.name)
        dir_q = sql.select(schema.Dir).where(
            schema.Dir.parent_id == parent_dir.id,
            schema.Dir.md['name'].astext == path.name,
            schema.Dir.md['user'].astext == user,
        )
    else:
        # the root is the user's directory record without a parent
        if lock_dir:
            self._acquire_dir_xlock(dir_name='')
        dir_q = sql.select(schema.Dir).where(schema.Dir.parent_id.is_(None), schema.Dir.md['user'].astext == user)

    row = conn.execute(dir_q).one_or_none()
    if row is None:
        return None
    return schema.Dir(**row._mapping)
|
|
1802
|
+
|
|
1803
|
+
def _load_tbl(self, tbl_id: UUID, ignore_pending_drop: bool = False) -> Table | None:
    """
    Loads metadata for the table with the given id and caches it.

    Args:
        tbl_id: id of the table to load.
        ignore_pending_drop: if True, a table whose stored `pending_stmt` is DROP_TABLE is treated as
            nonexistent.

    Returns:
        The InsertableTable (base, non-replica table) or View that was registered in `self._tbls`, or None if
        there is no matching table record, the table is being dropped (with `ignore_pending_drop`), or the
        table is a replica for which `head_version_md()` finds no version.

    Raises:
        PendingTableOpsError: if PendingTableOp records exist for the table.
    """
    from .insertable_table import InsertableTable
    from .view import View

    assert tbl_id is not None
    _logger.info(f'Loading table {tbl_id}')

    conn = Env.get().conn
    q: sql.Executable

    if ignore_pending_drop:
        # check whether this table is in the process of being dropped; a record that is already gone is
        # reported as None as well, consistent with the lookup further down
        q = sql.select(schema.Table.md).where(schema.Table.id == tbl_id)
        row = conn.execute(q).one_or_none()
        if row is None or row.md['pending_stmt'] == schema.TableStatement.DROP_TABLE.value:
            return None

    # check for pending ops
    q = sql.select(sql.func.count()).where(schema.PendingTableOp.tbl_id == tbl_id)
    has_pending_ops = conn.execute(q).scalar() > 0
    if has_pending_ops:
        raise PendingTableOpsError(tbl_id)

    # table record joined with the schema version record it currently points to
    q = (
        sql.select(schema.Table, schema.TableSchemaVersion)
        .join(schema.TableSchemaVersion)
        .where(schema.Table.id == schema.TableSchemaVersion.tbl_id)
        .where(
            schema.Table.md['current_schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version
        )
        .where(schema.Table.id == tbl_id)
    )
    row = conn.execute(q).one_or_none()
    if row is None:
        return None
    tbl_record, _ = _unpack_row(row, [schema.Table, schema.TableSchemaVersion])

    tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
    view_md = tbl_md.view_md

    if view_md is None and not tbl_md.is_replica:
        # this is a base, non-replica table
        key = TableVersionKey(tbl_id, None, None)
        if key not in self._tbl_versions:
            self._load_tbl_version(key)
        tbl = InsertableTable(tbl_record.dir_id, TableVersionHandle(key))
        self._tbls[tbl_id, None] = tbl
        return tbl

    # this is a view; determine the sequence of TableVersions to load
    tbl_version_path: list[tuple[UUID, int | None]] = []
    anchor_tbl_id = UUID(tbl_md.tbl_id) if tbl_md.is_replica else None
    if not tbl_md.is_pure_snapshot:
        # a pure snapshot has no physical table backing it and contributes only its bases;
        # everything else contributes its own TableVersion (snapshots only have version 0)
        effective_version = 0 if view_md is not None and view_md.is_snapshot else None
        tbl_version_path.append((tbl_id, effective_version))

    if view_md is not None:
        tbl_version_path.extend((UUID(ancestor_id), version) for ancestor_id, version in view_md.base_versions)

    if anchor_tbl_id is not None and self.head_version_md(anchor_tbl_id) is None:
        return None

    # load TableVersions, starting at the root
    view_path: TableVersionPath | None = None
    for path_tbl_id, effective_version in reversed(tbl_version_path):
        # anchor the path elements that have effective_version == None
        key = TableVersionKey(
            path_tbl_id, effective_version, None if effective_version is not None else anchor_tbl_id
        )
        if key not in self._tbl_versions:
            self._load_tbl_version(key)
        view_path = TableVersionPath(TableVersionHandle(key), base=view_path)
    view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=tbl_md.is_pure_snapshot)
    self._tbls[tbl_id, None] = view
    return view
|
|
1883
|
+
|
|
1884
|
+
def _load_tbl_at_version(self, tbl_id: UUID, version: int) -> Table | None:
    """
    Load the given version of a table as a View (created with `snapshot_only=True`) and cache it.

    Args:
        tbl_id: id of the table.
        version: the table version to load.

    Returns:
        The View that was registered under `self._tbls[tbl_id, version]`, or None if no matching
        Table/TableVersion records exist.
    """
    from .view import View

    # fetch the Table record together with the TableVersion record of the requested version
    lookup_q: sql.Executable = (
        sql.select(schema.Table, schema.TableVersion)
        .join(schema.TableVersion)
        .where(
            schema.Table.id == tbl_id,
            schema.Table.id == schema.TableVersion.tbl_id,
            schema.TableVersion.version == version,
        )
    )
    row = Env.get().conn.execute(lookup_q).one_or_none()
    if row is None:
        return None

    tbl_record, version_record = _unpack_row(row, [schema.Table, schema.TableVersion])
    tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
    version_md = schema.md_from_dict(schema.VersionMd, version_record.md)

    path = self.construct_tvp(tbl_id, version, tbl_md.ancestors, version_md.created_at)
    versioned_view = View(tbl_id, tbl_record.dir_id, tbl_md.name, path, snapshot_only=True)
    self._tbls[tbl_id, version] = versioned_view
    return versioned_view
|
|
1907
|
+
|
|
1908
|
+
def construct_tvp(
    self, tbl_id: UUID, version: int, ancestors_of_live_tbl: schema.TableVersionPath, created_at: float
) -> TableVersionPath:
    """
    Construct the TableVersionPath for the specified version of the given table.

    Fixed versions in `ancestors_of_live_tbl` (the view-over-snapshot scenario) are preserved; for every
    ancestor whose version is `None`, the implied version is filled in.

    Args:
        tbl_id: id of the table.
        version: version of the table the path is constructed for.
        ancestors_of_live_tbl: ancestor table ids with their fixed versions (or None), taken from the table's
            metadata.
        created_at: creation timestamp of the requested table version.

    Returns:
        The TableVersionPath for (tbl_id, version), with the last ancestor as the innermost base.

    Raises:
        excs.Error: if no version created at or before `created_at` is found for a live ancestor.
    """
    # TODO: Currently, we reconstruct the ancestors by inspecting the created_at timestamps of the table and its
    #     ancestors' versions. In the future, we should store the relevant TableVersionPaths in the database, so
    #     that we don't need to rely on timestamps (which might be nondeterministic in distributed execution
    #     scenarios).

    assert Env.get().conn is not None

    # Resolve a concrete version for the table and each of its ancestors, in table-to-base order:
    # - a fixed ancestor version is taken over as is
    # - a live (floating) ancestor gets the version whose created_at equals or most nearly precedes `created_at`
    resolved: list[tuple[UUID, int]] = [(tbl_id, version)]
    for ancestor_id, ancestor_version in ancestors_of_live_tbl:
        if ancestor_version is None:
            nearest_q = (
                sql.select(schema.TableVersion)
                .where(schema.TableVersion.tbl_id == ancestor_id)
                .where(schema.TableVersion.md['created_at'].cast(sql.Float) <= created_at)
                .order_by(schema.TableVersion.md['created_at'].cast(sql.Float).desc())
                .limit(1)
            )
            row = Env.get().conn.execute(nearest_q).one_or_none()
            if row is None:
                # This can happen if an ancestor version is garbage collected; it can also happen in
                # rare circumstances involving table versions created specifically with Pixeltable 0.4.3.
                _logger.info(f'Ancestor {ancestor_id} not found for table {tbl_id}:{version}')
                raise excs.Error('The specified table version is no longer valid and cannot be retrieved.')
            nearest_record = _unpack_row(row, [schema.TableVersion])[0]
            nearest_md = schema.md_from_dict(schema.VersionMd, nearest_record.md)
            assert nearest_md.created_at <= created_at
            resolved.append((UUID(ancestor_id), nearest_md.version))
        else:
            resolved.append((UUID(ancestor_id), ancestor_version))

    base_first_keys = [TableVersionKey(anc_id, anc_version, None) for anc_id, anc_version in reversed(resolved)]

    # Make sure every TableVersion along the path is loaded (base table first).
    for key in base_first_keys:
        if key not in self._tbl_versions:
            self._load_tbl_version(key)

    # Chain the path elements, each one using the previously built path as its base.
    tvp: TableVersionPath | None = None
    for key in base_first_keys:
        tvp = TableVersionPath(TableVersionHandle(key), base=tvp)

    return tvp
|
|
1966
|
+
|
|
1967
|
+
@retry_loop(for_write=False)
def collect_tbl_history(self, tbl_id: UUID, n: int | None) -> list[TableVersionMd]:
    """Public entry point: delegates to `_collect_tbl_history()` under `retry_loop(for_write=False)`."""
    history = self._collect_tbl_history(tbl_id, n)
    return history
|
|
1970
|
+
|
|
1971
|
+
def _collect_tbl_history(self, tbl_id: UUID, n: int | None) -> list[TableVersionMd]:
    """
    Returns the metadata of up to n versions of the table with the given UUID.

    Args:
        tbl_id: the UUID of the table to collect history for.
        n: Optional limit on the maximum number of versions returned.

    Returns:
        One TableVersionMd (table, version and schema version metadata) per table version, ordered by
        descending version number.
    """
    schema_version_matches = (
        schema.TableVersion.md['schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version
    )
    history_q = (
        sql.select(schema.Table, schema.TableVersion, schema.TableSchemaVersion)
        .join(schema.TableVersion)
        .join(schema.TableSchemaVersion)
        .where(self._active_tbl_clause(tbl_id=tbl_id))
        .where(schema.TableVersion.tbl_id == tbl_id)
        .where(schema.TableSchemaVersion.tbl_id == tbl_id)
        .where(schema_version_matches)
        .order_by(schema.TableVersion.version.desc())
    )
    if n is not None:
        history_q = history_q.limit(n)

    history: list[TableVersionMd] = []
    for row in Env.get().session.execute(history_q).fetchall():
        history.append(
            TableVersionMd(
                tbl_md=schema.md_from_dict(schema.TableMd, row.Table.md),
                version_md=schema.md_from_dict(schema.VersionMd, row.TableVersion.md),
                schema_version_md=schema.md_from_dict(schema.SchemaVersionMd, row.TableSchemaVersion.md),
            )
        )
    return history
|
|
2006
|
+
|
|
2007
|
+
def head_version_md(self, tbl_id: UUID) -> schema.VersionMd | None:
    """
    Returns the VersionMd of the highest-numbered non-fragment version of the given table, or None if the
    table has no such version record.
    """
    head_q = (
        sql.select(schema.TableVersion.md)
        .where(
            schema.TableVersion.tbl_id == tbl_id,
            schema.TableVersion.md['is_fragment'].astext == 'false',
        )
        .order_by(schema.TableVersion.md['version'].cast(sql.Integer).desc())
        .limit(1)
    )
    row = Env.get().conn.execute(head_q).one_or_none()
    if row is None:
        return None
    md_dict = row[0]
    assert isinstance(md_dict, dict)
    return schema.md_from_dict(schema.VersionMd, md_dict)
|
|
2025
|
+
|
|
2026
|
+
def load_tbl_md(self, key: TableVersionKey) -> TableVersionMd:
    """
    Loads the table, version and schema version metadata from the store for the given key.

    Which version is loaded depends on the key:
    - `effective_version` set: that specific version
    - otherwise, `anchor_tbl_id` set: the most recent version created at or before the anchor table's head
      version
    - otherwise: the table's current version

    Raises:
        excs.Error: if no matching metadata records are found.
    """
    anchor_timestamp: float | None = None
    if key.anchor_tbl_id is not None:
        # `anchor_tbl_id` must exist and have at least one non-fragment version, or else this isn't
        # a valid TableVersion specification.
        anchor_head_md = self.head_version_md(key.anchor_tbl_id)
        assert anchor_head_md is not None
        anchor_timestamp = anchor_head_md.created_at

    conn = Env.get().conn

    # part shared by all three cases: the three records of this table, joined
    md_q = (
        sql.select(schema.Table, schema.TableVersion, schema.TableSchemaVersion)
        .select_from(schema.Table)
        .join(schema.TableVersion)
        .join(schema.TableSchemaVersion)
        .where(
            schema.Table.id == key.tbl_id,
            schema.TableVersion.tbl_id == key.tbl_id,
            schema.TableSchemaVersion.tbl_id == key.tbl_id,
        )
    )

    if key.effective_version is not None:
        # we are loading a specific version
        # SELECT *
        # FROM Table t
        # JOIN TableVersion tv ON (tv.tbl_id = tbl_id AND tv.version = effective_version)
        # JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND tv.md.schema_version = tsv.schema_version)
        # WHERE t.id = tbl_id
        md_q = md_q.where(
            schema.TableVersion.md['version'].cast(sql.Integer) == key.effective_version,
            schema.TableVersion.md['schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version,
        )
    elif anchor_timestamp is not None:
        # we are loading the version that is anchored to the head version of another table (see TableVersion
        # docstring for details)
        # SELECT *
        # FROM Table t
        # JOIN TableVersion tv ON (tv.tbl_id = tbl_id)
        # JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND tv.md.schema_version = tsv.schema_version)
        # WHERE t.id = tbl_id AND tv.md.created_at <= anchor_timestamp
        # ORDER BY tv.md.created_at DESC
        # LIMIT 1
        created_at_col = schema.TableVersion.md['created_at'].cast(sql.Float)
        md_q = md_q.where(
            created_at_col <= anchor_timestamp,
            schema.TableVersion.md['schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version,
        )
        md_q = md_q.order_by(schema.TableVersion.md['created_at'].cast(sql.Float).desc()).limit(1)
    else:
        # we are loading the current version
        # SELECT *
        # FROM Table t
        # JOIN TableVersion tv ON (tv.tbl_id = tbl_id AND t.current_version = tv.version)
        # JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND t.current_schema_version = tsv.schema_version)
        # WHERE t.id = tbl_id
        md_q = md_q.where(
            schema.Table.md['current_version'].cast(sql.Integer) == schema.TableVersion.version,
            schema.Table.md['current_schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version,
        )

    row = conn.execute(md_q).one_or_none()
    if row is None:
        raise excs.Error(self._dropped_tbl_error_msg(key.tbl_id))

    record_classes = [schema.Table, schema.TableVersion, schema.TableSchemaVersion]
    tbl_record, version_record, schema_version_record = _unpack_row(row, record_classes)
    assert tbl_record.id == key.tbl_id

    return TableVersionMd(
        schema.md_from_dict(schema.TableMd, tbl_record.md),
        schema.md_from_dict(schema.VersionMd, version_record.md),
        schema.md_from_dict(schema.SchemaVersionMd, schema_version_record.md),
    )
|
|
2105
|
+
|
|
2106
|
+
def write_tbl_md(
    self,
    tbl_id: UUID,
    dir_id: UUID | None,
    tbl_md: schema.TableMd | None,
    version_md: schema.VersionMd | None,
    schema_version_md: schema.SchemaVersionMd | None,
    pending_ops: list[TableOp] | None = None,
    remove_from_dir: bool = False,
) -> None:
    """
    Stores metadata to the DB and adds tbl_id to self._roll_forward_ids if pending_ops is specified.

    Must be called inside a write transaction.

    Args:
        tbl_id: UUID of the table to store metadata for.
        dir_id: If specified, a new table record is inserted into the given directory; if None, the existing
            table record is updated (exactly one record must be affected).
        tbl_md: If specified, `tbl_md` will be inserted, or updated (only one such record can exist per UUID).
        version_md: If specified, inserted as a new record when no record for (tbl_id, version) exists yet;
            otherwise the existing record's metadata is overwritten, which is only valid if nothing but
            `is_fragment` and `additional_md` differs.
        schema_version_md: If specified, added as a new record.
        pending_ops: If specified (requires `tbl_md`), `tbl_md.tbl_state` is set to ROLLFORWARD and every op is
            stored as a PendingTableOp record. The table must not have any stored pending ops yet.
        remove_from_dir: If True when updating an existing table record, the record's dir_id is set to None.

    NOTE(review): a duplicate `schema_version_md` record is not checked for here; presumably the flush fails
    with a primary key violation in that case - confirm against the schema.
    """
    assert self._in_write_xact
    assert version_md is None or version_md.created_at > 0.0
    assert pending_ops is None or len(pending_ops) > 0
    assert pending_ops is None or tbl_md is not None  # if we write pending ops, we must also write new tbl_md
    session = Env.get().session
    tbl_id_str = str(tbl_id)

    # Step 1: insert or update the table record.
    if tbl_md is not None:
        assert tbl_md.tbl_id == tbl_id_str
        if version_md is not None:
            assert tbl_md.current_version == version_md.version
            assert tbl_md.current_schema_version == version_md.schema_version
        if schema_version_md is not None:
            assert tbl_md.current_schema_version == schema_version_md.schema_version
        if pending_ops is not None:
            assert tbl_md.pending_stmt is not None
            for expected_sn, op in enumerate(pending_ops):
                assert op.tbl_id == tbl_id_str
                assert op.op_sn == expected_sn
                assert op.num_ops == len(pending_ops)
            tbl_md.tbl_state = schema.TableState.ROLLFORWARD
            self._roll_forward_ids.add(tbl_id)

        if dir_id is None:
            # the table already exists: overwrite its metadata
            values: dict[Any, Any] = {schema.Table.md: dataclasses.asdict(tbl_md, dict_factory=md_dict_factory)}
            if remove_from_dir:
                values[schema.Table.dir_id] = None
            update_stmt = sql.update(schema.Table.__table__).values(values).where(schema.Table.id == tbl_id)
            result = session.execute(update_stmt)
            assert isinstance(result, sql.CursorResult)
            assert result.rowcount == 1, result.rowcount
        else:
            # the table is being created: add its record to the given directory
            session.add(
                schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_md, dict_factory=md_dict_factory))
            )

    # Step 2: insert or update the table version record.
    if version_md is not None:
        assert version_md.tbl_id == tbl_id_str
        if schema_version_md is not None:
            assert version_md.schema_version == schema_version_md.schema_version
        existing_versions = (
            session.query(schema.TableVersion)
            .filter(schema.TableVersion.tbl_id == tbl_id, schema.TableVersion.version == version_md.version)
            .all()
        )
        if len(existing_versions) == 0:
            # It's a new table version; insert a new record in the DB for it.
            session.add(
                schema.TableVersion(tbl_id=tbl_id, version=version_md.version, md=dataclasses.asdict(version_md))
            )
        else:
            # This table version already exists; update it.
            assert len(existing_versions) == 1  # must be unique
            stored_md = existing_versions[0].md
            # Validate that the only fields that can change are 'is_fragment' and 'additional_md'.
            comparable_md = dataclasses.replace(
                version_md, is_fragment=stored_md['is_fragment'], additional_md=stored_md['additional_md']
            )
            assert stored_md == dataclasses.asdict(comparable_md)
            result = session.execute(
                sql.update(schema.TableVersion.__table__)
                .values({schema.TableVersion.md: dataclasses.asdict(version_md)})
                .where(schema.TableVersion.tbl_id == tbl_id, schema.TableVersion.version == version_md.version)
            )
            assert isinstance(result, sql.CursorResult)
            assert result.rowcount == 1, result.rowcount

    # Step 3: add the schema version record.
    if schema_version_md is not None:
        assert schema_version_md.tbl_id == tbl_id_str
        session.add(
            schema.TableSchemaVersion(
                tbl_id=tbl_id,
                schema_version=schema_version_md.schema_version,
                md=dataclasses.asdict(schema_version_md),
            )
        )

    # make sure we don't have any pending ops
    assert session.query(schema.PendingTableOp).filter(schema.PendingTableOp.tbl_id == tbl_id).count() == 0

    # Step 4: add the pending ops.
    if pending_ops is not None:
        for op in pending_ops:
            session.add(schema.PendingTableOp(tbl_id=tbl_id, op_sn=op.op_sn, op=dataclasses.asdict(op)))

    session.flush()  # Inform SQLAlchemy that we want to write these changes to the DB.
|
|
2220
|
+
|
|
2221
|
+
def store_update_status(self, tbl_id: UUID, version: int, status: UpdateStatus) -> None:
    """
    Set the `update_status` entry of the stored TableVersion.md for the given table version.

    Must be called inside a write transaction; exactly one record is expected to be updated.
    """
    assert self._in_write_xact
    conn = Env.get().conn

    # apply the `||` operator to the stored md and a dict holding only the update_status entry
    status_patch = {'update_status': dataclasses.asdict(status)}
    merged_md = schema.TableVersion.md.op('||')(status_patch)
    is_target_version = (schema.TableVersion.tbl_id == tbl_id, schema.TableVersion.version == version)
    update_stmt = sql.update(schema.TableVersion).where(*is_target_version).values(md=merged_md)

    outcome = conn.execute(update_stmt)
    assert outcome.rowcount == 1, outcome.rowcount
|
|
2234
|
+
|
|
2235
|
+
def delete_tbl_md(self, tbl_id: UUID) -> None:
    """
    Deletes all table metadata from the store for the given table UUID: schema version, version and
    pending-op records first, the table record last.
    """
    conn = Env.get().conn
    _logger.info(f'delete_tbl_md({tbl_id})')
    # (record class, column holding the table id), in deletion order
    deletion_targets = (
        (schema.TableSchemaVersion, schema.TableSchemaVersion.tbl_id),
        (schema.TableVersion, schema.TableVersion.tbl_id),
        (schema.PendingTableOp, schema.PendingTableOp.tbl_id),
        (schema.Table, schema.Table.id),
    )
    for record_cls, tbl_id_col in deletion_targets:
        conn.execute(sql.delete(record_cls.__table__).where(tbl_id_col == tbl_id))
|
|
2245
|
+
|
|
2246
|
+
def load_replica_md(self, tbl: Table) -> list[TableVersionMd]:
    """
    Load metadata for the given table along with all its ancestors, marked up as replica metadata.

    The values of TableMd.current_version and TableMd.current_schema_version are adjusted to ensure that the
    metadata represent a valid (internally consistent) table state.

    Returns:
        One TableVersionMd per table, with `tbl`'s own metadata first.
    """
    # TODO: First acquire X-locks for all relevant metadata entries
    # TODO: handle concurrent drop()

    # One entry for every table in the TableVersionPath of `tbl`.
    md: list[TableVersionMd] = []
    for tv in tbl._tbl_version_path.get_tbl_versions():
        md.append(self.load_tbl_md(tv.key))

    # The metadata of a named pure snapshot doesn't appear in its TableVersionPath, so it has to be loaded
    # separately and put in front.
    if isinstance(tbl, View) and tbl._is_named_pure_snapshot():
        md.insert(0, self.load_tbl_md(TableVersionKey(tbl._id, 0, None)))

    for position, entry in enumerate(md):
        # Every entry is flagged as a replica.
        entry.tbl_md.is_replica = True
        # For replica metadata, we guarantee that the current_version and current_schema_version of TableMd
        # match the corresponding values in TableVersionMd and TableSchemaVersionMd. This is to ensure that,
        # when the metadata is later stored in the catalog of a different Pixeltable instance, the values of
        # current_version and current_schema_version will always point to versions that are known to the
        # destination catalog.
        entry.tbl_md.current_version = entry.version_md.version
        entry.tbl_md.current_schema_version = entry.schema_version_md.schema_version
        if position > 0:
            # The table version of every proper ancestor is ephemeral; it does not represent a queryable
            # table version (the data might be incomplete, since we have only retrieved one of its views, not
            # the table itself).
            entry.version_md.is_fragment = True

    return md
|
|
2282
|
+
|
|
2283
|
+
def _load_tbl_version(self, key: TableVersionKey, *, check_pending_ops: bool = True) -> TableVersion | None:
    """Creates TableVersion instance from stored metadata and registers it in _tbl_versions.

    Args:
        key: identifies the table version to load (table id, effective version, anchor table id).
        check_pending_ops: if True, refuse to load a table that is marked as being dropped or that has
            pending table ops recorded for it.

    Returns:
        The newly created TableVersion, after init() has been called on it.

    Raises:
        excs.Error: if `check_pending_ops` is True and the table's pending statement is DROP_TABLE.
        PendingTableOpsError: if `check_pending_ops` is True and the pending-ops query finds at least one row.
    """
    tv_md = self.load_tbl_md(key)
    tbl_md = tv_md.tbl_md
    version_md = tv_md.version_md
    schema_version_md = tv_md.schema_version_md
    view_md = tbl_md.view_md

    conn = Env.get().conn

    if check_pending_ops:
        # if we care about pending ops, we also care whether the table is in the process of getting dropped
        if tbl_md.pending_stmt == schema.TableStatement.DROP_TABLE:
            raise excs.Error(self._dropped_tbl_error_msg(key.tbl_id))

        pending_ops_q = (
            sql.select(sql.func.count())
            .select_from(schema.Table)
            .join(schema.PendingTableOp)
            .where(schema.PendingTableOp.tbl_id == key.tbl_id)
            .where(schema.Table.id == key.tbl_id)
        )
        if key.effective_version is not None:
            # we only care about pending ops if the requested version is the current version
            pending_ops_q = pending_ops_q.where(
                sql.text(f"({schema.Table.__table__}.md->>'current_version')::int = {key.effective_version}")
            )
        has_pending_ops = conn.execute(pending_ops_q).scalar() > 0
        if has_pending_ops:
            raise PendingTableOpsError(key.tbl_id)

    # load mutable view ids for mutable TableVersions
    mutable_view_ids: list[UUID] = []
    if key.effective_version is None and key.anchor_tbl_id is None and not tbl_md.is_replica:
        # select tables whose first base_versions entry names this table and carries a NULL version
        q = (
            sql.select(schema.Table.id)
            .where(schema.Table.md['view_md']['base_versions'][0][0].astext == key.tbl_id.hex)
            # is_(None) renders the same IS NULL as `== None`, without the E711 comparison
            .where(schema.Table.md['view_md']['base_versions'][0][1].astext.is_(None))
        )
        mutable_view_ids = [r[0] for r in conn.execute(q).all()]

    mutable_views = [TableVersionHandle(TableVersionKey(view_id, None, None)) for view_id in mutable_view_ids]

    tbl_version: TableVersion
    if view_md is None:
        # this is a base table
        tbl_version = TableVersion(key, tbl_md, version_md, schema_version_md, mutable_views)
    else:
        assert len(view_md.base_versions) > 0  # a view needs to have a base
        assert (
            not tv_md.is_pure_snapshot
        )  # a pure snapshot doesn't have a physical table backing it, no point in loading it

        base: TableVersionHandle
        base_path: TableVersionPath | None = None  # needed for live view
        if view_md.is_snapshot:
            # a snapshot only needs a handle to its immediate base, keyed with this key's anchor table id
            base_tbl_id, base_version = view_md.base_versions[0][0], view_md.base_versions[0][1]
            base = TableVersionHandle(TableVersionKey(UUID(base_tbl_id), base_version, key.anchor_tbl_id))
        else:
            base_path = TableVersionPath.from_md(view_md.base_versions)
            base = base_path.tbl_version

        tbl_version = TableVersion(
            key, tbl_md, version_md, schema_version_md, mutable_views, base_path=base_path, base=base
        )

    # register the instance before init()
    self._tbl_versions[key] = tbl_version
    # register this instance as modified, so that it gets purged if the transaction fails, it may not be
    # fully initialized
    self.mark_modified_tvs(tbl_version.handle)
    tbl_version.init()
    return tbl_version
|
|
50
2357
|
|
|
51
2358
|
def _init_store(self) -> None:
    """Performs the one-time setup of the stored catalog. Idempotent."""
    # Delegate to create_user() with no user name; it is a no-op when a matching directory record exists.
    self.create_user(user=None)
    _logger.info('Initialized catalog.')
|
|
2362
|
+
|
|
2363
|
+
def create_user(self, user: str | None) -> None:
    """
    Ensures the catalog holds a root directory record for `user`.

    If at least one directory record already carries this user in its metadata, nothing is written.
    Otherwise a directory with an empty name and no parent is inserted and flushed.
    """
    with Env.get().begin_xact():
        session = Env.get().session

        # Look for any directory whose metadata names this user.
        dirs_for_user = session.query(schema.Dir).where(schema.Dir.md['user'].astext == user)
        if dirs_for_user.count() > 0:
            # Already present: nothing to create.
            return

        root_md = schema.DirMd(name='', user=user, additional_md={})
        root_record = schema.Dir(parent_id=None, md=dataclasses.asdict(root_md))
        session.add(root_record)
        session.flush()
        _logger.info(f'Added root directory record for user: {user!r}')
|
|
82
2379
|
|
|
83
|
-
def
|
|
84
|
-
|
|
85
|
-
|
|
2380
|
+
def _handle_path_collision(
    self,
    path: Path,
    expected_obj_type: type[SchemaObject],
    expected_snapshot: bool,
    if_exists: IfExistsParam,
    *,
    base: TableVersionPath | None = None,
) -> SchemaObject | None:
    """Resolves a possible name collision at `path` according to `if_exists`.

    Args:
        path: path at which a new schema object is about to be created.
        expected_obj_type: type the existing object must have for IGNORE/REPLACE to be acceptable.
        expected_snapshot: if True, an existing object must additionally be a snapshot view.
        if_exists: collision policy.
        base: if given, a replaced object must not be one of the table versions on this path.

    Returns:
        The existing object when `if_exists` is IGNORE and its type matches; None when nothing exists at
        `path`, or after the existing object has been dropped (REPLACE / REPLACE_FORCE).

    Raises:
        excs.Error: if an object exists and `if_exists` is ERROR; if the existing object has the wrong type;
            if the existing object appears in `base`; or if a non-empty directory would be replaced with
            REPLACE rather than REPLACE_FORCE.
    """
    existing, _, _ = self._prepare_dir_op(add_dir_path=path.parent, add_name=path.name)
    if existing is None:
        # nothing occupies the path, so there is no collision to handle
        return None

    if if_exists == IfExistsParam.ERROR:
        raise excs.Error(f'Path {path!r} is an existing {existing._display_name()}')

    existing_is_snapshot = isinstance(existing, View) and existing._tbl_version_path.is_snapshot()
    if not isinstance(existing, expected_obj_type) or (expected_snapshot and not existing_is_snapshot):
        # label for the type the caller expected; any other expected type is a programming error
        type_labels = {
            Dir: 'directory',
            InsertableTable: 'table',
            View: 'snapshot' if expected_snapshot else 'view',
        }
        obj_type_str = type_labels.get(expected_obj_type)
        if obj_type_str is None:
            raise AssertionError()
        raise excs.Error(
            f'Path {path!r} already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.'
        )

    if if_exists == IfExistsParam.IGNORE:
        return existing

    assert if_exists in (IfExistsParam.REPLACE, IfExistsParam.REPLACE_FORCE)

    # Check for circularity
    if base is not None:
        assert isinstance(existing, Table)  # or else it would have been caught above
        ancestor_ids = [version.id for version in base.get_tbl_versions()]
        if existing._id in ancestor_ids:
            raise excs.Error(
                "Cannot use if_exists='replace' with the same name as one of the view's own ancestors."
            )

    # drop the existing schema object
    if not isinstance(existing, Dir):
        assert isinstance(existing, Table)
        force_drop = if_exists == IfExistsParam.REPLACE_FORCE
        self._drop_tbl(existing, force=force_drop, is_replace=True)
        return None

    dir_contents = self._get_dir_contents(existing._id)
    if len(dir_contents) > 0 and if_exists == IfExistsParam.REPLACE:
        raise excs.Error(
            f'Directory {path!r} already exists and is not empty. '
            'Use `if_exists="replace_force"` to replace it.'
        )
    self._drop_dir(existing._id, path, force=True)
    return None
|