pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- pixeltable/__init__.py +23 -5
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -3
- pixeltable/catalog/catalog.py +1318 -404
- pixeltable/catalog/column.py +186 -115
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +11 -43
- pixeltable/catalog/insertable_table.py +167 -79
- pixeltable/catalog/path.py +61 -23
- pixeltable/catalog/schema_object.py +9 -10
- pixeltable/catalog/table.py +626 -308
- pixeltable/catalog/table_metadata.py +101 -0
- pixeltable/catalog/table_version.py +713 -569
- pixeltable/catalog/table_version_handle.py +37 -6
- pixeltable/catalog/table_version_path.py +42 -29
- pixeltable/catalog/tbl_ops.py +50 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +108 -94
- pixeltable/config.py +128 -22
- pixeltable/dataframe.py +188 -100
- pixeltable/env.py +407 -136
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +3 -0
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +7 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +190 -30
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/in_memory_data_node.py +18 -18
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +206 -101
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +34 -30
- pixeltable/exprs/column_ref.py +92 -96
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +152 -55
- pixeltable/exprs/expr.py +62 -43
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +75 -37
- pixeltable/exprs/globals.py +1 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +10 -27
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +127 -53
- pixeltable/exprs/rowid_ref.py +8 -12
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +10 -10
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +20 -18
- pixeltable/func/signature.py +43 -16
- pixeltable/func/tools.py +23 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +6 -0
- pixeltable/functions/anthropic.py +93 -33
- pixeltable/functions/audio.py +114 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +1 -1
- pixeltable/functions/deepseek.py +20 -9
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +28 -11
- pixeltable/functions/globals.py +13 -13
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1046 -23
- pixeltable/functions/image.py +9 -18
- pixeltable/functions/llama_cpp.py +23 -8
- pixeltable/functions/math.py +3 -4
- pixeltable/functions/mistralai.py +4 -15
- pixeltable/functions/ollama.py +16 -9
- pixeltable/functions/openai.py +104 -82
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +2 -2
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +13 -14
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/video.py +1388 -106
- pixeltable/functions/vision.py +7 -7
- pixeltable/functions/whisper.py +15 -7
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +332 -105
- pixeltable/index/base.py +13 -22
- pixeltable/index/btree.py +23 -22
- pixeltable/index/embedding_index.py +32 -44
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +7 -6
- pixeltable/io/external_store.py +49 -77
- pixeltable/io/fiftyone.py +11 -11
- pixeltable/io/globals.py +29 -28
- pixeltable/io/hf_datasets.py +17 -9
- pixeltable/io/label_studio.py +70 -66
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +12 -11
- pixeltable/io/parquet.py +13 -93
- pixeltable/io/table_data_conduit.py +71 -47
- pixeltable/io/utils.py +3 -3
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +21 -11
- pixeltable/iterators/document.py +116 -55
- pixeltable/iterators/image.py +5 -2
- pixeltable/iterators/video.py +293 -13
- pixeltable/metadata/__init__.py +4 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/util.py +13 -12
- pixeltable/metadata/notes.py +4 -0
- pixeltable/metadata/schema.py +79 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +274 -223
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +259 -129
- pixeltable/share/protocol/__init__.py +34 -0
- pixeltable/share/protocol/common.py +170 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +109 -0
- pixeltable/share/publish.py +213 -57
- pixeltable/store.py +238 -175
- pixeltable/type_system.py +104 -63
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +108 -13
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +31 -5
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +283 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +88 -0
- pixeltable/utils/local_store.py +316 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +528 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +392 -0
- pixeltable-0.4.20.dist-info/METADATA +587 -0
- pixeltable-0.4.20.dist-info/RECORD +218 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
- pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable/utils/sample.py +0 -25
- pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
- pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
- pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
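The file list also records the removal of the `pixeltable.ext` package: `ext/functions/whisperx.py` is replaced by a new `pixeltable/functions/whisperx.py`, and `yolox.py` is moved from `ext/functions` into `functions`. A minimal sketch of the import change this implies for downstream code; the module paths come from the file list above, but the exact symbols each module exports are assumptions, not confirmed by this diff:

```python
# Hypothetical import migration when upgrading from 0.4.0rc3 to 0.4.20.

# Before (0.4.0rc3):
#   from pixeltable.ext.functions import whisperx, yolox

# After (0.4.20, per the moved/added files above):
from pixeltable.functions import whisperx, yolox
```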
pixeltable/catalog/catalog.py
CHANGED
@@ -5,41 +5,45 @@ import functools
 import logging
 import random
 import time
+from collections import defaultdict
 from contextlib import contextmanager
-from typing import TYPE_CHECKING, Any, Callable, Iterator,
+from typing import TYPE_CHECKING, Any, Callable, Iterator, TypeVar
 from uuid import UUID

 import psycopg
 import sqlalchemy as sql
+import sqlalchemy.exc as sql_exc

 from pixeltable import exceptions as excs
 from pixeltable.env import Env
 from pixeltable.iterators import ComponentIterator
 from pixeltable.metadata import schema
+from pixeltable.utils.exception_handler import run_cleanup

-
-from pixeltable.plan import SampleClause
+from .column import Column
 from .dir import Dir
-from .globals import IfExistsParam, IfNotExistsParam, MediaValidation
+from .globals import IfExistsParam, IfNotExistsParam, MediaValidation, QColumnId
 from .insertable_table import InsertableTable
 from .path import Path
 from .schema_object import SchemaObject
 from .table import Table
-from .table_version import TableVersion
+from .table_version import TableVersion, TableVersionCompleteMd
 from .table_version_handle import TableVersionHandle
 from .table_version_path import TableVersionPath
+from .tbl_ops import TableOp
+from .update_status import UpdateStatus
 from .view import View

 if TYPE_CHECKING:
-    from
+    from pixeltable.plan import SampleClause
+
+    from .. import exprs


 _logger = logging.getLogger('pixeltable')


-def _unpack_row(
-    row: Optional[sql.engine.Row], entities: list[type[sql.orm.decl_api.DeclarativeBase]]
-) -> Optional[list[Any]]:
+def _unpack_row(row: sql.engine.Row | None, entities: list[type[sql.orm.decl_api.DeclarativeBase]]) -> list[Any] | None:
     """Convert a Row result into a list of entity instances.

     Assumes that the query contains a select() of exactly those entities.
@@ -60,46 +64,92 @@ def _unpack_row(
     return result


+# -1: unlimited
 # for now, we don't limit the number of retries, because we haven't seen situations where the actual number of retries
 # grows uncontrollably
-_MAX_RETRIES =
+_MAX_RETRIES = -1

 T = TypeVar('T')


-def
+def retry_loop(
+    *, tbl: TableVersionPath | None = None, for_write: bool, lock_mutable_tree: bool = False
+) -> Callable[[Callable[..., T]], Callable[..., T]]:
     def decorator(op: Callable[..., T]) -> Callable[..., T]:
         @functools.wraps(op)
         def loop(*args: Any, **kwargs: Any) -> T:
-
+            cat = Catalog.get()
+            # retry_loop() is reentrant
+            if cat._in_retry_loop:
+                return op(*args, **kwargs)
+
+            num_retries = 0
             while True:
+                cat._in_retry_loop = True
                 try:
                     # in order for retry to work, we need to make sure that there aren't any prior db updates
                     # that are part of an ongoing transaction
                     assert not Env.get().in_xact
-                    with Catalog.get().begin_xact(
+                    with Catalog.get().begin_xact(
+                        tbl=tbl,
+                        for_write=for_write,
+                        convert_db_excs=False,
+                        lock_mutable_tree=lock_mutable_tree,
+                        finalize_pending_ops=True,
+                    ):
                         return op(*args, **kwargs)
-                except
+                except PendingTableOpsError as e:
+                    Env.get().console_logger.debug(f'retry_loop(): finalizing pending ops for {e.tbl_id}')
+                    Catalog.get()._finalize_pending_ops(e.tbl_id)
+                except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
                     # TODO: what other exceptions should we be looking for?
-                    if isinstance(
-
-
-
+                    if isinstance(
+                        # TODO: Investigate whether DeadlockDetected points to a bug in our locking protocol,
+                        # which is supposed to be deadlock-free.
+                        e.orig,
+                        (
+                            psycopg.errors.SerializationFailure,
+                            psycopg.errors.LockNotAvailable,
+                            psycopg.errors.DeadlockDetected,
+                        ),
+                    ):
+                        if num_retries < _MAX_RETRIES or _MAX_RETRIES == -1:
+                            num_retries += 1
+                            _logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
                             time.sleep(random.uniform(0.1, 0.5))
                         else:
                             raise excs.Error(f'Serialization retry limit ({_MAX_RETRIES}) exceeded') from e
                     else:
                         raise
+                except Exception as e:
+                    # for informational/debugging purposes
+                    _logger.debug(f'retry_loop(): passing along {e}')
+                    raise
+                finally:
+                    cat._in_retry_loop = False

         return loop

     return decorator


+class PendingTableOpsError(Exception):
+    tbl_id: UUID
+
+    def __init__(self, tbl_id: UUID) -> None:
+        self.tbl_id = tbl_id
+
+
 class Catalog:
     """The functional interface to getting access to catalog objects

-    All interface functions must be called in the context of a transaction, started with Catalog.begin_xact()
+    All interface functions must be called in the context of a transaction, started with Catalog.begin_xact() or
+    via retry_loop().
+
+    When calling functions that involve Table or TableVersion instances, the catalog needs to get a chance to finalize
+    pending ops against those tables. To that end,
+    - use begin_xact(tbl) or begin_xact(tbl_id) if only accessing a single table
+    - use retry_loop() when accessing multiple tables (eg, pxt.ls())

     Caching and invalidation of metadata:
     - Catalog caches TableVersion instances in order to avoid excessive metadata loading
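The `retry_loop` decorator above is the heart of the new concurrency handling: it re-runs the whole catalog operation whenever Postgres aborts the transaction with a serialization or lock error, and it finalizes pending table ops before retrying. Below is a framework-free sketch of the same retry pattern, for orientation only; `TransientConflict` stands in for psycopg's `SerializationFailure`/`LockNotAvailable`, and nothing here is Pixeltable's actual API.

```python
import functools
import random
import time
from typing import Any, Callable, TypeVar

T = TypeVar('T')


class TransientConflict(Exception):
    """Stand-in for the retryable database errors handled in the diff."""


def retry_on_conflict(max_retries: int = -1) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Retry a callable on transient conflicts; max_retries == -1 means unlimited,
    mirroring _MAX_RETRIES = -1 above."""

    def decorator(op: Callable[..., T]) -> Callable[..., T]:
        @functools.wraps(op)
        def loop(*args: Any, **kwargs: Any) -> T:
            num_retries = 0
            while True:
                try:
                    return op(*args, **kwargs)
                except TransientConflict:
                    if max_retries != -1 and num_retries >= max_retries:
                        raise
                    num_retries += 1
                    # randomized backoff, same 0.1-0.5s window as the diff
                    time.sleep(random.uniform(0.1, 0.5))

        return loop

    return decorator
```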
@@ -109,22 +159,34 @@ class Catalog:
       duplicate references to that table in the From clause (ie, incorrect Cartesian products)
     - in order to allow multiple concurrent Python processes to perform updates (data and/or schema) against a shared
       Pixeltable instance, Catalog needs to reload metadata from the store when there are changes
-    - concurrent changes are detected by comparing TableVersion.version with the stored current version
-      (TableMd.current_version)
+    - concurrent changes are detected by comparing TableVersion.version/view_sn with the stored current version
+      (TableMd.current_version/view_sn)
     - cached live TableVersion instances (those with effective_version == None) are validated against the stored
       metadata on transaction boundaries; this is recorded in TableVersion.is_validated
     - metadata validation is only needed for live TableVersion instances (snapshot instances are immutable)
     """

-    _instance:
+    _instance: Catalog | None = None

-    # key: [id, version]
+    # cached TableVersion instances; key: [id, version]
     # - mutable version of a table: version == None (even though TableVersion.version is set correctly)
     # - snapshot versions: records the version of the snapshot
-    _tbl_versions: dict[tuple[UUID,
-    _tbls: dict[UUID, Table]
+    _tbl_versions: dict[tuple[UUID, int | None], TableVersion]
+    _tbls: dict[tuple[UUID, int | None], Table]
     _in_write_xact: bool  # True if we're in a write transaction
-
+    _x_locked_tbl_ids: set[UUID]  # non-empty for write transactions
+    _modified_tvs: set[TableVersionHandle]  # TableVersion instances modified in the current transaction
+    _undo_actions: list[Callable[[], None]]
+    _in_retry_loop: bool
+
+    # cached column dependencies
+    # - key: table id, value: mapping from column id to its dependencies
+    # - only maintained for dependencies between non-snapshot table versions
+    # - can contain stale entries (stemming from invalidated TV instances)
+    _column_dependencies: dict[UUID, dict[QColumnId, set[QColumnId]]]
+
+    # column dependents are recomputed at the beginning of every write transaction and only reflect the locked tree
+    _column_dependents: dict[QColumnId, set[QColumnId]] | None

     @classmethod
     def get(cls) -> Catalog:
@@ -135,21 +197,27 @@
     @classmethod
     def clear(cls) -> None:
         """Remove the instance. Used for testing."""
-
-
-
-
-        # )
-        tbl_version.is_validated = False
+        if cls._instance is not None:
+            # invalidate all existing instances to force reloading of metadata
+            for tbl_version in cls._instance._tbl_versions.values():
+                tbl_version.is_validated = False
         cls._instance = None

     def __init__(self) -> None:
         self._tbl_versions = {}
         self._tbls = {}  # don't use a defaultdict here, it doesn't cooperate with the debugger
         self._in_write_xact = False
-        self.
+        self._x_locked_tbl_ids = set()
+        self._modified_tvs = set()
+        self._undo_actions = []
+        self._in_retry_loop = False
+        self._column_dependencies = {}
+        self._column_dependents = None
         self._init_store()

+    def _dropped_tbl_error_msg(self, tbl_id: UUID) -> str:
+        return f'Table was dropped (no record found for {tbl_id})'
+
     def validate(self) -> None:
         """Validate structural consistency of cached metadata"""
         for (tbl_id, effective_version), tbl_version in self._tbl_versions.items():
@@ -164,38 +232,73 @@
                     f'snapshot_id={tbl_version.id} mutable_views={tbl_version.mutable_views}'
                 )

-            if tbl_version.is_view and tbl_version.is_mutable:
+            if tbl_version.is_view and tbl_version.is_mutable and tbl_version.is_validated:
                 # make sure this mutable view is recorded in a mutable base
                 base = tbl_version.base
                 assert base is not None
                 if base.effective_version is None:
                     assert (base.id, None) in self._tbl_versions
-
+                    base_tv = self._tbl_versions[base.id, None]
+                    if not base_tv.is_validated:
+                        continue
+                    mutable_view_ids = ', '.join(str(tv.id) for tv in self._tbl_versions[base.id, None].mutable_views)
+                    mutable_view_names = ', '.join(
+                        tv._tbl_version.name
+                        for tv in self._tbl_versions[base.id, None].mutable_views
+                        if tv._tbl_version is not None
+                    )
+                    assert TableVersionHandle.create(tbl_version) in self._tbl_versions[base.id, None].mutable_views, (
+                        f'{tbl_version.name} ({tbl_version.id}) missing in {mutable_view_ids} ({mutable_view_names})'
+                    )

             if len(tbl_version.mutable_views) > 0:
                 # make sure we also loaded mutable view metadata, which is needed to detect column dependencies
                 for v in tbl_version.mutable_views:
                     assert v.effective_version is None, f'{v.id}:{v.effective_version}'

+    def mark_modified_tvs(self, *handle: TableVersionHandle) -> None:
+        """Record that the given TableVersion instances were modified in the current transaction"""
+        assert Env.get().in_xact
+        self._modified_tvs.update(handle)
+
     @contextmanager
-    def begin_xact(
+    def begin_xact(
+        self,
+        *,
+        tbl: TableVersionPath | None = None,
+        tbl_id: UUID | None = None,
+        for_write: bool = False,
+        lock_mutable_tree: bool = False,
+        convert_db_excs: bool = True,
+        finalize_pending_ops: bool = True,
+    ) -> Iterator[sql.Connection]:
         """
         Return a context manager that yields a connection to the database. Idempotent.

         It is mandatory to call this method, not Env.begin_xact(), if the transaction accesses any table data
         or metadata.

-
-        -
+        If tbl != None, follows this locking protocol:
+        - validates/reloads the TableVersion instances of tbl's ancestors (in the hope that this reduces potential
+          SerializationErrors later on)
+        - if for_write == True, x-locks Table record (by updating Table.lock_dummy; see _acquire_tbl_lock())
+        - if for_write == False, validates TableVersion instance
+        - if lock_mutable_tree == True, also x-locks all mutable views of the table
         - this needs to be done in a retry loop, because Postgres can decide to abort the transaction
           (SerializationFailure, LockNotAvailable)
         - for that reason, we do all lock acquisition prior to doing any real work (eg, compute column values),
-          to minimize
+          to minimize the probability of losing that work due to a forced abort
+
+        If convert_db_excs == True, converts DBAPIErrors into excs.Errors.
         """
+        assert tbl is None or tbl_id is None  # at most one can be specified
         if Env.get().in_xact:
-
-
-
+            # make sure that we requested the required table lock at the beginning of the transaction
+            if for_write:
+                if tbl is not None:
+                    assert tbl.tbl_id in self._x_locked_tbl_ids, f'{tbl.tbl_id} not in {self._x_locked_tbl_ids}'
+                elif tbl_id is not None:
+                    assert tbl_id in self._x_locked_tbl_ids, f'{tbl_id} not in {self._x_locked_tbl_ids}'
             yield Env.get().conn
             return

@@ -207,32 +310,105 @@
         # )
         # _logger.debug(f'begin_xact(): {tv_msg}')
         num_retries = 0
+        pending_ops_tbl_id: UUID | None = None
+        has_exc = False  # True if we exited the 'with ...begin_xact()' block with an exception
         while True:
-
-
-
-
-            conn.execute(
-                sql.select(schema.Table).where(schema.Table.id == tbl_id).with_for_update(nowait=True)
-            )
-            conn.execute(sql.update(schema.Table).values(lock_dummy=1).where(schema.Table.id == tbl_id))
-            self._x_locked_tbl_id = tbl_id
+            if pending_ops_tbl_id is not None:
+                Env.get().console_logger.debug(f'begin_xact(): finalizing pending ops for {pending_ops_tbl_id}')
+                self._finalize_pending_ops(pending_ops_tbl_id)
+                pending_ops_tbl_id = None

-
+            try:
+                self._in_write_xact = for_write
+                self._x_locked_tbl_ids = set()
+                self._modified_tvs = set()
+                self._column_dependents = None
+                has_exc = False
+
+                assert not self._undo_actions
+                with Env.get().begin_xact(for_write=for_write) as conn:
+                    if tbl is not None or tbl_id is not None:
+                        try:
+                            target: TableVersionHandle | None = None
+                            if tbl is not None:
+                                if self._acquire_path_locks(
+                                    tbl=tbl,
+                                    for_write=for_write,
+                                    lock_mutable_tree=lock_mutable_tree,
+                                    check_pending_ops=finalize_pending_ops,
+                                ):
+                                    target = tbl.tbl_version
+                            else:
+                                target = self._acquire_tbl_lock(
+                                    tbl_id=tbl_id,
+                                    for_write=for_write,
+                                    lock_mutable_tree=lock_mutable_tree,
+                                    raise_if_not_exists=True,
+                                    check_pending_ops=finalize_pending_ops,
+                                )
+
+                            if target is None:
+                                # didn't get the write lock
+                                for_write = False
+                            elif for_write:
+                                # we know at this point that target is mutable because we got the X-lock
+                                if lock_mutable_tree and not target.is_snapshot:
+                                    self._x_locked_tbl_ids = self._get_mutable_tree(target.id)
+                                    self._compute_column_dependents(self._x_locked_tbl_ids)
+                                else:
+                                    self._x_locked_tbl_ids = {target.id}
+                            if _logger.isEnabledFor(logging.DEBUG):
+                                # validate only when we don't see errors
+                                self.validate()
+
+                        except PendingTableOpsError as e:
+                            has_exc = True
+                            if finalize_pending_ops:
+                                # we remember which table id to finalize
+                                pending_ops_tbl_id = e.tbl_id
+                            # raise to abort the transaction
+                            raise
+
+                        except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
+                            has_exc = True
+                            if isinstance(
+                                e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)
+                            ) and (num_retries < _MAX_RETRIES or _MAX_RETRIES == -1):
+                                num_retries += 1
+                                _logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
+                                time.sleep(random.uniform(0.1, 0.5))
+                                assert not self._undo_actions  # We should not have any undo actions at this point
+                                continue
+                            else:
+                                raise
+
+                    assert not self._undo_actions
                     yield conn
                     return
-
-
-
-
-
-
-            time.sleep(random.uniform(0.1, 0.5))
+
+            except PendingTableOpsError:
+                has_exc = True
+                if pending_ops_tbl_id is not None:
+                    # the next iteration of the loop will deal with pending ops for this table id
+                    continue
                 else:
+                    # we got this exception after getting the initial table locks and therefore need to abort
                     raise
+
+            except (sql_exc.DBAPIError, sql_exc.OperationalError, sql_exc.InternalError) as e:
+                has_exc = True
+                self.convert_sql_exc(e, tbl_id, tbl.tbl_version if tbl is not None else None, convert_db_excs)
+                raise  # re-raise the error if it didn't convert to a pxt.Error
+
+            except (Exception, KeyboardInterrupt) as e:
+                has_exc = True
+                _logger.debug(f'Caught {e.__class__}')
+                raise
+
             finally:
                 self._in_write_xact = False
-                self.
+                self._x_locked_tbl_ids.clear()
+                self._column_dependents = None

                 # invalidate cached current TableVersion instances
                 for tv in self._tbl_versions.values():
@@ -240,20 +416,333 @@
                         _logger.debug(f'invalidating table version {tv.id}:None (tv={id(tv):x})')
                         tv.is_validated = False

-                if
-
+                if has_exc:
+                    # Execute undo actions in reverse order (LIFO)
+                    for hook in reversed(self._undo_actions):
+                        run_cleanup(hook, raise_error=False)
+                    # purge all modified TableVersion instances; we can't guarantee they are still consistent with the
+                    # stored metadata
+                    for handle in self._modified_tvs:
+                        self._clear_tv_cache(handle.id, handle.effective_version)
+                    # Clear potentially corrupted cached metadata
+                    if tbl is not None:
+                        tbl.clear_cached_md()
+
+                self._undo_actions.clear()
+                self._modified_tvs.clear()
+
+    def register_undo_action(self, func: Callable[[], None]) -> Callable[[], None]:
+        """Registers a function to be called if the current transaction fails.
+
+        The function is called only if the current transaction fails due to an exception.
+
+        Rollback functions are called in reverse order of registration (LIFO).
+
+        The function should not raise exceptions; if it does, they are logged and ignored.
+        """
+        assert self.in_write_xact
+        self._undo_actions.append(func)
+        return func
+
+    def convert_sql_exc(
+        self,
+        e: sql_exc.StatementError,
+        tbl_id: UUID | None = None,
+        tbl: TableVersionHandle | None = None,
+        convert_db_excs: bool = True,
+    ) -> None:
+        # we got some db error during the actual operation (not just while trying to get locks on the metadata
+        # records); we convert these into pxt.Error exceptions if appropriate
+
+        # we always convert UndefinedTable exceptions (they can't be retried)
+        if isinstance(e.orig, psycopg.errors.UndefinedTable) and tbl is not None:
+            # the table got dropped in the middle of the operation
+            tbl_name = tbl.get().name
+            _logger.debug(f'Exception: undefined table {tbl_name!r}: Caught {type(e.orig)}: {e!r}')
+            raise excs.Error(f'Table was dropped: {tbl_name}') from None
+        elif (
+            # TODO: Investigate whether DeadlockDetected points to a bug in our locking protocol,
+            # which is supposed to be deadlock-free.
+            isinstance(
+                e.orig,
+                (
+                    psycopg.errors.SerializationFailure,  # serialization error despite getting x-locks
+                    psycopg.errors.InFailedSqlTransaction,  # can happen after tx fails for another reason
+                    psycopg.errors.DuplicateColumn,  # if a different process added a column concurrently
+                    psycopg.errors.DeadlockDetected,  # locking protocol contention
+                ),
+            )
+            and convert_db_excs
+        ):
+            msg: str
+            if tbl is not None:
+                msg = f'{tbl.get().name} ({tbl.id})'
+            elif tbl_id is not None:
+                msg = f'{tbl_id}'
+            else:
+                msg = ''
+            _logger.debug(f'Exception: {e.orig.__class__}: {msg} ({e})')
+            # Suppress the underlying SQL exception unless DEBUG is enabled
+            raise_from = e if _logger.isEnabledFor(logging.DEBUG) else None
+            raise excs.Error(
+                'That Pixeltable operation could not be completed because it conflicted with another '
+                'operation that was run on a different process.\n'
+                'Please re-run the operation.'
+            ) from raise_from

     @property
     def in_write_xact(self) -> bool:
         return self._in_write_xact

-    def
+    def _acquire_path_locks(
+        self,
+        *,
+        tbl: TableVersionPath,
+        for_write: bool = False,
+        lock_mutable_tree: bool = False,
+        check_pending_ops: bool | None = None,
+    ) -> bool:
+        """
+        Path locking protocol:
+        - refresh cached TableVersions of ancestors (we need those even during inserts, for computed columns that
+          reference the base tables)
+        - refresh cached TableVersion of tbl or get X-lock, depending on for_write
+        - if lock_mutable_tree, also X-lock all mutable views of tbl
+
+        Raises Error if tbl doesn't exist.
+        Return False if the lock couldn't be acquired (X-lock on a non-mutable table), True otherwise.
+        """
+        path_handles = tbl.get_tbl_versions()
+        read_handles = path_handles[:0:-1] if for_write else path_handles[::-1]
+        for handle in read_handles:
+            # update cache
+            _ = self.get_tbl_version(handle.id, handle.effective_version, validate_initialized=True)
+        if not for_write:
+            return True  # nothing left to lock
+        handle = self._acquire_tbl_lock(
+            tbl_id=tbl.tbl_id,
+            for_write=True,
+            lock_mutable_tree=lock_mutable_tree,
+            raise_if_not_exists=True,
+            check_pending_ops=check_pending_ops,
+        )
+        # update cache
+        _ = self.get_tbl_version(path_handles[0].id, path_handles[0].effective_version, validate_initialized=True)
+        return handle is not None
+
+    def _acquire_tbl_lock(
+        self,
+        *,
+        for_write: bool,
+        tbl_id: UUID | None = None,
+        dir_id: UUID | None = None,
+        tbl_name: str | None = None,
+        lock_mutable_tree: bool = False,
+        raise_if_not_exists: bool = True,
+        check_pending_ops: bool | None = None,
+    ) -> TableVersionHandle | None:
+        """
+        For writes: force acquisition of an X-lock on a Table record via a blind update.
+
+        Either tbl_id or dir_id/tbl_name need to be specified.
+        Returns True if the table was locked, False if it was a snapshot or not found.
+        If lock_mutable_tree, recursively locks all mutable views of the table.
+
+        Returns a handle to what was locked, None if the lock couldn't be acquired (eg, X-lock on a non-mutable table).
+        """
+        assert (tbl_id is not None) != (dir_id is not None and tbl_name is not None)
+        assert (dir_id is None) == (tbl_name is None)
+        where_clause: sql.ColumnElement
+        if tbl_id is not None:
+            where_clause = schema.Table.id == tbl_id
+        else:
+            where_clause = sql.and_(schema.Table.dir_id == dir_id, schema.Table.md['name'].astext == tbl_name)
+        user = Env.get().user
+        if user is not None:
+            where_clause = sql.and_(where_clause, schema.Table.md['user'].astext == Env.get().user)
+
+        conn = Env.get().conn
+        q = sql.select(schema.Table).where(where_clause)
+        if for_write:
+            q = q.with_for_update(nowait=True)
+        row = conn.execute(q).one_or_none()
+        if row is None:
+            if raise_if_not_exists:
+                raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
+            return None  # nothing to lock
+        tbl_md = schema.md_from_dict(schema.TableMd, row.md)
+        if for_write and tbl_md.is_mutable:
+            conn.execute(sql.update(schema.Table).values(lock_dummy=1).where(where_clause))
+
+        if check_pending_ops:
+            # check for pending ops after getting table lock
+            pending_ops_q = sql.select(sql.func.count()).where(schema.PendingTableOp.tbl_id == row.id)
+            has_pending_ops = conn.execute(pending_ops_q).scalar() > 0
+            if has_pending_ops:
+                raise PendingTableOpsError(row.id)
+
+        if for_write and not tbl_md.is_mutable:
+            return None  # nothing to lock
+
+        effective_version = tbl_md.current_version if tbl_md.is_snapshot else None
+        if tbl_md.is_mutable and lock_mutable_tree:
+            # also lock mutable views
+            tv = self.get_tbl_version(tbl_id, effective_version, validate_initialized=True)
+            for view in tv.mutable_views:
+                self._acquire_tbl_lock(
+                    for_write=for_write,
+                    tbl_id=view.id,
+                    lock_mutable_tree=lock_mutable_tree,
+                    raise_if_not_exists=raise_if_not_exists,
+                    check_pending_ops=check_pending_ops,
+                )
+        return TableVersionHandle(tbl_id, effective_version)
+
+    def _finalize_pending_ops(self, tbl_id: UUID) -> None:
+        """Finalizes all pending ops for the given table."""
+        num_retries = 0
+        while True:
+            try:
+                tbl_version: int
+                op: TableOp | None = None
+                delete_next_op_stmt: sql.Delete
+                reset_has_pending_stmt: sql.Update
+                with self.begin_xact(
+                    tbl_id=tbl_id, for_write=True, convert_db_excs=False, finalize_pending_ops=False
+                ) as conn:
+                    q = (
+                        sql.select(schema.Table.md, schema.PendingTableOp)
+                        .select_from(schema.Table)
+                        .join(schema.PendingTableOp)
+                        .where(schema.Table.id == tbl_id)
+                        .where(schema.PendingTableOp.tbl_id == tbl_id)
+                        .order_by(schema.PendingTableOp.op_sn)
+                        .limit(1)
+                        .with_for_update()
+                    )
+                    row = conn.execute(q).one_or_none()
+                    if row is None:
+                        return
+                    view_md = row.md.get('view_md')
+                    is_snapshot = False if view_md is None else view_md.get('is_snapshot')
+                    assert is_snapshot is not None
+                    tbl_version = row.md.get('current_version') if is_snapshot else None
+                    op = schema.md_from_dict(TableOp, row.op)
+                    delete_next_op_stmt = sql.delete(schema.PendingTableOp).where(
+                        schema.PendingTableOp.tbl_id == tbl_id, schema.PendingTableOp.op_sn == row.op_sn
+                    )
+                    reset_has_pending_stmt = (
+                        sql.update(schema.Table)
+                        .where(schema.Table.id == tbl_id)
+                        .values(md=schema.Table.md.op('||')({'has_pending_ops': False}))
+                    )

+                    if op.needs_xact:
+                        tv = self.get_tbl_version(
+                            tbl_id, tbl_version, check_pending_ops=False, validate_initialized=True
+                        )
+                        tv.exec_op(op)
+                        conn.execute(delete_next_op_stmt)
+                        if op.op_sn == op.num_ops - 1:
+                            conn.execute(reset_has_pending_stmt)
+                        continue
+
+                # this op runs outside of a transaction
+                tv = self.get_tbl_version(tbl_id, tbl_version, check_pending_ops=False, validate_initialized=True)
+                tv.exec_op(op)
+                with self.begin_xact(
+                    tbl_id=tbl_id, for_write=True, convert_db_excs=False, finalize_pending_ops=False
+                ) as conn:
+                    conn.execute(delete_next_op_stmt)
+                    if op.op_sn == op.num_ops - 1:
+                        conn.execute(reset_has_pending_stmt)
+
+            except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
+                # TODO: why are we still seeing these here, instead of them getting taken care of by the retry
+                # logic of begin_xact()?
+                if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
+                    num_retries += 1
+                    log_msg: str
+                    if op is not None:
+                        log_msg = f'finalize_pending_ops(): retrying ({num_retries}) op {op!s} after {type(e.orig)}'
+                    else:
+                        log_msg = f'finalize_pending_ops(): retrying ({num_retries}) after {type(e.orig)}'
+                    Env.get().console_logger.debug(log_msg)
+                    time.sleep(random.uniform(0.1, 0.5))
+                    continue
+                else:
+                    raise
+            except Exception as e:
+                Env.get().console_logger.debug(f'finalize_pending_ops(): caught {e}')
+                raise
+
+            num_retries = 0
+
+    def _debug_str(self) -> str:
+        tv_str = '\n'.join(str(k) for k in self._tbl_versions)
+        tbl_str = '\n'.join(str(k) for k in self._tbls)
+        return f'tbl_versions:\n{tv_str}\ntbls:\n{tbl_str}'
+
+    def _get_mutable_tree(self, tbl_id: UUID) -> set[UUID]:
+        """Returns ids of all tables that form the tree of mutable views starting at tbl_id; includes the root."""
+        assert (tbl_id, None) in self._tbl_versions, (
+            f'({tbl_id}, None) not in {self._tbl_versions.keys()}\n{self._debug_str()}'
+        )
+        tv = self.get_tbl_version(tbl_id, None, validate_initialized=True)
+        result: set[UUID] = {tv.id}
+        for view in tv.mutable_views:
+            result.update(self._get_mutable_tree(view.id))
+        return result
+
+    def _compute_column_dependents(self, mutable_tree: set[UUID]) -> None:
+        """Populate self._column_dependents for all tables in mutable_tree"""
+        assert self._column_dependents is None
+        self._column_dependents = defaultdict(set)
+        for tbl_id in mutable_tree:
+            assert tbl_id in self._column_dependencies, (
+                f'{tbl_id} not in {self._column_dependencies.keys()}\n{self._debug_str()}'
+            )
+            for col, dependencies in self._column_dependencies[tbl_id].items():
+                for dependency in dependencies:
+                    if dependency.tbl_id not in mutable_tree:
+                        continue
+                    dependents = self._column_dependents[dependency]
+                    dependents.add(col)
+
+    def record_column_dependencies(self, tbl_version: TableVersion) -> None:
+        """Update self._column_dependencies. Only valid for mutable versions."""
+        from pixeltable.exprs import Expr
+
+        assert tbl_version.is_mutable
+        dependencies: dict[QColumnId, set[QColumnId]] = {}
+        for col in tbl_version.cols_by_id.values():
+            if col.value_expr_dict is None:
+                continue
+            dependencies[QColumnId(tbl_version.id, col.id)] = Expr.get_refd_column_ids(col.value_expr_dict)
+        self._column_dependencies[tbl_version.id] = dependencies
+
+    def get_column_dependents(self, tbl_id: UUID, col_id: int) -> set[Column]:
+        """Return all Columns that transitively depend on the given column."""
+        assert self._column_dependents is not None
+        dependents = self._column_dependents[QColumnId(tbl_id, col_id)]
+        result: set[Column] = set()
+        for dependent in dependents:
+            tv = self.get_tbl_version(dependent.tbl_id, None, validate_initialized=True)
+            col = tv.cols_by_id[dependent.col_id]
+            result.add(col)
+        return result
+
+    def _acquire_dir_xlock(
+        self, *, parent_id: UUID | None = None, dir_id: UUID | None = None, dir_name: str | None = None
+    ) -> None:
         """Force acquisition of an X-lock on a Dir record via a blind update.

         If dir_id is present, then all other conditions are ignored.
         Note that (parent_id==None) is a valid where condition.
         If dir_id is not specified, the user from the environment is added to the directory filters.
         """
+        assert (dir_name is None) != (dir_id is None)
+        assert not (parent_id is not None and dir_name is None)
         user = Env.get().user
         assert self._in_write_xact
         q = sql.update(schema.Dir).values(lock_dummy=1)
@@ -269,6 +758,7 @@

     def get_dir_path(self, dir_id: UUID) -> Path:
         """Return path for directory with given id"""
+        assert isinstance(dir_id, UUID)
         conn = Env.get().conn
         names: list[str] = []
         while True:
@@ -279,15 +769,15 @@
                 break
             names.insert(0, dir.md['name'])
             dir_id = dir.parent_id
-        return Path('.'.join(names),
+        return Path.parse('.'.join(names), allow_empty_path=True, allow_system_path=True)

     @dataclasses.dataclass
     class DirEntry:
-        dir:
+        dir: schema.Dir | None
         dir_entries: dict[str, Catalog.DirEntry]
-        table:
+        table: schema.Table | None

-    @
+    @retry_loop(for_write=False)
     def get_dir_contents(self, dir_path: Path, recursive: bool = False) -> dict[str, DirEntry]:
         dir = self._get_schema_object(dir_path, expected=Dir, raise_if_not_exists=True)
         return self._get_dir_contents(dir._id, recursive=recursive)
@@ -314,31 +804,37 @@

         return result

-    @
-    def move(self, path: Path, new_path: Path) -> None:
-        self._move(path, new_path)
+    @retry_loop(for_write=True)
+    def move(self, path: Path, new_path: Path, if_exists: IfExistsParam, if_not_exists: IfNotExistsParam) -> None:
+        self._move(path, new_path, if_exists, if_not_exists)

-    def _move(self, path: Path, new_path: Path) -> None:
-
+    def _move(self, path: Path, new_path: Path, if_exists: IfExistsParam, if_not_exists: IfNotExistsParam) -> None:
+        dest_obj, dest_dir, src_obj = self._prepare_dir_op(
             add_dir_path=new_path.parent,
             add_name=new_path.name,
             drop_dir_path=path.parent,
             drop_name=path.name,
-            raise_if_exists=
-            raise_if_not_exists=
+            raise_if_exists=(if_exists == IfExistsParam.ERROR),
+            raise_if_not_exists=(if_not_exists == IfNotExistsParam.ERROR),
         )
-
+        assert dest_obj is None or if_exists == IfExistsParam.IGNORE
+        assert src_obj is not None or if_not_exists == IfNotExistsParam.IGNORE
+        if dest_obj is None and src_obj is not None:
+            # If dest_obj is not None, it means `if_exists='ignore'` and the destination already exists.
+            # If src_obj is None, it means `if_not_exists='ignore'` and the source doesn't exist.
+            # If dest_obj is None and src_obj is not None, then we can proceed with the move.
+            src_obj._move(new_path.name, dest_dir._id)

     def _prepare_dir_op(
         self,
-        add_dir_path:
-        add_name:
-        drop_dir_path:
-        drop_name:
-        drop_expected:
+        add_dir_path: Path | None = None,
+        add_name: str | None = None,
+        drop_dir_path: Path | None = None,
+        drop_name: str | None = None,
+        drop_expected: type[SchemaObject] | None = None,
         raise_if_exists: bool = False,
         raise_if_not_exists: bool = False,
-    ) -> tuple[
+    ) -> tuple[SchemaObject | None, Dir | None, SchemaObject | None]:
         """
         Validates paths and acquires locks needed for a directory operation, ie, add/drop/rename (add + drop) of a
         directory entry.
@@ -356,6 +852,7 @@
         - if both add and drop (= two directories are involved), lock the directories in a pre-determined order
           (in this case, by name) in order to prevent deadlocks between concurrent directory modifications
         """
+        assert drop_expected in (None, Table, Dir), drop_expected
         assert (add_dir_path is None) == (add_name is None)
         assert (drop_dir_path is None) == (drop_name is None)
         dir_paths: set[Path] = set()
@@ -364,46 +861,50 @@
         if drop_dir_path is not None:
             dir_paths.add(drop_dir_path)

-        add_dir:
-        drop_dir:
+        add_dir: schema.Dir | None = None
+        drop_dir: schema.Dir | None = None
         for p in sorted(dir_paths):
-            dir = self._get_dir(p,
+            dir = self._get_dir(p, lock_dir=True)
             if dir is None:
-
+                # Dir does not exist; raise an appropriate error.
+                if add_dir_path is not None or add_name is not None:
+                    raise excs.Error(f'Directory {p!r} does not exist. Create it first with:\npxt.create_dir({p!r})')
+                else:
+                    raise excs.Error(f'Directory {p!r} does not exist.')
             if p == add_dir_path:
                 add_dir = dir
             if p == drop_dir_path:
                 drop_dir = dir

-        add_obj:
+        add_obj: SchemaObject | None = None
         if add_dir is not None:
-            add_obj = self._get_dir_entry(add_dir.id, add_name,
+            add_obj = self._get_dir_entry(add_dir.id, add_name, lock_entry=True)
             if add_obj is not None and raise_if_exists:
                 add_path = add_dir_path.append(add_name)
-                raise excs.Error(f'Path {
+                raise excs.Error(f'Path {add_path!r} already exists.')

-        drop_obj:
+        drop_obj: SchemaObject | None = None
         if drop_dir is not None:
             drop_path = drop_dir_path.append(drop_name)
-            drop_obj = self._get_dir_entry(drop_dir.id, drop_name,
+            drop_obj = self._get_dir_entry(drop_dir.id, drop_name, lock_entry=True)
             if drop_obj is None and raise_if_not_exists:
-                raise excs.Error(f'Path {
+                raise excs.Error(f'Path {drop_path!r} does not exist.')
             if drop_obj is not None and drop_expected is not None and not isinstance(drop_obj, drop_expected):
-
-
-                    f'but is a {type(drop_obj)._display_name()}'
-                )
+                expected_name = 'table' if drop_expected is Table else 'directory'
+                raise excs.Error(f'{drop_path!r} needs to be a {expected_name} but is a {drop_obj._display_name()}')

         add_dir_obj = Dir(add_dir.id, add_dir.parent_id, add_dir.md['name']) if add_dir is not None else None
         return add_obj, add_dir_obj, drop_obj

-    def _get_dir_entry(
+    def _get_dir_entry(
+        self, dir_id: UUID, name: str, version: int | None = None, lock_entry: bool = False
+    ) -> SchemaObject | None:
         user = Env.get().user
         conn = Env.get().conn

         # check for subdirectory
-        if
-            self._acquire_dir_xlock(dir_id, None, name)
+        if lock_entry:
+            self._acquire_dir_xlock(parent_id=dir_id, dir_id=None, dir_name=name)
         q = sql.select(schema.Dir).where(
             schema.Dir.parent_id == dir_id, schema.Dir.md['name'].astext == name, schema.Dir.md['user'].astext == user
         )
@@ -417,228 +918,285 @@ class Catalog:
|
|
|
417
918
|
return Dir(dir_record.id, dir_record.parent_id, name)
|
|
418
919
|
|
|
419
920
|
# check for table
|
|
921
|
+
if lock_entry:
|
|
922
|
+
self._acquire_tbl_lock(for_write=True, dir_id=dir_id, raise_if_not_exists=False, tbl_name=name)
|
|
420
923
|
q = sql.select(schema.Table.id).where(
|
|
421
924
|
schema.Table.dir_id == dir_id,
|
|
422
925
|
schema.Table.md['name'].astext == name,
|
|
423
926
|
schema.Table.md['user'].astext == user,
|
|
424
927
|
)
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
tbl_id
|
|
428
|
-
|
|
429
|
-
if tbl_id not in self._tbls:
|
|
430
|
-
_ = self._load_tbl(tbl_id)
|
|
431
|
-
return self._tbls[tbl_id]
|
|
928
|
+
tbl_id = conn.execute(q).scalars().all()
|
|
929
|
+
assert len(tbl_id) <= 1, name
|
|
930
|
+
if len(tbl_id) == 1:
|
|
931
|
+
return self.get_table_by_id(tbl_id[0], version)
|
|
432
932
|
|
|
433
933
|
return None
|
|
434
934
|
|
|
435
935
|
def _get_schema_object(
|
|
436
936
|
self,
|
|
437
937
|
path: Path,
|
|
438
|
-
expected:
|
|
938
|
+
expected: type[SchemaObject] | None = None,
|
|
439
939
|
raise_if_exists: bool = False,
|
|
440
940
|
raise_if_not_exists: bool = False,
|
|
441
|
-
|
|
442
|
-
|
|
941
|
+
lock_parent: bool = False,
|
|
942
|
+
lock_obj: bool = False,
|
|
943
|
+
) -> SchemaObject | None:
|
|
443
944
|
"""Return the schema object at the given path, or None if it doesn't exist.
|
|
444
945
|
|
|
445
946
|
Raises Error if
|
|
446
|
-
- the parent directory doesn't exist
|
|
947
|
+
- the parent directory doesn't exist
|
|
447
948
|
- raise_if_exists is True and the path exists
|
|
448
949
|
- raise_if_not_exists is True and the path does not exist
|
|
449
950
|
- expected is not None and the existing object has a different type
|
|
450
951
|
"""
|
|
952
|
+
assert expected in (None, Table, Dir), expected
|
|
953
|
+
|
|
451
954
|
if path.is_root:
|
|
452
955
|
# the root dir
|
|
453
956
|
if expected is not None and expected is not Dir:
|
|
454
|
-
raise excs.Error(
|
|
455
|
-
|
|
456
|
-
)
|
|
457
|
-
dir = self._get_dir(path, for_update=for_update)
|
|
957
|
+
raise excs.Error(f'{path!r} needs to be a table but is a dir')
|
|
958
|
+
dir = self._get_dir(path, lock_dir=lock_obj)
|
|
458
959
|
if dir is None:
|
|
459
960
|
raise excs.Error(f'Unknown user: {Env.get().user}')
|
|
460
961
|
return Dir(dir.id, dir.parent_id, dir.md['name'])
|
|
461
962
|
|
|
462
963
|
parent_path = path.parent
|
|
463
|
-
parent_dir = self._get_dir(parent_path,
|
|
964
|
+
parent_dir = self._get_dir(parent_path, lock_dir=lock_parent)
|
|
464
965
|
if parent_dir is None:
|
|
465
|
-
raise excs.Error(f'Directory {
|
|
466
|
-
obj = self._get_dir_entry(parent_dir.id, path.name,
|
|
966
|
+
raise excs.Error(f'Directory {parent_path!r} does not exist.')
|
|
967
|
+
obj = self._get_dir_entry(parent_dir.id, path.name, path.version, lock_entry=lock_obj)
|
|
467
968
|
|
|
468
969
|
if obj is None and raise_if_not_exists:
|
|
469
|
-
raise excs.Error(f'Path {
|
|
970
|
+
raise excs.Error(f'Path {path!r} does not exist.')
|
|
470
971
|
elif obj is not None and raise_if_exists:
|
|
471
|
-
raise excs.Error(f'Path {
|
|
972
|
+
raise excs.Error(f'Path {path!r} is an existing {obj._display_name()}.')
|
|
472
973
|
elif obj is not None and expected is not None and not isinstance(obj, expected):
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
)
|
|
974
|
+
expected_name = 'table' if expected is Table else 'directory'
|
|
975
|
+
raise excs.Error(f'{path!r} needs to be a {expected_name} but is a {obj._display_name()}.')
|
|
476
976
|
return obj
|
|
477
977
|
|
|
478
|
-
def get_table_by_id(self, tbl_id: UUID) ->
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
if
|
|
482
|
-
return
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
if tbl_version.is_mutable:
|
|
487
|
-
for v in tbl_version.mutable_views:
|
|
488
|
-
_ = self.get_table_by_id(v.id)
|
|
489
|
-
return self._tbls[tbl_id]
|
|
978
|
+
def get_table_by_id(self, tbl_id: UUID, version: int | None = None) -> Table | None:
|
|
979
|
+
"""Must be executed inside a transaction. Might raise PendingTableOpsError."""
|
|
980
|
+
if (tbl_id, version) not in self._tbls:
|
|
981
|
+
if version is None:
|
|
982
|
+
return self._load_tbl(tbl_id)
|
|
983
|
+
else:
|
|
984
|
+
return self._load_tbl_at_version(tbl_id, version)
|
|
985
|
+
return self._tbls.get((tbl_id, version))
|
|
490
986
|
|
|
-    @_retry_loop(for_write=True)
     def create_table(
         self,
         path: Path,
         schema: dict[str, Any],
-        df: 'DataFrame',
         if_exists: IfExistsParam,
-        primary_key:
+        primary_key: list[str] | None,
         num_retained_versions: int,
         comment: str,
         media_validation: MediaValidation,
-
-
-
-
-            return existing
+        create_default_idxs: bool,
+    ) -> tuple[Table, bool]:
+        """
+        Creates a new InsertableTable at the given path.

-
-        assert dir is not None
+        If `if_exists == IfExistsParam.IGNORE` and a table `t` already exists at the given path, returns `t, False`.

-
-
-
-
-
-
-
-
-
-
-
-
+        Otherwise, creates a new table `t` and returns `t, True` (or raises an exception if the operation fails).
+        """
+
+        @retry_loop(for_write=True)
+        def create_fn() -> tuple[UUID, bool]:
+            existing = self._handle_path_collision(path, InsertableTable, False, if_exists)
+            if existing is not None:
+                assert isinstance(existing, Table)
+                return existing._id, False
+
+            dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
+            assert dir is not None
+
+            md, ops = InsertableTable._create(
+                path.name,
+                schema,
+                primary_key=primary_key,
+                num_retained_versions=num_retained_versions,
+                comment=comment,
+                media_validation=media_validation,
+                create_default_idxs=create_default_idxs,
+            )
+            tbl_id = UUID(md.tbl_md.tbl_id)
+            self.store_tbl_md(tbl_id, dir._id, md.tbl_md, md.version_md, md.schema_version_md, ops)
+            return tbl_id, True
+
+        tbl_id, is_created = create_fn()
+        # finalize pending ops
+        with self.begin_xact(tbl_id=tbl_id, for_write=True, finalize_pending_ops=True):
+            tbl = self.get_table_by_id(tbl_id)
+            _logger.info(f'Created table {tbl._name!r}, id={tbl._id}')
+            Env.get().console_logger.info(f'Created table {tbl._name!r}.')
+            return tbl, is_created

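The creation flow is now two-phase: a `retry_loop`-wrapped inner `create_fn` writes the metadata (and any pending ops) in one transaction, and a second transaction finalizes the pending ops. The `retry_loop` decorator itself lies outside this hunk; the sketch below only guesses at the general shape of such a decorator, assuming it re-runs the wrapped function when the database reports a serialization conflict — every name in it is hypothetical:

```python
# Hypothetical sketch of a retry decorator (the real retry_loop is not shown in this diff).
import functools
import time
from typing import Any, Callable, TypeVar

T = TypeVar('T')

class SerializationConflict(Exception):
    """Placeholder for a DB serialization/locking error."""

def retry_loop(for_write: bool, max_attempts: int = 5) -> Callable[[Callable[..., T]], Callable[..., T]]:
    def decorator(fn: Callable[..., T]) -> Callable[..., T]:
        @functools.wraps(fn)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            for attempt in range(max_attempts):
                try:
                    # a real implementation would begin a (write) transaction here
                    return fn(*args, **kwargs)
                except SerializationConflict:
                    if attempt == max_attempts - 1:
                        raise
                    time.sleep(0.1 * 2**attempt)  # back off, then retry
            raise AssertionError('unreachable')
        return wrapper
    return decorator

@retry_loop(for_write=True)
def create_fn() -> int:
    return 42  # stand-in for the catalog mutation

print(create_fn())
```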
-    @_retry_loop(for_write=True)
     def create_view(
         self,
         path: Path,
         base: TableVersionPath,
-        select_list:
-        where:
-        sample_clause:
-        additional_columns:
+        select_list: list[tuple[exprs.Expr, str | None]] | None,
+        where: exprs.Expr | None,
+        sample_clause: 'SampleClause' | None,
+        additional_columns: dict[str, Any] | None,
         is_snapshot: bool,
-
+        create_default_idxs: bool,
+        iterator: tuple[type[ComponentIterator], dict[str, Any]] | None,
         num_retained_versions: int,
         comment: str,
         media_validation: MediaValidation,
         if_exists: IfExistsParam,
     ) -> Table:
-
-
-
-
-
-
+        @retry_loop(for_write=True)
+        def create_fn() -> UUID:
+            if not is_snapshot and base.is_mutable():
+                # this is a mutable view of a mutable base; X-lock the base and advance its view_sn before adding
+                # the view
+                self._acquire_tbl_lock(tbl_id=base.tbl_id, for_write=True)
+                base_tv = self.get_tbl_version(base.tbl_id, None, validate_initialized=True)
+                base_tv.tbl_md.view_sn += 1
+                result = Env.get().conn.execute(
+                    sql.update(schema.Table)
+                    .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
+                    .where(schema.Table.id == base.tbl_id)
+                )
+                assert result.rowcount == 1, result.rowcount

-
-
-
-
-            else:
-                iterator_class, iterator_args = iterator
-            view = View._create(
-                dir._id,
-                path.name,
-                base=base,
-                select_list=select_list,
-                additional_columns=additional_columns,
-                predicate=where,
-                sample_clause=sample_clause,
-                is_snapshot=is_snapshot,
-                iterator_cls=iterator_class,
-                iterator_args=iterator_args,
-                num_retained_versions=num_retained_versions,
-                comment=comment,
-                media_validation=media_validation,
-            )
-            FileCache.get().emit_eviction_warnings()
-            self._tbls[view._id] = view
-            return view
+            existing = self._handle_path_collision(path, View, is_snapshot, if_exists)
+            if existing is not None:
+                assert isinstance(existing, View)
+                return existing._id

-
-
-
-
+            dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
+            assert dir is not None
+            if iterator is None:
+                iterator_class, iterator_args = None, None
+            else:
+                iterator_class, iterator_args = iterator
+            md, ops = View._create(
+                dir._id,
+                path.name,
+                base=base,
+                select_list=select_list,
+                additional_columns=additional_columns,
+                predicate=where,
+                sample_clause=sample_clause,
+                is_snapshot=is_snapshot,
+                create_default_idxs=create_default_idxs,
+                iterator_cls=iterator_class,
+                iterator_args=iterator_args,
+                num_retained_versions=num_retained_versions,
+                comment=comment,
+                media_validation=media_validation,
+            )
+            tbl_id = UUID(md.tbl_md.tbl_id)
+            self.store_tbl_md(tbl_id, dir._id, md.tbl_md, md.version_md, md.schema_version_md, ops)
+            return tbl_id
+
+        view_id = create_fn()
+        if not is_snapshot and base.is_mutable():
+            # invalidate base's TableVersion instance, so that it gets reloaded with the new mutable view
+            self._clear_tv_cache(base.tbl_id, base.tbl_version.effective_version)
+            # base_tv = self.get_tbl_version(base.tbl_id, base.tbl_version.effective_version, validate_initialized=True)
+            # view_handle = TableVersionHandle(view_id, effective_version=None)
+            # base_tv.mutable_views.add(view_handle)
+
+        # finalize pending ops
+        with self.begin_xact(tbl_id=view_id, for_write=True, finalize_pending_ops=True):
+            return self.get_table_by_id(view_id)
+
+    def _clear_tv_cache(self, tbl_id: UUID, effective_version: int | None) -> None:
+        if (tbl_id, effective_version) in self._tbl_versions:
+            tv = self._tbl_versions[tbl_id, effective_version]
+            tv.is_validated = False
+            del self._tbl_versions[tbl_id, effective_version]
+
+    def create_replica(self, path: Path, md: list[TableVersionCompleteMd]) -> None:
         """
         Creates table, table_version, and table_schema_version records for a replica with the given metadata.
         The metadata should be presented in standard "ancestor order", with the table being replicated at
         list position 0 and the (root) base table at list position -1.
-
-        TODO: create_replica() also needs to create the store tables and populate them in order to make
-        replica creation atomic.
         """
+        assert self.in_write_xact
+
         tbl_id = UUID(md[0].tbl_md.tbl_id)

-
-        existing
-
-
-
-
-                'but a different table already exists at that location.'
-            )
-        assert isinstance(existing, View)
-        return
+        existing = self._handle_path_collision(path, Table, False, if_exists=IfExistsParam.IGNORE)  # type: ignore[type-abstract]
+        if existing is not None and existing._id != tbl_id:
+            raise excs.Error(
+                f'An attempt was made to create a replica table at {path!r}, '
+                'but a different table already exists at that location.'
+            )

         # Ensure that the system directory exists.
-        self.
+        self.__ensure_system_dir_exists()

         # Now check to see if this table already exists in the catalog.
-        existing =
+        existing = self.get_table_by_id(tbl_id)
         if existing is not None:
-            existing_path = Path(existing._path(),
-
-
+            existing_path = Path.parse(existing._path(), allow_system_path=True)
+            if existing_path != path and not existing_path.is_system_path:
+                # It does exist, under a different path from the specified one.
                 raise excs.Error(
-                    f'That table has already been replicated as {
+                    f'That table has already been replicated as {existing_path!r}.\n'
                     f'Drop the existing replica if you wish to re-create it.'
                 )
-            # If it's a system table, then this means it was created at some point as the ancestor of some other
-            # table (a snapshot-over-snapshot scenario). In that case, we simply move it to the new (named) location.
-            self._move(existing_path, path)

-        # Now store the metadata for this replica
-        # into a named location), this will be a no-op, but it still serves to validate that the newly received
-        # metadata is identical to what's in the catalog.
-        self.__store_replica_md(path, md[0])
-
-        # Now store the metadata for all of this table's proper ancestors. If one or more proper ancestors
+        # Now store the metadata for this replica's proper ancestors. If one or more proper ancestors
         # do not yet exist in the store, they will be created as anonymous system tables.
-
+        # We instantiate the ancestors starting with the base table and ending with the immediate parent of the
+        # table being replicated.
+        for ancestor_md in md[:0:-1]:
             ancestor_id = UUID(ancestor_md.tbl_md.tbl_id)
-            replica =
+            replica = self.get_table_by_id(ancestor_id)
             replica_path: Path
             if replica is None:
                 # We've never seen this table before. Create a new anonymous system table for it.
-                replica_path = Path(f'_system.replica_{ancestor_id.hex}',
+                replica_path = Path.parse(f'_system.replica_{ancestor_id.hex}', allow_system_path=True)
             else:
                 # The table already exists in the catalog. The existing path might be a system path (if the table
                 # was created as an anonymous base table of some other table), or it might not (if it's a snapshot
                 # that was directly replicated by the user at some point). In either case, use the existing path.
-                replica_path = Path(replica._path(),
+                replica_path = Path.parse(replica._path(), allow_system_path=True)

-            # Store the metadata; it could be a new version (in which case a new record will be created) or a
-            #
+            # Store the metadata; it could be a new version (in which case a new record will be created), or a known
+            # version (in which case the newly received metadata will be validated as identical).
+            # If it's a new version, this will result in a new TableVersion record being created.
             self.__store_replica_md(replica_path, ancestor_md)

-
-
+            # Now we must clear cached metadata for the ancestor table, to force the next table operation to pick up
+            # the new TableVersion instance. This is necessary because computed columns of descendant tables might
+            # reference columns of the ancestor table that only exist in the new version.
+            replica = Catalog.get().get_table_by_id(ancestor_id)
+            # assert replica is not None  # If it didn't exist before, it must have been created by now.
+            if replica is not None:
+                replica._tbl_version_path.clear_cached_md()
+
+        # Store the metadata for the table being replicated; as before, it could be a new version or a known version.
+        # If it's a new version, then a TableVersion record will be created, unless the table being replicated
+        # is a pure snapshot.
+        self.__store_replica_md(path, md[0])

-
+        # Finally, it's possible that the table already exists in the catalog, but as an anonymous system table that
+        # was hidden the last time we checked (and that just became visible when the replica was imported). In this
+        # case, we need to make the existing table visible by moving it to the specified path.
+        # We need to do this at the end, since `existing_path` needs to first have a non-fragment table version in
+        # order to be instantiated as a schema object.
+        existing = self.get_table_by_id(tbl_id)
+        assert existing is not None
+        existing_path = Path.parse(existing._path(), allow_system_path=True)
+        if existing_path != path:
+            assert existing_path.is_system_path
+            self._move(existing_path, path, IfExistsParam.ERROR, IfNotExistsParam.ERROR)
+
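The `md[:0:-1]` slice above walks the ancestor list in reverse while skipping position 0, so ancestors are stored base-first and the table being replicated is deferred to the final `__store_replica_md(path, md[0])` call. A quick illustration:

```python
# md is in "ancestor order": replicated table at index 0, root base at index -1.
md = ['replicated', 'parent', 'grandparent', 'root_base']

# md[:0:-1] yields every proper ancestor, base table first, and skips index 0.
assert md[:0:-1] == ['root_base', 'grandparent', 'parent']
```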
+    def __ensure_system_dir_exists(self) -> Dir:
+        system_path = Path.parse('_system', allow_system_path=True)
+        return self._create_dir(system_path, if_exists=IfExistsParam.IGNORE, parents=False)
+
+    def __store_replica_md(self, path: Path, md: TableVersionCompleteMd) -> None:
         _logger.info(f'Creating replica table at {path!r} with ID: {md.tbl_md.tbl_id}')
         dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
         assert dir is not None
@@ -647,9 +1205,10 @@ class Catalog:
         conn = Env.get().conn
         tbl_id = md.tbl_md.tbl_id

-        new_tbl_md:
-        new_version_md:
-        new_schema_version_md:
+        new_tbl_md: schema.TableMd | None = None
+        new_version_md: schema.TableVersionMd | None = None
+        new_schema_version_md: schema.TableSchemaVersionMd | None = None
+        is_new_tbl_version: bool = False

         # We need to ensure that the table metadata in the catalog always reflects the latest observed version of
         # this table. (In particular, if this is a base table, then its table metadata need to be consistent
@@ -673,35 +1232,45 @@ class Catalog:
             # New metadata is more recent than the metadata currently stored in the DB; we'll update the record
             # in place in the DB.
             new_tbl_md = dataclasses.replace(md.tbl_md, name=path.name, user=Env.get().user, is_replica=True)
-
         # Now see if a TableVersion record already exists in the DB for this table version. If not, insert it. If
         # it already exists, check that the existing record is identical to the new one.
         q = (
             sql.select(schema.TableVersion.md)
             .where(schema.TableVersion.tbl_id == tbl_id)
-            .where(
+            .where(schema.TableVersion.md['version'].cast(sql.Integer) == md.version_md.version)
         )
         existing_version_md_row = conn.execute(q).one_or_none()
         if existing_version_md_row is None:
             new_version_md = md.version_md
+            is_new_tbl_version = True
         else:
             existing_version_md = schema.md_from_dict(schema.TableVersionMd, existing_version_md_row.md)
-
+            # Validate that the existing metadata are identical to the new metadata, except is_fragment
+            # and additional_md which may differ.
+            if (
+                dataclasses.replace(
+                    existing_version_md,
+                    is_fragment=md.version_md.is_fragment,
+                    additional_md=md.version_md.additional_md,
+                )
+                != md.version_md
+            ):
                 raise excs.Error(
                     f'The version metadata for the replica {path!r}:{md.version_md.version} is inconsistent with '
                     'the metadata recorded from a prior replica.\n'
                     'This is likely due to data corruption in the replicated table.'
                 )
+            if existing_version_md.is_fragment and not md.version_md.is_fragment:
+                # This version exists in the DB as a fragment, but we're importing a complete copy of the same version;
+                # set the is_fragment flag to False in the DB.
+                new_version_md = md.version_md

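The comparison above tolerates differences only in `is_fragment` and `additional_md` by overwriting those two fields before testing equality. A self-contained sketch of that masked-equality idiom (the dataclass here is a stand-in, not pixeltable's actual TableVersionMd):

```python
# Compare two dataclass instances while ignoring designated fields, by copying
# those fields from one side via dataclasses.replace() before comparing.
import dataclasses

@dataclasses.dataclass
class VersionMd:
    version: int
    created_at: float
    is_fragment: bool
    additional_md: dict

existing = VersionMd(version=3, created_at=1.0, is_fragment=True, additional_md={})
incoming = VersionMd(version=3, created_at=1.0, is_fragment=False, additional_md={'note': 'x'})

masked = dataclasses.replace(
    existing, is_fragment=incoming.is_fragment, additional_md=incoming.additional_md
)
assert masked == incoming  # identical except for the two exempted fields
```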
         # Do the same thing for TableSchemaVersion.
         q = (
             sql.select(schema.TableSchemaVersion.md)
             .where(schema.TableSchemaVersion.tbl_id == tbl_id)
             .where(
-                sql.
-                f"({schema.TableSchemaVersion.__table__}.md->>'schema_version')::int = "
-                f'{md.schema_version_md.schema_version}'
-                )
+                schema.TableSchemaVersion.md['schema_version'].cast(sql.Integer) == md.schema_version_md.schema_version
             )
         )
         existing_schema_version_md_row = conn.execute(q).one_or_none()
@@ -711,6 +1280,7 @@ class Catalog:
             existing_schema_version_md = schema.md_from_dict(
                 schema.TableSchemaVersionMd, existing_schema_version_md_row.md
             )
+            # Validate that the existing metadata are identical to the new metadata.
             if existing_schema_version_md != md.schema_version_md:
                 raise excs.Error(
                     f'The schema version metadata for the replica {path!r}:{md.schema_version_md.schema_version} '
@@ -718,75 +1288,167 @@ class Catalog:
                     'This is likely due to data corruption in the replicated table.'
                 )

-        self.store_tbl_md(UUID(tbl_id), new_tbl_md, new_version_md, new_schema_version_md)
+        self.store_tbl_md(UUID(tbl_id), None, new_tbl_md, new_version_md, new_schema_version_md)

-
-
-
-
+        if is_new_tbl_version and not md.is_pure_snapshot:
+            # It's a new version of a table that has a physical store, so we need to create a TableVersion instance.
+            TableVersion.create_replica(md)
+
+    @retry_loop(for_write=False)
+    def get_table(self, path: Path, if_not_exists: IfNotExistsParam) -> Table | None:
+        obj = Catalog.get()._get_schema_object(
+            path, expected=Table, raise_if_not_exists=(if_not_exists == IfNotExistsParam.ERROR)
+        )
+        if obj is None:
+            _logger.info(f'Skipped table {path!r} (does not exist).')
+            return None

-    def _get_table(self, path: Path) -> Table:
-        obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
         assert isinstance(obj, Table)
-
-        #
-
-        tbl_version.ensure_md_loaded()
-        # if this table has mutable views, we need to load those as well, in order to record column dependencies
-        for v in tbl_version.mutable_views:
-            self.get_table_by_id(v.id)
+        # We need to clear cached metadata from tbl_version_path, in case the schema has been changed
+        # by another process.
+        obj._tbl_version_path.clear_cached_md()
         return obj

-    @
+    @retry_loop(for_write=True)
     def drop_table(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
-
-
-
-
-
+        tbl = self._get_schema_object(
+            path,
+            expected=Table,
+            raise_if_not_exists=(if_not_exists == IfNotExistsParam.ERROR and not force),
+            lock_parent=True,
+            lock_obj=False,
         )
-        if
-            _logger.info(f'Skipped table {
+        if tbl is None:
+            _logger.info(f'Skipped table {path!r} (does not exist).')
             return
-        assert isinstance(
-
+        assert isinstance(tbl, Table)
+
+        if isinstance(tbl, View) and tbl._tbl_version_path.is_mutable() and tbl._tbl_version_path.base.is_mutable():
+            # this is a mutable view of a mutable base;
+            # lock the base before the view, in order to avoid deadlocks with concurrent inserts/updates
+            base_id = tbl._tbl_version_path.base.tbl_id
+            self._acquire_tbl_lock(tbl_id=base_id, for_write=True, lock_mutable_tree=False)

-
+        self._drop_tbl(tbl, force=force, is_replace=False)
+
+    def _drop_tbl(self, tbl: Table | TableVersionPath, force: bool, is_replace: bool) -> None:
         """
         Drop the table (and recursively its views, if force == True).

+        `tbl` can be an instance of `Table` for a user table, or `TableVersionPath` for a hidden (system) table.
+
         Locking protocol:
         - X-lock base before X-locking any view
         - deadlock-free wrt to TableVersion.insert() (insert propagation also proceeds top-down)
         - X-locks parent dir prior to calling TableVersion.drop(): prevent concurrent creation of another SchemaObject
-          in the same directory with the same name (which could lead to duplicate names if we get
+          in the same directory with the same name (which could lead to duplicate names if we get aborted)
         """
-
+        is_pure_snapshot: bool
+        if isinstance(tbl, TableVersionPath):
+            tvp = tbl
+            tbl_id = tvp.tbl_id
+            tbl = None
+            is_pure_snapshot = False
+        else:
+            tvp = tbl._tbl_version_path
+            tbl_id = tbl._id
+            is_pure_snapshot = tbl._tbl_version is None
+
+        if tbl is not None:
+            self._acquire_dir_xlock(dir_id=tbl._dir_id)
+            self._acquire_tbl_lock(tbl_id=tbl_id, for_write=True, lock_mutable_tree=False)
+
+        view_ids = self.get_view_ids(tbl_id, for_update=True)
+        is_replica = tvp.is_replica()
+        do_drop = True
+
+        _logger.debug(f'Preparing to drop table {tbl_id} (force={force!r}, is_replica={is_replica}).')
+
         if len(view_ids) > 0:
-            if
-
-
+            if force:
+                # recursively drop views first
+                for view_id in view_ids:
+                    view = self.get_table_by_id(view_id)
+                    self._drop_tbl(view, force=force, is_replace=is_replace)
+
+            elif is_replica:
+                # Dropping a replica with dependents and no 'force': just rename it to be a hidden table;
+                # the actual table will not be dropped.
+                assert tbl is not None  # can only occur for a user table
+                system_dir = self.__ensure_system_dir_exists()
+                new_name = f'replica_{tbl_id.hex}'
+                _logger.debug(f'{tbl._path()!r} is a replica with dependents; renaming to {new_name!r}.')
+                tbl._move(new_name, system_dir._id)
+                do_drop = False  # don't actually clear the catalog for this table
+
+            else:
+                # It has dependents but is not a replica and no 'force', so it's an error to drop it.
+                assert tbl is not None  # can only occur for a user table
                 msg: str
                 if is_replace:
                     msg = (
-                        f'{
+                        f'{tbl._display_str()} already exists and has dependents. '
                         "Use `if_exists='replace_force'` to replace it."
                     )
                 else:
-                    msg = f'{
+                    msg = f'{tbl._display_str()} has dependents.'
                 raise excs.Error(msg)

-
-
-
+        # if this is a mutable view of a mutable base, advance the base's view_sn
+        if isinstance(tbl, View) and tvp.is_mutable() and tvp.base.is_mutable():
+            base_id = tvp.base.tbl_id
+            base_tv = self.get_tbl_version(base_id, None, validate_initialized=True)
+            base_tv.tbl_md.view_sn += 1
+            self.mark_modified_tvs(base_tv.handle)
+            result = Env.get().conn.execute(
+                sql.update(schema.Table.__table__)
+                .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
+                .where(schema.Table.id == base_id)
+            )
+            assert result.rowcount == 1, result.rowcount

-
-
-
-
-
+        if do_drop:
+            if not is_pure_snapshot:
+                # invalidate the TableVersion instance when we're done so that existing references to it can find out it
+                # has been dropped
+                self.mark_modified_tvs(tvp.tbl_version)
+            tv = tvp.tbl_version.get() if tvp.tbl_version is not None else None
+            if not is_pure_snapshot:
+                # drop the store table before deleting the Table record
+                tv = tvp.tbl_version.get()
+                tv.drop()
+
+            self.delete_tbl_md(tbl_id)
+            tvp.clear_cached_md()
+
+            assert (
+                is_replica
+                or (tbl_id, None) in self._tbls  # non-replica tables must have an entry with effective_version=None
+            )

-
+        # Remove visible Table references (we do this even for a replica that was just renamed).
+        versions = [version for id, version in self._tbls if id == tbl_id]
+        for version in versions:
+            del self._tbls[tbl_id, version]
+
+        _logger.info(f'Dropped table {tbl_id if tbl is None else repr(tbl._path())}.')
+
+        if (
+            is_replica  # if this is a replica,
+            and do_drop  # and it was actually dropped (not just renamed),
+            and tvp.base is not None  # and it has a base table,
+        ):
+            base_tbl = self.get_table_by_id(tvp.base.tbl_id)
+            base_tbl_path = None if base_tbl is None else Path.parse(base_tbl._path(), allow_system_path=True)
+            if (
+                (base_tbl_path is None or base_tbl_path.is_system_path)  # and the base table is hidden,
+                and len(self.get_view_ids(tvp.base.tbl_id, for_update=True)) == 0  # and has no other dependents,
+            ):
+                # then drop the base table as well (possibly recursively).
+                _logger.debug(f'Dropping hidden base table {tvp.base.tbl_id} of dropped replica {tbl_id}.')
+                self._drop_tbl(tvp.base, force=False, is_replace=False)
+
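In the `force` branch above, drops recurse through dependent views before the table itself, matching the top-down locking protocol. A toy sketch of that traversal order (illustrative data, not pixeltable API):

```python
# Force-dropping a table drops its views first, recursing so that each drop
# only ever removes a node whose dependents are already gone.
deps = {'tbl': ['view_a', 'view_b'], 'view_a': ['view_a1'], 'view_b': [], 'view_a1': []}
dropped: list[str] = []

def drop(name: str, force: bool) -> None:
    views = deps[name]
    if views and not force:
        raise RuntimeError(f'{name} has dependents')
    for v in views:          # recursively drop views first
        drop(v, force=force)
    dropped.append(name)     # then drop the table itself

drop('tbl', force=True)
print(dropped)  # ['view_a1', 'view_a', 'view_b', 'tbl']
```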
+    @retry_loop(for_write=True)
     def create_dir(self, path: Path, if_exists: IfExistsParam, parents: bool) -> Dir:
         return self._create_dir(path, if_exists, parents)

@@ -799,12 +1461,12 @@ class Catalog:
         # parent = self._get_schema_object(path.parent)
         # assert parent is not None
         # dir = Dir._create(parent._id, path.name)
-        # Env.get().console_logger.info(f'Created directory {
+        # Env.get().console_logger.info(f'Created directory {path!r}.')
         # return dir

         if parents:
             # start walking down from the root
-            last_parent:
+            last_parent: SchemaObject | None = None
             for ancestor in path.ancestors():
                 ancestor_obj = self._get_schema_object(ancestor, expected=Dir)
                 assert ancestor_obj is not None or last_parent is not None
@@ -818,10 +1480,10 @@ class Catalog:
             return existing
         assert parent is not None
         dir = Dir._create(parent._id, path.name)
-        Env.get().console_logger.info(f'Created directory {
+        Env.get().console_logger.info(f'Created directory {path!r}.')
         return dir

-    @
+    @retry_loop(for_write=True)
     def drop_dir(self, path: Path, if_not_exists: IfNotExistsParam, force: bool) -> None:
         _, _, schema_obj = self._prepare_dir_op(
             drop_dir_path=path.parent,
@@ -830,7 +1492,7 @@ class Catalog:
             raise_if_not_exists=if_not_exists == IfNotExistsParam.ERROR and not force,
         )
         if schema_obj is None:
-            _logger.info(f'Directory {
+            _logger.info(f'Directory {path!r} does not exist; skipped drop_dir().')
             return
         self._drop_dir(schema_obj._id, path, force=force)

@@ -843,10 +1505,10 @@ class Catalog:
         q = sql.select(sql.func.count()).select_from(schema.Table).where(schema.Table.dir_id == dir_id)
         num_tbls = conn.execute(q).scalar()
         if num_subdirs + num_tbls > 0:
-            raise excs.Error(f'Directory {
+            raise excs.Error(f'Directory {dir_path!r} is not empty.')

         # drop existing subdirs
-        self._acquire_dir_xlock(dir_id
+        self._acquire_dir_xlock(dir_id=dir_id)
         dir_q = sql.select(schema.Dir).where(schema.Dir.parent_id == dir_id)
         for row in conn.execute(dir_q).all():
             self._drop_dir(row.id, dir_path.append(row.md['name']), force=True)
@@ -861,59 +1523,82 class Catalog:

         # self.drop_dir(dir_id)
         conn.execute(sql.delete(schema.Dir).where(schema.Dir.id == dir_id))
-        _logger.info(f'Removed directory {
+        _logger.info(f'Removed directory {dir_path!r}.')

     def get_view_ids(self, tbl_id: UUID, for_update: bool = False) -> list[UUID]:
         """Return the ids of views that directly reference the given table"""
         conn = Env.get().conn
-
+        # check whether this table still exists
+        q = sql.select(sql.func.count()).select_from(schema.Table).where(schema.Table.id == tbl_id)
+        tbl_count = conn.execute(q).scalar()
+        if tbl_count == 0:
+            raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
+        q = sql.select(schema.Table.id).where(schema.Table.md['view_md']['base_versions'][0][0].astext == tbl_id.hex)
         if for_update:
             q = q.with_for_update()
         result = [r[0] for r in conn.execute(q).all()]
         return result

-    def get_tbl_version(
+    def get_tbl_version(
+        self,
+        tbl_id: UUID,
+        effective_version: int | None,
+        check_pending_ops: bool | None = None,
+        validate_initialized: bool = False,
+    ) -> TableVersion | None:
+        """
+        Returns the TableVersion instance for the given table and version and updates the cache.
+
+        If present in the cache and the instance isn't validated, validates version and view_sn against the stored
+        metadata.
+        """
         # we need a transaction here, if we're not already in one; if this starts a new transaction,
         # the returned TableVersion instance will not be validated
-        with self.begin_xact(
+        with self.begin_xact(for_write=False) as conn:
             tv = self._tbl_versions.get((tbl_id, effective_version))
             if tv is None:
-                tv = self._load_tbl_version(tbl_id, effective_version)
+                tv = self._load_tbl_version(tbl_id, effective_version, check_pending_ops=check_pending_ops)
             elif not tv.is_validated:
                 # only live instances are invalidated
                 assert effective_version is None
-                # we validate live instances by comparing our cached
+                # we validate live instances by comparing our cached TableMd.current_version/view_sn to what's stored
                 # _logger.debug(f'validating metadata for table {tbl_id}:{tv.version} ({id(tv):x})')
                 q = sql.select(schema.Table.md).where(schema.Table.id == tbl_id)
-                row = conn.execute(q).
-
+                row = conn.execute(q).one_or_none()
+                if row is None:
+                    raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
+                current_version, view_sn = row.md['current_version'], row.md['view_sn']

                 # the stored version can be behind TableVersion.version, because we don't roll back the in-memory
                 # metadata changes after a failed update operation
-                if current_version != tv.version:
+                if current_version != tv.version or view_sn != tv.tbl_md.view_sn:
                     # the cached metadata is invalid
                     _logger.debug(
                         f'reloading metadata for table {tbl_id} '
-                        f'(cached version: {tv.version}
-
+                        f'(cached/current version: {tv.version}/{current_version}, '
+                        f'cached/current view_sn: {tv.tbl_md.view_sn}/{view_sn})'
                     )
-                    tv = self._load_tbl_version(tbl_id, None)
+                    tv = self._load_tbl_version(tbl_id, None, check_pending_ops=check_pending_ops)
                 else:
                     # the cached metadata is valid
                     tv.is_validated = True

-        assert tv.is_validated
+        assert tv.is_validated, f'{tbl_id}:{effective_version} not validated\n{tv.__dict__}\n{self._debug_str()}'
+        if validate_initialized:
+            assert tv.is_initialized, (
+                f'{tbl_id}:{effective_version} not initialized\n{tv.__dict__}\n{self._debug_str()}'
+            )
         return tv

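Cache validation above now checks two counters: `current_version` (data changes) and `view_sn` (view-topology changes). A stripped-down sketch of the reader side, assuming the same invariant that a cached instance is reusable only when both counters match the stored metadata (all names illustrative):

```python
# A cached entry is trusted only if both its version and its view_sn match the
# values currently stored in the DB; otherwise it is reloaded.
stored = {'current_version': 7, 'view_sn': 3}  # stand-in for the Table.md row

class CachedTV:
    def __init__(self, version: int, view_sn: int) -> None:
        self.version = version
        self.view_sn = view_sn
        self.is_validated = False

def validate(tv: CachedTV) -> CachedTV:
    if stored['current_version'] != tv.version or stored['view_sn'] != tv.view_sn:
        return CachedTV(stored['current_version'], stored['view_sn'])  # reload
    tv.is_validated = True  # cached metadata is still current
    return tv

stale = CachedTV(version=7, view_sn=2)   # view_sn advanced after a view was added
fresh = validate(stale)
assert (fresh.version, fresh.view_sn) == (7, 3)
```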
     def remove_tbl_version(self, tbl_version: TableVersion) -> None:
         assert (tbl_version.id, tbl_version.effective_version) in self._tbl_versions
         del self._tbl_versions[tbl_version.id, tbl_version.effective_version]

-    def get_dir(self, dir_id: UUID, for_update: bool = False) ->
+    def get_dir(self, dir_id: UUID, for_update: bool = False) -> Dir | None:
         """Return the Dir with the given id, or None if it doesn't exist"""
         conn = Env.get().conn
         if for_update:
-            self._acquire_dir_xlock(
+            self._acquire_dir_xlock(dir_id=dir_id)
         q = sql.select(schema.Dir).where(schema.Dir.id == dir_id)
         row = conn.execute(q).one_or_none()
         if row is None:
@@ -921,24 +1606,24 @@ class Catalog:
         dir_record = schema.Dir(**row._mapping)
         return Dir(dir_record.id, dir_record.parent_id, dir_record.md['name'])

-    def _get_dir(self, path: Path,
+    def _get_dir(self, path: Path, lock_dir: bool = False) -> schema.Dir | None:
         """
-
+        lock_dir: if True, X-locks target (but not the ancestors)
         """
         user = Env.get().user
         conn = Env.get().conn
         if path.is_root:
-            if
-                self._acquire_dir_xlock(
+            if lock_dir:
+                self._acquire_dir_xlock(dir_name='')
             q = sql.select(schema.Dir).where(schema.Dir.parent_id.is_(None), schema.Dir.md['user'].astext == user)
             row = conn.execute(q).one_or_none()
             return schema.Dir(**row._mapping) if row is not None else None
         else:
-            parent_dir = self._get_dir(path.parent,
+            parent_dir = self._get_dir(path.parent, lock_dir=False)
             if parent_dir is None:
                 return None
-            if
-                self._acquire_dir_xlock(parent_id=parent_dir.id,
+            if lock_dir:
+                self._acquire_dir_xlock(parent_id=parent_dir.id, dir_name=path.name)
             q = sql.select(schema.Dir).where(
                 schema.Dir.parent_id == parent_dir.id,
                 schema.Dir.md['name'].astext == path.name,
@@ -947,66 +1632,198 @@ class Catalog:
             row = conn.execute(q).one_or_none()
             return schema.Dir(**row._mapping) if row is not None else None

-    def _load_tbl(self, tbl_id: UUID) ->
+    def _load_tbl(self, tbl_id: UUID) -> Table | None:
         """Loads metadata for the table with the given id and caches it."""
         _logger.info(f'Loading table {tbl_id}')
         from .insertable_table import InsertableTable
         from .view import View

         conn = Env.get().conn
-
+
+        # check for pending ops
+        q = sql.select(sql.func.count()).where(schema.PendingTableOp.tbl_id == tbl_id)
+        has_pending_ops = conn.execute(q).scalar() > 0
+        if has_pending_ops:
+            raise PendingTableOpsError(tbl_id)
+
+        q: sql.Executable = (
             sql.select(schema.Table, schema.TableSchemaVersion)
             .join(schema.TableSchemaVersion)
             .where(schema.Table.id == schema.TableSchemaVersion.tbl_id)
-            # Table.md['current_schema_version'] == TableSchemaVersion.schema_version
             .where(
-                sql.
-                f"({schema.Table.__table__}.md->>'current_schema_version')::int = "
-                f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
-                )
+                schema.Table.md['current_schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version
             )
             .where(schema.Table.id == tbl_id)
         )
         row = conn.execute(q).one_or_none()
         if row is None:
             return None
-        tbl_record,
+        tbl_record, _ = _unpack_row(row, [schema.Table, schema.TableSchemaVersion])

         tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
         view_md = tbl_md.view_md
-
-
+
+        if tbl_md.is_replica and not tbl_md.is_snapshot:
+            # If this is a non-snapshot replica, we have to load it as a specific version handle. This is because:
+            # (1) the head version might be a version fragment that isn't user-accessible, and
+            # (2) the cached data in view_md.base_versions is not reliable, since the replicated version does not
+            # necessarily track the head version of the originally shared table.
+
+            # Query for the latest non-fragment table version.
+            q = (
+                sql.select(schema.TableVersion.version)
+                .where(schema.TableVersion.tbl_id == tbl_id)
+                .where(schema.TableVersion.md['is_fragment'].astext == 'false')
+                .order_by(schema.TableVersion.md['version'].cast(sql.Integer).desc())
+                .limit(1)
+            )
+            row = conn.execute(q).one_or_none()
+            if row is not None:
+                version = row[0]
+                return self._load_tbl_at_version(tbl_id, version)
+            return None
+
+        if view_md is None and not tbl_md.is_replica:
+            # this is a base, non-replica table
             if (tbl_id, None) not in self._tbl_versions:
                 _ = self._load_tbl_version(tbl_id, None)
             tbl = InsertableTable(tbl_record.dir_id, TableVersionHandle(tbl_id, None))
-            self._tbls[tbl_id] = tbl
+            self._tbls[tbl_id, None] = tbl
             return tbl

         # this is a view; determine the sequence of TableVersions to load
-        tbl_version_path: list[tuple[UUID,
-
-        pure_snapshot = view_md.is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
-        if pure_snapshot:
+        tbl_version_path: list[tuple[UUID, int | None]] = []
+        if tbl_md.is_pure_snapshot:
             # this is a pure snapshot, without a physical table backing it; we only need the bases
             pass
         else:
-            effective_version =
+            effective_version = (
+                0 if view_md is not None and view_md.is_snapshot else None
+            )  # snapshots only have version 0
             tbl_version_path.append((tbl_id, effective_version))
-
+        if view_md is not None:
+            tbl_version_path.extend((UUID(tbl_id), version) for tbl_id, version in view_md.base_versions)

         # load TableVersions, starting at the root
-        base_path:
-        view_path:
+        base_path: TableVersionPath | None = None
+        view_path: TableVersionPath | None = None
         for id, effective_version in tbl_version_path[::-1]:
             if (id, effective_version) not in self._tbl_versions:
                 _ = self._load_tbl_version(id, effective_version)
             view_path = TableVersionPath(TableVersionHandle(id, effective_version), base=base_path)
             base_path = view_path
-        view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=
-        self._tbls[tbl_id] = view
+        view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=tbl_md.is_pure_snapshot)
+        self._tbls[tbl_id, None] = view
         return view

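Several hunks in this diff, including the "latest non-fragment version" query above, replace hand-built `sql.text()` strings with SQLAlchemy's typed JSON accessors. A standalone sketch of those accessors, assuming SQLAlchemy 2.x with the PostgreSQL dialect (table and column names are stand-ins, not pixeltable's schema module):

```python
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

metadata = sa.MetaData()
table_version = sa.Table(
    'tableversions', metadata,
    sa.Column('tbl_id', sa.Uuid),
    sa.Column('md', postgresql.JSONB),
)

# latest non-fragment version, newest first
q = (
    sa.select(table_version.c.md['version'].cast(sa.Integer))
    .where(table_version.c.md['is_fragment'].astext == 'false')
    .order_by(table_version.c.md['version'].cast(sa.Integer).desc())
    .limit(1)
)
# renders JSON path operators (md -> 'version', md ->> 'is_fragment') instead of
# hand-written SQL text; no DB connection needed just to compile the statement
print(q.compile(dialect=postgresql.dialect()))
```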
-    def
+    def _load_tbl_at_version(self, tbl_id: UUID, version: int) -> Table | None:
+        from .view import View
+
+        # Load the specified TableMd and TableVersionMd records from the db.
+        conn = Env.get().conn
+        q: sql.Executable = (
+            sql.select(schema.Table, schema.TableVersion)
+            .join(schema.TableVersion)
+            .where(schema.Table.id == tbl_id)
+            .where(schema.Table.id == schema.TableVersion.tbl_id)
+            .where(schema.TableVersion.version == version)
+        )
+        row = conn.execute(q).one_or_none()
+        if row is None:
+            return None
+        tbl_record, version_record = _unpack_row(row, [schema.Table, schema.TableVersion])
+        tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
+        version_md = schema.md_from_dict(schema.TableVersionMd, version_record.md)
+        tvp = self.construct_tvp(tbl_id, version, tbl_md.ancestor_ids, version_md.created_at)
+
+        view = View(tbl_id, tbl_record.dir_id, tbl_md.name, tvp, snapshot_only=True)
+        self._tbls[tbl_id, version] = view
+        return view
+
+    def construct_tvp(self, tbl_id: UUID, version: int, ancestor_ids: list[str], created_at: float) -> TableVersionPath:
+        # Construct the TableVersionPath for the specified TableVersion. We do this by examining the created_at
+        # timestamps of this table and all its ancestors.
+        # TODO: Store the relevant TableVersionPaths in the database, so that we don't need to rely on timestamps
+        # (which might be nondeterministic in the future).
+
+        assert Env.get().conn is not None
+
+        # Build the list of ancestor versions, starting with the given table and traversing back to the base table.
+        # For each proper ancestor, we use the version whose created_at timestamp equals or most nearly precedes the
+        # given TableVersion's created_at timestamp.
+        ancestors: list[tuple[UUID, int]] = [(tbl_id, version)]
+        for ancestor_id in ancestor_ids:
+            q = (
+                sql.select(schema.TableVersion)
+                .where(schema.TableVersion.tbl_id == ancestor_id)
+                .where(schema.TableVersion.md['created_at'].cast(sql.Float) <= created_at)
+                .order_by(schema.TableVersion.md['created_at'].cast(sql.Float).desc())
+                .limit(1)
+            )
+            row = Env.get().conn.execute(q).one_or_none()
+            if row is None:
+                # This can happen if an ancestor version is garbage collected; it can also happen in
+                # rare circumstances involving table versions created specifically with Pixeltable 0.4.3.
+                _logger.info(f'Ancestor {ancestor_id} not found for table {tbl_id}:{version}')
+                raise excs.Error('The specified table version is no longer valid and cannot be retrieved.')
+            ancestor_version_record = _unpack_row(row, [schema.TableVersion])[0]
+            ancestor_version_md = schema.md_from_dict(schema.TableVersionMd, ancestor_version_record.md)
+            assert ancestor_version_md.created_at <= created_at
+            ancestors.append((UUID(ancestor_id), ancestor_version_md.version))
+
+        # Force any ancestors to be loaded (base table first).
+        for anc_id, anc_version in ancestors[::-1]:
+            if (anc_id, anc_version) not in self._tbl_versions:
+                _ = self._load_tbl_version(anc_id, anc_version)
+
+        # Now reconstruct the relevant TableVersionPath instance from the ancestor versions.
+        tvp: TableVersionPath | None = None
+        for anc_id, anc_version in ancestors[::-1]:
+            tvp = TableVersionPath(TableVersionHandle(anc_id, anc_version), base=tvp)
+
+        return tvp
+
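`construct_tvp` resolves each ancestor to the version whose `created_at` equals or most nearly precedes the target version's timestamp. The same rule in pure Python, with illustrative data in place of the per-ancestor SQL query:

```python
# For each ancestor, pick the version whose created_at equals or most nearly
# precedes the target created_at.
versions_by_ancestor = {
    'base': [(1, 10.0), (2, 20.0), (3, 30.0)],     # (version, created_at)
    'parent': [(1, 12.0), (2, 25.0)],
}

def resolve(ancestor: str, created_at: float) -> int:
    candidates = [(v, ts) for v, ts in versions_by_ancestor[ancestor] if ts <= created_at]
    if not candidates:
        raise ValueError('no ancestor version precedes the target timestamp')
    return max(candidates, key=lambda vt: vt[1])[0]  # latest preceding version

# a view version created at t=26.0 resolves to parent v2 (t=25.0) and base v2 (t=20.0)
assert resolve('parent', 26.0) == 2
assert resolve('base', 26.0) == 2
```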
+    @retry_loop(for_write=False)
+    def collect_tbl_history(self, tbl_id: UUID, n: int | None) -> list[TableVersionCompleteMd]:
+        return self._collect_tbl_history(tbl_id, n)
+
+    def _collect_tbl_history(self, tbl_id: UUID, n: int | None) -> list[TableVersionCompleteMd]:
+        """
+        Returns the history of up to n versions of the table with the given UUID.
+
+        Args:
+            tbl_id: the UUID of the table to collect history for.
+            n: Optional limit on the maximum number of versions returned.
+
+        Returns:
+            A sequence of rows, ordered by version number
+            Each row contains a TableVersion and a TableSchemaVersion object.
+        """
+        q = (
+            sql.select(schema.Table, schema.TableVersion, schema.TableSchemaVersion)
+            .where(schema.Table.id == tbl_id)
+            .join(schema.TableVersion)
+            .where(schema.TableVersion.tbl_id == tbl_id)
+            .join(schema.TableSchemaVersion)
+            .where(schema.TableSchemaVersion.tbl_id == tbl_id)
+            .where(
+                schema.TableVersion.md['schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version
+            )
+            .order_by(schema.TableVersion.version.desc())
+        )
+        if n is not None:
+            q = q.limit(n)
+        src_rows = Env.get().session.execute(q).fetchall()
+        return [
+            TableVersionCompleteMd(
+                tbl_md=schema.md_from_dict(schema.TableMd, row.Table.md),
+                version_md=schema.md_from_dict(schema.TableVersionMd, row.TableVersion.md),
+                schema_version_md=schema.md_from_dict(schema.TableSchemaVersionMd, row.TableSchemaVersion.md),
+            )
+            for row in src_rows
+        ]
+
+    def load_tbl_md(self, tbl_id: UUID, effective_version: int | None) -> TableVersionCompleteMd:
         """
         Loads metadata from the store for a given table UUID and version.
         """
@@ -1030,13 +1847,9 @@ class Catalog:
             # JOIN TableVersion tv ON (tv.tbl_id = tbl_id AND tv.version = effective_version)
             # JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND tv.md.schema_version = tsv.schema_version)
             # WHERE t.id = tbl_id
-            q = q.where(
-                sql.
-
-                f"({schema.TableVersion.__table__}.md->>'schema_version')::int = "
-                f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
-                )
-            )
+            q = q.where(
+                schema.TableVersion.md['version'].cast(sql.Integer) == effective_version,
+                schema.TableVersion.md['schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version,
             )
         else:
             # we are loading the current version
@@ -1046,21 +1859,13 @@ class Catalog:
             # JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND t.current_schema_version = tsv.schema_version)
             # WHERE t.id = tbl_id
             q = q.where(
-                sql.
-
-                f'{schema.TableVersion.__table__}.{schema.TableVersion.version.name}'
-                )
-            ).where(
-                sql.text(
-                    (
-                        f"({schema.Table.__table__}.md->>'current_schema_version')::int = "
-                        f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
-                    )
-                )
+                schema.Table.md['current_version'].cast(sql.Integer) == schema.TableVersion.version,
+                schema.Table.md['current_schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version,
             )

         row = conn.execute(q).one_or_none()
-
+        if row is None:
+            raise excs.Error(self._dropped_tbl_error_msg(tbl_id))
         tbl_record, version_record, schema_version_record = _unpack_row(
             row, [schema.Table, schema.TableVersion, schema.TableSchemaVersion]
         )
@@ -1069,24 +1874,36 @@ class Catalog:
         version_md = schema.md_from_dict(schema.TableVersionMd, version_record.md)
         schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)

-        return
+        return TableVersionCompleteMd(tbl_md, version_md, schema_version_md)

     def store_tbl_md(
         self,
         tbl_id: UUID,
-
-
-
+        dir_id: UUID | None,
+        tbl_md: schema.TableMd | None,
+        version_md: schema.TableVersionMd | None,
+        schema_version_md: schema.TableSchemaVersionMd | None,
+        pending_ops: list[TableOp] | None = None,
     ) -> None:
         """
-        Stores metadata to the DB.
-
+        Stores metadata to the DB.
+
+        Args:
+            tbl_id: UUID of the table to store metadata for.
+            dir_id: If specified, the tbl_md will be added to the given directory; if None, the table must already exist
+            tbl_md: If specified, `tbl_md` will be inserted, or updated (only one such record can exist per UUID)
+            version_md: inserted as a new record if present
+            schema_version_md: will be inserted as a new record if present

         If inserting `version_md` or `schema_version_md` would be a primary key violation, an exception will be raised.
         """
-        conn = Env.get().conn
         assert self._in_write_xact
+        assert version_md is None or version_md.created_at > 0.0
+        assert pending_ops is None or len(pending_ops) > 0
+        assert pending_ops is None or tbl_md is not None  # if we write pending ops, we must also write new tbl_md
+        session = Env.get().session

+        # Construct and insert or update table record if requested.
         if tbl_md is not None:
             assert tbl_md.tbl_id == str(tbl_id)
             if version_md is not None:
@@ -1094,32 +1911,88 @@ class Catalog:
                 assert tbl_md.current_schema_version == version_md.schema_version
             if schema_version_md is not None:
                 assert tbl_md.current_schema_version == schema_version_md.schema_version
-
-
-
-
-
-
+            if pending_ops is not None:
+                tbl_md.has_pending_ops = True
+
+            if dir_id is not None:
+                # We are inserting a record while creating a new table.
+                tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_md))
+                session.add(tbl_record)
+            else:
+                # Update the existing table record.
+                result = session.execute(
+                    sql.update(schema.Table.__table__)
+                    .values({schema.Table.md: dataclasses.asdict(tbl_md)})
+                    .where(schema.Table.id == tbl_id)
+                )
+                assert isinstance(result, sql.CursorResult)
+                assert result.rowcount == 1, result.rowcount

+        # Construct and insert new table version record if requested.
         if version_md is not None:
             assert version_md.tbl_id == str(tbl_id)
             if schema_version_md is not None:
                 assert version_md.schema_version == schema_version_md.schema_version
-
-
+            tv_rows = (
+                session.query(schema.TableVersion)
+                .filter(schema.TableVersion.tbl_id == tbl_id, schema.TableVersion.version == version_md.version)
+                .all()
+            )
+            if len(tv_rows) == 0:
+                # It's a new table version; insert a new record in the DB for it.
+                tbl_version_record = schema.TableVersion(
                     tbl_id=tbl_id, version=version_md.version, md=dataclasses.asdict(version_md)
                 )
-
+                session.add(tbl_version_record)
+            else:
+                # This table version already exists; update it.
+                assert len(tv_rows) == 1  # must be unique
+                tv = tv_rows[0]
+                # Validate that the only fields that can change are 'is_fragment' and 'additional_md'.
+                assert tv.md == dataclasses.asdict(
+                    dataclasses.replace(
+                        version_md, is_fragment=tv.md['is_fragment'], additional_md=tv.md['additional_md']
+                    )
+                )
+                result = session.execute(
+                    sql.update(schema.TableVersion.__table__)
+                    .values({schema.TableVersion.md: dataclasses.asdict(version_md)})
+                    .where(schema.TableVersion.tbl_id == tbl_id, schema.TableVersion.version == version_md.version)
+                )
+                assert isinstance(result, sql.CursorResult)
+                assert result.rowcount == 1, result.rowcount

+        # Construct and insert a new schema version record if requested.
         if schema_version_md is not None:
             assert schema_version_md.tbl_id == str(tbl_id)
-
-
-                tbl_id=tbl_id,
-                schema_version=schema_version_md.schema_version,
-                md=dataclasses.asdict(schema_version_md),
-            )
+            schema_version_record = schema.TableSchemaVersion(
+                tbl_id=tbl_id, schema_version=schema_version_md.schema_version, md=dataclasses.asdict(schema_version_md)
             )
+            session.add(schema_version_record)
+
+        # make sure we don't have any pending ops
+        assert session.query(schema.PendingTableOp).filter(schema.PendingTableOp.tbl_id == tbl_id).count() == 0
+
+        if pending_ops is not None:
+            for op in pending_ops:
+                op_record = schema.PendingTableOp(tbl_id=tbl_id, op_sn=op.op_sn, op=dataclasses.asdict(op))
+                session.add(op_record)
+
+        session.flush()  # Inform SQLAlchemy that we want to write these changes to the DB.
+
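For version records, `store_tbl_md` now inserts when the version is new and otherwise validates that only `is_fragment` and `additional_md` changed before updating in place. A dictionary-based sketch of that insert-or-validate pattern (illustrative, not the real schema):

```python
# A version record is inserted if new; if it already exists, every field except
# an allowed set must match before the record is overwritten.
store: dict[int, dict] = {}          # version -> md (stand-in for the TableVersion table)
MUTABLE_FIELDS = {'is_fragment', 'additional_md'}

def store_version_md(version: int, md: dict) -> None:
    existing = store.get(version)
    if existing is None:
        store[version] = md          # new version: plain insert
        return
    frozen = {k: v for k, v in existing.items() if k not in MUTABLE_FIELDS}
    incoming = {k: v for k, v in md.items() if k not in MUTABLE_FIELDS}
    assert frozen == incoming        # only the allowed fields may differ
    store[version] = md              # validated: update in place

store_version_md(3, {'version': 3, 'is_fragment': True, 'additional_md': {}})
store_version_md(3, {'version': 3, 'is_fragment': False, 'additional_md': {'a': 1}})
print(store[3]['is_fragment'])  # False: fragment flag cleared by the complete copy
```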
+    def store_update_status(self, tbl_id: UUID, version: int, status: UpdateStatus) -> None:
+        """Update the TableVersion.md.update_status field"""
+        assert self._in_write_xact
+        conn = Env.get().conn
+
+        stmt = (
+            sql.update(schema.TableVersion)
+            .where(schema.TableVersion.tbl_id == tbl_id, schema.TableVersion.version == version)
+            .values(md=schema.TableVersion.md.op('||')({'update_status': dataclasses.asdict(status)}))
+        )
+
+        res = conn.execute(stmt)
+        assert res.rowcount == 1, res.rowcount

1124
1997
|
def delete_tbl_md(self, tbl_id: UUID) -> None:
|
|
1125
1998
|
"""
|
|
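Note on the new store_update_status: rather than reading, mutating, and rewriting the whole JSONB md document, it patches a single key in place with Postgres' `||` concatenation operator, which SQLAlchemy exposes via .op('||'). A minimal sketch of the same technique against a hypothetical items table with a JSONB column doc (names are illustrative, not Pixeltable's):

    import sqlalchemy as sql
    from sqlalchemy.dialects.postgresql import JSONB

    metadata = sql.MetaData()
    items = sql.Table(
        'items', metadata,
        sql.Column('id', sql.Integer, primary_key=True),
        sql.Column('doc', JSONB),
    )

    # Renders roughly as: UPDATE items SET doc = doc || %(param)s WHERE items.id = %(id)s
    # i.e. the dict below is merged into the stored document server-side.
    stmt = sql.update(items).where(items.c.id == 1).values(doc=items.c.doc.op('||')({'status': 'done'}))

Keep in mind that `||` is a shallow merge: top-level keys are replaced wholesale, which is why the catalog writes the complete serialized UpdateStatus under a single key instead of patching nested fields.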
@@ -1128,9 +2001,10 @@ class Catalog:
         conn = Env.get().conn
         conn.execute(sql.delete(schema.TableSchemaVersion.__table__).where(schema.TableSchemaVersion.tbl_id == tbl_id))
         conn.execute(sql.delete(schema.TableVersion.__table__).where(schema.TableVersion.tbl_id == tbl_id))
+        conn.execute(sql.delete(schema.PendingTableOp.__table__).where(schema.PendingTableOp.tbl_id == tbl_id))
         conn.execute(sql.delete(schema.Table.__table__).where(schema.Table.id == tbl_id))
 
-    def load_replica_md(self, tbl: Table) -> list[
+    def load_replica_md(self, tbl: Table) -> list[TableVersionCompleteMd]:
         """
         Load metadata for the given table along with all its ancestors. The values of TableMd.current_version and
         TableMd.current_schema_version will be adjusted to ensure that the metadata represent a valid (internally
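delete_tbl_md now also clears PendingTableOp rows, and it keeps the ordering discipline of deleting dependent rows before the parent Table row, so no intermediate statement violates referential integrity. A minimal sketch of that ordering under a hypothetical parent/child schema (illustrative names, not Pixeltable's):

    import sqlalchemy as sql

    metadata = sql.MetaData()
    parent = sql.Table('parent', metadata, sql.Column('id', sql.Integer, primary_key=True))
    child = sql.Table(
        'child', metadata,
        sql.Column('id', sql.Integer, primary_key=True),
        sql.Column('parent_id', sql.ForeignKey('parent.id')),
    )

    def delete_parent(conn: sql.engine.Connection, parent_id: int) -> None:
        # Delete dependents first so the FK constraint is never violated mid-transaction.
        conn.execute(sql.delete(child).where(child.c.parent_id == parent_id))
        conn.execute(sql.delete(parent).where(parent.c.id == parent_id))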
@@ -1143,57 +2017,87 @@ class Catalog:
 
         # If `tbl` is a named pure snapshot, we're not quite done, since the snapshot metadata won't appear in the
         # TableVersionPath. We need to prepend it separately.
-        if tbl
+        if isinstance(tbl, View) and tbl._is_named_pure_snapshot():
             snapshot_md = self.load_tbl_md(tbl._id, 0)
             md = [snapshot_md, *md]
 
-        for ancestor_md in md
+        for ancestor_md in md:
+            # Set the `is_replica` flag on every ancestor's TableMd.
+            ancestor_md.tbl_md.is_replica = True
             # For replica metadata, we guarantee that the current_version and current_schema_version of TableMd
-            # match the corresponding values in
+            # match the corresponding values in TableVersionCompleteMd and TableSchemaVersionMd. This is to ensure that,
             # when the metadata is later stored in the catalog of a different Pixeltable instance, the values of
             # current_version and current_schema_version will always point to versions that are known to the
             # destination catalog.
             ancestor_md.tbl_md.current_version = ancestor_md.version_md.version
             ancestor_md.tbl_md.current_schema_version = ancestor_md.schema_version_md.schema_version
 
+        for ancestor_md in md[1:]:
+            # Also, the table version of every proper ancestor is ephemeral; it does not represent a queryable
+            # table version (the data might be incomplete, since we have only retrieved one of its views, not
+            # the table itself).
+            ancestor_md.version_md.is_fragment = True
+
         return md
 
-    def _load_tbl_version(
+    def _load_tbl_version(
+        self, tbl_id: UUID, effective_version: int | None, check_pending_ops: bool = True
+    ) -> TableVersion | None:
         """Creates TableVersion instance from stored metadata and registers it in _tbl_versions."""
-
+        table_version_md = self.load_tbl_md(tbl_id, effective_version)
+        tbl_md = table_version_md.tbl_md
+        version_md = table_version_md.version_md
+        schema_version_md = table_version_md.schema_version_md
         view_md = tbl_md.view_md
 
         conn = Env.get().conn
 
+        if check_pending_ops:
+            pending_ops_q = (
+                sql.select(sql.func.count())
+                .select_from(schema.Table)
+                .join(schema.PendingTableOp)
+                .where(schema.PendingTableOp.tbl_id == tbl_id)
+                .where(schema.Table.id == tbl_id)
+            )
+            if effective_version is not None:
+                # we only care about pending ops if the requested version is the current version
+                pending_ops_q = pending_ops_q.where(
+                    sql.text(f"({schema.Table.__table__}.md->>'current_version')::int = {effective_version}")
+                )
+            has_pending_ops = conn.execute(pending_ops_q).scalar() > 0
+            if has_pending_ops:
+                raise PendingTableOpsError(tbl_id)
+
         # load mutable view ids for mutable TableVersions
         mutable_view_ids: list[UUID] = []
-        # If this is a replica, effective_version should not be None. We see this today, because
-        # the replica's TV instance's Column instances contain value_expr_dicts that reference the live version.
-        # This is presumably a source of bugs, because it ignores schema version changes (eg, column renames).
-        # TODO: retarget the value_expr_dict when instantiating Columns for a particular TV instance.
         if effective_version is None and not tbl_md.is_replica:
-            q =
-            sql.
-
-
-            )
+            q = (
+                sql.select(schema.Table.id)
+                .where(schema.Table.md['view_md']['base_versions'][0][0].astext == tbl_id.hex)
+                .where(schema.Table.md['view_md']['base_versions'][0][1].astext == None)
             )
             mutable_view_ids = [r[0] for r in conn.execute(q).all()]
+
         mutable_views = [TableVersionHandle(id, None) for id in mutable_view_ids]
 
         tbl_version: TableVersion
         if view_md is None:
             # this is a base table
-            tbl_version = TableVersion(
-                tbl_id, tbl_md, effective_version, schema_version_md, mutable_views=mutable_views
-            )
+            tbl_version = TableVersion(tbl_id, tbl_md, version_md, effective_version, schema_version_md, mutable_views)
         else:
             assert len(view_md.base_versions) > 0  # a view needs to have a base
-
+            # TODO: add TableVersionCompleteMd.is_pure_snapshot() and use that
+            pure_snapshot = (
+                view_md.is_snapshot
+                and view_md.predicate is None
+                and view_md.sample_clause is None
+                and len(schema_version_md.columns) == 0
+            )
             assert not pure_snapshot  # a pure snapshot doesn't have a physical table backing it, no point in loading it
 
             base: TableVersionHandle
-            base_path:
+            base_path: TableVersionPath | None = None  # needed for live view
             if view_md.is_snapshot:
                 base = TableVersionHandle(UUID(view_md.base_versions[0][0]), view_md.base_versions[0][1])
             else:
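The rewritten mutable-views query in _load_tbl_version is a useful illustration of filtering on JSON structure in SQLAlchemy: subscripting md['view_md']['base_versions'][0][0] navigates into the document, and .astext yields the leaf as text so it can be compared (with == None rendering as IS NULL). A standalone sketch of the same query shape, with illustrative table and column names:

    import sqlalchemy as sql
    from sqlalchemy.dialects.postgresql import JSONB

    metadata = sql.MetaData()
    tables = sql.Table(
        'tables', metadata,
        sql.Column('id', sql.Integer, primary_key=True),
        sql.Column('md', JSONB),
    )

    base_id_hex = '0123abcd'  # hypothetical base-table id
    q = (
        sql.select(tables.c.id)
        # md['view_md']['base_versions'][0] is a [base_id, version] pair; a null version
        # means the view tracks the live version of its base.
        .where(tables.c.md['view_md']['base_versions'][0][0].astext == base_id_hex)
        .where(tables.c.md['view_md']['base_versions'][0][1].astext == None)  # noqa: E711 -> IS NULL
    )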
@@ -1203,16 +2107,20 @@ class Catalog:
             tbl_version = TableVersion(
                 tbl_id,
                 tbl_md,
+                version_md,
                 effective_version,
                 schema_version_md,
+                mutable_views,
                 base_path=base_path,
                 base=base,
-                mutable_views=mutable_views,
             )
 
+        # register the instance before init()
         self._tbl_versions[tbl_id, effective_version] = tbl_version
+        # register this instance as modified, so that it gets purged if the transaction fails (it may not be
+        # fully initialized)
+        self.mark_modified_tvs(tbl_version.handle)
         tbl_version.init()
-
         return tbl_version
 
     def _init_store(self) -> None:
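The ordering at the end of _load_tbl_version matters: the instance is registered in _tbl_versions and marked as modified before init() runs, so a failure inside init() leaves enough bookkeeping for the transaction's rollback path to purge the half-initialized instance. A hedged sketch of the pattern (the Cache/Resource names are stand-ins, not Pixeltable classes):

    class Resource:
        def __init__(self, key: str) -> None:
            self.key = key
            self.ready = False

        def init(self) -> None:
            # ... expensive setup that may raise ...
            self.ready = True

    class Cache:
        def __init__(self) -> None:
            self._instances: dict[str, Resource] = {}
            self._modified: set[str] = set()

        def load(self, key: str) -> Resource:
            res = Resource(key)
            # Register before init(): if init() fails, rollback() knows to purge
            # the possibly half-initialized instance instead of leaking it.
            self._instances[key] = res
            self._modified.add(key)
            res.init()
            return res

        def rollback(self) -> None:
            for key in self._modified:
                self._instances.pop(key, None)
            self._modified.clear()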
@@ -1220,7 +2128,7 @@ class Catalog:
         self.create_user(None)
         _logger.info('Initialized catalog.')
 
-    def create_user(self, user:
+    def create_user(self, user: str | None) -> None:
         """
         Creates a catalog record (root directory) for the specified user, if one does not already exist.
         """
@@ -1239,18 +2147,24 @@ class Catalog:
 
     def _handle_path_collision(
         self, path: Path, expected_obj_type: type[SchemaObject], expected_snapshot: bool, if_exists: IfExistsParam
-    ) ->
+    ) -> SchemaObject | None:
         obj, _, _ = self._prepare_dir_op(add_dir_path=path.parent, add_name=path.name)
 
         if if_exists == IfExistsParam.ERROR and obj is not None:
-            raise excs.Error(f'Path {
+            raise excs.Error(f'Path {path!r} is an existing {obj._display_name()}')
         else:
             is_snapshot = isinstance(obj, View) and obj._tbl_version_path.is_snapshot()
             if obj is not None and (not isinstance(obj, expected_obj_type) or (expected_snapshot and not is_snapshot)):
-
+                if expected_obj_type is Dir:
+                    obj_type_str = 'directory'
+                elif expected_obj_type is InsertableTable:
+                    obj_type_str = 'table'
+                elif expected_obj_type is View:
+                    obj_type_str = 'snapshot' if expected_snapshot else 'view'
+                else:
+                    raise AssertionError()
                 raise excs.Error(
-                    f'Path {
-                    f'Cannot {if_exists.name.lower()} it.'
+                    f'Path {path!r} already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.'
                 )
 
         if obj is None:
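_handle_path_collision is the single place where the if_exists argument of the create_* API is interpreted: ERROR raises on any existing object, and an object of the wrong type can never be replaced, only reported with the friendly type name built above. A hedged usage sketch (argument spellings as suggested by the error messages; check the API docs for the authoritative set):

    import pixeltable as pxt

    pxt.create_dir('demo', if_exists='ignore')
    t = pxt.create_table('demo.films', {'title': pxt.String}, if_exists='ignore')

    # if_exists='error' (the default) would raise here, since demo.films exists;
    # 'replace' drops and recreates it, but only if the existing object is a table.
    t = pxt.create_table('demo.films', {'title': pxt.String}, if_exists='replace')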
@@ -1263,7 +2177,7 @@ class Catalog:
         dir_contents = self._get_dir_contents(obj._id)
         if len(dir_contents) > 0 and if_exists == IfExistsParam.REPLACE:
             raise excs.Error(
-                f'Directory {
+                f'Directory {path!r} already exists and is not empty. '
                 'Use `if_exists="replace_force"` to replace it.'
             )
         self._drop_dir(obj._id, path, force=True)