deltacat 1.1.36__py3-none-any.whl → 2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +42 -3
- deltacat/annotations.py +36 -0
- deltacat/api.py +168 -0
- deltacat/aws/s3u.py +4 -4
- deltacat/benchmarking/benchmark_engine.py +82 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +21 -0
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
- deltacat/catalog/__init__.py +14 -0
- deltacat/catalog/delegate.py +199 -106
- deltacat/catalog/iceberg/__init__.py +4 -0
- deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/catalog/iceberg/impl.py +368 -0
- deltacat/catalog/iceberg/overrides.py +74 -0
- deltacat/catalog/interface.py +273 -76
- deltacat/catalog/main/impl.py +720 -0
- deltacat/catalog/model/catalog.py +227 -20
- deltacat/catalog/model/properties.py +116 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +5 -5
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +1 -1
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +1 -1
- deltacat/compute/compactor/steps/materialize.py +6 -2
- deltacat/compute/compactor/utils/io.py +1 -1
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor_v2/compaction_session.py +5 -9
- deltacat/compute/compactor_v2/constants.py +1 -30
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/merge_input.py +1 -7
- deltacat/compute/compactor_v2/private/compaction_utils.py +5 -6
- deltacat/compute/compactor_v2/steps/merge.py +17 -126
- deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/io.py +1 -1
- deltacat/compute/compactor_v2/utils/merge.py +0 -1
- deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
- deltacat/compute/compactor_v2/utils/task_options.py +23 -43
- deltacat/compute/converter/constants.py +4 -0
- deltacat/compute/converter/converter_session.py +143 -0
- deltacat/compute/converter/model/convert_input.py +69 -0
- deltacat/compute/converter/model/convert_input_files.py +61 -0
- deltacat/compute/converter/model/converter_session_params.py +99 -0
- deltacat/compute/converter/pyiceberg/__init__.py +0 -0
- deltacat/compute/converter/pyiceberg/catalog.py +75 -0
- deltacat/compute/converter/pyiceberg/overrides.py +135 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
- deltacat/compute/converter/steps/__init__.py +0 -0
- deltacat/compute/converter/steps/convert.py +211 -0
- deltacat/compute/converter/steps/dedupe.py +60 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +88 -0
- deltacat/compute/converter/utils/converter_session_utils.py +109 -0
- deltacat/compute/converter/utils/iceberg_columns.py +82 -0
- deltacat/compute/converter/utils/io.py +43 -0
- deltacat/compute/converter/utils/s3u.py +133 -0
- deltacat/compute/resource_estimation/delta.py +1 -19
- deltacat/constants.py +47 -1
- deltacat/env.py +51 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/common/__init__.py +0 -0
- deltacat/examples/common/fixtures.py +15 -0
- deltacat/examples/hello_world.py +27 -0
- deltacat/examples/iceberg/__init__.py +0 -0
- deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
- deltacat/examples/iceberg/iceberg_reader.py +149 -0
- deltacat/exceptions.py +51 -9
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +118 -28
- deltacat/storage/iceberg/__init__.py +0 -0
- deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
- deltacat/storage/iceberg/impl.py +737 -0
- deltacat/storage/iceberg/model.py +709 -0
- deltacat/storage/interface.py +217 -134
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +2077 -0
- deltacat/storage/model/delta.py +118 -71
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -3
- deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
- deltacat/storage/model/metafile.py +1316 -0
- deltacat/storage/model/namespace.py +34 -18
- deltacat/storage/model/partition.py +362 -37
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +19 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +892 -0
- deltacat/storage/model/shard.py +47 -0
- deltacat/storage/model/sort_key.py +170 -13
- deltacat/storage/model/stream.py +208 -80
- deltacat/storage/model/table.py +123 -29
- deltacat/storage/model/table_version.py +322 -46
- deltacat/storage/model/transaction.py +757 -0
- deltacat/storage/model/transform.py +198 -61
- deltacat/storage/model/types.py +111 -13
- deltacat/storage/rivulet/__init__.py +11 -0
- deltacat/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/storage/rivulet/arrow/serializer.py +75 -0
- deltacat/storage/rivulet/dataset.py +744 -0
- deltacat/storage/rivulet/dataset_executor.py +87 -0
- deltacat/storage/rivulet/feather/__init__.py +5 -0
- deltacat/storage/rivulet/feather/file_reader.py +136 -0
- deltacat/storage/rivulet/feather/serializer.py +35 -0
- deltacat/storage/rivulet/fs/__init__.py +0 -0
- deltacat/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/storage/rivulet/fs/file_store.py +130 -0
- deltacat/storage/rivulet/fs/input_file.py +76 -0
- deltacat/storage/rivulet/fs/output_file.py +86 -0
- deltacat/storage/rivulet/logical_plan.py +105 -0
- deltacat/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/storage/rivulet/metastore/delta.py +190 -0
- deltacat/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/storage/rivulet/metastore/sst.py +82 -0
- deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/storage/rivulet/mvp/Table.py +101 -0
- deltacat/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/storage/rivulet/parquet/file_reader.py +127 -0
- deltacat/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/storage/rivulet/reader/__init__.py +0 -0
- deltacat/storage/rivulet/reader/block_scanner.py +378 -0
- deltacat/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/storage/rivulet/reader/data_scan.py +63 -0
- deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
- deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
- deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
- deltacat/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/storage/rivulet/schema/__init__.py +0 -0
- deltacat/storage/rivulet/schema/datatype.py +128 -0
- deltacat/storage/rivulet/schema/schema.py +251 -0
- deltacat/storage/rivulet/serializer.py +40 -0
- deltacat/storage/rivulet/serializer_factory.py +42 -0
- deltacat/storage/rivulet/writer/__init__.py +0 -0
- deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/catalog/test_catalogs.py +324 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +19 -53
- deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
- deltacat/tests/compute/compactor/utils/test_io.py +6 -8
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
- deltacat/tests/compute/conftest.py +75 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +478 -0
- deltacat/tests/compute/converter/utils.py +123 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
- deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
- deltacat/tests/compute/test_compact_partition_params.py +3 -3
- deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
- deltacat/tests/compute/test_util_common.py +19 -12
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
- deltacat/tests/local_deltacat_storage/__init__.py +76 -103
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/conftest.py +25 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +1399 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_metafile_io.py +2535 -0
- deltacat/tests/storage/model/test_schema.py +308 -0
- deltacat/tests/storage/model/test_shard.py +22 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +308 -0
- deltacat/tests/storage/rivulet/__init__.py +0 -0
- deltacat/tests/storage/rivulet/conftest.py +149 -0
- deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
- deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/storage/rivulet/test_dataset.py +406 -0
- deltacat/tests/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/storage/rivulet/test_utils.py +122 -0
- deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/test_deltacat_api.py +39 -0
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +8 -15
- deltacat/tests/test_utils/storage.py +266 -3
- deltacat/tests/utils/test_daft.py +3 -3
- deltacat/tests/utils/test_pyarrow.py +0 -432
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +1 -1
- deltacat/utils/export.py +59 -0
- deltacat/utils/filesystem.py +320 -0
- deltacat/utils/metafile_locator.py +73 -0
- deltacat/utils/pyarrow.py +36 -183
- deltacat-2.0.dist-info/METADATA +65 -0
- deltacat-2.0.dist-info/RECORD +347 -0
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
- deltacat-1.1.36.dist-info/METADATA +0 -64
- deltacat-1.1.36.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/LICENSE +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/WHEEL +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,2535 @@
|
|
1
|
+
import os
|
2
|
+
from typing import List, Tuple
|
3
|
+
|
4
|
+
import time
|
5
|
+
import multiprocessing
|
6
|
+
|
7
|
+
import pyarrow as pa
|
8
|
+
import pytest
|
9
|
+
|
10
|
+
from deltacat import (
|
11
|
+
Schema,
|
12
|
+
Field,
|
13
|
+
PartitionScheme,
|
14
|
+
PartitionKey,
|
15
|
+
ContentEncoding,
|
16
|
+
ContentType,
|
17
|
+
SortScheme,
|
18
|
+
SortKey,
|
19
|
+
SortOrder,
|
20
|
+
NullOrder,
|
21
|
+
LifecycleState,
|
22
|
+
)
|
23
|
+
from deltacat.storage import (
|
24
|
+
BucketTransform,
|
25
|
+
BucketTransformParameters,
|
26
|
+
BucketingStrategy,
|
27
|
+
CommitState,
|
28
|
+
DeltaLocator,
|
29
|
+
Delta,
|
30
|
+
DeltaType,
|
31
|
+
EntryParams,
|
32
|
+
EntryType,
|
33
|
+
Manifest,
|
34
|
+
ManifestAuthor,
|
35
|
+
ManifestEntry,
|
36
|
+
ManifestMeta,
|
37
|
+
Namespace,
|
38
|
+
NamespaceLocator,
|
39
|
+
PartitionLocator,
|
40
|
+
Partition,
|
41
|
+
StreamLocator,
|
42
|
+
StreamFormat,
|
43
|
+
Stream,
|
44
|
+
Table,
|
45
|
+
TableLocator,
|
46
|
+
TableVersionLocator,
|
47
|
+
TableVersion,
|
48
|
+
Transaction,
|
49
|
+
TransactionOperation,
|
50
|
+
TransactionType,
|
51
|
+
TransactionOperationType,
|
52
|
+
TruncateTransform,
|
53
|
+
TruncateTransformParameters,
|
54
|
+
)
|
55
|
+
from deltacat.storage.model.metafile import (
|
56
|
+
Metafile,
|
57
|
+
MetafileRevisionInfo,
|
58
|
+
)
|
59
|
+
from deltacat.constants import TXN_DIR_NAME, SUCCESS_TXN_DIR_NAME, NANOS_PER_SEC
|
60
|
+
from deltacat.utils.filesystem import resolve_path_and_filesystem
|
61
|
+
from deltacat.tests.test_utils.storage import (
|
62
|
+
create_test_namespace,
|
63
|
+
create_test_table,
|
64
|
+
create_test_table_version,
|
65
|
+
create_test_stream,
|
66
|
+
create_test_partition,
|
67
|
+
create_test_delta,
|
68
|
+
)
|
69
|
+
|
70
|
+
|
71
|
+
def _commit_single_delta_table(temp_dir: str) -> List[Tuple[Metafile, Metafile, str]]:
    """Commit one metafile of every type (namespace through delta) in a single
    APPEND transaction rooted at ``temp_dir``.

    Returns:
        One ``(input_metafile, deserialized_metafile, write_path)`` triple per
        committed metafile, ordered namespace, table, table version, stream,
        partition, delta.
    """
    # fixture metafiles, ordered from ancestor to descendant
    metafiles = [
        create_test_namespace(),
        create_test_table(),
        create_test_table_version(),
        create_test_stream(),
        create_test_partition(),
        create_test_delta(),
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.APPEND,
        txn_operations=[
            TransactionOperation.of(
                operation_type=TransactionOperationType.CREATE,
                dest_metafile=metafile,
            )
            for metafile in metafiles
        ],
    )
    committed_paths, txn_log_path = transaction.commit(temp_dir)
    assert os.path.exists(txn_log_path)
    # read revisions back from the end of the write-path list (deepest child
    # first), then restore ancestor-to-descendant order
    remaining_paths = committed_paths.copy()
    deserialized_reversed = [
        reader.read(remaining_paths.pop())
        for reader in (Delta, Partition, Stream, TableVersion, Table, Namespace)
    ]
    deserialized = deserialized_reversed[::-1]
    return list(zip(metafiles, deserialized, committed_paths))
|
111
|
+
|
112
|
+
|
113
|
+
def _commit_concurrent_transaction(
|
114
|
+
catalog_root: str,
|
115
|
+
transaction: Transaction,
|
116
|
+
) -> None:
|
117
|
+
try:
|
118
|
+
return transaction.commit(catalog_root)
|
119
|
+
except (RuntimeError, ValueError) as e:
|
120
|
+
return e
|
121
|
+
|
122
|
+
|
123
|
+
class TestMetafileIO:
|
124
|
+
    def test_txn_conflict_concurrent_multiprocess_table_create(self, temp_dir):
        """Exactly one of N concurrent CREATE commits for the same table may
        succeed each round; the rest must fail with a conflict error.
        """
        base_table_name = "test_table"
        table_locator = TableLocator.at(
            namespace=None,
            table_name=base_table_name,
        )
        # given a transaction to create a table
        table = Table.of(
            locator=table_locator,
            description="test table description",
        )
        transaction = Transaction.of(
            txn_type=TransactionType.APPEND,
            txn_operations=[
                TransactionOperation.of(
                    operation_type=TransactionOperationType.CREATE,
                    dest_metafile=table,
                )
            ],
        )
        # when K rounds of N concurrent transaction commits try to create the
        # same table
        rounds = 25
        concurrent_commit_count = multiprocessing.cpu_count()
        with multiprocessing.Pool(processes=concurrent_commit_count) as pool:
            for round_number in range(rounds):
                # rename the shared locator so each round targets a fresh
                # table; apply_async pickles its args per task, so the renamed
                # state is what each worker commits (presumably the txn op
                # holds `table` by reference — confirm if this test flakes)
                table.locator.table_name = f"{base_table_name}_{round_number}"
                futures = [
                    pool.apply_async(
                        _commit_concurrent_transaction, (temp_dir, transaction)
                    )
                    for _ in range(concurrent_commit_count)
                ]
                # expect all but one concurrent transaction to succeed each round
                results = [future.get() for future in futures]
                conflict_exception_count = 0
                for result in results:
                    # TODO(pdames): Add new concurrent conflict exception types.
                    if isinstance(result, RuntimeError) or isinstance(
                        result, ValueError
                    ):
                        conflict_exception_count += 1
                    else:
                        # the single winner's write paths must round-trip to
                        # an equivalent table
                        write_paths, txn_log_path = result
                        deserialized_table = Table.read(write_paths.pop())
                        assert table.equivalent_to(deserialized_table)
                assert conflict_exception_count == concurrent_commit_count - 1
|
171
|
+
|
172
|
+
def test_txn_dual_commit_fails(self, temp_dir):
|
173
|
+
namespace_locator = NamespaceLocator.of(namespace="test_namespace")
|
174
|
+
namespace = Namespace.of(locator=namespace_locator)
|
175
|
+
# given a transaction that creates a single namespace
|
176
|
+
transaction = Transaction.of(
|
177
|
+
txn_type=TransactionType.APPEND,
|
178
|
+
txn_operations=[
|
179
|
+
TransactionOperation.of(
|
180
|
+
operation_type=TransactionOperationType.CREATE,
|
181
|
+
dest_metafile=namespace,
|
182
|
+
)
|
183
|
+
],
|
184
|
+
)
|
185
|
+
write_paths, txn_log_path = transaction.commit(temp_dir)
|
186
|
+
# when the transaction is committed,
|
187
|
+
# expect the namespace created to match the namespace given
|
188
|
+
deserialized_namespace = Namespace.read(write_paths.pop())
|
189
|
+
assert namespace.equivalent_to(deserialized_namespace)
|
190
|
+
# if we reread the transaction and commit it again,
|
191
|
+
reread_transaction = Transaction.read(txn_log_path)
|
192
|
+
# expect an exception to be raised
|
193
|
+
with pytest.raises(RuntimeError):
|
194
|
+
reread_transaction.commit(temp_dir)
|
195
|
+
|
196
|
+
    def test_txn_bad_end_time_fails(self, temp_dir, mocker):
        """A transaction whose parsed end time is in the past (before its own
        start) must be rejected at commit time.
        """
        commit_results = _commit_single_delta_table(temp_dir)
        for expected, actual, _ in commit_results:
            assert expected.equivalent_to(actual)
        # given a transaction with an ending timestamp set in the past
        past_timestamp = time.time_ns() - NANOS_PER_SEC
        mocker.patch(
            "deltacat.storage.model.transaction.Transaction._parse_end_time",
            return_value=past_timestamp,
        )
        # commit_results[5] is the delta triple; [1] is its deserialized form
        original_delta: Delta = commit_results[5][1]
        new_delta = Delta.update_for(original_delta)
        txn_operations = [
            TransactionOperation.of(
                operation_type=TransactionOperationType.UPDATE,
                dest_metafile=new_delta,
                src_metafile=original_delta,
            )
        ]
        transaction = Transaction.of(
            txn_type=TransactionType.ALTER,
            txn_operations=txn_operations,
        )
        # expect the bad timestamp to be detected and its commit to fail
        with pytest.raises(RuntimeError):
            transaction.commit(temp_dir)
|
222
|
+
|
223
|
+
    def test_txn_conflict_concurrent_complete(self, temp_dir, mocker):
        """A transaction must fail when a conflicting metafile revision was
        already written by a transaction that completed first.
        """
        commit_results = _commit_single_delta_table(temp_dir)
        for expected, actual, _ in commit_results:
            assert expected.equivalent_to(actual)

        # given an initial metafile revision of a committed delta
        write_paths = [result[2] for result in commit_results]
        orig_delta_write_path = write_paths[5]

        # a new delta metafile revision written by a transaction that completed
        # before seeing any concurrent conflicts
        mri = MetafileRevisionInfo.parse(orig_delta_write_path)
        # txn id prefix "0000000000000" sorts as the earliest possible start time
        mri.txn_id = "0000000000000_test-txn-id"
        mri.txn_op_type = TransactionOperationType.UPDATE
        mri.revision = mri.revision + 1
        conflict_delta_write_path = mri.path
        _, filesystem = resolve_path_and_filesystem(orig_delta_write_path)
        with filesystem.open_output_stream(conflict_delta_write_path):
            pass  # Just create an empty conflicting metafile revision
        txn_log_file_dir = os.path.join(
            temp_dir,
            TXN_DIR_NAME,
            SUCCESS_TXN_DIR_NAME,
            mri.txn_id,
        )
        filesystem.create_dir(txn_log_file_dir, recursive=True)
        txn_log_file_path = os.path.join(
            txn_log_file_dir,
            str(time.time_ns()),
        )
        with filesystem.open_output_stream(txn_log_file_path):
            pass  # Just create an empty log to mark the txn as complete

        # and a concurrent transaction that started before that transaction
        # completed, writes the same delta metafile revision, then sees the
        # conflict
        past_timestamp = time.time_ns() - NANOS_PER_SEC
        future_timestamp = 9999999999999
        end_time_mock = mocker.patch(
            "deltacat.storage.model.transaction.Transaction._parse_end_time",
        )
        # report a future end time for the fake completed txn and a past end
        # time for everything else, forcing the ordering under test
        end_time_mock.side_effect = (
            lambda path: future_timestamp if mri.txn_id in path else past_timestamp
        )
        original_delta = Delta.read(orig_delta_write_path)
        new_delta = Delta.update_for(original_delta)
        txn_operations = [
            TransactionOperation.of(
                operation_type=TransactionOperationType.UPDATE,
                dest_metafile=new_delta,
                src_metafile=original_delta,
            )
        ]
        transaction = Transaction.of(
            txn_type=TransactionType.ALTER,
            txn_operations=txn_operations,
        )
        # expect the commit to fail due to a concurrent modification error
        with pytest.raises(RuntimeError):
            transaction.commit(temp_dir)
|
283
|
+
|
284
|
+
    def test_txn_conflict_concurrent_incomplete(self, temp_dir):
        """A transaction must fail (and keep failing on retry) when a
        conflicting metafile revision exists from an incomplete transaction.
        """
        commit_results = _commit_single_delta_table(temp_dir)
        for expected, actual, _ in commit_results:
            assert expected.equivalent_to(actual)

        # given an initial metafile revision of a committed delta
        write_paths = [result[2] for result in commit_results]
        orig_delta_write_path = write_paths[5]

        # and a new delta metafile revision written by an incomplete transaction
        mri = MetafileRevisionInfo.parse(orig_delta_write_path)
        # txn id prefix "9999999999999" sorts as the latest possible start
        # time; no success log is written for it, so it stays incomplete
        mri.txn_id = "9999999999999_test-txn-id"
        mri.txn_op_type = TransactionOperationType.DELETE
        mri.revision = mri.revision + 1
        conflict_delta_write_path = mri.path
        _, filesystem = resolve_path_and_filesystem(orig_delta_write_path)
        with filesystem.open_output_stream(conflict_delta_write_path):
            pass  # Just create an empty conflicting metafile revision

        # when a concurrent transaction tries to update the same delta
        original_delta = Delta.read(orig_delta_write_path)
        new_delta = Delta.update_for(original_delta)
        transaction = Transaction.of(
            txn_type=TransactionType.ALTER,
            txn_operations=[
                TransactionOperation.of(
                    operation_type=TransactionOperationType.UPDATE,
                    dest_metafile=new_delta,
                    src_metafile=original_delta,
                )
            ],
        )
        # expect the commit to fail due to a concurrent modification error
        with pytest.raises(RuntimeError):
            transaction.commit(temp_dir)
        # expect a commit retry to also fail
        with pytest.raises(RuntimeError):
            transaction.commit(temp_dir)
|
322
|
+
|
323
|
+
def test_append_multiple_deltas(self, temp_dir):
|
324
|
+
commit_results = _commit_single_delta_table(temp_dir)
|
325
|
+
for expected, actual, _ in commit_results:
|
326
|
+
assert expected.equivalent_to(actual)
|
327
|
+
original_delta: Delta = commit_results[5][1]
|
328
|
+
|
329
|
+
# given a transaction containing several deltas to append
|
330
|
+
txn_operations = []
|
331
|
+
|
332
|
+
delta_append_count = 100
|
333
|
+
for i in range(delta_append_count):
|
334
|
+
new_delta = Delta.based_on(
|
335
|
+
original_delta,
|
336
|
+
new_id=str(int(original_delta.id) + i + 1),
|
337
|
+
)
|
338
|
+
txn_operations.append(
|
339
|
+
TransactionOperation.of(
|
340
|
+
operation_type=TransactionOperationType.CREATE,
|
341
|
+
dest_metafile=new_delta,
|
342
|
+
)
|
343
|
+
)
|
344
|
+
transaction = Transaction.of(
|
345
|
+
txn_type=TransactionType.APPEND,
|
346
|
+
txn_operations=txn_operations,
|
347
|
+
)
|
348
|
+
# when the transaction is committed
|
349
|
+
write_paths, txn_log_path = transaction.commit(temp_dir)
|
350
|
+
# expect all new deltas to be successfully written
|
351
|
+
assert len(write_paths) == delta_append_count
|
352
|
+
for i in range(len(write_paths)):
|
353
|
+
actual_delta = Delta.read(write_paths[i])
|
354
|
+
assert txn_operations[i].dest_metafile.equivalent_to(actual_delta)
|
355
|
+
|
356
|
+
def test_bad_update_mismatched_metafile_types(self, temp_dir):
|
357
|
+
commit_results = _commit_single_delta_table(temp_dir)
|
358
|
+
for expected, actual, _ in commit_results:
|
359
|
+
assert expected.equivalent_to(actual)
|
360
|
+
original_partition: Partition = commit_results[4][1]
|
361
|
+
original_delta: Delta = commit_results[5][1]
|
362
|
+
|
363
|
+
# given an attempt to replace a delta with a partition
|
364
|
+
replacement_partition: Partition = Partition.based_on(
|
365
|
+
original_partition,
|
366
|
+
new_id=original_partition.id + "_2",
|
367
|
+
)
|
368
|
+
# expect the transaction operation initialization to raise a value error
|
369
|
+
with pytest.raises(ValueError):
|
370
|
+
TransactionOperation.of(
|
371
|
+
operation_type=TransactionOperationType.UPDATE,
|
372
|
+
dest_metafile=replacement_partition,
|
373
|
+
src_metafile=original_delta,
|
374
|
+
)
|
375
|
+
|
376
|
+
    def test_delete_delta(self, temp_dir):
        """Deleting a delta writes one DELETE revision, and later UPDATE or
        DELETE ops against the deleted delta must fail.
        """
        commit_results = _commit_single_delta_table(temp_dir)
        for expected, actual, _ in commit_results:
            assert expected.equivalent_to(actual)
        original_delta: Delta = commit_results[5][1]

        # given a transaction containing a delta to delete
        txn_operations = [
            TransactionOperation.of(
                operation_type=TransactionOperationType.DELETE,
                dest_metafile=original_delta,
            )
        ]
        transaction = Transaction.of(
            txn_type=TransactionType.DELETE,
            txn_operations=txn_operations,
        )
        # when the transaction is committed
        write_paths, txn_log_path = transaction.commit(temp_dir)

        # expect one new delete metafile to be written
        assert len(write_paths) == 1
        delete_write_path = write_paths[0]

        # expect the delete metafile to contain the input txn op dest_metafile
        assert TransactionOperationType.DELETE.value in delete_write_path
        actual_delta = Delta.read(delete_write_path)
        assert original_delta.equivalent_to(actual_delta)

        # expect a subsequent replace of the deleted delta to fail
        replacement_delta: Delta = Delta.based_on(
            original_delta,
            new_id=str(int(original_delta.id) + 1),
        )
        bad_txn_operations = [
            TransactionOperation.of(
                operation_type=TransactionOperationType.UPDATE,
                dest_metafile=replacement_delta,
                src_metafile=original_delta,
            )
        ]
        transaction = Transaction.of(
            txn_type=TransactionType.OVERWRITE,
            txn_operations=bad_txn_operations,
        )
        with pytest.raises(ValueError):
            transaction.commit(temp_dir)

        # expect subsequent deletes of the deleted delta to fail
        bad_txn_operations = [
            TransactionOperation.of(
                operation_type=TransactionOperationType.DELETE,
                dest_metafile=original_delta,
            )
        ]
        transaction = Transaction.of(
            txn_type=TransactionType.DELETE,
            txn_operations=bad_txn_operations,
        )
        with pytest.raises(ValueError):
            transaction.commit(temp_dir)
|
437
|
+
|
438
|
+
    def test_replace_delta(self, temp_dir):
        """Replacing a delta writes a DELETE and a CREATE revision, after which
        the original delta can no longer be updated or deleted.
        """
        commit_results = _commit_single_delta_table(temp_dir)
        for expected, actual, _ in commit_results:
            assert expected.equivalent_to(actual)
        original_delta: Delta = commit_results[5][1]

        # given a transaction containing a delta replacement
        replacement_delta: Delta = Delta.based_on(
            original_delta,
            new_id=str(int(original_delta.id) + 1),
        )

        # expect the proposed replacement delta to be assigned a new ID
        assert replacement_delta.id != original_delta.id

        txn_operations = [
            TransactionOperation.of(
                operation_type=TransactionOperationType.UPDATE,
                dest_metafile=replacement_delta,
                src_metafile=original_delta,
            )
        ]
        transaction = Transaction.of(
            txn_type=TransactionType.OVERWRITE,
            txn_operations=txn_operations,
        )
        # when the transaction is committed
        write_paths, txn_log_path = transaction.commit(temp_dir)

        # expect two new metafiles to be written
        # (i.e., delete old delta, create replacement delta)
        assert len(write_paths) == 2
        delete_write_path = write_paths[0]
        create_write_path = write_paths[1]

        # expect the replacement delta to be successfully written and read
        assert TransactionOperationType.CREATE.value in create_write_path
        actual_delta = Delta.read(create_write_path)
        assert replacement_delta.equivalent_to(actual_delta)

        # expect the delete metafile to also contain the replacement delta
        assert TransactionOperationType.DELETE.value in delete_write_path
        actual_delta = Delta.read(delete_write_path)
        assert replacement_delta.equivalent_to(actual_delta)

        # expect a subsequent replace of the original delta to fail
        bad_txn_operations = [
            TransactionOperation.of(
                operation_type=TransactionOperationType.UPDATE,
                dest_metafile=replacement_delta,
                src_metafile=original_delta,
            )
        ]
        transaction = Transaction.of(
            txn_type=TransactionType.OVERWRITE,
            txn_operations=bad_txn_operations,
        )
        with pytest.raises(ValueError):
            transaction.commit(temp_dir)

        # expect deletes of the original delta to fail
        bad_txn_operations = [
            TransactionOperation.of(
                operation_type=TransactionOperationType.DELETE,
                dest_metafile=original_delta,
            )
        ]
        transaction = Transaction.of(
            txn_type=TransactionType.DELETE,
            txn_operations=bad_txn_operations,
        )
        with pytest.raises(ValueError):
            transaction.commit(temp_dir)
|
511
|
+
|
512
|
+
def test_delete_partition(self, temp_dir):
    """Deleting a partition writes exactly one delete metafile, leaves the
    partition's child metafiles readable and unchanged, and rejects any
    subsequent replace, delete, or child-create against the deleted partition.
    """
    commit_results = _commit_single_delta_table(temp_dir)
    for expected, actual, _ in commit_results:
        assert expected.equivalent_to(actual)
    target_partition: Partition = commit_results[4][1]

    # commit a transaction that deletes the partition
    delete_txn = Transaction.of(
        txn_type=TransactionType.DELETE,
        txn_operations=[
            TransactionOperation.of(
                operation_type=TransactionOperationType.DELETE,
                dest_metafile=target_partition,
            )
        ],
    )
    write_paths, txn_log_path = delete_txn.commit(temp_dir)

    # exactly one new partition metafile should have been written
    assert len(write_paths) == 1
    delete_write_path = write_paths[0]

    # the delete metafile should contain the input txn op dest_metafile
    assert TransactionOperationType.DELETE.value in delete_write_path
    assert target_partition.equivalent_to(Partition.read(delete_write_path))

    # child metafiles in the deleted partition stay readable and unchanged
    read_back_children = [
        Delta.read(commit_results[5][2]),
    ]
    children_to_create = [
        Delta(commit_results[5][0]),
    ]
    children_created = [
        Delta(commit_results[5][1]),
    ]
    for read_back, to_create, created in zip(
        read_back_children, children_to_create, children_created
    ):
        assert read_back.equivalent_to(to_create)
        assert read_back.equivalent_to(created)

    # a subsequent replace of the deleted partition should fail
    replacement_partition: Partition = Partition.based_on(
        target_partition,
        new_id=target_partition.id + "_2",
    )
    failing_txn = Transaction.of(
        txn_type=TransactionType.OVERWRITE,
        txn_operations=[
            TransactionOperation.of(
                operation_type=TransactionOperationType.UPDATE,
                dest_metafile=replacement_partition,
                src_metafile=target_partition,
            )
        ],
    )
    with pytest.raises(ValueError):
        failing_txn.commit(temp_dir)

    # a subsequent delete of the deleted partition should fail
    failing_txn = Transaction.of(
        txn_type=TransactionType.DELETE,
        txn_operations=[
            TransactionOperation.of(
                operation_type=TransactionOperationType.DELETE,
                dest_metafile=target_partition,
            )
        ],
    )
    with pytest.raises(ValueError):
        failing_txn.commit(temp_dir)

    # creating new child metafiles under the deleted partition should fail
    for child in children_created:
        failing_txn = Transaction.of(
            txn_type=TransactionType.APPEND,
            txn_operations=[
                TransactionOperation.of(
                    operation_type=TransactionOperationType.CREATE,
                    dest_metafile=child,
                )
            ],
        )
        with pytest.raises(ValueError):
            failing_txn.commit(temp_dir)
def test_replace_partition(self, temp_dir):
    """Replacing a partition writes a delete+create metafile pair, keeps old
    child metafiles readable under the original parent partition ID, and
    rejects further writes against the replaced partition.
    """
    commit_results = _commit_single_delta_table(temp_dir)
    for expected, actual, _ in commit_results:
        assert expected.equivalent_to(actual)
    original_partition: Partition = commit_results[4][1]

    # given a transaction containing a partition replacement
    replacement_partition: Partition = Partition.based_on(
        original_partition,
        new_id=original_partition.id + "_2",
    )

    # expect the proposed replacement partition to be assigned a new ID
    assert replacement_partition.id != original_partition.id

    txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.UPDATE,
            dest_metafile=replacement_partition,
            src_metafile=original_partition,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.OVERWRITE,
        txn_operations=txn_operations,
    )
    # when the transaction is committed
    write_paths, txn_log_path = transaction.commit(temp_dir)

    # expect two new partition metafiles to be written
    # (i.e., delete old partition, create replacement partition)
    assert len(write_paths) == 2
    delete_write_path = write_paths[0]
    create_write_path = write_paths[1]

    # expect the replacement partition to be successfully written and read
    assert TransactionOperationType.CREATE.value in create_write_path
    actual_partition = Partition.read(create_write_path)
    assert replacement_partition.equivalent_to(actual_partition)

    # expect the delete metafile to also contain the replacement partition
    assert TransactionOperationType.DELETE.value in delete_write_path
    actual_partition = Partition.read(delete_write_path)
    assert replacement_partition.equivalent_to(actual_partition)

    # expect old child metafiles for the replaced partition to remain readable
    child_metafiles_read_post_replace = [
        Delta.read(commit_results[5][2]),
    ]
    # expect old child metafiles read to share the same parent table name as
    # the replacement partition, but have a different parent partition ID
    for metafile in child_metafiles_read_post_replace:
        assert (
            metafile.table_name
            == replacement_partition.table_name
            == original_partition.table_name
        )
        ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
        parent_partition_id = ancestor_ids[4]
        assert parent_partition_id == original_partition.id

    # expect original child metafiles to share the original parent partition ID
    original_child_metafiles_to_create = [
        Delta(commit_results[5][0]),
    ]
    original_child_metafiles_created = [
        Delta(commit_results[5][1]),
    ]
    # BUGFIX: this loop previously iterated `range(len(...))` but re-checked
    # the stale `metafile` left over from the loop above instead of the
    # original child metafiles themselves; check each of them explicitly.
    for metafile in (
        original_child_metafiles_to_create + original_child_metafiles_created
    ):
        ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
        parent_partition_id = ancestor_ids[4]
        assert parent_partition_id == original_partition.id

    # expect a subsequent replace of the original partition to fail
    bad_txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.UPDATE,
            dest_metafile=replacement_partition,
            src_metafile=original_partition,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.OVERWRITE,
        txn_operations=bad_txn_operations,
    )
    with pytest.raises(ValueError):
        transaction.commit(temp_dir)

    # expect deletes of the original partition to fail
    bad_txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.DELETE,
            dest_metafile=original_partition,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.DELETE,
        txn_operations=bad_txn_operations,
    )
    with pytest.raises(ValueError):
        transaction.commit(temp_dir)

    # expect new child metafile creation under the old partition to fail
    for metafile in original_child_metafiles_created:
        bad_txn_operations = [
            TransactionOperation.of(
                operation_type=TransactionOperationType.CREATE,
                dest_metafile=metafile,
            )
        ]
        transaction = Transaction.of(
            txn_type=TransactionType.APPEND,
            txn_operations=bad_txn_operations,
        )
        with pytest.raises(ValueError):
            transaction.commit(temp_dir)
def test_delete_stream(self, temp_dir):
    """Deleting a stream writes exactly one delete metafile, leaves the
    stream's child metafiles readable and unchanged, and rejects any
    subsequent replace, delete, or child-create against the deleted stream.
    """
    commit_results = _commit_single_delta_table(temp_dir)
    for expected, actual, _ in commit_results:
        assert expected.equivalent_to(actual)
    original_stream: Stream = commit_results[3][1]

    txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.DELETE,
            dest_metafile=original_stream,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.DELETE,
        txn_operations=txn_operations,
    )
    # when the transaction is committed
    write_paths, txn_log_path = transaction.commit(temp_dir)

    # expect 1 new stream metafile to be written
    assert len(write_paths) == 1
    delete_write_path = write_paths[0]

    # expect the delete metafile to contain the input txn op dest_metafile
    assert TransactionOperationType.DELETE.value in delete_write_path
    actual_stream = Stream.read(delete_write_path)
    # CONSISTENCY FIX: sibling delete tests (partition/table version/table)
    # compare with equivalent_to(); use the same comparison here instead of ==
    assert original_stream.equivalent_to(actual_stream)

    # expect child metafiles in the deleted stream to remain readable and unchanged
    child_metafiles_read_post_delete = [
        Delta.read(commit_results[5][2]),
        Partition.read(commit_results[4][2]),
    ]
    original_child_metafiles_to_create = [
        Delta(commit_results[5][0]),
        Partition(commit_results[4][0]),
    ]
    original_child_metafiles_created = [
        Delta(commit_results[5][1]),
        Partition(commit_results[4][1]),
    ]
    for i in range(len(original_child_metafiles_to_create)):
        assert child_metafiles_read_post_delete[i].equivalent_to(
            original_child_metafiles_to_create[i]
        )
        assert child_metafiles_read_post_delete[i].equivalent_to(
            original_child_metafiles_created[i]
        )

    # expect a subsequent replace of the deleted stream to fail
    replacement_stream: Stream = Stream.based_on(
        original_stream,
        new_id=original_stream.id + "_2",
    )
    bad_txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.UPDATE,
            dest_metafile=replacement_stream,
            src_metafile=original_stream,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.OVERWRITE,
        txn_operations=bad_txn_operations,
    )
    with pytest.raises(ValueError):
        transaction.commit(temp_dir)

    # expect subsequent deletes of the deleted stream to fail
    bad_txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.DELETE,
            dest_metafile=original_stream,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.DELETE,
        txn_operations=bad_txn_operations,
    )
    with pytest.raises(ValueError):
        transaction.commit(temp_dir)

    # expect new child metafile creation under the deleted stream to fail
    for metafile in original_child_metafiles_created:
        bad_txn_operations = [
            TransactionOperation.of(
                operation_type=TransactionOperationType.CREATE,
                dest_metafile=metafile,
            )
        ]
        transaction = Transaction.of(
            txn_type=TransactionType.APPEND,
            txn_operations=bad_txn_operations,
        )
        with pytest.raises(ValueError):
            transaction.commit(temp_dir)
def test_replace_stream(self, temp_dir):
    """Replacing a stream writes a delete+create metafile pair, keeps old
    child metafiles readable under the original parent stream ID, and
    rejects further writes against the replaced stream.
    """
    commit_results = _commit_single_delta_table(temp_dir)
    for expected, actual, _ in commit_results:
        assert expected.equivalent_to(actual)
    original_stream: Stream = commit_results[3][1]

    # given a transaction containing a stream replacement
    replacement_stream: Stream = Stream.based_on(
        original_stream,
        new_id=original_stream.id + "_2",
    )

    # expect the proposed replacement stream to be assigned a new ID
    assert replacement_stream.id != original_stream.id

    txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.UPDATE,
            dest_metafile=replacement_stream,
            src_metafile=original_stream,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.OVERWRITE,
        txn_operations=txn_operations,
    )
    # when the transaction is committed
    write_paths, txn_log_path = transaction.commit(temp_dir)

    # expect two new stream metafiles to be written
    # (i.e., delete old stream, create replacement stream)
    assert len(write_paths) == 2
    delete_write_path = write_paths[0]
    create_write_path = write_paths[1]

    # expect the replacement stream to be successfully written and read
    assert TransactionOperationType.CREATE.value in create_write_path
    actual_stream = Stream.read(create_write_path)
    assert replacement_stream.equivalent_to(actual_stream)

    # expect the delete metafile to also contain the replacement stream
    assert TransactionOperationType.DELETE.value in delete_write_path
    actual_stream = Stream.read(delete_write_path)
    assert replacement_stream.equivalent_to(actual_stream)

    # expect old child metafiles for the replaced stream to remain readable
    child_metafiles_read_post_replace = [
        Delta.read(commit_results[5][2]),
        Partition.read(commit_results[4][2]),
    ]
    # expect old child metafiles read to share the same parent table name as
    # the replacement stream, but have a different parent stream ID
    for metafile in child_metafiles_read_post_replace:
        assert (
            metafile.table_name
            == replacement_stream.table_name
            == original_stream.table_name
        )
        ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
        parent_stream_id = ancestor_ids[3]
        assert parent_stream_id == original_stream.id

    # expect original child metafiles to share the original parent stream ID
    original_child_metafiles_to_create = [
        Delta(commit_results[5][0]),
        Partition(commit_results[4][0]),
    ]
    original_child_metafiles_created = [
        Delta(commit_results[5][1]),
        Partition(commit_results[4][1]),
    ]
    # BUGFIX: this loop previously iterated `range(len(...))` but re-checked
    # the stale `metafile` left over from the loop above instead of the
    # original child metafiles themselves; check each of them explicitly.
    for metafile in (
        original_child_metafiles_to_create + original_child_metafiles_created
    ):
        ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
        parent_stream_id = ancestor_ids[3]
        assert parent_stream_id == original_stream.id

    # expect a subsequent replace of the original stream to fail
    bad_txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.UPDATE,
            dest_metafile=replacement_stream,
            src_metafile=original_stream,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.OVERWRITE,
        txn_operations=bad_txn_operations,
    )
    with pytest.raises(ValueError):
        transaction.commit(temp_dir)

    # expect deletes of the original stream to fail
    bad_txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.DELETE,
            dest_metafile=original_stream,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.DELETE,
        txn_operations=bad_txn_operations,
    )
    with pytest.raises(ValueError):
        transaction.commit(temp_dir)

    # expect new child metafile creation under the old stream to fail
    for metafile in original_child_metafiles_created:
        bad_txn_operations = [
            TransactionOperation.of(
                operation_type=TransactionOperationType.CREATE,
                dest_metafile=metafile,
            )
        ]
        transaction = Transaction.of(
            txn_type=TransactionType.APPEND,
            txn_operations=bad_txn_operations,
        )
        with pytest.raises(ValueError):
            transaction.commit(temp_dir)
def test_delete_table_version(self, temp_dir):
    """Deleting a table version writes exactly one delete metafile, leaves
    its child metafiles readable and unchanged, and rejects any subsequent
    replace, delete, or child-create against the deleted table version.
    """
    commit_results = _commit_single_delta_table(temp_dir)
    for expected, actual, _ in commit_results:
        assert expected.equivalent_to(actual)
    target_table_version: TableVersion = commit_results[2][1]

    # commit a transaction that deletes the table version
    delete_txn = Transaction.of(
        txn_type=TransactionType.DELETE,
        txn_operations=[
            TransactionOperation.of(
                operation_type=TransactionOperationType.DELETE,
                dest_metafile=target_table_version,
            )
        ],
    )
    write_paths, txn_log_path = delete_txn.commit(temp_dir)

    # exactly one new table version metafile should have been written
    assert len(write_paths) == 1
    delete_write_path = write_paths[0]

    # the delete metafile should contain the input txn op dest_metafile
    assert TransactionOperationType.DELETE.value in delete_write_path
    assert target_table_version.equivalent_to(
        TableVersion.read(delete_write_path)
    )

    # child metafiles in the deleted table version stay readable and unchanged
    read_back_children = [
        Delta.read(commit_results[5][2]),
        Partition.read(commit_results[4][2]),
        Stream.read(commit_results[3][2]),
    ]
    children_to_create = [
        Delta(commit_results[5][0]),
        Partition(commit_results[4][0]),
        Stream(commit_results[3][0]),
    ]
    children_created = [
        Delta(commit_results[5][1]),
        Partition(commit_results[4][1]),
        Stream(commit_results[3][1]),
    ]
    for read_back, to_create, created in zip(
        read_back_children, children_to_create, children_created
    ):
        assert read_back.equivalent_to(to_create)
        assert read_back.equivalent_to(created)

    # a subsequent replace of the deleted table version should fail
    replacement_table_version: TableVersion = TableVersion.based_on(
        target_table_version,
        new_id=target_table_version.id + "0",
    )
    failing_txn = Transaction.of(
        txn_type=TransactionType.OVERWRITE,
        txn_operations=[
            TransactionOperation.of(
                operation_type=TransactionOperationType.UPDATE,
                dest_metafile=replacement_table_version,
                src_metafile=target_table_version,
            )
        ],
    )
    with pytest.raises(ValueError):
        failing_txn.commit(temp_dir)

    # a subsequent delete of the deleted table version should fail
    failing_txn = Transaction.of(
        txn_type=TransactionType.DELETE,
        txn_operations=[
            TransactionOperation.of(
                operation_type=TransactionOperationType.DELETE,
                dest_metafile=target_table_version,
            )
        ],
    )
    with pytest.raises(ValueError):
        failing_txn.commit(temp_dir)

    # creating new child metafiles under the deleted table version should fail
    for child in children_created:
        failing_txn = Transaction.of(
            txn_type=TransactionType.APPEND,
            txn_operations=[
                TransactionOperation.of(
                    operation_type=TransactionOperationType.CREATE,
                    dest_metafile=child,
                )
            ],
        )
        with pytest.raises(ValueError):
            failing_txn.commit(temp_dir)
def test_replace_table_version(self, temp_dir):
    """Replacing a table version writes a delete+create metafile pair, keeps
    old child metafiles readable under the original parent table version ID,
    and rejects further writes against the replaced table version.
    """
    commit_results = _commit_single_delta_table(temp_dir)
    for expected, actual, _ in commit_results:
        assert expected.equivalent_to(actual)
    original_table_version: TableVersion = commit_results[2][1]

    # given a transaction containing a table version replacement
    replacement_table_version: TableVersion = TableVersion.based_on(
        original_table_version,
        new_id=original_table_version.id + "0",
    )

    # expect the proposed replacement table version to be assigned a new ID
    assert replacement_table_version.id != original_table_version.id

    txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.UPDATE,
            dest_metafile=replacement_table_version,
            src_metafile=original_table_version,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.OVERWRITE,
        txn_operations=txn_operations,
    )
    # when the transaction is committed
    write_paths, txn_log_path = transaction.commit(temp_dir)

    # expect two new table version metafiles to be written
    # (i.e., delete old table version, create replacement table version)
    assert len(write_paths) == 2
    delete_write_path = write_paths[0]
    create_write_path = write_paths[1]

    # expect the replacement table version to be successfully written and read
    assert TransactionOperationType.CREATE.value in create_write_path
    actual_table_version = TableVersion.read(create_write_path)
    assert replacement_table_version.equivalent_to(actual_table_version)

    # expect the delete metafile to also contain the replacement table version
    assert TransactionOperationType.DELETE.value in delete_write_path
    actual_table_version = TableVersion.read(delete_write_path)
    assert replacement_table_version.equivalent_to(actual_table_version)

    # expect old child metafiles for the replaced table version to remain readable
    child_metafiles_read_post_replace = [
        Delta.read(commit_results[5][2]),
        Partition.read(commit_results[4][2]),
        Stream.read(commit_results[3][2]),
    ]
    # expect old child metafiles read to share the same parent table name as
    # the replacement table version, but have a different parent table
    # version ID
    for metafile in child_metafiles_read_post_replace:
        assert (
            metafile.table_name
            == replacement_table_version.table_name
            == original_table_version.table_name
        )
        ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
        parent_table_version_id = ancestor_ids[2]
        assert parent_table_version_id == original_table_version.id

    # expect original child metafiles to share the original parent table version ID
    original_child_metafiles_to_create = [
        Delta(commit_results[5][0]),
        Partition(commit_results[4][0]),
        Stream(commit_results[3][0]),
    ]
    original_child_metafiles_created = [
        Delta(commit_results[5][1]),
        Partition(commit_results[4][1]),
        Stream(commit_results[3][1]),
    ]
    # BUGFIX: this loop previously iterated `range(len(...))` but re-checked
    # the stale `metafile` left over from the loop above instead of the
    # original child metafiles themselves; check each of them explicitly.
    for metafile in (
        original_child_metafiles_to_create + original_child_metafiles_created
    ):
        ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
        parent_table_version_id = ancestor_ids[2]
        assert parent_table_version_id == original_table_version.id

    # expect a subsequent replace of the original table version to fail
    bad_txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.UPDATE,
            dest_metafile=replacement_table_version,
            src_metafile=original_table_version,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.OVERWRITE,
        txn_operations=bad_txn_operations,
    )
    with pytest.raises(ValueError):
        transaction.commit(temp_dir)

    # expect deletes of the original table version to fail
    bad_txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.DELETE,
            dest_metafile=original_table_version,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.DELETE,
        txn_operations=bad_txn_operations,
    )
    with pytest.raises(ValueError):
        transaction.commit(temp_dir)

    # expect new child metafile creation under the old table version to fail
    for metafile in original_child_metafiles_created:
        bad_txn_operations = [
            TransactionOperation.of(
                operation_type=TransactionOperationType.CREATE,
                dest_metafile=metafile,
            )
        ]
        transaction = Transaction.of(
            txn_type=TransactionType.APPEND,
            txn_operations=bad_txn_operations,
        )
        with pytest.raises(ValueError):
            transaction.commit(temp_dir)
def test_delete_table(self, temp_dir):
    """Deleting a table writes exactly one delete metafile, leaves the
    table's child metafiles readable and unchanged, and rejects any
    subsequent replace, delete, or child-create against the deleted table.
    """
    commit_results = _commit_single_delta_table(temp_dir)
    for expected, actual, _ in commit_results:
        assert expected.equivalent_to(actual)
    target_table: Table = commit_results[1][1]

    # commit a transaction that deletes the table
    delete_txn = Transaction.of(
        txn_type=TransactionType.DELETE,
        txn_operations=[
            TransactionOperation.of(
                operation_type=TransactionOperationType.DELETE,
                dest_metafile=target_table,
            )
        ],
    )
    write_paths, txn_log_path = delete_txn.commit(temp_dir)

    # exactly one new table metafile should have been written
    assert len(write_paths) == 1
    delete_write_path = write_paths[0]

    # the delete metafile should contain the input txn op dest_metafile
    assert TransactionOperationType.DELETE.value in delete_write_path
    assert target_table.equivalent_to(Table.read(delete_write_path))

    # child metafiles in the deleted table stay readable and unchanged
    read_back_children = [
        Delta.read(commit_results[5][2]),
        Partition.read(commit_results[4][2]),
        Stream.read(commit_results[3][2]),
        TableVersion.read(commit_results[2][2]),
    ]
    children_to_create = [
        Delta(commit_results[5][0]),
        Partition(commit_results[4][0]),
        Stream(commit_results[3][0]),
        TableVersion(commit_results[2][0]),
    ]
    children_created = [
        Delta(commit_results[5][1]),
        Partition(commit_results[4][1]),
        Stream(commit_results[3][1]),
        TableVersion(commit_results[2][1]),
    ]
    for read_back, to_create, created in zip(
        read_back_children, children_to_create, children_created
    ):
        assert read_back.equivalent_to(to_create)
        assert read_back.equivalent_to(created)

    # a subsequent replace of the deleted table should fail
    replacement_table: Table = Table.based_on(target_table)
    failing_txn = Transaction.of(
        txn_type=TransactionType.OVERWRITE,
        txn_operations=[
            TransactionOperation.of(
                operation_type=TransactionOperationType.UPDATE,
                dest_metafile=replacement_table,
                src_metafile=target_table,
            )
        ],
    )
    with pytest.raises(ValueError):
        failing_txn.commit(temp_dir)

    # a subsequent delete of the deleted table should fail
    failing_txn = Transaction.of(
        txn_type=TransactionType.DELETE,
        txn_operations=[
            TransactionOperation.of(
                operation_type=TransactionOperationType.DELETE,
                dest_metafile=target_table,
            )
        ],
    )
    with pytest.raises(ValueError):
        failing_txn.commit(temp_dir)

    # creating new child metafiles under the deleted table should fail
    for child in children_created:
        failing_txn = Transaction.of(
            txn_type=TransactionType.APPEND,
            txn_operations=[
                TransactionOperation.of(
                    operation_type=TransactionOperationType.CREATE,
                    dest_metafile=child,
                )
            ],
        )
        with pytest.raises(ValueError):
            failing_txn.commit(temp_dir)
def test_replace_table(self, temp_dir):
    """Replacing a table deletes the old table metafile and creates a
    replacement with a new ID but the same name, keeps old child metafiles
    readable, and rejects further replace/delete/create operations that
    target the replaced table.
    """
    commit_results = _commit_single_delta_table(temp_dir)
    for expected, actual, _ in commit_results:
        assert expected.equivalent_to(actual)
    original_table: Table = commit_results[1][1]

    # given a transaction containing a table replacement
    replacement_table: Table = Table.based_on(original_table)

    # expect the proposed replacement table to be assigned a new ID, but
    # continue to have the same name as the original table
    assert replacement_table.id != original_table.id
    assert replacement_table.table_name == original_table.table_name

    txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.UPDATE,
            dest_metafile=replacement_table,
            src_metafile=original_table,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.OVERWRITE,
        txn_operations=txn_operations,
    )
    # when the transaction is committed
    write_paths, txn_log_path = transaction.commit(temp_dir)

    # expect two new table metafiles to be written
    # (i.e., delete old table, create replacement table)
    assert len(write_paths) == 2
    delete_write_path = write_paths[0]
    create_write_path = write_paths[1]

    # expect the replacement table to be successfully written and read
    assert TransactionOperationType.CREATE.value in create_write_path
    actual_table = Table.read(create_write_path)
    assert replacement_table.equivalent_to(actual_table)

    # expect the delete metafile to also contain the replacement table
    assert TransactionOperationType.DELETE.value in delete_write_path
    actual_table = Table.read(delete_write_path)
    assert replacement_table.equivalent_to(actual_table)

    # expect old child metafiles for the replaced table to remain readable
    child_metafiles_read_post_replace = [
        Delta.read(commit_results[5][2]),
        Partition.read(commit_results[4][2]),
        Stream.read(commit_results[3][2]),
        TableVersion.read(commit_results[2][2]),
    ]
    # expect old child metafiles read to share the same parent table name as
    # the replacement table, but have a different parent table ID
    for metafile in child_metafiles_read_post_replace:
        assert (
            metafile.table_name
            == replacement_table.table_name
            == original_table.table_name
        )
        ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
        parent_table_id = ancestor_ids[1]
        assert parent_table_id == original_table.id

    # expect original child metafiles to share the original parent table ID
    original_child_metafiles_to_create = [
        Delta(commit_results[5][0]),
        Partition(commit_results[4][0]),
        Stream(commit_results[3][0]),
        TableVersion(commit_results[2][0]),
    ]
    original_child_metafiles_created = [
        Delta(commit_results[5][1]),
        Partition(commit_results[4][1]),
        Stream(commit_results[3][1]),
        TableVersion(commit_results[2][1]),
    ]
    # BUGFIX: the previous index loop never used its index - it re-checked
    # the stale `metafile` variable left over from the loop above, so these
    # assertions were vacuous. Check each original child metafile instead.
    for original_metafile in (
        original_child_metafiles_to_create + original_child_metafiles_created
    ):
        ancestor_ids = original_metafile.ancestor_ids(catalog_root=temp_dir)
        parent_table_id = ancestor_ids[1]
        assert parent_table_id == original_table.id

    # expect a subsequent table replace of the original table to fail
    bad_txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.UPDATE,
            dest_metafile=replacement_table,
            src_metafile=original_table,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.OVERWRITE,
        txn_operations=bad_txn_operations,
    )
    with pytest.raises(ValueError):
        transaction.commit(temp_dir)

    # expect table deletes of the original table to fail
    bad_txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.DELETE,
            dest_metafile=original_table,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.DELETE,
        txn_operations=bad_txn_operations,
    )
    with pytest.raises(ValueError):
        transaction.commit(temp_dir)

    # expect new child metafile creation under the old table to fail
    for metafile in original_child_metafiles_created:
        bad_txn_operations = [
            TransactionOperation.of(
                operation_type=TransactionOperationType.CREATE,
                dest_metafile=metafile,
            )
        ]
        transaction = Transaction.of(
            txn_type=TransactionType.APPEND,
            txn_operations=bad_txn_operations,
        )
        with pytest.raises(ValueError):
            transaction.commit(temp_dir)
def test_delete_namespace(self, temp_dir):
    """Deleting a namespace writes exactly one DELETE metafile, leaves the
    namespace's child metafiles readable and unchanged, and blocks later
    replace/delete/create operations against the deleted namespace.
    """
    commit_results = _commit_single_delta_table(temp_dir)
    for expected, actual, _ in commit_results:
        assert expected.equivalent_to(actual)
    original_namespace: Namespace = commit_results[0][1]

    delete_txn = Transaction.of(
        txn_type=TransactionType.DELETE,
        txn_operations=[
            TransactionOperation.of(
                operation_type=TransactionOperationType.DELETE,
                dest_metafile=original_namespace,
            )
        ],
    )
    # commit the delete and expect a single new namespace metafile
    write_paths, txn_log_path = delete_txn.commit(temp_dir)
    assert len(write_paths) == 1
    delete_write_path = write_paths[0]

    # the delete metafile should round-trip the deleted namespace
    assert TransactionOperationType.DELETE.value in delete_write_path
    assert original_namespace.equivalent_to(Namespace.read(delete_write_path))

    # child metafiles in the deleted namespace stay readable and unchanged
    child_types = (Delta, Partition, Stream, TableVersion, Table)
    result_indices = (5, 4, 3, 2, 1)
    read_post_delete = [
        cls.read(commit_results[idx][2])
        for cls, idx in zip(child_types, result_indices)
    ]
    children_to_create = [
        cls(commit_results[idx][0])
        for cls, idx in zip(child_types, result_indices)
    ]
    children_created = [
        cls(commit_results[idx][1])
        for cls, idx in zip(child_types, result_indices)
    ]
    for read_back, proposed, committed in zip(
        read_post_delete, children_to_create, children_created
    ):
        assert read_back.equivalent_to(proposed)
        assert read_back.equivalent_to(committed)

    # a subsequent replace of the deleted namespace must fail
    replacement_namespace: Namespace = Namespace.based_on(original_namespace)
    replace_txn = Transaction.of(
        txn_type=TransactionType.OVERWRITE,
        txn_operations=[
            TransactionOperation.of(
                operation_type=TransactionOperationType.UPDATE,
                dest_metafile=replacement_namespace,
                src_metafile=original_namespace,
            )
        ],
    )
    with pytest.raises(ValueError):
        replace_txn.commit(temp_dir)

    # a repeated delete of the deleted namespace must also fail
    redelete_txn = Transaction.of(
        txn_type=TransactionType.DELETE,
        txn_operations=[
            TransactionOperation.of(
                operation_type=TransactionOperationType.DELETE,
                dest_metafile=original_namespace,
            )
        ],
    )
    with pytest.raises(ValueError):
        redelete_txn.commit(temp_dir)

    # creating any child metafile under the deleted namespace must fail
    for child in children_created:
        create_txn = Transaction.of(
            txn_type=TransactionType.APPEND,
            txn_operations=[
                TransactionOperation.of(
                    operation_type=TransactionOperationType.CREATE,
                    dest_metafile=child,
                )
            ],
        )
        with pytest.raises(ValueError):
            create_txn.commit(temp_dir)
def test_replace_namespace(self, temp_dir):
    """Replacing a namespace deletes the old namespace metafile and creates
    a replacement with a new ID but the same name, keeps old child metafiles
    readable, and rejects further replace/delete/create operations that
    target the replaced namespace.
    """
    commit_results = _commit_single_delta_table(temp_dir)
    for expected, actual, _ in commit_results:
        assert expected.equivalent_to(actual)
    original_namespace: Namespace = commit_results[0][1]

    # given a transaction containing a namespace replacement
    replacement_namespace: Namespace = Namespace.based_on(original_namespace)

    # expect the proposed replacement namespace to be assigned a new ID, but
    # continue to have the same name as the original namespace
    assert replacement_namespace.id != original_namespace.id
    assert replacement_namespace.namespace == original_namespace.namespace

    txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.UPDATE,
            dest_metafile=replacement_namespace,
            src_metafile=original_namespace,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.OVERWRITE,
        txn_operations=txn_operations,
    )
    # when the transaction is committed
    write_paths, txn_log_path = transaction.commit(temp_dir)

    # expect two new namespace metafiles to be written
    # (i.e., delete old namespace, create replacement namespace)
    assert len(write_paths) == 2
    delete_write_path = write_paths[0]
    create_write_path = write_paths[1]

    # expect the replacement namespace to be successfully written and read
    assert TransactionOperationType.CREATE.value in create_write_path
    actual_namespace = Namespace.read(create_write_path)
    assert replacement_namespace.equivalent_to(actual_namespace)

    # expect the delete metafile to also contain the replacement namespace
    assert TransactionOperationType.DELETE.value in delete_write_path
    actual_namespace = Namespace.read(delete_write_path)
    assert replacement_namespace.equivalent_to(actual_namespace)

    # expect old child metafiles for the replaced namespace to remain readable
    child_metafiles_read_post_replace = [
        Delta.read(commit_results[5][2]),
        Partition.read(commit_results[4][2]),
        Stream.read(commit_results[3][2]),
        TableVersion.read(commit_results[2][2]),
        Table.read(commit_results[1][2]),
    ]
    # expect old child metafiles read to share the same parent namespace name as
    # the replacement namespace, but have a different parent namespace ID
    for metafile in child_metafiles_read_post_replace:
        assert (
            metafile.namespace
            == replacement_namespace.namespace
            == original_namespace.namespace
        )
        ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
        parent_namespace_id = ancestor_ids[0]
        assert parent_namespace_id == original_namespace.id

    # expect original child metafiles to share the original parent namespace ID
    original_child_metafiles_to_create = [
        Delta(commit_results[5][0]),
        Partition(commit_results[4][0]),
        Stream(commit_results[3][0]),
        TableVersion(commit_results[2][0]),
        Table(commit_results[1][0]),
    ]
    original_child_metafiles_created = [
        Delta(commit_results[5][1]),
        Partition(commit_results[4][1]),
        Stream(commit_results[3][1]),
        TableVersion(commit_results[2][1]),
        Table(commit_results[1][1]),
    ]
    # BUGFIX: the previous index loop never used its index - it re-checked
    # the stale `metafile` variable left over from the loop above, so these
    # assertions were vacuous. Check each original child metafile instead.
    for original_metafile in (
        original_child_metafiles_to_create + original_child_metafiles_created
    ):
        ancestor_ids = original_metafile.ancestor_ids(catalog_root=temp_dir)
        parent_namespace_id = ancestor_ids[0]
        assert parent_namespace_id == original_namespace.id

    # expect a subsequent namespace replace of the original namespace to fail
    bad_txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.UPDATE,
            dest_metafile=replacement_namespace,
            src_metafile=original_namespace,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.OVERWRITE,
        txn_operations=bad_txn_operations,
    )
    with pytest.raises(ValueError):
        transaction.commit(temp_dir)

    # expect namespace deletes of the original namespace to fail
    bad_txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.DELETE,
            dest_metafile=original_namespace,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.DELETE,
        txn_operations=bad_txn_operations,
    )
    with pytest.raises(ValueError):
        transaction.commit(temp_dir)

    # expect new child metafile creation under the old namespace to fail
    for metafile in original_child_metafiles_created:
        bad_txn_operations = [
            TransactionOperation.of(
                operation_type=TransactionOperationType.CREATE,
                dest_metafile=metafile,
            )
        ]
        transaction = Transaction.of(
            txn_type=TransactionType.APPEND,
            txn_operations=bad_txn_operations,
        )
        with pytest.raises(ValueError):
            transaction.commit(temp_dir)
def test_create_stream_bad_order_txn_op_chaining(self, temp_dir):
    """Transaction operations are applied in order: creating a stream before
    its parent table version exists must fail, while the reversed order of
    the same operations must succeed.
    """
    commit_results = _commit_single_delta_table(temp_dir)
    for expected, actual, _ in commit_results:
        assert expected.equivalent_to(actual)

    # a new table version derived from the already-committed one
    committed_table_version = TableVersion(commit_results[2][1])
    new_table_version: TableVersion = TableVersion.based_on(
        other=committed_table_version,
        new_id=committed_table_version.id + "0",
    )
    # a new stream that points at the not-yet-created table version
    committed_stream = Stream(commit_results[3][1])
    new_stream: Stream = Stream.based_on(
        other=committed_stream,
        new_id="test_stream_id",
    )
    new_stream.table_version_locator.table_version = new_table_version.table_version

    # operations deliberately ordered stream-first (i.e., before its parent
    # table version is created)
    misordered_ops = [
        TransactionOperation.of(
            TransactionOperationType.CREATE,
            new_stream,
        ),
        TransactionOperation.of(
            TransactionOperationType.CREATE,
            new_table_version,
        ),
    ]
    misordered_txn = Transaction.of(
        txn_type=TransactionType.APPEND,
        txn_operations=misordered_ops,
    )
    # committing the misordered transaction must fail on stream creation
    with pytest.raises(ValueError):
        misordered_txn.commit(temp_dir)

    # reversing the operations satisfies the dependency order, so both the
    # table version and the stream should be created
    ordered_txn = Transaction.of(
        txn_type=TransactionType.APPEND,
        txn_operations=misordered_ops[::-1],
    )
    write_paths, txn_log_path = ordered_txn.commit(temp_dir)
    assert len(write_paths) == 2
def test_table_rename_bad_order_txn_op_chaining(self, temp_dir):
    """Creating a table version under a renamed table before the rename is
    applied must fail; the reversed operation order must succeed.
    """
    commit_results = _commit_single_delta_table(temp_dir)
    for expected, actual, _ in commit_results:
        assert expected.equivalent_to(actual)
    original_table: Table = commit_results[1][1]

    # a rename of the committed table
    renamed_table: Table = Table.update_for(original_table)
    renamed_table.locator = TableLocator.at(
        namespace="test_namespace",
        table_name="test_table_renamed",
    )
    # a new table version that targets the renamed (not-yet-existing) table
    committed_table_version = TableVersion(commit_results[2][1])
    new_table_version_to_create: TableVersion = TableVersion.based_on(
        other=committed_table_version,
        new_id=committed_table_version.id + "0",
    )
    new_table_version_to_create.table_locator.table_name = renamed_table.table_name

    # operations deliberately ordered table-version-first (i.e., before the
    # table rename it depends on)
    misordered_ops = [
        TransactionOperation.of(
            TransactionOperationType.CREATE,
            new_table_version_to_create,
        ),
        TransactionOperation.of(
            TransactionOperationType.UPDATE,
            renamed_table,
            original_table,
        ),
    ]
    misordered_txn = Transaction.of(
        txn_type=TransactionType.ALTER,
        txn_operations=misordered_ops,
    )
    # committing the misordered transaction must fail
    with pytest.raises(ValueError):
        misordered_txn.commit(temp_dir)

    # reversing the operations satisfies the dependency order, so both the
    # rename and the new table version should be written
    ordered_txn = Transaction.of(
        txn_type=TransactionType.ALTER,
        txn_operations=misordered_ops[::-1],
    )
    write_paths, txn_log_path = ordered_txn.commit(temp_dir)
    assert len(write_paths) == 2
def test_create_duplicate_namespace(self, temp_dir):
    """Two serial transactions creating the same namespace: the first
    commit succeeds and round-trips the namespace; the second must fail.
    """
    ns = Namespace.of(locator=NamespaceLocator.of(namespace="test_namespace"))
    create_op = TransactionOperation.of(
        TransactionOperationType.CREATE,
        ns,
    )
    txn = Transaction.of(
        txn_type=TransactionType.APPEND,
        txn_operations=[create_op],
    )
    # the first commit should succeed and write a readable namespace
    write_paths, txn_log_path = txn.commit(temp_dir)
    round_tripped = Namespace.read(write_paths.pop())
    assert ns.equivalent_to(round_tripped)
    # committing the same creation again must be rejected
    with pytest.raises(ValueError):
        txn.commit(temp_dir)
def test_create_duplicate_namespace_txn_op_chaining(self, temp_dir):
    """A single transaction that creates the same namespace twice must be
    rejected at commit time.
    """
    ns = Namespace.of(locator=NamespaceLocator.of(namespace="test_namespace"))
    duplicate_ops = [
        TransactionOperation.of(
            TransactionOperationType.CREATE,
            ns,
        )
        for _ in range(2)
    ]
    txn = Transaction.of(
        txn_type=TransactionType.APPEND,
        txn_operations=duplicate_ops,
    )
    # the duplicate CREATE within one transaction must fail
    with pytest.raises(ValueError):
        txn.commit(temp_dir)
def test_create_stream_in_missing_table_version(self, temp_dir):
    """Creating a stream under a nonexistent table version must be rejected."""
    commit_results = _commit_single_delta_table(temp_dir)
    for expected, actual, _ in commit_results:
        assert expected.equivalent_to(actual)
    # derive a stream whose parent table version does not exist
    committed_stream = Stream(commit_results[3][1])
    orphan_stream: Stream = Stream.based_on(
        other=committed_stream,
        new_id="test_stream_id",
    )
    orphan_stream.table_version_locator.table_version = "missing_table_version.0"
    txn = Transaction.of(
        txn_type=TransactionType.APPEND,
        txn_operations=[
            TransactionOperation.of(
                TransactionOperationType.CREATE,
                orphan_stream,
            )
        ],
    )
    # committing must fail since the parent table version is missing
    with pytest.raises(ValueError):
        txn.commit(temp_dir)
def test_create_table_version_in_missing_namespace(self, temp_dir):
    """Creating a table version under a nonexistent namespace must be
    rejected.
    """
    commit_results = _commit_single_delta_table(temp_dir)
    for expected, actual, _ in commit_results:
        assert expected.equivalent_to(actual)
    # derive a table version whose parent namespace does not exist
    committed_table_version = TableVersion(commit_results[2][1])
    orphan_table_version: TableVersion = TableVersion.based_on(
        other=committed_table_version,
        new_id="test_table_version.1",
    )
    orphan_table_version.namespace_locator.namespace = "missing_namespace"
    txn = Transaction.of(
        txn_type=TransactionType.APPEND,
        txn_operations=[
            TransactionOperation.of(
                TransactionOperationType.CREATE,
                orphan_table_version,
            )
        ],
    )
    # committing must fail since the parent namespace is missing
    with pytest.raises(ValueError):
        txn.commit(temp_dir)
def test_create_table_version_in_missing_table(self, temp_dir):
    """Creating a table version under a nonexistent table must be rejected."""
    commit_results = _commit_single_delta_table(temp_dir)
    for expected, actual, _ in commit_results:
        assert expected.equivalent_to(actual)
    # derive a table version whose parent table does not exist
    committed_table_version = TableVersion(commit_results[2][1])
    orphan_table_version: TableVersion = TableVersion.based_on(
        other=committed_table_version,
        new_id="test_table_version.1",
    )
    orphan_table_version.table_locator.table_name = "missing_table"
    txn = Transaction.of(
        txn_type=TransactionType.APPEND,
        txn_operations=[
            TransactionOperation.of(
                TransactionOperationType.CREATE,
                orphan_table_version,
            )
        ],
    )
    # committing must fail since the parent table is missing
    with pytest.raises(ValueError):
        txn.commit(temp_dir)
def test_create_table_in_missing_namespace(self, temp_dir):
    """Creating a table under a nonexistent namespace must be rejected."""
    orphan_table = Table.of(
        locator=TableLocator.at(
            namespace="missing_namespace",
            table_name="test_table",
        ),
        description="test table description",
    )
    txn = Transaction.of(
        txn_type=TransactionType.APPEND,
        txn_operations=[
            TransactionOperation.of(
                TransactionOperationType.CREATE,
                orphan_table,
            )
        ],
    )
    # committing must fail since the parent namespace is missing
    with pytest.raises(ValueError):
        txn.commit(temp_dir)
def test_rename_table_txn_op_chaining(self, temp_dir):
    """A single transaction can rename a table and then create a chained
    table version, stream, partition, and delta under the renamed table,
    provided the operations are ordered so each dependency is satisfied.
    """
    commit_results = _commit_single_delta_table(temp_dir)
    for expected, actual, _ in commit_results:
        assert expected.equivalent_to(actual)
    original_table: Table = commit_results[1][1]

    # a table rename
    renamed_table: Table = Table.update_for(original_table)
    renamed_table.locator = TableLocator.at(
        namespace="test_namespace",
        table_name="test_table_renamed",
    )
    committed_delta = Delta(commit_results[5][1])
    committed_partition = Partition(commit_results[4][1])
    committed_stream = Stream(commit_results[3][1])
    committed_table_version = TableVersion(commit_results[2][1])
    # a new table version under the renamed table
    tv_to_create: TableVersion = TableVersion.based_on(
        other=committed_table_version,
        new_id=committed_table_version.table_version + "0",
    )
    tv_to_create.table_locator.table_name = renamed_table.table_name
    # a new stream under the new table version
    stream_to_create: Stream = Stream.based_on(
        other=committed_stream,
        new_id=committed_stream.stream_id + "_2",
    )
    stream_to_create.locator.table_version_locator = tv_to_create.locator
    # a new partition under the new stream
    partition_to_create: Partition = Partition.based_on(
        other=committed_partition,
        new_id=committed_partition.partition_id + "_2",
    )
    partition_to_create.locator.stream_locator = stream_to_create.locator
    # a new delta under the new partition
    delta_to_create = Delta.based_on(
        other=committed_delta,
        new_id="2",
    )
    delta_to_create.locator.partition_locator = partition_to_create.locator

    # order the operations so every parent exists before its child
    txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.UPDATE,
            dest_metafile=renamed_table,
            src_metafile=original_table,
        )
    ]
    txn_operations += [
        TransactionOperation.of(
            operation_type=TransactionOperationType.CREATE,
            dest_metafile=new_child,
        )
        for new_child in (
            tv_to_create,
            stream_to_create,
            partition_to_create,
            delta_to_create,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.ALTER,
        txn_operations=txn_operations,
    )
    # commit and expect all five metafiles to be written
    write_paths, txn_log_path = transaction.commit(temp_dir)
    assert len(write_paths) == 5

    # every written metafile should round-trip to its expected value:
    # the renamed table, then the chained table version, stream,
    # partition, and delta created beneath it
    expected_metafiles = (
        renamed_table,
        tv_to_create,
        stream_to_create,
        partition_to_create,
        delta_to_create,
    )
    reader_types = (Table, TableVersion, Stream, Partition, Delta)
    for expected_metafile, reader, path in zip(
        expected_metafiles, reader_types, write_paths
    ):
        assert expected_metafile.equivalent_to(reader.read(path))
def test_rename_table(self, temp_dir):
    """Renaming a table via an ALTER transaction updates the table metafile
    and is reflected by all child metafiles read afterwards, while the old
    table name becomes unusable for updates, deletes, and child creates.
    """
    # seed the catalog with one namespace/table/table version/stream/
    # partition/delta and sanity-check the seed commit round-tripped
    commit_results = _commit_single_delta_table(temp_dir)
    for expected, actual, _ in commit_results:
        assert expected.equivalent_to(actual)
    # commit_results[1] is the (expected, actual, path) triple for the table
    original_table: Table = commit_results[1][1]

    # given a transaction containing a table rename
    renamed_table: Table = Table.update_for(original_table)
    renamed_table.locator = TableLocator.at(
        namespace="test_namespace",
        table_name="test_table_renamed",
    )
    txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.UPDATE,
            dest_metafile=renamed_table,
            src_metafile=original_table,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.ALTER,
        txn_operations=txn_operations,
    )
    # when the transaction is committed
    write_paths, txn_log_path = transaction.commit(temp_dir)

    # expect only one new table metafile to be written
    assert len(write_paths) == 1

    # expect the table to be successfully renamed
    actual_table = Table.read(write_paths[0])
    assert renamed_table == actual_table

    # expect all new child metafiles read to return the new table name
    # (indices 5..2 are delta, partition, stream, and table version triples;
    # element [2] of each triple is the metafile's write path)
    child_metafiles_read_post_rename = [
        Delta.read(commit_results[5][2]),
        Partition.read(commit_results[4][2]),
        Stream.read(commit_results[3][2]),
        TableVersion.read(commit_results[2][2]),
    ]
    for metafile in child_metafiles_read_post_rename:
        assert metafile.table_name == renamed_table.table_name

    # expect all original metafiles to return the original table name
    # (element [0] of each triple is the expected/input metafile,
    # element [1] is the actual metafile created)
    original_child_metafiles_to_create = [
        Delta(commit_results[5][0]),
        Partition(commit_results[4][0]),
        Stream(commit_results[3][0]),
        TableVersion(commit_results[2][0]),
    ]
    original_child_metafiles_created = [
        Delta(commit_results[5][1]),
        Partition(commit_results[4][1]),
        Stream(commit_results[3][1]),
        TableVersion(commit_results[2][1]),
    ]
    for i in range(len(original_child_metafiles_to_create)):
        assert (
            original_child_metafiles_created[i].table_name
            == original_child_metafiles_to_create[i].table_name
            == original_table.table_name
        )

    # expect a subsequent table update from the old table name to fail
    bad_txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.UPDATE,
            dest_metafile=renamed_table,
            src_metafile=original_table,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.RESTATE,
        txn_operations=bad_txn_operations,
    )
    with pytest.raises(ValueError):
        transaction.commit(temp_dir)

    # expect table deletes of the old table name fail
    bad_txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.DELETE,
            dest_metafile=original_table,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.DELETE,
        txn_operations=bad_txn_operations,
    )
    with pytest.raises(ValueError):
        transaction.commit(temp_dir)

    # expect child metafile creation under the old table name to fail
    for metafile in original_child_metafiles_created:
        bad_txn_operations = [
            TransactionOperation.of(
                operation_type=TransactionOperationType.CREATE,
                dest_metafile=metafile,
            )
        ]
        transaction = Transaction.of(
            txn_type=TransactionType.APPEND,
            txn_operations=bad_txn_operations,
        )
        with pytest.raises(ValueError):
            transaction.commit(temp_dir)
|
2076
|
+
def test_rename_namespace(self, temp_dir):
    """Renaming a namespace via an ALTER transaction updates the namespace
    metafile and is reflected by all child metafiles read afterwards, while
    the old namespace name becomes unusable for updates, deletes, and child
    creates.
    """
    # seed the catalog with one namespace/table/table version/stream/
    # partition/delta and sanity-check the seed commit round-tripped
    commit_results = _commit_single_delta_table(temp_dir)
    for expected, actual, _ in commit_results:
        assert expected.equivalent_to(actual)
    # commit_results[0] is the (expected, actual, path) triple for the
    # namespace; element [1] is the actual namespace metafile created
    original_namespace = commit_results[0][1]
    # given a transaction containing a namespace rename
    renamed_namespace: Namespace = Namespace.update_for(original_namespace)
    renamed_namespace.locator = NamespaceLocator.of(
        namespace="test_namespace_renamed",
    )
    txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.UPDATE,
            dest_metafile=renamed_namespace,
            src_metafile=original_namespace,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.ALTER,
        txn_operations=txn_operations,
    )
    # when the transaction is committed
    write_paths, txn_log_path = transaction.commit(temp_dir)

    # expect only one new namespace metafile to be written
    assert len(write_paths) == 1

    # expect the namespace to be successfully renamed
    actual_namespace = Namespace.read(write_paths[0])
    assert renamed_namespace == actual_namespace

    # expect all child metafiles read to return the new namespace
    # (indices 5..1 are delta, partition, stream, table version, and table
    # triples; element [2] of each triple is the metafile's write path)
    child_metafiles_read_post_rename = [
        Delta.read(commit_results[5][2]),
        Partition.read(commit_results[4][2]),
        Stream.read(commit_results[3][2]),
        TableVersion.read(commit_results[2][2]),
        Table.read(commit_results[1][2]),
    ]
    for metafile in child_metafiles_read_post_rename:
        assert metafile.namespace == "test_namespace_renamed"

    # expect the original metafiles to return the original namespace
    original_child_metafiles_to_create = [
        Delta(commit_results[5][0]),
        Partition(commit_results[4][0]),
        Stream(commit_results[3][0]),
        TableVersion(commit_results[2][0]),
        Table(commit_results[1][0]),
    ]
    original_child_metafiles_created = [
        Delta(commit_results[5][1]),
        Partition(commit_results[4][1]),
        Stream(commit_results[3][1]),
        TableVersion(commit_results[2][1]),
        Table(commit_results[1][1]),
    ]
    for i in range(len(original_child_metafiles_to_create)):
        assert (
            original_child_metafiles_created[i].namespace
            == original_child_metafiles_to_create[i].namespace
            == "test_namespace"
        )

    # expect a subsequent update of the old namespace name to fail
    bad_txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.UPDATE,
            dest_metafile=renamed_namespace,
            src_metafile=original_namespace,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.ALTER,
        txn_operations=bad_txn_operations,
    )
    with pytest.raises(ValueError):
        transaction.commit(temp_dir)

    # expect namespace deletes of the old namespace name fail
    bad_txn_operations = [
        TransactionOperation.of(
            operation_type=TransactionOperationType.DELETE,
            dest_metafile=original_namespace,
        )
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.DELETE,
        txn_operations=bad_txn_operations,
    )
    with pytest.raises(ValueError):
        transaction.commit(temp_dir)

    # expect child metafile creation under the old namespace to fail
    for metafile in original_child_metafiles_created:
        bad_txn_operations = [
            TransactionOperation.of(
                operation_type=TransactionOperationType.CREATE,
                dest_metafile=metafile,
            )
        ]
        transaction = Transaction.of(
            txn_type=TransactionType.APPEND,
            txn_operations=bad_txn_operations,
        )
        with pytest.raises(ValueError):
            transaction.commit(temp_dir)
|
2184
|
+
def test_e2e_serde(self, temp_dir):
    """End-to-end SerDe: committing a transaction that creates a single
    namespace, table, table version, stream, partition, and delta yields
    actual metafiles equivalent to every expected/input metafile.
    """
    results = _commit_single_delta_table(temp_dir)
    assert all(
        given.equivalent_to(written) for given, written, _ in results
    )
|
2193
|
+
def test_namespace_serde(self, temp_dir):
    """Round-trip a single Namespace metafile through a transaction commit."""
    namespace = Namespace.of(
        locator=NamespaceLocator.of(namespace="test_namespace")
    )
    # given a transaction that creates this single namespace
    create_namespace = TransactionOperation.of(
        operation_type=TransactionOperationType.CREATE,
        dest_metafile=namespace,
    )
    txn = Transaction.of(
        txn_type=TransactionType.APPEND,
        txn_operations=[create_namespace],
    )
    # when the transaction is committed,
    # expect the namespace read back to match the namespace given
    write_paths, txn_log_path = txn.commit(temp_dir)
    round_tripped = Namespace.read(write_paths.pop())
    assert namespace.equivalent_to(round_tripped)
|
2211
|
+
def test_table_serde(self, temp_dir):
    """Round-trip a single Table metafile through a transaction commit."""
    table = Table.of(
        locator=TableLocator.at(
            namespace=None,
            table_name="test_table",
        ),
        description="test table description",
    )
    # given a transaction that creates this single table
    create_table = TransactionOperation.of(
        operation_type=TransactionOperationType.CREATE,
        dest_metafile=table,
    )
    write_paths, txn_log_path = Transaction.of(
        txn_type=TransactionType.APPEND,
        txn_operations=[create_table],
    ).commit(temp_dir)
    # when the transaction is committed,
    # expect the table read back to match the table given
    round_tripped = Table.read(write_paths.pop())
    assert table.equivalent_to(round_tripped)
|
2235
|
+
def test_table_version_serde(self, temp_dir):
    """Round-trip a fully-populated TableVersion metafile (schema,
    partition scheme, sort scheme, properties, and historic scheme lists)
    through a transaction commit.
    """
    table_version_locator = TableVersionLocator.at(
        namespace=None,
        table_name=None,
        table_version="test_table_version.1",
    )
    # a schema with two merge-key fields and one non-merge-key field
    schema = Schema.of(
        [
            Field.of(
                field=pa.field("some_string", pa.string(), nullable=False),
                field_id=1,
                is_merge_key=True,
            ),
            Field.of(
                field=pa.field("some_int32", pa.int32(), nullable=False),
                field_id=2,
                is_merge_key=True,
            ),
            Field.of(
                field=pa.field("some_float64", pa.float64()),
                field_id=3,
                is_merge_key=False,
            ),
        ]
    )
    bucket_transform = BucketTransform.of(
        BucketTransformParameters.of(
            num_buckets=2,
            bucketing_strategy=BucketingStrategy.DEFAULT,
        )
    )
    partition_keys = [
        PartitionKey.of(
            key=["some_string", "some_int32"],
            name="test_partition_key",
            field_id="test_field_id",
            transform=bucket_transform,
        )
    ]
    partition_scheme = PartitionScheme.of(
        keys=partition_keys,
        name="test_partition_scheme",
        scheme_id="test_partition_scheme_id",
    )
    sort_keys = [
        SortKey.of(
            key=["some_int32"],
            sort_order=SortOrder.DESCENDING,
            null_order=NullOrder.AT_START,
            transform=TruncateTransform.of(
                TruncateTransformParameters.of(width=3),
            ),
        )
    ]
    sort_scheme = SortScheme.of(
        keys=sort_keys,
        name="test_sort_scheme",
        scheme_id="test_sort_scheme_id",
    )
    table_version = TableVersion.of(
        locator=table_version_locator,
        schema=schema,
        partition_scheme=partition_scheme,
        description="test table version description",
        properties={"test_property_key": "test_property_value"},
        content_types=[ContentType.PARQUET],
        sort_scheme=sort_scheme,
        watermark=1,
        lifecycle_state=LifecycleState.CREATED,
        # repeated entries exercise SerDe of the historic scheme lists
        schemas=[schema, schema, schema],
        partition_schemes=[partition_scheme, partition_scheme],
        sort_schemes=[sort_scheme, sort_scheme],
    )
    # given a transaction that creates a single table version
    write_paths, txn_log_path = Transaction.of(
        txn_type=TransactionType.APPEND,
        txn_operations=[
            TransactionOperation.of(
                operation_type=TransactionOperationType.CREATE,
                dest_metafile=table_version,
            )
        ],
    ).commit(temp_dir)
    # when the transaction is committed,
    # expect the table version created to match the table version given
    deserialized_table_version = TableVersion.read(write_paths.pop())
    assert table_version.equivalent_to(deserialized_table_version)
|
2323
|
+
def test_stream_serde(self, temp_dir):
    """Round-trip a single Stream metafile through a transaction commit."""
    locator = StreamLocator.at(
        namespace=None,
        table_name=None,
        table_version=None,
        stream_id="test_stream_id",
        stream_format=StreamFormat.DELTACAT,
    )
    # a bucket-partitioned scheme over two key fields
    transform = BucketTransform.of(
        BucketTransformParameters.of(
            num_buckets=2,
            bucketing_strategy=BucketingStrategy.DEFAULT,
        )
    )
    scheme = PartitionScheme.of(
        keys=[
            PartitionKey.of(
                key=["some_string", "some_int32"],
                name="test_partition_key",
                field_id="test_field_id",
                transform=transform,
            )
        ],
        name="test_partition_scheme",
        scheme_id="test_partition_scheme_id",
    )
    stream = Stream.of(
        locator=locator,
        partition_scheme=scheme,
        state=CommitState.STAGED,
        previous_stream_id="test_previous_stream_id",
        watermark=1,
    )
    # given a transaction that creates this single stream
    create_stream = TransactionOperation.of(
        operation_type=TransactionOperationType.CREATE,
        dest_metafile=stream,
    )
    write_paths, txn_log_path = Transaction.of(
        txn_type=TransactionType.APPEND,
        txn_operations=[create_stream],
    ).commit(temp_dir)
    # when the transaction is committed,
    # expect the stream read back to match the stream given
    round_tripped = Stream.read(write_paths.pop())
    assert stream.equivalent_to(round_tripped)
|
2372
|
+
def test_partition_serde(self, temp_dir):
    """Round-trip a single Partition metafile through a transaction commit."""
    locator = PartitionLocator.at(
        namespace=None,
        table_name=None,
        table_version=None,
        stream_id=None,
        stream_format=None,
        partition_values=["a", 1],
        partition_id="test_partition_id",
    )
    # two merge-key fields plus one non-merge-key field
    fields = [
        Field.of(
            field=pa.field("some_string", pa.string(), nullable=False),
            field_id=1,
            is_merge_key=True,
        ),
        Field.of(
            field=pa.field("some_int32", pa.int32(), nullable=False),
            field_id=2,
            is_merge_key=True,
        ),
        Field.of(
            field=pa.field("some_float64", pa.float64()),
            field_id=3,
            is_merge_key=False,
        ),
    ]
    partition = Partition.of(
        locator=locator,
        schema=Schema.of(fields),
        content_types=[ContentType.PARQUET],
        state=CommitState.STAGED,
        previous_stream_position=0,
        previous_partition_id="test_previous_partition_id",
        stream_position=1,
        partition_scheme_id="test_partition_scheme_id",
    )
    # given a transaction that creates this single partition
    write_paths, txn_log_path = Transaction.of(
        txn_type=TransactionType.APPEND,
        txn_operations=[
            TransactionOperation.of(
                operation_type=TransactionOperationType.CREATE,
                dest_metafile=partition,
            )
        ],
    ).commit(temp_dir)
    # when the transaction is committed,
    # expect the partition read back to match the partition given
    round_tripped = Partition.read(write_paths.pop())
    assert partition.equivalent_to(round_tripped)
|
2426
|
+
def test_delta_serde(self, temp_dir):
    """Round-trip a fully-populated Delta metafile (manifest, manifest
    metadata, author, and equality-delete entry params) through a
    transaction commit.
    """
    delta_locator = DeltaLocator.at(
        namespace=None,
        table_name=None,
        table_version=None,
        stream_id=None,
        stream_format=None,
        partition_values=None,
        partition_id=None,
        stream_position=1,
    )
    manifest_entry_params = EntryParams.of(
        equality_field_locators=["some_string", "some_int32"],
    )
    manifest_meta = ManifestMeta.of(
        record_count=1,
        content_length=10,
        content_type=ContentType.PARQUET.value,
        content_encoding=ContentEncoding.IDENTITY.value,
        source_content_length=100,
        credentials={"foo": "bar"},
        content_type_parameters=[{"param1": "value1"}],
        entry_type=EntryType.EQUALITY_DELETE,
        entry_params=manifest_entry_params,
    )
    manifest = Manifest.of(
        entries=[
            ManifestEntry.of(
                url="s3://test/url",
                meta=manifest_meta,
            )
        ],
        author=ManifestAuthor.of(
            name="deltacat",
            version="2.0",
        ),
        entry_type=EntryType.EQUALITY_DELETE,
        entry_params=manifest_entry_params,
    )
    delta = Delta.of(
        locator=delta_locator,
        delta_type=DeltaType.APPEND,
        meta=manifest_meta,
        properties={"property1": "value1"},
        manifest=manifest,
        previous_stream_position=0,
    )
    # given a transaction that creates a single delta
    write_paths, txn_log_path = Transaction.of(
        txn_type=TransactionType.APPEND,
        txn_operations=[
            TransactionOperation.of(
                operation_type=TransactionOperationType.CREATE,
                dest_metafile=delta,
            )
        ],
    ).commit(temp_dir)
    # when the transaction is committed,
    # expect the delta created to match the delta given
    deserialized_delta = Delta.read(write_paths.pop())
    assert delta.equivalent_to(deserialized_delta)
|
2488
|
+
def test_python_type_serde(self, temp_dir):
    """Table property values of every basic Python type survive a
    commit/read round trip, modulo the documented msgpack transformations.
    """
    # given a table whose property values contain every basic python type
    # except set, frozenset, and range, which can't be serialized by
    # msgpack, and memoryview, which can't be pickled by copy.deepcopy
    properties = {
        "foo": 1,
        "bar": 2.0,
        "baz": True,
        "qux": b"123",
        "quux": None,
        "corge": [1, 2, 3],
        "grault": {"foo": "bar"},
        "garply": (1, 2, 3),
        "waldo": bytearray(3),
    }
    table = Table.of(
        locator=TableLocator.at(
            namespace=None,
            table_name="test_table",
        ),
        description="test table description",
        properties=properties,
    )
    # when a transaction commits this table
    write_paths, txn_log_path = Transaction.of(
        txn_type=TransactionType.APPEND,
        txn_operations=[
            TransactionOperation.of(
                operation_type=TransactionOperationType.CREATE,
                dest_metafile=table,
            )
        ],
    ).commit(temp_dir)
    round_tripped = Table.read(write_paths.pop())
    # expect two SerDe transformations of the original property values:
    # msgpack translates tuple to list and unpacks bytearray into bytes;
    # expect the table created to otherwise match the table given
    table.properties = {
        **properties,
        "garply": [1, 2, 3],
        "waldo": b"\x00\x00\x00",
    }
    assert table.equivalent_to(round_tripped)
|
2533
|
+
def test_metafile_read_bad_path(self, temp_dir):
    """Reading a metafile from a nonexistent path raises FileNotFoundError."""
    nonexistent_path = "foobar"
    with pytest.raises(FileNotFoundError):
        Delta.read(nonexistent_path)