deltacat 1.1.35__py3-none-any.whl → 2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. deltacat/__init__.py +42 -3
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +168 -0
  4. deltacat/aws/s3u.py +4 -4
  5. deltacat/benchmarking/benchmark_engine.py +82 -0
  6. deltacat/benchmarking/benchmark_report.py +86 -0
  7. deltacat/benchmarking/benchmark_suite.py +11 -0
  8. deltacat/benchmarking/conftest.py +21 -0
  9. deltacat/benchmarking/data/random_row_generator.py +94 -0
  10. deltacat/benchmarking/data/row_generator.py +10 -0
  11. deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
  12. deltacat/catalog/__init__.py +14 -0
  13. deltacat/catalog/delegate.py +199 -106
  14. deltacat/catalog/iceberg/__init__.py +4 -0
  15. deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
  16. deltacat/catalog/iceberg/impl.py +368 -0
  17. deltacat/catalog/iceberg/overrides.py +74 -0
  18. deltacat/catalog/interface.py +273 -76
  19. deltacat/catalog/main/impl.py +720 -0
  20. deltacat/catalog/model/catalog.py +227 -20
  21. deltacat/catalog/model/properties.py +116 -0
  22. deltacat/catalog/model/table_definition.py +32 -1
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +5 -5
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +1 -1
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +1 -1
  32. deltacat/compute/compactor/steps/materialize.py +6 -2
  33. deltacat/compute/compactor/utils/io.py +1 -1
  34. deltacat/compute/compactor/utils/sort_key.py +9 -2
  35. deltacat/compute/compactor_v2/compaction_session.py +2 -3
  36. deltacat/compute/compactor_v2/constants.py +1 -30
  37. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  38. deltacat/compute/compactor_v2/model/merge_input.py +1 -1
  39. deltacat/compute/compactor_v2/private/compaction_utils.py +5 -5
  40. deltacat/compute/compactor_v2/steps/merge.py +11 -80
  41. deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
  42. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  43. deltacat/compute/compactor_v2/utils/io.py +1 -1
  44. deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
  45. deltacat/compute/compactor_v2/utils/task_options.py +23 -43
  46. deltacat/compute/converter/constants.py +4 -0
  47. deltacat/compute/converter/converter_session.py +143 -0
  48. deltacat/compute/converter/model/convert_input.py +69 -0
  49. deltacat/compute/converter/model/convert_input_files.py +61 -0
  50. deltacat/compute/converter/model/converter_session_params.py +99 -0
  51. deltacat/compute/converter/pyiceberg/__init__.py +0 -0
  52. deltacat/compute/converter/pyiceberg/catalog.py +75 -0
  53. deltacat/compute/converter/pyiceberg/overrides.py +135 -0
  54. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
  55. deltacat/compute/converter/steps/__init__.py +0 -0
  56. deltacat/compute/converter/steps/convert.py +211 -0
  57. deltacat/compute/converter/steps/dedupe.py +60 -0
  58. deltacat/compute/converter/utils/__init__.py +0 -0
  59. deltacat/compute/converter/utils/convert_task_options.py +88 -0
  60. deltacat/compute/converter/utils/converter_session_utils.py +109 -0
  61. deltacat/compute/converter/utils/iceberg_columns.py +82 -0
  62. deltacat/compute/converter/utils/io.py +43 -0
  63. deltacat/compute/converter/utils/s3u.py +133 -0
  64. deltacat/compute/resource_estimation/delta.py +1 -19
  65. deltacat/constants.py +47 -1
  66. deltacat/env.py +51 -0
  67. deltacat/examples/__init__.py +0 -0
  68. deltacat/examples/basic_logging.py +101 -0
  69. deltacat/examples/common/__init__.py +0 -0
  70. deltacat/examples/common/fixtures.py +15 -0
  71. deltacat/examples/hello_world.py +27 -0
  72. deltacat/examples/iceberg/__init__.py +0 -0
  73. deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
  74. deltacat/examples/iceberg/iceberg_reader.py +149 -0
  75. deltacat/exceptions.py +51 -9
  76. deltacat/logs.py +4 -1
  77. deltacat/storage/__init__.py +118 -28
  78. deltacat/storage/iceberg/__init__.py +0 -0
  79. deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
  80. deltacat/storage/iceberg/impl.py +737 -0
  81. deltacat/storage/iceberg/model.py +709 -0
  82. deltacat/storage/interface.py +217 -134
  83. deltacat/storage/main/__init__.py +0 -0
  84. deltacat/storage/main/impl.py +2077 -0
  85. deltacat/storage/model/delta.py +118 -71
  86. deltacat/storage/model/interop.py +24 -0
  87. deltacat/storage/model/list_result.py +8 -0
  88. deltacat/storage/model/locator.py +93 -3
  89. deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
  90. deltacat/storage/model/metafile.py +1316 -0
  91. deltacat/storage/model/namespace.py +34 -18
  92. deltacat/storage/model/partition.py +362 -37
  93. deltacat/storage/model/scan/__init__.py +0 -0
  94. deltacat/storage/model/scan/push_down.py +19 -0
  95. deltacat/storage/model/scan/scan_plan.py +10 -0
  96. deltacat/storage/model/scan/scan_task.py +34 -0
  97. deltacat/storage/model/schema.py +892 -0
  98. deltacat/storage/model/shard.py +47 -0
  99. deltacat/storage/model/sort_key.py +170 -13
  100. deltacat/storage/model/stream.py +208 -80
  101. deltacat/storage/model/table.py +123 -29
  102. deltacat/storage/model/table_version.py +322 -46
  103. deltacat/storage/model/transaction.py +757 -0
  104. deltacat/storage/model/transform.py +198 -61
  105. deltacat/storage/model/types.py +111 -13
  106. deltacat/storage/rivulet/__init__.py +11 -0
  107. deltacat/storage/rivulet/arrow/__init__.py +0 -0
  108. deltacat/storage/rivulet/arrow/serializer.py +75 -0
  109. deltacat/storage/rivulet/dataset.py +744 -0
  110. deltacat/storage/rivulet/dataset_executor.py +87 -0
  111. deltacat/storage/rivulet/feather/__init__.py +5 -0
  112. deltacat/storage/rivulet/feather/file_reader.py +136 -0
  113. deltacat/storage/rivulet/feather/serializer.py +35 -0
  114. deltacat/storage/rivulet/fs/__init__.py +0 -0
  115. deltacat/storage/rivulet/fs/file_provider.py +105 -0
  116. deltacat/storage/rivulet/fs/file_store.py +130 -0
  117. deltacat/storage/rivulet/fs/input_file.py +76 -0
  118. deltacat/storage/rivulet/fs/output_file.py +86 -0
  119. deltacat/storage/rivulet/logical_plan.py +105 -0
  120. deltacat/storage/rivulet/metastore/__init__.py +0 -0
  121. deltacat/storage/rivulet/metastore/delta.py +190 -0
  122. deltacat/storage/rivulet/metastore/json_sst.py +105 -0
  123. deltacat/storage/rivulet/metastore/sst.py +82 -0
  124. deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  125. deltacat/storage/rivulet/mvp/Table.py +101 -0
  126. deltacat/storage/rivulet/mvp/__init__.py +5 -0
  127. deltacat/storage/rivulet/parquet/__init__.py +5 -0
  128. deltacat/storage/rivulet/parquet/data_reader.py +0 -0
  129. deltacat/storage/rivulet/parquet/file_reader.py +127 -0
  130. deltacat/storage/rivulet/parquet/serializer.py +37 -0
  131. deltacat/storage/rivulet/reader/__init__.py +0 -0
  132. deltacat/storage/rivulet/reader/block_scanner.py +378 -0
  133. deltacat/storage/rivulet/reader/data_reader.py +136 -0
  134. deltacat/storage/rivulet/reader/data_scan.py +63 -0
  135. deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
  136. deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
  137. deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
  138. deltacat/storage/rivulet/reader/query_expression.py +99 -0
  139. deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
  140. deltacat/storage/rivulet/schema/__init__.py +0 -0
  141. deltacat/storage/rivulet/schema/datatype.py +128 -0
  142. deltacat/storage/rivulet/schema/schema.py +251 -0
  143. deltacat/storage/rivulet/serializer.py +40 -0
  144. deltacat/storage/rivulet/serializer_factory.py +42 -0
  145. deltacat/storage/rivulet/writer/__init__.py +0 -0
  146. deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
  147. deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
  148. deltacat/tests/_io/__init__.py +1 -0
  149. deltacat/tests/catalog/test_catalogs.py +324 -0
  150. deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
  151. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  152. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  153. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  154. deltacat/tests/compute/compact_partition_test_cases.py +19 -53
  155. deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
  156. deltacat/tests/compute/compactor/utils/test_io.py +6 -8
  157. deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
  158. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
  159. deltacat/tests/compute/conftest.py +75 -0
  160. deltacat/tests/compute/converter/__init__.py +0 -0
  161. deltacat/tests/compute/converter/conftest.py +80 -0
  162. deltacat/tests/compute/converter/test_convert_session.py +478 -0
  163. deltacat/tests/compute/converter/utils.py +123 -0
  164. deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
  165. deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
  166. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
  167. deltacat/tests/compute/test_compact_partition_params.py +3 -3
  168. deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
  169. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
  170. deltacat/tests/compute/test_util_common.py +19 -12
  171. deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
  172. deltacat/tests/local_deltacat_storage/__init__.py +76 -103
  173. deltacat/tests/storage/__init__.py +0 -0
  174. deltacat/tests/storage/conftest.py +25 -0
  175. deltacat/tests/storage/main/__init__.py +0 -0
  176. deltacat/tests/storage/main/test_main_storage.py +1399 -0
  177. deltacat/tests/storage/model/__init__.py +0 -0
  178. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  179. deltacat/tests/storage/model/test_metafile_io.py +2535 -0
  180. deltacat/tests/storage/model/test_schema.py +308 -0
  181. deltacat/tests/storage/model/test_shard.py +22 -0
  182. deltacat/tests/storage/model/test_table_version.py +110 -0
  183. deltacat/tests/storage/model/test_transaction.py +308 -0
  184. deltacat/tests/storage/rivulet/__init__.py +0 -0
  185. deltacat/tests/storage/rivulet/conftest.py +149 -0
  186. deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
  187. deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
  188. deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
  189. deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
  190. deltacat/tests/storage/rivulet/test_dataset.py +406 -0
  191. deltacat/tests/storage/rivulet/test_manifest.py +67 -0
  192. deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
  193. deltacat/tests/storage/rivulet/test_utils.py +122 -0
  194. deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
  195. deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
  196. deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
  197. deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  198. deltacat/tests/test_deltacat_api.py +39 -0
  199. deltacat/tests/test_utils/filesystem.py +14 -0
  200. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  201. deltacat/tests/test_utils/pyarrow.py +8 -15
  202. deltacat/tests/test_utils/storage.py +266 -3
  203. deltacat/tests/utils/test_daft.py +3 -3
  204. deltacat/tests/utils/test_pyarrow.py +0 -432
  205. deltacat/types/partial_download.py +1 -1
  206. deltacat/types/tables.py +1 -1
  207. deltacat/utils/export.py +59 -0
  208. deltacat/utils/filesystem.py +320 -0
  209. deltacat/utils/metafile_locator.py +73 -0
  210. deltacat/utils/pyarrow.py +36 -183
  211. deltacat-2.0.dist-info/METADATA +65 -0
  212. deltacat-2.0.dist-info/RECORD +347 -0
  213. deltacat/aws/redshift/__init__.py +0 -19
  214. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  215. deltacat/io/dataset.py +0 -73
  216. deltacat/io/read_api.py +0 -143
  217. deltacat/storage/model/delete_parameters.py +0 -40
  218. deltacat/storage/model/partition_spec.py +0 -71
  219. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
  220. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
  221. deltacat-1.1.35.dist-info/METADATA +0 -64
  222. deltacat-1.1.35.dist-info/RECORD +0 -219
  223. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  224. /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
  225. /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
  226. /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
  227. /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
  228. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  229. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  230. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  231. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  232. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  233. {deltacat-1.1.35.dist-info → deltacat-2.0.dist-info}/LICENSE +0 -0
  234. {deltacat-1.1.35.dist-info → deltacat-2.0.dist-info}/WHEEL +0 -0
  235. {deltacat-1.1.35.dist-info → deltacat-2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,2535 @@
1
import multiprocessing
import os
import time
from typing import List, Tuple, Union

import pyarrow as pa
import pytest

from deltacat import (
    Schema,
    Field,
    PartitionScheme,
    PartitionKey,
    ContentEncoding,
    ContentType,
    SortScheme,
    SortKey,
    SortOrder,
    NullOrder,
    LifecycleState,
)
from deltacat.constants import TXN_DIR_NAME, SUCCESS_TXN_DIR_NAME, NANOS_PER_SEC
from deltacat.storage import (
    BucketTransform,
    BucketTransformParameters,
    BucketingStrategy,
    CommitState,
    DeltaLocator,
    Delta,
    DeltaType,
    EntryParams,
    EntryType,
    Manifest,
    ManifestAuthor,
    ManifestEntry,
    ManifestMeta,
    Namespace,
    NamespaceLocator,
    PartitionLocator,
    Partition,
    StreamLocator,
    StreamFormat,
    Stream,
    Table,
    TableLocator,
    TableVersionLocator,
    TableVersion,
    Transaction,
    TransactionOperation,
    TransactionType,
    TransactionOperationType,
    TruncateTransform,
    TruncateTransformParameters,
)
from deltacat.storage.model.metafile import (
    Metafile,
    MetafileRevisionInfo,
)
from deltacat.tests.test_utils.storage import (
    create_test_namespace,
    create_test_table,
    create_test_table_version,
    create_test_stream,
    create_test_partition,
    create_test_delta,
)
from deltacat.utils.filesystem import resolve_path_and_filesystem
69
+
70
+
71
def _commit_single_delta_table(temp_dir: str) -> List[Tuple[Metafile, Metafile, str]]:
    """Commit one APPEND transaction creating a full metafile hierarchy.

    Creates a namespace, table, table version, stream, partition, and delta
    in a single transaction rooted at ``temp_dir``, then reads each written
    metafile back from its write path.

    Returns:
        A list of ``(input metafile, deserialized metafile, write path)``
        triples, ordered namespace-first (the same order the metafiles were
        given to the transaction).
    """
    metafiles_to_create = [
        create_test_namespace(),
        create_test_table(),
        create_test_table_version(),
        create_test_stream(),
        create_test_partition(),
        create_test_delta(),
    ]
    transaction = Transaction.of(
        txn_type=TransactionType.APPEND,
        txn_operations=[
            TransactionOperation.of(
                operation_type=TransactionOperationType.CREATE,
                dest_metafile=metafile,
            )
            for metafile in metafiles_to_create
        ],
    )
    write_paths, txn_log_path = transaction.commit(temp_dir)
    assert os.path.exists(txn_log_path)
    # read each committed metafile back, pairing every write path with the
    # reader for the metafile type written at that position
    readers = [Namespace, Table, TableVersion, Stream, Partition, Delta]
    metafiles_read = [
        reader.read(write_path)
        for reader, write_path in zip(readers, write_paths)
    ]
    return list(zip(metafiles_to_create, metafiles_read, write_paths))
111
+
112
+
113
def _commit_concurrent_transaction(
    catalog_root: str,
    transaction: Transaction,
) -> Union[Tuple[List[str], str], Exception]:
    """Commit ``transaction`` against ``catalog_root``, capturing conflicts.

    Helper for multiprocess concurrency tests: expected commit conflicts are
    returned (not raised) so that ``Pool.apply_async`` result objects can
    carry them back to the parent process for inspection.

    Returns:
        The ``(write_paths, txn_log_path)`` tuple from a successful commit,
        or the raised ``RuntimeError``/``ValueError`` on a commit conflict.
    """
    # NOTE(review): the original annotated this as ``-> None``, but it always
    # returns either the commit result or the captured exception.
    try:
        return transaction.commit(catalog_root)
    except (RuntimeError, ValueError) as e:
        return e
121
+
122
+
123
+ class TestMetafileIO:
124
+ def test_txn_conflict_concurrent_multiprocess_table_create(self, temp_dir):
125
+ base_table_name = "test_table"
126
+ table_locator = TableLocator.at(
127
+ namespace=None,
128
+ table_name=base_table_name,
129
+ )
130
+ # given a transaction to create a table
131
+ table = Table.of(
132
+ locator=table_locator,
133
+ description="test table description",
134
+ )
135
+ transaction = Transaction.of(
136
+ txn_type=TransactionType.APPEND,
137
+ txn_operations=[
138
+ TransactionOperation.of(
139
+ operation_type=TransactionOperationType.CREATE,
140
+ dest_metafile=table,
141
+ )
142
+ ],
143
+ )
144
+ # when K rounds of N concurrent transaction commits try to create the
145
+ # same table
146
+ rounds = 25
147
+ concurrent_commit_count = multiprocessing.cpu_count()
148
+ with multiprocessing.Pool(processes=concurrent_commit_count) as pool:
149
+ for round_number in range(rounds):
150
+ table.locator.table_name = f"{base_table_name}_{round_number}"
151
+ futures = [
152
+ pool.apply_async(
153
+ _commit_concurrent_transaction, (temp_dir, transaction)
154
+ )
155
+ for _ in range(concurrent_commit_count)
156
+ ]
157
+ # expect all but one concurrent transaction to succeed each round
158
+ results = [future.get() for future in futures]
159
+ conflict_exception_count = 0
160
+ for result in results:
161
+ # TODO(pdames): Add new concurrent conflict exception types.
162
+ if isinstance(result, RuntimeError) or isinstance(
163
+ result, ValueError
164
+ ):
165
+ conflict_exception_count += 1
166
+ else:
167
+ write_paths, txn_log_path = result
168
+ deserialized_table = Table.read(write_paths.pop())
169
+ assert table.equivalent_to(deserialized_table)
170
+ assert conflict_exception_count == concurrent_commit_count - 1
171
+
172
+ def test_txn_dual_commit_fails(self, temp_dir):
173
+ namespace_locator = NamespaceLocator.of(namespace="test_namespace")
174
+ namespace = Namespace.of(locator=namespace_locator)
175
+ # given a transaction that creates a single namespace
176
+ transaction = Transaction.of(
177
+ txn_type=TransactionType.APPEND,
178
+ txn_operations=[
179
+ TransactionOperation.of(
180
+ operation_type=TransactionOperationType.CREATE,
181
+ dest_metafile=namespace,
182
+ )
183
+ ],
184
+ )
185
+ write_paths, txn_log_path = transaction.commit(temp_dir)
186
+ # when the transaction is committed,
187
+ # expect the namespace created to match the namespace given
188
+ deserialized_namespace = Namespace.read(write_paths.pop())
189
+ assert namespace.equivalent_to(deserialized_namespace)
190
+ # if we reread the transaction and commit it again,
191
+ reread_transaction = Transaction.read(txn_log_path)
192
+ # expect an exception to be raised
193
+ with pytest.raises(RuntimeError):
194
+ reread_transaction.commit(temp_dir)
195
+
196
+ def test_txn_bad_end_time_fails(self, temp_dir, mocker):
197
+ commit_results = _commit_single_delta_table(temp_dir)
198
+ for expected, actual, _ in commit_results:
199
+ assert expected.equivalent_to(actual)
200
+ # given a transaction with an ending timestamp set in the past
201
+ past_timestamp = time.time_ns() - NANOS_PER_SEC
202
+ mocker.patch(
203
+ "deltacat.storage.model.transaction.Transaction._parse_end_time",
204
+ return_value=past_timestamp,
205
+ )
206
+ original_delta: Delta = commit_results[5][1]
207
+ new_delta = Delta.update_for(original_delta)
208
+ txn_operations = [
209
+ TransactionOperation.of(
210
+ operation_type=TransactionOperationType.UPDATE,
211
+ dest_metafile=new_delta,
212
+ src_metafile=original_delta,
213
+ )
214
+ ]
215
+ transaction = Transaction.of(
216
+ txn_type=TransactionType.ALTER,
217
+ txn_operations=txn_operations,
218
+ )
219
+ # expect the bad timestamp to be detected and its commit to fail
220
+ with pytest.raises(RuntimeError):
221
+ transaction.commit(temp_dir)
222
+
223
+ def test_txn_conflict_concurrent_complete(self, temp_dir, mocker):
224
+ commit_results = _commit_single_delta_table(temp_dir)
225
+ for expected, actual, _ in commit_results:
226
+ assert expected.equivalent_to(actual)
227
+
228
+ # given an initial metafile revision of a committed delta
229
+ write_paths = [result[2] for result in commit_results]
230
+ orig_delta_write_path = write_paths[5]
231
+
232
+ # a new delta metafile revision written by a transaction that completed
233
+ # before seeing any concurrent conflicts
234
+ mri = MetafileRevisionInfo.parse(orig_delta_write_path)
235
+ mri.txn_id = "0000000000000_test-txn-id"
236
+ mri.txn_op_type = TransactionOperationType.UPDATE
237
+ mri.revision = mri.revision + 1
238
+ conflict_delta_write_path = mri.path
239
+ _, filesystem = resolve_path_and_filesystem(orig_delta_write_path)
240
+ with filesystem.open_output_stream(conflict_delta_write_path):
241
+ pass # Just create an empty conflicting metafile revision
242
+ txn_log_file_dir = os.path.join(
243
+ temp_dir,
244
+ TXN_DIR_NAME,
245
+ SUCCESS_TXN_DIR_NAME,
246
+ mri.txn_id,
247
+ )
248
+ filesystem.create_dir(txn_log_file_dir, recursive=True)
249
+ txn_log_file_path = os.path.join(
250
+ txn_log_file_dir,
251
+ str(time.time_ns()),
252
+ )
253
+ with filesystem.open_output_stream(txn_log_file_path):
254
+ pass # Just create an empty log to mark the txn as complete
255
+
256
+ # and a concurrent transaction that started before that transaction
257
+ # completed, writes the same delta metafile revision, then sees the
258
+ # conflict
259
+ past_timestamp = time.time_ns() - NANOS_PER_SEC
260
+ future_timestamp = 9999999999999
261
+ end_time_mock = mocker.patch(
262
+ "deltacat.storage.model.transaction.Transaction._parse_end_time",
263
+ )
264
+ end_time_mock.side_effect = (
265
+ lambda path: future_timestamp if mri.txn_id in path else past_timestamp
266
+ )
267
+ original_delta = Delta.read(orig_delta_write_path)
268
+ new_delta = Delta.update_for(original_delta)
269
+ txn_operations = [
270
+ TransactionOperation.of(
271
+ operation_type=TransactionOperationType.UPDATE,
272
+ dest_metafile=new_delta,
273
+ src_metafile=original_delta,
274
+ )
275
+ ]
276
+ transaction = Transaction.of(
277
+ txn_type=TransactionType.ALTER,
278
+ txn_operations=txn_operations,
279
+ )
280
+ # expect the commit to fail due to a concurrent modification error
281
+ with pytest.raises(RuntimeError):
282
+ transaction.commit(temp_dir)
283
+
284
+ def test_txn_conflict_concurrent_incomplete(self, temp_dir):
285
+ commit_results = _commit_single_delta_table(temp_dir)
286
+ for expected, actual, _ in commit_results:
287
+ assert expected.equivalent_to(actual)
288
+
289
+ # given an initial metafile revision of a committed delta
290
+ write_paths = [result[2] for result in commit_results]
291
+ orig_delta_write_path = write_paths[5]
292
+
293
+ # and a new delta metafile revision written by an incomplete transaction
294
+ mri = MetafileRevisionInfo.parse(orig_delta_write_path)
295
+ mri.txn_id = "9999999999999_test-txn-id"
296
+ mri.txn_op_type = TransactionOperationType.DELETE
297
+ mri.revision = mri.revision + 1
298
+ conflict_delta_write_path = mri.path
299
+ _, filesystem = resolve_path_and_filesystem(orig_delta_write_path)
300
+ with filesystem.open_output_stream(conflict_delta_write_path):
301
+ pass # Just create an empty conflicting metafile revision
302
+
303
+ # when a concurrent transaction tries to update the same delta
304
+ original_delta = Delta.read(orig_delta_write_path)
305
+ new_delta = Delta.update_for(original_delta)
306
+ transaction = Transaction.of(
307
+ txn_type=TransactionType.ALTER,
308
+ txn_operations=[
309
+ TransactionOperation.of(
310
+ operation_type=TransactionOperationType.UPDATE,
311
+ dest_metafile=new_delta,
312
+ src_metafile=original_delta,
313
+ )
314
+ ],
315
+ )
316
+ # expect the commit to fail due to a concurrent modification error
317
+ with pytest.raises(RuntimeError):
318
+ transaction.commit(temp_dir)
319
+ # expect a commit retry to also fail
320
+ with pytest.raises(RuntimeError):
321
+ transaction.commit(temp_dir)
322
+
323
+ def test_append_multiple_deltas(self, temp_dir):
324
+ commit_results = _commit_single_delta_table(temp_dir)
325
+ for expected, actual, _ in commit_results:
326
+ assert expected.equivalent_to(actual)
327
+ original_delta: Delta = commit_results[5][1]
328
+
329
+ # given a transaction containing several deltas to append
330
+ txn_operations = []
331
+
332
+ delta_append_count = 100
333
+ for i in range(delta_append_count):
334
+ new_delta = Delta.based_on(
335
+ original_delta,
336
+ new_id=str(int(original_delta.id) + i + 1),
337
+ )
338
+ txn_operations.append(
339
+ TransactionOperation.of(
340
+ operation_type=TransactionOperationType.CREATE,
341
+ dest_metafile=new_delta,
342
+ )
343
+ )
344
+ transaction = Transaction.of(
345
+ txn_type=TransactionType.APPEND,
346
+ txn_operations=txn_operations,
347
+ )
348
+ # when the transaction is committed
349
+ write_paths, txn_log_path = transaction.commit(temp_dir)
350
+ # expect all new deltas to be successfully written
351
+ assert len(write_paths) == delta_append_count
352
+ for i in range(len(write_paths)):
353
+ actual_delta = Delta.read(write_paths[i])
354
+ assert txn_operations[i].dest_metafile.equivalent_to(actual_delta)
355
+
356
+ def test_bad_update_mismatched_metafile_types(self, temp_dir):
357
+ commit_results = _commit_single_delta_table(temp_dir)
358
+ for expected, actual, _ in commit_results:
359
+ assert expected.equivalent_to(actual)
360
+ original_partition: Partition = commit_results[4][1]
361
+ original_delta: Delta = commit_results[5][1]
362
+
363
+ # given an attempt to replace a delta with a partition
364
+ replacement_partition: Partition = Partition.based_on(
365
+ original_partition,
366
+ new_id=original_partition.id + "_2",
367
+ )
368
+ # expect the transaction operation initialization to raise a value error
369
+ with pytest.raises(ValueError):
370
+ TransactionOperation.of(
371
+ operation_type=TransactionOperationType.UPDATE,
372
+ dest_metafile=replacement_partition,
373
+ src_metafile=original_delta,
374
+ )
375
+
376
+ def test_delete_delta(self, temp_dir):
377
+ commit_results = _commit_single_delta_table(temp_dir)
378
+ for expected, actual, _ in commit_results:
379
+ assert expected.equivalent_to(actual)
380
+ original_delta: Delta = commit_results[5][1]
381
+
382
+ # given a transaction containing a delta to delete
383
+ txn_operations = [
384
+ TransactionOperation.of(
385
+ operation_type=TransactionOperationType.DELETE,
386
+ dest_metafile=original_delta,
387
+ )
388
+ ]
389
+ transaction = Transaction.of(
390
+ txn_type=TransactionType.DELETE,
391
+ txn_operations=txn_operations,
392
+ )
393
+ # when the transaction is committed
394
+ write_paths, txn_log_path = transaction.commit(temp_dir)
395
+
396
+ # expect one new delete metafile to be written
397
+ assert len(write_paths) == 1
398
+ delete_write_path = write_paths[0]
399
+
400
+ # expect the delete metafile to contain the input txn op dest_metafile
401
+ assert TransactionOperationType.DELETE.value in delete_write_path
402
+ actual_delta = Delta.read(delete_write_path)
403
+ assert original_delta.equivalent_to(actual_delta)
404
+
405
+ # expect a subsequent replace of the deleted delta to fail
406
+ replacement_delta: Delta = Delta.based_on(
407
+ original_delta,
408
+ new_id=str(int(original_delta.id) + 1),
409
+ )
410
+ bad_txn_operations = [
411
+ TransactionOperation.of(
412
+ operation_type=TransactionOperationType.UPDATE,
413
+ dest_metafile=replacement_delta,
414
+ src_metafile=original_delta,
415
+ )
416
+ ]
417
+ transaction = Transaction.of(
418
+ txn_type=TransactionType.OVERWRITE,
419
+ txn_operations=bad_txn_operations,
420
+ )
421
+ with pytest.raises(ValueError):
422
+ transaction.commit(temp_dir)
423
+
424
+ # expect subsequent deletes of the deleted delta to fail
425
+ bad_txn_operations = [
426
+ TransactionOperation.of(
427
+ operation_type=TransactionOperationType.DELETE,
428
+ dest_metafile=original_delta,
429
+ )
430
+ ]
431
+ transaction = Transaction.of(
432
+ txn_type=TransactionType.DELETE,
433
+ txn_operations=bad_txn_operations,
434
+ )
435
+ with pytest.raises(ValueError):
436
+ transaction.commit(temp_dir)
437
+
438
+ def test_replace_delta(self, temp_dir):
439
+ commit_results = _commit_single_delta_table(temp_dir)
440
+ for expected, actual, _ in commit_results:
441
+ assert expected.equivalent_to(actual)
442
+ original_delta: Delta = commit_results[5][1]
443
+
444
+ # given a transaction containing a delta replacement
445
+ replacement_delta: Delta = Delta.based_on(
446
+ original_delta,
447
+ new_id=str(int(original_delta.id) + 1),
448
+ )
449
+
450
+ # expect the proposed replacement delta to be assigned a new ID
451
+ assert replacement_delta.id != original_delta.id
452
+
453
+ txn_operations = [
454
+ TransactionOperation.of(
455
+ operation_type=TransactionOperationType.UPDATE,
456
+ dest_metafile=replacement_delta,
457
+ src_metafile=original_delta,
458
+ )
459
+ ]
460
+ transaction = Transaction.of(
461
+ txn_type=TransactionType.OVERWRITE,
462
+ txn_operations=txn_operations,
463
+ )
464
+ # when the transaction is committed
465
+ write_paths, txn_log_path = transaction.commit(temp_dir)
466
+
467
+ # expect two new metafiles to be written
468
+ # (i.e., delete old delta, create replacement delta)
469
+ assert len(write_paths) == 2
470
+ delete_write_path = write_paths[0]
471
+ create_write_path = write_paths[1]
472
+
473
+ # expect the replacement delta to be successfully written and read
474
+ assert TransactionOperationType.CREATE.value in create_write_path
475
+ actual_delta = Delta.read(create_write_path)
476
+ assert replacement_delta.equivalent_to(actual_delta)
477
+
478
+ # expect the delete metafile to also contain the replacement delta
479
+ assert TransactionOperationType.DELETE.value in delete_write_path
480
+ actual_delta = Delta.read(delete_write_path)
481
+ assert replacement_delta.equivalent_to(actual_delta)
482
+
483
+ # expect a subsequent replace of the original delta to fail
484
+ bad_txn_operations = [
485
+ TransactionOperation.of(
486
+ operation_type=TransactionOperationType.UPDATE,
487
+ dest_metafile=replacement_delta,
488
+ src_metafile=original_delta,
489
+ )
490
+ ]
491
+ transaction = Transaction.of(
492
+ txn_type=TransactionType.OVERWRITE,
493
+ txn_operations=bad_txn_operations,
494
+ )
495
+ with pytest.raises(ValueError):
496
+ transaction.commit(temp_dir)
497
+
498
+ # expect deletes of the original delta to fail
499
+ bad_txn_operations = [
500
+ TransactionOperation.of(
501
+ operation_type=TransactionOperationType.DELETE,
502
+ dest_metafile=original_delta,
503
+ )
504
+ ]
505
+ transaction = Transaction.of(
506
+ txn_type=TransactionType.DELETE,
507
+ txn_operations=bad_txn_operations,
508
+ )
509
+ with pytest.raises(ValueError):
510
+ transaction.commit(temp_dir)
511
+
512
+ def test_delete_partition(self, temp_dir):
513
+ commit_results = _commit_single_delta_table(temp_dir)
514
+ for expected, actual, _ in commit_results:
515
+ assert expected.equivalent_to(actual)
516
+ original_partition: Partition = commit_results[4][1]
517
+
518
+ txn_operations = [
519
+ TransactionOperation.of(
520
+ operation_type=TransactionOperationType.DELETE,
521
+ dest_metafile=original_partition,
522
+ )
523
+ ]
524
+ transaction = Transaction.of(
525
+ txn_type=TransactionType.DELETE,
526
+ txn_operations=txn_operations,
527
+ )
528
+ # when the transaction is committed
529
+ write_paths, txn_log_path = transaction.commit(temp_dir)
530
+
531
+ # expect 1 new partition metafile to be written
532
+ assert len(write_paths) == 1
533
+ delete_write_path = write_paths[0]
534
+
535
+ # expect the delete metafile to contain the input txn op dest_metafile
536
+ assert TransactionOperationType.DELETE.value in delete_write_path
537
+ actual_partition = Partition.read(delete_write_path)
538
+ assert original_partition.equivalent_to(actual_partition)
539
+
540
+ # expect child metafiles in the deleted partition to remain readable and unchanged
541
+ child_metafiles_read_post_delete = [
542
+ Delta.read(commit_results[5][2]),
543
+ ]
544
+ original_child_metafiles_to_create = [
545
+ Delta(commit_results[5][0]),
546
+ ]
547
+ original_child_metafiles_created = [
548
+ Delta(commit_results[5][1]),
549
+ ]
550
+ for i in range(len(original_child_metafiles_to_create)):
551
+ assert child_metafiles_read_post_delete[i].equivalent_to(
552
+ original_child_metafiles_to_create[i]
553
+ )
554
+ assert child_metafiles_read_post_delete[i].equivalent_to(
555
+ original_child_metafiles_created[i]
556
+ )
557
+
558
+ # expect a subsequent replace of the deleted partition to fail
559
+ replacement_partition: Partition = Partition.based_on(
560
+ original_partition,
561
+ new_id=original_partition.id + "_2",
562
+ )
563
+ bad_txn_operations = [
564
+ TransactionOperation.of(
565
+ operation_type=TransactionOperationType.UPDATE,
566
+ dest_metafile=replacement_partition,
567
+ src_metafile=original_partition,
568
+ )
569
+ ]
570
+ transaction = Transaction.of(
571
+ txn_type=TransactionType.OVERWRITE,
572
+ txn_operations=bad_txn_operations,
573
+ )
574
+ with pytest.raises(ValueError):
575
+ transaction.commit(temp_dir)
576
+
577
+ # expect subsequent deletes of the deleted partition to fail
578
+ bad_txn_operations = [
579
+ TransactionOperation.of(
580
+ operation_type=TransactionOperationType.DELETE,
581
+ dest_metafile=original_partition,
582
+ )
583
+ ]
584
+ transaction = Transaction.of(
585
+ txn_type=TransactionType.DELETE,
586
+ txn_operations=bad_txn_operations,
587
+ )
588
+ with pytest.raises(ValueError):
589
+ transaction.commit(temp_dir)
590
+
591
+ # expect new child metafile creation under the deleted partition to fail
592
+ for metafile in original_child_metafiles_created:
593
+ bad_txn_operations = [
594
+ TransactionOperation.of(
595
+ operation_type=TransactionOperationType.CREATE,
596
+ dest_metafile=metafile,
597
+ )
598
+ ]
599
+ transaction = Transaction.of(
600
+ txn_type=TransactionType.APPEND,
601
+ txn_operations=bad_txn_operations,
602
+ )
603
+ with pytest.raises(ValueError):
604
+ transaction.commit(temp_dir)
605
+
606
+ def test_replace_partition(self, temp_dir):
607
+ commit_results = _commit_single_delta_table(temp_dir)
608
+ for expected, actual, _ in commit_results:
609
+ assert expected.equivalent_to(actual)
610
+ original_partition: Partition = commit_results[4][1]
611
+
612
+ # given a transaction containing a partition replacement
613
+ replacement_partition: Partition = Partition.based_on(
614
+ original_partition,
615
+ new_id=original_partition.id + "_2",
616
+ )
617
+
618
+ # expect the proposed replacement partition to be assigned a new ID
619
+ assert replacement_partition.id != original_partition.id
620
+
621
+ txn_operations = [
622
+ TransactionOperation.of(
623
+ operation_type=TransactionOperationType.UPDATE,
624
+ dest_metafile=replacement_partition,
625
+ src_metafile=original_partition,
626
+ )
627
+ ]
628
+ transaction = Transaction.of(
629
+ txn_type=TransactionType.OVERWRITE,
630
+ txn_operations=txn_operations,
631
+ )
632
+ # when the transaction is committed
633
+ write_paths, txn_log_path = transaction.commit(temp_dir)
634
+
635
+ # expect two new partition metafiles to be written
636
+ # (i.e., delete old partition, create replacement partition)
637
+ assert len(write_paths) == 2
638
+ delete_write_path = write_paths[0]
639
+ create_write_path = write_paths[1]
640
+
641
+ # expect the replacement partition to be successfully written and read
642
+ assert TransactionOperationType.CREATE.value in create_write_path
643
+ actual_partition = Partition.read(create_write_path)
644
+ assert replacement_partition.equivalent_to(actual_partition)
645
+
646
+ # expect the delete metafile to also contain the replacement partition
647
+ assert TransactionOperationType.DELETE.value in delete_write_path
648
+ actual_partition = Partition.read(delete_write_path)
649
+ assert replacement_partition.equivalent_to(actual_partition)
650
+
651
+ # expect old child metafiles for the replaced partition to remain readable
652
+ child_metafiles_read_post_replace = [
653
+ Delta.read(commit_results[5][2]),
654
+ ]
655
+ # expect old child metafiles read to share the same parent table name as
656
+ # the replacement partition, but have a different parent partition ID
657
+ for metafile in child_metafiles_read_post_replace:
658
+ assert (
659
+ metafile.table_name
660
+ == replacement_partition.table_name
661
+ == original_partition.table_name
662
+ )
663
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
664
+ parent_partition_id = ancestor_ids[4]
665
+ assert parent_partition_id == original_partition.id
666
+
667
+ # expect original child metafiles to share the original parent partition ID
668
+ original_child_metafiles_to_create = [
669
+ Delta(commit_results[5][0]),
670
+ ]
671
+ original_child_metafiles_created = [
672
+ Delta(commit_results[5][1]),
673
+ ]
674
+ for i in range(len(original_child_metafiles_to_create)):
675
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
676
+ parent_partition_id = ancestor_ids[4]
677
+ assert parent_partition_id == original_partition.id
678
+
679
+ # expect a subsequent replace of the original partition to fail
680
+ bad_txn_operations = [
681
+ TransactionOperation.of(
682
+ operation_type=TransactionOperationType.UPDATE,
683
+ dest_metafile=replacement_partition,
684
+ src_metafile=original_partition,
685
+ )
686
+ ]
687
+ transaction = Transaction.of(
688
+ txn_type=TransactionType.OVERWRITE,
689
+ txn_operations=bad_txn_operations,
690
+ )
691
+ with pytest.raises(ValueError):
692
+ transaction.commit(temp_dir)
693
+
694
+ # expect deletes of the original partition to fail
695
+ bad_txn_operations = [
696
+ TransactionOperation.of(
697
+ operation_type=TransactionOperationType.DELETE,
698
+ dest_metafile=original_partition,
699
+ )
700
+ ]
701
+ transaction = Transaction.of(
702
+ txn_type=TransactionType.DELETE,
703
+ txn_operations=bad_txn_operations,
704
+ )
705
+ with pytest.raises(ValueError):
706
+ transaction.commit(temp_dir)
707
+
708
+ # expect new child metafile creation under the old partition to fail
709
+ for metafile in original_child_metafiles_created:
710
+ bad_txn_operations = [
711
+ TransactionOperation.of(
712
+ operation_type=TransactionOperationType.CREATE,
713
+ dest_metafile=metafile,
714
+ )
715
+ ]
716
+ transaction = Transaction.of(
717
+ txn_type=TransactionType.APPEND,
718
+ txn_operations=bad_txn_operations,
719
+ )
720
+ with pytest.raises(ValueError):
721
+ transaction.commit(temp_dir)
722
+
723
+ def test_delete_stream(self, temp_dir):
724
+ commit_results = _commit_single_delta_table(temp_dir)
725
+ for expected, actual, _ in commit_results:
726
+ assert expected.equivalent_to(actual)
727
+ original_stream: Stream = commit_results[3][1]
728
+
729
+ txn_operations = [
730
+ TransactionOperation.of(
731
+ operation_type=TransactionOperationType.DELETE,
732
+ dest_metafile=original_stream,
733
+ )
734
+ ]
735
+ transaction = Transaction.of(
736
+ txn_type=TransactionType.DELETE,
737
+ txn_operations=txn_operations,
738
+ )
739
+ # when the transaction is committed
740
+ write_paths, txn_log_path = transaction.commit(temp_dir)
741
+
742
+ # expect 1 new stream metafile to be written
743
+ assert len(write_paths) == 1
744
+ delete_write_path = write_paths[0]
745
+
746
+ # expect the delete metafile to contain the input txn op dest_metafile
747
+ assert TransactionOperationType.DELETE.value in delete_write_path
748
+ actual_stream = Stream.read(delete_write_path)
749
+ assert original_stream == actual_stream
750
+
751
+ # expect child metafiles in the deleted stream to remain readable and unchanged
752
+ child_metafiles_read_post_delete = [
753
+ Delta.read(commit_results[5][2]),
754
+ Partition.read(commit_results[4][2]),
755
+ ]
756
+ original_child_metafiles_to_create = [
757
+ Delta(commit_results[5][0]),
758
+ Partition(commit_results[4][0]),
759
+ ]
760
+ original_child_metafiles_created = [
761
+ Delta(commit_results[5][1]),
762
+ Partition(commit_results[4][1]),
763
+ ]
764
+ for i in range(len(original_child_metafiles_to_create)):
765
+ assert child_metafiles_read_post_delete[i].equivalent_to(
766
+ original_child_metafiles_to_create[i]
767
+ )
768
+ assert child_metafiles_read_post_delete[i].equivalent_to(
769
+ original_child_metafiles_created[i]
770
+ )
771
+
772
+ # expect a subsequent replace of the deleted stream to fail
773
+ replacement_stream: Stream = Stream.based_on(
774
+ original_stream,
775
+ new_id=original_stream.id + "_2",
776
+ )
777
+ bad_txn_operations = [
778
+ TransactionOperation.of(
779
+ operation_type=TransactionOperationType.UPDATE,
780
+ dest_metafile=replacement_stream,
781
+ src_metafile=original_stream,
782
+ )
783
+ ]
784
+ transaction = Transaction.of(
785
+ txn_type=TransactionType.OVERWRITE,
786
+ txn_operations=bad_txn_operations,
787
+ )
788
+ with pytest.raises(ValueError):
789
+ transaction.commit(temp_dir)
790
+
791
+ # expect subsequent deletes of the deleted stream to fail
792
+ bad_txn_operations = [
793
+ TransactionOperation.of(
794
+ operation_type=TransactionOperationType.DELETE,
795
+ dest_metafile=original_stream,
796
+ )
797
+ ]
798
+ transaction = Transaction.of(
799
+ txn_type=TransactionType.DELETE,
800
+ txn_operations=bad_txn_operations,
801
+ )
802
+ with pytest.raises(ValueError):
803
+ transaction.commit(temp_dir)
804
+
805
+ # expect new child metafile creation under the deleted stream to fail
806
+ for metafile in original_child_metafiles_created:
807
+ bad_txn_operations = [
808
+ TransactionOperation.of(
809
+ operation_type=TransactionOperationType.CREATE,
810
+ dest_metafile=metafile,
811
+ )
812
+ ]
813
+ transaction = Transaction.of(
814
+ txn_type=TransactionType.APPEND,
815
+ txn_operations=bad_txn_operations,
816
+ )
817
+ with pytest.raises(ValueError):
818
+ transaction.commit(temp_dir)
819
+
820
+ def test_replace_stream(self, temp_dir):
821
+ commit_results = _commit_single_delta_table(temp_dir)
822
+ for expected, actual, _ in commit_results:
823
+ assert expected.equivalent_to(actual)
824
+ original_stream: Stream = commit_results[3][1]
825
+
826
+ # given a transaction containing a stream replacement
827
+ replacement_stream: Stream = Stream.based_on(
828
+ original_stream,
829
+ new_id=original_stream.id + "_2",
830
+ )
831
+
832
+ # expect the proposed replacement stream to be assigned a new ID
833
+ assert replacement_stream.id != original_stream.id
834
+
835
+ txn_operations = [
836
+ TransactionOperation.of(
837
+ operation_type=TransactionOperationType.UPDATE,
838
+ dest_metafile=replacement_stream,
839
+ src_metafile=original_stream,
840
+ )
841
+ ]
842
+ transaction = Transaction.of(
843
+ txn_type=TransactionType.OVERWRITE,
844
+ txn_operations=txn_operations,
845
+ )
846
+ # when the transaction is committed
847
+ write_paths, txn_log_path = transaction.commit(temp_dir)
848
+
849
+ # expect two new stream metafiles to be written
850
+ # (i.e., delete old stream, create replacement stream)
851
+ assert len(write_paths) == 2
852
+ delete_write_path = write_paths[0]
853
+ create_write_path = write_paths[1]
854
+
855
+ # expect the replacement stream to be successfully written and read
856
+ assert TransactionOperationType.CREATE.value in create_write_path
857
+ actual_stream = Stream.read(create_write_path)
858
+ assert replacement_stream.equivalent_to(actual_stream)
859
+
860
+ # expect the delete metafile to also contain the replacement stream
861
+ assert TransactionOperationType.DELETE.value in delete_write_path
862
+ actual_stream = Stream.read(delete_write_path)
863
+ assert replacement_stream.equivalent_to(actual_stream)
864
+
865
+ # expect old child metafiles for the replaced stream to remain readable
866
+ child_metafiles_read_post_replace = [
867
+ Delta.read(commit_results[5][2]),
868
+ Partition.read(commit_results[4][2]),
869
+ ]
870
+ # expect old child metafiles read to share the same parent table name as
871
+ # the replacement stream, but have a different parent stream ID
872
+ for metafile in child_metafiles_read_post_replace:
873
+ assert (
874
+ metafile.table_name
875
+ == replacement_stream.table_name
876
+ == original_stream.table_name
877
+ )
878
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
879
+ parent_stream_id = ancestor_ids[3]
880
+ assert parent_stream_id == original_stream.id
881
+
882
+ # expect original child metafiles to share the original parent stream ID
883
+ original_child_metafiles_to_create = [
884
+ Delta(commit_results[5][0]),
885
+ Partition(commit_results[4][0]),
886
+ ]
887
+ original_child_metafiles_created = [
888
+ Delta(commit_results[5][1]),
889
+ Partition(commit_results[4][1]),
890
+ ]
891
+ for i in range(len(original_child_metafiles_to_create)):
892
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
893
+ parent_stream_id = ancestor_ids[3]
894
+ assert parent_stream_id == original_stream.id
895
+
896
+ # expect a subsequent replace of the original stream to fail
897
+ bad_txn_operations = [
898
+ TransactionOperation.of(
899
+ operation_type=TransactionOperationType.UPDATE,
900
+ dest_metafile=replacement_stream,
901
+ src_metafile=original_stream,
902
+ )
903
+ ]
904
+ transaction = Transaction.of(
905
+ txn_type=TransactionType.OVERWRITE,
906
+ txn_operations=bad_txn_operations,
907
+ )
908
+ with pytest.raises(ValueError):
909
+ transaction.commit(temp_dir)
910
+
911
+ # expect deletes of the original stream to fail
912
+ bad_txn_operations = [
913
+ TransactionOperation.of(
914
+ operation_type=TransactionOperationType.DELETE,
915
+ dest_metafile=original_stream,
916
+ )
917
+ ]
918
+ transaction = Transaction.of(
919
+ txn_type=TransactionType.DELETE,
920
+ txn_operations=bad_txn_operations,
921
+ )
922
+ with pytest.raises(ValueError):
923
+ transaction.commit(temp_dir)
924
+
925
+ # expect new child metafile creation under the old stream to fail
926
+ for metafile in original_child_metafiles_created:
927
+ bad_txn_operations = [
928
+ TransactionOperation.of(
929
+ operation_type=TransactionOperationType.CREATE,
930
+ dest_metafile=metafile,
931
+ )
932
+ ]
933
+ transaction = Transaction.of(
934
+ txn_type=TransactionType.APPEND,
935
+ txn_operations=bad_txn_operations,
936
+ )
937
+ with pytest.raises(ValueError):
938
+ transaction.commit(temp_dir)
939
+
940
+ def test_delete_table_version(self, temp_dir):
941
+ commit_results = _commit_single_delta_table(temp_dir)
942
+ for expected, actual, _ in commit_results:
943
+ assert expected.equivalent_to(actual)
944
+ original_table_version: TableVersion = commit_results[2][1]
945
+
946
+ txn_operations = [
947
+ TransactionOperation.of(
948
+ operation_type=TransactionOperationType.DELETE,
949
+ dest_metafile=original_table_version,
950
+ )
951
+ ]
952
+ transaction = Transaction.of(
953
+ txn_type=TransactionType.DELETE,
954
+ txn_operations=txn_operations,
955
+ )
956
+ # when the transaction is committed
957
+ write_paths, txn_log_path = transaction.commit(temp_dir)
958
+
959
+ # expect 1 new table version metafile to be written
960
+ assert len(write_paths) == 1
961
+ delete_write_path = write_paths[0]
962
+
963
+ # expect the delete metafile to contain the input txn op dest_metafile
964
+ assert TransactionOperationType.DELETE.value in delete_write_path
965
+ actual_table_version = TableVersion.read(delete_write_path)
966
+ assert original_table_version.equivalent_to(actual_table_version)
967
+
968
+ # expect child metafiles in the deleted table version to remain readable and unchanged
969
+ child_metafiles_read_post_delete = [
970
+ Delta.read(commit_results[5][2]),
971
+ Partition.read(commit_results[4][2]),
972
+ Stream.read(commit_results[3][2]),
973
+ ]
974
+ original_child_metafiles_to_create = [
975
+ Delta(commit_results[5][0]),
976
+ Partition(commit_results[4][0]),
977
+ Stream(commit_results[3][0]),
978
+ ]
979
+ original_child_metafiles_created = [
980
+ Delta(commit_results[5][1]),
981
+ Partition(commit_results[4][1]),
982
+ Stream(commit_results[3][1]),
983
+ ]
984
+ for i in range(len(original_child_metafiles_to_create)):
985
+ assert child_metafiles_read_post_delete[i].equivalent_to(
986
+ original_child_metafiles_to_create[i]
987
+ )
988
+ assert child_metafiles_read_post_delete[i].equivalent_to(
989
+ original_child_metafiles_created[i]
990
+ )
991
+
992
+ # expect a subsequent replace of the deleted table version to fail
993
+ replacement_table_version: TableVersion = TableVersion.based_on(
994
+ original_table_version,
995
+ new_id=original_table_version.id + "0",
996
+ )
997
+ bad_txn_operations = [
998
+ TransactionOperation.of(
999
+ operation_type=TransactionOperationType.UPDATE,
1000
+ dest_metafile=replacement_table_version,
1001
+ src_metafile=original_table_version,
1002
+ )
1003
+ ]
1004
+ transaction = Transaction.of(
1005
+ txn_type=TransactionType.OVERWRITE,
1006
+ txn_operations=bad_txn_operations,
1007
+ )
1008
+ with pytest.raises(ValueError):
1009
+ transaction.commit(temp_dir)
1010
+
1011
+ # expect subsequent deletes of the deleted table version to fail
1012
+ bad_txn_operations = [
1013
+ TransactionOperation.of(
1014
+ operation_type=TransactionOperationType.DELETE,
1015
+ dest_metafile=original_table_version,
1016
+ )
1017
+ ]
1018
+ transaction = Transaction.of(
1019
+ txn_type=TransactionType.DELETE,
1020
+ txn_operations=bad_txn_operations,
1021
+ )
1022
+ with pytest.raises(ValueError):
1023
+ transaction.commit(temp_dir)
1024
+
1025
+ # expect new child metafile creation under the deleted table version to fail
1026
+ for metafile in original_child_metafiles_created:
1027
+ bad_txn_operations = [
1028
+ TransactionOperation.of(
1029
+ operation_type=TransactionOperationType.CREATE,
1030
+ dest_metafile=metafile,
1031
+ )
1032
+ ]
1033
+ transaction = Transaction.of(
1034
+ txn_type=TransactionType.APPEND,
1035
+ txn_operations=bad_txn_operations,
1036
+ )
1037
+ with pytest.raises(ValueError):
1038
+ transaction.commit(temp_dir)
1039
+
1040
+ def test_replace_table_version(self, temp_dir):
1041
+ commit_results = _commit_single_delta_table(temp_dir)
1042
+ for expected, actual, _ in commit_results:
1043
+ assert expected.equivalent_to(actual)
1044
+ original_table_version: TableVersion = commit_results[2][1]
1045
+
1046
+ # given a transaction containing a table version replacement
1047
+ replacement_table_version: TableVersion = TableVersion.based_on(
1048
+ original_table_version,
1049
+ new_id=original_table_version.id + "0",
1050
+ )
1051
+
1052
+ # expect the proposed replacement table version to be assigned a new ID
1053
+ assert replacement_table_version.id != original_table_version.id
1054
+
1055
+ txn_operations = [
1056
+ TransactionOperation.of(
1057
+ operation_type=TransactionOperationType.UPDATE,
1058
+ dest_metafile=replacement_table_version,
1059
+ src_metafile=original_table_version,
1060
+ )
1061
+ ]
1062
+ transaction = Transaction.of(
1063
+ txn_type=TransactionType.OVERWRITE,
1064
+ txn_operations=txn_operations,
1065
+ )
1066
+ # when the transaction is committed
1067
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1068
+
1069
+ # expect two new table version metafiles to be written
1070
+ # (i.e., delete old table version, create replacement table version)
1071
+ assert len(write_paths) == 2
1072
+ delete_write_path = write_paths[0]
1073
+ create_write_path = write_paths[1]
1074
+
1075
+ # expect the replacement table version to be successfully written and read
1076
+ assert TransactionOperationType.CREATE.value in create_write_path
1077
+ actual_table_version = TableVersion.read(create_write_path)
1078
+ assert replacement_table_version.equivalent_to(actual_table_version)
1079
+
1080
+ # expect the delete metafile to also contain the replacement table version
1081
+ assert TransactionOperationType.DELETE.value in delete_write_path
1082
+ actual_table_version = TableVersion.read(delete_write_path)
1083
+ assert replacement_table_version.equivalent_to(actual_table_version)
1084
+
1085
+ # expect old child metafiles for the replaced table version to remain readable
1086
+ child_metafiles_read_post_replace = [
1087
+ Delta.read(commit_results[5][2]),
1088
+ Partition.read(commit_results[4][2]),
1089
+ Stream.read(commit_results[3][2]),
1090
+ ]
1091
+ # expect old child metafiles read to share the same parent table name as
1092
+ # the replacement table version, but have a different parent table
1093
+ # version ID
1094
+ for metafile in child_metafiles_read_post_replace:
1095
+ assert (
1096
+ metafile.table_name
1097
+ == replacement_table_version.table_name
1098
+ == original_table_version.table_name
1099
+ )
1100
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
1101
+ parent_table_version_id = ancestor_ids[2]
1102
+ assert parent_table_version_id == original_table_version.id
1103
+
1104
+ # expect original child metafiles to share the original parent table version ID
1105
+ original_child_metafiles_to_create = [
1106
+ Delta(commit_results[5][0]),
1107
+ Partition(commit_results[4][0]),
1108
+ Stream(commit_results[3][0]),
1109
+ ]
1110
+ original_child_metafiles_created = [
1111
+ Delta(commit_results[5][1]),
1112
+ Partition(commit_results[4][1]),
1113
+ Stream(commit_results[3][1]),
1114
+ ]
1115
+ for i in range(len(original_child_metafiles_to_create)):
1116
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
1117
+ parent_table_version_id = ancestor_ids[2]
1118
+ assert parent_table_version_id == original_table_version.id
1119
+
1120
+ # expect a subsequent replace of the original table version to fail
1121
+ bad_txn_operations = [
1122
+ TransactionOperation.of(
1123
+ operation_type=TransactionOperationType.UPDATE,
1124
+ dest_metafile=replacement_table_version,
1125
+ src_metafile=original_table_version,
1126
+ )
1127
+ ]
1128
+ transaction = Transaction.of(
1129
+ txn_type=TransactionType.OVERWRITE,
1130
+ txn_operations=bad_txn_operations,
1131
+ )
1132
+ with pytest.raises(ValueError):
1133
+ transaction.commit(temp_dir)
1134
+
1135
+ # expect deletes of the original table version to fail
1136
+ bad_txn_operations = [
1137
+ TransactionOperation.of(
1138
+ operation_type=TransactionOperationType.DELETE,
1139
+ dest_metafile=original_table_version,
1140
+ )
1141
+ ]
1142
+ transaction = Transaction.of(
1143
+ txn_type=TransactionType.DELETE,
1144
+ txn_operations=bad_txn_operations,
1145
+ )
1146
+ with pytest.raises(ValueError):
1147
+ transaction.commit(temp_dir)
1148
+
1149
+ # expect new child metafile creation under the old table version to fail
1150
+ for metafile in original_child_metafiles_created:
1151
+ bad_txn_operations = [
1152
+ TransactionOperation.of(
1153
+ operation_type=TransactionOperationType.CREATE,
1154
+ dest_metafile=metafile,
1155
+ )
1156
+ ]
1157
+ transaction = Transaction.of(
1158
+ txn_type=TransactionType.APPEND,
1159
+ txn_operations=bad_txn_operations,
1160
+ )
1161
+ with pytest.raises(ValueError):
1162
+ transaction.commit(temp_dir)
1163
+
1164
+ def test_delete_table(self, temp_dir):
1165
+ commit_results = _commit_single_delta_table(temp_dir)
1166
+ for expected, actual, _ in commit_results:
1167
+ assert expected.equivalent_to(actual)
1168
+ original_table: Table = commit_results[1][1]
1169
+
1170
+ txn_operations = [
1171
+ TransactionOperation.of(
1172
+ operation_type=TransactionOperationType.DELETE,
1173
+ dest_metafile=original_table,
1174
+ )
1175
+ ]
1176
+ transaction = Transaction.of(
1177
+ txn_type=TransactionType.DELETE,
1178
+ txn_operations=txn_operations,
1179
+ )
1180
+ # when the transaction is committed
1181
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1182
+
1183
+ # expect 1 new table metafile to be written
1184
+ assert len(write_paths) == 1
1185
+ delete_write_path = write_paths[0]
1186
+
1187
+ # expect the delete metafile to contain the input txn op dest_metafile
1188
+ assert TransactionOperationType.DELETE.value in delete_write_path
1189
+ actual_table = Table.read(delete_write_path)
1190
+ assert original_table.equivalent_to(actual_table)
1191
+
1192
+ # expect child metafiles in the deleted table to remain readable and unchanged
1193
+ child_metafiles_read_post_delete = [
1194
+ Delta.read(commit_results[5][2]),
1195
+ Partition.read(commit_results[4][2]),
1196
+ Stream.read(commit_results[3][2]),
1197
+ TableVersion.read(commit_results[2][2]),
1198
+ ]
1199
+ original_child_metafiles_to_create = [
1200
+ Delta(commit_results[5][0]),
1201
+ Partition(commit_results[4][0]),
1202
+ Stream(commit_results[3][0]),
1203
+ TableVersion(commit_results[2][0]),
1204
+ ]
1205
+ original_child_metafiles_created = [
1206
+ Delta(commit_results[5][1]),
1207
+ Partition(commit_results[4][1]),
1208
+ Stream(commit_results[3][1]),
1209
+ TableVersion(commit_results[2][1]),
1210
+ ]
1211
+ for i in range(len(original_child_metafiles_to_create)):
1212
+ assert child_metafiles_read_post_delete[i].equivalent_to(
1213
+ original_child_metafiles_to_create[i]
1214
+ )
1215
+ assert child_metafiles_read_post_delete[i].equivalent_to(
1216
+ original_child_metafiles_created[i]
1217
+ )
1218
+
1219
+ # expect a subsequent replace of the deleted table to fail
1220
+ replacement_table: Table = Table.based_on(original_table)
1221
+ bad_txn_operations = [
1222
+ TransactionOperation.of(
1223
+ operation_type=TransactionOperationType.UPDATE,
1224
+ dest_metafile=replacement_table,
1225
+ src_metafile=original_table,
1226
+ )
1227
+ ]
1228
+ transaction = Transaction.of(
1229
+ txn_type=TransactionType.OVERWRITE,
1230
+ txn_operations=bad_txn_operations,
1231
+ )
1232
+ with pytest.raises(ValueError):
1233
+ transaction.commit(temp_dir)
1234
+
1235
+ # expect subsequent deletes of the deleted table to fail
1236
+ bad_txn_operations = [
1237
+ TransactionOperation.of(
1238
+ operation_type=TransactionOperationType.DELETE,
1239
+ dest_metafile=original_table,
1240
+ )
1241
+ ]
1242
+ transaction = Transaction.of(
1243
+ txn_type=TransactionType.DELETE,
1244
+ txn_operations=bad_txn_operations,
1245
+ )
1246
+ with pytest.raises(ValueError):
1247
+ transaction.commit(temp_dir)
1248
+
1249
+ # expect new child metafile creation under the deleted table to fail
1250
+ for metafile in original_child_metafiles_created:
1251
+ bad_txn_operations = [
1252
+ TransactionOperation.of(
1253
+ operation_type=TransactionOperationType.CREATE,
1254
+ dest_metafile=metafile,
1255
+ )
1256
+ ]
1257
+ transaction = Transaction.of(
1258
+ txn_type=TransactionType.APPEND,
1259
+ txn_operations=bad_txn_operations,
1260
+ )
1261
+ with pytest.raises(ValueError):
1262
+ transaction.commit(temp_dir)
1263
+
1264
+ def test_replace_table(self, temp_dir):
1265
+ commit_results = _commit_single_delta_table(temp_dir)
1266
+ for expected, actual, _ in commit_results:
1267
+ assert expected.equivalent_to(actual)
1268
+ original_table: Table = commit_results[1][1]
1269
+
1270
+ # given a transaction containing a table replacement
1271
+ replacement_table: Table = Table.based_on(original_table)
1272
+
1273
+ # expect the proposed replacement table to be assigned a new ID, but
1274
+ # continue to have the same name as the original table
1275
+ assert replacement_table.id != original_table.id
1276
+ assert replacement_table.table_name == original_table.table_name
1277
+
1278
+ txn_operations = [
1279
+ TransactionOperation.of(
1280
+ operation_type=TransactionOperationType.UPDATE,
1281
+ dest_metafile=replacement_table,
1282
+ src_metafile=original_table,
1283
+ )
1284
+ ]
1285
+ transaction = Transaction.of(
1286
+ txn_type=TransactionType.OVERWRITE,
1287
+ txn_operations=txn_operations,
1288
+ )
1289
+ # when the transaction is committed
1290
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1291
+
1292
+ # expect two new table metafiles to be written
1293
+ # (i.e., delete old table, create replacement table)
1294
+ assert len(write_paths) == 2
1295
+ delete_write_path = write_paths[0]
1296
+ create_write_path = write_paths[1]
1297
+
1298
+ # expect the replacement table to be successfully written and read
1299
+ assert TransactionOperationType.CREATE.value in create_write_path
1300
+ actual_table = Table.read(create_write_path)
1301
+ assert replacement_table.equivalent_to(actual_table)
1302
+
1303
+ # expect the delete metafile to also contain the replacement table
1304
+ assert TransactionOperationType.DELETE.value in delete_write_path
1305
+ actual_table = Table.read(delete_write_path)
1306
+ assert replacement_table.equivalent_to(actual_table)
1307
+
1308
+ # expect old child metafiles for the replaced table to remain readable
1309
+ child_metafiles_read_post_replace = [
1310
+ Delta.read(commit_results[5][2]),
1311
+ Partition.read(commit_results[4][2]),
1312
+ Stream.read(commit_results[3][2]),
1313
+ TableVersion.read(commit_results[2][2]),
1314
+ ]
1315
+ # expect old child metafiles read to share the same parent table name as
1316
+ # the replacement table, but have a different parent table ID
1317
+ for metafile in child_metafiles_read_post_replace:
1318
+ assert (
1319
+ metafile.table_name
1320
+ == replacement_table.table_name
1321
+ == original_table.table_name
1322
+ )
1323
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
1324
+ parent_table_id = ancestor_ids[1]
1325
+ assert parent_table_id == original_table.id
1326
+
1327
+ # expect original child metafiles to share the original parent table ID
1328
+ original_child_metafiles_to_create = [
1329
+ Delta(commit_results[5][0]),
1330
+ Partition(commit_results[4][0]),
1331
+ Stream(commit_results[3][0]),
1332
+ TableVersion(commit_results[2][0]),
1333
+ ]
1334
+ original_child_metafiles_created = [
1335
+ Delta(commit_results[5][1]),
1336
+ Partition(commit_results[4][1]),
1337
+ Stream(commit_results[3][1]),
1338
+ TableVersion(commit_results[2][1]),
1339
+ ]
1340
+ for i in range(len(original_child_metafiles_to_create)):
1341
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
1342
+ parent_table_id = ancestor_ids[1]
1343
+ assert parent_table_id == original_table.id
1344
+
1345
+ # expect a subsequent table replace of the original table to fail
1346
+ bad_txn_operations = [
1347
+ TransactionOperation.of(
1348
+ operation_type=TransactionOperationType.UPDATE,
1349
+ dest_metafile=replacement_table,
1350
+ src_metafile=original_table,
1351
+ )
1352
+ ]
1353
+ transaction = Transaction.of(
1354
+ txn_type=TransactionType.OVERWRITE,
1355
+ txn_operations=bad_txn_operations,
1356
+ )
1357
+ with pytest.raises(ValueError):
1358
+ transaction.commit(temp_dir)
1359
+
1360
+ # expect table deletes of the original table to fail
1361
+ bad_txn_operations = [
1362
+ TransactionOperation.of(
1363
+ operation_type=TransactionOperationType.DELETE,
1364
+ dest_metafile=original_table,
1365
+ )
1366
+ ]
1367
+ transaction = Transaction.of(
1368
+ txn_type=TransactionType.DELETE,
1369
+ txn_operations=bad_txn_operations,
1370
+ )
1371
+ with pytest.raises(ValueError):
1372
+ transaction.commit(temp_dir)
1373
+
1374
+ # expect new child metafile creation under the old table to fail
1375
+ for metafile in original_child_metafiles_created:
1376
+ bad_txn_operations = [
1377
+ TransactionOperation.of(
1378
+ operation_type=TransactionOperationType.CREATE,
1379
+ dest_metafile=metafile,
1380
+ )
1381
+ ]
1382
+ transaction = Transaction.of(
1383
+ txn_type=TransactionType.APPEND,
1384
+ txn_operations=bad_txn_operations,
1385
+ )
1386
+ with pytest.raises(ValueError):
1387
+ transaction.commit(temp_dir)
1388
+
1389
+ def test_delete_namespace(self, temp_dir):
1390
+ commit_results = _commit_single_delta_table(temp_dir)
1391
+ for expected, actual, _ in commit_results:
1392
+ assert expected.equivalent_to(actual)
1393
+ original_namespace: Namespace = commit_results[0][1]
1394
+
1395
+ txn_operations = [
1396
+ TransactionOperation.of(
1397
+ operation_type=TransactionOperationType.DELETE,
1398
+ dest_metafile=original_namespace,
1399
+ )
1400
+ ]
1401
+ transaction = Transaction.of(
1402
+ txn_type=TransactionType.DELETE,
1403
+ txn_operations=txn_operations,
1404
+ )
1405
+ # when the transaction is committed
1406
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1407
+
1408
+ # expect 1 new namespace metafile to be written
1409
+ assert len(write_paths) == 1
1410
+ delete_write_path = write_paths[0]
1411
+
1412
+ # expect the delete metafile to contain the input txn op dest_metafile
1413
+ assert TransactionOperationType.DELETE.value in delete_write_path
1414
+ actual_namespace = Namespace.read(delete_write_path)
1415
+ assert original_namespace.equivalent_to(actual_namespace)
1416
+
1417
+ # expect child metafiles in the deleted namespace to remain readable and unchanged
1418
+ child_metafiles_read_post_delete = [
1419
+ Delta.read(commit_results[5][2]),
1420
+ Partition.read(commit_results[4][2]),
1421
+ Stream.read(commit_results[3][2]),
1422
+ TableVersion.read(commit_results[2][2]),
1423
+ Table.read(commit_results[1][2]),
1424
+ ]
1425
+ original_child_metafiles_to_create = [
1426
+ Delta(commit_results[5][0]),
1427
+ Partition(commit_results[4][0]),
1428
+ Stream(commit_results[3][0]),
1429
+ TableVersion(commit_results[2][0]),
1430
+ Table(commit_results[1][0]),
1431
+ ]
1432
+ original_child_metafiles_created = [
1433
+ Delta(commit_results[5][1]),
1434
+ Partition(commit_results[4][1]),
1435
+ Stream(commit_results[3][1]),
1436
+ TableVersion(commit_results[2][1]),
1437
+ Table(commit_results[1][1]),
1438
+ ]
1439
+ for i in range(len(original_child_metafiles_to_create)):
1440
+ assert child_metafiles_read_post_delete[i].equivalent_to(
1441
+ original_child_metafiles_to_create[i]
1442
+ )
1443
+ assert child_metafiles_read_post_delete[i].equivalent_to(
1444
+ original_child_metafiles_created[i]
1445
+ )
1446
+
1447
+ # expect a subsequent replace of the deleted namespace to fail
1448
+ replacement_namespace: Namespace = Namespace.based_on(original_namespace)
1449
+ bad_txn_operations = [
1450
+ TransactionOperation.of(
1451
+ operation_type=TransactionOperationType.UPDATE,
1452
+ dest_metafile=replacement_namespace,
1453
+ src_metafile=original_namespace,
1454
+ )
1455
+ ]
1456
+ transaction = Transaction.of(
1457
+ txn_type=TransactionType.OVERWRITE,
1458
+ txn_operations=bad_txn_operations,
1459
+ )
1460
+ with pytest.raises(ValueError):
1461
+ transaction.commit(temp_dir)
1462
+
1463
+ # expect subsequent deletes of the deleted namespace to fail
1464
+ bad_txn_operations = [
1465
+ TransactionOperation.of(
1466
+ operation_type=TransactionOperationType.DELETE,
1467
+ dest_metafile=original_namespace,
1468
+ )
1469
+ ]
1470
+ transaction = Transaction.of(
1471
+ txn_type=TransactionType.DELETE,
1472
+ txn_operations=bad_txn_operations,
1473
+ )
1474
+ with pytest.raises(ValueError):
1475
+ transaction.commit(temp_dir)
1476
+
1477
+ # expect new child metafile creation under the deleted namespace to fail
1478
+ for metafile in original_child_metafiles_created:
1479
+ bad_txn_operations = [
1480
+ TransactionOperation.of(
1481
+ operation_type=TransactionOperationType.CREATE,
1482
+ dest_metafile=metafile,
1483
+ )
1484
+ ]
1485
+ transaction = Transaction.of(
1486
+ txn_type=TransactionType.APPEND,
1487
+ txn_operations=bad_txn_operations,
1488
+ )
1489
+ with pytest.raises(ValueError):
1490
+ transaction.commit(temp_dir)
1491
+
1492
+ def test_replace_namespace(self, temp_dir):
1493
+ commit_results = _commit_single_delta_table(temp_dir)
1494
+ for expected, actual, _ in commit_results:
1495
+ assert expected.equivalent_to(actual)
1496
+ original_namespace: Namespace = commit_results[0][1]
1497
+
1498
+ # given a transaction containing a namespace replacement
1499
+ replacement_namespace: Namespace = Namespace.based_on(original_namespace)
1500
+
1501
+ # expect the proposed replacement namespace to be assigned a new ID, but
1502
+ # continue to have the same name as the original namespace
1503
+ assert replacement_namespace.id != original_namespace.id
1504
+ assert replacement_namespace.namespace == original_namespace.namespace
1505
+
1506
+ txn_operations = [
1507
+ TransactionOperation.of(
1508
+ operation_type=TransactionOperationType.UPDATE,
1509
+ dest_metafile=replacement_namespace,
1510
+ src_metafile=original_namespace,
1511
+ )
1512
+ ]
1513
+ transaction = Transaction.of(
1514
+ txn_type=TransactionType.OVERWRITE,
1515
+ txn_operations=txn_operations,
1516
+ )
1517
+ # when the transaction is committed
1518
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1519
+
1520
+ # expect two new namespace metafiles to be written
1521
+ # (i.e., delete old namespace, create replacement namespace)
1522
+ assert len(write_paths) == 2
1523
+ delete_write_path = write_paths[0]
1524
+ create_write_path = write_paths[1]
1525
+
1526
+ # expect the replacement namespace to be successfully written and read
1527
+ assert TransactionOperationType.CREATE.value in create_write_path
1528
+ actual_namespace = Namespace.read(create_write_path)
1529
+ assert replacement_namespace.equivalent_to(actual_namespace)
1530
+
1531
+ # expect the delete metafile to also contain the replacement namespace
1532
+ assert TransactionOperationType.DELETE.value in delete_write_path
1533
+ actual_namespace = Namespace.read(delete_write_path)
1534
+ assert replacement_namespace.equivalent_to(actual_namespace)
1535
+
1536
+ # expect old child metafiles for the replaced namespace to remain readable
1537
+ child_metafiles_read_post_replace = [
1538
+ Delta.read(commit_results[5][2]),
1539
+ Partition.read(commit_results[4][2]),
1540
+ Stream.read(commit_results[3][2]),
1541
+ TableVersion.read(commit_results[2][2]),
1542
+ Table.read(commit_results[1][2]),
1543
+ ]
1544
+ # expect old child metafiles read to share the same parent namespace name as
1545
+ # the replacement namespace, but have a different parent namespace ID
1546
+ for metafile in child_metafiles_read_post_replace:
1547
+ assert (
1548
+ metafile.namespace
1549
+ == replacement_namespace.namespace
1550
+ == original_namespace.namespace
1551
+ )
1552
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
1553
+ parent_namespace_id = ancestor_ids[0]
1554
+ assert parent_namespace_id == original_namespace.id
1555
+
1556
+ # expect original child metafiles to share the original parent namespace ID
1557
+ original_child_metafiles_to_create = [
1558
+ Delta(commit_results[5][0]),
1559
+ Partition(commit_results[4][0]),
1560
+ Stream(commit_results[3][0]),
1561
+ TableVersion(commit_results[2][0]),
1562
+ Table(commit_results[1][0]),
1563
+ ]
1564
+ original_child_metafiles_created = [
1565
+ Delta(commit_results[5][1]),
1566
+ Partition(commit_results[4][1]),
1567
+ Stream(commit_results[3][1]),
1568
+ TableVersion(commit_results[2][1]),
1569
+ Table(commit_results[1][1]),
1570
+ ]
1571
+ for i in range(len(original_child_metafiles_to_create)):
1572
+ ancestor_ids = metafile.ancestor_ids(catalog_root=temp_dir)
1573
+ parent_namespace_id = ancestor_ids[0]
1574
+ assert parent_namespace_id == original_namespace.id
1575
+
1576
+ # expect a subsequent namespace replace of the original namespace to fail
1577
+ bad_txn_operations = [
1578
+ TransactionOperation.of(
1579
+ operation_type=TransactionOperationType.UPDATE,
1580
+ dest_metafile=replacement_namespace,
1581
+ src_metafile=original_namespace,
1582
+ )
1583
+ ]
1584
+ transaction = Transaction.of(
1585
+ txn_type=TransactionType.OVERWRITE,
1586
+ txn_operations=bad_txn_operations,
1587
+ )
1588
+ with pytest.raises(ValueError):
1589
+ transaction.commit(temp_dir)
1590
+
1591
+ # expect namespace deletes of the original namespace to fail
1592
+ bad_txn_operations = [
1593
+ TransactionOperation.of(
1594
+ operation_type=TransactionOperationType.DELETE,
1595
+ dest_metafile=original_namespace,
1596
+ )
1597
+ ]
1598
+ transaction = Transaction.of(
1599
+ txn_type=TransactionType.DELETE,
1600
+ txn_operations=bad_txn_operations,
1601
+ )
1602
+ with pytest.raises(ValueError):
1603
+ transaction.commit(temp_dir)
1604
+
1605
+ # expect new child metafile creation under the old namespace to fail
1606
+ for metafile in original_child_metafiles_created:
1607
+ bad_txn_operations = [
1608
+ TransactionOperation.of(
1609
+ operation_type=TransactionOperationType.CREATE,
1610
+ dest_metafile=metafile,
1611
+ )
1612
+ ]
1613
+ transaction = Transaction.of(
1614
+ txn_type=TransactionType.APPEND,
1615
+ txn_operations=bad_txn_operations,
1616
+ )
1617
+ with pytest.raises(ValueError):
1618
+ transaction.commit(temp_dir)
1619
+
1620
+ def test_create_stream_bad_order_txn_op_chaining(self, temp_dir):
1621
+ commit_results = _commit_single_delta_table(temp_dir)
1622
+ for expected, actual, _ in commit_results:
1623
+ assert expected.equivalent_to(actual)
1624
+ # given a transaction containing:
1625
+
1626
+ # 1. a new table version in an existing table
1627
+ original_table_version_created = TableVersion(commit_results[2][1])
1628
+ new_table_version: TableVersion = TableVersion.based_on(
1629
+ other=original_table_version_created,
1630
+ new_id=original_table_version_created.id + "0",
1631
+ )
1632
+ # 2. a new stream in the new table version
1633
+ original_stream_created = Stream(commit_results[3][1])
1634
+ new_stream: Stream = Stream.based_on(
1635
+ other=original_stream_created,
1636
+ new_id="test_stream_id",
1637
+ )
1638
+ new_stream.table_version_locator.table_version = new_table_version.table_version
1639
+
1640
+ # 3. ordered transaction operations that try to put the new stream
1641
+ # in the new table version before it is created
1642
+ txn_operations = [
1643
+ TransactionOperation.of(
1644
+ TransactionOperationType.CREATE,
1645
+ new_stream,
1646
+ ),
1647
+ TransactionOperation.of(
1648
+ TransactionOperationType.CREATE,
1649
+ new_table_version,
1650
+ ),
1651
+ ]
1652
+ transaction = Transaction.of(
1653
+ txn_type=TransactionType.APPEND,
1654
+ txn_operations=txn_operations,
1655
+ )
1656
+ # when the transaction is committed,
1657
+ # expect stream creation to fail
1658
+ with pytest.raises(ValueError):
1659
+ transaction.commit(temp_dir)
1660
+ # when a transaction with the operations reversed is committed,
1661
+ transaction = Transaction.of(
1662
+ txn_type=TransactionType.APPEND,
1663
+ txn_operations=list(reversed(txn_operations)),
1664
+ )
1665
+ # expect table version and stream creation to succeed
1666
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1667
+ assert len(write_paths) == 2
1668
+
1669
+ def test_table_rename_bad_order_txn_op_chaining(self, temp_dir):
1670
+ commit_results = _commit_single_delta_table(temp_dir)
1671
+ for expected, actual, _ in commit_results:
1672
+ assert expected.equivalent_to(actual)
1673
+ original_table: Table = commit_results[1][1]
1674
+ # given a transaction containing:
1675
+ # 1. a table rename
1676
+ renamed_table: Table = Table.update_for(original_table)
1677
+ renamed_table.locator = TableLocator.at(
1678
+ namespace="test_namespace",
1679
+ table_name="test_table_renamed",
1680
+ )
1681
+ # 2. a new table version in a renamed table
1682
+ original_table_version_created = TableVersion(commit_results[2][1])
1683
+ new_table_version_to_create: TableVersion = TableVersion.based_on(
1684
+ other=original_table_version_created,
1685
+ new_id=original_table_version_created.id + "0",
1686
+ )
1687
+ new_table_version_to_create.table_locator.table_name = renamed_table.table_name
1688
+ # 3. ordered transaction operations that try to put the new table
1689
+ # version in the renamed table before the table is renamed
1690
+ txn_operations = [
1691
+ TransactionOperation.of(
1692
+ TransactionOperationType.CREATE,
1693
+ new_table_version_to_create,
1694
+ ),
1695
+ TransactionOperation.of(
1696
+ TransactionOperationType.UPDATE,
1697
+ renamed_table,
1698
+ original_table,
1699
+ ),
1700
+ ]
1701
+ transaction = Transaction.of(
1702
+ txn_type=TransactionType.ALTER,
1703
+ txn_operations=txn_operations,
1704
+ )
1705
+ # when the transaction is committed,
1706
+ # expect the transaction to fail due to incorrect operation order
1707
+ with pytest.raises(ValueError):
1708
+ transaction.commit(temp_dir)
1709
+ # when a transaction with the operations reversed is committed,
1710
+ transaction = Transaction.of(
1711
+ txn_type=TransactionType.ALTER,
1712
+ txn_operations=list(reversed(txn_operations)),
1713
+ )
1714
+ # expect table and table version creation to succeed
1715
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1716
+ assert len(write_paths) == 2
1717
+
1718
+ def test_create_duplicate_namespace(self, temp_dir):
1719
+ namespace_locator = NamespaceLocator.of(namespace="test_namespace")
1720
+ namespace = Namespace.of(locator=namespace_locator)
1721
+ # given serial transaction that try to create two namespaces with
1722
+ # the same name
1723
+ transaction = Transaction.of(
1724
+ txn_type=TransactionType.APPEND,
1725
+ txn_operations=[
1726
+ TransactionOperation.of(
1727
+ TransactionOperationType.CREATE,
1728
+ namespace,
1729
+ ),
1730
+ ],
1731
+ )
1732
+ # expect the first transaction to be successfully committed
1733
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1734
+ deserialized_namespace = Namespace.read(write_paths.pop())
1735
+ assert namespace.equivalent_to(deserialized_namespace)
1736
+ # but expect the second transaction to fail
1737
+ with pytest.raises(ValueError):
1738
+ transaction.commit(temp_dir)
1739
+
1740
+ def test_create_duplicate_namespace_txn_op_chaining(self, temp_dir):
1741
+ namespace_locator = NamespaceLocator.of(namespace="test_namespace")
1742
+ namespace = Namespace.of(locator=namespace_locator)
1743
+ # given a transaction that tries to create the same namespace twice
1744
+ transaction = Transaction.of(
1745
+ txn_type=TransactionType.APPEND,
1746
+ txn_operations=[
1747
+ TransactionOperation.of(
1748
+ TransactionOperationType.CREATE,
1749
+ namespace,
1750
+ ),
1751
+ TransactionOperation.of(
1752
+ TransactionOperationType.CREATE,
1753
+ namespace,
1754
+ ),
1755
+ ],
1756
+ )
1757
+ # when the transaction is committed,
1758
+ # expect duplicate namespace creation to fail
1759
+ with pytest.raises(ValueError):
1760
+ transaction.commit(temp_dir)
1761
+
1762
+ def test_create_stream_in_missing_table_version(self, temp_dir):
1763
+ commit_results = _commit_single_delta_table(temp_dir)
1764
+ for expected, actual, _ in commit_results:
1765
+ assert expected.equivalent_to(actual)
1766
+ # given a transaction that tries to create a single stream
1767
+ # in a table version that doesn't exist
1768
+ original_stream_created = Stream(commit_results[3][1])
1769
+ new_stream: Stream = Stream.based_on(
1770
+ other=original_stream_created,
1771
+ new_id="test_stream_id",
1772
+ )
1773
+ new_stream.table_version_locator.table_version = "missing_table_version.0"
1774
+ transaction = Transaction.of(
1775
+ txn_type=TransactionType.APPEND,
1776
+ txn_operations=[
1777
+ TransactionOperation.of(
1778
+ TransactionOperationType.CREATE,
1779
+ new_stream,
1780
+ )
1781
+ ],
1782
+ )
1783
+ # when the transaction is committed,
1784
+ # expect stream creation to fail
1785
+ with pytest.raises(ValueError):
1786
+ transaction.commit(temp_dir)
1787
+
1788
+ def test_create_table_version_in_missing_namespace(self, temp_dir):
1789
+ commit_results = _commit_single_delta_table(temp_dir)
1790
+ for expected, actual, _ in commit_results:
1791
+ assert expected.equivalent_to(actual)
1792
+ # given a transaction that tries to create a single table version
1793
+ # in a namespace that doesn't exist
1794
+ original_table_version_created = TableVersion(commit_results[2][1])
1795
+ new_table_version: TableVersion = TableVersion.based_on(
1796
+ other=original_table_version_created,
1797
+ new_id="test_table_version.1",
1798
+ )
1799
+ new_table_version.namespace_locator.namespace = "missing_namespace"
1800
+ transaction = Transaction.of(
1801
+ txn_type=TransactionType.APPEND,
1802
+ txn_operations=[
1803
+ TransactionOperation.of(
1804
+ TransactionOperationType.CREATE,
1805
+ new_table_version,
1806
+ )
1807
+ ],
1808
+ )
1809
+ # when the transaction is committed,
1810
+ # expect table version creation to fail
1811
+ with pytest.raises(ValueError):
1812
+ transaction.commit(temp_dir)
1813
+
1814
+ def test_create_table_version_in_missing_table(self, temp_dir):
1815
+ commit_results = _commit_single_delta_table(temp_dir)
1816
+ for expected, actual, _ in commit_results:
1817
+ assert expected.equivalent_to(actual)
1818
+ # given a transaction that tries to create a single table version
1819
+ # in a table that doesn't exist
1820
+ original_table_version_created = TableVersion(commit_results[2][1])
1821
+ new_table_version: TableVersion = TableVersion.based_on(
1822
+ other=original_table_version_created,
1823
+ new_id="test_table_version.1",
1824
+ )
1825
+ new_table_version.table_locator.table_name = "missing_table"
1826
+ transaction = Transaction.of(
1827
+ txn_type=TransactionType.APPEND,
1828
+ txn_operations=[
1829
+ TransactionOperation.of(
1830
+ TransactionOperationType.CREATE,
1831
+ new_table_version,
1832
+ )
1833
+ ],
1834
+ )
1835
+ # when the transaction is committed,
1836
+ # expect table version creation to fail
1837
+ with pytest.raises(ValueError):
1838
+ transaction.commit(temp_dir)
1839
+
1840
+ def test_create_table_in_missing_namespace(self, temp_dir):
1841
+ table_locator = TableLocator.at(
1842
+ namespace="missing_namespace",
1843
+ table_name="test_table",
1844
+ )
1845
+ table = Table.of(
1846
+ locator=table_locator,
1847
+ description="test table description",
1848
+ )
1849
+ # given a transaction that tries to create a single table in a
1850
+ # namespace that doesn't exist
1851
+ transaction = Transaction.of(
1852
+ txn_type=TransactionType.APPEND,
1853
+ txn_operations=[
1854
+ TransactionOperation.of(
1855
+ TransactionOperationType.CREATE,
1856
+ table,
1857
+ )
1858
+ ],
1859
+ )
1860
+ # when the transaction is committed,
1861
+ # expect table creation to fail
1862
+ with pytest.raises(ValueError):
1863
+ transaction.commit(temp_dir)
1864
+
1865
+ def test_rename_table_txn_op_chaining(self, temp_dir):
1866
+ commit_results = _commit_single_delta_table(temp_dir)
1867
+ for expected, actual, _ in commit_results:
1868
+ assert expected.equivalent_to(actual)
1869
+ original_table: Table = commit_results[1][1]
1870
+ # given a transaction containing:
1871
+ # 1. a table rename
1872
+ renamed_table: Table = Table.update_for(original_table)
1873
+ renamed_table.locator = TableLocator.at(
1874
+ namespace="test_namespace",
1875
+ table_name="test_table_renamed",
1876
+ )
1877
+ original_delta_created = Delta(commit_results[5][1])
1878
+ original_partition_created = Partition(commit_results[4][1])
1879
+ original_stream_created = Stream(commit_results[3][1])
1880
+ original_table_version_created = TableVersion(commit_results[2][1])
1881
+ # 2. a new table version in the renamed table
1882
+ new_table_version_to_create: TableVersion = TableVersion.based_on(
1883
+ other=original_table_version_created,
1884
+ new_id=original_table_version_created.table_version + "0",
1885
+ )
1886
+ new_table_version_to_create.table_locator.table_name = renamed_table.table_name
1887
+ # 3. a new stream in the new table version in the renamed table
1888
+ new_stream_to_create: Stream = Stream.based_on(
1889
+ other=original_stream_created,
1890
+ new_id=original_stream_created.stream_id + "_2",
1891
+ )
1892
+ new_stream_to_create.locator.table_version_locator = (
1893
+ new_table_version_to_create.locator
1894
+ )
1895
+ # 4. a new partition in the new stream in the new table version
1896
+ # in the renamed table
1897
+ new_partition_to_create: Partition = Partition.based_on(
1898
+ other=original_partition_created,
1899
+ new_id=original_partition_created.partition_id + "_2",
1900
+ )
1901
+ new_partition_to_create.locator.stream_locator = new_stream_to_create.locator
1902
+ # 5. a new delta in the new partition in the new stream in the new
1903
+ # table version in the renamed table
1904
+ new_delta_to_create = Delta.based_on(
1905
+ other=original_delta_created,
1906
+ new_id="2",
1907
+ )
1908
+ new_delta_to_create.locator.partition_locator = new_partition_to_create.locator
1909
+ # 6. ordered transaction operations that ensure all prior
1910
+ # dependencies are satisfied
1911
+ txn_operations = [
1912
+ TransactionOperation.of(
1913
+ operation_type=TransactionOperationType.UPDATE,
1914
+ dest_metafile=renamed_table,
1915
+ src_metafile=original_table,
1916
+ ),
1917
+ TransactionOperation.of(
1918
+ operation_type=TransactionOperationType.CREATE,
1919
+ dest_metafile=new_table_version_to_create,
1920
+ ),
1921
+ TransactionOperation.of(
1922
+ operation_type=TransactionOperationType.CREATE,
1923
+ dest_metafile=new_stream_to_create,
1924
+ ),
1925
+ TransactionOperation.of(
1926
+ operation_type=TransactionOperationType.CREATE,
1927
+ dest_metafile=new_partition_to_create,
1928
+ ),
1929
+ TransactionOperation.of(
1930
+ operation_type=TransactionOperationType.CREATE,
1931
+ dest_metafile=new_delta_to_create,
1932
+ ),
1933
+ ]
1934
+ transaction = Transaction.of(
1935
+ txn_type=TransactionType.ALTER,
1936
+ txn_operations=txn_operations,
1937
+ )
1938
+ # when the transaction is committed
1939
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1940
+
1941
+ # expect the transaction to successfully create 5 new metafiles
1942
+ assert len(write_paths) == 5
1943
+
1944
+ # expect the table to be successfully renamed
1945
+ actual_table = Table.read(write_paths[0])
1946
+ assert renamed_table.equivalent_to(actual_table)
1947
+
1948
+ # expect the new table version in the renamed table to be
1949
+ # successfully created
1950
+ actual_table_version = TableVersion.read(write_paths[1])
1951
+ assert new_table_version_to_create.equivalent_to(actual_table_version)
1952
+
1953
+ # expect the new stream in the new table version in the renamed
1954
+ # table to be successfully created
1955
+ actual_stream = Stream.read(write_paths[2])
1956
+ assert new_stream_to_create.equivalent_to(actual_stream)
1957
+
1958
+ # expect the new partition in the new stream in the new table
1959
+ # version in the renamed table to be successfully created
1960
+ actual_partition = Partition.read(write_paths[3])
1961
+ assert new_partition_to_create.equivalent_to(actual_partition)
1962
+
1963
+ # expect the new delta in the new partition in the new stream in
1964
+ # the new table version in the renamed table to be successfully
1965
+ # created
1966
+ actual_delta = Delta.read(write_paths[4])
1967
+ assert new_delta_to_create.equivalent_to(actual_delta)
1968
+
1969
+ def test_rename_table(self, temp_dir):
1970
+ commit_results = _commit_single_delta_table(temp_dir)
1971
+ for expected, actual, _ in commit_results:
1972
+ assert expected.equivalent_to(actual)
1973
+ original_table: Table = commit_results[1][1]
1974
+
1975
+ # given a transaction containing a table rename
1976
+ renamed_table: Table = Table.update_for(original_table)
1977
+ renamed_table.locator = TableLocator.at(
1978
+ namespace="test_namespace",
1979
+ table_name="test_table_renamed",
1980
+ )
1981
+ txn_operations = [
1982
+ TransactionOperation.of(
1983
+ operation_type=TransactionOperationType.UPDATE,
1984
+ dest_metafile=renamed_table,
1985
+ src_metafile=original_table,
1986
+ )
1987
+ ]
1988
+ transaction = Transaction.of(
1989
+ txn_type=TransactionType.ALTER,
1990
+ txn_operations=txn_operations,
1991
+ )
1992
+ # when the transaction is committed
1993
+ write_paths, txn_log_path = transaction.commit(temp_dir)
1994
+
1995
+ # expect only one new table metafile to be written
1996
+ assert len(write_paths) == 1
1997
+
1998
+ # expect the table to be successfully renamed
1999
+ actual_table = Table.read(write_paths[0])
2000
+ assert renamed_table == actual_table
2001
+
2002
+ # expect all new child metafiles read to return the new table name
2003
+ child_metafiles_read_post_rename = [
2004
+ Delta.read(commit_results[5][2]),
2005
+ Partition.read(commit_results[4][2]),
2006
+ Stream.read(commit_results[3][2]),
2007
+ TableVersion.read(commit_results[2][2]),
2008
+ ]
2009
+ for metafile in child_metafiles_read_post_rename:
2010
+ assert metafile.table_name == renamed_table.table_name
2011
+
2012
+ # expect all original metafiles to return the original table name
2013
+ original_child_metafiles_to_create = [
2014
+ Delta(commit_results[5][0]),
2015
+ Partition(commit_results[4][0]),
2016
+ Stream(commit_results[3][0]),
2017
+ TableVersion(commit_results[2][0]),
2018
+ ]
2019
+ original_child_metafiles_created = [
2020
+ Delta(commit_results[5][1]),
2021
+ Partition(commit_results[4][1]),
2022
+ Stream(commit_results[3][1]),
2023
+ TableVersion(commit_results[2][1]),
2024
+ ]
2025
+ for i in range(len(original_child_metafiles_to_create)):
2026
+ assert (
2027
+ original_child_metafiles_created[i].table_name
2028
+ == original_child_metafiles_to_create[i].table_name
2029
+ == original_table.table_name
2030
+ )
2031
+
2032
+ # expect a subsequent table update from the old table name to fail
2033
+ bad_txn_operations = [
2034
+ TransactionOperation.of(
2035
+ operation_type=TransactionOperationType.UPDATE,
2036
+ dest_metafile=renamed_table,
2037
+ src_metafile=original_table,
2038
+ )
2039
+ ]
2040
+ transaction = Transaction.of(
2041
+ txn_type=TransactionType.RESTATE,
2042
+ txn_operations=bad_txn_operations,
2043
+ )
2044
+ with pytest.raises(ValueError):
2045
+ transaction.commit(temp_dir)
2046
+
2047
+ # expect table deletes of the old table name fail
2048
+ bad_txn_operations = [
2049
+ TransactionOperation.of(
2050
+ operation_type=TransactionOperationType.DELETE,
2051
+ dest_metafile=original_table,
2052
+ )
2053
+ ]
2054
+ transaction = Transaction.of(
2055
+ txn_type=TransactionType.DELETE,
2056
+ txn_operations=bad_txn_operations,
2057
+ )
2058
+ with pytest.raises(ValueError):
2059
+ transaction.commit(temp_dir)
2060
+
2061
+ # expect child metafile creation under the old table name to fail
2062
+ for metafile in original_child_metafiles_created:
2063
+ bad_txn_operations = [
2064
+ TransactionOperation.of(
2065
+ operation_type=TransactionOperationType.CREATE,
2066
+ dest_metafile=metafile,
2067
+ )
2068
+ ]
2069
+ transaction = Transaction.of(
2070
+ txn_type=TransactionType.APPEND,
2071
+ txn_operations=bad_txn_operations,
2072
+ )
2073
+ with pytest.raises(ValueError):
2074
+ transaction.commit(temp_dir)
2075
+
2076
+ def test_rename_namespace(self, temp_dir):
2077
+ commit_results = _commit_single_delta_table(temp_dir)
2078
+ for expected, actual, _ in commit_results:
2079
+ assert expected.equivalent_to(actual)
2080
+ original_namespace = commit_results[0][1]
2081
+ # given a transaction containing a namespace rename
2082
+ renamed_namespace: Namespace = Namespace.update_for(original_namespace)
2083
+ renamed_namespace.locator = NamespaceLocator.of(
2084
+ namespace="test_namespace_renamed",
2085
+ )
2086
+ txn_operations = [
2087
+ TransactionOperation.of(
2088
+ operation_type=TransactionOperationType.UPDATE,
2089
+ dest_metafile=renamed_namespace,
2090
+ src_metafile=original_namespace,
2091
+ )
2092
+ ]
2093
+ transaction = Transaction.of(
2094
+ txn_type=TransactionType.ALTER,
2095
+ txn_operations=txn_operations,
2096
+ )
2097
+ # when the transaction is committed
2098
+ write_paths, txn_log_path = transaction.commit(temp_dir)
2099
+
2100
+ # expect only one new namespace metafile to be written
2101
+ assert len(write_paths) == 1
2102
+
2103
+ # expect the namespace to be successfully renamed
2104
+ actual_namespace = Namespace.read(write_paths[0])
2105
+ assert renamed_namespace == actual_namespace
2106
+
2107
+ # expect all child metafiles read to return the new namespace
2108
+ child_metafiles_read_post_rename = [
2109
+ Delta.read(commit_results[5][2]),
2110
+ Partition.read(commit_results[4][2]),
2111
+ Stream.read(commit_results[3][2]),
2112
+ TableVersion.read(commit_results[2][2]),
2113
+ Table.read(commit_results[1][2]),
2114
+ ]
2115
+ for metafile in child_metafiles_read_post_rename:
2116
+ assert metafile.namespace == "test_namespace_renamed"
2117
+
2118
+ # expect the original metafiles to return the original namespace
2119
+ original_child_metafiles_to_create = [
2120
+ Delta(commit_results[5][0]),
2121
+ Partition(commit_results[4][0]),
2122
+ Stream(commit_results[3][0]),
2123
+ TableVersion(commit_results[2][0]),
2124
+ Table(commit_results[1][0]),
2125
+ ]
2126
+ original_child_metafiles_created = [
2127
+ Delta(commit_results[5][1]),
2128
+ Partition(commit_results[4][1]),
2129
+ Stream(commit_results[3][1]),
2130
+ TableVersion(commit_results[2][1]),
2131
+ Table(commit_results[1][1]),
2132
+ ]
2133
+ for i in range(len(original_child_metafiles_to_create)):
2134
+ assert (
2135
+ original_child_metafiles_created[i].namespace
2136
+ == original_child_metafiles_to_create[i].namespace
2137
+ == "test_namespace"
2138
+ )
2139
+
2140
+ # expect a subsequent update of the old namespace name to fail
2141
+ bad_txn_operations = [
2142
+ TransactionOperation.of(
2143
+ operation_type=TransactionOperationType.UPDATE,
2144
+ dest_metafile=renamed_namespace,
2145
+ src_metafile=original_namespace,
2146
+ )
2147
+ ]
2148
+ transaction = Transaction.of(
2149
+ txn_type=TransactionType.ALTER,
2150
+ txn_operations=bad_txn_operations,
2151
+ )
2152
+ with pytest.raises(ValueError):
2153
+ transaction.commit(temp_dir)
2154
+
2155
+ # expect namespace deletes of the old namespace name fail
2156
+ bad_txn_operations = [
2157
+ TransactionOperation.of(
2158
+ operation_type=TransactionOperationType.DELETE,
2159
+ dest_metafile=original_namespace,
2160
+ )
2161
+ ]
2162
+ transaction = Transaction.of(
2163
+ txn_type=TransactionType.DELETE,
2164
+ txn_operations=bad_txn_operations,
2165
+ )
2166
+ with pytest.raises(ValueError):
2167
+ transaction.commit(temp_dir)
2168
+
2169
+ # expect child metafile creation under the old namespace to fail
2170
+ for metafile in original_child_metafiles_created:
2171
+ bad_txn_operations = [
2172
+ TransactionOperation.of(
2173
+ operation_type=TransactionOperationType.CREATE,
2174
+ dest_metafile=metafile,
2175
+ )
2176
+ ]
2177
+ transaction = Transaction.of(
2178
+ txn_type=TransactionType.APPEND,
2179
+ txn_operations=bad_txn_operations,
2180
+ )
2181
+ with pytest.raises(ValueError):
2182
+ transaction.commit(temp_dir)
2183
+
2184
+ def test_e2e_serde(self, temp_dir):
2185
+ # given a transaction that creates a single namespace, table,
2186
+ # table version, stream, partition, and delta
2187
+ commit_results = _commit_single_delta_table(temp_dir)
2188
+ # when the transaction is committed, expect all actual metafiles
2189
+ # created to match the expected/input metafiles to create
2190
+ for expected, actual, _ in commit_results:
2191
+ assert expected.equivalent_to(actual)
2192
+
2193
+ def test_namespace_serde(self, temp_dir):
2194
+ namespace_locator = NamespaceLocator.of(namespace="test_namespace")
2195
+ namespace = Namespace.of(locator=namespace_locator)
2196
+ # given a transaction that creates a single namespace
2197
+ write_paths, txn_log_path = Transaction.of(
2198
+ txn_type=TransactionType.APPEND,
2199
+ txn_operations=[
2200
+ TransactionOperation.of(
2201
+ operation_type=TransactionOperationType.CREATE,
2202
+ dest_metafile=namespace,
2203
+ )
2204
+ ],
2205
+ ).commit(temp_dir)
2206
+ # when the transaction is committed,
2207
+ # expect the namespace created to match the namespace given
2208
+ deserialized_namespace = Namespace.read(write_paths.pop())
2209
+ assert namespace.equivalent_to(deserialized_namespace)
2210
+
2211
+ def test_table_serde(self, temp_dir):
2212
+ table_locator = TableLocator.at(
2213
+ namespace=None,
2214
+ table_name="test_table",
2215
+ )
2216
+ table = Table.of(
2217
+ locator=table_locator,
2218
+ description="test table description",
2219
+ )
2220
+ # given a transaction that creates a single table
2221
+ write_paths, txn_log_path = Transaction.of(
2222
+ txn_type=TransactionType.APPEND,
2223
+ txn_operations=[
2224
+ TransactionOperation.of(
2225
+ operation_type=TransactionOperationType.CREATE,
2226
+ dest_metafile=table,
2227
+ )
2228
+ ],
2229
+ ).commit(temp_dir)
2230
+ # when the transaction is committed,
2231
+ # expect the table created to match the table given
2232
+ deserialized_table = Table.read(write_paths.pop())
2233
+ assert table.equivalent_to(deserialized_table)
2234
+
2235
+ def test_table_version_serde(self, temp_dir):
2236
+ table_version_locator = TableVersionLocator.at(
2237
+ namespace=None,
2238
+ table_name=None,
2239
+ table_version="test_table_version.1",
2240
+ )
2241
+ schema = Schema.of(
2242
+ [
2243
+ Field.of(
2244
+ field=pa.field("some_string", pa.string(), nullable=False),
2245
+ field_id=1,
2246
+ is_merge_key=True,
2247
+ ),
2248
+ Field.of(
2249
+ field=pa.field("some_int32", pa.int32(), nullable=False),
2250
+ field_id=2,
2251
+ is_merge_key=True,
2252
+ ),
2253
+ Field.of(
2254
+ field=pa.field("some_float64", pa.float64()),
2255
+ field_id=3,
2256
+ is_merge_key=False,
2257
+ ),
2258
+ ]
2259
+ )
2260
+ bucket_transform = BucketTransform.of(
2261
+ BucketTransformParameters.of(
2262
+ num_buckets=2,
2263
+ bucketing_strategy=BucketingStrategy.DEFAULT,
2264
+ )
2265
+ )
2266
+ partition_keys = [
2267
+ PartitionKey.of(
2268
+ key=["some_string", "some_int32"],
2269
+ name="test_partition_key",
2270
+ field_id="test_field_id",
2271
+ transform=bucket_transform,
2272
+ )
2273
+ ]
2274
+ partition_scheme = PartitionScheme.of(
2275
+ keys=partition_keys,
2276
+ name="test_partition_scheme",
2277
+ scheme_id="test_partition_scheme_id",
2278
+ )
2279
+ sort_keys = [
2280
+ SortKey.of(
2281
+ key=["some_int32"],
2282
+ sort_order=SortOrder.DESCENDING,
2283
+ null_order=NullOrder.AT_START,
2284
+ transform=TruncateTransform.of(
2285
+ TruncateTransformParameters.of(width=3),
2286
+ ),
2287
+ )
2288
+ ]
2289
+ sort_scheme = SortScheme.of(
2290
+ keys=sort_keys,
2291
+ name="test_sort_scheme",
2292
+ scheme_id="test_sort_scheme_id",
2293
+ )
2294
+ table_version = TableVersion.of(
2295
+ locator=table_version_locator,
2296
+ schema=schema,
2297
+ partition_scheme=partition_scheme,
2298
+ description="test table version description",
2299
+ properties={"test_property_key": "test_property_value"},
2300
+ content_types=[ContentType.PARQUET],
2301
+ sort_scheme=sort_scheme,
2302
+ watermark=1,
2303
+ lifecycle_state=LifecycleState.CREATED,
2304
+ schemas=[schema, schema, schema],
2305
+ partition_schemes=[partition_scheme, partition_scheme],
2306
+ sort_schemes=[sort_scheme, sort_scheme],
2307
+ )
2308
+ # given a transaction that creates a single table version
2309
+ write_paths, txn_log_path = Transaction.of(
2310
+ txn_type=TransactionType.APPEND,
2311
+ txn_operations=[
2312
+ TransactionOperation.of(
2313
+ operation_type=TransactionOperationType.CREATE,
2314
+ dest_metafile=table_version,
2315
+ )
2316
+ ],
2317
+ ).commit(temp_dir)
2318
+ # when the transaction is committed,
2319
+ # expect the table version created to match the table version given
2320
+ deserialized_table_version = TableVersion.read(write_paths.pop())
2321
+ assert table_version.equivalent_to(deserialized_table_version)
2322
+
2323
+ def test_stream_serde(self, temp_dir):
2324
+ stream_locator = StreamLocator.at(
2325
+ namespace=None,
2326
+ table_name=None,
2327
+ table_version=None,
2328
+ stream_id="test_stream_id",
2329
+ stream_format=StreamFormat.DELTACAT,
2330
+ )
2331
+ bucket_transform = BucketTransform.of(
2332
+ BucketTransformParameters.of(
2333
+ num_buckets=2,
2334
+ bucketing_strategy=BucketingStrategy.DEFAULT,
2335
+ )
2336
+ )
2337
+ partition_keys = [
2338
+ PartitionKey.of(
2339
+ key=["some_string", "some_int32"],
2340
+ name="test_partition_key",
2341
+ field_id="test_field_id",
2342
+ transform=bucket_transform,
2343
+ )
2344
+ ]
2345
+ partition_scheme = PartitionScheme.of(
2346
+ keys=partition_keys,
2347
+ name="test_partition_scheme",
2348
+ scheme_id="test_partition_scheme_id",
2349
+ )
2350
+ stream = Stream.of(
2351
+ locator=stream_locator,
2352
+ partition_scheme=partition_scheme,
2353
+ state=CommitState.STAGED,
2354
+ previous_stream_id="test_previous_stream_id",
2355
+ watermark=1,
2356
+ )
2357
+ # given a transaction that creates a single stream
2358
+ write_paths, txn_log_path = Transaction.of(
2359
+ txn_type=TransactionType.APPEND,
2360
+ txn_operations=[
2361
+ TransactionOperation.of(
2362
+ operation_type=TransactionOperationType.CREATE,
2363
+ dest_metafile=stream,
2364
+ )
2365
+ ],
2366
+ ).commit(temp_dir)
2367
+ # when the transaction is committed,
2368
+ # expect the stream created to match the stream given
2369
+ deserialized_stream = Stream.read(write_paths.pop())
2370
+ assert stream.equivalent_to(deserialized_stream)
2371
+
2372
+ def test_partition_serde(self, temp_dir):
2373
+ partition_locator = PartitionLocator.at(
2374
+ namespace=None,
2375
+ table_name=None,
2376
+ table_version=None,
2377
+ stream_id=None,
2378
+ stream_format=None,
2379
+ partition_values=["a", 1],
2380
+ partition_id="test_partition_id",
2381
+ )
2382
+ schema = Schema.of(
2383
+ [
2384
+ Field.of(
2385
+ field=pa.field("some_string", pa.string(), nullable=False),
2386
+ field_id=1,
2387
+ is_merge_key=True,
2388
+ ),
2389
+ Field.of(
2390
+ field=pa.field("some_int32", pa.int32(), nullable=False),
2391
+ field_id=2,
2392
+ is_merge_key=True,
2393
+ ),
2394
+ Field.of(
2395
+ field=pa.field("some_float64", pa.float64()),
2396
+ field_id=3,
2397
+ is_merge_key=False,
2398
+ ),
2399
+ ]
2400
+ )
2401
+ partition = Partition.of(
2402
+ locator=partition_locator,
2403
+ schema=schema,
2404
+ content_types=[ContentType.PARQUET],
2405
+ state=CommitState.STAGED,
2406
+ previous_stream_position=0,
2407
+ previous_partition_id="test_previous_partition_id",
2408
+ stream_position=1,
2409
+ partition_scheme_id="test_partition_scheme_id",
2410
+ )
2411
+ # given a transaction that creates a single partition
2412
+ write_paths, txn_log_path = Transaction.of(
2413
+ txn_type=TransactionType.APPEND,
2414
+ txn_operations=[
2415
+ TransactionOperation.of(
2416
+ operation_type=TransactionOperationType.CREATE,
2417
+ dest_metafile=partition,
2418
+ )
2419
+ ],
2420
+ ).commit(temp_dir)
2421
+ # when the transaction is committed,
2422
+ # expect the partition created to match the partition given
2423
+ deserialized_partition = Partition.read(write_paths.pop())
2424
+ assert partition.equivalent_to(deserialized_partition)
2425
+
2426
+ def test_delta_serde(self, temp_dir):
2427
+ delta_locator = DeltaLocator.at(
2428
+ namespace=None,
2429
+ table_name=None,
2430
+ table_version=None,
2431
+ stream_id=None,
2432
+ stream_format=None,
2433
+ partition_values=None,
2434
+ partition_id=None,
2435
+ stream_position=1,
2436
+ )
2437
+ manifest_entry_params = EntryParams.of(
2438
+ equality_field_locators=["some_string", "some_int32"],
2439
+ )
2440
+ manifest_meta = ManifestMeta.of(
2441
+ record_count=1,
2442
+ content_length=10,
2443
+ content_type=ContentType.PARQUET.value,
2444
+ content_encoding=ContentEncoding.IDENTITY.value,
2445
+ source_content_length=100,
2446
+ credentials={"foo": "bar"},
2447
+ content_type_parameters=[{"param1": "value1"}],
2448
+ entry_type=EntryType.EQUALITY_DELETE,
2449
+ entry_params=manifest_entry_params,
2450
+ )
2451
+ manifest = Manifest.of(
2452
+ entries=[
2453
+ ManifestEntry.of(
2454
+ url="s3://test/url",
2455
+ meta=manifest_meta,
2456
+ )
2457
+ ],
2458
+ author=ManifestAuthor.of(
2459
+ name="deltacat",
2460
+ version="2.0",
2461
+ ),
2462
+ entry_type=EntryType.EQUALITY_DELETE,
2463
+ entry_params=manifest_entry_params,
2464
+ )
2465
+ delta = Delta.of(
2466
+ locator=delta_locator,
2467
+ delta_type=DeltaType.APPEND,
2468
+ meta=manifest_meta,
2469
+ properties={"property1": "value1"},
2470
+ manifest=manifest,
2471
+ previous_stream_position=0,
2472
+ )
2473
+ # given a transaction that creates a single delta
2474
+ write_paths, txn_log_path = Transaction.of(
2475
+ txn_type=TransactionType.APPEND,
2476
+ txn_operations=[
2477
+ TransactionOperation.of(
2478
+ operation_type=TransactionOperationType.CREATE,
2479
+ dest_metafile=delta,
2480
+ )
2481
+ ],
2482
+ ).commit(temp_dir)
2483
+ # when the transaction is committed,
2484
+ # expect the delta created to match the delta given
2485
+ deserialized_delta = Delta.read(write_paths.pop())
2486
+ assert delta.equivalent_to(deserialized_delta)
2487
+
2488
+ def test_python_type_serde(self, temp_dir):
2489
+ table_locator = TableLocator.at(
2490
+ namespace=None,
2491
+ table_name="test_table",
2492
+ )
2493
+ # given a table whose property values contain every basic python type
2494
+ # except set, frozenset, and range which can't be serialized by msgpack
2495
+ # and memoryview which can't be pickled by copy.deepcopy
2496
+ properties = {
2497
+ "foo": 1,
2498
+ "bar": 2.0,
2499
+ "baz": True,
2500
+ "qux": b"123",
2501
+ "quux": None,
2502
+ "corge": [1, 2, 3],
2503
+ "grault": {"foo": "bar"},
2504
+ "garply": (1, 2, 3),
2505
+ "waldo": bytearray(3),
2506
+ }
2507
+ table = Table.of(
2508
+ locator=table_locator,
2509
+ description="test table description",
2510
+ properties=properties,
2511
+ )
2512
+ # when a transaction commits this table
2513
+ write_paths, txn_log_path = Transaction.of(
2514
+ txn_type=TransactionType.APPEND,
2515
+ txn_operations=[
2516
+ TransactionOperation.of(
2517
+ operation_type=TransactionOperationType.CREATE,
2518
+ dest_metafile=table,
2519
+ )
2520
+ ],
2521
+ ).commit(temp_dir)
2522
+ deserialized_table = Table.read(write_paths.pop())
2523
+ # expect the following SerDe transformations of the original properties:
2524
+ expected_properties = properties.copy()
2525
+ # 1. msgpack tranlates tuple to list
2526
+ expected_properties["garply"] = [1, 2, 3]
2527
+ # 2. msgpack unpacks bytearray into bytes
2528
+ expected_properties["waldo"] = b"\x00\x00\x00"
2529
+ # expect the table created to otherwise match the table given
2530
+ table.properties = expected_properties
2531
+ assert table.equivalent_to(deserialized_table)
2532
+
2533
+ def test_metafile_read_bad_path(self, temp_dir):
2534
+ with pytest.raises(FileNotFoundError):
2535
+ Delta.read("foobar")