deltacat 2.0.0b11__py3-none-any.whl → 2.0.0b12__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- deltacat/__init__.py +78 -3
- deltacat/api.py +122 -67
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/conftest.py +0 -18
- deltacat/catalog/__init__.py +2 -0
- deltacat/catalog/delegate.py +445 -63
- deltacat/catalog/interface.py +188 -62
- deltacat/catalog/main/impl.py +2417 -271
- deltacat/catalog/model/catalog.py +49 -10
- deltacat/catalog/model/properties.py +38 -0
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
- deltacat/compute/compactor/model/round_completion_info.py +16 -6
- deltacat/compute/compactor/repartition_session.py +8 -21
- deltacat/compute/compactor/steps/hash_bucket.py +5 -5
- deltacat/compute/compactor/steps/materialize.py +9 -7
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +6 -5
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +17 -14
- deltacat/compute/compactor_v2/constants.py +30 -1
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +33 -8
- deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +267 -55
- deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +11 -4
- deltacat/compute/compactor_v2/utils/merge.py +15 -2
- deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
- deltacat/compute/compactor_v2/utils/task_options.py +45 -33
- deltacat/compute/converter/converter_session.py +145 -32
- deltacat/compute/converter/model/convert_input.py +26 -19
- deltacat/compute/converter/model/convert_input_files.py +33 -16
- deltacat/compute/converter/model/convert_result.py +35 -16
- deltacat/compute/converter/model/converter_session_params.py +24 -21
- deltacat/compute/converter/pyiceberg/catalog.py +21 -18
- deltacat/compute/converter/pyiceberg/overrides.py +18 -9
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
- deltacat/compute/converter/steps/convert.py +157 -50
- deltacat/compute/converter/steps/dedupe.py +24 -11
- deltacat/compute/converter/utils/convert_task_options.py +27 -12
- deltacat/compute/converter/utils/converter_session_utils.py +126 -60
- deltacat/compute/converter/utils/iceberg_columns.py +8 -8
- deltacat/compute/converter/utils/io.py +101 -12
- deltacat/compute/converter/utils/s3u.py +33 -27
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/client.py +19 -8
- deltacat/compute/resource_estimation/delta.py +38 -6
- deltacat/compute/resource_estimation/model.py +8 -0
- deltacat/constants.py +44 -0
- deltacat/docs/autogen/schema/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/examples/compactor/__init__.py +0 -0
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/exceptions.py +66 -4
- deltacat/experimental/catalog/iceberg/impl.py +2 -2
- deltacat/experimental/compatibility/__init__.py +0 -0
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +105 -4
- deltacat/experimental/storage/iceberg/impl.py +5 -3
- deltacat/experimental/storage/iceberg/model.py +7 -3
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/dataset.py +0 -3
- deltacat/experimental/storage/rivulet/metastore/delta.py +0 -2
- deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +3 -2
- deltacat/io/datasource/deltacat_datasource.py +0 -1
- deltacat/storage/__init__.py +20 -2
- deltacat/storage/interface.py +54 -32
- deltacat/storage/main/impl.py +1494 -541
- deltacat/storage/model/delta.py +27 -3
- deltacat/storage/model/locator.py +6 -12
- deltacat/storage/model/manifest.py +182 -6
- deltacat/storage/model/metafile.py +151 -78
- deltacat/storage/model/namespace.py +8 -1
- deltacat/storage/model/partition.py +117 -42
- deltacat/storage/model/schema.py +2427 -159
- deltacat/storage/model/sort_key.py +40 -0
- deltacat/storage/model/stream.py +9 -2
- deltacat/storage/model/table.py +12 -1
- deltacat/storage/model/table_version.py +11 -0
- deltacat/storage/model/transaction.py +1184 -208
- deltacat/storage/model/transform.py +81 -2
- deltacat/storage/model/types.py +48 -26
- deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1606 -70
- deltacat/tests/catalog/test_catalogs.py +54 -11
- deltacat/tests/catalog/test_default_catalog_impl.py +12152 -71
- deltacat/tests/compute/compact_partition_test_cases.py +35 -8
- deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
- deltacat/tests/compute/compactor/utils/test_io.py +124 -120
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
- deltacat/tests/compute/conftest.py +8 -44
- deltacat/tests/compute/converter/test_convert_session.py +675 -490
- deltacat/tests/compute/converter/utils.py +15 -6
- deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
- deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
- deltacat/tests/compute/test_compact_partition_params.py +13 -8
- deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +716 -43
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/storage/main/test_main_storage.py +6900 -95
- deltacat/tests/storage/model/test_metafile_io.py +78 -173
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +171 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_transaction.py +393 -48
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +988 -4
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/pyarrow.py +52 -21
- deltacat/tests/test_utils/storage.py +23 -34
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +121 -31
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1370 -89
- deltacat/types/media.py +221 -11
- deltacat/types/tables.py +2329 -59
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +411 -150
- deltacat/utils/filesystem.py +100 -0
- deltacat/utils/metafile_locator.py +2 -1
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +658 -27
- deltacat/utils/pyarrow.py +1258 -213
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +56 -15
- deltacat-2.0.0b12.dist-info/METADATA +1163 -0
- {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/RECORD +183 -145
- {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/WHEEL +1 -1
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1236
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-2.0.0b11.dist-info/METADATA +0 -67
- /deltacat/{compute/merge_on_read/model → docs}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/utils → docs/autogen}/__init__.py +0 -0
- {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info/licenses}/LICENSE +0 -0
- {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/top_level.txt +0 -0
deltacat/tests/storage/model/test_sort_scheme.py
@@ -0,0 +1,90 @@
+import pytest
+
+from deltacat.storage import (
+    SortKey,
+    SortScheme,
+    SortOrder,
+    NullOrder,
+)
+
+
+def test_sort_scheme_validates_empty_keys():
+    # When creating a sort scheme with empty keys list
+    with pytest.raises(ValueError, match="Sort scheme cannot have empty keys list"):
+        SortScheme.of(
+            keys=[],
+            name="test_sort_scheme",
+            scheme_id="test_sort_scheme_id",
+        )
+
+
+def test_sort_scheme_validates_duplicate_keys():
+    # When creating a sort scheme with duplicate keys
+    with pytest.raises(ValueError, match="Duplicate sort key found: col1"):
+        SortScheme.of(
+            keys=[
+                SortKey.of(
+                    key=["col1"],
+                    sort_order=SortOrder.ASCENDING,
+                    null_order=NullOrder.AT_END,
+                ),
+                SortKey.of(
+                    key=["col1"],  # Duplicate key
+                    sort_order=SortOrder.DESCENDING,
+                    null_order=NullOrder.AT_START,
+                ),
+            ],
+            name="test_sort_scheme",
+            scheme_id="test_sort_scheme_id",
+        )
+
+
+def test_sort_scheme_allows_valid_keys():
+    # When creating a sort scheme with valid keys
+    sort_scheme = SortScheme.of(
+        keys=[
+            SortKey.of(
+                key=["col1"],
+                sort_order=SortOrder.ASCENDING,
+                null_order=NullOrder.AT_END,
+            ),
+            SortKey.of(
+                key=["col2"],
+                sort_order=SortOrder.DESCENDING,
+                null_order=NullOrder.AT_END,
+            ),
+        ],
+        name="test_sort_scheme",
+        scheme_id="test_sort_scheme_id",
+    )
+
+    # Then it should succeed
+    assert sort_scheme is not None
+    assert len(sort_scheme.keys) == 2
+    assert sort_scheme.name == "test_sort_scheme"
+    assert sort_scheme.id == "test_sort_scheme_id"
+
+
+def test_sort_scheme_validates_null_order_consistency():
+    # When creating a sort scheme with inconsistent null orders
+    with pytest.raises(
+        ValueError, match="All arrow sort keys must use the same null order"
+    ):
+        sort_scheme = SortScheme.of(
+            keys=[
+                SortKey.of(
+                    key=["col1"],
+                    sort_order=SortOrder.ASCENDING,
+                    null_order=NullOrder.AT_END,
+                ),
+                SortKey.of(
+                    key=["col2"],
+                    sort_order=SortOrder.DESCENDING,
+                    null_order=NullOrder.AT_START,  # Different null order
+                ),
+            ],
+            name="test_sort_scheme",
+            scheme_id="test_sort_scheme_id",
+        )
+        # Access arrow property to trigger validation
+        sort_scheme.arrow
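The new tests above pin down three validation paths for sort schemes: empty key lists, duplicate keys, and mixed null orders surfaced through the `arrow` property. Below is a minimal sketch of how calling code might guard against them; it assumes only what the tests show (the `SortScheme`, `SortKey`, `SortOrder`, and `NullOrder` imports from `deltacat.storage`, and that validation raises `ValueError`), and the names `scheme` and `example_sort_scheme` are illustrative.

# Minimal sketch based on the tests above; names are illustrative.
from deltacat.storage import NullOrder, SortKey, SortOrder, SortScheme

try:
    scheme = SortScheme.of(
        keys=[
            SortKey.of(key=["col1"], sort_order=SortOrder.ASCENDING, null_order=NullOrder.AT_END),
            SortKey.of(key=["col2"], sort_order=SortOrder.DESCENDING, null_order=NullOrder.AT_END),
        ],
        name="example_sort_scheme",
        scheme_id="example_sort_scheme_id",
    )
    scheme.arrow  # accessing the arrow property triggers the null-order consistency check
except ValueError as e:
    # Raised for empty key lists, duplicate keys, or inconsistent null orders.
    print(f"invalid sort scheme: {e}")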
deltacat/tests/storage/model/test_transaction.py
@@ -1,15 +1,25 @@
 import pytest
+import os
+import pyarrow
+import msgpack
+import posixpath
+
 
 from deltacat.storage import (
     Transaction,
     TransactionOperation,
-    TransactionType,
     TransactionOperationType,
-
-
+    Namespace,
+    NamespaceLocator,
     Metafile,
 )
 
+from deltacat.constants import (
+    TXN_DIR_NAME,
+    RUNNING_TXN_DIR_NAME,
+    PAUSED_TXN_DIR_NAME,
+)
+
 
 class TestAbsToRelative:
     @classmethod
@@ -64,7 +74,7 @@ class TestAbsToRelative:
             Transaction._abs_txn_meta_path_to_relative("/lorem/ipsum/", "")
 
     # Test cases for the relativize_operation_paths function
-    def
+    def test_relativizemetafile_write_paths(self):
         catalog_root = "/catalog/root"
         absolute_paths = [
             "/catalog/root/path/to/metafile1.mpk",
@@ -91,9 +101,7 @@ class TestAbsToRelative:
         # use replace method as setter
         transaction_operation.metafile_write_paths = absolute_paths
         # Create a transaction and relativize paths
-        transaction = Transaction.of(
-            txn_type=TransactionType.APPEND, txn_operations=[transaction_operation]
-        )
+        transaction = Transaction.of([transaction_operation])
         transaction.relativize_operation_paths(transaction_operation, catalog_root)
         # Verify the paths have been correctly relativized
         assert transaction_operation.metafile_write_paths == expected_relative_paths
@@ -125,9 +133,7 @@ class TestAbsToRelative:
         # use replace as setter
         transaction_operation.locator_write_paths = absolute_paths
         # Create a transaction and relativize paths
-        transaction = Transaction.of(
-            txn_type=TransactionType.APPEND, txn_operations=[transaction_operation]
-        )
+        transaction = Transaction.of(txn_operations=[transaction_operation])
         transaction.relativize_operation_paths(transaction_operation, catalog_root)
         # Verify the paths have been correctly relativized
         assert transaction_operation.locator_write_paths == expected_relative_paths
@@ -164,9 +170,7 @@ class TestAbsToRelative:
         transaction_operation.metafile_write_paths = meta_absolute_paths
         transaction_operation.locator_write_paths = loc_absolute_paths
         # Create a transaction and relativize paths
-        transaction = Transaction.of(
-            txn_type=TransactionType.APPEND, txn_operations=[transaction_operation]
-        )
+        transaction = Transaction.of([transaction_operation])
         transaction.relativize_operation_paths(transaction_operation, catalog_root)
         # Verify the paths have been correctly relativized
         assert (
@@ -222,9 +226,7 @@ class TestAbsToRelative:
         transaction_operation.locator_write_paths = loc_absolute_paths
         transaction_operations.append(transaction_operation)
         # Create a transaction and relativize paths
-        transaction = Transaction.of(
-            txn_type=TransactionType.APPEND, txn_operations=transaction_operations
-        )
+        transaction = Transaction.of(transaction_operations)
         for operation in transaction_operations:
             transaction.relativize_operation_paths(operation, catalog_root)
         # Verify the paths have been correctly relativized
@@ -241,9 +243,7 @@ class TestAbsToRelative:
         # Empty paths
         transaction_operation.metafile_write_paths = []
         transaction_operation.locator_write_paths = []
-        transaction = Transaction.of(
-            txn_type=TransactionType.APPEND, txn_operations=[transaction_operation]
-        )
+        transaction = Transaction.of([transaction_operation])
         transaction.relativize_operation_paths(transaction_operation, catalog_root)
         assert transaction_operation.metafile_write_paths == []
         assert transaction_operation.locator_write_paths == []
@@ -257,9 +257,7 @@ class TestAbsToRelative:
             dest_metafile=Metafile({"id": "dummy_metafile_id"}),
         )
         transaction_operation.metafile_write_paths = absolute_paths
-        transaction = Transaction.of(
-            txn_type=TransactionType.APPEND, txn_operations=[transaction_operation]
-        )
+        transaction = Transaction.of([transaction_operation])
         transaction.relativize_operation_paths(transaction_operation, catalog_root)
         assert transaction_operation.metafile_write_paths == expected_paths
 
@@ -279,30 +277,377 @@ class TestAbsToRelative:
             TransactionOperationType.READ_SIBLINGS,
         ]
 
-
-
-
-
-
-            TransactionType.OVERWRITE,
-            TransactionType.READ,
-            TransactionType.RESTATE,
-        ]
-
-        for txn_type in txn_types:
-            transaction_ops = []
-            for op_type in operation_types:
-                transaction_operation = TransactionOperation.of(
-                    operation_type=op_type,
-                    dest_metafile=Metafile({"id": "dummy_metafile_id"}),
+        transaction_ops = []
+        for op_type in operation_types:
+            transaction_operation = TransactionOperation.of(
+                operation_type=op_type,
+                dest_metafile=Metafile({"id": "dummy_metafile_id"}),
             )
-
-
-
-
-
+            transaction_operation.metafile_write_paths = absolute_paths
+            transaction_ops.append(transaction_operation)
+            transaction = Transaction.of([transaction_operation])
+            transaction.relativize_operation_paths(transaction_operation, catalog_root)
+            # Assert paths are relativized correctly
+            assert (
+                transaction_operation.metafile_write_paths == expected_paths
+            ), f"Failed for operation type {op_type}"
+
+
+class TestTransactionPersistence:
+
+    # Verifies that transactions initialized with empty or None operations are marked interactive,
+    # while valid operations are not
+    def test_create_iterative_transaction(self):
+        txn_1 = Transaction.of(txn_operations=[])
+        txn_2 = Transaction.of(txn_operations=None)
+        op = TransactionOperation.of(
+            operation_type=TransactionOperationType.CREATE,
+            dest_metafile=Metafile({"id": "dummy_metafile_id"}),
+        )
+        txn_3 = Transaction.of(txn_operations=[op, op])
+        assert (
+            txn_1.interactive
+        )  # check if constructor detect empty list --> interactive transaction
+        assert (
+            txn_2.interactive
+        )  # check if we can initialize with no list --> interactive transaction
+        assert (
+            not txn_3.interactive
+        )  # check that valid operations_list --> not interactive transaction
+
+    # Builds and commits a transaction step-by-step, then validates the output files and transaction success log
+    def test_commit_iterative_transaction(self, temp_dir):
+        # Create two simple namespaces
+        namespace_locator1 = NamespaceLocator.of(namespace="test_ns_1")
+        namespace_locator2 = NamespaceLocator.of(namespace="test_ns_2")
+        ns1 = Namespace.of(locator=namespace_locator1)
+        ns2 = Namespace.of(locator=namespace_locator2)
+        # Start with an empty transaction (interactive)
+        transaction = Transaction.of()
+        txn = transaction.start(temp_dir)  # operate on deep-copy
+        # Build operations manually and step them in
+        op1 = TransactionOperation.of(
+            operation_type=TransactionOperationType.CREATE,
+            dest_metafile=ns1,
+        )
+        op2 = TransactionOperation.of(
+            operation_type=TransactionOperationType.CREATE,
+            dest_metafile=ns2,
+        )
+        # steps
+        txn.step(op1)
+        txn.step(op2)
+
+        # seal() for interactive transactions
+        write_paths, success_log_path = txn.seal()
+
+        # Check output files exist and are valid
+        deserialized_ns1 = Namespace.read(write_paths[0])
+        deserialized_ns2 = Namespace.read(write_paths[1])
+
+        assert ns1.equivalent_to(deserialized_ns1)
+        assert ns2.equivalent_to(deserialized_ns2)
+        assert success_log_path.endswith(str(txn.end_time))
+
+    # Ensures that stepping and committing a transaction writes non-empty output files and a valid success log
+    def test_commit_iterative_file_creation(self, temp_dir):
+        ns = Namespace.of(locator=NamespaceLocator.of(namespace="check_writes"))
+        txn = Transaction.of().start(temp_dir)
+        op = TransactionOperation.of(TransactionOperationType.CREATE, dest_metafile=ns)
+        txn.step(op)
+        write_paths, success_log_path = txn.seal()
+
+        # check the files were created
+        for path in write_paths:
+            abs_path = os.path.join(temp_dir, path)
+            assert os.path.exists(abs_path)
+            assert os.path.getsize(abs_path) > 0
+
+        # check the success log exists
+        assert os.path.exists(success_log_path)
+        assert os.path.getsize(success_log_path) > 0
+
+    # Confirms that a transaction can be paused, resumed, and successfully committed without data los
+    def test_transaction_pause_and_resume_roundtrip(self, temp_dir):
+        # Create a test namespace
+        ns = Namespace.of(locator=NamespaceLocator.of(namespace="paused_resume_ns"))
+
+        # Start interactive transaction
+        txn = Transaction.of().start(temp_dir)
+        op = TransactionOperation.of(TransactionOperationType.CREATE, dest_metafile=ns)
+
+        txn.step(op)
+
+        # Pause transaction (writes to paused/)
+        txn.pause()
+
+        # Resume transaction (reads from paused/)
+        txn.resume()
+
+        # Commit resumed transaction
+        write_paths, success_log_path = txn.seal()
+
+        # Validate outputs
+        deserialized = Namespace.read(write_paths[0])
+        assert ns.equivalent_to(deserialized)
+        assert os.path.exists(success_log_path)
+        assert success_log_path.endswith(str(txn.end_time))
+
+    # Validates that transaction state, including ID and write paths, is correctly preserved across pause/resume cycles
+    def test_resume_preserves_state_after_pause(self, temp_dir):
+        ns = Namespace.of(locator=NamespaceLocator.of(namespace="resume_state_check"))
+
+        txn = Transaction.of().start(temp_dir)
+        op = TransactionOperation.of(TransactionOperationType.CREATE, dest_metafile=ns)
+
+        txn.step(op)
+        txn_id_before = txn.id
+
+        txn.pause()
+        txn.resume()
+
+        # Ensure the ID and provider are still valid
+        assert txn.id == txn_id_before
+        assert txn._time_provider is not None
+        assert hasattr(txn, "metafile_write_paths")
+        assert len(txn.metafile_write_paths) == 1
+
+        # Check commit still works
+        _, success_log_path = txn.seal()
+        assert os.path.exists(success_log_path)
+
+    # Explicitly checks that fields are preserved
+    def test_resume_preserves_state_after_pause_deep(self, temp_dir):
+        ns = Namespace.of(locator=NamespaceLocator.of(namespace="resume_state_check"))
+
+        txn = Transaction.of().start(temp_dir)
+        op = TransactionOperation.of(TransactionOperationType.CREATE, dest_metafile=ns)
+
+        txn.step(op)
+
+        # Save values before pause
+        txn_id_before = txn.id
+        start_time_before = txn.start_time
+        root_before = txn.catalog_root_normalized
+        meta_paths_before = list(txn.metafile_write_paths)
+        locator_paths_before = list(txn.locator_write_paths)
+
+        txn.pause()
+        txn.resume()
+
+        # Field-by-field checks
+        assert txn.id == txn_id_before, "Transaction ID should be preserved"
+        assert txn._time_provider is not None, "Time provider should be reinitialized"
+        assert txn.start_time == start_time_before, "Start time should be preserved"
+        assert txn.catalog_root_normalized == root_before, "Catalog root should match"
+        assert (
+            txn.metafile_write_paths == meta_paths_before
+        ), "Metafile paths must match"
+        assert (
+            txn.locator_write_paths == locator_paths_before
+        ), "Locator paths must match"
+        assert (
+            isinstance(txn.operations, list) and len(txn.operations) == 1
+        ), "Operations must be restored"
+        assert txn.pause_time is not None, "Pause time should be restored"
+
+        # Final commit still works
+        write_paths, success_log_path = txn.seal()
+        assert os.path.exists(success_log_path)
+
+    # Checks that pausing a transaction moves its log from running/ to paused/ and preserves valid transaction state
+    def test_pause_moves_running_to_paused(self, temp_dir):
+        # Set up a transaction and a single operation
+        locator = NamespaceLocator.of(namespace="pause_test")
+        ns = Namespace.of(locator=locator)
+        txn = Transaction.of().start(temp_dir)
+
+        op = TransactionOperation.of(TransactionOperationType.CREATE, dest_metafile=ns)
+        txn.step(op)
+
+        fs = pyarrow.fs.LocalFileSystem()
+        txn_id = txn.id
+        txn_log_dir = posixpath.join(temp_dir, TXN_DIR_NAME)
+
+        running_path = posixpath.join(txn_log_dir, RUNNING_TXN_DIR_NAME, txn_id)
+        paused_path = posixpath.join(txn_log_dir, PAUSED_TXN_DIR_NAME, txn_id)
+
+        # Sanity check: file should be in running/
+        assert fs.get_file_info(running_path).type == pyarrow.fs.FileType.File
+
+        # Pause transaction
+        txn.pause()
+        # Ensure the running file is deleted
+        assert fs.get_file_info(running_path).type == pyarrow.fs.FileType.NotFound
+
+        # Ensure the paused file exists and contains valid msgpack
+        paused_info = fs.get_file_info(paused_path)
+        assert paused_info.type == pyarrow.fs.FileType.File
+        with fs.open_input_stream(paused_path) as f:
+            data = f.readall()
+            txn_loaded = msgpack.loads(data)
+            assert "operations" in txn_loaded
+
+    # Simulates a full multi-step transaction with multiple pause/resume cycles and verifies correctness of all outputs
+    def test_transaction_pause_and_resume_roundtrip_complex(self, temp_dir):
+        # Step 0: Create an empty interactive transaction
+        txn = Transaction.of().start(temp_dir)
+
+        # Step 1: Add first namespace, pause
+        ns1 = Namespace.of(locator=NamespaceLocator.of(namespace="roundtrip_ns_1"))
+        op1 = TransactionOperation.of(
+            TransactionOperationType.CREATE, dest_metafile=ns1
+        )
+        txn.step(op1)
+        txn.pause()
+
+        # Step 2: Resume, add second namespace, pause
+        txn.resume()
+        ns2 = Namespace.of(locator=NamespaceLocator.of(namespace="roundtrip_ns_2"))
+        op2 = TransactionOperation.of(
+            TransactionOperationType.CREATE, dest_metafile=ns2
+        )
+        txn.step(op2)
+        txn.pause()
+
+        # Step 3: Resume again, add third namespace, commit
+        txn.resume()
+        ns3 = Namespace.of(locator=NamespaceLocator.of(namespace="roundtrip_ns_3"))
+        op3 = TransactionOperation.of(
+            TransactionOperationType.CREATE, dest_metafile=ns3
+        )
+        txn.step(op3)
+
+        # Final commit
+        write_paths, success_log_path = txn.seal()
+
+        # Read and verify written namespaces
+        for i, ns in enumerate([ns1, ns2, ns3]):
+            written_path = write_paths[i]
+            deserialized_ns = Namespace.read(written_path)
+            assert ns.equivalent_to(
+                deserialized_ns
+            ), f"Mismatch in ns{i+1}: {ns} != {deserialized_ns}"
+            assert os.path.exists(written_path), f"Missing file: {written_path}"
+            assert os.path.getsize(written_path) > 0
+
+        # Check success log exists and is correct
+        assert os.path.exists(success_log_path)
+        assert success_log_path.endswith(str(txn.end_time))
+
+    # Repeats a complex pause/resume flow with additional assertions on namespace equality and time consistency
+    def test_transaction_pause_and_resume_roundtrip_complex_2(self, temp_dir):
+        # Step 0: Create an empty interactive transaction
+        txn = Transaction.of().start(temp_dir)
+
+        # Step 1: Add first namespace, pause
+        ns1 = Namespace.of(locator=NamespaceLocator.of(namespace="roundtrip_ns_1"))
+        op1 = TransactionOperation.of(
+            TransactionOperationType.CREATE, dest_metafile=ns1
+        )
+        txn.step(op1)
+        txn.pause()
+
+        # Step 2: Resume, add second namespace, pause
+        txn.resume()
+        ns2 = Namespace.of(locator=NamespaceLocator.of(namespace="roundtrip_ns_2"))
+        op2 = TransactionOperation.of(
+            TransactionOperationType.CREATE, dest_metafile=ns2
+        )
+        txn.step(op2)
+
+        txn.pause()
+
+        # Step 3: Resume again, add third namespace, commit
+        txn.resume()
+        ns3 = Namespace.of(locator=NamespaceLocator.of(namespace="roundtrip_ns_3"))
+        op3 = TransactionOperation.of(
+            TransactionOperationType.CREATE, dest_metafile=ns3
+        )
+        txn.step(op3)
+
+        # Final commit
+        write_paths, success_log_path = txn.seal()
+
+        assert txn.start_time < txn.end_time
+
+        # Read and verify written namespaces
+        for i, ns in enumerate([ns1, ns2, ns3]):
+            written_path = write_paths[i]
+
+            # Confirm file was created and is non-empty
+            assert os.path.exists(written_path), f"Missing file: {written_path}"
+            assert os.path.getsize(written_path) > 0, f"Empty file: {written_path}"
+
+            # Deserialize and verify content
+            deserialized_ns = Namespace.read(written_path)
+            assert ns.equivalent_to(deserialized_ns), f"Namespace mismatch at index {i}"
+            assert ns.locator.namespace == deserialized_ns.locator.namespace
+            assert ns.locator_alias == deserialized_ns.locator_alias
+            assert ns.properties == deserialized_ns.properties
+
+        # Verify success log
+        assert os.path.exists(success_log_path)
+        assert success_log_path.endswith(str(txn.end_time))
+
+
+class TestTransactionCommitMessage:
+    """Test commit message preservation and retrieval for transactions."""
+
+    def test_transaction_with_commit_message(self):
+        """Test that commit messages are stored and retrievable from transactions."""
+        commit_msg = "Test commit message for transaction functionality"
+
+        # Create transaction with commit message
+        txn = Transaction.of(commit_message=commit_msg)
+
+        # Verify commit message is stored correctly
+        assert txn.commit_message == commit_msg
+        assert txn.get("commit_message") == commit_msg
+
+    def test_transaction_without_commit_message(self):
+        """Test that transactions work normally without commit messages."""
+        # Create transaction without commit message
+        txn = Transaction.of()
+
+        # Verify no commit message is stored
+        assert txn.commit_message is None
+        assert txn.get("commit_message") is None
+
+    def test_transaction_commit_message_setter(self):
+        """Test that commit messages can be set after transaction creation."""
+        # Create transaction without commit message
+        txn = Transaction.of()
+        assert txn.commit_message is None
+
+        # Set commit message using property setter
+        commit_msg = "Added commit message after creation"
+        txn.commit_message = commit_msg
+
+        # Verify commit message is stored correctly
+        assert txn.commit_message == commit_msg
+        assert txn.get("commit_message") == commit_msg
+
+    def test_transaction_serialization_with_commit_message(self, temp_dir):
+        """Test that commit messages persist through transaction serialization."""
+        commit_msg = "Serialization test commit message"
+
+        # Create namespace for testing
+        ns = Namespace.of(locator=NamespaceLocator.of(namespace="serialization_test"))
+
+        # Create transaction with commit message
+        txn = Transaction.of(commit_message=commit_msg).start(temp_dir)
+        op = TransactionOperation.of(TransactionOperationType.CREATE, dest_metafile=ns)
+        txn.step(op)
+
+        # Commit transaction (this should serialize the transaction with commit message)
+        _, success_log_path = txn.seal()
+
+        # Read the transaction log and verify commit message persisted
+        txn_read = Transaction.read(success_log_path)
+        assert txn_read.commit_message == commit_msg
+
+        # Verify other transaction properties are intact
+        assert txn_read.start_time == txn.start_time
+        assert txn_read.end_time == txn.end_time
+        assert len(txn_read.operations) == 1