deltacat 2.0.0b11__py3-none-any.whl → 2.0.0b12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +78 -3
- deltacat/api.py +122 -67
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/conftest.py +0 -18
- deltacat/catalog/__init__.py +2 -0
- deltacat/catalog/delegate.py +445 -63
- deltacat/catalog/interface.py +188 -62
- deltacat/catalog/main/impl.py +2417 -271
- deltacat/catalog/model/catalog.py +49 -10
- deltacat/catalog/model/properties.py +38 -0
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
- deltacat/compute/compactor/model/round_completion_info.py +16 -6
- deltacat/compute/compactor/repartition_session.py +8 -21
- deltacat/compute/compactor/steps/hash_bucket.py +5 -5
- deltacat/compute/compactor/steps/materialize.py +9 -7
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +6 -5
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +17 -14
- deltacat/compute/compactor_v2/constants.py +30 -1
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +33 -8
- deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +267 -55
- deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +11 -4
- deltacat/compute/compactor_v2/utils/merge.py +15 -2
- deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
- deltacat/compute/compactor_v2/utils/task_options.py +45 -33
- deltacat/compute/converter/converter_session.py +145 -32
- deltacat/compute/converter/model/convert_input.py +26 -19
- deltacat/compute/converter/model/convert_input_files.py +33 -16
- deltacat/compute/converter/model/convert_result.py +35 -16
- deltacat/compute/converter/model/converter_session_params.py +24 -21
- deltacat/compute/converter/pyiceberg/catalog.py +21 -18
- deltacat/compute/converter/pyiceberg/overrides.py +18 -9
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
- deltacat/compute/converter/steps/convert.py +157 -50
- deltacat/compute/converter/steps/dedupe.py +24 -11
- deltacat/compute/converter/utils/convert_task_options.py +27 -12
- deltacat/compute/converter/utils/converter_session_utils.py +126 -60
- deltacat/compute/converter/utils/iceberg_columns.py +8 -8
- deltacat/compute/converter/utils/io.py +101 -12
- deltacat/compute/converter/utils/s3u.py +33 -27
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/client.py +19 -8
- deltacat/compute/resource_estimation/delta.py +38 -6
- deltacat/compute/resource_estimation/model.py +8 -0
- deltacat/constants.py +44 -0
- deltacat/docs/autogen/schema/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/examples/compactor/__init__.py +0 -0
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/exceptions.py +66 -4
- deltacat/experimental/catalog/iceberg/impl.py +2 -2
- deltacat/experimental/compatibility/__init__.py +0 -0
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +105 -4
- deltacat/experimental/storage/iceberg/impl.py +5 -3
- deltacat/experimental/storage/iceberg/model.py +7 -3
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/dataset.py +0 -3
- deltacat/experimental/storage/rivulet/metastore/delta.py +0 -2
- deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +3 -2
- deltacat/io/datasource/deltacat_datasource.py +0 -1
- deltacat/storage/__init__.py +20 -2
- deltacat/storage/interface.py +54 -32
- deltacat/storage/main/impl.py +1494 -541
- deltacat/storage/model/delta.py +27 -3
- deltacat/storage/model/locator.py +6 -12
- deltacat/storage/model/manifest.py +182 -6
- deltacat/storage/model/metafile.py +151 -78
- deltacat/storage/model/namespace.py +8 -1
- deltacat/storage/model/partition.py +117 -42
- deltacat/storage/model/schema.py +2427 -159
- deltacat/storage/model/sort_key.py +40 -0
- deltacat/storage/model/stream.py +9 -2
- deltacat/storage/model/table.py +12 -1
- deltacat/storage/model/table_version.py +11 -0
- deltacat/storage/model/transaction.py +1184 -208
- deltacat/storage/model/transform.py +81 -2
- deltacat/storage/model/types.py +48 -26
- deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1606 -70
- deltacat/tests/catalog/test_catalogs.py +54 -11
- deltacat/tests/catalog/test_default_catalog_impl.py +12152 -71
- deltacat/tests/compute/compact_partition_test_cases.py +35 -8
- deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
- deltacat/tests/compute/compactor/utils/test_io.py +124 -120
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
- deltacat/tests/compute/conftest.py +8 -44
- deltacat/tests/compute/converter/test_convert_session.py +675 -490
- deltacat/tests/compute/converter/utils.py +15 -6
- deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
- deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
- deltacat/tests/compute/test_compact_partition_params.py +13 -8
- deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +716 -43
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/storage/main/test_main_storage.py +6900 -95
- deltacat/tests/storage/model/test_metafile_io.py +78 -173
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +171 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_transaction.py +393 -48
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +988 -4
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/pyarrow.py +52 -21
- deltacat/tests/test_utils/storage.py +23 -34
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +121 -31
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1370 -89
- deltacat/types/media.py +221 -11
- deltacat/types/tables.py +2329 -59
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +411 -150
- deltacat/utils/filesystem.py +100 -0
- deltacat/utils/metafile_locator.py +2 -1
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +658 -27
- deltacat/utils/pyarrow.py +1258 -213
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +56 -15
- deltacat-2.0.0b12.dist-info/METADATA +1163 -0
- {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/RECORD +183 -145
- {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/WHEEL +1 -1
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1236
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-2.0.0b11.dist-info/METADATA +0 -67
- /deltacat/{compute/merge_on_read/model → docs}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/utils → docs/autogen}/__init__.py +0 -0
- {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info/licenses}/LICENSE +0 -0
- {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/top_level.txt +0 -0
@@ -1,43 +1,40 @@
|
|
1
|
-
import
|
1
|
+
import tempfile
|
2
2
|
import os
|
3
|
-
from
|
3
|
+
from typing import Any, Callable, Dict, List, Optional, Set
|
4
4
|
import pytest
|
5
|
-
import boto3
|
6
|
-
from boto3.resources.base import ServiceResource
|
7
5
|
import pyarrow as pa
|
6
|
+
import ray
|
7
|
+
|
8
8
|
from deltacat.io.file_object_store import FileObjectStore
|
9
9
|
from pytest_benchmark.fixture import BenchmarkFixture
|
10
|
-
import tempfile
|
11
10
|
|
12
11
|
from deltacat.tests.compute.test_util_constant import (
|
13
|
-
TEST_S3_RCF_BUCKET_NAME,
|
14
12
|
DEFAULT_NUM_WORKERS,
|
15
13
|
DEFAULT_WORKER_INSTANCE_CPUS,
|
16
14
|
)
|
17
15
|
from deltacat.tests.compute.test_util_common import (
|
18
|
-
|
16
|
+
get_rci_from_partition,
|
17
|
+
read_audit_file,
|
18
|
+
PartitionKey,
|
19
|
+
get_compacted_delta_locator_from_partition,
|
19
20
|
)
|
20
|
-
from deltacat.tests.test_utils.utils import read_s3_contents
|
21
|
-
from deltacat.compute.compactor.model.compactor_version import CompactorVersion
|
22
21
|
from deltacat.tests.compute.test_util_common import (
|
23
|
-
|
22
|
+
create_src_w_deltas_destination_rebase_w_deltas_strategy_main,
|
24
23
|
)
|
24
|
+
|
25
|
+
from deltacat.compute.compactor.model.compactor_version import CompactorVersion
|
25
26
|
from deltacat.compute.compactor.model.compaction_session_audit_info import (
|
26
27
|
CompactionSessionAuditInfo,
|
27
28
|
)
|
28
|
-
from deltacat.tests.compute.test_util_create_table_deltas_repo import (
|
29
|
-
create_src_w_deltas_destination_rebase_w_deltas_strategy,
|
30
|
-
)
|
31
29
|
from deltacat.tests.compute.compact_partition_rebase_test_cases import (
|
32
30
|
REBASE_TEST_CASES,
|
33
31
|
)
|
34
|
-
from
|
35
|
-
from deltacat.types.media import StorageType
|
32
|
+
from deltacat.types.media import StorageType, ContentType
|
36
33
|
from deltacat.storage import (
|
37
34
|
DeltaLocator,
|
38
35
|
Partition,
|
36
|
+
metastore,
|
39
37
|
)
|
40
|
-
from deltacat.types.media import ContentType
|
41
38
|
from deltacat.compute.compactor.model.compact_partition_params import (
|
42
39
|
CompactPartitionParams,
|
43
40
|
)
|
@@ -48,6 +45,7 @@ from deltacat.utils.placement import (
|
|
48
45
|
PlacementGroupManager,
|
49
46
|
)
|
50
47
|
|
48
|
+
|
51
49
|
"""
|
52
50
|
MODULE scoped fixtures
|
53
51
|
"""
|
@@ -60,29 +58,24 @@ def setup_ray_cluster():
|
|
60
58
|
ray.shutdown()
|
61
59
|
|
62
60
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
os.environ["AWS_SECRET_ACCESS_ID"] = "testing"
|
67
|
-
os.environ["AWS_SECURITY_TOKEN"] = "testing"
|
68
|
-
os.environ["AWS_SESSION_TOKEN"] = "testing"
|
69
|
-
os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
|
70
|
-
yield
|
71
|
-
|
61
|
+
"""
|
62
|
+
FUNCTION scoped fixtures
|
63
|
+
"""
|
72
64
|
|
73
|
-
@pytest.fixture(scope="module")
|
74
|
-
def s3_resource(mock_aws_credential):
|
75
|
-
with mock_s3():
|
76
|
-
yield boto3.resource("s3")
|
77
65
|
|
66
|
+
@pytest.fixture(autouse=True, scope="function")
|
67
|
+
def enable_bucketing_spec_validation(monkeypatch):
|
68
|
+
"""
|
69
|
+
Enable the bucketing spec validation for all tests.
|
70
|
+
This will help catch hash bucket drift in testing.
|
71
|
+
"""
|
72
|
+
import deltacat.compute.compactor_v2.steps.merge
|
78
73
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
Bucket=TEST_S3_RCF_BUCKET_NAME,
|
74
|
+
monkeypatch.setattr(
|
75
|
+
deltacat.compute.compactor_v2.steps.merge,
|
76
|
+
"BUCKETING_SPEC_COMPLIANCE_PROFILE",
|
77
|
+
"ASSERT",
|
84
78
|
)
|
85
|
-
yield
|
86
79
|
|
87
80
|
|
88
81
|
@pytest.mark.parametrize(
|
@@ -155,14 +148,13 @@ def setup_compaction_artifacts_s3_bucket(s3_resource: ServiceResource):
|
|
155
148
|
],
|
156
149
|
ids=[test_name for test_name in REBASE_TEST_CASES],
|
157
150
|
)
|
158
|
-
def
|
151
|
+
def test_compact_partition_rebase_same_source_and_destination_main(
|
159
152
|
mocker,
|
160
|
-
|
161
|
-
local_deltacat_storage_kwargs: Dict[str, Any],
|
153
|
+
main_deltacat_storage_kwargs: Dict[str, Any],
|
162
154
|
test_name: str,
|
163
155
|
primary_keys: Set[str],
|
164
156
|
sort_keys: List[Optional[Any]],
|
165
|
-
partition_keys_param: Optional[List[
|
157
|
+
partition_keys_param: Optional[List[PartitionKey]],
|
166
158
|
partition_values_param: List[Optional[str]],
|
167
159
|
input_deltas_param: List[pa.Array],
|
168
160
|
input_deltas_delta_type: str,
|
@@ -181,20 +173,20 @@ def test_compact_partition_rebase_same_source_and_destination(
|
|
181
173
|
compact_partition_func: Callable,
|
182
174
|
benchmark: BenchmarkFixture,
|
183
175
|
):
|
184
|
-
|
185
|
-
|
186
|
-
ds_mock_kwargs = local_deltacat_storage_kwargs
|
176
|
+
ds_mock_kwargs = main_deltacat_storage_kwargs
|
187
177
|
"""
|
188
178
|
This test tests the scenario where source partition locator == destination partition locator,
|
189
179
|
but rebase source partition locator is different.
|
190
180
|
This scenario could occur when hash bucket count changes.
|
181
|
+
|
182
|
+
This version uses the main metastore implementation instead of local storage.
|
191
183
|
"""
|
192
184
|
partition_keys = partition_keys_param
|
193
185
|
(
|
194
186
|
source_table_stream,
|
195
187
|
_,
|
196
188
|
rebased_table_stream,
|
197
|
-
) =
|
189
|
+
) = create_src_w_deltas_destination_rebase_w_deltas_strategy_main(
|
198
190
|
sort_keys,
|
199
191
|
partition_keys,
|
200
192
|
input_deltas_param,
|
@@ -202,14 +194,31 @@ def test_compact_partition_rebase_same_source_and_destination(
|
|
202
194
|
partition_values_param,
|
203
195
|
ds_mock_kwargs,
|
204
196
|
)
|
205
|
-
|
197
|
+
|
198
|
+
# Convert partition values for partition lookup (same as in the helper function)
|
199
|
+
converted_partition_values_for_lookup = partition_values_param
|
200
|
+
if partition_values_param and partition_keys:
|
201
|
+
converted_partition_values_for_lookup = []
|
202
|
+
for i, (value, pk) in enumerate(zip(partition_values_param, partition_keys)):
|
203
|
+
if pk.key_type.value == "int": # Use .value to get string representation
|
204
|
+
converted_partition_values_for_lookup.append(int(value))
|
205
|
+
else:
|
206
|
+
converted_partition_values_for_lookup.append(value)
|
207
|
+
|
208
|
+
source_partition: Partition = metastore.get_partition(
|
206
209
|
source_table_stream.locator,
|
207
|
-
|
210
|
+
converted_partition_values_for_lookup,
|
208
211
|
**ds_mock_kwargs,
|
209
212
|
)
|
210
|
-
rebased_partition: Partition =
|
213
|
+
rebased_partition: Partition = metastore.get_partition(
|
211
214
|
rebased_table_stream.locator,
|
212
|
-
|
215
|
+
converted_partition_values_for_lookup,
|
216
|
+
**ds_mock_kwargs,
|
217
|
+
)
|
218
|
+
all_column_names = metastore.get_table_version_column_names(
|
219
|
+
rebased_table_stream.locator.table_locator.namespace,
|
220
|
+
rebased_table_stream.locator.table_locator.table_name,
|
221
|
+
rebased_table_stream.locator.table_version_locator.table_version,
|
213
222
|
**ds_mock_kwargs,
|
214
223
|
)
|
215
224
|
num_workers, worker_instance_cpu = DEFAULT_NUM_WORKERS, DEFAULT_WORKER_INSTANCE_CPUS
|
@@ -224,10 +233,10 @@ def test_compact_partition_rebase_same_source_and_destination(
|
|
224
233
|
with tempfile.TemporaryDirectory() as test_dir:
|
225
234
|
compact_partition_params = CompactPartitionParams.of(
|
226
235
|
{
|
227
|
-
"
|
236
|
+
"catalog": ds_mock_kwargs.get("inner"),
|
228
237
|
"compacted_file_content_type": ContentType.PARQUET,
|
229
238
|
"dd_max_parallelism_ratio": 1.0,
|
230
|
-
"deltacat_storage":
|
239
|
+
"deltacat_storage": metastore,
|
231
240
|
"deltacat_storage_kwargs": ds_mock_kwargs,
|
232
241
|
"destination_partition_locator": rebased_partition.locator,
|
233
242
|
"hash_bucket_count": hash_bucket_count_param,
|
@@ -239,11 +248,11 @@ def test_compact_partition_rebase_same_source_and_destination(
|
|
239
248
|
"object_store": FileObjectStore(test_dir),
|
240
249
|
"pg_config": pgm,
|
241
250
|
"primary_keys": primary_keys,
|
251
|
+
"all_column_names": all_column_names,
|
242
252
|
"read_kwargs_provider": read_kwargs_provider_param,
|
243
253
|
"rebase_source_partition_locator": source_partition.locator,
|
244
254
|
"rebase_source_partition_high_watermark": rebased_partition.stream_position,
|
245
255
|
"records_per_compacted_file": records_per_compacted_file_param,
|
246
|
-
"s3_client_kwargs": {},
|
247
256
|
"source_partition_locator": rebased_partition.locator,
|
248
257
|
"sort_keys": sort_keys if sort_keys else None,
|
249
258
|
"drop_duplicates": drop_duplicates_param,
|
@@ -260,16 +269,14 @@ def test_compact_partition_rebase_same_source_and_destination(
|
|
260
269
|
object_store_put_many_spy = mocker.spy(FileObjectStore, "put_many")
|
261
270
|
|
262
271
|
# execute
|
263
|
-
|
272
|
+
benchmark(compact_partition_func, compact_partition_params)
|
264
273
|
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
audit_bucket, audit_key = RoundCompletionInfo.get_audit_bucket_name_and_key(
|
269
|
-
round_completion_info.compaction_audit_url
|
274
|
+
# Get RoundCompletionInfo from the compacted partition
|
275
|
+
round_completion_info: RoundCompletionInfo = get_rci_from_partition(
|
276
|
+
rebased_partition.locator, metastore, catalog=ds_mock_kwargs.get("inner")
|
270
277
|
)
|
271
278
|
|
272
|
-
# assert if
|
279
|
+
# assert if RCI covers all files
|
273
280
|
if compactor_version != CompactorVersion.V1.value:
|
274
281
|
previous_end = None
|
275
282
|
for start, end in round_completion_info.hb_index_to_entry_range.values():
|
@@ -280,8 +287,12 @@ def test_compact_partition_rebase_same_source_and_destination(
|
|
280
287
|
== round_completion_info.compacted_pyarrow_write_result.files
|
281
288
|
)
|
282
289
|
|
283
|
-
|
284
|
-
|
290
|
+
# Get catalog root for audit file resolution
|
291
|
+
catalog = ds_mock_kwargs.get("inner")
|
292
|
+
catalog_root = catalog.root
|
293
|
+
|
294
|
+
compaction_audit_obj: Dict[str, Any] = read_audit_file(
|
295
|
+
round_completion_info.compaction_audit_url, catalog_root
|
285
296
|
)
|
286
297
|
compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
|
287
298
|
**compaction_audit_obj
|
@@ -291,13 +302,17 @@ def test_compact_partition_rebase_same_source_and_destination(
|
|
291
302
|
assert (
|
292
303
|
execute_compaction_result_spy.call_args.args[-1] is False
|
293
304
|
), "Table version erroneously marked as in-place compacted!"
|
294
|
-
compacted_delta_locator: DeltaLocator =
|
295
|
-
|
305
|
+
compacted_delta_locator: DeltaLocator = (
|
306
|
+
get_compacted_delta_locator_from_partition(
|
307
|
+
rebased_partition.locator,
|
308
|
+
metastore,
|
309
|
+
catalog=ds_mock_kwargs.get("inner"),
|
310
|
+
)
|
296
311
|
)
|
297
312
|
assert (
|
298
313
|
compacted_delta_locator.stream_position == last_stream_position_to_compact
|
299
314
|
), "Compacted delta locator must be equal to last stream position"
|
300
|
-
tables =
|
315
|
+
tables = metastore.download_delta(
|
301
316
|
compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
|
302
317
|
)
|
303
318
|
actual_rebase_compacted_table = pa.concat_tables(tables)
|