deltacat 2.0.0b11__py3-none-any.whl → 2.0.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. deltacat/__init__.py +78 -3
  2. deltacat/api.py +122 -67
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/conftest.py +0 -18
  6. deltacat/catalog/__init__.py +2 -0
  7. deltacat/catalog/delegate.py +445 -63
  8. deltacat/catalog/interface.py +188 -62
  9. deltacat/catalog/main/impl.py +2417 -271
  10. deltacat/catalog/model/catalog.py +49 -10
  11. deltacat/catalog/model/properties.py +38 -0
  12. deltacat/compute/compactor/compaction_session.py +97 -75
  13. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  14. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  15. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  16. deltacat/compute/compactor/repartition_session.py +8 -21
  17. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  18. deltacat/compute/compactor/steps/materialize.py +9 -7
  19. deltacat/compute/compactor/steps/repartition.py +12 -11
  20. deltacat/compute/compactor/utils/io.py +6 -5
  21. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  22. deltacat/compute/compactor/utils/system_columns.py +3 -1
  23. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  24. deltacat/compute/compactor_v2/constants.py +30 -1
  25. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  26. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  27. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  28. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  29. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  30. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  31. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  32. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  33. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  34. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  35. deltacat/compute/compactor_v2/utils/io.py +11 -4
  36. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  37. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  38. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  39. deltacat/compute/converter/converter_session.py +145 -32
  40. deltacat/compute/converter/model/convert_input.py +26 -19
  41. deltacat/compute/converter/model/convert_input_files.py +33 -16
  42. deltacat/compute/converter/model/convert_result.py +35 -16
  43. deltacat/compute/converter/model/converter_session_params.py +24 -21
  44. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  45. deltacat/compute/converter/pyiceberg/overrides.py +18 -9
  46. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  47. deltacat/compute/converter/steps/convert.py +157 -50
  48. deltacat/compute/converter/steps/dedupe.py +24 -11
  49. deltacat/compute/converter/utils/convert_task_options.py +27 -12
  50. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  51. deltacat/compute/converter/utils/iceberg_columns.py +8 -8
  52. deltacat/compute/converter/utils/io.py +101 -12
  53. deltacat/compute/converter/utils/s3u.py +33 -27
  54. deltacat/compute/janitor.py +205 -0
  55. deltacat/compute/jobs/client.py +19 -8
  56. deltacat/compute/resource_estimation/delta.py +38 -6
  57. deltacat/compute/resource_estimation/model.py +8 -0
  58. deltacat/constants.py +44 -0
  59. deltacat/docs/autogen/schema/__init__.py +0 -0
  60. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  61. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  62. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  63. deltacat/examples/compactor/__init__.py +0 -0
  64. deltacat/examples/compactor/aws/__init__.py +1 -0
  65. deltacat/examples/compactor/bootstrap.py +863 -0
  66. deltacat/examples/compactor/compactor.py +373 -0
  67. deltacat/examples/compactor/explorer.py +473 -0
  68. deltacat/examples/compactor/gcp/__init__.py +1 -0
  69. deltacat/examples/compactor/job_runner.py +439 -0
  70. deltacat/examples/compactor/utils/__init__.py +1 -0
  71. deltacat/examples/compactor/utils/common.py +261 -0
  72. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  73. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  74. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  79. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  80. deltacat/exceptions.py +66 -4
  81. deltacat/experimental/catalog/iceberg/impl.py +2 -2
  82. deltacat/experimental/compatibility/__init__.py +0 -0
  83. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  84. deltacat/experimental/converter_agent/__init__.py +0 -0
  85. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  86. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  87. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  88. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +105 -4
  89. deltacat/experimental/storage/iceberg/impl.py +5 -3
  90. deltacat/experimental/storage/iceberg/model.py +7 -3
  91. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  92. deltacat/experimental/storage/rivulet/dataset.py +0 -3
  93. deltacat/experimental/storage/rivulet/metastore/delta.py +0 -2
  94. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +3 -2
  95. deltacat/io/datasource/deltacat_datasource.py +0 -1
  96. deltacat/storage/__init__.py +20 -2
  97. deltacat/storage/interface.py +54 -32
  98. deltacat/storage/main/impl.py +1494 -541
  99. deltacat/storage/model/delta.py +27 -3
  100. deltacat/storage/model/locator.py +6 -12
  101. deltacat/storage/model/manifest.py +182 -6
  102. deltacat/storage/model/metafile.py +151 -78
  103. deltacat/storage/model/namespace.py +8 -1
  104. deltacat/storage/model/partition.py +117 -42
  105. deltacat/storage/model/schema.py +2427 -159
  106. deltacat/storage/model/sort_key.py +40 -0
  107. deltacat/storage/model/stream.py +9 -2
  108. deltacat/storage/model/table.py +12 -1
  109. deltacat/storage/model/table_version.py +11 -0
  110. deltacat/storage/model/transaction.py +1184 -208
  111. deltacat/storage/model/transform.py +81 -2
  112. deltacat/storage/model/types.py +48 -26
  113. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  114. deltacat/tests/aws/test_s3u.py +2 -31
  115. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1606 -70
  116. deltacat/tests/catalog/test_catalogs.py +54 -11
  117. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -71
  118. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  119. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  120. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  121. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  122. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  123. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  124. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  125. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  126. deltacat/tests/compute/conftest.py +8 -44
  127. deltacat/tests/compute/converter/test_convert_session.py +675 -490
  128. deltacat/tests/compute/converter/utils.py +15 -6
  129. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  130. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  131. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  132. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  133. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  134. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  135. deltacat/tests/compute/test_janitor.py +236 -0
  136. deltacat/tests/compute/test_util_common.py +716 -43
  137. deltacat/tests/compute/test_util_constant.py +0 -1
  138. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  139. deltacat/tests/experimental/__init__.py +1 -0
  140. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  141. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  142. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  143. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  144. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  145. deltacat/tests/storage/model/test_schema.py +171 -0
  146. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  147. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  148. deltacat/tests/storage/model/test_transaction.py +393 -48
  149. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  150. deltacat/tests/test_deltacat_api.py +988 -4
  151. deltacat/tests/test_exceptions.py +9 -5
  152. deltacat/tests/test_utils/pyarrow.py +52 -21
  153. deltacat/tests/test_utils/storage.py +23 -34
  154. deltacat/tests/types/__init__.py +0 -0
  155. deltacat/tests/types/test_tables.py +104 -0
  156. deltacat/tests/utils/exceptions.py +22 -0
  157. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  158. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  159. deltacat/tests/utils/test_daft.py +121 -31
  160. deltacat/tests/utils/test_numpy.py +1193 -0
  161. deltacat/tests/utils/test_pandas.py +1106 -0
  162. deltacat/tests/utils/test_polars.py +1040 -0
  163. deltacat/tests/utils/test_pyarrow.py +1370 -89
  164. deltacat/types/media.py +221 -11
  165. deltacat/types/tables.py +2329 -59
  166. deltacat/utils/arguments.py +33 -1
  167. deltacat/utils/daft.py +411 -150
  168. deltacat/utils/filesystem.py +100 -0
  169. deltacat/utils/metafile_locator.py +2 -1
  170. deltacat/utils/numpy.py +118 -26
  171. deltacat/utils/pandas.py +577 -48
  172. deltacat/utils/polars.py +658 -27
  173. deltacat/utils/pyarrow.py +1258 -213
  174. deltacat/utils/ray_utils/dataset.py +101 -10
  175. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  176. deltacat/utils/url.py +56 -15
  177. deltacat-2.0.0.post1.dist-info/METADATA +1163 -0
  178. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info}/RECORD +183 -145
  179. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info}/WHEEL +1 -1
  180. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  181. deltacat/compute/merge_on_read/__init__.py +0 -4
  182. deltacat/compute/merge_on_read/daft.py +0 -40
  183. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  184. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  185. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  186. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  187. deltacat/tests/local_deltacat_storage/__init__.py +0 -1236
  188. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  189. deltacat/utils/s3fs.py +0 -21
  190. deltacat-2.0.0b11.dist-info/METADATA +0 -67
  191. /deltacat/{compute/merge_on_read/model → docs}/__init__.py +0 -0
  192. /deltacat/{compute/merge_on_read/utils → docs/autogen}/__init__.py +0 -0
  193. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info/licenses}/LICENSE +0 -0
  194. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info}/top_level.txt +0 -0
@@ -1,43 +1,40 @@
1
- import ray
1
+ import tempfile
2
2
  import os
3
- from moto import mock_s3
3
+ from typing import Any, Callable, Dict, List, Optional, Set
4
4
  import pytest
5
- import boto3
6
- from boto3.resources.base import ServiceResource
7
5
  import pyarrow as pa
6
+ import ray
7
+
8
8
  from deltacat.io.file_object_store import FileObjectStore
9
9
  from pytest_benchmark.fixture import BenchmarkFixture
10
- import tempfile
11
10
 
12
11
  from deltacat.tests.compute.test_util_constant import (
13
- TEST_S3_RCF_BUCKET_NAME,
14
12
  DEFAULT_NUM_WORKERS,
15
13
  DEFAULT_WORKER_INSTANCE_CPUS,
16
14
  )
17
15
  from deltacat.tests.compute.test_util_common import (
18
- get_rcf,
16
+ get_rci_from_partition,
17
+ read_audit_file,
18
+ PartitionKey,
19
+ get_compacted_delta_locator_from_partition,
19
20
  )
20
- from deltacat.tests.test_utils.utils import read_s3_contents
21
- from deltacat.compute.compactor.model.compactor_version import CompactorVersion
22
21
  from deltacat.tests.compute.test_util_common import (
23
- get_compacted_delta_locator_from_rcf,
22
+ create_src_w_deltas_destination_rebase_w_deltas_strategy_main,
24
23
  )
24
+
25
+ from deltacat.compute.compactor.model.compactor_version import CompactorVersion
25
26
  from deltacat.compute.compactor.model.compaction_session_audit_info import (
26
27
  CompactionSessionAuditInfo,
27
28
  )
28
- from deltacat.tests.compute.test_util_create_table_deltas_repo import (
29
- create_src_w_deltas_destination_rebase_w_deltas_strategy,
30
- )
31
29
  from deltacat.tests.compute.compact_partition_rebase_test_cases import (
32
30
  REBASE_TEST_CASES,
33
31
  )
34
- from typing import Any, Callable, Dict, List, Optional, Set
35
- from deltacat.types.media import StorageType
32
+ from deltacat.types.media import StorageType, ContentType
36
33
  from deltacat.storage import (
37
34
  DeltaLocator,
38
35
  Partition,
36
+ metastore,
39
37
  )
40
- from deltacat.types.media import ContentType
41
38
  from deltacat.compute.compactor.model.compact_partition_params import (
42
39
  CompactPartitionParams,
43
40
  )
@@ -48,6 +45,7 @@ from deltacat.utils.placement import (
48
45
  PlacementGroupManager,
49
46
  )
50
47
 
48
+
51
49
  """
52
50
  MODULE scoped fixtures
53
51
  """
@@ -60,29 +58,24 @@ def setup_ray_cluster():
60
58
  ray.shutdown()
61
59
 
62
60
 
63
- @pytest.fixture(autouse=True, scope="module")
64
- def mock_aws_credential():
65
- os.environ["AWS_ACCESS_KEY_ID"] = "testing"
66
- os.environ["AWS_SECRET_ACCESS_ID"] = "testing"
67
- os.environ["AWS_SECURITY_TOKEN"] = "testing"
68
- os.environ["AWS_SESSION_TOKEN"] = "testing"
69
- os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
70
- yield
71
-
61
+ """
62
+ FUNCTION scoped fixtures
63
+ """
72
64
 
73
- @pytest.fixture(scope="module")
74
- def s3_resource(mock_aws_credential):
75
- with mock_s3():
76
- yield boto3.resource("s3")
77
65
 
66
+ @pytest.fixture(autouse=True, scope="function")
67
+ def enable_bucketing_spec_validation(monkeypatch):
68
+ """
69
+ Enable the bucketing spec validation for all tests.
70
+ This will help catch hash bucket drift in testing.
71
+ """
72
+ import deltacat.compute.compactor_v2.steps.merge
78
73
 
79
- @pytest.fixture(autouse=True, scope="module")
80
- def setup_compaction_artifacts_s3_bucket(s3_resource: ServiceResource):
81
- s3_resource.create_bucket(
82
- ACL="authenticated-read",
83
- Bucket=TEST_S3_RCF_BUCKET_NAME,
74
+ monkeypatch.setattr(
75
+ deltacat.compute.compactor_v2.steps.merge,
76
+ "BUCKETING_SPEC_COMPLIANCE_PROFILE",
77
+ "ASSERT",
84
78
  )
85
- yield
86
79
 
87
80
 
88
81
  @pytest.mark.parametrize(
@@ -155,14 +148,13 @@ def setup_compaction_artifacts_s3_bucket(s3_resource: ServiceResource):
155
148
  ],
156
149
  ids=[test_name for test_name in REBASE_TEST_CASES],
157
150
  )
158
- def test_compact_partition_rebase_same_source_and_destination(
151
+ def test_compact_partition_rebase_same_source_and_destination_main(
159
152
  mocker,
160
- s3_resource: ServiceResource,
161
- local_deltacat_storage_kwargs: Dict[str, Any],
153
+ main_deltacat_storage_kwargs: Dict[str, Any],
162
154
  test_name: str,
163
155
  primary_keys: Set[str],
164
156
  sort_keys: List[Optional[Any]],
165
- partition_keys_param: Optional[List[Any]],
157
+ partition_keys_param: Optional[List[PartitionKey]],
166
158
  partition_values_param: List[Optional[str]],
167
159
  input_deltas_param: List[pa.Array],
168
160
  input_deltas_delta_type: str,
@@ -181,20 +173,20 @@ def test_compact_partition_rebase_same_source_and_destination(
181
173
  compact_partition_func: Callable,
182
174
  benchmark: BenchmarkFixture,
183
175
  ):
184
- import deltacat.tests.local_deltacat_storage as ds
185
-
186
- ds_mock_kwargs = local_deltacat_storage_kwargs
176
+ ds_mock_kwargs = main_deltacat_storage_kwargs
187
177
  """
188
178
  This test tests the scenario where source partition locator == destination partition locator,
189
179
  but rebase source partition locator is different.
190
180
  This scenario could occur when hash bucket count changes.
181
+
182
+ This version uses the main metastore implementation instead of local storage.
191
183
  """
192
184
  partition_keys = partition_keys_param
193
185
  (
194
186
  source_table_stream,
195
187
  _,
196
188
  rebased_table_stream,
197
- ) = create_src_w_deltas_destination_rebase_w_deltas_strategy(
189
+ ) = create_src_w_deltas_destination_rebase_w_deltas_strategy_main(
198
190
  sort_keys,
199
191
  partition_keys,
200
192
  input_deltas_param,
@@ -202,14 +194,31 @@ def test_compact_partition_rebase_same_source_and_destination(
202
194
  partition_values_param,
203
195
  ds_mock_kwargs,
204
196
  )
205
- source_partition: Partition = ds.get_partition(
197
+
198
+ # Convert partition values for partition lookup (same as in the helper function)
199
+ converted_partition_values_for_lookup = partition_values_param
200
+ if partition_values_param and partition_keys:
201
+ converted_partition_values_for_lookup = []
202
+ for i, (value, pk) in enumerate(zip(partition_values_param, partition_keys)):
203
+ if pk.key_type.value == "int": # Use .value to get string representation
204
+ converted_partition_values_for_lookup.append(int(value))
205
+ else:
206
+ converted_partition_values_for_lookup.append(value)
207
+
208
+ source_partition: Partition = metastore.get_partition(
206
209
  source_table_stream.locator,
207
- partition_values_param,
210
+ converted_partition_values_for_lookup,
208
211
  **ds_mock_kwargs,
209
212
  )
210
- rebased_partition: Partition = ds.get_partition(
213
+ rebased_partition: Partition = metastore.get_partition(
211
214
  rebased_table_stream.locator,
212
- partition_values_param,
215
+ converted_partition_values_for_lookup,
216
+ **ds_mock_kwargs,
217
+ )
218
+ all_column_names = metastore.get_table_version_column_names(
219
+ rebased_table_stream.locator.table_locator.namespace,
220
+ rebased_table_stream.locator.table_locator.table_name,
221
+ rebased_table_stream.locator.table_version_locator.table_version,
213
222
  **ds_mock_kwargs,
214
223
  )
215
224
  num_workers, worker_instance_cpu = DEFAULT_NUM_WORKERS, DEFAULT_WORKER_INSTANCE_CPUS
@@ -224,10 +233,10 @@ def test_compact_partition_rebase_same_source_and_destination(
224
233
  with tempfile.TemporaryDirectory() as test_dir:
225
234
  compact_partition_params = CompactPartitionParams.of(
226
235
  {
227
- "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
236
+ "catalog": ds_mock_kwargs.get("inner"),
228
237
  "compacted_file_content_type": ContentType.PARQUET,
229
238
  "dd_max_parallelism_ratio": 1.0,
230
- "deltacat_storage": ds,
239
+ "deltacat_storage": metastore,
231
240
  "deltacat_storage_kwargs": ds_mock_kwargs,
232
241
  "destination_partition_locator": rebased_partition.locator,
233
242
  "hash_bucket_count": hash_bucket_count_param,
@@ -239,11 +248,11 @@ def test_compact_partition_rebase_same_source_and_destination(
239
248
  "object_store": FileObjectStore(test_dir),
240
249
  "pg_config": pgm,
241
250
  "primary_keys": primary_keys,
251
+ "all_column_names": all_column_names,
242
252
  "read_kwargs_provider": read_kwargs_provider_param,
243
253
  "rebase_source_partition_locator": source_partition.locator,
244
254
  "rebase_source_partition_high_watermark": rebased_partition.stream_position,
245
255
  "records_per_compacted_file": records_per_compacted_file_param,
246
- "s3_client_kwargs": {},
247
256
  "source_partition_locator": rebased_partition.locator,
248
257
  "sort_keys": sort_keys if sort_keys else None,
249
258
  "drop_duplicates": drop_duplicates_param,
@@ -260,16 +269,14 @@ def test_compact_partition_rebase_same_source_and_destination(
260
269
  object_store_put_many_spy = mocker.spy(FileObjectStore, "put_many")
261
270
 
262
271
  # execute
263
- rcf_file_s3_uri = benchmark(compact_partition_func, compact_partition_params)
272
+ benchmark(compact_partition_func, compact_partition_params)
264
273
 
265
- round_completion_info: RoundCompletionInfo = get_rcf(
266
- s3_resource, rcf_file_s3_uri
267
- )
268
- audit_bucket, audit_key = RoundCompletionInfo.get_audit_bucket_name_and_key(
269
- round_completion_info.compaction_audit_url
274
+ # Get RoundCompletionInfo from the compacted partition
275
+ round_completion_info: RoundCompletionInfo = get_rci_from_partition(
276
+ rebased_partition.locator, metastore, catalog=ds_mock_kwargs.get("inner")
270
277
  )
271
278
 
272
- # assert if RCF covers all files
279
+ # assert if RCI covers all files
273
280
  if compactor_version != CompactorVersion.V1.value:
274
281
  previous_end = None
275
282
  for start, end in round_completion_info.hb_index_to_entry_range.values():
@@ -280,8 +287,12 @@ def test_compact_partition_rebase_same_source_and_destination(
280
287
  == round_completion_info.compacted_pyarrow_write_result.files
281
288
  )
282
289
 
283
- compaction_audit_obj: Dict[str, Any] = read_s3_contents(
284
- s3_resource, audit_bucket, audit_key
290
+ # Get catalog root for audit file resolution
291
+ catalog = ds_mock_kwargs.get("inner")
292
+ catalog_root = catalog.root
293
+
294
+ compaction_audit_obj: Dict[str, Any] = read_audit_file(
295
+ round_completion_info.compaction_audit_url, catalog_root
285
296
  )
286
297
  compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
287
298
  **compaction_audit_obj
@@ -291,13 +302,17 @@ def test_compact_partition_rebase_same_source_and_destination(
291
302
  assert (
292
303
  execute_compaction_result_spy.call_args.args[-1] is False
293
304
  ), "Table version erroneously marked as in-place compacted!"
294
- compacted_delta_locator: DeltaLocator = get_compacted_delta_locator_from_rcf(
295
- s3_resource, rcf_file_s3_uri
305
+ compacted_delta_locator: DeltaLocator = (
306
+ get_compacted_delta_locator_from_partition(
307
+ rebased_partition.locator,
308
+ metastore,
309
+ catalog=ds_mock_kwargs.get("inner"),
310
+ )
296
311
  )
297
312
  assert (
298
313
  compacted_delta_locator.stream_position == last_stream_position_to_compact
299
314
  ), "Compacted delta locator must be equal to last stream position"
300
- tables = ds.download_delta(
315
+ tables = metastore.download_delta(
301
316
  compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
302
317
  )
303
318
  actual_rebase_compacted_table = pa.concat_tables(tables)