deltacat 1.1.14__py3-none-any.whl → 1.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. deltacat/__init__.py +1 -1
  2. deltacat/compute/compactor/compaction_session.py +3 -2
  3. deltacat/compute/compactor/model/compact_partition_params.py +11 -1
  4. deltacat/compute/compactor/model/compaction_session_audit_info.py +2 -2
  5. deltacat/compute/compactor/model/delta_annotated.py +2 -4
  6. deltacat/compute/compactor/steps/hash_bucket.py +2 -3
  7. deltacat/compute/compactor_v2/compaction_session.py +27 -33
  8. deltacat/compute/compactor_v2/constants.py +4 -0
  9. deltacat/compute/compactor_v2/private/compaction_utils.py +112 -67
  10. deltacat/compute/compactor_v2/steps/merge.py +0 -3
  11. deltacat/compute/compactor_v2/utils/delta.py +2 -3
  12. deltacat/compute/compactor_v2/utils/io.py +0 -2
  13. deltacat/compute/compactor_v2/utils/merge.py +0 -1
  14. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +855 -0
  15. deltacat/tests/compute/compactor_v2/test_compaction_session.py +147 -1
  16. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +330 -0
  17. deltacat/tests/compute/test_compact_partition_rebase.py +1 -1
  18. deltacat/tests/compute/test_util_create_table_deltas_repo.py +118 -0
  19. deltacat/tests/local_deltacat_storage/__init__.py +8 -5
  20. {deltacat-1.1.14.dist-info → deltacat-1.1.16.dist-info}/METADATA +1 -1
  21. {deltacat-1.1.14.dist-info → deltacat-1.1.16.dist-info}/RECORD +24 -22
  22. {deltacat-1.1.14.dist-info → deltacat-1.1.16.dist-info}/LICENSE +0 -0
  23. {deltacat-1.1.14.dist-info → deltacat-1.1.16.dist-info}/WHEEL +0 -0
  24. {deltacat-1.1.14.dist-info → deltacat-1.1.16.dist-info}/top_level.txt +0 -0
deltacat/__init__.py CHANGED
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode

 deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))

-__version__ = "1.1.14"
+__version__ = "1.1.16"


 __all__ = [
deltacat/compute/compactor/compaction_session.py CHANGED
@@ -688,8 +688,9 @@ def _execute_compaction_round(
         session_peak_memory
     )

-    compaction_audit.save_round_completion_stats(
-        mat_results, telemetry_time_hb + telemetry_time_dd + telemetry_time_materialize
+    compaction_audit.save_round_completion_stats(mat_results)
+    compaction_audit.set_telemetry_time_in_seconds(
+        telemetry_time_hb + telemetry_time_dd + telemetry_time_materialize
     )

     s3_utils.upload(
deltacat/compute/compactor/model/compact_partition_params.py CHANGED
@@ -22,6 +22,7 @@ from deltacat.compute.compactor_v2.constants import (
     DROP_DUPLICATES,
     TOTAL_MEMORY_BUFFER_PERCENTAGE,
     DEFAULT_DISABLE_COPY_BY_REFERENCE,
+    DEFAULT_NUM_ROUNDS,
 )
 from deltacat.constants import PYARROW_INFLATION_MULTIPLIER
 from deltacat.compute.compactor.utils.sort_key import validate_sort_keys
@@ -102,6 +103,8 @@ class CompactPartitionParams(dict):

         result.metrics_config = params.get("metrics_config")

+        result.num_rounds = params.get("num_rounds", DEFAULT_NUM_ROUNDS)
+
         if not importlib.util.find_spec("memray"):
             result.enable_profiler = False

@@ -189,7 +192,6 @@ class CompactPartitionParams(dict):
             cluster_resources = self.pg_config.resource
             cluster_cpus = cluster_resources["CPU"]
             self.task_max_parallelism = cluster_cpus
-            self["task_max_parallelism"] = self.task_max_parallelism
         return self["task_max_parallelism"]

     @task_max_parallelism.setter
@@ -403,6 +405,14 @@ class CompactPartitionParams(dict):
     def metrics_config(self, config: MetricsConfig) -> None:
         self["metrics_config"] = config

+    @property
+    def num_rounds(self) -> int:
+        return self["num_rounds"]
+
+    @num_rounds.setter
+    def num_rounds(self, num_rounds: int) -> None:
+        self["num_rounds"] = num_rounds
+
     @staticmethod
     def json_handler_for_compact_partition_params(obj):
         """
deltacat/compute/compactor/model/compaction_session_audit_info.py CHANGED
@@ -818,7 +818,8 @@ class CompactionSessionAuditInfo(dict):
         return cluster_util_after_task_latency + telemetry_time

     def save_round_completion_stats(
-        self, mat_results: List[MaterializeResult], total_telemetry_time: float
+        self,
+        mat_results: List[MaterializeResult],
     ) -> None:
         """
         This method saves all the relevant stats after all the steps are completed.
@@ -888,4 +889,3 @@ class CompactionSessionAuditInfo(dict):
         )

         self.set_pyarrow_version(pa.__version__)
-        self.set_telemetry_time_in_seconds(total_telemetry_time)
deltacat/compute/compactor/model/delta_annotated.py CHANGED
@@ -97,8 +97,7 @@ class DeltaAnnotated(Delta):
         for src_da in split_annotated_deltas:
             src_da_annotations = src_da.annotations
             src_da_entries = src_da.manifest.entries
-            assert (
-                len(src_da_annotations) == len(src_da_entries),
+            assert len(src_da_annotations) == len(src_da_entries), (
                 f"Unexpected Error: Length of delta annotations "
                 f"({len(src_da_annotations)}) doesn't mach the length of "
                 f"delta manifest entries ({len(src_da_entries)}).",
@@ -152,8 +151,7 @@ class DeltaAnnotated(Delta):
             da_group_entry_count = 0
             src_da_annotations = src_da.annotations
             src_da_entries = src_da.manifest.entries
-            assert (
-                len(src_da_annotations) == len(src_da_entries),
+            assert len(src_da_annotations) == len(src_da_entries), (
                 f"Unexpected Error: Length of delta annotations "
                 f"({len(src_da_annotations)}) doesn't mach the length of "
                 f"delta manifest entries ({len(src_da_entries)}).",
deltacat/compute/compactor/steps/hash_bucket.py CHANGED
@@ -157,11 +157,10 @@ def _read_delta_file_envelopes(
         **deltacat_storage_kwargs,
     )
     annotations = annotated_delta.annotations
-    assert (
-        len(tables) == len(annotations),
+    assert len(tables) == len(annotations), (
         f"Unexpected Error: Length of downloaded delta manifest tables "
         f"({len(tables)}) doesn't match the length of delta manifest "
-        f"annotations ({len(annotations)}).",
+        f"annotations ({len(annotations)})."
     )
     if not tables:
         return None, 0
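All of the assert rewrites above (and the matching ones in utils/delta.py below) fix the same Python pitfall: wrapping the condition and the message in one set of parentheses builds a two-element tuple, and a non-empty tuple is always truthy, so the assertion can never fire. A standalone illustration of the difference:

tables, annotations = [1, 2], [1, 2, 3]

# Broken form: asserts a (condition, message) tuple. A non-empty tuple
# is always truthy, so this silently passes despite the length mismatch
# (CPython even emits SyntaxWarning: "assertion is always true").
assert (
    len(tables) == len(annotations),
    "lengths differ",
)

# Fixed form: only the message is parenthesized; the condition itself
# is checked, so the mismatch now raises AssertionError.
try:
    assert len(tables) == len(annotations), (
        f"lengths differ: {len(tables)} != {len(annotations)}"
    )
except AssertionError as error:
    print(error)  # lengths differ: 2 != 3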
deltacat/compute/compactor_v2/compaction_session.py CHANGED
@@ -1,4 +1,3 @@
-import numpy as np
 import importlib
 from contextlib import nullcontext
 import logging
@@ -40,9 +39,10 @@ from deltacat.utils.resources import (
 from deltacat.compute.compactor_v2.private.compaction_utils import (
     _fetch_compaction_metadata,
     _build_uniform_deltas,
+    _group_uniform_deltas,
+    _stage_new_partition,
     _run_hash_and_merge,
     _process_merge_results,
-    _upload_compaction_audit,
     _write_new_round_completion_file,
     _commit_compaction_result,
 )
@@ -69,6 +69,10 @@ def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]
     assert (
         params.hash_bucket_count is not None and params.hash_bucket_count >= 1
     ), "hash_bucket_count is a required arg for compactor v2"
+    if params.num_rounds > 1:
+        assert (
+            not params.drop_duplicates
+        ), "num_rounds > 1, drop_duplicates must be False but is True"

     with memray.Tracker(
         "compaction_partition.bin"
@@ -144,32 +148,28 @@ def _execute_compaction(
         delete_strategy,
         delete_file_envelopes,
     ) = build_uniform_deltas_result
-
-    # run merge
-    _run_hash_and_merge_result: tuple[
-        Optional[List[MergeResult]],
-        np.float64,
-        np.float64,
-        Partition,
-    ] = _run_hash_and_merge(
-        params,
-        uniform_deltas,
-        round_completion_info,
-        delete_strategy,
-        delete_file_envelopes,
-        compaction_audit,
-        previous_compacted_delta_manifest,
-    )
-    (
-        merge_results,
-        telemetry_time_hb,
-        telemetry_time_merge,
-        compacted_partition,
-    ) = _run_hash_and_merge_result
+    logger.info(f"Number of rounds parameter is set to: {params.num_rounds}")
+    uniform_deltas_grouped = _group_uniform_deltas(params, uniform_deltas)
+    logger.info(f"Length of grouped uniform deltas is: {len(uniform_deltas_grouped)}")
+    merge_result_list: List[MergeResult] = []
+    compacted_partition = _stage_new_partition(params)
+    for uniform_deltas in uniform_deltas_grouped:
+        # run hash and merge
+        _run_hash_and_merge_result: List[MergeResult] = _run_hash_and_merge(
+            params,
+            uniform_deltas,
+            round_completion_info,
+            delete_strategy,
+            delete_file_envelopes,
+            compaction_audit,
+            previous_compacted_delta_manifest,
+            compacted_partition,
+        )
+        merge_result_list.extend(_run_hash_and_merge_result)
     # process merge results
     process_merge_results: tuple[
         Delta, list[MaterializeResult], dict
-    ] = _process_merge_results(params, merge_results, compaction_audit)
+    ] = _process_merge_results(params, merge_result_list, compaction_audit)
     merged_delta, mat_results, hb_id_to_entry_indices_range = process_merge_results
     # Record information, logging, and return ExecutionCompactionResult
     record_info_msg: str = f" Materialized records: {merged_delta.meta.record_count}"
@@ -198,15 +198,8 @@ def _execute_compaction(
         session_peak_memory
     )

-    compaction_audit.save_round_completion_stats(
-        mat_results, telemetry_time_hb + telemetry_time_merge
-    )
+    compaction_audit.save_round_completion_stats(mat_results)

-    _upload_compaction_audit(
-        params,
-        compaction_audit,
-        round_completion_info,
-    )
     compaction_result: ExecutionCompactionResult = _write_new_round_completion_file(
         params,
         compaction_audit,
@@ -216,5 +209,6 @@ def _execute_compaction(
         rcf_source_partition_locator,
         new_compacted_delta_locator,
         pyarrow_write_result,
+        round_completion_info,
     )
     return compaction_result
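A hedged sketch of what invoking a multi-round compaction might look like under the new guard in compact_partition; apart from hash_bucket_count, num_rounds, and drop_duplicates (all visible in the hunks above), every key is an assumed placeholder:

from deltacat.compute.compactor_v2.compaction_session import compact_partition
from deltacat.compute.compactor.model.compact_partition_params import (
    CompactPartitionParams,
)

params = CompactPartitionParams.of(
    {
        # ...source/destination partition locators, storage, etc. elided...
        "hash_bucket_count": 16,   # required arg for compactor v2
        "num_rounds": 4,           # > 1 splits hash/merge into rounds
        "drop_duplicates": False,  # must be False when num_rounds > 1
    }
)
compact_partition(params)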
deltacat/compute/compactor_v2/constants.py CHANGED
@@ -71,3 +71,7 @@ PREPARE_DELETES_METRIC_PREFIX = "prepare_deletes"

 # Metric prefix for compact partition method
 COMPACT_PARTITION_METRIC_PREFIX = "compact_partition"
+
+# Number of rounds to run hash/merge for a single
+# partition. (For large table support)
+DEFAULT_NUM_ROUNDS = 1
deltacat/compute/compactor_v2/private/compaction_utils.py CHANGED
@@ -4,8 +4,10 @@ import logging
 import ray
 import time
 import json
+from math import ceil

 from deltacat.compute.compactor import (
+    PyArrowWriteResult,
     HighWatermark,
     RoundCompletionInfo,
 )
@@ -44,10 +46,11 @@ from deltacat.compute.compactor_v2.deletes.utils import prepare_deletes
 from deltacat.storage import (
     Delta,
     DeltaType,
-    Stream,
-    StreamLocator,
+    DeltaLocator,
     Partition,
     Manifest,
+    Stream,
+    StreamLocator,
 )
 from deltacat.compute.compactor.model.compact_partition_params import (
     CompactPartitionParams,
@@ -60,7 +63,7 @@ from deltacat.compute.compactor_v2.steps import merge as mg
 from deltacat.compute.compactor_v2.steps import hash_bucket as hb
 from deltacat.compute.compactor_v2.utils import io

-from typing import Any, List, Optional
+from typing import List, Optional
 from collections import defaultdict
 from deltacat.compute.compactor.model.compaction_session_audit_info import (
     CompactionSessionAuditInfo,
@@ -123,9 +126,9 @@ def _fetch_compaction_metadata(

 def _build_uniform_deltas(
     params: CompactPartitionParams,
-    mutable_compaction_audit,
-    input_deltas,
-    delta_discovery_start,
+    mutable_compaction_audit: CompactionSessionAuditInfo,
+    input_deltas: List[Delta],
+    delta_discovery_start: float,
 ) -> tuple[List[DeltaAnnotated], DeleteStrategy, List[DeleteFileEnvelope], Partition]:

     delete_strategy: Optional[DeleteStrategy] = None
@@ -173,18 +176,34 @@ def _build_uniform_deltas(
     )


-def _run_hash_and_merge(
-    params: CompactPartitionParams,
-    uniform_deltas,
-    round_completion_info,
-    delete_strategy,
-    delete_file_envelopes,
-    mutable_compaction_audit,
-    previous_compacted_delta_manifest,
-) -> tuple[
-    list[MergeResult], np.int64, np.float64, np.int64, np.int64, np.float64, Partition
-]:
-    # create a new stream for this round
+def _group_uniform_deltas(
+    params: CompactPartitionParams, uniform_deltas: List[DeltaAnnotated]
+) -> List[List[DeltaAnnotated]]:
+    num_deltas = len(uniform_deltas)
+    num_rounds = params.num_rounds
+    if num_rounds == 1:
+        return [uniform_deltas]
+    assert (
+        num_rounds > 0
+    ), f"num_rounds parameter should be greater than zero but is {params.num_rounds}"
+    assert (
+        num_rounds <= num_deltas
+    ), f"{params.num_rounds} rounds should be less than the number of uniform deltas, which is {len(uniform_deltas)}"
+    size = ceil(num_deltas / num_rounds)
+    uniform_deltas_grouped = list(
+        map(
+            lambda x: uniform_deltas[x * size : x * size + size],
+            list(range(num_rounds)),
+        )
+    )
+    num_deltas_after_grouping = sum(len(sublist) for sublist in uniform_deltas_grouped)
+    assert (
+        num_deltas_after_grouping == num_deltas
+    ), f"uniform_deltas_grouped expected to have {num_deltas} deltas, but has {num_deltas_after_grouping}"
+    return uniform_deltas_grouped
+
+
+def _stage_new_partition(params: CompactPartitionParams) -> Partition:
     compacted_stream_locator: Optional[
         StreamLocator
     ] = params.destination_partition_locator.stream_locator
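_group_uniform_deltas is plain ceil-division chunking. A self-contained sketch of the same arithmetic, using a hypothetical helper name, which shows 7 deltas over 3 rounds landing in groups of 3, 3, and 1:

from math import ceil

def group_into_rounds(items: list, num_rounds: int) -> list:
    # Same rule as _group_uniform_deltas: each round takes a slice of
    # ceil(n / num_rounds) items, so earlier rounds may carry more.
    size = ceil(len(items) / num_rounds)
    return [items[i * size : i * size + size] for i in range(num_rounds)]

groups = group_into_rounds(list(range(7)), 3)
assert [len(g) for g in groups] == [3, 3, 1]
assert sum(len(g) for g in groups) == 7  # mirrors the post-grouping assert

Note that ceil-sized slices can leave a trailing group empty (5 items over 4 rounds gives sizes 2, 2, 1, 0); the total-count assertion above still holds in that case.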
@@ -199,7 +218,19 @@ def _run_hash_and_merge(
         params.destination_partition_locator.partition_values,
         **params.deltacat_storage_kwargs,
     )
+    return compacted_partition
+

+def _run_hash_and_merge(
+    params: CompactPartitionParams,
+    uniform_deltas: List[DeltaAnnotated],
+    round_completion_info: RoundCompletionInfo,
+    delete_strategy: Optional[DeleteStrategy],
+    delete_file_envelopes: Optional[DeleteFileEnvelope],
+    mutable_compaction_audit: CompactionSessionAuditInfo,
+    previous_compacted_delta_manifest: Optional[Manifest],
+    compacted_partition: Partition,
+) -> List[MergeResult]:
     telemetry_time_hb = 0
     total_input_records_count = np.int64(0)
     total_hb_record_count = np.int64(0)
@@ -257,7 +288,6 @@ def _run_hash_and_merge(
     for hb_result in hb_results:
         hb_data_processed_size_bytes += hb_result.hb_size_bytes
         total_input_records_count += hb_result.hb_record_count
-
         for hash_group_index, object_id_size_tuple in enumerate(
             hb_result.hash_bucket_group_to_obj_id_tuple
         ):
@@ -271,7 +301,6 @@ def _run_hash_and_merge(
                 all_hash_group_idx_to_num_rows[
                     hash_group_index
                 ] += object_id_size_tuple[2].item()
-
     logger.info(
         f"Got {total_input_records_count} hash bucket records from hash bucketing step..."
     )
@@ -330,26 +359,31 @@ def _run_hash_and_merge(
         f" Deleted records: {total_deleted_record_count}, "
     )
     logger.info(record_info_msg)
-    return (
-        merge_results,
-        telemetry_time_hb,
-        telemetry_time_merge,
-        compacted_partition,
+    telemetry_this_round = telemetry_time_hb + telemetry_time_merge
+    previous_telemetry = (
+        mutable_compaction_audit.telemetry_time_in_seconds
+        if mutable_compaction_audit.telemetry_time_in_seconds
+        else 0.0
     )

+    mutable_compaction_audit.set_telemetry_time_in_seconds(
+        telemetry_this_round + previous_telemetry
+    )
+    return merge_results
+

 def _merge(
     params: CompactPartitionParams,
-    task_resource_options_provider,
-    merge_resource_options_provider,
-    all_hash_group_idx_to_size_bytes,
-    all_hash_group_idx_to_num_rows,
-    round_completion_info,
-    previous_compacted_delta_manifest,
-    all_hash_group_idx_to_obj_id,
-    compacted_partition,
-    delete_strategy,
-    delete_file_envelopes,
+    task_resource_options_provider: callable,
+    merge_resource_options_provider: callable,
+    all_hash_group_idx_to_size_bytes: dict,
+    all_hash_group_idx_to_num_rows: dict,
+    round_completion_info: RoundCompletionInfo,
+    previous_compacted_delta_manifest: Manifest,
+    all_hash_group_idx_to_obj_id: dict,
+    compacted_partition: Partition,
+    delete_strategy: DeleteStrategy,
+    delete_file_envelopes: DeleteFileEnvelope,
 ) -> tuple[List[MergeResult], float]:
     merge_options_provider = functools.partial(
         task_resource_options_provider,
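Because _run_hash_and_merge now runs once per round, it reads the audit's previous telemetry total back and accumulates, instead of returning figures for a single final write. The None-safe running total, sketched against a hypothetical stand-in for the audit object:

class AuditStub(dict):
    # Hypothetical minimal stand-in for CompactionSessionAuditInfo;
    # the real class persists many more fields.
    @property
    def telemetry_time_in_seconds(self):
        return self.get("telemetryTimeInSeconds")

    def set_telemetry_time_in_seconds(self, value: float) -> None:
        self["telemetryTimeInSeconds"] = value

audit = AuditStub()
for round_telemetry in (1.5, 2.0, 0.5):  # one subtotal per compaction round
    previous = audit.telemetry_time_in_seconds or 0.0  # None-safe on round 1
    audit.set_telemetry_time_in_seconds(previous + round_telemetry)
assert audit.telemetry_time_in_seconds == 4.0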
@@ -416,8 +450,9 @@ def _merge(

 def _hash_bucket(
     params: CompactPartitionParams,
-    uniform_deltas,
-):
+    uniform_deltas: List[DeltaAnnotated],
+) -> tuple[List[HashBucketResult], float]:
+
     hb_options_provider = functools.partial(
         task_resource_options_provider,
         pg_config=params.pg_config,
@@ -455,7 +490,6 @@ def _hash_bucket(
         options_provider=hb_options_provider,
         kwargs_provider=hash_bucket_input_provider,
     )
-
     hb_invoke_end = time.monotonic()

     logger.info(f"Getting {len(hb_tasks_pending)} hash bucket results...")
@@ -467,15 +501,15 @@ def _hash_bucket(

 def _run_local_merge(
     params: CompactPartitionParams,
-    uniform_deltas,
-    compacted_partition,
-    round_completion_info,
-    delete_strategy,
-    delete_file_envelopes,
-    mutable_compaction_audit,
-    previous_compacted_delta_manifest,
-    total_input_records_count,
-) -> tuple[list[Any], Any]:
+    uniform_deltas: List[DeltaAnnotated],
+    compacted_partition: Partition,
+    round_completion_info: RoundCompletionInfo,
+    delete_strategy: Optional[DeleteStrategy],
+    delete_file_envelopes: Optional[DeleteFileEnvelope],
+    mutable_compaction_audit: CompactionSessionAuditInfo,
+    previous_compacted_delta_manifest: Optional[Manifest],
+    total_input_records_count: np.int64,
+) -> tuple[List[MergeResult], np.int64]:
     local_merge_input: MergeInput = generate_local_merge_input(
         params,
         uniform_deltas,
@@ -513,8 +547,10 @@ def _run_local_merge(


 def _process_merge_results(
-    params: CompactPartitionParams, merge_results, mutable_compaction_audit
-) -> tuple[Delta, list[MaterializeResult], dict]:
+    params: CompactPartitionParams,
+    merge_results: List[MergeResult],
+    mutable_compaction_audit: CompactionSessionAuditInfo,
+) -> tuple[Delta, List[MaterializeResult], dict]:
     mat_results = []
     for merge_result in merge_results:
         mat_results.extend(merge_result.materialize_results)
@@ -522,19 +558,23 @@ def _process_merge_results(
     mat_results: List[MaterializeResult] = sorted(
         mat_results, key=lambda m: m.task_index
    )
-
     hb_id_to_entry_indices_range = {}
     file_index = 0
     previous_task_index = -1

+    duplicate_hash_bucket_mat_results = 0
     for mat_result in mat_results:
         assert (
             mat_result.pyarrow_write_result.files >= 1
-        ), "Atleast one file must be materialized"
-        assert (
-            mat_result.task_index != previous_task_index
-        ), f"Multiple materialize results found for a hash bucket: {mat_result.task_index}"
-
+        ), "At least one file must be materialized"
+        if mat_result.task_index == previous_task_index:
+            duplicate_hash_bucket_mat_results += 1
+        else:
+            duplicate_hash_bucket_mat_results = 0
+        assert duplicate_hash_bucket_mat_results < params.num_rounds, (
+            f"Duplicate record count ({duplicate_hash_bucket_mat_results}) is as large "
+            f"as or greater than params.num_rounds, which is {params.num_rounds}"
+        )
         hb_id_to_entry_indices_range[str(mat_result.task_index)] = (
             file_index,
             file_index + mat_result.pyarrow_write_result.files,
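With one merge pass per round, the same hash bucket (task index) can now legitimately appear in up to num_rounds consecutive materialize results, so the old strict-uniqueness assertion becomes a bounded run-length check. The counting rule in isolation, as a hypothetical helper over bare task indices:

def check_duplicate_runs(task_indices: list, num_rounds: int) -> None:
    # Mirrors the loop above: count consecutive repeats of the same
    # (sorted) task index; the counter reaching num_rounds means the
    # index appeared num_rounds + 1 times, which trips the assert.
    duplicates = 0
    previous = -1
    for task_index in task_indices:
        if task_index == previous:
            duplicates += 1
        else:
            duplicates = 0
        assert duplicates < num_rounds, (
            f"hash bucket {task_index} appeared more than "
            f"{num_rounds} times in a row"
        )
        previous = task_index

check_duplicate_runs([0, 0, 1, 1, 2], num_rounds=2)  # runs of two: allowed
# check_duplicate_runs([0, 0, 0], num_rounds=2) would raise AssertionError.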
@@ -548,9 +588,7 @@ def _process_merge_results(
         str(json.dumps(mutable_compaction_audit)),
         **params.s3_client_kwargs,
     )
-
     deltas: List[Delta] = [m.delta for m in mat_results]
-
     # Note: An appropriate last stream position must be set
     # to avoid correctness issue.
     merged_delta: Delta = Delta.merge_deltas(
@@ -561,10 +599,10 @@ def _process_merge_results(
     return merged_delta, mat_results, hb_id_to_entry_indices_range


-def _upload_compaction_audit(
+def _update_and_upload_compaction_audit(
     params: CompactPartitionParams,
-    mutable_compaction_audit,
-    round_completion_info,
+    mutable_compaction_audit: CompactionSessionAuditInfo,
+    round_completion_info: Optional[RoundCompletionInfo] = None,
 ) -> None:

     # After all incremental delta related calculations, we update
@@ -593,13 +631,14 @@ def _upload_compaction_audit(

 def _write_new_round_completion_file(
     params: CompactPartitionParams,
-    mutable_compaction_audit,
-    compacted_partition,
-    audit_url,
-    hb_id_to_entry_indices_range,
-    rcf_source_partition_locator,
-    new_compacted_delta_locator,
-    pyarrow_write_result,
+    mutable_compaction_audit: CompactionSessionAuditInfo,
+    compacted_partition: Partition,
+    audit_url: str,
+    hb_id_to_entry_indices_range: dict,
+    rcf_source_partition_locator: rcf.PartitionLocator,
+    new_compacted_delta_locator: DeltaLocator,
+    pyarrow_write_result: PyArrowWriteResult,
+    prev_round_completion_info: Optional[RoundCompletionInfo] = None,
 ) -> ExecutionCompactionResult:
     input_inflation = None
     input_average_record_size_bytes = None
@@ -627,6 +666,12 @@ def _write_new_round_completion_file(
         f" and average record size={input_average_record_size_bytes}"
     )

+    _update_and_upload_compaction_audit(
+        params,
+        mutable_compaction_audit,
+        prev_round_completion_info,
+    )
+
     new_round_completion_info = RoundCompletionInfo.of(
         high_watermark=params.last_stream_position_to_compact,
         compacted_delta_locator=new_compacted_delta_locator,
deltacat/compute/compactor_v2/steps/merge.py CHANGED
@@ -177,13 +177,10 @@ def _download_compacted_table(

     if str(hb_index) not in hb_index_to_indices:
         return None
-
     indices = hb_index_to_indices[str(hb_index)]
-
     assert (
         indices is not None and len(indices) == 2
     ), "indices should not be none and contains exactly two elements"
-
     for offset in range(indices[1] - indices[0]):
         table = deltacat_storage.download_delta_manifest_entry(
             rcf.compacted_delta_locator,
deltacat/compute/compactor_v2/utils/delta.py CHANGED
@@ -42,11 +42,10 @@ def read_delta_file_envelopes(
         **deltacat_storage_kwargs,
     )
     annotations = annotated_delta.annotations
-    assert (
-        len(tables) == len(annotations),
+    assert len(tables) == len(annotations), (
         f"Unexpected Error: Length of downloaded delta manifest tables "
         f"({len(tables)}) doesn't match the length of delta manifest "
-        f"annotations ({len(annotations)}).",
+        f"annotations ({len(annotations)})."
     )
     if not tables:
         return None, 0, 0
deltacat/compute/compactor_v2/utils/io.py CHANGED
@@ -61,7 +61,6 @@ def discover_deltas(
     )

     result.extend(delta_source_incremental_deltas)
-
     logger.info(
         f"Length of input deltas from delta source table is {len(delta_source_incremental_deltas)}"
         f" from ({previous_compacted_high_watermark}, {last_stream_position_to_compact}]"
@@ -132,7 +131,6 @@ def create_uniform_input_deltas(
     size_estimation_function = functools.partial(
         estimate_manifest_entry_size_bytes, previous_inflation=previous_inflation
     )
-
     rebatched_da_list = DeltaAnnotated.rebatch(
         input_da_list,
         min_delta_bytes=min_delta_bytes,
deltacat/compute/compactor_v2/utils/merge.py CHANGED
@@ -109,7 +109,6 @@ def generate_local_merge_input(
         A MergeInput object

     """
-
     return MergeInput.of(
         merge_file_groups_provider=LocalMergeFileGroupsProvider(
             annotated_deltas,