deltacat 1.1.14__py3-none-any.whl → 1.1.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. deltacat/__init__.py +1 -1
  2. deltacat/compute/compactor/compaction_session.py +3 -2
  3. deltacat/compute/compactor/model/compact_partition_params.py +11 -1
  4. deltacat/compute/compactor/model/compaction_session_audit_info.py +2 -2
  5. deltacat/compute/compactor/model/delta_annotated.py +2 -4
  6. deltacat/compute/compactor/steps/hash_bucket.py +2 -3
  7. deltacat/compute/compactor_v2/compaction_session.py +26 -27
  8. deltacat/compute/compactor_v2/constants.py +4 -0
  9. deltacat/compute/compactor_v2/private/compaction_utils.py +103 -66
  10. deltacat/compute/compactor_v2/steps/merge.py +0 -3
  11. deltacat/compute/compactor_v2/utils/delta.py +2 -3
  12. deltacat/compute/compactor_v2/utils/io.py +0 -2
  13. deltacat/compute/compactor_v2/utils/merge.py +0 -1
  14. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +855 -0
  15. deltacat/tests/compute/compactor_v2/test_compaction_session.py +1 -1
  16. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +330 -0
  17. deltacat/tests/compute/test_compact_partition_rebase.py +1 -1
  18. deltacat/tests/compute/test_util_create_table_deltas_repo.py +118 -0
  19. deltacat/tests/local_deltacat_storage/__init__.py +8 -5
  20. {deltacat-1.1.14.dist-info → deltacat-1.1.15.dist-info}/METADATA +1 -1
  21. {deltacat-1.1.14.dist-info → deltacat-1.1.15.dist-info}/RECORD +24 -22
  22. {deltacat-1.1.14.dist-info → deltacat-1.1.15.dist-info}/LICENSE +0 -0
  23. {deltacat-1.1.14.dist-info → deltacat-1.1.15.dist-info}/WHEEL +0 -0
  24. {deltacat-1.1.14.dist-info → deltacat-1.1.15.dist-info}/top_level.txt +0 -0
deltacat/__init__.py CHANGED
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
 
 deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
 
-__version__ = "1.1.14"
+__version__ = "1.1.15"
 
 
 __all__ = [
deltacat/compute/compactor/compaction_session.py CHANGED
@@ -688,8 +688,9 @@ def _execute_compaction_round(
         session_peak_memory
     )
 
-    compaction_audit.save_round_completion_stats(
-        mat_results, telemetry_time_hb + telemetry_time_dd + telemetry_time_materialize
+    compaction_audit.save_round_completion_stats(mat_results)
+    compaction_audit.set_telemetry_time_in_seconds(
+        telemetry_time_hb + telemetry_time_dd + telemetry_time_materialize
     )
 
     s3_utils.upload(
deltacat/compute/compactor/model/compact_partition_params.py CHANGED
@@ -22,6 +22,7 @@ from deltacat.compute.compactor_v2.constants import (
     DROP_DUPLICATES,
     TOTAL_MEMORY_BUFFER_PERCENTAGE,
     DEFAULT_DISABLE_COPY_BY_REFERENCE,
+    DEFAULT_NUM_ROUNDS,
 )
 from deltacat.constants import PYARROW_INFLATION_MULTIPLIER
 from deltacat.compute.compactor.utils.sort_key import validate_sort_keys
@@ -102,6 +103,8 @@ class CompactPartitionParams(dict):
 
         result.metrics_config = params.get("metrics_config")
 
+        result.num_rounds = params.get("num_rounds", DEFAULT_NUM_ROUNDS)
+
         if not importlib.util.find_spec("memray"):
             result.enable_profiler = False
 
@@ -189,7 +192,6 @@ class CompactPartitionParams(dict):
             cluster_resources = self.pg_config.resource
             cluster_cpus = cluster_resources["CPU"]
             self.task_max_parallelism = cluster_cpus
-            self["task_max_parallelism"] = self.task_max_parallelism
        return self["task_max_parallelism"]
 
    @task_max_parallelism.setter
@@ -403,6 +405,14 @@ class CompactPartitionParams(dict):
     def metrics_config(self, config: MetricsConfig) -> None:
         self["metrics_config"] = config
 
+    @property
+    def num_rounds(self) -> int:
+        return self["num_rounds"]
+
+    @num_rounds.setter
+    def num_rounds(self, num_rounds: int) -> None:
+        self["num_rounds"] = num_rounds
+
     @staticmethod
     def json_handler_for_compact_partition_params(obj):
         """
deltacat/compute/compactor/model/compaction_session_audit_info.py CHANGED
@@ -818,7 +818,8 @@ class CompactionSessionAuditInfo(dict):
         return cluster_util_after_task_latency + telemetry_time
 
     def save_round_completion_stats(
-        self, mat_results: List[MaterializeResult], total_telemetry_time: float
+        self,
+        mat_results: List[MaterializeResult],
     ) -> None:
         """
         This method saves all the relevant stats after all the steps are completed.
@@ -888,4 +889,3 @@ class CompactionSessionAuditInfo(dict):
         )
 
         self.set_pyarrow_version(pa.__version__)
-        self.set_telemetry_time_in_seconds(total_telemetry_time)
deltacat/compute/compactor/model/delta_annotated.py CHANGED
@@ -97,8 +97,7 @@ class DeltaAnnotated(Delta):
        for src_da in split_annotated_deltas:
            src_da_annotations = src_da.annotations
            src_da_entries = src_da.manifest.entries
-            assert (
-                len(src_da_annotations) == len(src_da_entries),
+            assert len(src_da_annotations) == len(src_da_entries), (
                f"Unexpected Error: Length of delta annotations "
                f"({len(src_da_annotations)}) doesn't mach the length of "
                f"delta manifest entries ({len(src_da_entries)}).",
@@ -152,8 +151,7 @@ class DeltaAnnotated(Delta):
            da_group_entry_count = 0
            src_da_annotations = src_da.annotations
            src_da_entries = src_da.manifest.entries
-            assert (
-                len(src_da_annotations) == len(src_da_entries),
+            assert len(src_da_annotations) == len(src_da_entries), (
                f"Unexpected Error: Length of delta annotations "
                f"({len(src_da_annotations)}) doesn't mach the length of "
                f"delta manifest entries ({len(src_da_entries)}).",
deltacat/compute/compactor/steps/hash_bucket.py CHANGED
@@ -157,11 +157,10 @@ def _read_delta_file_envelopes(
        **deltacat_storage_kwargs,
    )
    annotations = annotated_delta.annotations
-    assert (
-        len(tables) == len(annotations),
+    assert len(tables) == len(annotations), (
        f"Unexpected Error: Length of downloaded delta manifest tables "
        f"({len(tables)}) doesn't match the length of delta manifest "
-        f"annotations ({len(annotations)}).",
+        f"annotations ({len(annotations)})."
    )
    if not tables:
        return None, 0
deltacat/compute/compactor_v2/compaction_session.py CHANGED
@@ -1,4 +1,3 @@
-import numpy as np
 import importlib
 from contextlib import nullcontext
 import logging
@@ -40,6 +39,8 @@ from deltacat.utils.resources import (
 from deltacat.compute.compactor_v2.private.compaction_utils import (
     _fetch_compaction_metadata,
     _build_uniform_deltas,
+    _group_uniform_deltas,
+    _stage_new_partition,
     _run_hash_and_merge,
     _process_merge_results,
     _upload_compaction_audit,
@@ -69,6 +70,10 @@ def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]
     assert (
         params.hash_bucket_count is not None and params.hash_bucket_count >= 1
     ), "hash_bucket_count is a required arg for compactor v2"
+    if params.num_rounds > 1:
+        assert (
+            not params.drop_duplicates
+        ), "num_rounds > 1, drop_duplicates must be False but is True"
 
     with memray.Tracker(
         "compaction_partition.bin"
@@ -144,32 +149,28 @@ def _execute_compaction(
         delete_strategy,
         delete_file_envelopes,
     ) = build_uniform_deltas_result
-
-    # run merge
-    _run_hash_and_merge_result: tuple[
-        Optional[List[MergeResult]],
-        np.float64,
-        np.float64,
-        Partition,
-    ] = _run_hash_and_merge(
-        params,
-        uniform_deltas,
-        round_completion_info,
-        delete_strategy,
-        delete_file_envelopes,
-        compaction_audit,
-        previous_compacted_delta_manifest,
-    )
-    (
-        merge_results,
-        telemetry_time_hb,
-        telemetry_time_merge,
-        compacted_partition,
-    ) = _run_hash_and_merge_result
+    logger.info(f"Number of rounds parameter is set to: {params.num_rounds}")
+    uniform_deltas_grouped = _group_uniform_deltas(params, uniform_deltas)
+    logger.info(f"Length of grouped uniform deltas is: {len(uniform_deltas_grouped)}")
+    merge_result_list: List[MergeResult] = []
+    compacted_partition = _stage_new_partition(params)
+    for uniform_deltas in uniform_deltas_grouped:
+        # run hash and merge
+        _run_hash_and_merge_result: List[MergeResult] = _run_hash_and_merge(
+            params,
+            uniform_deltas,
+            round_completion_info,
+            delete_strategy,
+            delete_file_envelopes,
+            compaction_audit,
+            previous_compacted_delta_manifest,
+            compacted_partition,
+        )
+        merge_result_list.extend(_run_hash_and_merge_result)
     # process merge results
     process_merge_results: tuple[
         Delta, list[MaterializeResult], dict
-    ] = _process_merge_results(params, merge_results, compaction_audit)
+    ] = _process_merge_results(params, merge_result_list, compaction_audit)
     merged_delta, mat_results, hb_id_to_entry_indices_range = process_merge_results
     # Record information, logging, and return ExecutionCompactionResult
     record_info_msg: str = f" Materialized records: {merged_delta.meta.record_count}"
@@ -198,9 +199,7 @@ def _execute_compaction(
         session_peak_memory
     )
 
-    compaction_audit.save_round_completion_stats(
-        mat_results, telemetry_time_hb + telemetry_time_merge
-    )
+    compaction_audit.save_round_completion_stats(mat_results)
 
     _upload_compaction_audit(
         params,
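
Note: multiple rounds only compose safely when records never need deduplication across rounds, hence the new fail-fast guard in compact_partition. A sketch of the failure mode, assuming drop_duplicates is settable on the params model the same way num_rounds is, and that all other required arguments are already configured:

    from deltacat.compute.compactor_v2.compaction_session import compact_partition

    params.num_rounds = 2
    params.drop_duplicates = True  # incompatible with multi-round compaction
    compact_partition(params)
    # AssertionError: num_rounds > 1, drop_duplicates must be False but is True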
deltacat/compute/compactor_v2/constants.py CHANGED
@@ -71,3 +71,7 @@ PREPARE_DELETES_METRIC_PREFIX = "prepare_deletes"
 
 # Metric prefix for compact partition method
 COMPACT_PARTITION_METRIC_PREFIX = "compact_partition"
+
+# Number of rounds to run hash/merge for a single
+# partition. (For large table support)
+DEFAULT_NUM_ROUNDS = 1
deltacat/compute/compactor_v2/private/compaction_utils.py CHANGED
@@ -4,8 +4,10 @@ import logging
 import ray
 import time
 import json
+from math import ceil
 
 from deltacat.compute.compactor import (
+    PyArrowWriteResult,
     HighWatermark,
     RoundCompletionInfo,
 )
@@ -44,10 +46,11 @@ from deltacat.compute.compactor_v2.deletes.utils import prepare_deletes
 from deltacat.storage import (
     Delta,
     DeltaType,
-    Stream,
-    StreamLocator,
+    DeltaLocator,
     Partition,
     Manifest,
+    Stream,
+    StreamLocator,
 )
 from deltacat.compute.compactor.model.compact_partition_params import (
     CompactPartitionParams,
@@ -60,7 +63,7 @@ from deltacat.compute.compactor_v2.steps import merge as mg
 from deltacat.compute.compactor_v2.steps import hash_bucket as hb
 from deltacat.compute.compactor_v2.utils import io
 
-from typing import Any, List, Optional
+from typing import List, Optional
 from collections import defaultdict
 from deltacat.compute.compactor.model.compaction_session_audit_info import (
     CompactionSessionAuditInfo,
@@ -123,9 +126,9 @@ def _fetch_compaction_metadata(
 
 def _build_uniform_deltas(
     params: CompactPartitionParams,
-    mutable_compaction_audit,
-    input_deltas,
-    delta_discovery_start,
+    mutable_compaction_audit: CompactionSessionAuditInfo,
+    input_deltas: List[Delta],
+    delta_discovery_start: float,
 ) -> tuple[List[DeltaAnnotated], DeleteStrategy, List[DeleteFileEnvelope], Partition]:
 
     delete_strategy: Optional[DeleteStrategy] = None
@@ -173,18 +176,34 @@
     )
 
 
-def _run_hash_and_merge(
-    params: CompactPartitionParams,
-    uniform_deltas,
-    round_completion_info,
-    delete_strategy,
-    delete_file_envelopes,
-    mutable_compaction_audit,
-    previous_compacted_delta_manifest,
-) -> tuple[
-    list[MergeResult], np.int64, np.float64, np.int64, np.int64, np.float64, Partition
-]:
-    # create a new stream for this round
+def _group_uniform_deltas(
+    params: CompactPartitionParams, uniform_deltas: List[DeltaAnnotated]
+) -> List[List[DeltaAnnotated]]:
+    num_deltas = len(uniform_deltas)
+    num_rounds = params.num_rounds
+    if num_rounds == 1:
+        return [uniform_deltas]
+    assert (
+        num_rounds > 0
+    ), f"num_rounds parameter should be greater than zero but is {params.num_rounds}"
+    assert (
+        num_rounds <= num_deltas
+    ), f"{params.num_rounds} rounds should be less than the number of uniform deltas, which is {len(uniform_deltas)}"
+    size = ceil(num_deltas / num_rounds)
+    uniform_deltas_grouped = list(
+        map(
+            lambda x: uniform_deltas[x * size : x * size + size],
+            list(range(num_rounds)),
+        )
+    )
+    num_deltas_after_grouping = sum(len(sublist) for sublist in uniform_deltas_grouped)
+    assert (
+        num_deltas_after_grouping == num_deltas
+    ), f"uniform_deltas_grouped expected to have {num_deltas} deltas, but has {num_deltas_after_grouping}"
+    return uniform_deltas_grouped
+
+
+def _stage_new_partition(params: CompactPartitionParams) -> Partition:
     compacted_stream_locator: Optional[
         StreamLocator
     ] = params.destination_partition_locator.stream_locator
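
Note: _group_uniform_deltas splits the delta list into num_rounds contiguous slices of ceil(num_deltas / num_rounds) items each, so only the final slice can come up short. A worked example of the slicing arithmetic, using plain integers as stand-ins for DeltaAnnotated objects:

    from math import ceil

    uniform_deltas = list(range(10))  # 10 uniform deltas
    num_rounds = 4
    size = ceil(len(uniform_deltas) / num_rounds)  # ceil(10 / 4) == 3 per round
    groups = [uniform_deltas[i * size : i * size + size] for i in range(num_rounds)]
    # groups == [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] -> rounds of 3, 3, 3, 1
    assert sum(len(g) for g in groups) == len(uniform_deltas)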
@@ -199,7 +218,19 @@ def _run_hash_and_merge(
         params.destination_partition_locator.partition_values,
         **params.deltacat_storage_kwargs,
     )
+    return compacted_partition
 
+
+def _run_hash_and_merge(
+    params: CompactPartitionParams,
+    uniform_deltas: List[DeltaAnnotated],
+    round_completion_info: RoundCompletionInfo,
+    delete_strategy: Optional[DeleteStrategy],
+    delete_file_envelopes: Optional[DeleteFileEnvelope],
+    mutable_compaction_audit: CompactionSessionAuditInfo,
+    previous_compacted_delta_manifest: Optional[Manifest],
+    compacted_partition: Partition,
+) -> List[MergeResult]:
     telemetry_time_hb = 0
     total_input_records_count = np.int64(0)
     total_hb_record_count = np.int64(0)
@@ -257,7 +288,6 @@ def _run_hash_and_merge(
     for hb_result in hb_results:
         hb_data_processed_size_bytes += hb_result.hb_size_bytes
         total_input_records_count += hb_result.hb_record_count
-
         for hash_group_index, object_id_size_tuple in enumerate(
             hb_result.hash_bucket_group_to_obj_id_tuple
         ):
@@ -271,7 +301,6 @@ def _run_hash_and_merge(
                 all_hash_group_idx_to_num_rows[
                     hash_group_index
                 ] += object_id_size_tuple[2].item()
-
     logger.info(
         f"Got {total_input_records_count} hash bucket records from hash bucketing step..."
     )
@@ -330,26 +359,30 @@ def _run_hash_and_merge(
         f" Deleted records: {total_deleted_record_count}, "
     )
     logger.info(record_info_msg)
-    return (
-        merge_results,
-        telemetry_time_hb,
-        telemetry_time_merge,
-        compacted_partition,
+    telemetry_this_round = telemetry_time_hb + telemetry_time_merge
+    previous_telemetry = (
+        mutable_compaction_audit.telemetry_time_in_seconds
+        if mutable_compaction_audit.telemetry_time_in_seconds
+        else 0.0
     )
+    mutable_compaction_audit.set_telemetry_time_in_seconds(
+        telemetry_this_round + previous_telemetry
+    )
+    return merge_results
 
 
 def _merge(
     params: CompactPartitionParams,
-    task_resource_options_provider,
-    merge_resource_options_provider,
-    all_hash_group_idx_to_size_bytes,
-    all_hash_group_idx_to_num_rows,
-    round_completion_info,
-    previous_compacted_delta_manifest,
-    all_hash_group_idx_to_obj_id,
-    compacted_partition,
-    delete_strategy,
-    delete_file_envelopes,
+    task_resource_options_provider: callable,
+    merge_resource_options_provider: callable,
+    all_hash_group_idx_to_size_bytes: dict,
+    all_hash_group_idx_to_num_rows: dict,
+    round_completion_info: RoundCompletionInfo,
+    previous_compacted_delta_manifest: Manifest,
+    all_hash_group_idx_to_obj_id: dict,
+    compacted_partition: Partition,
+    delete_strategy: DeleteStrategy,
+    delete_file_envelopes: DeleteFileEnvelope,
 ) -> tuple[List[MergeResult], float]:
     merge_options_provider = functools.partial(
         task_resource_options_provider,
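
Note: since _run_hash_and_merge now returns only List[MergeResult], per-round telemetry is folded into the audit object itself, so the running total accumulates across rounds instead of being handed to save_round_completion_stats at the end. The if/else above is equivalent to this condensed sketch (on the dict-backed audit model, telemetry_time_in_seconds reads as None before the first round):

    audit.set_telemetry_time_in_seconds(
        (audit.telemetry_time_in_seconds or 0.0)
        + telemetry_time_hb
        + telemetry_time_merge
    )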
@@ -416,8 +449,9 @@
 
 def _hash_bucket(
     params: CompactPartitionParams,
-    uniform_deltas,
-):
+    uniform_deltas: List[DeltaAnnotated],
+) -> tuple[List[HashBucketResult], float]:
+
     hb_options_provider = functools.partial(
         task_resource_options_provider,
         pg_config=params.pg_config,
@@ -455,7 +489,6 @@
         options_provider=hb_options_provider,
         kwargs_provider=hash_bucket_input_provider,
     )
-
     hb_invoke_end = time.monotonic()
 
     logger.info(f"Getting {len(hb_tasks_pending)} hash bucket results...")
@@ -467,15 +500,15 @@
 
 def _run_local_merge(
     params: CompactPartitionParams,
-    uniform_deltas,
-    compacted_partition,
-    round_completion_info,
-    delete_strategy,
-    delete_file_envelopes,
-    mutable_compaction_audit,
-    previous_compacted_delta_manifest,
-    total_input_records_count,
-) -> tuple[list[Any], Any]:
+    uniform_deltas: List[DeltaAnnotated],
+    compacted_partition: Partition,
+    round_completion_info: RoundCompletionInfo,
+    delete_strategy: Optional[DeleteStrategy],
+    delete_file_envelopes: Optional[DeleteFileEnvelope],
+    mutable_compaction_audit: CompactionSessionAuditInfo,
+    previous_compacted_delta_manifest: Optional[Manifest],
+    total_input_records_count: np.int64,
+) -> tuple[List[MergeResult], np.int64]:
     local_merge_input: MergeInput = generate_local_merge_input(
         params,
         uniform_deltas,
@@ -513,8 +546,10 @@
 
 
 def _process_merge_results(
-    params: CompactPartitionParams, merge_results, mutable_compaction_audit
-) -> tuple[Delta, list[MaterializeResult], dict]:
+    params: CompactPartitionParams,
+    merge_results: List[MergeResult],
+    mutable_compaction_audit: CompactionSessionAuditInfo,
+) -> tuple[Delta, List[MaterializeResult], dict]:
     mat_results = []
     for merge_result in merge_results:
         mat_results.extend(merge_result.materialize_results)
@@ -522,19 +557,23 @@ def _process_merge_results(
     mat_results: List[MaterializeResult] = sorted(
         mat_results, key=lambda m: m.task_index
     )
-
     hb_id_to_entry_indices_range = {}
     file_index = 0
     previous_task_index = -1
 
+    duplicate_hash_bucket_mat_results = 0
     for mat_result in mat_results:
         assert (
             mat_result.pyarrow_write_result.files >= 1
-        ), "Atleast one file must be materialized"
-        assert (
-            mat_result.task_index != previous_task_index
-        ), f"Multiple materialize results found for a hash bucket: {mat_result.task_index}"
-
+        ), "At least one file must be materialized"
+        if mat_result.task_index == previous_task_index:
+            duplicate_hash_bucket_mat_results += 1
+        else:
+            duplicate_hash_bucket_mat_results = 0
+        assert duplicate_hash_bucket_mat_results < params.num_rounds, (
+            f"Duplicate record count ({duplicate_hash_bucket_mat_results}) is as large "
+            f"as or greater than params.num_rounds, which is {params.num_rounds}"
+        )
         hb_id_to_entry_indices_range[str(mat_result.task_index)] = (
             file_index,
             file_index + mat_result.pyarrow_write_result.files,
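
Note: with one hash/merge pass per round, the same hash bucket (task index) can now legitimately produce one materialize result per round, so the old "no duplicate task index" assertion is relaxed to tolerate up to num_rounds consecutive results per bucket. A toy trace of the counter over sorted task indices, with num_rounds = 2:

    task_indices = [0, 0, 1, 2, 2]  # buckets 0 and 2 were touched in both rounds
    num_rounds = 2
    previous, dupes = -1, 0
    for idx in task_indices:
        dupes = dupes + 1 if idx == previous else 0
        assert dupes < num_rounds  # a third result for one bucket would fail here
        previous = idx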
@@ -548,9 +587,7 @@
         str(json.dumps(mutable_compaction_audit)),
         **params.s3_client_kwargs,
     )
-
     deltas: List[Delta] = [m.delta for m in mat_results]
-
     # Note: An appropriate last stream position must be set
     # to avoid correctness issue.
     merged_delta: Delta = Delta.merge_deltas(
@@ -563,8 +600,8 @@
 
 def _upload_compaction_audit(
     params: CompactPartitionParams,
-    mutable_compaction_audit,
-    round_completion_info,
+    mutable_compaction_audit: CompactionSessionAuditInfo,
+    round_completion_info: RoundCompletionInfo,
 ) -> None:
 
     # After all incremental delta related calculations, we update
@@ -593,13 +630,13 @@
 
 def _write_new_round_completion_file(
     params: CompactPartitionParams,
-    mutable_compaction_audit,
-    compacted_partition,
-    audit_url,
-    hb_id_to_entry_indices_range,
-    rcf_source_partition_locator,
-    new_compacted_delta_locator,
-    pyarrow_write_result,
+    mutable_compaction_audit: CompactionSessionAuditInfo,
+    compacted_partition: Partition,
+    audit_url: str,
+    hb_id_to_entry_indices_range: dict,
+    rcf_source_partition_locator: rcf.PartitionLocator,
+    new_compacted_delta_locator: DeltaLocator,
+    pyarrow_write_result: PyArrowWriteResult,
 ) -> ExecutionCompactionResult:
     input_inflation = None
     input_average_record_size_bytes = None
deltacat/compute/compactor_v2/steps/merge.py CHANGED
@@ -177,13 +177,10 @@ def _download_compacted_table(
 
     if str(hb_index) not in hb_index_to_indices:
         return None
-
     indices = hb_index_to_indices[str(hb_index)]
-
     assert (
         indices is not None and len(indices) == 2
     ), "indices should not be none and contains exactly two elements"
-
     for offset in range(indices[1] - indices[0]):
         table = deltacat_storage.download_delta_manifest_entry(
             rcf.compacted_delta_locator,
deltacat/compute/compactor_v2/utils/delta.py CHANGED
@@ -42,11 +42,10 @@ def read_delta_file_envelopes(
        **deltacat_storage_kwargs,
    )
    annotations = annotated_delta.annotations
-    assert (
-        len(tables) == len(annotations),
+    assert len(tables) == len(annotations), (
        f"Unexpected Error: Length of downloaded delta manifest tables "
        f"({len(tables)}) doesn't match the length of delta manifest "
-        f"annotations ({len(annotations)}).",
+        f"annotations ({len(annotations)})."
    )
    if not tables:
        return None, 0, 0
deltacat/compute/compactor_v2/utils/io.py CHANGED
@@ -61,7 +61,6 @@ def discover_deltas(
    )
 
    result.extend(delta_source_incremental_deltas)
-
    logger.info(
        f"Length of input deltas from delta source table is {len(delta_source_incremental_deltas)}"
        f" from ({previous_compacted_high_watermark}, {last_stream_position_to_compact}]"
@@ -132,7 +131,6 @@ def create_uniform_input_deltas(
    size_estimation_function = functools.partial(
        estimate_manifest_entry_size_bytes, previous_inflation=previous_inflation
    )
-
    rebatched_da_list = DeltaAnnotated.rebatch(
        input_da_list,
        min_delta_bytes=min_delta_bytes,
deltacat/compute/compactor_v2/utils/merge.py CHANGED
@@ -109,7 +109,6 @@ def generate_local_merge_input(
    A MergeInput object
 
    """
-
    return MergeInput.of(
        merge_file_groups_provider=LocalMergeFileGroupsProvider(
            annotated_deltas,