deltacat 0.1.8__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. deltacat/__init__.py +41 -15
  2. deltacat/aws/clients.py +12 -31
  3. deltacat/aws/constants.py +1 -1
  4. deltacat/aws/redshift/__init__.py +7 -2
  5. deltacat/aws/redshift/model/manifest.py +54 -50
  6. deltacat/aws/s3u.py +188 -218
  7. deltacat/catalog/delegate.py +151 -185
  8. deltacat/catalog/interface.py +78 -97
  9. deltacat/catalog/model/catalog.py +21 -21
  10. deltacat/catalog/model/table_definition.py +11 -9
  11. deltacat/compute/compactor/__init__.py +12 -16
  12. deltacat/compute/compactor/compaction_session.py +259 -316
  13. deltacat/compute/compactor/model/delta_annotated.py +60 -44
  14. deltacat/compute/compactor/model/delta_file_envelope.py +5 -6
  15. deltacat/compute/compactor/model/delta_file_locator.py +10 -8
  16. deltacat/compute/compactor/model/materialize_result.py +6 -7
  17. deltacat/compute/compactor/model/primary_key_index.py +38 -34
  18. deltacat/compute/compactor/model/pyarrow_write_result.py +3 -4
  19. deltacat/compute/compactor/model/round_completion_info.py +25 -19
  20. deltacat/compute/compactor/model/sort_key.py +18 -15
  21. deltacat/compute/compactor/steps/dedupe.py +152 -259
  22. deltacat/compute/compactor/steps/hash_bucket.py +57 -73
  23. deltacat/compute/compactor/steps/materialize.py +138 -99
  24. deltacat/compute/compactor/steps/rehash/rehash_bucket.py +13 -13
  25. deltacat/compute/compactor/steps/rehash/rewrite_index.py +11 -13
  26. deltacat/compute/compactor/utils/io.py +59 -47
  27. deltacat/compute/compactor/utils/primary_key_index.py +131 -90
  28. deltacat/compute/compactor/utils/round_completion_file.py +22 -23
  29. deltacat/compute/compactor/utils/system_columns.py +33 -42
  30. deltacat/compute/metastats/meta_stats.py +235 -157
  31. deltacat/compute/metastats/model/partition_stats_dict.py +7 -10
  32. deltacat/compute/metastats/model/stats_cluster_size_estimator.py +13 -5
  33. deltacat/compute/metastats/stats.py +95 -64
  34. deltacat/compute/metastats/utils/io.py +100 -53
  35. deltacat/compute/metastats/utils/pyarrow_memory_estimation_function.py +5 -2
  36. deltacat/compute/metastats/utils/ray_utils.py +38 -33
  37. deltacat/compute/stats/basic.py +107 -69
  38. deltacat/compute/stats/models/delta_column_stats.py +11 -8
  39. deltacat/compute/stats/models/delta_stats.py +59 -32
  40. deltacat/compute/stats/models/delta_stats_cache_result.py +4 -1
  41. deltacat/compute/stats/models/manifest_entry_stats.py +12 -6
  42. deltacat/compute/stats/models/stats_result.py +24 -14
  43. deltacat/compute/stats/utils/intervals.py +16 -9
  44. deltacat/compute/stats/utils/io.py +86 -51
  45. deltacat/compute/stats/utils/manifest_stats_file.py +24 -33
  46. deltacat/constants.py +8 -10
  47. deltacat/io/__init__.py +2 -2
  48. deltacat/io/aws/redshift/redshift_datasource.py +157 -143
  49. deltacat/io/dataset.py +14 -17
  50. deltacat/io/read_api.py +36 -33
  51. deltacat/logs.py +94 -42
  52. deltacat/storage/__init__.py +18 -8
  53. deltacat/storage/interface.py +196 -213
  54. deltacat/storage/model/delta.py +45 -51
  55. deltacat/storage/model/list_result.py +12 -8
  56. deltacat/storage/model/namespace.py +4 -5
  57. deltacat/storage/model/partition.py +42 -42
  58. deltacat/storage/model/stream.py +29 -30
  59. deltacat/storage/model/table.py +14 -14
  60. deltacat/storage/model/table_version.py +32 -31
  61. deltacat/storage/model/types.py +1 -0
  62. deltacat/tests/stats/test_intervals.py +11 -24
  63. deltacat/tests/utils/test_record_batch_tables.py +284 -0
  64. deltacat/types/media.py +3 -4
  65. deltacat/types/tables.py +31 -21
  66. deltacat/utils/common.py +5 -11
  67. deltacat/utils/numpy.py +20 -22
  68. deltacat/utils/pandas.py +73 -100
  69. deltacat/utils/performance.py +3 -9
  70. deltacat/utils/placement.py +276 -231
  71. deltacat/utils/pyarrow.py +302 -89
  72. deltacat/utils/ray_utils/collections.py +2 -1
  73. deltacat/utils/ray_utils/concurrency.py +38 -32
  74. deltacat/utils/ray_utils/dataset.py +28 -28
  75. deltacat/utils/ray_utils/performance.py +5 -9
  76. deltacat/utils/ray_utils/runtime.py +9 -10
  77. {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/METADATA +22 -12
  78. deltacat-0.1.11.dist-info/RECORD +110 -0
  79. {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/WHEEL +1 -1
  80. deltacat/autoscaler/events/__init__.py +0 -0
  81. deltacat/autoscaler/events/compaction/__init__.py +0 -0
  82. deltacat/autoscaler/events/compaction/cluster.py +0 -82
  83. deltacat/autoscaler/events/compaction/collections/__init__.py +0 -0
  84. deltacat/autoscaler/events/compaction/collections/partition_key_value.py +0 -36
  85. deltacat/autoscaler/events/compaction/dispatcher.py +0 -28
  86. deltacat/autoscaler/events/compaction/input.py +0 -27
  87. deltacat/autoscaler/events/compaction/process.py +0 -25
  88. deltacat/autoscaler/events/compaction/session_manager.py +0 -13
  89. deltacat/autoscaler/events/compaction/utils.py +0 -216
  90. deltacat/autoscaler/events/compaction/workflow.py +0 -303
  91. deltacat/autoscaler/events/dispatcher.py +0 -95
  92. deltacat/autoscaler/events/dynamodb/__init__.py +0 -0
  93. deltacat/autoscaler/events/dynamodb/event_store.py +0 -164
  94. deltacat/autoscaler/events/event_store.py +0 -55
  95. deltacat/autoscaler/events/exceptions.py +0 -6
  96. deltacat/autoscaler/events/processor.py +0 -177
  97. deltacat/autoscaler/events/session_manager.py +0 -25
  98. deltacat/autoscaler/events/states.py +0 -88
  99. deltacat/autoscaler/events/workflow.py +0 -54
  100. deltacat/autoscaler/node_group.py +0 -230
  101. deltacat/autoscaler/utils.py +0 -69
  102. deltacat-0.1.8.dist-info/RECORD +0 -131
  103. /deltacat/{autoscaler → tests/utils}/__init__.py +0 -0
  104. {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/LICENSE +0 -0
  105. {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/top_level.txt +0 -0
@@ -1,83 +1,66 @@
- import logging
- import time
  import functools
- import ray
-
+ import logging
  from collections import defaultdict
+ from typing import Dict, List, Optional, Set, Tuple
+
+ import pyarrow as pa
+ import ray

  from deltacat import logs
+ from deltacat.compute.compactor import (
+     PrimaryKeyIndexLocator,
+     PrimaryKeyIndexMeta,
+     PrimaryKeyIndexVersionLocator,
+     PrimaryKeyIndexVersionMeta,
+     PyArrowWriteResult,
+     RoundCompletionInfo,
+     SortKey,
+ )
+ from deltacat.compute.compactor.steps import dedupe as dd
+ from deltacat.compute.compactor.steps import hash_bucket as hb
+ from deltacat.compute.compactor.steps import materialize as mat
+ from deltacat.compute.compactor.utils import io
+ from deltacat.compute.compactor.utils import primary_key_index as pki
+ from deltacat.compute.compactor.utils import round_completion_file as rcf
  from deltacat.compute.stats.models.delta_stats import DeltaStats
- from deltacat.storage import Delta, DeltaLocator, Partition, \
-     PartitionLocator, interface as unimplemented_deltacat_storage
- from deltacat.utils.ray_utils.concurrency import invoke_parallel, \
-     round_robin_options_provider
- from deltacat.utils.ray_utils.runtime import live_node_resource_keys
- from deltacat.compute.compactor.steps import hash_bucket as hb, dedupe as dd, \
-     materialize as mat
- from deltacat.compute.compactor import SortKey, PrimaryKeyIndexMeta, \
-     PrimaryKeyIndexLocator, PrimaryKeyIndexVersionMeta, \
-     PrimaryKeyIndexVersionLocator, RoundCompletionInfo, \
-     PyArrowWriteResult
- from deltacat.compute.compactor.utils import round_completion_file as rcf, io, \
-     primary_key_index as pki
+ from deltacat.storage import Delta, DeltaLocator, Partition, PartitionLocator
+ from deltacat.storage import interface as unimplemented_deltacat_storage
  from deltacat.types.media import ContentType
+ from deltacat.utils.placement import PlacementGroupConfig
+ from deltacat.utils.ray_utils.concurrency import (
+     invoke_parallel,
+     round_robin_options_provider,
+ )
+ from deltacat.utils.ray_utils.runtime import live_node_resource_keys

- from typing import List, Set, Optional, Tuple, Dict, Union, Any
-
- import pyarrow as pa
  logger = logs.configure_deltacat_logger(logging.getLogger(__name__))

-
-
-
- @ray.remote(num_cpus=0.01)
- class STATES_ACTOR:
-     def __init__(self):
-         self._SORT_KEY_NAME_INDEX: int = 0
-         self._SORT_KEY_ORDER_INDEX: int = 1
-         self._PRIMARY_KEY_INDEX_ALGORITHM_VERSION: str = "1.0"
-         self._TOTAL_ENTRIES: int = 0
-         self._TOTAL_DELTAS: int = 0
-         self._TOTAL_ROUNDS: float = 0
-
-     def SORT_KEY_NAME_INDEX(self):
-         return self._SORT_KEY_NAME_INDEX
-     def SORT_KEY_ORDER_INDEX(self):
-         return self._SORT_KEY_NAME_INDEX
-     def PRIMARY_KEY_INDEX_ALGORITHM_VERSION(self):
-         return self._PRIMARY_KEY_INDEX_ALGORITHM_VERSION
-     def TOTAL_ROUNDS(self):
-         return self._TOTAL_ROUNDS
-     def TOTAL_ENTRIES(self):
-         return self._TOTAL_ENTRIES
-     def TOTAL_DELTAS(self):
-         return self._TOTAL_DELTAS
-
-     def update_delta(self, delta):
-         self._TOTAL_DELTAS = delta
-     def update_entry(self, entry):
-         self._TOTAL_ENTRIES = entry
-     def update_round(self, round):
-         self._TOTAL_ROUNDS = round
+ _PRIMARY_KEY_INDEX_ALGORITHM_VERSION: str = "1.0"


  def check_preconditions(
-         source_partition_locator: PartitionLocator,
-         compacted_partition_locator: PartitionLocator,
-         sort_keys: List[SortKey],
-         max_records_per_output_file: int,
-         new_hash_bucket_count: Optional[int],
-         deltacat_storage=unimplemented_deltacat_storage) -> int:
-
-     assert source_partition_locator.partition_values \
-         == compacted_partition_locator.partition_values, \
-         "In-place compaction must use the same partition values for the " \
+     source_partition_locator: PartitionLocator,
+     compacted_partition_locator: PartitionLocator,
+     sort_keys: List[SortKey],
+     max_records_per_output_file: int,
+     new_hash_bucket_count: Optional[int],
+     deltacat_storage=unimplemented_deltacat_storage,
+ ) -> int:
+
+     assert (
+         source_partition_locator.partition_values
+         == compacted_partition_locator.partition_values
+     ), (
+         "In-place compaction must use the same partition values for the "
          "source and destination."
-     assert max_records_per_output_file >= 1, \
-         "Max records per output file must be a positive value"
+     )
+     assert (
+         max_records_per_output_file >= 1
+     ), "Max records per output file must be a positive value"
      if new_hash_bucket_count is not None:
-         assert new_hash_bucket_count >= 1, \
-             "New hash bucket count must be a positive value"
+         assert (
+             new_hash_bucket_count >= 1
+         ), "New hash bucket count must be a positive value"
      return SortKey.validate_sort_keys(
          source_partition_locator,
          sort_keys,
@@ -86,129 +69,111 @@ def check_preconditions(


  def compact_partition(
-         source_partition_locator: PartitionLocator,
-         compacted_partition_locator: PartitionLocator,
-         primary_keys: Set[str],
-         compaction_artifact_s3_bucket: str,
-         last_stream_position_to_compact: int,
-         hash_bucket_count: Optional[int] = None,
-         sort_keys: List[SortKey] = None,
-         records_per_primary_key_index_file: int = 38_000_000,
-         records_per_compacted_file: int = 4_000_000,
-         input_deltas_stats: Dict[int, DeltaStats] = None,
-         min_pk_index_pa_bytes: int = 0,
-         min_hash_bucket_chunk_size: int = 0,
-         compacted_file_content_type: ContentType = ContentType.PARQUET,
-         delete_prev_primary_key_index: bool = False,
-         read_round_completion: bool = False,
-         ignore_missing_manifest: bool = False,
-         max_parallelism: List[float] = None,
-         num_cpus: List[int] = None,
-         pg_config: Optional[List[Dict[str, Any]]] = None,
-         schema_on_read: Optional[pa.schema] = None, # TODO (ricmiyam): Remove this and retrieve schema from storage API
-         deltacat_storage=unimplemented_deltacat_storage):
+     source_partition_locator: PartitionLocator,
+     destination_partition_locator: PartitionLocator,
+     primary_keys: Set[str],
+     compaction_artifact_s3_bucket: str,
+     last_stream_position_to_compact: int,
+     *,
+     hash_bucket_count: Optional[int] = None,
+     sort_keys: List[SortKey] = None,
+     records_per_primary_key_index_file: int = 38_000_000,
+     records_per_compacted_file: int = 4_000_000,
+     input_deltas_stats: Dict[int, DeltaStats] = None,
+     min_pk_index_pa_bytes: int = 0,
+     min_hash_bucket_chunk_size: int = 0,
+     compacted_file_content_type: ContentType = ContentType.PARQUET,
+     delete_prev_primary_key_index: bool = False,
+     pg_config: Optional[PlacementGroupConfig] = None,
+     schema_on_read: Optional[
+         pa.schema
+     ] = None, # TODO (ricmiyam): Remove this and retrieve schema from storage API
+     rebase_source_partition_locator: Optional[PartitionLocator] = None,
+     rebase_source_partition_high_watermark: Optional[int] = None,
+     deltacat_storage=unimplemented_deltacat_storage,
+ ) -> Optional[str]:

      logger.info(f"Starting compaction session for: {source_partition_locator}")
      partition = None
      compaction_rounds_executed = 0
      has_next_compaction_round = True
-     opts={}
-     if pg_config:
-         opts=pg_config[0]
-     round_id = 1
-     states = STATES_ACTOR.remote()
+     new_rcf_s3_url = None
      while has_next_compaction_round:
-         round_start = time.time()
-         logger.info(f"round {round_id} started")
-         print(f"round {round_id} started")
-         has_next_compaction_round_obj, new_partition_obj, new_rci_obj = \
-             _execute_compaction_round.options(**opts).remote(
-                 source_partition_locator,
-                 compacted_partition_locator,
-                 primary_keys,
-                 compaction_artifact_s3_bucket,
-                 last_stream_position_to_compact,
-                 hash_bucket_count,
-                 sort_keys,
-                 records_per_primary_key_index_file,
-                 records_per_compacted_file,
-                 input_deltas_stats,
-                 min_pk_index_pa_bytes,
-                 min_hash_bucket_chunk_size,
-                 compacted_file_content_type,
-                 delete_prev_primary_key_index,
-                 read_round_completion,
-                 ignore_missing_manifest,
-                 max_parallelism,
-                 num_cpus,
-                 round_id,
-                 states,
-                 schema_on_read,
-                 deltacat_storage=deltacat_storage,
-                 pg_config=pg_config
-             )
-         round_id +=1
-         has_next_compaction_round = ray.get(has_next_compaction_round_obj)
-         round_end = time.time()
-         TOTAL_ROUNDS = ray.get(states.TOTAL_ROUNDS.remote())
-         logger.info(f"Round {round_id}/{TOTAL_ROUNDS} took {round_end-round_start} seconds, estimated time to finish:{(TOTAL_ROUNDS-round_id)*(round_end-round_start)}")
-         print(f"Round {round_id}/{TOTAL_ROUNDS} took {round_end-round_start} seconds, estimated time to finish:{(TOTAL_ROUNDS-round_id)*(round_end-round_start)}")
-         new_partition = ray.get(new_partition_obj)
-         new_rci = ray.get(new_rci_obj)
+         (
+             has_next_compaction_round,
+             new_partition,
+             new_rci,
+             new_rcf_s3_url,
+         ) = _execute_compaction_round(
+             source_partition_locator,
+             destination_partition_locator,
+             primary_keys,
+             compaction_artifact_s3_bucket,
+             last_stream_position_to_compact,
+             hash_bucket_count,
+             sort_keys,
+             records_per_primary_key_index_file,
+             records_per_compacted_file,
+             input_deltas_stats,
+             min_pk_index_pa_bytes,
+             min_hash_bucket_chunk_size,
+             compacted_file_content_type,
+             delete_prev_primary_key_index,
+             pg_config,
+             schema_on_read,
+             rebase_source_partition_locator,
+             rebase_source_partition_high_watermark,
+             deltacat_storage,
+         )
          if new_partition:
              partition = new_partition
-             compacted_partition_locator = new_partition.locator
+             destination_partition_locator = new_partition.locator
              compaction_rounds_executed += 1
          # Take new primary key index sizes into account for subsequent compaction rounds and their dedupe steps
          if new_rci:
              min_pk_index_pa_bytes = new_rci.pk_index_pyarrow_write_result.pyarrow_bytes

-     logger.info(f"Partition-{source_partition_locator.partition_values}-> Compaction session data processing completed in "
-                 f"{compaction_rounds_executed} rounds.")
+     logger.info(
+         f"Partition-{source_partition_locator.partition_values}-> Compaction session data processing completed in "
+         f"{compaction_rounds_executed} rounds."
+     )
      if partition:
          logger.info(f"Committing compacted partition to: {partition.locator}")
          partition = deltacat_storage.commit_partition(partition)
          logger.info(f"Committed compacted partition: {partition}")
      logger.info(f"Completed compaction session for: {source_partition_locator}")
+     return new_rcf_s3_url


- @ray.remote
- def get_metadata(deltacat_storage, delta):
-     return len(deltacat_storage.get_delta_manifest(delta).entries)
- @ray.remote(num_cpus=1,num_returns=3,max_retries=1)
+ @ray.remote(num_cpus=0.1, num_returns=3)
  def _execute_compaction_round(
-         source_partition_locator: PartitionLocator,
-         compacted_partition_locator: PartitionLocator,
-         primary_keys: Set[str],
-         compaction_artifact_s3_bucket: str,
-         last_stream_position_to_compact: int,
-         new_hash_bucket_count: Optional[int],
-         sort_keys: List[SortKey],
-         records_per_primary_key_index_file: int,
-         records_per_compacted_file: int,
-         input_deltas_stats: Dict[int, DeltaStats],
-         min_pk_index_pa_bytes: int,
-         min_hash_bucket_chunk_size: int,
-         compacted_file_content_type: ContentType,
-         delete_prev_primary_key_index: bool,
-         read_round_completion: bool,
-         ignore_missing_manifest: bool,
-         max_parallelism: List[float],
-         num_cpus: List[int],
-         round_id: int,
-         states: STATES_ACTOR,
-         schema_on_read: Optional[pa.schema],
-         deltacat_storage = unimplemented_deltacat_storage,
-         pg_config: Optional[List[Dict[str, Any]]] = None) \
-         -> Tuple[bool, Optional[Partition], Optional[RoundCompletionInfo]]:
-
-
-     pre_hb_start = time.time()
+     source_partition_locator: PartitionLocator,
+     compacted_partition_locator: PartitionLocator,
+     primary_keys: Set[str],
+     compaction_artifact_s3_bucket: str,
+     last_stream_position_to_compact: int,
+     new_hash_bucket_count: Optional[int],
+     sort_keys: List[SortKey],
+     records_per_primary_key_index_file: int,
+     records_per_compacted_file: int,
+     input_deltas_stats: Dict[int, DeltaStats],
+     min_pk_index_pa_bytes: int,
+     min_hash_bucket_chunk_size: int,
+     compacted_file_content_type: ContentType,
+     delete_prev_primary_key_index: bool,
+     pg_config: Optional[PlacementGroupConfig],
+     schema_on_read: Optional[pa.schema],
+     rebase_source_partition_locator: Optional[PartitionLocator],
+     rebase_source_partition_high_watermark: Optional[int],
+     deltacat_storage=unimplemented_deltacat_storage,
+ ) -> Tuple[bool, Optional[Partition], Optional[RoundCompletionInfo], Optional[str]]:
+
      if not primary_keys:
          # TODO (pdames): run simple rebatch to reduce all deltas into 1 delta
          # with normalized manifest entry sizes
          raise NotImplementedError(
-             "Compaction only supports tables with 1 or more primary keys")
+             "Compaction only supports tables with 1 or more primary keys"
+         )
      if sort_keys is None:
          sort_keys = []
      # TODO (pdames): detect and handle schema evolution (at least ensure that
@@ -229,40 +194,30 @@ def _execute_compaction_round(
      # sort primary keys to produce the same pk digest regardless of input order
      primary_keys = sorted(primary_keys)

-     # collect cluster resource stats
-     # cluster_resources = ray.cluster_resources()
-     # logger.info(f"Total cluster resources: {cluster_resources}")
-     # logger.info(f"Available cluster resources: {ray.available_resources()}")
-     # cluster_cpus = int(cluster_resources["CPU"])
-     # logger.info(f"Total cluster CPUs: {cluster_cpus}")
-
-     # collect node group resources
-
      cluster_resources = ray.cluster_resources()
      logger.info(f"Total cluster resources: {cluster_resources}")
-     if pg_config: # use resource in each placement group
-         #node_resource_keys=None
-         cluster_resources = pg_config[1]
-         cluster_cpus = cluster_resources['CPU']
-         node_resource_keys = cluster_resources['node_id']
-     else: # use all cluster resource
+     node_resource_keys = None
+     if pg_config: # use resource in each placement group
+         cluster_resources = pg_config.resource
+         cluster_cpus = cluster_resources["CPU"]
+     else: # use all cluster resource
          logger.info(f"Available cluster resources: {ray.available_resources()}")
          cluster_cpus = int(cluster_resources["CPU"])
          logger.info(f"Total cluster CPUs: {cluster_cpus}")
          node_resource_keys = live_node_resource_keys()
-         logger.info(f"Found {len(node_resource_keys)} live cluster nodes: "
-                     f"{node_resource_keys}")
-
-     if node_resource_keys:
-         # create a remote options provider to round-robin tasks across all nodes
-         logger.info(f"Setting round robin scheduling with node id:{node_resource_keys}")
-         round_robin_opt_provider = functools.partial(
-             round_robin_options_provider,
-             resource_keys=node_resource_keys,
+         logger.info(
+             f"Found {len(node_resource_keys)} live cluster nodes: "
+             f"{node_resource_keys}"
          )
-     else:
-         logger.info("Setting round robin scheduling to None")
-         round_robin_opt_provider = None
+
+     # create a remote options provider to round-robin tasks across all nodes or allocated bundles
+     logger.info(f"Setting round robin scheduling with node id:{node_resource_keys}")
+     round_robin_opt_provider = functools.partial(
+         round_robin_options_provider,
+         resource_keys=node_resource_keys,
+         pg_config=pg_config.opts if pg_config else None,
+     )
+
      # assign a distinct index to each node in the cluster
      # head_node_ip = urllib.request.urlopen(
      # "http://169.254.169.254/latest/meta-data/local-ipv4"
@@ -273,20 +228,10 @@ def _execute_compaction_round(
      # we assume here that we're running on a fixed-size cluster - this
      # assumption could be removed but we'd still need to know the maximum
      # "safe" number of parallel tasks that our autoscaling cluster could handle
-     if max_parallelism and len(max_parallelism)==3: # customized for each major step: hb, dd, mat
-         max_parallelism = [int(cluster_cpus*i) for i in max_parallelism]
-     else:
-         max_parallelism = [int(cluster_cpus) for _ in range(3)]
-     logger.info(f"Max parallelism for each steps: {max_parallelism}")
-
-
-     if not num_cpus:
-         num_cpus=[1,1,1] # allocate 1 cpu for each task (hb, dd or mat)
-
-     logger.info(f"Number of cpus for each steps: {num_cpus}")
+     max_parallelism = int(cluster_cpus)
+     logger.info(f"Max parallelism: {max_parallelism}")

      # get the root path of a compatible primary key index for this round
-     _PRIMARY_KEY_INDEX_ALGORITHM_VERSION=ray.get(states.PRIMARY_KEY_INDEX_ALGORITHM_VERSION.remote())
      compatible_primary_key_index_meta = PrimaryKeyIndexMeta.of(
          compacted_partition_locator,
          primary_keys,
@@ -294,38 +239,58 @@ def _execute_compaction_round(
          _PRIMARY_KEY_INDEX_ALGORITHM_VERSION,
      )
      compatible_primary_key_index_locator = PrimaryKeyIndexLocator.of(
-         compatible_primary_key_index_meta)
-     compatible_primary_key_index_root_path = \
+         compatible_primary_key_index_meta
+     )
+     compatible_primary_key_index_root_path = (
          compatible_primary_key_index_locator.primary_key_index_root_path
+     )

      # read the results from any previously completed compaction round that used
      # a compatible primary key index
      round_completion_info = None
-     if read_round_completion:
+     if not rebase_source_partition_locator:
+         logger.info(
+             f"Reading round completion file for compatible "
+             f"primary key index root path: {compatible_primary_key_index_root_path}"
+         )
          round_completion_info = rcf.read_round_completion_file(
              compaction_artifact_s3_bucket,
              source_partition_locator,
              compatible_primary_key_index_root_path,
          )
+         logger.info(f"Round completion file: {round_completion_info}")

      # read the previous compaction round's hash bucket count, if any
      old_hash_bucket_count = None
      if round_completion_info:
-         old_pki_version_locator = round_completion_info\
-             .primary_key_index_version_locator
-         old_hash_bucket_count = old_pki_version_locator\
-             .primary_key_index_version_meta \
-             .hash_bucket_count
-         min_pk_index_pa_bytes = round_completion_info.pk_index_pyarrow_write_result.pyarrow_bytes
+         old_pki_version_locator = (
+             round_completion_info.primary_key_index_version_locator
+         )
+         old_hash_bucket_count = (
+             old_pki_version_locator.primary_key_index_version_meta.hash_bucket_count
+         )
+         min_pk_index_pa_bytes = (
+             round_completion_info.pk_index_pyarrow_write_result.pyarrow_bytes
+         )
+     else:
+         logger.info(
+             f"No prior round info read. Source partition: "
+             f"{source_partition_locator}. Primary key index locator: "
+             f"{compatible_primary_key_index_locator}. Rebase source "
+             f"partition locator: {rebase_source_partition_locator}"
+         )

      # use the new hash bucket count if provided, or fall back to old count
-     hash_bucket_count = new_hash_bucket_count \
-         if new_hash_bucket_count is not None \
+     hash_bucket_count = (
+         new_hash_bucket_count
+         if new_hash_bucket_count is not None
          else old_hash_bucket_count
+     )

      # discover input delta files
-     high_watermark = round_completion_info.high_watermark \
-         if round_completion_info else None
+     high_watermark = (
+         round_completion_info.high_watermark if round_completion_info else None
+     )

      input_deltas = io.discover_deltas(
          source_partition_locator,
@@ -336,99 +301,72 @@ def _execute_compaction_round(

      if not input_deltas:
          logger.info("No input deltas found to compact.")
-         return False, None, None
+         return False, None, None, None

      # limit the input deltas to fit on this cluster and convert them to
      # annotated deltas of equivalent size for easy parallel distribution

-     uniform_deltas, hash_bucket_count, last_stream_position_compacted = \
-         io.limit_input_deltas(
-             input_deltas,
-             cluster_resources,
-             hash_bucket_count,
-             min_pk_index_pa_bytes,
-             min_hash_bucket_chunk_size,
-             input_deltas_stats=input_deltas_stats,
-             deltacat_storage=deltacat_storage
-         )
+     (
+         uniform_deltas,
+         hash_bucket_count,
+         last_stream_position_compacted,
+     ) = io.limit_input_deltas(
+         input_deltas,
+         cluster_resources,
+         hash_bucket_count,
+         min_pk_index_pa_bytes,
+         min_hash_bucket_chunk_size,
+         input_deltas_stats=input_deltas_stats,
+         deltacat_storage=deltacat_storage,
+     )

-     uniform_deltas_entries=sum([len(i.manifest.entries) for i in uniform_deltas])
-     if round_id == 1: # first round, total_deltas is known
-         #TOTAL_ENTRIES = sum([len(deltacat_storage.get_delta_manifest(i).manifest.entries) for i in input_deltas])
-         #TOTAL_ENTRIES = sum(ray.get([get_metadata.remote(deltacat_storage,i) for i in input_deltas]))
-         #TODO: use stats, otherwise too slow to get all manifest's metadata
-         TOTAL_ENTRIES = 722451
-         TOTAL_DELTAS = len(input_deltas)
-         ray.get(states.update_entry.remote(TOTAL_ENTRIES))
-         ray.get(states.update_delta.remote(TOTAL_DELTAS))
-         logger.info(f"Estimated Rounds:{TOTAL_ENTRIES/uniform_deltas_entries}")
-         TOTAL_ROUNDS = TOTAL_ENTRIES/uniform_deltas_entries
-         ray.get(states.update_round.remote(TOTAL_ROUNDS))
-     TOTAL_ROUNDS = ray.get(states.TOTAL_ROUNDS.remote())
-     TOTAL_ENTRIES = ray.get(states.TOTAL_ENTRIES.remote())
-     TOTAL_DELTAS = ray.get(states.TOTAL_DELTAS.remote())
-     logger.info(f"Round {round_id}/{TOTAL_ROUNDS}: {uniform_deltas_entries}/{TOTAL_ENTRIES} entries in total deltas {TOTAL_DELTAS}")
-     print(f"Round {round_id}/{TOTAL_ROUNDS}: {uniform_deltas_entries}/{TOTAL_ENTRIES} entries in total deltas {TOTAL_DELTAS}")
-
-     assert hash_bucket_count is not None and hash_bucket_count > 0, \
-         f"Unexpected Error: Default hash bucket count ({hash_bucket_count}) " \
-         f"is invalid."
+     assert hash_bucket_count is not None and hash_bucket_count > 0, (
+         f"Expected hash bucket count to be a positive integer, but found "
+         f"`{hash_bucket_count}`"
+     )

      # rehash the primary key index if necessary
-     round_completion_info = None
      if round_completion_info:
          logger.info(f"Round completion file contents: {round_completion_info}")
          # the previous primary key index is compatible with the current, but
          # will need to be rehashed if the hash bucket count has changed
          if hash_bucket_count != old_hash_bucket_count:
+             # TODO(draghave): manually test the path after prior primary key
+             # index was already built
              round_completion_info = pki.rehash(
                  round_robin_opt_provider,
                  compaction_artifact_s3_bucket,
                  source_partition_locator,
                  round_completion_info,
                  hash_bucket_count,
-                 max_parallelism[0],
+                 max_parallelism,
                  records_per_primary_key_index_file,
                  delete_prev_primary_key_index,
              )
-     else:
-         logger.info(f"No prior round completion file found. Source partition: "
-                     f"{source_partition_locator}. Primary key index locator: "
-                     f"{compatible_primary_key_index_locator}")

-
-     hb_start = time.time()
-     logger.info(f"adhoc_rootliu, Round {round_id} Pre-Hash bucket took:{hb_start-pre_hb_start} seconds")
-     print(f"adhoc_rootliu, Round {round_id} Pre-Hash bucket took:{hb_start-pre_hb_start} seconds")
      # parallel step 1:
      # group like primary keys together by hashing them into buckets
      hb_tasks_pending = invoke_parallel(
          items=uniform_deltas,
          ray_task=hb.hash_bucket,
-         max_parallelism=max_parallelism[0],
-         num_cpus = num_cpus[0],
+         max_parallelism=max_parallelism,
          options_provider=round_robin_opt_provider,
          primary_keys=primary_keys,
          sort_keys=sort_keys,
          num_buckets=hash_bucket_count,
-         num_groups=max_parallelism[0],
-         ignore_missing_manifest=ignore_missing_manifest,
+         num_groups=max_parallelism,
          deltacat_storage=deltacat_storage,
      )
      logger.info(f"Getting {len(hb_tasks_pending)} hash bucket results...")
      hb_results = ray.get([t[0] for t in hb_tasks_pending])
-     print(f"adhoc_rootliu, Round {round_id} Got {len(hb_results)} hash bucket results.")
      logger.info(f"Got {len(hb_results)} hash bucket results.")
      all_hash_group_idx_to_obj_id = defaultdict(list)
      for hash_group_idx_to_obj_id in hb_results:
          for hash_group_index, object_id in enumerate(hash_group_idx_to_obj_id):
              if object_id:
                  all_hash_group_idx_to_obj_id[hash_group_index].append(object_id)
-     hash_group_count = dedupe_task_count = len(all_hash_group_idx_to_obj_id)
+     hash_group_count = len(all_hash_group_idx_to_obj_id)
      logger.info(f"Hash bucket groups created: {hash_group_count}")
-     hb_end = time.time()
-     logger.info(f"adhoc_rootliu, Round {round_id} Hash bucket took:{hb_end-hb_start} seconds")
-     print(f"adhoc_rootliu, Round {round_id} Hash bucket took:{hb_end-hb_start} seconds")

      # TODO (pdames): when resources are freed during the last round of hash
      # bucketing, start running dedupe tasks that read existing dedupe
@@ -455,9 +393,11 @@ def _execute_compaction_round(
          _PRIMARY_KEY_INDEX_ALGORITHM_VERSION,
      )
      new_primary_key_index_locator = PrimaryKeyIndexLocator.of(
-         new_primary_key_index_meta)
-     new_primary_key_index_root_path = new_primary_key_index_locator\
-         .primary_key_index_root_path
+         new_primary_key_index_meta
+     )
+     new_primary_key_index_root_path = (
+         new_primary_key_index_locator.primary_key_index_root_path
+     )

      # generate a new primary key index version locator for this round
      new_primary_key_index_version_meta = PrimaryKeyIndexVersionMeta.of(
@@ -465,53 +405,48 @@ def _execute_compaction_round(
          hash_bucket_count,
      )
      new_pki_version_locator = PrimaryKeyIndexVersionLocator.generate(
-         new_primary_key_index_version_meta)
-
+         new_primary_key_index_version_meta
+     )

      # parallel step 2:
      # discover records with duplicate primary keys in each hash bucket, and
      # identify the index of records to keep or drop based on sort keys
-     num_materialize_buckets = max_parallelism[1]
+     num_materialize_buckets = max_parallelism
      logger.info(f"Materialize Bucket Count: {num_materialize_buckets}")
-     record_counts_pending_materialize = \
-         dd.RecordCountsPendingMaterialize.remote(dedupe_task_count)
      dd_tasks_pending = invoke_parallel(
          items=all_hash_group_idx_to_obj_id.values(),
          ray_task=dd.dedupe,
-         max_parallelism=max_parallelism[1],
-         num_cpus = num_cpus[1],
+         max_parallelism=max_parallelism,
          options_provider=round_robin_opt_provider,
-         kwargs_provider=lambda index, item: {"dedupe_task_index": index,
-                                              "object_ids": item},
+         kwargs_provider=lambda index, item: {
+             "dedupe_task_index": index,
+             "object_ids": item,
+         },
          compaction_artifact_s3_bucket=compaction_artifact_s3_bucket,
          round_completion_info=round_completion_info,
          new_primary_key_index_version_locator=new_pki_version_locator,
          sort_keys=sort_keys,
          max_records_per_index_file=records_per_primary_key_index_file,
-         max_records_per_materialized_file=records_per_compacted_file,
          num_materialize_buckets=num_materialize_buckets,
          delete_old_primary_key_index=delete_prev_primary_key_index,
-         record_counts_pending_materialize=record_counts_pending_materialize,
      )
      logger.info(f"Getting {len(dd_tasks_pending)} dedupe results...")
      dd_results = ray.get([t[0] for t in dd_tasks_pending])
      logger.info(f"Got {len(dd_results)} dedupe results.")
-     print((f"adhoc_rootliu, Round {round_id} Got {len(dd_results)} dedupe results."))
      all_mat_buckets_to_obj_id = defaultdict(list)
      for mat_bucket_idx_to_obj_id in dd_results:
-         for bucket_idx, dd_task_index_and_object_id_tuple in \
-                 mat_bucket_idx_to_obj_id.items():
+         for (
+             bucket_idx,
+             dd_task_index_and_object_id_tuple,
+         ) in mat_bucket_idx_to_obj_id.items():
              all_mat_buckets_to_obj_id[bucket_idx].append(
-                 dd_task_index_and_object_id_tuple)
+                 dd_task_index_and_object_id_tuple
+             )
      logger.info(f"Getting {len(dd_tasks_pending)} dedupe result stat(s)...")
      pki_stats = ray.get([t[2] for t in dd_tasks_pending])
      logger.info(f"Got {len(pki_stats)} dedupe result stat(s).")
-     logger.info(f"Materialize buckets created: "
-                 f"{len(all_mat_buckets_to_obj_id)}")
+     logger.info(f"Materialize buckets created: " f"{len(all_mat_buckets_to_obj_id)}")

-     dd_end = time.time()
-     logger.info(f"adhoc_rootliu, Round {round_id} dedupe took:{dd_end-hb_end} seconds")
-     print(f"adhoc_rootliu, Round {round_id} dedupe took:{dd_end-hb_end} seconds")
      # TODO(pdames): when resources are freed during the last round of deduping
      # start running materialize tasks that read materialization source file
      # tables from S3 then wait for deduping to finish before continuing
@@ -527,12 +462,11 @@ def _execute_compaction_round(
      mat_tasks_pending = invoke_parallel(
          items=all_mat_buckets_to_obj_id.items(),
          ray_task=mat.materialize,
-         max_parallelism=max_parallelism[2],
-         num_cpus = num_cpus[2],
+         max_parallelism=max_parallelism,
          options_provider=round_robin_opt_provider,
-         kwargs_provider=lambda index, mat_bucket_idx_to_obj_id: {
-             "mat_bucket_index": mat_bucket_idx_to_obj_id[0],
-             "dedupe_task_idx_and_obj_id_tuples": mat_bucket_idx_to_obj_id[1],
+         kwargs_provider=lambda index, mat_bucket_index_to_obj_id: {
+             "mat_bucket_index": mat_bucket_index_to_obj_id[0],
+             "dedupe_task_idx_and_obj_id_tuples": mat_bucket_index_to_obj_id[1],
          },
          schema=schema_on_read,
          round_completion_info=round_completion_info,
@@ -545,43 +479,52 @@ def _execute_compaction_round(
      logger.info(f"Getting {len(mat_tasks_pending)} materialize result(s)...")
      mat_results = ray.get(mat_tasks_pending)
      logger.info(f"Got {len(mat_results)} materialize result(s).")
-     print(f"adhoc_rootliu, Round {round_id} Got {len(mat_results)} materialize result(s).")

-     mat_end = time.time()
-     logger.info(f"adhoc_rootliu, Round {round_id} mat took:{mat_end-dd_end} seconds")
-     print(f"adhoc_rootliu, Round {round_id} mat took:{mat_end-dd_end} seconds")
      mat_results = sorted(mat_results, key=lambda m: m.task_index)
      deltas = [m.delta for m in mat_results]
      merged_delta = Delta.merge_deltas(deltas)
      compacted_delta = deltacat_storage.commit_delta(merged_delta)
      logger.info(f"Committed compacted delta: {compacted_delta}")
-     commit_end=time.time()
-     logger.info(f"adhoc_rootliu, Round {round_id} commit took:{commit_end-mat_end} seconds")
-     print(f"adhoc_rootliu, Round {round_id} commit took:{commit_end-mat_end} seconds")
+
      new_compacted_delta_locator = DeltaLocator.of(
          new_compacted_partition_locator,
          compacted_delta.stream_position,
      )

-     round_completion_info = RoundCompletionInfo.of(
-         last_stream_position_compacted,
+     rci_high_watermark = (
+         rebase_source_partition_high_watermark
+         if rebase_source_partition_high_watermark
+         else last_stream_position_compacted
+     )
+     new_round_completion_info = RoundCompletionInfo.of(
+         rci_high_watermark,
          new_compacted_delta_locator,
-         PyArrowWriteResult.union([m.pyarrow_write_result
-                                   for m in mat_results]),
+         PyArrowWriteResult.union([m.pyarrow_write_result for m in mat_results]),
          PyArrowWriteResult.union(pki_stats),
          bit_width_of_sort_keys,
          new_pki_version_locator,
+         rebase_source_partition_locator
+         or round_completion_info.rebase_source_partition_locator,
      )
-     rcf.write_round_completion_file(
+     rcf_source_partition_locator = (
+         rebase_source_partition_locator
+         if rebase_source_partition_locator
+         else source_partition_locator
+     )
+     round_completion_file_s3_url = rcf.write_round_completion_file(
          compaction_artifact_s3_bucket,
-         source_partition_locator,
+         rcf_source_partition_locator,
          new_primary_key_index_root_path,
-         round_completion_info,
+         new_round_completion_info,
+     )
+     logger.info(
+         f"partition-{source_partition_locator.partition_values},"
+         f"compacted at: {last_stream_position_compacted},"
+         f"last position: {last_stream_position_to_compact}"
+     )
+     return (
+         (last_stream_position_compacted < last_stream_position_to_compact),
+         partition,
+         new_round_completion_info,
+         round_completion_file_s3_url,
      )
-     time_mat_e = time.time()
-     logger.info(f"partition-{source_partition_locator.partition_values},compacted at:{last_stream_position_compacted}, last position:{last_stream_position_to_compact}")
-     return \
-         (last_stream_position_compacted < last_stream_position_to_compact), \
-         partition, \
-         round_completion_info
-
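For orientation, the 0.1.11 signature of compact_partition shown above makes everything after last_stream_position_to_compact keyword-only and returns the S3 URL of the round completion file. The following is a minimal call sketch based only on that signature; the locators, stream position, bucket name, primary key column, and storage implementation are hypothetical placeholders, not values shipped with this package:

    import ray

    from deltacat.compute.compactor.compaction_session import compact_partition

    # A Ray cluster must already be available, since compaction runs as Ray tasks.
    ray.init(address="auto")

    # source_locator, destination_locator, stream_position, and my_storage are
    # placeholders; supply your own PartitionLocators, last stream position to
    # compact, and deltacat storage implementation.
    rcf_s3_url = compact_partition(
        source_locator,
        destination_locator,
        primary_keys={"id"},  # "id" is an assumed primary key column name
        compaction_artifact_s3_bucket="my-compaction-artifact-bucket",
        last_stream_position_to_compact=stream_position,
        records_per_compacted_file=4_000_000,
        deltacat_storage=my_storage,
    )
    # rcf_s3_url holds the S3 URL of the written round completion file,
    # matching the new Optional[str] return type.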