deltacat 0.1.8__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- deltacat/__init__.py +41 -15
- deltacat/aws/clients.py +12 -31
- deltacat/aws/constants.py +1 -1
- deltacat/aws/redshift/__init__.py +7 -2
- deltacat/aws/redshift/model/manifest.py +54 -50
- deltacat/aws/s3u.py +188 -218
- deltacat/catalog/delegate.py +151 -185
- deltacat/catalog/interface.py +78 -97
- deltacat/catalog/model/catalog.py +21 -21
- deltacat/catalog/model/table_definition.py +11 -9
- deltacat/compute/compactor/__init__.py +12 -16
- deltacat/compute/compactor/compaction_session.py +259 -316
- deltacat/compute/compactor/model/delta_annotated.py +60 -44
- deltacat/compute/compactor/model/delta_file_envelope.py +5 -6
- deltacat/compute/compactor/model/delta_file_locator.py +10 -8
- deltacat/compute/compactor/model/materialize_result.py +6 -7
- deltacat/compute/compactor/model/primary_key_index.py +38 -34
- deltacat/compute/compactor/model/pyarrow_write_result.py +3 -4
- deltacat/compute/compactor/model/round_completion_info.py +25 -19
- deltacat/compute/compactor/model/sort_key.py +18 -15
- deltacat/compute/compactor/steps/dedupe.py +152 -259
- deltacat/compute/compactor/steps/hash_bucket.py +57 -73
- deltacat/compute/compactor/steps/materialize.py +138 -99
- deltacat/compute/compactor/steps/rehash/rehash_bucket.py +13 -13
- deltacat/compute/compactor/steps/rehash/rewrite_index.py +11 -13
- deltacat/compute/compactor/utils/io.py +59 -47
- deltacat/compute/compactor/utils/primary_key_index.py +131 -90
- deltacat/compute/compactor/utils/round_completion_file.py +22 -23
- deltacat/compute/compactor/utils/system_columns.py +33 -42
- deltacat/compute/metastats/meta_stats.py +235 -157
- deltacat/compute/metastats/model/partition_stats_dict.py +7 -10
- deltacat/compute/metastats/model/stats_cluster_size_estimator.py +13 -5
- deltacat/compute/metastats/stats.py +95 -64
- deltacat/compute/metastats/utils/io.py +100 -53
- deltacat/compute/metastats/utils/pyarrow_memory_estimation_function.py +5 -2
- deltacat/compute/metastats/utils/ray_utils.py +38 -33
- deltacat/compute/stats/basic.py +107 -69
- deltacat/compute/stats/models/delta_column_stats.py +11 -8
- deltacat/compute/stats/models/delta_stats.py +59 -32
- deltacat/compute/stats/models/delta_stats_cache_result.py +4 -1
- deltacat/compute/stats/models/manifest_entry_stats.py +12 -6
- deltacat/compute/stats/models/stats_result.py +24 -14
- deltacat/compute/stats/utils/intervals.py +16 -9
- deltacat/compute/stats/utils/io.py +86 -51
- deltacat/compute/stats/utils/manifest_stats_file.py +24 -33
- deltacat/constants.py +8 -10
- deltacat/io/__init__.py +2 -2
- deltacat/io/aws/redshift/redshift_datasource.py +157 -143
- deltacat/io/dataset.py +14 -17
- deltacat/io/read_api.py +36 -33
- deltacat/logs.py +94 -42
- deltacat/storage/__init__.py +18 -8
- deltacat/storage/interface.py +196 -213
- deltacat/storage/model/delta.py +45 -51
- deltacat/storage/model/list_result.py +12 -8
- deltacat/storage/model/namespace.py +4 -5
- deltacat/storage/model/partition.py +42 -42
- deltacat/storage/model/stream.py +29 -30
- deltacat/storage/model/table.py +14 -14
- deltacat/storage/model/table_version.py +32 -31
- deltacat/storage/model/types.py +1 -0
- deltacat/tests/stats/test_intervals.py +11 -24
- deltacat/tests/utils/test_record_batch_tables.py +284 -0
- deltacat/types/media.py +3 -4
- deltacat/types/tables.py +31 -21
- deltacat/utils/common.py +5 -11
- deltacat/utils/numpy.py +20 -22
- deltacat/utils/pandas.py +73 -100
- deltacat/utils/performance.py +3 -9
- deltacat/utils/placement.py +276 -231
- deltacat/utils/pyarrow.py +302 -89
- deltacat/utils/ray_utils/collections.py +2 -1
- deltacat/utils/ray_utils/concurrency.py +38 -32
- deltacat/utils/ray_utils/dataset.py +28 -28
- deltacat/utils/ray_utils/performance.py +5 -9
- deltacat/utils/ray_utils/runtime.py +9 -10
- {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/METADATA +22 -12
- deltacat-0.1.11.dist-info/RECORD +110 -0
- {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/WHEEL +1 -1
- deltacat/autoscaler/events/__init__.py +0 -0
- deltacat/autoscaler/events/compaction/__init__.py +0 -0
- deltacat/autoscaler/events/compaction/cluster.py +0 -82
- deltacat/autoscaler/events/compaction/collections/__init__.py +0 -0
- deltacat/autoscaler/events/compaction/collections/partition_key_value.py +0 -36
- deltacat/autoscaler/events/compaction/dispatcher.py +0 -28
- deltacat/autoscaler/events/compaction/input.py +0 -27
- deltacat/autoscaler/events/compaction/process.py +0 -25
- deltacat/autoscaler/events/compaction/session_manager.py +0 -13
- deltacat/autoscaler/events/compaction/utils.py +0 -216
- deltacat/autoscaler/events/compaction/workflow.py +0 -303
- deltacat/autoscaler/events/dispatcher.py +0 -95
- deltacat/autoscaler/events/dynamodb/__init__.py +0 -0
- deltacat/autoscaler/events/dynamodb/event_store.py +0 -164
- deltacat/autoscaler/events/event_store.py +0 -55
- deltacat/autoscaler/events/exceptions.py +0 -6
- deltacat/autoscaler/events/processor.py +0 -177
- deltacat/autoscaler/events/session_manager.py +0 -25
- deltacat/autoscaler/events/states.py +0 -88
- deltacat/autoscaler/events/workflow.py +0 -54
- deltacat/autoscaler/node_group.py +0 -230
- deltacat/autoscaler/utils.py +0 -69
- deltacat-0.1.8.dist-info/RECORD +0 -131
- /deltacat/{autoscaler → tests/utils}/__init__.py +0 -0
- {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/LICENSE +0 -0
- {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/top_level.txt +0 -0
deltacat/compute/compactor/compaction_session.py

@@ -1,83 +1,66 @@
-import logging
-import time
 import functools
-import
-
+import logging
 from collections import defaultdict
+from typing import Dict, List, Optional, Set, Tuple
+
+import pyarrow as pa
+import ray
 
 from deltacat import logs
+from deltacat.compute.compactor import (
+    PrimaryKeyIndexLocator,
+    PrimaryKeyIndexMeta,
+    PrimaryKeyIndexVersionLocator,
+    PrimaryKeyIndexVersionMeta,
+    PyArrowWriteResult,
+    RoundCompletionInfo,
+    SortKey,
+)
+from deltacat.compute.compactor.steps import dedupe as dd
+from deltacat.compute.compactor.steps import hash_bucket as hb
+from deltacat.compute.compactor.steps import materialize as mat
+from deltacat.compute.compactor.utils import io
+from deltacat.compute.compactor.utils import primary_key_index as pki
+from deltacat.compute.compactor.utils import round_completion_file as rcf
 from deltacat.compute.stats.models.delta_stats import DeltaStats
-from deltacat.storage import Delta, DeltaLocator, Partition,
-
-from deltacat.utils.ray_utils.concurrency import invoke_parallel, \
-    round_robin_options_provider
-from deltacat.utils.ray_utils.runtime import live_node_resource_keys
-from deltacat.compute.compactor.steps import hash_bucket as hb, dedupe as dd, \
-    materialize as mat
-from deltacat.compute.compactor import SortKey, PrimaryKeyIndexMeta, \
-    PrimaryKeyIndexLocator, PrimaryKeyIndexVersionMeta, \
-    PrimaryKeyIndexVersionLocator, RoundCompletionInfo, \
-    PyArrowWriteResult
-from deltacat.compute.compactor.utils import round_completion_file as rcf, io, \
-    primary_key_index as pki
+from deltacat.storage import Delta, DeltaLocator, Partition, PartitionLocator
+from deltacat.storage import interface as unimplemented_deltacat_storage
 from deltacat.types.media import ContentType
+from deltacat.utils.placement import PlacementGroupConfig
+from deltacat.utils.ray_utils.concurrency import (
+    invoke_parallel,
+    round_robin_options_provider,
+)
+from deltacat.utils.ray_utils.runtime import live_node_resource_keys
 
-from typing import List, Set, Optional, Tuple, Dict, Union, Any
-
-import pyarrow as pa
 logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
 
-
-
-
-@ray.remote(num_cpus=0.01)
-class STATES_ACTOR:
-    def __init__(self):
-        self._SORT_KEY_NAME_INDEX: int = 0
-        self._SORT_KEY_ORDER_INDEX: int = 1
-        self._PRIMARY_KEY_INDEX_ALGORITHM_VERSION: str = "1.0"
-        self._TOTAL_ENTRIES: int = 0
-        self._TOTAL_DELTAS: int = 0
-        self._TOTAL_ROUNDS: float = 0
-
-    def SORT_KEY_NAME_INDEX(self):
-        return self._SORT_KEY_NAME_INDEX
-    def SORT_KEY_ORDER_INDEX(self):
-        return self._SORT_KEY_NAME_INDEX
-    def PRIMARY_KEY_INDEX_ALGORITHM_VERSION(self):
-        return self._PRIMARY_KEY_INDEX_ALGORITHM_VERSION
-    def TOTAL_ROUNDS(self):
-        return self._TOTAL_ROUNDS
-    def TOTAL_ENTRIES(self):
-        return self._TOTAL_ENTRIES
-    def TOTAL_DELTAS(self):
-        return self._TOTAL_DELTAS
-
-    def update_delta(self, delta):
-        self._TOTAL_DELTAS = delta
-    def update_entry(self, entry):
-        self._TOTAL_ENTRIES = entry
-    def update_round(self, round):
-        self._TOTAL_ROUNDS = round
+_PRIMARY_KEY_INDEX_ALGORITHM_VERSION: str = "1.0"
 
 
 def check_preconditions(
-
-
-
-
-
-
-
-
-
-
+    source_partition_locator: PartitionLocator,
+    compacted_partition_locator: PartitionLocator,
+    sort_keys: List[SortKey],
+    max_records_per_output_file: int,
+    new_hash_bucket_count: Optional[int],
+    deltacat_storage=unimplemented_deltacat_storage,
+) -> int:
+
+    assert (
+        source_partition_locator.partition_values
+        == compacted_partition_locator.partition_values
+    ), (
+        "In-place compaction must use the same partition values for the "
         "source and destination."
-
-
+    )
+    assert (
+        max_records_per_output_file >= 1
+    ), "Max records per output file must be a positive value"
     if new_hash_bucket_count is not None:
-        assert
-
+        assert (
+            new_hash_bucket_count >= 1
+        ), "New hash bucket count must be a positive value"
     return SortKey.validate_sort_keys(
         source_partition_locator,
         sort_keys,
@@ -86,129 +69,111 @@ def check_preconditions(
 
 
 def compact_partition(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    source_partition_locator: PartitionLocator,
+    destination_partition_locator: PartitionLocator,
+    primary_keys: Set[str],
+    compaction_artifact_s3_bucket: str,
+    last_stream_position_to_compact: int,
+    *,
+    hash_bucket_count: Optional[int] = None,
+    sort_keys: List[SortKey] = None,
+    records_per_primary_key_index_file: int = 38_000_000,
+    records_per_compacted_file: int = 4_000_000,
+    input_deltas_stats: Dict[int, DeltaStats] = None,
+    min_pk_index_pa_bytes: int = 0,
+    min_hash_bucket_chunk_size: int = 0,
+    compacted_file_content_type: ContentType = ContentType.PARQUET,
+    delete_prev_primary_key_index: bool = False,
+    pg_config: Optional[PlacementGroupConfig] = None,
+    schema_on_read: Optional[
+        pa.schema
+    ] = None,  # TODO (ricmiyam): Remove this and retrieve schema from storage API
+    rebase_source_partition_locator: Optional[PartitionLocator] = None,
+    rebase_source_partition_high_watermark: Optional[int] = None,
+    deltacat_storage=unimplemented_deltacat_storage,
+) -> Optional[str]:
 
     logger.info(f"Starting compaction session for: {source_partition_locator}")
     partition = None
     compaction_rounds_executed = 0
     has_next_compaction_round = True
-
-    if pg_config:
-        opts=pg_config[0]
-    round_id = 1
-    states = STATES_ACTOR.remote()
+    new_rcf_s3_url = None
     while has_next_compaction_round:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            deltacat_storage=deltacat_storage,
-            pg_config=pg_config
-        )
-        round_id +=1
-        has_next_compaction_round = ray.get(has_next_compaction_round_obj)
-        round_end = time.time()
-        TOTAL_ROUNDS = ray.get(states.TOTAL_ROUNDS.remote())
-        logger.info(f"Round {round_id}/{TOTAL_ROUNDS} took {round_end-round_start} seconds, estimated time to finish:{(TOTAL_ROUNDS-round_id)*(round_end-round_start)}")
-        print(f"Round {round_id}/{TOTAL_ROUNDS} took {round_end-round_start} seconds, estimated time to finish:{(TOTAL_ROUNDS-round_id)*(round_end-round_start)}")
-        new_partition = ray.get(new_partition_obj)
-        new_rci = ray.get(new_rci_obj)
+        (
+            has_next_compaction_round,
+            new_partition,
+            new_rci,
+            new_rcf_s3_url,
+        ) = _execute_compaction_round(
+            source_partition_locator,
+            destination_partition_locator,
+            primary_keys,
+            compaction_artifact_s3_bucket,
+            last_stream_position_to_compact,
+            hash_bucket_count,
+            sort_keys,
+            records_per_primary_key_index_file,
+            records_per_compacted_file,
+            input_deltas_stats,
+            min_pk_index_pa_bytes,
+            min_hash_bucket_chunk_size,
+            compacted_file_content_type,
+            delete_prev_primary_key_index,
+            pg_config,
+            schema_on_read,
+            rebase_source_partition_locator,
+            rebase_source_partition_high_watermark,
+            deltacat_storage,
+        )
         if new_partition:
             partition = new_partition
-
+            destination_partition_locator = new_partition.locator
             compaction_rounds_executed += 1
         # Take new primary key index sizes into account for subsequent compaction rounds and their dedupe steps
         if new_rci:
             min_pk_index_pa_bytes = new_rci.pk_index_pyarrow_write_result.pyarrow_bytes
 
-    logger.info(
-
+    logger.info(
+        f"Partition-{source_partition_locator.partition_values}-> Compaction session data processing completed in "
+        f"{compaction_rounds_executed} rounds."
+    )
     if partition:
         logger.info(f"Committing compacted partition to: {partition.locator}")
         partition = deltacat_storage.commit_partition(partition)
         logger.info(f"Committed compacted partition: {partition}")
     logger.info(f"Completed compaction session for: {source_partition_locator}")
+    return new_rcf_s3_url
 
 
-@ray.remote
-def get_metadata(deltacat_storage, delta):
-    return len(deltacat_storage.get_delta_manifest(delta).entries)
-@ray.remote(num_cpus=1,num_returns=3,max_retries=1)
+@ray.remote(num_cpus=0.1, num_returns=3)
 def _execute_compaction_round(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    deltacat_storage = unimplemented_deltacat_storage,
-    pg_config: Optional[List[Dict[str, Any]]] = None) \
-        -> Tuple[bool, Optional[Partition], Optional[RoundCompletionInfo]]:
-
-
-    pre_hb_start = time.time()
+    source_partition_locator: PartitionLocator,
+    compacted_partition_locator: PartitionLocator,
+    primary_keys: Set[str],
+    compaction_artifact_s3_bucket: str,
+    last_stream_position_to_compact: int,
+    new_hash_bucket_count: Optional[int],
+    sort_keys: List[SortKey],
+    records_per_primary_key_index_file: int,
+    records_per_compacted_file: int,
+    input_deltas_stats: Dict[int, DeltaStats],
+    min_pk_index_pa_bytes: int,
+    min_hash_bucket_chunk_size: int,
+    compacted_file_content_type: ContentType,
+    delete_prev_primary_key_index: bool,
+    pg_config: Optional[PlacementGroupConfig],
+    schema_on_read: Optional[pa.schema],
+    rebase_source_partition_locator: Optional[PartitionLocator],
+    rebase_source_partition_high_watermark: Optional[int],
+    deltacat_storage=unimplemented_deltacat_storage,
+) -> Tuple[bool, Optional[Partition], Optional[RoundCompletionInfo], Optional[str]]:
+
     if not primary_keys:
         # TODO (pdames): run simple rebatch to reduce all deltas into 1 delta
         # with normalized manifest entry sizes
         raise NotImplementedError(
-            "Compaction only supports tables with 1 or more primary keys"
+            "Compaction only supports tables with 1 or more primary keys"
+        )
     if sort_keys is None:
         sort_keys = []
     # TODO (pdames): detect and handle schema evolution (at least ensure that
@@ -229,40 +194,30 @@ def _execute_compaction_round(
     # sort primary keys to produce the same pk digest regardless of input order
     primary_keys = sorted(primary_keys)
 
-    # collect cluster resource stats
-    # cluster_resources = ray.cluster_resources()
-    # logger.info(f"Total cluster resources: {cluster_resources}")
-    # logger.info(f"Available cluster resources: {ray.available_resources()}")
-    # cluster_cpus = int(cluster_resources["CPU"])
-    # logger.info(f"Total cluster CPUs: {cluster_cpus}")
-
-    # collect node group resources
-
     cluster_resources = ray.cluster_resources()
     logger.info(f"Total cluster resources: {cluster_resources}")
-
-
-        cluster_resources = pg_config
-        cluster_cpus = cluster_resources[
-
-    else: # use all cluster resource
+    node_resource_keys = None
+    if pg_config:  # use resource in each placement group
+        cluster_resources = pg_config.resource
+        cluster_cpus = cluster_resources["CPU"]
+    else:  # use all cluster resource
         logger.info(f"Available cluster resources: {ray.available_resources()}")
         cluster_cpus = int(cluster_resources["CPU"])
         logger.info(f"Total cluster CPUs: {cluster_cpus}")
         node_resource_keys = live_node_resource_keys()
-        logger.info(
-
-
-    if node_resource_keys:
-        # create a remote options provider to round-robin tasks across all nodes
-        logger.info(f"Setting round robin scheduling with node id:{node_resource_keys}")
-        round_robin_opt_provider = functools.partial(
-            round_robin_options_provider,
-            resource_keys=node_resource_keys,
+        logger.info(
+            f"Found {len(node_resource_keys)} live cluster nodes: "
+            f"{node_resource_keys}"
         )
-
-
-
+
+    # create a remote options provider to round-robin tasks across all nodes or allocated bundles
+    logger.info(f"Setting round robin scheduling with node id:{node_resource_keys}")
+    round_robin_opt_provider = functools.partial(
+        round_robin_options_provider,
+        resource_keys=node_resource_keys,
+        pg_config=pg_config.opts if pg_config else None,
+    )
+
     # assign a distinct index to each node in the cluster
     # head_node_ip = urllib.request.urlopen(
     #     "http://169.254.169.254/latest/meta-data/local-ipv4"
@@ -273,20 +228,10 @@ def _execute_compaction_round(
     # we assume here that we're running on a fixed-size cluster - this
     # assumption could be removed but we'd still need to know the maximum
     # "safe" number of parallel tasks that our autoscaling cluster could handle
-
-
-    else:
-        max_parallelism = [int(cluster_cpus) for _ in range(3)]
-        logger.info(f"Max parallelism for each steps: {max_parallelism}")
-
-
-    if not num_cpus:
-        num_cpus=[1,1,1] # allocate 1 cpu for each task (hb, dd or mat)
-
-    logger.info(f"Number of cpus for each steps: {num_cpus}")
+    max_parallelism = int(cluster_cpus)
+    logger.info(f"Max parallelism: {max_parallelism}")
 
     # get the root path of a compatible primary key index for this round
-    _PRIMARY_KEY_INDEX_ALGORITHM_VERSION=ray.get(states.PRIMARY_KEY_INDEX_ALGORITHM_VERSION.remote())
     compatible_primary_key_index_meta = PrimaryKeyIndexMeta.of(
         compacted_partition_locator,
         primary_keys,
@@ -294,38 +239,58 @@ def _execute_compaction_round(
         _PRIMARY_KEY_INDEX_ALGORITHM_VERSION,
     )
     compatible_primary_key_index_locator = PrimaryKeyIndexLocator.of(
-        compatible_primary_key_index_meta
-
+        compatible_primary_key_index_meta
+    )
+    compatible_primary_key_index_root_path = (
         compatible_primary_key_index_locator.primary_key_index_root_path
+    )
 
     # read the results from any previously completed compaction round that used
     # a compatible primary key index
     round_completion_info = None
-    if
+    if not rebase_source_partition_locator:
+        logger.info(
+            f"Reading round completion file for compatible "
+            f"primary key index root path: {compatible_primary_key_index_root_path}"
+        )
         round_completion_info = rcf.read_round_completion_file(
             compaction_artifact_s3_bucket,
             source_partition_locator,
             compatible_primary_key_index_root_path,
         )
+        logger.info(f"Round completion file: {round_completion_info}")
 
     # read the previous compaction round's hash bucket count, if any
     old_hash_bucket_count = None
     if round_completion_info:
-        old_pki_version_locator =
-            .primary_key_index_version_locator
-
-
-            .hash_bucket_count
-
+        old_pki_version_locator = (
+            round_completion_info.primary_key_index_version_locator
+        )
+        old_hash_bucket_count = (
+            old_pki_version_locator.primary_key_index_version_meta.hash_bucket_count
+        )
+        min_pk_index_pa_bytes = (
+            round_completion_info.pk_index_pyarrow_write_result.pyarrow_bytes
+        )
+    else:
+        logger.info(
+            f"No prior round info read. Source partition: "
+            f"{source_partition_locator}. Primary key index locator: "
+            f"{compatible_primary_key_index_locator}. Rebase source "
+            f"partition locator: {rebase_source_partition_locator}"
+        )
 
     # use the new hash bucket count if provided, or fall back to old count
-    hash_bucket_count =
-
+    hash_bucket_count = (
+        new_hash_bucket_count
+        if new_hash_bucket_count is not None
         else old_hash_bucket_count
+    )
 
     # discover input delta files
-    high_watermark =
-        if round_completion_info else None
+    high_watermark = (
+        round_completion_info.high_watermark if round_completion_info else None
+    )
 
     input_deltas = io.discover_deltas(
         source_partition_locator,
@@ -336,99 +301,72 @@ def _execute_compaction_round(
 
     if not input_deltas:
         logger.info("No input deltas found to compact.")
-        return False, None, None
+        return False, None, None, None
 
     # limit the input deltas to fit on this cluster and convert them to
     # annotated deltas of equivalent size for easy parallel distribution
 
-
-
-
-
-
-
-
-
-
-
+    (
+        uniform_deltas,
+        hash_bucket_count,
+        last_stream_position_compacted,
+    ) = io.limit_input_deltas(
+        input_deltas,
+        cluster_resources,
+        hash_bucket_count,
+        min_pk_index_pa_bytes,
+        min_hash_bucket_chunk_size,
+        input_deltas_stats=input_deltas_stats,
+        deltacat_storage=deltacat_storage,
+    )
 
-
-
-
-
-    #TODO: use stats, otherwise too slow to get all manifest's metadata
-    TOTAL_ENTRIES = 722451
-    TOTAL_DELTAS = len(input_deltas)
-    ray.get(states.update_entry.remote(TOTAL_ENTRIES))
-    ray.get(states.update_delta.remote(TOTAL_DELTAS))
-    logger.info(f"Estimated Rounds:{TOTAL_ENTRIES/uniform_deltas_entries}")
-    TOTAL_ROUNDS = TOTAL_ENTRIES/uniform_deltas_entries
-    ray.get(states.update_round.remote(TOTAL_ROUNDS))
-    TOTAL_ROUNDS = ray.get(states.TOTAL_ROUNDS.remote())
-    TOTAL_ENTRIES = ray.get(states.TOTAL_ENTRIES.remote())
-    TOTAL_DELTAS = ray.get(states.TOTAL_DELTAS.remote())
-    logger.info(f"Round {round_id}/{TOTAL_ROUNDS}: {uniform_deltas_entries}/{TOTAL_ENTRIES} entries in total deltas {TOTAL_DELTAS}")
-    print(f"Round {round_id}/{TOTAL_ROUNDS}: {uniform_deltas_entries}/{TOTAL_ENTRIES} entries in total deltas {TOTAL_DELTAS}")
-
-    assert hash_bucket_count is not None and hash_bucket_count > 0, \
-        f"Unexpected Error: Default hash bucket count ({hash_bucket_count}) " \
-        f"is invalid."
+    assert hash_bucket_count is not None and hash_bucket_count > 0, (
+        f"Expected hash bucket count to be a positive integer, but found "
+        f"`{hash_bucket_count}`"
+    )
 
     # rehash the primary key index if necessary
-    round_completion_info = None
     if round_completion_info:
         logger.info(f"Round completion file contents: {round_completion_info}")
         # the previous primary key index is compatible with the current, but
         # will need to be rehashed if the hash bucket count has changed
         if hash_bucket_count != old_hash_bucket_count:
+            # TODO(draghave): manually test the path after prior primary key
+            # index was already built
             round_completion_info = pki.rehash(
                 round_robin_opt_provider,
                 compaction_artifact_s3_bucket,
                 source_partition_locator,
                 round_completion_info,
                 hash_bucket_count,
-                max_parallelism
+                max_parallelism,
                 records_per_primary_key_index_file,
                 delete_prev_primary_key_index,
             )
-    else:
-        logger.info(f"No prior round completion file found. Source partition: "
-                    f"{source_partition_locator}. Primary key index locator: "
-                    f"{compatible_primary_key_index_locator}")
 
-
-    hb_start = time.time()
-    logger.info(f"adhoc_rootliu, Round {round_id} Pre-Hash bucket took:{hb_start-pre_hb_start} seconds")
-    print(f"adhoc_rootliu, Round {round_id} Pre-Hash bucket took:{hb_start-pre_hb_start} seconds")
     # parallel step 1:
     # group like primary keys together by hashing them into buckets
     hb_tasks_pending = invoke_parallel(
         items=uniform_deltas,
         ray_task=hb.hash_bucket,
-        max_parallelism=max_parallelism
-        num_cpus = num_cpus[0],
+        max_parallelism=max_parallelism,
         options_provider=round_robin_opt_provider,
         primary_keys=primary_keys,
         sort_keys=sort_keys,
         num_buckets=hash_bucket_count,
-        num_groups=max_parallelism
-        ignore_missing_manifest=ignore_missing_manifest,
+        num_groups=max_parallelism,
         deltacat_storage=deltacat_storage,
     )
     logger.info(f"Getting {len(hb_tasks_pending)} hash bucket results...")
     hb_results = ray.get([t[0] for t in hb_tasks_pending])
-    print(f"adhoc_rootliu, Round {round_id} Got {len(hb_results)} hash bucket results.")
     logger.info(f"Got {len(hb_results)} hash bucket results.")
     all_hash_group_idx_to_obj_id = defaultdict(list)
     for hash_group_idx_to_obj_id in hb_results:
         for hash_group_index, object_id in enumerate(hash_group_idx_to_obj_id):
             if object_id:
                 all_hash_group_idx_to_obj_id[hash_group_index].append(object_id)
-    hash_group_count =
+    hash_group_count = len(all_hash_group_idx_to_obj_id)
     logger.info(f"Hash bucket groups created: {hash_group_count}")
-    hb_end = time.time()
-    logger.info(f"adhoc_rootliu, Round {round_id} Hash bucket took:{hb_end-hb_start} seconds")
-    print(f"adhoc_rootliu, Round {round_id} Hash bucket took:{hb_end-hb_start} seconds")
 
     # TODO (pdames): when resources are freed during the last round of hash
     # bucketing, start running dedupe tasks that read existing dedupe
@@ -455,9 +393,11 @@ def _execute_compaction_round(
         _PRIMARY_KEY_INDEX_ALGORITHM_VERSION,
     )
     new_primary_key_index_locator = PrimaryKeyIndexLocator.of(
-        new_primary_key_index_meta
-
-
+        new_primary_key_index_meta
+    )
+    new_primary_key_index_root_path = (
+        new_primary_key_index_locator.primary_key_index_root_path
+    )
 
     # generate a new primary key index version locator for this round
     new_primary_key_index_version_meta = PrimaryKeyIndexVersionMeta.of(
@@ -465,53 +405,48 @@ def _execute_compaction_round(
         hash_bucket_count,
     )
     new_pki_version_locator = PrimaryKeyIndexVersionLocator.generate(
-        new_primary_key_index_version_meta
-
+        new_primary_key_index_version_meta
+    )
 
     # parallel step 2:
     # discover records with duplicate primary keys in each hash bucket, and
     # identify the index of records to keep or drop based on sort keys
-    num_materialize_buckets = max_parallelism
+    num_materialize_buckets = max_parallelism
     logger.info(f"Materialize Bucket Count: {num_materialize_buckets}")
-    record_counts_pending_materialize = \
-        dd.RecordCountsPendingMaterialize.remote(dedupe_task_count)
     dd_tasks_pending = invoke_parallel(
         items=all_hash_group_idx_to_obj_id.values(),
         ray_task=dd.dedupe,
-        max_parallelism=max_parallelism
-        num_cpus = num_cpus[1],
+        max_parallelism=max_parallelism,
         options_provider=round_robin_opt_provider,
-        kwargs_provider=lambda index, item: {
-
+        kwargs_provider=lambda index, item: {
+            "dedupe_task_index": index,
+            "object_ids": item,
+        },
         compaction_artifact_s3_bucket=compaction_artifact_s3_bucket,
         round_completion_info=round_completion_info,
         new_primary_key_index_version_locator=new_pki_version_locator,
         sort_keys=sort_keys,
         max_records_per_index_file=records_per_primary_key_index_file,
-        max_records_per_materialized_file=records_per_compacted_file,
         num_materialize_buckets=num_materialize_buckets,
         delete_old_primary_key_index=delete_prev_primary_key_index,
-        record_counts_pending_materialize=record_counts_pending_materialize,
     )
     logger.info(f"Getting {len(dd_tasks_pending)} dedupe results...")
     dd_results = ray.get([t[0] for t in dd_tasks_pending])
     logger.info(f"Got {len(dd_results)} dedupe results.")
-    print((f"adhoc_rootliu, Round {round_id} Got {len(dd_results)} dedupe results."))
     all_mat_buckets_to_obj_id = defaultdict(list)
     for mat_bucket_idx_to_obj_id in dd_results:
-        for
-
+        for (
+            bucket_idx,
+            dd_task_index_and_object_id_tuple,
+        ) in mat_bucket_idx_to_obj_id.items():
             all_mat_buckets_to_obj_id[bucket_idx].append(
-                dd_task_index_and_object_id_tuple
+                dd_task_index_and_object_id_tuple
+            )
     logger.info(f"Getting {len(dd_tasks_pending)} dedupe result stat(s)...")
     pki_stats = ray.get([t[2] for t in dd_tasks_pending])
     logger.info(f"Got {len(pki_stats)} dedupe result stat(s).")
-    logger.info(f"Materialize buckets created: "
-                f"{len(all_mat_buckets_to_obj_id)}")
+    logger.info(f"Materialize buckets created: " f"{len(all_mat_buckets_to_obj_id)}")
 
-    dd_end = time.time()
-    logger.info(f"adhoc_rootliu, Round {round_id} dedupe took:{dd_end-hb_end} seconds")
-    print(f"adhoc_rootliu, Round {round_id} dedupe took:{dd_end-hb_end} seconds")
     # TODO(pdames): when resources are freed during the last round of deduping
     # start running materialize tasks that read materialization source file
     # tables from S3 then wait for deduping to finish before continuing
@@ -527,12 +462,11 @@ def _execute_compaction_round(
     mat_tasks_pending = invoke_parallel(
         items=all_mat_buckets_to_obj_id.items(),
         ray_task=mat.materialize,
-        max_parallelism=max_parallelism
-        num_cpus = num_cpus[2],
+        max_parallelism=max_parallelism,
        options_provider=round_robin_opt_provider,
-        kwargs_provider=lambda index,
-            "mat_bucket_index":
-            "dedupe_task_idx_and_obj_id_tuples":
+        kwargs_provider=lambda index, mat_bucket_index_to_obj_id: {
+            "mat_bucket_index": mat_bucket_index_to_obj_id[0],
+            "dedupe_task_idx_and_obj_id_tuples": mat_bucket_index_to_obj_id[1],
         },
         schema=schema_on_read,
         round_completion_info=round_completion_info,
@@ -545,43 +479,52 @@ def _execute_compaction_round(
     logger.info(f"Getting {len(mat_tasks_pending)} materialize result(s)...")
     mat_results = ray.get(mat_tasks_pending)
     logger.info(f"Got {len(mat_results)} materialize result(s).")
-    print(f"adhoc_rootliu, Round {round_id} Got {len(mat_results)} materialize result(s).")
 
-    mat_end = time.time()
-    logger.info(f"adhoc_rootliu, Round {round_id} mat took:{mat_end-dd_end} seconds")
-    print(f"adhoc_rootliu, Round {round_id} mat took:{mat_end-dd_end} seconds")
     mat_results = sorted(mat_results, key=lambda m: m.task_index)
     deltas = [m.delta for m in mat_results]
     merged_delta = Delta.merge_deltas(deltas)
     compacted_delta = deltacat_storage.commit_delta(merged_delta)
     logger.info(f"Committed compacted delta: {compacted_delta}")
-
-    logger.info(f"adhoc_rootliu, Round {round_id} commit took:{commit_end-mat_end} seconds")
-    print(f"adhoc_rootliu, Round {round_id} commit took:{commit_end-mat_end} seconds")
+
     new_compacted_delta_locator = DeltaLocator.of(
         new_compacted_partition_locator,
         compacted_delta.stream_position,
     )
 
-
-
+    rci_high_watermark = (
+        rebase_source_partition_high_watermark
+        if rebase_source_partition_high_watermark
+        else last_stream_position_compacted
+    )
+    new_round_completion_info = RoundCompletionInfo.of(
+        rci_high_watermark,
         new_compacted_delta_locator,
-        PyArrowWriteResult.union([m.pyarrow_write_result
-                                  for m in mat_results]),
+        PyArrowWriteResult.union([m.pyarrow_write_result for m in mat_results]),
         PyArrowWriteResult.union(pki_stats),
         bit_width_of_sort_keys,
         new_pki_version_locator,
+        rebase_source_partition_locator
+        or round_completion_info.rebase_source_partition_locator,
     )
-
+    rcf_source_partition_locator = (
+        rebase_source_partition_locator
+        if rebase_source_partition_locator
+        else source_partition_locator
+    )
+    round_completion_file_s3_url = rcf.write_round_completion_file(
         compaction_artifact_s3_bucket,
-
+        rcf_source_partition_locator,
         new_primary_key_index_root_path,
-
+        new_round_completion_info,
+    )
+    logger.info(
+        f"partition-{source_partition_locator.partition_values},"
+        f"compacted at: {last_stream_position_compacted},"
+        f"last position: {last_stream_position_to_compact}"
+    )
+    return (
+        (last_stream_position_compacted < last_stream_position_to_compact),
+        partition,
+        new_round_completion_info,
+        round_completion_file_s3_url,
     )
-    time_mat_e = time.time()
-    logger.info(f"partition-{source_partition_locator.partition_values},compacted at:{last_stream_position_compacted}, last position:{last_stream_position_to_compact}")
-    return \
-        (last_stream_position_compacted < last_stream_position_to_compact), \
-        partition, \
-        round_completion_info
-