deltacat 0.1.8__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +41 -15
- deltacat/aws/clients.py +12 -31
- deltacat/aws/constants.py +1 -1
- deltacat/aws/redshift/__init__.py +7 -2
- deltacat/aws/redshift/model/manifest.py +54 -50
- deltacat/aws/s3u.py +188 -218
- deltacat/catalog/delegate.py +151 -185
- deltacat/catalog/interface.py +78 -97
- deltacat/catalog/model/catalog.py +21 -21
- deltacat/catalog/model/table_definition.py +11 -9
- deltacat/compute/compactor/__init__.py +12 -16
- deltacat/compute/compactor/compaction_session.py +259 -316
- deltacat/compute/compactor/model/delta_annotated.py +60 -44
- deltacat/compute/compactor/model/delta_file_envelope.py +5 -6
- deltacat/compute/compactor/model/delta_file_locator.py +10 -8
- deltacat/compute/compactor/model/materialize_result.py +6 -7
- deltacat/compute/compactor/model/primary_key_index.py +38 -34
- deltacat/compute/compactor/model/pyarrow_write_result.py +3 -4
- deltacat/compute/compactor/model/round_completion_info.py +25 -19
- deltacat/compute/compactor/model/sort_key.py +18 -15
- deltacat/compute/compactor/steps/dedupe.py +152 -259
- deltacat/compute/compactor/steps/hash_bucket.py +57 -73
- deltacat/compute/compactor/steps/materialize.py +138 -99
- deltacat/compute/compactor/steps/rehash/rehash_bucket.py +13 -13
- deltacat/compute/compactor/steps/rehash/rewrite_index.py +11 -13
- deltacat/compute/compactor/utils/io.py +59 -47
- deltacat/compute/compactor/utils/primary_key_index.py +131 -90
- deltacat/compute/compactor/utils/round_completion_file.py +22 -23
- deltacat/compute/compactor/utils/system_columns.py +33 -42
- deltacat/compute/metastats/meta_stats.py +235 -157
- deltacat/compute/metastats/model/partition_stats_dict.py +7 -10
- deltacat/compute/metastats/model/stats_cluster_size_estimator.py +13 -5
- deltacat/compute/metastats/stats.py +95 -64
- deltacat/compute/metastats/utils/io.py +100 -53
- deltacat/compute/metastats/utils/pyarrow_memory_estimation_function.py +5 -2
- deltacat/compute/metastats/utils/ray_utils.py +38 -33
- deltacat/compute/stats/basic.py +107 -69
- deltacat/compute/stats/models/delta_column_stats.py +11 -8
- deltacat/compute/stats/models/delta_stats.py +59 -32
- deltacat/compute/stats/models/delta_stats_cache_result.py +4 -1
- deltacat/compute/stats/models/manifest_entry_stats.py +12 -6
- deltacat/compute/stats/models/stats_result.py +24 -14
- deltacat/compute/stats/utils/intervals.py +16 -9
- deltacat/compute/stats/utils/io.py +86 -51
- deltacat/compute/stats/utils/manifest_stats_file.py +24 -33
- deltacat/constants.py +8 -10
- deltacat/io/__init__.py +2 -2
- deltacat/io/aws/redshift/redshift_datasource.py +157 -143
- deltacat/io/dataset.py +14 -17
- deltacat/io/read_api.py +36 -33
- deltacat/logs.py +94 -42
- deltacat/storage/__init__.py +18 -8
- deltacat/storage/interface.py +196 -213
- deltacat/storage/model/delta.py +45 -51
- deltacat/storage/model/list_result.py +12 -8
- deltacat/storage/model/namespace.py +4 -5
- deltacat/storage/model/partition.py +42 -42
- deltacat/storage/model/stream.py +29 -30
- deltacat/storage/model/table.py +14 -14
- deltacat/storage/model/table_version.py +32 -31
- deltacat/storage/model/types.py +1 -0
- deltacat/tests/stats/test_intervals.py +11 -24
- deltacat/tests/utils/test_record_batch_tables.py +284 -0
- deltacat/types/media.py +3 -4
- deltacat/types/tables.py +31 -21
- deltacat/utils/common.py +5 -11
- deltacat/utils/numpy.py +20 -22
- deltacat/utils/pandas.py +73 -100
- deltacat/utils/performance.py +3 -9
- deltacat/utils/placement.py +276 -231
- deltacat/utils/pyarrow.py +302 -89
- deltacat/utils/ray_utils/collections.py +2 -1
- deltacat/utils/ray_utils/concurrency.py +38 -32
- deltacat/utils/ray_utils/dataset.py +28 -28
- deltacat/utils/ray_utils/performance.py +5 -9
- deltacat/utils/ray_utils/runtime.py +9 -10
- {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/METADATA +22 -12
- deltacat-0.1.11.dist-info/RECORD +110 -0
- {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/WHEEL +1 -1
- deltacat/autoscaler/events/__init__.py +0 -0
- deltacat/autoscaler/events/compaction/__init__.py +0 -0
- deltacat/autoscaler/events/compaction/cluster.py +0 -82
- deltacat/autoscaler/events/compaction/collections/__init__.py +0 -0
- deltacat/autoscaler/events/compaction/collections/partition_key_value.py +0 -36
- deltacat/autoscaler/events/compaction/dispatcher.py +0 -28
- deltacat/autoscaler/events/compaction/input.py +0 -27
- deltacat/autoscaler/events/compaction/process.py +0 -25
- deltacat/autoscaler/events/compaction/session_manager.py +0 -13
- deltacat/autoscaler/events/compaction/utils.py +0 -216
- deltacat/autoscaler/events/compaction/workflow.py +0 -303
- deltacat/autoscaler/events/dispatcher.py +0 -95
- deltacat/autoscaler/events/dynamodb/__init__.py +0 -0
- deltacat/autoscaler/events/dynamodb/event_store.py +0 -164
- deltacat/autoscaler/events/event_store.py +0 -55
- deltacat/autoscaler/events/exceptions.py +0 -6
- deltacat/autoscaler/events/processor.py +0 -177
- deltacat/autoscaler/events/session_manager.py +0 -25
- deltacat/autoscaler/events/states.py +0 -88
- deltacat/autoscaler/events/workflow.py +0 -54
- deltacat/autoscaler/node_group.py +0 -230
- deltacat/autoscaler/utils.py +0 -69
- deltacat-0.1.8.dist-info/RECORD +0 -131
- /deltacat/{autoscaler → tests/utils}/__init__.py +0 -0
- {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/LICENSE +0 -0
- {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/top_level.txt +0 -0
deltacat/autoscaler/events/compaction/utils.py (removed)
@@ -1,216 +0,0 @@
-import argparse
-import base64
-import gzip
-import json
-import logging
-import pathlib
-from io import BytesIO
-
-import math
-import yaml
-
-from typing import Dict, Any, List, Tuple, Optional, Set, Union, TextIO
-
-from deltacat.autoscaler.events.compaction.cluster import ClusterSizeSuggester
-from deltacat.autoscaler.events.compaction.collections.partition_key_value import PartitionKeyValues, PartitionKeyValue
-from deltacat.autoscaler.events.compaction.input import CompactionInput
-from deltacat.autoscaler.events.session_manager import SessionManager, SESSION_ID_KEY
-from deltacat.compute.compactor.utils import round_completion_file as rcf
-from deltacat.compute.stats.models.delta_stats import DeltaStats
-from deltacat.storage import interface as dcs
-from deltacat import ContentType, logs, SortKey
-from deltacat.compute.compactor import RoundCompletionInfo, compaction_session, PrimaryKeyIndexMeta, \
-    PrimaryKeyIndexLocator
-from deltacat.storage import PartitionLocator
-
-_PRIMARY_KEY_INDEX_ALGORITHM_VERSION: str = "1.0"
-
-
-logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
-
-
-def read_latest_round_completion_file(source_partition_locator,
-                                      compacted_partition_locator,
-                                      compaction_artifact_s3_bucket,
-                                      primary_keys,
-                                      sort_keys: List[SortKey] = None):
-    if sort_keys is None:
-        sort_keys = []
-    # get the root path of a compatible primary key index for this round
-    compatible_primary_key_index_meta = PrimaryKeyIndexMeta.of(
-        compacted_partition_locator,
-        primary_keys,
-        sort_keys,
-        _PRIMARY_KEY_INDEX_ALGORITHM_VERSION,
-    )
-    compatible_primary_key_index_locator = PrimaryKeyIndexLocator.of(
-        compatible_primary_key_index_meta)
-    compatible_primary_key_index_root_path = \
-        compatible_primary_key_index_locator.primary_key_index_root_path
-
-    # read the results from any previously completed compaction round that used
-    # a compatible primary key index
-    round_completion_info = rcf.read_round_completion_file(
-        compaction_artifact_s3_bucket,
-        source_partition_locator,
-        compatible_primary_key_index_root_path,
-    )
-    return round_completion_info
-
-
-def build_partition_locator(partition_data: Dict[str, Any]):
-    return PartitionLocator.at(
-        partition_data["owner"],
-        partition_data["name"],
-        partition_data.get("tableVersion"),
-        partition_data["streamUUID"],
-        None,  # storage type
-        partition_data["partition_values"],  # partition values
-        None  # partition ID
-    )
-
-
-def compact(compaction_input: CompactionInput,
-            hash_bucket_count: Optional[int] = None,
-            deltacat_storage=dcs,
-            **kwargs):
-    compaction_session.compact_partition(
-        compaction_input.source_partition_locator,
-        compaction_input.compacted_partition_locator,
-        set(compaction_input.primary_keys),
-        compaction_input.compaction_artifact_s3_bucket,
-        compaction_input.last_stream_position_to_compact,
-        schema_on_read=compaction_input.schema_on_read,
-        input_deltas_stats=compaction_input.input_deltas_stats,
-        hash_bucket_count=hash_bucket_count if hash_bucket_count else compaction_input.hash_bucket_count,
-        deltacat_storage=deltacat_storage,
-        **kwargs
-    )
-
-
-def calc_new_hash_bucket_count(cluster_memory_bytes: int,
-                               max_memory_per_vcpu: int,
-                               vcpu_per_node: int):
-    new_hash_bucket_count = max(
-        math.ceil(cluster_memory_bytes / max_memory_per_vcpu),
-        min(vcpu_per_node, 256)  # Do not exceed 256 CPUs as a safety measure
-    )
-
-    return new_hash_bucket_count
-
-
-def get_round_completion_file(
-        source_partition_locator: PartitionLocator,
-        compacted_partition_locator: PartitionLocator,
-        primary_keys: Set[str],
-        compaction_artifact_s3_bucket: str
-):
-    return read_latest_round_completion_file(source_partition_locator,
-                                             compacted_partition_locator,
-                                             compaction_artifact_s3_bucket,
-                                             sorted(primary_keys))
-
-
-def calc_compaction_cluster_memory_bytes(compaction_input: CompactionInput,
-                                         new_uncompacted_deltas_bytes: int = 0) -> int:
-    round_completion_file = get_round_completion_file(compaction_input.source_partition_locator,
-                                                      compaction_input.compacted_partition_locator,
-                                                      compaction_input.primary_keys,
-                                                      compaction_input.compaction_artifact_s3_bucket)
-    if round_completion_file is None:
-        # if no previous compaction rounds exist, use the incoming delta size as a place to start for calculations
-        est_incoming_delta_size = new_uncompacted_deltas_bytes * 1.3
-        logger.warning(f"No previous round completion file found for {compaction_input}."
-                       f"Using estimates: {est_incoming_delta_size}")
-        return int(est_incoming_delta_size)
-
-    old_num_records = round_completion_file.compacted_pyarrow_write_result.records
-    sort_keys_bit_width = round_completion_file.sort_keys_bit_width
-    pk_index_row_size = 32 + math.ceil(sort_keys_bit_width / 8)
-    cluster_memory_bytes = max(
-        old_num_records * pk_index_row_size * 1.3,  # object store memory (hash bucketing)
-        round_completion_file.compacted_pyarrow_write_result.pyarrow_bytes + new_uncompacted_deltas_bytes  # dedupe
-    )
-    return int(cluster_memory_bytes)
-
-
-def get_compaction_size_inputs(config: Dict[str, Any],
-                               partition_key_values: PartitionKeyValues,
-                               cluster_memory_bytes: int,
-                               stats_metadata: Dict[int, DeltaStats] = None,
-                               parent_session_id: str = None,
-                               session_id: str = None) -> Tuple[int, TextIO]:
-    suggester = ClusterSizeSuggester(cluster_memory_bytes=cluster_memory_bytes)
-    new_hash_bucket_count = calc_new_hash_bucket_count(cluster_memory_bytes,
-                                                       suggester.get_max_memory_per_vcpu(),
-                                                       suggester.get_num_vcpu_per_node())
-    cluster_cpus = max(
-        new_hash_bucket_count,
-        suggester.get_suggested_vcpu_count()
-    )
-    cluster_nodes = int(math.ceil(cluster_cpus / suggester.get_num_vcpu_per_node()))
-    yaml_file = generate_compaction_session_yaml(config,
-                                                 partition_key_values,
-                                                 worker_node_count=cluster_nodes,
-                                                 instance_type=suggester.instance_type,
-                                                 stats_metadata=stats_metadata,
-                                                 parent_session_id=parent_session_id,
-                                                 session_id=session_id)
-    return new_hash_bucket_count, yaml_file
-
-
-def generate_compaction_session_yaml(config: Dict[str, Any],
-                                     partition_key_values: PartitionKeyValues,
-                                     head_node_count: int = 0,
-                                     worker_node_count: int = 0,
-                                     stats_metadata: Dict[int, DeltaStats] = None,
-                                     instance_type: str = None,
-                                     parent_session_id: str = None,
-                                     session_id: str = None) -> TextIO:
-    # TODO: Remove this workaround when custom AMIs are built with baked-in build files (i.e. wheels, jars)
-    new_config = {**config}
-    for local_path, _ in new_config["file_mounts"].items():
-        new_config["file_mounts"][local_path] = local_path
-    pkv_id = partition_key_values.id
-    new_filename = f"compact.{pkv_id}.yaml"
-    new_config["cluster_name"] = f"compaction-session-{pkv_id}"
-    # Allow child clusters to re-use the same SSH key provided from the parent cluster
-    new_config["auth"]["ssh_private_key"] = f"~/ray_bootstrap_key.pem"
-    new_config["file_mounts"] = {
-        **config["file_mounts"],
-        f"~/{new_filename}": f"~/{new_filename}"
-    }
-    new_config["provider"]["use_internal_ips"] = True
-    new_config["max_workers"] = worker_node_count
-    # TODO: Determine optimal object store memory / worker heap memory allocation ratios?
-    new_config["available_node_types"]["ray.worker.default"]["min_workers"] = \
-        new_config["available_node_types"]["ray.worker.default"]["max_workers"] = worker_node_count
-    new_config["available_node_types"]["ray.worker.default"]["node_config"]["InstanceType"] = instance_type
-    new_config["available_node_types"]["ray.head.default"]["node_config"]["InstanceType"] = instance_type
-
-    # TODO: Formalize supported parameter key/values after initial shadow compaction
-    new_events = {
-        **config["events"],
-        "parameters": {
-            **config["events"]["parameters"],
-            SESSION_ID_KEY: session_id,
-        },
-        "metadata": {
-            "partitionKeyValues": compress(partition_key_values).decode('utf-8')
-        }
-    }
-    if stats_metadata:
-        new_events["metadata"]["statsMetadata"] = compress(stats_metadata).decode('utf-8')
-    new_config["events"] = new_events
-
-    with open(new_filename, "w") as yaml_file:
-        yaml.dump(new_config, yaml_file, default_flow_style=False)
-    return yaml_file
-
-def compress(serializable_obj: Union[Dict, Tuple, List]) -> bytes:
-    json_dict = json.dumps(serializable_obj)
-    out = BytesIO()
-    with gzip.open(out, "wt", encoding="utf-8") as zipfile:
-        zipfile.write(json_dict)
-
-    return base64.b64encode(out.getvalue())
deltacat/autoscaler/events/compaction/workflow.py (removed)
@@ -1,303 +0,0 @@
-import json
-import logging
-import uuid
-from typing import List, Union, Dict, Set, Any, Optional
-
-from deltacat import logs
-from deltacat.autoscaler.events.compaction.input import CompactionInput
-from deltacat.autoscaler.events.compaction.process import CompactionProcess
-from deltacat.autoscaler.events.compaction.utils import calc_compaction_cluster_memory_bytes, get_compaction_size_inputs
-from deltacat.autoscaler.events.event_store import EventStoreClient
-from deltacat.autoscaler.events.exceptions import WorkflowException
-from deltacat.autoscaler.events.session_manager import PARENT_SESSION_ID_KEY, SESSION_ID_KEY
-from deltacat.autoscaler.events.workflow import EventWorkflow, StateTransitionMap
-from deltacat.autoscaler.events.compaction.dispatcher import CompactionEventDispatcher
-from deltacat.autoscaler.events.states import ScriptStartedEvent, ScriptInProgressEvent, \
-    ScriptCompletedEvent, States, ScriptInProgressCustomEvent, RayJobRequestEvent
-from deltacat.storage import PartitionLocator
-
-logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
-
-
-NEW_REQUEST = "NEW"
-STATS_METADATA_COLLECTION_STARTED = "STATS_METADATA_COLLECTION_STARTED"
-STATS_METADATA_COLLECTION_COMPLETED = "STATS_METADATA_COLLECTION_COMPLETED"
-COMPACTION_SESSION_STARTED = "COMPACTION_SESSION_STARTED"
-COMPACTION_SESSION_PARTITIONS_COMPACTED = "COMPACTION_SESSION_PARTITIONS_COMPACTED"
-COMPACTION_SESSION_PARTITIONS_FAILURE = "COMPACTION_SESSION_PARTITIONS_FAILURE"
-COMPACTION_SESSION_PROGRESS_UPDATE = "COMPACTION_SESSION_PROGRESS_UPDATE"
-COMPACTION_SESSION_COMPLETED = "COMPACTION_SESSION_COMPLETED"
-COMPACTION_METADATA_SESSION_WORKFLOW_FAILURE = "COMPACTION_METADATA_SESSION_WORKFLOW_FAILURE"
-
-
-# TODO: Might be worth renaming this later to avoid confusion with Ray Workflows
-class CompactionWorkflow(EventWorkflow):
-
-    def __init__(self,
-                 config: Dict[str, Any],
-                 event_dispatcher: CompactionEventDispatcher,
-                 event_store: EventStoreClient = None,
-                 compaction_inputs: List[CompactionInput] = None):
-        self.config = config
-        self.event_dispatcher = event_dispatcher
-        assert self.event_dispatcher is not None, f"Event dispatcher must be provided to build and transition " \
-                                                  f"to different job states."
-        self.event_store = event_store
-        self._compaction_inputs = compaction_inputs
-
-        # Initialization
-        self._add_state_handlers()
-        self.state_transitions = self._build_state_transitions()
-        self._metastats = {}
-
-    @property
-    def state_transition_map(self) -> StateTransitionMap:
-        return self.state_transitions
-
-    def _add_state_handlers(self):
-        in_progress_custom_events = [
-            ScriptInProgressCustomEvent(COMPACTION_METADATA_SESSION_WORKFLOW_FAILURE, 0),
-            ScriptInProgressCustomEvent(STATS_METADATA_COLLECTION_STARTED, 1),
-            ScriptInProgressCustomEvent(STATS_METADATA_COLLECTION_COMPLETED, 2),
-            ScriptInProgressCustomEvent(COMPACTION_SESSION_STARTED, 3),
-            ScriptInProgressCustomEvent(COMPACTION_SESSION_PARTITIONS_COMPACTED, 4),
-            ScriptInProgressCustomEvent(COMPACTION_SESSION_PROGRESS_UPDATE, 5),
-            ScriptInProgressCustomEvent(COMPACTION_SESSION_COMPLETED, 6),
-            ScriptInProgressCustomEvent(COMPACTION_SESSION_PARTITIONS_FAILURE, 7),
-        ]
-
-        self._event_map = event_map = {event.name: event for event in in_progress_custom_events}
-        self.event_dispatcher.add_event_handlers([val for _, val in event_map.items()])
-
-        # This callback is added for tracking new child jobs to be launched
-        self.event_dispatcher.add_event_handlers([RayJobRequestEvent.new_request_delivered])
-
-    def _build_state_transitions(self) -> StateTransitionMap:
-        """Builds a mapping of event states to state transitioning callbacks, or
-        a dictionary of state transitioning callbacks.
-
-        If an event has state sequences, a dictionary of callbacks is provided
-        with sequences as keys and callback functions as values.
-
-        Returns: a map of event states to callbacks or a dictionary of callbacks
-        """
-        init_sequence = 0
-        in_progress_sequence = {name: event.state_sequence for name, event in self._event_map.items()}
-        return {
-            States.IN_PROGRESS.name: {
-                init_sequence: self.begin_stats_metadata_collection,
-                in_progress_sequence[STATS_METADATA_COLLECTION_COMPLETED]: self.begin_compaction,
-                in_progress_sequence[COMPACTION_SESSION_STARTED]: self.wait_or_mark_compaction_complete,
-                in_progress_sequence[COMPACTION_SESSION_PARTITIONS_COMPACTED]: self.wait_or_mark_compaction_complete,
-                in_progress_sequence[COMPACTION_SESSION_PROGRESS_UPDATE]: self.wait_or_mark_compaction_complete,
-                in_progress_sequence[COMPACTION_SESSION_PARTITIONS_FAILURE]: self.wait_or_mark_compaction_complete,
-                in_progress_sequence[COMPACTION_SESSION_COMPLETED]: self.complete_job
-            },
-        }
-
-    def register_compaction_inputs(self, compaction_inputs: List[CompactionInput]):
-        """Extracts and registers a set of partition IDs that need compaction from the compaction inputs.
-        """
-        self._compaction_inputs = compaction_inputs
-        compaction_source_partition_locators = [task.source_partition_locator for task in self._compaction_inputs]
-        compaction_source_partition_ids = [loc.partition_id for loc in compaction_source_partition_locators]
-        self._partition_ids_to_compact = set(compaction_source_partition_ids)
-
-    def start_workflow(self):
-        """Publish a job state event that indicates that a request to start a job run has been successfully received,
-        but the job run has not yet finished prerequisite initialization steps.
-        """
-        self.event_dispatcher.dispatch_event(ScriptInProgressEvent.in_progress)
-
-    def begin_stats_metadata_collection(self):
-        """Publish a job state event that indicates that stats metadata collection has started.
-        """
-        event = self._event_map[STATS_METADATA_COLLECTION_STARTED]
-        self.event_dispatcher.dispatch_event(event,
-                                             event_data={
-                                                 "eventName": event.name,
-                                                 "stateDetailDescription": "Running stats metadata session",
-                                             })
-        if self.session_manager:
-            self._metastats = self.session_manager.launch_stats_metadata_collection(
-                [compact.source_partition_locator for compact in self._compaction_inputs]
-            )
-        self.stats_metadata_collection_completed()
-
-    def stats_metadata_collection_completed(self):
-        """Publish a job state event that indicates that stats metadata collection is complete.
-        """
-        event = self._event_map[STATS_METADATA_COLLECTION_COMPLETED]
-        self.event_dispatcher.dispatch_event(event,
-                                             event_data={
-                                                 "eventName": event.name,
-                                                 "stateDetailDescription": "Finished collecting stats metadata",
-                                             })
-
-    def begin_compaction(self):
-        """Publish a job state event that indicates that the compaction run has started.
-        """
-        event = self._event_map[COMPACTION_SESSION_STARTED]
-        self.event_dispatcher.dispatch_event(event,
-                                             event_data={
-                                                 "eventName": event.name,
-                                                 "stateDetailDescription": "Running compaction session",
-                                             })
-        if self.session_manager:
-            processes = self.build_compaction_processes()
-            self.session_manager.launch_compaction(processes)
-
-    def build_compaction_processes(self) -> List[CompactionProcess]:
-        processes = []
-        partition_stats_metadata = self._metastats
-        for compaction_input in self._compaction_inputs:
-            stats_metadata = partition_stats_metadata.get(compaction_input.source_partition_locator.partition_id, {})
-            stats_metadata = {stream_pos: delta_stats for stream_pos, delta_stats in stats_metadata.items()
-                              if stream_pos <= compaction_input.last_stream_position_to_compact}
-            total_pyarrow_table_bytes = sum([stats_result.stats.pyarrow_table_bytes
-                                             for stream_pos, stats_result in stats_metadata.items()
-                                             if stats_result.stats is not None])
-            cluster_memory_bytes = calc_compaction_cluster_memory_bytes(compaction_input, total_pyarrow_table_bytes)
-            new_session_id = str(uuid.uuid4())
-            self.event_dispatcher.dispatch_event(RayJobRequestEvent.new_request_delivered,
-                                                 event_data={
-                                                     PARENT_SESSION_ID_KEY: self.session_manager.session_id,
-                                                     SESSION_ID_KEY: new_session_id
-                                                 })
-            new_hash_bucket_count, yaml_file = get_compaction_size_inputs(self.config,
-                                                                          compaction_input.partition_key_values,
-                                                                          cluster_memory_bytes,
-                                                                          stats_metadata=stats_metadata,
-                                                                          parent_session_id=self.session_manager.session_id,
-                                                                          session_id=new_session_id)
-            compaction_process = CompactionProcess(compaction_input.source_partition_locator,
-                                                   yaml_file.name,
-                                                   new_hash_bucket_count,
-                                                   compaction_input.last_stream_position_to_compact,
-                                                   compaction_input.partition_key_values,
-                                                   cluster_memory_bytes=cluster_memory_bytes,
-                                                   input_delta_total_bytes=total_pyarrow_table_bytes)
-
-            # TODO: Increase file descriptor limit on host (up to ~60k)
-            # TODO: Emit metrics for compaction jobs with very high number of partitions
-            processes.append(compaction_process)
-        return processes
-
-    def partitions_compacted(self,
-                             partition_locators: List[PartitionLocator]):
-        """Publish a job state event that indicates that a single partition has finished compaction.
-        A compaction session can have 1...N partitions to compact.
-        """
-        partition_completed_event = self._event_map[COMPACTION_SESSION_PARTITIONS_COMPACTED]
-        self.event_dispatcher.dispatch_event(partition_completed_event,
-                                             event_data={
-                                                 "eventName": partition_completed_event.name,
-                                                 "stateDetailDescription":
-                                                     f"Finished compaction on partitions: "
-                                                     f"{[pl.partition_id for pl in partition_locators]}",
-                                                 "stateDetailMetadata": {
-                                                     **{pl.partition_id: json.dumps(pl.partition_values)
-                                                        for pl in partition_locators}
-                                                 }
-                                             })
-
-    def partitions_compaction_failure(self,
-                                      partition_locators: List[PartitionLocator],
-                                      error_trace: Optional[str] = None ):
-        """Publish a job state event that indicates failure to compact a list of partitions.
-        """
-        failed_event = self._event_map[COMPACTION_SESSION_PARTITIONS_FAILURE]
-        self.event_dispatcher.dispatch_event(failed_event,
-                                             event_data={
-                                                 "eventName": failed_event.name,
-                                                 "errorMessage":
-                                                     f"Failure to compact partitions: "
-                                                     f"{[pl.partition_id for pl in partition_locators]}",
-                                                 "errorStackTrace": error_trace,
-                                                 "stateDetailDescription":
-                                                     f"Failure to compact partitions: "
-                                                     f"{[pl.partition_id for pl in partition_locators]}",
-                                                 "stateDetailMetadata": {
-                                                     **{pl.partition_id: json.dumps(pl.partition_values)
-                                                        for pl in partition_locators}
-                                                 }
-                                             })
-
-    def update_compaction_job_progress(self,
-                                       partition_locator: PartitionLocator,
-                                       session_id: str):
-        """Dispatch a compaction job update event for a given session ID.
-
-        :param partition_locator: Locator for a partition
-        :param session_id: Session ID to dispatch the event for
-        :return:
-        """
-        progress_event = self._event_map[COMPACTION_SESSION_PROGRESS_UPDATE]
-        partition_id = partition_locator.partition_id
-        self.event_dispatcher.dispatch_event(progress_event,
-                                             event_data={
-                                                 PARENT_SESSION_ID_KEY: session_id,
-                                                 SESSION_ID_KEY: session_id,
-                                                 "eventName": progress_event.name,
-                                                 "stateDetailDescription": f"Compaction Update",
-                                                 "stateDetailMetadata": {
-                                                     partition_id: str(partition_locator.partition_values)
-                                                 }
-                                             })
-
-    def wait_or_mark_compaction_complete(self):
-        """Publish a job state event that indicates that the compaction run is complete.
-        """
-        if self.event_store is None or self._partition_ids_to_compact is None:
-            # TODO: Separate this workflow out into multiple workflows, for different applications
-            raise WorkflowException(f"Event store and partition IDs must be defined in a workflow."
-                                    f"Event store: {self.event_store}"
-                                    f"Partition IDs to compact: {self._partition_ids_to_compact}")
-
-        partition_ids_failed = set(self.event_store.get_failed_partition_ids(self.trace_id))
-        if len(partition_ids_failed) > 0:
-            raise WorkflowException(f"Compaction workflow failed due to partition errors: {partition_ids_failed}")
-
-
-        partition_ids_completed = set(self.event_store.get_compacted_partition_ids(self.trace_id))
-        if partition_ids_completed == self._partition_ids_to_compact:
-            logger.info(f"Compaction run complete.")
-            event = self._event_map[COMPACTION_SESSION_COMPLETED]
-            self.event_dispatcher.dispatch_event(event,
-                                                 event_data={
-                                                     "eventName": event.name,
-                                                     "stateDetailDescription": "Finished compaction run",
-                                                 })
-        else:
-            logger.info(f"Compaction is in progress: {len(partition_ids_completed)} "
-                        f"out of {len(self._partition_ids_to_compact)} partitions completed...")
-
-    def complete_job(self):
-        """Publish a job state event that indicates that the job run has completed.
-        """
-        self.event_dispatcher.dispatch_event(ScriptCompletedEvent.completed)
-
-    def workflow_failure(
-            self,
-            error_message: Optional[str] = None,
-            error_trace: Optional[str] = None):
-        """Publish a job state event that indicates failure to compact a list of partitions.
-        """
-        failed_workflow_event = self._event_map[COMPACTION_METADATA_SESSION_WORKFLOW_FAILURE]
-        self.event_dispatcher.dispatch_event(failed_workflow_event,
-                                             event_data={
-                                                 "eventName": failed_workflow_event.name,
-                                                 "errorMessage": error_message,
-                                                 "errorStackTrace": error_trace,
-                                                 "stateDetailDescription":
-                                                     f"Workflow encountered a failure.",
-                                                 "stateDetailStatus": "FAILED",
-                                             })
-
-    @property
-    def session_manager(self):
-        return self.event_dispatcher.session_manager
-
-    @property
-    def trace_id(self):
-        return self.event_dispatcher.events_publisher.event_base_params.get("traceId", "UNKNOWN_TRACE_ID")
deltacat/autoscaler/events/dispatcher.py (removed)
@@ -1,95 +0,0 @@
-import logging
-from typing import Dict, Any, Optional, Callable, List
-
-from deltacat.autoscaler.events.session_manager import SessionManager, SESSION_ID_KEY, PARENT_SESSION_ID_KEY
-from deltacat.autoscaler.events.states import event_enum_values
-from ray.autoscaler._private.event_system import RayEvent, EventPublisher
-
-from deltacat import logs
-from deltacat.storage import interface as unimplemented_deltacat_storage
-
-logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
-
-
-class EventDispatcher:
-    def __init__(self,
-                 events_publisher: EventPublisher,
-                 deltacat_storage: unimplemented_deltacat_storage,
-                 session_manager: SessionManager = None):
-        """Constructor for the event dispatcher.
-
-        Intended for usage by Ray parent and child clusters running managed jobs.
-
-        Args:
-            events_publisher: Events manager for publishing events through a cloud provider
-            session_manager: Manager for tracking and launching Ray sessions
-            deltacat_storage: Storage interface for deltacat
-        """
-        self.events_publisher = events_publisher
-        self.deltacat_storage = deltacat_storage
-        self.session_manager = session_manager
-
-        # Setup event callbacks in the constructor
-        self._add_base_event_handlers()
-
-    def dispatch_event(self,
-                       event: RayEvent,
-                       event_data: Optional[Dict[str, Any]] = None):
-        """Generic helper method to dispatch Ray job events
-
-        Args:
-            event: Ray job event to dispatch
-            event_data: Additional metadata for the given event. Optional.
-
-        Returns:
-
-        """
-        if event_data is None:
-            event_data = {}
-
-        event_data["event_name"] = event
-        if self.session_manager:
-            event_data.setdefault(PARENT_SESSION_ID_KEY, self.session_manager.session_id)
-            event_data.setdefault(SESSION_ID_KEY, self.session_manager.session_id)
-
-        logger.info(f"Dispatching event {event.name} "
-                    f"with parent Ray session ID = {event_data[PARENT_SESSION_ID_KEY]} "
-                    f"and current Ray session ID = {event_data[SESSION_ID_KEY]}")
-
-        event_payload = {
-            **self.events_publisher.config["parameters"],
-            **event_data
-        }
-
-        # Trim un-required, space intensive data from payload
-        if "statsMetadata" in event_payload:
-            event_payload.pop("statsMetadata")
-        if "partitionsToCompact" in event_payload:
-            event_payload.pop("partitionsToCompact")
-
-        self._publish_event(event_payload)
-
-    def _add_base_event_handlers(self):
-        """Add callback handlers for base job events
-        """
-        publisher = self.events_publisher
-        if publisher:
-            for event in event_enum_values:
-                logger.info(f"[{publisher.__class__.__name__}]: Adding callback for event {event.name}")
-                publisher.add_callback(event)
-
-    def add_event_handlers(self, custom_events: List[RayEvent]):
-        """Add callback handlers for custom job events
-        """
-        publisher = self.events_publisher
-        if publisher:
-            for event in custom_events:
-                logger.info(f"[{publisher.__class__.__name__}]: Adding callback for event {event.name}")
-                publisher.add_callback(event)
-
-    def _publish_event(self, event_data: Dict[str, Any]):
-        publisher = self.events_publisher
-        if publisher and event_data and event_data.get("event_name"):
-            event: RayEvent = event_data["event_name"]
-            logger.info(f"[{publisher.__class__.__name__}]: Publishing event {event.name}")
-            publisher.publish(event, event_data)
File without changes