deltacat 0.1.8__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +41 -15
- deltacat/aws/clients.py +12 -31
- deltacat/aws/constants.py +1 -1
- deltacat/aws/redshift/__init__.py +7 -2
- deltacat/aws/redshift/model/manifest.py +54 -50
- deltacat/aws/s3u.py +188 -218
- deltacat/catalog/delegate.py +151 -185
- deltacat/catalog/interface.py +78 -97
- deltacat/catalog/model/catalog.py +21 -21
- deltacat/catalog/model/table_definition.py +11 -9
- deltacat/compute/compactor/__init__.py +12 -16
- deltacat/compute/compactor/compaction_session.py +259 -316
- deltacat/compute/compactor/model/delta_annotated.py +60 -44
- deltacat/compute/compactor/model/delta_file_envelope.py +5 -6
- deltacat/compute/compactor/model/delta_file_locator.py +10 -8
- deltacat/compute/compactor/model/materialize_result.py +6 -7
- deltacat/compute/compactor/model/primary_key_index.py +38 -34
- deltacat/compute/compactor/model/pyarrow_write_result.py +3 -4
- deltacat/compute/compactor/model/round_completion_info.py +25 -19
- deltacat/compute/compactor/model/sort_key.py +18 -15
- deltacat/compute/compactor/steps/dedupe.py +152 -259
- deltacat/compute/compactor/steps/hash_bucket.py +57 -73
- deltacat/compute/compactor/steps/materialize.py +138 -99
- deltacat/compute/compactor/steps/rehash/rehash_bucket.py +13 -13
- deltacat/compute/compactor/steps/rehash/rewrite_index.py +11 -13
- deltacat/compute/compactor/utils/io.py +59 -47
- deltacat/compute/compactor/utils/primary_key_index.py +131 -90
- deltacat/compute/compactor/utils/round_completion_file.py +22 -23
- deltacat/compute/compactor/utils/system_columns.py +33 -42
- deltacat/compute/metastats/meta_stats.py +235 -157
- deltacat/compute/metastats/model/partition_stats_dict.py +7 -10
- deltacat/compute/metastats/model/stats_cluster_size_estimator.py +13 -5
- deltacat/compute/metastats/stats.py +95 -64
- deltacat/compute/metastats/utils/io.py +100 -53
- deltacat/compute/metastats/utils/pyarrow_memory_estimation_function.py +5 -2
- deltacat/compute/metastats/utils/ray_utils.py +38 -33
- deltacat/compute/stats/basic.py +107 -69
- deltacat/compute/stats/models/delta_column_stats.py +11 -8
- deltacat/compute/stats/models/delta_stats.py +59 -32
- deltacat/compute/stats/models/delta_stats_cache_result.py +4 -1
- deltacat/compute/stats/models/manifest_entry_stats.py +12 -6
- deltacat/compute/stats/models/stats_result.py +24 -14
- deltacat/compute/stats/utils/intervals.py +16 -9
- deltacat/compute/stats/utils/io.py +86 -51
- deltacat/compute/stats/utils/manifest_stats_file.py +24 -33
- deltacat/constants.py +8 -10
- deltacat/io/__init__.py +2 -2
- deltacat/io/aws/redshift/redshift_datasource.py +157 -143
- deltacat/io/dataset.py +14 -17
- deltacat/io/read_api.py +36 -33
- deltacat/logs.py +94 -42
- deltacat/storage/__init__.py +18 -8
- deltacat/storage/interface.py +196 -213
- deltacat/storage/model/delta.py +45 -51
- deltacat/storage/model/list_result.py +12 -8
- deltacat/storage/model/namespace.py +4 -5
- deltacat/storage/model/partition.py +42 -42
- deltacat/storage/model/stream.py +29 -30
- deltacat/storage/model/table.py +14 -14
- deltacat/storage/model/table_version.py +32 -31
- deltacat/storage/model/types.py +1 -0
- deltacat/tests/stats/test_intervals.py +11 -24
- deltacat/tests/utils/test_record_batch_tables.py +284 -0
- deltacat/types/media.py +3 -4
- deltacat/types/tables.py +31 -21
- deltacat/utils/common.py +5 -11
- deltacat/utils/numpy.py +20 -22
- deltacat/utils/pandas.py +73 -100
- deltacat/utils/performance.py +3 -9
- deltacat/utils/placement.py +276 -231
- deltacat/utils/pyarrow.py +302 -89
- deltacat/utils/ray_utils/collections.py +2 -1
- deltacat/utils/ray_utils/concurrency.py +38 -32
- deltacat/utils/ray_utils/dataset.py +28 -28
- deltacat/utils/ray_utils/performance.py +5 -9
- deltacat/utils/ray_utils/runtime.py +9 -10
- {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/METADATA +22 -12
- deltacat-0.1.11.dist-info/RECORD +110 -0
- {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/WHEEL +1 -1
- deltacat/autoscaler/events/__init__.py +0 -0
- deltacat/autoscaler/events/compaction/__init__.py +0 -0
- deltacat/autoscaler/events/compaction/cluster.py +0 -82
- deltacat/autoscaler/events/compaction/collections/__init__.py +0 -0
- deltacat/autoscaler/events/compaction/collections/partition_key_value.py +0 -36
- deltacat/autoscaler/events/compaction/dispatcher.py +0 -28
- deltacat/autoscaler/events/compaction/input.py +0 -27
- deltacat/autoscaler/events/compaction/process.py +0 -25
- deltacat/autoscaler/events/compaction/session_manager.py +0 -13
- deltacat/autoscaler/events/compaction/utils.py +0 -216
- deltacat/autoscaler/events/compaction/workflow.py +0 -303
- deltacat/autoscaler/events/dispatcher.py +0 -95
- deltacat/autoscaler/events/dynamodb/__init__.py +0 -0
- deltacat/autoscaler/events/dynamodb/event_store.py +0 -164
- deltacat/autoscaler/events/event_store.py +0 -55
- deltacat/autoscaler/events/exceptions.py +0 -6
- deltacat/autoscaler/events/processor.py +0 -177
- deltacat/autoscaler/events/session_manager.py +0 -25
- deltacat/autoscaler/events/states.py +0 -88
- deltacat/autoscaler/events/workflow.py +0 -54
- deltacat/autoscaler/node_group.py +0 -230
- deltacat/autoscaler/utils.py +0 -69
- deltacat-0.1.8.dist-info/RECORD +0 -131
- /deltacat/{autoscaler → tests/utils}/__init__.py +0 -0
- {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/LICENSE +0 -0
- {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/top_level.txt +0 -0
@@ -1,164 +0,0 @@
|
|
1
|
-
from typing import List, Dict, Any, Optional
|
2
|
-
|
3
|
-
import boto3
|
4
|
-
from botocore.client import BaseClient
|
5
|
-
from deltacat.autoscaler.events.compaction.workflow import COMPACTION_SESSION_PARTITIONS_COMPACTED, \
|
6
|
-
COMPACTION_SESSION_PARTITIONS_FAILURE
|
7
|
-
|
8
|
-
from deltacat.autoscaler.events.event_store import EventStoreClient
|
9
|
-
|
10
|
-
|
11
|
-
class DynamoDBEventStoreClient(EventStoreClient):
    """Event store client that reads job events from a DynamoDB table.

    Events are returned exactly as the low-level DynamoDB client produces
    them, i.e. as attribute-value maps such as ``{"eventName": {"S": ...}}``.
    """

    def __init__(self,
                 table_name: str,
                 dynamodb_client: BaseClient = None):
        """
        Args:
            table_name: Name of the DynamoDB table that stores the events.
            dynamodb_client: Client used to run the queries. When omitted, a
                boto3 "dynamodb" client for the "us-east-1" region is created.
        """
        if dynamodb_client is None:
            dynamodb_client = boto3.client("dynamodb", "us-east-1")

        self.dynamodb_client = dynamodb_client
        self.table_name = table_name

    def query_events(self,
                     trace_id: str) -> List[Optional[Dict[str, Any]]]:
        """Query all events (active or not) by Trace ID

        Args:
            trace_id: Trace ID for the job

        Returns: list of every event recorded for the trace, newest first

        """
        return self.get_events(self._query_events(trace_id))

    def query_active_events(self,
                            trace_id: str) -> List[Optional[Dict[str, Any]]]:
        """Query active events by Trace ID

        Args:
            trace_id: Trace ID for the job

        Returns: list of events that are active

        """
        return self.get_active_events(self._query_events(trace_id))

    def query_active_events_by_destination_job_table(self,
                                                     destination_job_table: str) -> List[Optional[Dict[str, Any]]]:
        """Query active events from the job destination table index

        Args:
            destination_job_table: Destination table for jobs

        Returns: list of active events for the particular job

        """
        result = self._query_all_pages(
            TableName=self.table_name,
            IndexName="destinationTable.timestamp",
            ScanIndexForward=False,  # descending order traversal
            KeyConditions=self._string_equals("destinationTable",
                                              destination_job_table),
        )
        return self.get_active_events(result)

    def query_active_events_by_event_name(self,
                                          event_name: str) -> List[Optional[Dict[str, Any]]]:
        """Query active events from the event name index

        Args:
            event_name: Name of the job event state

        Returns: list of active events for the particular event name

        """
        result = self._query_all_pages(
            TableName=self.table_name,
            IndexName="eventName.timestamp",
            KeyConditions=self._string_equals("eventName", event_name),
        )
        return self.get_active_events(result)

    def get_compacted_partition_ids(self, trace_id: str) -> List[str]:
        """Return the partition IDs listed in the trace's active
        "partitions compacted" events.

        Args:
            trace_id: Trace ID for the job

        Returns: keys of each matching event's ``stateDetailMetadata`` map;
            events without that attribute contribute nothing

        """
        return self._partition_ids(
            self._get_completed_partition_events(trace_id))

    def get_failed_partition_ids(self, trace_id: str) -> List[str]:
        """Return the partition IDs listed in the trace's active
        "partitions failure" events.

        Args:
            trace_id: Trace ID for the job

        Returns: keys of each matching event's ``stateDetailMetadata`` map;
            events without that attribute contribute nothing

        """
        return self._partition_ids(
            self._get_failed_partition_events(trace_id))

    @staticmethod
    def get_events(query_result: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Gets the unfiltered list of job state events from a query response

        Args:
            query_result: DynamoDB query response containing an "Items" list

        Returns: every item of the response, active or not

        """
        return query_result["Items"]

    @staticmethod
    def get_active_events(query_result: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Gets a filtered list of active job state events

        Args:
            query_result: DynamoDB query response containing an "Items" list

        Returns: the items that carry a truthy "active" attribute

        """
        # NOTE(review): with the low-level client a stored attribute arrives
        # as a non-empty dict (e.g. {"BOOL": False}), which is always truthy.
        # This filter therefore presumably relies on inactive events not
        # having the "active" attribute at all - confirm against the writer.
        return [item for item in query_result["Items"] if item.get("active")]

    def _get_completed_partition_events(
            self,
            trace_id: str) -> List[Dict[str, Any]]:
        """Active events of the trace named COMPACTION_SESSION_PARTITIONS_COMPACTED."""
        return self._get_active_events_named(
            trace_id, COMPACTION_SESSION_PARTITIONS_COMPACTED)

    def _get_failed_partition_events(
            self,
            trace_id: str) -> List[Dict[str, Any]]:
        """Active events of the trace named COMPACTION_SESSION_PARTITIONS_FAILURE."""
        return self._get_active_events_named(
            trace_id, COMPACTION_SESSION_PARTITIONS_FAILURE)

    def _get_active_events_named(
            self,
            trace_id: str,
            event_name: str) -> List[Dict[str, Any]]:
        """Active events of the trace whose "eventName" equals event_name."""
        return [item for item in self.query_active_events(trace_id)
                if item["eventName"]["S"] == event_name]

    def _query_events(self, trace_id: str):
        """Run the trace ID index query and return the merged response."""
        return self._query_all_pages(
            TableName=self.table_name,
            IndexName="traceId.timestamp",
            ScanIndexForward=False,  # descending order traversal
            KeyConditions=self._string_equals("traceId", trace_id),
        )

    def _query_all_pages(self, **query_kwargs) -> Dict[str, Any]:
        """Run a query and follow pagination until every item is fetched.

        A single Query call returns at most one page of results and signals
        remaining data through "LastEvaluatedKey"; reading only the first
        page would silently drop events.

        Returns: a copy of the first response whose "Items" holds the items
            of all pages and whose "Count" matches that merged list
        """
        page = self.dynamodb_client.query(**query_kwargs)
        merged = dict(page)
        items = list(page["Items"])
        while "LastEvaluatedKey" in page:
            page = self.dynamodb_client.query(
                ExclusiveStartKey=page["LastEvaluatedKey"],
                **query_kwargs)
            items.extend(page["Items"])
        merged.pop("LastEvaluatedKey", None)
        merged["Items"] = items
        merged["Count"] = len(items)
        return merged

    @staticmethod
    def _string_equals(attribute: str, value: str) -> Dict[str, Any]:
        """Build a KeyConditions entry matching a string attribute exactly."""
        return {
            attribute: {
                "AttributeValueList": [
                    {
                        "S": value
                    },
                ],
                "ComparisonOperator": "EQ"
            },
        }

    @staticmethod
    def _partition_ids(events: List[Dict[str, Any]]) -> List[str]:
        """Flatten the "stateDetailMetadata" map keys of the given events."""
        return [partition_id for event in events
                if "stateDetailMetadata" in event
                for partition_id in event["stateDetailMetadata"]["M"]]
|
164
|
-
|
@@ -1,55 +0,0 @@
|
|
1
|
-
from typing import List, Dict, Any, Optional
|
2
|
-
|
3
|
-
|
4
|
-
class EventStoreClient:
    """Interface for clients that read job events from an event store.

    Every method raises NotImplementedError; concrete stores override them.
    """

    def query_events(self,
                     trace_id: str) -> List[Optional[Dict[str, Any]]]:
        """Query all events (active or not) by Trace ID

        Args:
            trace_id: Trace ID for the job

        Returns: list of events recorded for the trace

        """
        raise NotImplementedError("Method not implemented")

    def query_active_events(self,
                            trace_id: str) -> List[Optional[Dict[str, Any]]]:
        """Query active events by Trace ID

        Args:
            trace_id: Trace ID for the job

        Returns: list of events that are active

        """
        raise NotImplementedError("Method not implemented")

    def query_active_events_by_destination_job_table(self,
                                                     destination_job_table: str) -> List[Optional[Dict[str, Any]]]:
        """Query active events from the job destination table

        Args:
            destination_job_table: Destination table for jobs

        Returns: list of active events for the particular job

        """
        raise NotImplementedError("Method not implemented")

    def query_active_events_by_event_name(self,
                                          event_name: str) -> List[Optional[Dict[str, Any]]]:
        """Query active events from the event name index

        Args:
            event_name: Name of the job event state

        Returns: list of active events for the particular event name

        """
        raise NotImplementedError("Method not implemented")

    def get_compacted_partition_ids(self, trace_id: str) -> List[str]:
        """Retrieve all compacted partition IDs.

        Args:
            trace_id: Trace ID for the job

        Returns: list of all compacted partition IDs

        """
        raise NotImplementedError("Method not implemented")

    def get_failed_partition_ids(self, trace_id: str) -> List[str]:
        """Retrieve all partition IDs that failed compaction.

        Args:
            trace_id: Trace ID for the job

        Returns: list of all failed partition IDs

        """
        raise NotImplementedError("Method not implemented")
|
@@ -1,177 +0,0 @@
|
|
1
|
-
import logging
|
2
|
-
|
3
|
-
import time
|
4
|
-
from typing import Dict, Callable, Union, Tuple, Optional, List, Any
|
5
|
-
|
6
|
-
from botocore.exceptions import BotoCoreError
|
7
|
-
from deltacat.autoscaler.events.EventWorkflow import EventWorkflow
|
8
|
-
from deltacat.autoscaler.events.compaction.workflow import CompactionWorkflow
|
9
|
-
from deltacat.autoscaler.events.states import States
|
10
|
-
from ray.autoscaler._private.event_system import EventPublisher
|
11
|
-
|
12
|
-
from deltacat import logs
|
13
|
-
from deltacat.autoscaler.events.event_store import EventStoreClient
|
14
|
-
from deltacat.autoscaler.events.exceptions import EventNotFoundException, WorkflowException
|
15
|
-
|
16
|
-
# NOTE: importing this module configures the root logger at INFO level.
logging.basicConfig(level=logging.INFO)
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))

# Number of BotoCoreError failures after which EventProcessor.run stops polling.
QUERY_EVENTS_MAX_RETRY_COUNTER = 10
# Pause, in seconds, between two polls of the event store.
SLEEP_PERIOD_SECONDS = 20
|
21
|
-
|
22
|
-
|
23
|
-
# TODO: Make this the primary open-source Job Run Event Handler / Dispatcher?
|
24
|
-
# Might be worth porting over some features of the Job Event Daemon (Java) to here
|
25
|
-
|
26
|
-
class EventProcessor:
    """Polls an event store and advances a workflow based on the latest job events."""

    def __init__(self,
                 events_publisher: EventPublisher,
                 event_store: EventStoreClient,
                 workflow: CompactionWorkflow):
        """

        Args:
            events_publisher: Events manager for publishing events through a cloud provider
            event_store: High-level API client for the Event Store database
            workflow: Workflow of job states
        """
        self.event_publisher = events_publisher
        self.event_store = event_store
        self.workflow = workflow

    def run(self):
        """Polls the event store and handles state transitions based on incoming events.

        This function will dispatch the STARTED event when first executed.
        The event listener will only listen to event states from STARTED and onwards.

        Event states before STARTED (i.e. NEW, DISPATCHED) are emitted from the Event Daemon (Java).

        Polling stops when the latest event is COMPLETED, when the publisher's
        "expirationTimestamp" (epoch milliseconds) has passed, or once
        QUERY_EVENTS_MAX_RETRY_COUNTER BotoCoreErrors have been seen. The
        error counter is cumulative; it is not reset by a successful poll.
        """
        logger.info("Starting workflow...!")
        # The workflow handed to the constructor is the one to start; there is
        # no module-level workflow object to fall back on.
        self.workflow.start_workflow()

        base_params = self.event_publisher.event_base_params
        trace_id = base_params["traceId"]
        dest_provider = base_params["destinationTable"]["owner"]
        dest_table = base_params["destinationTable"]["name"]
        expiry_timestamp = base_params["expirationTimestamp"]

        retry_ctr = 0
        while round(time.time() * 1000) < expiry_timestamp and retry_ctr < QUERY_EVENTS_MAX_RETRY_COUNTER:
            logger.debug(f"Polling latest job states for trace_id: {trace_id}, "
                         f"provider: {dest_provider} and table: {dest_table}...")

            try:
                events = self.event_store.query_events(trace_id)

                # Latest non-active / active event must be checked for the completed state.
                latest_state, latest_state_sequence = self.get_latest_event(events, trace_id)
                if latest_state == States.COMPLETED.name:
                    logger.info("Completed Ray job! Exiting.")
                    break

                # Latest active event must be checked for the next state transition
                latest_active_state, latest_active_state_sequence = self.get_latest_active_event(events, trace_id)

                self.workflow.to_next_state(latest_active_state, latest_active_state_sequence)

            except WorkflowException as e:
                self.workflow.workflow_failure(error_message=str(e))
            except EventNotFoundException as e:
                # Nothing to act on yet; keep polling.
                logger.debug(e)
            except BotoCoreError as e:
                logger.error(e)
                retry_ctr += 1

            time.sleep(SLEEP_PERIOD_SECONDS)

        if retry_ctr == QUERY_EVENTS_MAX_RETRY_COUNTER:
            # TODO: Dispatch timeout event for IN_PROGRESS
            logger.error(f"Failed to fetch events for {trace_id} after "
                         f"{QUERY_EVENTS_MAX_RETRY_COUNTER} attempts")

    def get_latest_event(self,
                         events: List[Dict[str, Any]],
                         trace_id: str) -> Tuple[Optional[str], int]:
        """Get the state and state sequence of the latest event.

        Args:
            events: Job events which may be active or non-active
            trace_id: Trace ID for a Ray Job

        Returns: tuple of state name (str) and the state sequence (int)

        Raises:
            EventNotFoundException: if events is empty

        """
        return self._latest_state_and_sequence(events, trace_id)

    def get_latest_active_event(self,
                                events: List[Dict[str, Any]],
                                trace_id: str) -> Tuple[Optional[str], int]:
        """Get the state and state sequence of the latest active event.

        Args:
            events: Job events which may be active or non-active
            trace_id: Trace ID for a Ray Job

        Returns: tuple of state name (str) and the state sequence (int)

        Raises:
            EventNotFoundException: if no event has a truthy "active" entry

        """
        active_events = [x for x in events if x.get("active")]
        return self._latest_state_and_sequence(active_events, trace_id)

    def _latest_state_and_sequence(self,
                                   events: List[Dict[str, Any]],
                                   trace_id: str) -> Tuple[Optional[str], int]:
        """Pick the latest event and unpack its state name and sequence."""
        latest_event = self.get_latest_sorted_event(events)
        if latest_event is None:
            raise EventNotFoundException(f"No events found for Ray job: {trace_id}")

        return latest_event["state"]["S"], int(latest_event["stateSequence"]["N"])

    @staticmethod
    def get_latest_sorted_event(items: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """Get the latest event from the event store,

        Sorted by order of higher precedence - State, State Sequence ID, and finally Timestamp

        Args:
            items: list of job events as DynamoDB items

        Returns:
            event: the event ranking highest by State, State Sequence ID, and
                finally Timestamp, or None when items is empty. On a full tie
                the earliest such item in the list is returned.
        """
        def precedence(item: Dict[str, Any]) -> Tuple[int, int, int]:
            state_value = States[item["state"]["S"]].value
            # States.UNKNOWN has the value None, which cannot be ordered
            # against the integer states; rank it below all of them.
            return (-1 if state_value is None else state_value,
                    int(item["stateSequence"]["N"]),
                    int(item["timestamp"]["N"]))

        return max(items, key=precedence, default=None)

    @staticmethod
    def to_next_state(event_state: str,
                      event_state_sequence: int,
                      state_transition_map: Dict[str, Union[Callable[[str, int], None], Dict]]):
        """Reads the state_transition_map to execute the callback for the given event state and state sequence.

        The selected callback is invoked with (event_state, event_state_sequence).
        Nothing happens when the state has no entry, or when the entry is a
        dictionary without a callable for the given sequence.

        Args:
            event_state: name of the job event state
            event_state_sequence: ID of the job event state sequence
            state_transition_map: A mapping of event states to callbacks or a dictionary of callbacks.

        """
        transition_cb = state_transition_map.get(event_state)

        if transition_cb is None:
            return

        if isinstance(transition_cb, dict):
            transition_sequence_cb = transition_cb.get(event_state_sequence)
            if transition_sequence_cb and callable(transition_sequence_cb):
                logger.info(f"Calling function for {event_state} and sequence {event_state_sequence}")
                transition_sequence_cb(event_state, event_state_sequence)
        elif callable(transition_cb):
            transition_cb(event_state, event_state_sequence)
|
@@ -1,25 +0,0 @@
|
|
1
|
-
import uuid
|
2
|
-
from typing import Optional
|
3
|
-
|
4
|
-
|
5
|
-
# Key names for the parent Ray session ID and this Ray session ID.
PARENT_SESSION_ID_KEY = "rayParentSessionId"
SESSION_ID_KEY = "raySessionId"
|
7
|
-
|
8
|
-
|
9
|
-
class SessionManager:
    """Holds the Session ID of a Ray app instance."""

    def __init__(self,
                 session_id: Optional[str] = None):
        """Manages Ray sessions.

        Args:
            session_id: Overrideable Session ID for this Ray app instance.
                If not provided, a Session ID is newly generated.
        """
        # Any falsy value (None or an empty string) is replaced by a new UUID4.
        self._session_id = session_id or str(uuid.uuid4())

    @property
    def session_id(self):
        """The Session ID given at construction, or the generated one."""
        return self._session_id
|
@@ -1,88 +0,0 @@
|
|
1
|
-
from enum import Enum, auto
|
2
|
-
|
3
|
-
from ray.autoscaler._private.event_system import EventSequence, CreateClusterEvent, StateEvent
|
4
|
-
|
5
|
-
|
6
|
-
class States(Enum):
    """Job states; the numbered members increase from NEW to COMPLETED."""

    # UNKNOWN carries no number, so its value cannot be ordered against the others.
    UNKNOWN = None

    NEW = 1
    DISPATCHED = 2
    STARTED = 3
    IN_PROGRESS = 4
    COMPLETED = 5
|
13
|
-
|
14
|
-
|
15
|
-
class RayJobRequestEvent(StateEvent):
    """Events for a new Ray job request; all of them report the NEW state.
    """
    new_request_delivered = auto()

    @property
    def state(self) -> str:
        """Name of the job state these events belong to."""
        return States.NEW.name
|
23
|
-
|
24
|
-
|
25
|
-
class ScriptStartedEvent(StateEvent):
    """Events tracked when a Ray script starts; all of them report the STARTED state.
    """
    start_initializing = auto()

    @property
    def state(self) -> str:
        """Name of the job state these events belong to."""
        return States.STARTED.name
|
33
|
-
|
34
|
-
|
35
|
-
class ScriptInProgressEvent(StateEvent):
    """Events tracking Ray app execution progress; all of them report the IN_PROGRESS state.
    """
    in_progress = auto()

    @property
    def state(self) -> str:
        """Name of the job state these events belong to."""
        return States.IN_PROGRESS.name
|
43
|
-
|
44
|
-
|
45
|
-
class ScriptInProgressCustomEvent(EventSequence):
    """Custom, user-defined event raised while a Ray script is executing.

    Each instance carries its own name and state sequence and always reports
    the IN_PROGRESS state.
    """
    def __init__(self, event_name: str, state_sequence: int):
        """
        Args:
            event_name: Name reported through the ``name`` property.
            state_sequence: Zero-based sequence number; ``value`` reports it
                shifted to one-based indexing.
        """
        self.state_sequence = state_sequence
        self.event_name = event_name

    @property
    def state(self) -> str:
        """Name of the job state this event belongs to."""
        return States.IN_PROGRESS.name

    @property
    def name(self) -> str:
        """The user-supplied event name."""
        return self.event_name

    @property
    def value(self) -> int:
        """The state sequence number in 1-based indexing."""
        return self.state_sequence + 1
|
64
|
-
|
65
|
-
|
66
|
-
class ScriptCompletedEvent(StateEvent):
    """Event marking the completion of Ray app execution.
    """
    @property
    def state(self) -> str:
        """Name of the job state this event belongs to (COMPLETED)."""
        return States.COMPLETED.name

    completed = auto()
|
74
|
-
|
75
|
-
|
76
|
-
class ScriptFailureEvent(StateEvent):
    """Event marking the failure of Ray app execution.
    """
    failed = auto()

    @property
    def state(self) -> str:
        """Name of the job state this event belongs to."""
        # NOTE(review): a failure is reported under IN_PROGRESS, not under a
        # dedicated state - presumably intentional; confirm with consumers.
        return States.IN_PROGRESS.name
|
84
|
-
|
85
|
-
|
86
|
-
# Event classes whose members are flattened, in this order, into event_enum_values.
event_enums = [
    CreateClusterEvent,
    ScriptStartedEvent,
    ScriptInProgressEvent,
    ScriptCompletedEvent,
]
event_enum_values = [
    member
    for event_cls in event_enums
    for member in event_cls.__members__.values()
]
|
@@ -1,54 +0,0 @@
|
|
1
|
-
import logging
|
2
|
-
from abc import abstractmethod, ABC
|
3
|
-
from typing import Dict, Union, Callable
|
4
|
-
|
5
|
-
from deltacat import logs
|
6
|
-
|
7
|
-
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))

# A state transition callback takes no arguments and returns nothing.
StateTransitionCallback = Callable[[], None]
# Maps an event state name to a callback or to a dictionary of callbacks.
StateTransitionMap = Dict[str, Union[StateTransitionCallback, Dict]]
|
10
|
-
|
11
|
-
|
12
|
-
class EventWorkflow(ABC):
    """Abstract workflow that maps job event states to transition callbacks."""

    @property
    @abstractmethod
    def state_transition_map(self) -> StateTransitionMap:
        """Mapping of event states to callbacks, read by ``to_next_state``."""
        raise NotImplementedError("Method not implemented")

    @abstractmethod
    def _build_state_transitions(self) -> StateTransitionMap:
        """Builds the mapping of event states to state transitioning callbacks.

        A state maps either to a single callback or, when the event has state
        sequences, to a dictionary with sequences as keys and callback
        functions as values.

        Returns: a map of event states to callbacks or a dictionary of callbacks
        """
        raise NotImplementedError("Method not implemented")

    def to_next_state(self,
                      event_state: str,
                      event_state_sequence: int):
        """Executes the callback registered for the given event state and state sequence.

        Callbacks are invoked without arguments. A state without an entry is
        logged at debug level and skipped; a dictionary entry without a
        callable for the sequence is skipped silently.

        Args:
            event_state: name of the job event state
            event_state_sequence: ID of the job event state sequence

        """
        registered = self.state_transition_map.get(event_state)

        if registered is None:
            logger.debug(f"No callback found for state: {event_state}, "
                         f"sequence ID: {event_state_sequence}")
            return

        if not isinstance(registered, dict):
            # Single callback registered for the whole state.
            if callable(registered):
                registered()
            return

        # Per-sequence callbacks: look up the one for this sequence.
        sequence_cb = registered.get(event_state_sequence)
        if sequence_cb and callable(sequence_cb):
            logger.info(f"Calling function for {event_state} and sequence {event_state_sequence}")
            sequence_cb()
|