deltacat 0.1.8__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. deltacat/__init__.py +41 -15
  2. deltacat/aws/clients.py +12 -31
  3. deltacat/aws/constants.py +1 -1
  4. deltacat/aws/redshift/__init__.py +7 -2
  5. deltacat/aws/redshift/model/manifest.py +54 -50
  6. deltacat/aws/s3u.py +188 -218
  7. deltacat/catalog/delegate.py +151 -185
  8. deltacat/catalog/interface.py +78 -97
  9. deltacat/catalog/model/catalog.py +21 -21
  10. deltacat/catalog/model/table_definition.py +11 -9
  11. deltacat/compute/compactor/__init__.py +12 -16
  12. deltacat/compute/compactor/compaction_session.py +259 -316
  13. deltacat/compute/compactor/model/delta_annotated.py +60 -44
  14. deltacat/compute/compactor/model/delta_file_envelope.py +5 -6
  15. deltacat/compute/compactor/model/delta_file_locator.py +10 -8
  16. deltacat/compute/compactor/model/materialize_result.py +6 -7
  17. deltacat/compute/compactor/model/primary_key_index.py +38 -34
  18. deltacat/compute/compactor/model/pyarrow_write_result.py +3 -4
  19. deltacat/compute/compactor/model/round_completion_info.py +25 -19
  20. deltacat/compute/compactor/model/sort_key.py +18 -15
  21. deltacat/compute/compactor/steps/dedupe.py +152 -259
  22. deltacat/compute/compactor/steps/hash_bucket.py +57 -73
  23. deltacat/compute/compactor/steps/materialize.py +138 -99
  24. deltacat/compute/compactor/steps/rehash/rehash_bucket.py +13 -13
  25. deltacat/compute/compactor/steps/rehash/rewrite_index.py +11 -13
  26. deltacat/compute/compactor/utils/io.py +59 -47
  27. deltacat/compute/compactor/utils/primary_key_index.py +131 -90
  28. deltacat/compute/compactor/utils/round_completion_file.py +22 -23
  29. deltacat/compute/compactor/utils/system_columns.py +33 -42
  30. deltacat/compute/metastats/meta_stats.py +235 -157
  31. deltacat/compute/metastats/model/partition_stats_dict.py +7 -10
  32. deltacat/compute/metastats/model/stats_cluster_size_estimator.py +13 -5
  33. deltacat/compute/metastats/stats.py +95 -64
  34. deltacat/compute/metastats/utils/io.py +100 -53
  35. deltacat/compute/metastats/utils/pyarrow_memory_estimation_function.py +5 -2
  36. deltacat/compute/metastats/utils/ray_utils.py +38 -33
  37. deltacat/compute/stats/basic.py +107 -69
  38. deltacat/compute/stats/models/delta_column_stats.py +11 -8
  39. deltacat/compute/stats/models/delta_stats.py +59 -32
  40. deltacat/compute/stats/models/delta_stats_cache_result.py +4 -1
  41. deltacat/compute/stats/models/manifest_entry_stats.py +12 -6
  42. deltacat/compute/stats/models/stats_result.py +24 -14
  43. deltacat/compute/stats/utils/intervals.py +16 -9
  44. deltacat/compute/stats/utils/io.py +86 -51
  45. deltacat/compute/stats/utils/manifest_stats_file.py +24 -33
  46. deltacat/constants.py +8 -10
  47. deltacat/io/__init__.py +2 -2
  48. deltacat/io/aws/redshift/redshift_datasource.py +157 -143
  49. deltacat/io/dataset.py +14 -17
  50. deltacat/io/read_api.py +36 -33
  51. deltacat/logs.py +94 -42
  52. deltacat/storage/__init__.py +18 -8
  53. deltacat/storage/interface.py +196 -213
  54. deltacat/storage/model/delta.py +45 -51
  55. deltacat/storage/model/list_result.py +12 -8
  56. deltacat/storage/model/namespace.py +4 -5
  57. deltacat/storage/model/partition.py +42 -42
  58. deltacat/storage/model/stream.py +29 -30
  59. deltacat/storage/model/table.py +14 -14
  60. deltacat/storage/model/table_version.py +32 -31
  61. deltacat/storage/model/types.py +1 -0
  62. deltacat/tests/stats/test_intervals.py +11 -24
  63. deltacat/tests/utils/test_record_batch_tables.py +284 -0
  64. deltacat/types/media.py +3 -4
  65. deltacat/types/tables.py +31 -21
  66. deltacat/utils/common.py +5 -11
  67. deltacat/utils/numpy.py +20 -22
  68. deltacat/utils/pandas.py +73 -100
  69. deltacat/utils/performance.py +3 -9
  70. deltacat/utils/placement.py +276 -231
  71. deltacat/utils/pyarrow.py +302 -89
  72. deltacat/utils/ray_utils/collections.py +2 -1
  73. deltacat/utils/ray_utils/concurrency.py +38 -32
  74. deltacat/utils/ray_utils/dataset.py +28 -28
  75. deltacat/utils/ray_utils/performance.py +5 -9
  76. deltacat/utils/ray_utils/runtime.py +9 -10
  77. {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/METADATA +22 -12
  78. deltacat-0.1.11.dist-info/RECORD +110 -0
  79. {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/WHEEL +1 -1
  80. deltacat/autoscaler/events/__init__.py +0 -0
  81. deltacat/autoscaler/events/compaction/__init__.py +0 -0
  82. deltacat/autoscaler/events/compaction/cluster.py +0 -82
  83. deltacat/autoscaler/events/compaction/collections/__init__.py +0 -0
  84. deltacat/autoscaler/events/compaction/collections/partition_key_value.py +0 -36
  85. deltacat/autoscaler/events/compaction/dispatcher.py +0 -28
  86. deltacat/autoscaler/events/compaction/input.py +0 -27
  87. deltacat/autoscaler/events/compaction/process.py +0 -25
  88. deltacat/autoscaler/events/compaction/session_manager.py +0 -13
  89. deltacat/autoscaler/events/compaction/utils.py +0 -216
  90. deltacat/autoscaler/events/compaction/workflow.py +0 -303
  91. deltacat/autoscaler/events/dispatcher.py +0 -95
  92. deltacat/autoscaler/events/dynamodb/__init__.py +0 -0
  93. deltacat/autoscaler/events/dynamodb/event_store.py +0 -164
  94. deltacat/autoscaler/events/event_store.py +0 -55
  95. deltacat/autoscaler/events/exceptions.py +0 -6
  96. deltacat/autoscaler/events/processor.py +0 -177
  97. deltacat/autoscaler/events/session_manager.py +0 -25
  98. deltacat/autoscaler/events/states.py +0 -88
  99. deltacat/autoscaler/events/workflow.py +0 -54
  100. deltacat/autoscaler/node_group.py +0 -230
  101. deltacat/autoscaler/utils.py +0 -69
  102. deltacat-0.1.8.dist-info/RECORD +0 -131
  103. /deltacat/{autoscaler → tests/utils}/__init__.py +0 -0
  104. {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/LICENSE +0 -0
  105. {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/top_level.txt +0 -0
@@ -1,164 +0,0 @@
1
- from typing import List, Dict, Any, Optional
2
-
3
- import boto3
4
- from botocore.client import BaseClient
5
- from deltacat.autoscaler.events.compaction.workflow import COMPACTION_SESSION_PARTITIONS_COMPACTED, \
6
- COMPACTION_SESSION_PARTITIONS_FAILURE
7
-
8
- from deltacat.autoscaler.events.event_store import EventStoreClient
9
-
10
-
11
- class DynamoDBEventStoreClient(EventStoreClient):
12
- def __init__(self,
13
- table_name: str,
14
- dynamodb_client: BaseClient = None):
15
- if dynamodb_client is None:
16
- dynamodb_client = boto3.client("dynamodb", "us-east-1")
17
-
18
- self.dynamodb_client = dynamodb_client
19
- self.table_name = table_name
20
-
21
- def query_events(self,
22
- trace_id: str) -> List[Optional[Dict[str, Any]]]:
23
- """Query events by Trace ID
24
-
25
- Args:
26
- trace_id: Trace ID for the job
27
-
28
- Returns: list of events that are active
29
-
30
- """
31
- return self.get_events(self._query_events(trace_id))
32
-
33
- def query_active_events(self,
34
- trace_id: str) -> List[Optional[Dict[str, Any]]]:
35
- """Query active events by Trace ID
36
-
37
- Args:
38
- trace_id: Trace ID for the job
39
-
40
- Returns: list of events that are active
41
-
42
- """
43
- return self.get_active_events(self._query_events(trace_id))
44
-
45
- def query_active_events_by_destination_job_table(self,
46
- destination_job_table: str) -> List[Optional[Dict[str, Any]]]:
47
- """Query active events from the job destination table index
48
-
49
- Args:
50
- destination_job_table: Destination table for jobs
51
-
52
- Returns: list of active events for the particular job
53
-
54
- """
55
- result = self.dynamodb_client.query(
56
- TableName=self.table_name,
57
- IndexName="destinationTable.timestamp",
58
- ScanIndexForward=False, # descending order traversal
59
- KeyConditions={
60
- "destinationTable": {
61
- "AttributeValueList": [
62
- {
63
- "S": destination_job_table
64
- },
65
- ],
66
- "ComparisonOperator": "EQ"
67
- },
68
- },
69
- )
70
- return self.get_active_events(result)
71
-
72
- def query_active_events_by_event_name(self,
73
- event_name: str) -> List[Optional[Dict[str, Any]]]:
74
- """Query active events from the event name index
75
-
76
- Args:
77
- event_name: Name of the job event state
78
-
79
- Returns: list of active events for the particular event name
80
-
81
- """
82
- result = self.dynamodb_client.query(
83
- TableName=self.table_name,
84
- IndexName="eventName.timestamp",
85
- KeyConditions={
86
- "eventName": {
87
- "AttributeValueList": [
88
- {
89
- "S": event_name
90
- },
91
- ],
92
- "ComparisonOperator": "EQ"
93
- },
94
- }
95
- )
96
- return self.get_active_events(result)
97
-
98
- def get_compacted_partition_ids(self, trace_id: str) -> List[str]:
99
- items = self._get_completed_partition_events(trace_id)
100
- partition_id_list = [partition_id for event in items
101
- if "stateDetailMetadata" in event
102
- for partition_id in event["stateDetailMetadata"]["M"].keys()]
103
- return partition_id_list
104
-
105
- def get_failed_partition_ids(self, trace_id: str) -> List[str]:
106
- items = self._get_failed_partition_events(trace_id)
107
- partition_id_list = [partition_id for event in items
108
- if "stateDetailMetadata" in event
109
- for partition_id in event["stateDetailMetadata"]["M"].keys()]
110
- return partition_id_list
111
-
112
- @staticmethod
113
- def get_events(query_result: Dict[str, Any]) -> List[Dict[str, Any]]:
114
- """Gets a filtered list of active job state events
115
-
116
- Args:
117
- query_result: list of job state events
118
-
119
- Returns: a filtered list of active job state events
120
-
121
- """
122
- return query_result["Items"]
123
-
124
- @staticmethod
125
- def get_active_events(query_result: Dict[str, Any]) -> List[Dict[str, Any]]:
126
- """Gets a filtered list of active job state events
127
-
128
- Args:
129
- query_result: list of job state events
130
-
131
- Returns: a filtered list of active job state events
132
-
133
- """
134
- return [item for item in query_result["Items"] if item.get("active")]
135
-
136
- def _get_completed_partition_events(
137
- self,
138
- trace_id: str) -> List[Dict[str, Any]]:
139
- return [item for item in self.query_active_events(trace_id)
140
- if item["eventName"]["S"] == COMPACTION_SESSION_PARTITIONS_COMPACTED]
141
-
142
- def _get_failed_partition_events(
143
- self,
144
- trace_id: str) -> List[Dict[str, Any]]:
145
- return [item for item in self.query_active_events(trace_id)
146
- if item["eventName"]["S"] == COMPACTION_SESSION_PARTITIONS_FAILURE]
147
-
148
- def _query_events(self, trace_id: str):
149
- return self.dynamodb_client.query(
150
- TableName=self.table_name,
151
- IndexName="traceId.timestamp",
152
- ScanIndexForward=False, # descending order traversal
153
- KeyConditions={
154
- "traceId": {
155
- "AttributeValueList": [
156
- {
157
- "S": trace_id
158
- },
159
- ],
160
- "ComparisonOperator": "EQ"
161
- },
162
- },
163
- )
164
-
@@ -1,55 +0,0 @@
1
- from typing import List, Dict, Any, Optional
2
-
3
-
4
- class EventStoreClient:
5
- def query_events(self,
6
- trace_id: str) -> List[Optional[Dict[str, Any]]]:
7
- """Query active events by Trace ID
8
-
9
- Args:
10
- trace_id: Trace ID for the job
11
-
12
- Returns: list of events that are active
13
-
14
- """
15
- raise NotImplementedError("Method not implemented")
16
-
17
- def query_active_events_by_destination_job_table(self,
18
- destination_job_table: str) -> List[Optional[Dict[str, Any]]]:
19
- """Query active events from the job destination table
20
-
21
- Args:
22
- destination_job_table: Destination table for jobs
23
-
24
- Returns: list of active events for the particular job
25
-
26
- """
27
- raise NotImplementedError("Method not implemented")
28
-
29
- def query_active_events_by_event_name(self,
30
- event_name: str) -> List[Optional[Dict[str, Any]]]:
31
- """Query active events from the event name index
32
-
33
- Args:
34
- event_name: Name of the job event state
35
-
36
- Returns: list of active events for the particular event name
37
-
38
- """
39
- raise NotImplementedError("Method not implemented")
40
-
41
- def get_compacted_partition_ids(self, trace_id: str) -> List[str]:
42
- """Retrieve all compacted partition IDs.
43
-
44
- Returns: list of all compacted partition IDs
45
-
46
- """
47
- raise NotImplementedError("Method not implemented")
48
-
49
- def get_failed_partition_ids(self, trace_id: str) -> List[str]:
50
- """Retrieve all partition IDs that failed compaction.
51
-
52
- Returns: list of all failed partition IDs
53
-
54
- """
55
- raise NotImplementedError("Method not implemented")
@@ -1,6 +0,0 @@
1
- class EventNotFoundException(Exception):
2
- pass
3
-
4
-
5
- class WorkflowException(Exception):
6
- pass
@@ -1,177 +0,0 @@
1
- import logging
2
-
3
- import time
4
- from typing import Dict, Callable, Union, Tuple, Optional, List, Any
5
-
6
- from botocore.exceptions import BotoCoreError
7
- from deltacat.autoscaler.events.EventWorkflow import EventWorkflow
8
- from deltacat.autoscaler.events.compaction.workflow import CompactionWorkflow
9
- from deltacat.autoscaler.events.states import States
10
- from ray.autoscaler._private.event_system import EventPublisher
11
-
12
- from deltacat import logs
13
- from deltacat.autoscaler.events.event_store import EventStoreClient
14
- from deltacat.autoscaler.events.exceptions import EventNotFoundException, WorkflowException
15
-
16
- logging.basicConfig(level=logging.INFO)
17
- logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
18
-
19
- QUERY_EVENTS_MAX_RETRY_COUNTER = 10
20
- SLEEP_PERIOD_SECONDS = 20
21
-
22
-
23
- # TODO: Make this the primary open-source Job Run Event Handler / Dispatcher?
24
- # Might be worth porting over some features of the Job Event Daemon (Java) to here
25
-
26
- class EventProcessor:
27
- def __init__(self,
28
- events_publisher: EventPublisher,
29
- event_store: EventStoreClient,
30
- workflow: CompactionWorkflow):
31
- """
32
-
33
- Args:
34
- events_publisher: Events manager for publishing events through a cloud provider
35
- event_store: High-level API client for the Event Store database
36
- workflow: Workflow of job states
37
- """
38
- self.event_publisher = events_publisher
39
- self.event_store = event_store
40
- self.workflow = workflow
41
-
42
- def run(self):
43
- """Polls the event store and handles state transitions based on incoming events.
44
-
45
- This function will dispatch the STARTED event when first executed.
46
- The event listener will only listen to event states from STARTED and onwards.
47
-
48
- Event states before STARTED (i.e. NEW, DISPATCHED) are emitted from the Event Daemon (Java).
49
- """
50
- logger.info(f"Starting workflow...!")
51
- compaction_workflow.start_workflow()
52
-
53
- trace_id = self.event_publisher.event_base_params["traceId"]
54
- dest_provider = self.event_publisher.event_base_params["destinationTable"]["owner"]
55
- dest_table = self.event_publisher.event_base_params["destinationTable"]["name"]
56
- expiry_timestamp = self.event_publisher.event_base_params["expirationTimestamp"]
57
-
58
- retry_ctr = 0
59
- while round(time.time() * 1000) < expiry_timestamp and retry_ctr < QUERY_EVENTS_MAX_RETRY_COUNTER:
60
- logger.debug(f"Polling latest job states for trace_id: {trace_id}, "
61
- f"provider: {dest_provider} and table: {dest_table}...")
62
-
63
- try:
64
- events = self.event_store.query_events(trace_id)
65
-
66
- # Latest non-active / active event must be checked for the completed state.
67
- latest_state, latest_state_sequence = self.get_latest_event(events, trace_id)
68
- if latest_state == States.COMPLETED.name:
69
- logger.info("Completed Ray job! Exiting.")
70
- break
71
-
72
- # Latest active event must be checked for the next state transition
73
- latest_active_state, latest_active_state_sequence = self.get_latest_active_event(events, trace_id)
74
-
75
- # Uncomment for testing on non-active events to test specific steps of workflows
76
- # latest_active_state, latest_active_state_sequence = get_latest_event(events, trace_id)
77
-
78
- self.workflow.to_next_state(latest_active_state, latest_active_state_sequence)
79
-
80
- except WorkflowException as e:
81
- self.workflow.workflow_failure(error_message=str(e))
82
- except EventNotFoundException as e:
83
- logger.debug(e)
84
- except BotoCoreError as e:
85
- logger.error(e)
86
- retry_ctr += 1
87
-
88
- time.sleep(SLEEP_PERIOD_SECONDS)
89
-
90
- if retry_ctr == QUERY_EVENTS_MAX_RETRY_COUNTER:
91
- # TODO: Dispatch timeout event for IN_PROGRESS
92
- logger.error(f"Failed to fetch events for {trace_id} after "
93
- f"{QUERY_EVENTS_MAX_RETRY_COUNTER} attempts")
94
-
95
- def get_latest_event(self,
96
- events: List[Dict[str, Any]],
97
- trace_id: str) -> Tuple[Optional[str], int]:
98
- """
99
-
100
- Args:
101
- events: Job events which may be active or non-active
102
- trace_id: Trace ID for a Ray Job
103
-
104
- Returns: tuple of state name (str) and the state sequence (int)
105
-
106
- """
107
- latest_event = self.get_latest_sorted_event(events)
108
- if latest_event is None:
109
- raise EventNotFoundException(f"No events found for Ray job: {trace_id}")
110
-
111
- latest_state, latest_state_sequence = latest_event["state"]["S"], int(latest_event["stateSequence"]["N"])
112
- return latest_state, latest_state_sequence
113
-
114
- def get_latest_active_event(self,
115
- events: List[Dict[str, Any]],
116
- trace_id: str) -> Tuple[Optional[str], int]:
117
- """
118
-
119
- Args:
120
- events: Job events which may be active or non-active
121
- trace_id: Trace ID for a Ray Job
122
-
123
- Returns: tuple of state name (str) and the state sequence (int)
124
-
125
- """
126
- active_events = [x for x in events if x.get("active")]
127
- latest_event = self.get_latest_sorted_event(active_events)
128
- if latest_event is None:
129
- raise EventNotFoundException(f"No events found for Ray job: {trace_id}")
130
-
131
- latest_state, latest_state_sequence = latest_event["state"]["S"], int(latest_event["stateSequence"]["N"])
132
- return latest_state, latest_state_sequence
133
-
134
- @staticmethod
135
- def get_latest_sorted_event(items: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
136
- """Get the latest event from the event store,
137
-
138
- Sorted by order of higher precedence - State, State Sequence ID, and finally Timestamp
139
-
140
- Args:
141
- items: list of job events as DynamoDB items
142
-
143
- Returns:
144
- event: job event sorted by State, State Sequence ID, and finally Timestamp
145
- """
146
- #
147
- latest_events = sorted(items,
148
- key=lambda x: (States[x["state"]["S"]].value,
149
- int(x["stateSequence"]["N"]),
150
- int(x["timestamp"]["N"])),
151
- reverse=True)
152
- return latest_events[0] if latest_events else None
153
-
154
- @staticmethod
155
- def to_next_state(event_state: str,
156
- event_state_sequence: int,
157
- state_transition_map: Dict[str, Union[Callable[[], None], Dict]]):
158
- """Reads the state_transition_map to execute the callback for the given event state and state sequence.
159
-
160
- Args:
161
- event_state: name of the job event state
162
- event_state_sequence: ID of the job event state sequence
163
- state_transition_map: A mapping of event states to callbacks or a dictionary of callbacks.
164
-
165
- """
166
- transition_cb = state_transition_map.get(event_state)
167
-
168
- if transition_cb is None:
169
- return
170
-
171
- if isinstance(transition_cb, dict):
172
- transition_sequence_cb: Callable[[], None] = transition_cb.get(event_state_sequence)
173
- if transition_sequence_cb and callable(transition_sequence_cb):
174
- logger.info(f"Calling function for {event_state} and sequence {event_state_sequence}")
175
- transition_sequence_cb(event_state, event_state_sequence)
176
- elif callable(transition_cb):
177
- transition_cb(event_state, event_state_sequence)
@@ -1,25 +0,0 @@
1
- import uuid
2
- from typing import Optional
3
-
4
-
5
- PARENT_SESSION_ID_KEY = "rayParentSessionId"
6
- SESSION_ID_KEY = "raySessionId"
7
-
8
-
9
- class SessionManager:
10
- def __init__(self,
11
- session_id: Optional[str] = None):
12
- """Manages Ray sessions.
13
-
14
- Args:
15
- session_id: Overrideable Session ID for this Ray app instance.
16
- If not provided, a Session ID is newly generated.
17
- """
18
- if not session_id:
19
- session_id = str(uuid.uuid4())
20
-
21
- self._session_id = session_id
22
-
23
- @property
24
- def session_id(self):
25
- return self._session_id
@@ -1,88 +0,0 @@
1
- from enum import Enum, auto
2
-
3
- from ray.autoscaler._private.event_system import EventSequence, CreateClusterEvent, StateEvent
4
-
5
-
6
- class States(Enum):
7
- UNKNOWN = None
8
- NEW = 1
9
- DISPATCHED = 2
10
- STARTED = 3
11
- IN_PROGRESS = 4
12
- COMPLETED = 5
13
-
14
-
15
- class RayJobRequestEvent(StateEvent):
16
- """Events for a new Ray job request.
17
- """
18
- @property
19
- def state(self) -> str:
20
- return States.NEW.name
21
-
22
- new_request_delivered = auto()
23
-
24
-
25
- class ScriptStartedEvent(StateEvent):
26
- """Events to track for Ray scripts that are executed.
27
- """
28
- @property
29
- def state(self) -> str:
30
- return States.STARTED.name
31
-
32
- start_initializing = auto()
33
-
34
-
35
- class ScriptInProgressEvent(StateEvent):
36
- """Events tracking Ray app execution progress.
37
- """
38
- @property
39
- def state(self) -> str:
40
- return States.IN_PROGRESS.name
41
-
42
- in_progress = auto()
43
-
44
-
45
- class ScriptInProgressCustomEvent(EventSequence):
46
- """Custom, user-defined events to track during execution of Ray scripts.
47
- """
48
- def __init__(self, event_name: str, state_sequence: int):
49
- self.event_name = event_name
50
- self.state_sequence = state_sequence
51
-
52
- @property
53
- def state(self) -> str:
54
- return States.IN_PROGRESS.name
55
-
56
- @property
57
- def name(self) -> str:
58
- return self.event_name
59
-
60
- @property
61
- def value(self) -> int:
62
- # the state sequence number in 1-based indexing
63
- return self.state_sequence + 1
64
-
65
-
66
- class ScriptCompletedEvent(StateEvent):
67
- """Event marking the start of Ray app execution.
68
- """
69
- @property
70
- def state(self) -> str:
71
- return States.COMPLETED.name
72
-
73
- completed = auto()
74
-
75
-
76
- class ScriptFailureEvent(StateEvent):
77
- """Event marking the failure of Ray app execution.
78
- """
79
- @property
80
- def state(self) -> str:
81
- return States.IN_PROGRESS.name
82
-
83
- failed = auto()
84
-
85
-
86
- event_enums = [CreateClusterEvent, ScriptStartedEvent, ScriptInProgressEvent, ScriptCompletedEvent]
87
- event_enum_values = [sequence for event in event_enums
88
- for sequence in event.__members__.values()]
@@ -1,54 +0,0 @@
1
- import logging
2
- from abc import abstractmethod, ABC
3
- from typing import Dict, Union, Callable
4
-
5
- from deltacat import logs
6
-
7
- logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
8
- StateTransitionCallback = Callable[[], None]
9
- StateTransitionMap = Dict[str, Union[StateTransitionCallback, Dict]]
10
-
11
-
12
- class EventWorkflow(ABC):
13
- @property
14
- @abstractmethod
15
- def state_transition_map(self) -> StateTransitionMap:
16
- raise NotImplementedError("Method not implemented")
17
-
18
- @abstractmethod
19
- def _build_state_transitions(self) -> StateTransitionMap:
20
- """Builds a mapping of event states to state transitioning callbacks, or
21
- a dictionary of state transitioning callbacks.
22
-
23
- If an event has state sequences, a dictionary of callbacks is provided
24
- with sequences as keys and callback functions as values.
25
-
26
- Returns: a map of event states to callbacks or a dictionary of callbacks
27
- """
28
- raise NotImplementedError("Method not implemented")
29
-
30
- def to_next_state(self,
31
- event_state: str,
32
- event_state_sequence: int):
33
- """Reads the state_transition_map to execute the callback for the given event state and state sequence.
34
-
35
- Args:
36
- event_state: name of the job event state
37
- event_state_sequence: ID of the job event state sequence
38
-
39
- """
40
- transition_cb: Union[StateTransitionCallback, Dict[int, StateTransitionCallback]] = \
41
- self.state_transition_map.get(event_state)
42
-
43
- if transition_cb is None:
44
- logger.debug(f"No callback found for state: {event_state}, "
45
- f"sequence ID: {event_state_sequence}")
46
- return
47
-
48
- if isinstance(transition_cb, dict):
49
- transition_sequence_cb: Callable[[], None] = transition_cb.get(event_state_sequence)
50
- if transition_sequence_cb and callable(transition_sequence_cb):
51
- logger.info(f"Calling function for {event_state} and sequence {event_state_sequence}")
52
- transition_sequence_cb()
53
- elif callable(transition_cb):
54
- transition_cb()