mccode-plumber 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mccode_plumber/file_writer_control/CommandChannel.py +236 -0
- mccode_plumber/file_writer_control/CommandHandler.py +58 -0
- mccode_plumber/file_writer_control/CommandStatus.py +151 -0
- mccode_plumber/file_writer_control/InThreadStatusTracker.py +228 -0
- mccode_plumber/file_writer_control/JobHandler.py +102 -0
- mccode_plumber/file_writer_control/JobStatus.py +147 -0
- mccode_plumber/file_writer_control/KafkaTopicUrl.py +22 -0
- mccode_plumber/file_writer_control/StateExtractor.py +58 -0
- mccode_plumber/file_writer_control/WorkerFinder.py +139 -0
- mccode_plumber/file_writer_control/WorkerJobPool.py +70 -0
- mccode_plumber/file_writer_control/WorkerStatus.py +88 -0
- mccode_plumber/file_writer_control/WriteJob.py +83 -0
- mccode_plumber/file_writer_control/__init__.py +13 -0
- mccode_plumber/writer.py +3 -3
- {mccode_plumber-0.6.0.dist-info → mccode_plumber-0.7.0.dist-info}/METADATA +3 -2
- mccode_plumber-0.7.0.dist-info/RECORD +27 -0
- mccode_plumber-0.6.0.dist-info/RECORD +0 -14
- {mccode_plumber-0.6.0.dist-info → mccode_plumber-0.7.0.dist-info}/WHEEL +0 -0
- {mccode_plumber-0.6.0.dist-info → mccode_plumber-0.7.0.dist-info}/entry_points.txt +0 -0
- {mccode_plumber-0.6.0.dist-info → mccode_plumber-0.7.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
import atexit
|
|
2
|
+
import threading
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from queue import Queue
|
|
5
|
+
from typing import Dict, List, Optional, Union
|
|
6
|
+
|
|
7
|
+
from kafka import KafkaConsumer
|
|
8
|
+
from kafka.errors import NoBrokersAvailable
|
|
9
|
+
|
|
10
|
+
from file_writer_control.CommandStatus import CommandState, CommandStatus
|
|
11
|
+
from file_writer_control.InThreadStatusTracker import (
|
|
12
|
+
DEAD_ENTITY_TIME_LIMIT,
|
|
13
|
+
InThreadStatusTracker,
|
|
14
|
+
)
|
|
15
|
+
from file_writer_control.JobStatus import JobStatus
|
|
16
|
+
from file_writer_control.KafkaTopicUrl import KafkaTopicUrl
|
|
17
|
+
from file_writer_control.WorkerStatus import WorkerStatus
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def thread_function(
    host_port: str,
    topic: str,
    in_queue: Queue,
    out_queue: Queue,
    kafka_config: Optional[Dict[str, str]] = None,
):
    """
    Background thread for consuming Kafka messages.

    The function first retries until a KafkaConsumer can be created, then
    repeatedly drains the consumer, forwards every message to an
    InThreadStatusTracker and asks the tracker to check for lost connections.

    :param host_port: The host + port of the Kafka broker that we are using.
    :param topic: The Kafka topic that we are listening to.
    :param in_queue: A queue for sending "exit" messages to the thread.
    .. note:: The thread will exit upon the reception of the string "exit" on this queue.
    :param out_queue: The queue to which status updates are published.
    :param kafka_config: Extra keyword arguments passed on to KafkaConsumer.
        None (the default) means no extra arguments.
    """
    # Avoid a shared mutable default; None stands in for "no extra settings".
    extra_config = {} if kafka_config is None else kafka_config
    status_tracker = InThreadStatusTracker(out_queue)

    def exit_requested() -> bool:
        # Consumes at most one pending control message per call.
        return not in_queue.empty() and in_queue.get() == "exit"

    while True:
        try:
            consumer = KafkaConsumer(
                topic,
                bootstrap_servers=host_port,
                fetch_max_bytes=52428800 * 6,  # Roughly 300MB
                max_partition_fetch_bytes=52428800 * 10,
                consumer_timeout_ms=100,
                **extra_config,
            )
            break
        except NoBrokersAvailable:
            pass  # Do not fail if the broker is not immediately available.
        if exit_requested():
            return

    while True:
        # consumer_timeout_ms makes this iteration end when no message
        # arrives in time, so the checks below run regularly.
        for message in consumer:
            status_tracker.process_message(message.value)
        status_tracker.check_for_lost_connections()
        if exit_requested():
            break
    consumer.close(True)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class CommandChannel(object):
    """
    A class that implements the functionality for receiving and interpreting messages that are published to the
    Kafka command topic of a pool of file-writers.
    .. note:: This class implements a thread that will continuously attempt to connect to a Kafka broker.
    """

    def __init__(
        self, command_topic_url: str, kafka_config: Optional[Dict[str, str]] = None
    ):
        """
        Constructor. Starts the background consumer thread and registers an
        atexit hook that stops it.

        :param command_topic_url: The url of the Kafka topic to where the file-writer status/command messages are published.
        :param kafka_config: Extra keyword arguments handed to the Kafka consumer.
            None (the default) means no extra arguments.
        """
        kafka_address = KafkaTopicUrl(command_topic_url)
        self.status_queue = Queue()
        self.to_thread_queue = Queue()
        thread_kwargs = {
            "host_port": kafka_address.host_port,
            "topic": kafka_address.topic,
            "in_queue": self.to_thread_queue,
            "out_queue": self.status_queue,
            # Always hand the thread a dict so it can be unpacked with **.
            "kafka_config": {} if kafka_config is None else kafka_config,
        }
        self.map_of_workers: Dict[str, WorkerStatus] = {}
        self.map_of_jobs: Dict[str, JobStatus] = {}
        self.map_of_commands: Dict[str, CommandStatus] = {}
        self.run_thread = True
        self.thread = threading.Thread(
            target=thread_function, daemon=True, kwargs=thread_kwargs
        )
        self.thread.start()

        atexit.register(self.stop_thread)

    def add_job_id(self, job_id: str):
        """
        Add a job identifier to the list of known jobs before it has been encountered on the command topic.

        :param job_id: The identifier of the new job.
        """
        if job_id not in self.map_of_jobs:
            self.map_of_jobs[job_id] = JobStatus(job_id)

    def add_command_id(self, job_id: str, command_id: str):
        """
        Add a command identifier to the list of known commands before it has been encountered on the command topic.
        A newly added command starts out in the WAITING_RESPONSE state.

        :param job_id: The job identifier of the new command.
        :param command_id: The identifier of the new command.
        """
        if command_id not in self.map_of_commands:
            new_command = CommandStatus(job_id, command_id)
            self.map_of_commands[command_id] = new_command
            new_command.state = CommandState.WAITING_RESPONSE

    def stop_thread(self):
        """
        Stop the thread that is continuously getting command topic messages in the background. Should only be called if
        we are about to get rid of the current instance of CommandChannel.
        """
        self.to_thread_queue.put("exit")
        try:
            self.thread.join()
        except RuntimeError:
            pass  # Do not throw an exception if the thread has not yet been started.

    def __del__(self):
        # __init__ may have raised before the thread was created; in that
        # case there is nothing to stop.
        if hasattr(self, "thread"):
            self.stop_thread()

    def update_workers(self, current_time: Optional[datetime] = None):
        """
        Update the list of known workers, jobs and commands. This is a non-blocking call but it might take some time
        to execute if the queue of updates is long. This member function is called by many of the other member functions
        in this class.

        :param current_time: The time used for the outdated/dead checks. Defaults to datetime.now().
        """
        if current_time is None:
            current_time = datetime.now()

        # Status type -> (registry to update, attribute holding the key).
        registries = {
            WorkerStatus: (self.map_of_workers, "service_id"),
            CommandStatus: (self.map_of_commands, "command_id"),
            JobStatus: (self.map_of_jobs, "job_id"),
        }
        while not self.status_queue.empty():
            status_update = self.status_queue.get()
            registry, id_attribute = registries[type(status_update)]
            key = getattr(status_update, id_attribute)
            # Unknown entities are stored as-is; update_status is called
            # either way.
            registry.setdefault(key, status_update).update_status(status_update)

        for entity in (
            list(self.map_of_workers.values())
            + list(self.map_of_commands.values())
            + list(self.map_of_jobs.values())
        ):
            entity.check_if_outdated(current_time)

        for registry in (self.map_of_commands, self.map_of_workers, self.map_of_jobs):
            self._prune(registry, current_time)

    @staticmethod
    def _prune(entities_dictionary, current_time: datetime):
        """Remove entries not updated within DEAD_ENTITY_TIME_LIMIT of current_time."""
        for key in list(entities_dictionary.keys()):
            if entities_dictionary[key].last_update + DEAD_ENTITY_TIME_LIMIT < current_time:
                del entities_dictionary[key]

    def list_workers(self) -> List[WorkerStatus]:
        """
        :return: A list of the (known) workers with state and status information.
        """
        self.update_workers()
        return list(self.map_of_workers.values())

    def list_jobs(self) -> List[JobStatus]:
        """
        :return: A list of the (known) jobs with state and status information.
        """
        self.update_workers()
        return list(self.map_of_jobs.values())

    def list_commands(self) -> List[CommandStatus]:
        """
        :return: A list of the (known) commands and their outcomes.
        """
        self.update_workers()
        return list(self.map_of_commands.values())

    def get_job(self, job_id: str) -> Union[JobStatus, None]:
        """
        Get the status of a single job.

        :param job_id: The job identifier of the job we are interested in.
        :return: The job status or None if the job is not known.
        """
        self.update_workers()
        return self.map_of_jobs.get(job_id)

    def get_worker(self, service_id: str) -> Union[WorkerStatus, None]:
        """
        Get the status of a single worker.

        :param service_id: The service identifier of the worker we are interested in.
        :return: The worker status or None if the service id is not known.
        """
        self.update_workers()
        return self.map_of_workers.get(service_id)

    def get_command(self, command_id: str) -> Union[CommandStatus, None]:
        """
        Get the status of a single command.

        :param command_id: The command identifier of the command we are interested in.
        :return: The command status/outcome or None if the command is not known.
        """
        self.update_workers()
        return self.map_of_commands.get(command_id)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from datetime import timedelta
|
|
2
|
+
|
|
3
|
+
from file_writer_control.CommandChannel import CommandChannel
|
|
4
|
+
from file_writer_control.CommandStatus import CommandState
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class CommandHandler:
    """
    A stand in for (more easily) checking the state of a command sent to a file-writer.
    """

    def __init__(self, command_channel: CommandChannel, command_id: str):
        """
        Constructor.

        :param command_channel: The instance of a CommandChannel that this class uses for getting the command status from.
        :param command_id: The (unique) command identifier.
        """
        self.command_id = command_id
        self.command_channel = command_channel

    def get_state(self) -> CommandState:
        """
        Get the command state.

        :return: The state of the command if possible. CommandState.UNKNOWN if the state can not be determined.
        """
        command = self.command_channel.get_command(self.command_id)
        if command is None:
            return CommandState.UNKNOWN
        return command.state

    def is_done(self) -> bool:
        """
        :return: True if the command completed successfully. False otherwise, including when the command is not
            (or no longer) known to the command channel.
        :raises RuntimeError: If the command failed or timed out.
        """
        command = self.command_channel.get_command(self.command_id)
        if command is None:
            # get_command returns None for unknown commands; treat that as
            # "not done" instead of failing on a None attribute access.
            return False
        current_state = command.state
        if current_state == CommandState.ERROR:
            raise RuntimeError(
                f'Command failed with error message "{command.message}".'
            )
        if current_state == CommandState.TIMEOUT_RESPONSE:
            raise RuntimeError("Timed out while trying to send command.")
        return current_state == CommandState.SUCCESS

    def get_message(self) -> str:
        """
        :return: If there was an error executing the command, this member function will return the error string as
            sent by the file-writer. Will return an empty string otherwise.
        """
        command = self.command_channel.get_command(self.command_id)
        if command is None:
            return ""
        return command.message

    def set_timeout(self, new_timeout: timedelta):
        """
        Set the timeout of the command.

        :param new_timeout: The new timeout.
        :raises AttributeError: If the command is not known to the command channel (get_command returned None).
        """
        self.command_channel.get_command(self.command_id).timeout = new_timeout

    def get_timeout(self):
        """
        :return: The timeout of the command.
        :raises AttributeError: If the command is not known to the command channel (get_command returned None).
        """
        return self.command_channel.get_command(self.command_id).timeout
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
from datetime import datetime, timedelta
|
|
2
|
+
from enum import Enum, auto
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
COMMAND_STATUS_TIMEOUT = timedelta(seconds=60)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class CommandState(Enum):
    """
    The state of a command.

    Member values come from auto(), so they follow the declaration order.
    """

    # Reported by CommandHandler.get_state for a command the channel does not know.
    UNKNOWN = auto()
    # Initial state of a freshly constructed CommandStatus.
    NO_COMMAND = auto()
    # The command has been registered or seen, but no answer was processed yet.
    WAITING_RESPONSE = auto()
    # Set by CommandStatus.check_if_outdated when the timeout has elapsed.
    TIMEOUT_RESPONSE = auto()
    # The command failed; CommandHandler.is_done raises for this state.
    ERROR = auto()
    # The command succeeded; CommandHandler.is_done returns True for this state.
    SUCCESS = auto()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class CommandStatus(object):
    """
    The status of a command.
    """

    def __init__(
        self,
        job_id: str,
        command_id: str,
        command_timeout: timedelta = COMMAND_STATUS_TIMEOUT,
    ):
        """
        Constructor. The command starts in the NO_COMMAND state.

        :param job_id: The job identifier under which this command is executed.
        :param command_id: The unique identifier of this command.
        :param command_timeout: How long to wait for a response before the state becomes TIMEOUT_RESPONSE.
        """
        self._command_timeout = command_timeout
        self._job_id = job_id
        self._command_id = command_id
        self._last_update = datetime.now()
        self._state = CommandState.NO_COMMAND
        self._message = ""
        self._response_code = None

    def __eq__(self, other_status: "CommandStatus"):
        """
        Two statuses are equal when command id, job id, state and response code match. The message, timeout and
        time of last update are not compared.
        """
        if not isinstance(other_status, CommandStatus):
            # Returning NotImplemented (instead of raising) lets Python fall
            # back to its default comparison, so comparing with None or
            # another type yields False rather than an exception.
            return NotImplemented
        return (
            other_status.command_id == self.command_id
            and other_status.job_id == self.job_id
            and other_status.state == self.state
            and other_status.response_code == self.response_code
        )

    def update_status(self, new_status: "CommandStatus"):
        """
        Updates the status/state of this instance of the CommandStatus class using another instance.
        .. note:: The command identifier of both this instance and the other one must be identical.

        :param new_status: The other instance of the CommandStatus class.
        :raises RuntimeError: If the command identifiers differ.
        """
        if new_status.command_id != self.command_id:
            raise RuntimeError(
                f"Command id of status update is not correct ({self.command_id} vs {new_status.command_id})"
            )
        self._state = new_status.state
        self._response_code = new_status.response_code
        if new_status.message:
            # An empty message never overwrites a stored one.
            self._message = new_status.message
        self._last_update = new_status.last_update

    def check_if_outdated(self, current_time: datetime):
        """
        Given the current time, state and the time of the last update: Have we lost the connection?
        Commands in the SUCCESS, ERROR or TIMEOUT_RESPONSE state are left untouched.

        :param current_time: The current time
        """
        finished_states = (
            CommandState.SUCCESS,
            CommandState.ERROR,
            CommandState.TIMEOUT_RESPONSE,
        )
        if (
            self.state not in finished_states
            and current_time - self.last_update > self._command_timeout
        ):
            self._state = CommandState.TIMEOUT_RESPONSE
            self._last_update = current_time

    @property
    def response_code(self) -> Optional[int]:
        """
        A code that mirrors the result of a command if a response has been received. Is set to None otherwise.
        """
        return self._response_code

    @response_code.setter
    def response_code(self, new_code: int):
        """
        Set the current response code. Also refreshes the time of the last update.
        """
        self._response_code = new_code
        self._last_update = datetime.now()

    @property
    def job_id(self) -> str:
        """
        The job identifier under which this command is executed.
        """
        return self._job_id

    @property
    def command_id(self) -> str:
        """
        The unique command identifier of this command.
        """
        return self._command_id

    @property
    def message(self) -> str:
        """
        A status/error message as returned by the file-writer that responded to the command.
        """
        return self._message

    @message.setter
    def message(self, new_message: str):
        """
        Set the message. Empty messages are ignored and do not refresh the time of the last update.
        """
        if new_message:
            self._message = new_message
            self._last_update = datetime.now()

    @property
    def state(self) -> CommandState:
        """
        The current state of the command.
        """
        return self._state

    @state.setter
    def state(self, new_state: CommandState):
        """
        Set the state. Also refreshes the time of the last update.
        """
        self._state = new_state
        self._last_update = datetime.now()

    @property
    def last_update(self) -> datetime:
        """
        The local time stamp of the last update of the status of the command.
        """
        return self._last_update

    @property
    def timeout(self) -> timedelta:
        """
        Timeout for waiting for response to command.
        """
        return self._command_timeout

    @timeout.setter
    def timeout(self, new_timeout: timedelta):
        self._command_timeout = new_timeout
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from datetime import datetime, timedelta
|
|
3
|
+
from queue import Queue
|
|
4
|
+
from typing import Dict
|
|
5
|
+
|
|
6
|
+
from streaming_data_types import deserialise_6s4t as deserialise_stop_time
|
|
7
|
+
from streaming_data_types import deserialise_answ as deserialise_answer
|
|
8
|
+
from streaming_data_types import deserialise_pl72 as deserialise_start
|
|
9
|
+
from streaming_data_types import deserialise_wrdn as deserialise_stopped
|
|
10
|
+
from streaming_data_types import deserialise_x5f2 as deserialise_status
|
|
11
|
+
from streaming_data_types.action_response_answ import FILE_IDENTIFIER as ANSW_IDENTIFIER
|
|
12
|
+
from streaming_data_types.action_response_answ import Response
|
|
13
|
+
from streaming_data_types.finished_writing_wrdn import (
|
|
14
|
+
FILE_IDENTIFIER as STOPPED_IDENTIFIER,
|
|
15
|
+
)
|
|
16
|
+
from streaming_data_types.finished_writing_wrdn import WritingFinished
|
|
17
|
+
from streaming_data_types.run_start_pl72 import FILE_IDENTIFIER as START_IDENTIFIER
|
|
18
|
+
from streaming_data_types.run_start_pl72 import RunStartInfo
|
|
19
|
+
from streaming_data_types.run_stop_6s4t import FILE_IDENTIFIER as STOP_TIME_IDENTIFIER
|
|
20
|
+
from streaming_data_types.run_stop_6s4t import RunStopInfo
|
|
21
|
+
from streaming_data_types.status_x5f2 import FILE_IDENTIFIER as STAT_IDENTIFIER
|
|
22
|
+
from streaming_data_types.status_x5f2 import StatusMessage
|
|
23
|
+
from streaming_data_types.utils import get_schema
|
|
24
|
+
|
|
25
|
+
from file_writer_control.CommandStatus import CommandState, CommandStatus
|
|
26
|
+
from file_writer_control.JobStatus import JobState, JobStatus
|
|
27
|
+
from file_writer_control.StateExtractor import (
|
|
28
|
+
extract_job_state_from_answer,
|
|
29
|
+
extract_state_from_command_answer,
|
|
30
|
+
extract_worker_state_from_status,
|
|
31
|
+
)
|
|
32
|
+
from file_writer_control.WorkerStatus import WorkerState, WorkerStatus
|
|
33
|
+
|
|
34
|
+
DEAD_ENTITY_TIME_LIMIT = timedelta(hours=1)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class InThreadStatusTracker:
    """
    Implements de-coding of flatbuffer messages and sends updates of worker, job and command state/status back to the
    "main"-thread if there has been changes.
    """

    def __init__(self, status_queue: Queue):
        """
        Constructor.

        :param status_queue: The output queue to which state/status updates are pushed.
        """
        self.queue = status_queue
        self.known_workers: Dict[str, WorkerStatus] = {}
        self.known_jobs: Dict[str, JobStatus] = {}
        self.known_commands: Dict[str, CommandStatus] = {}

    def process_message(self, message: bytes):
        """
        Process a binary message. Messages with a schema that is not handled here are ignored.

        :param message: The binary message to be processed.
        """
        current_schema = get_schema(message).encode("utf-8")
        # Taken before processing so that every entity touched below counts
        # as updated when the statuses are sent.
        update_time = datetime.now()
        # Schema identifier -> (deserialiser, handler for the decoded message).
        msg_process_map = {
            ANSW_IDENTIFIER: (deserialise_answer, self.process_answer),
            STAT_IDENTIFIER: (deserialise_status, self.process_status),
            STOP_TIME_IDENTIFIER: (deserialise_stop_time, self.process_set_stop_time),
            START_IDENTIFIER: (deserialise_start, self.process_start),
            STOPPED_IDENTIFIER: (deserialise_stopped, self.process_stopped),
        }
        if current_schema in msg_process_map:
            deserialise, process = msg_process_map[current_schema]
            process(deserialise(message))

        self.send_status_if_updated(update_time)

    def _all_entities(self):
        """Return the known workers, jobs and commands (in that order) as one list."""
        return (
            list(self.known_workers.values())
            + list(self.known_jobs.values())
            + list(self.known_commands.values())
        )

    def send_status_if_updated(self, limit_time: datetime):
        """
        Sends status updates of workers, jobs and commands (to the status queue) if there has been any updates on or
        after the limit_time.

        :param limit_time: The cut-off time for deciding which updates should be sent to the status queue.
        """
        for entity in self._all_entities():
            if entity.last_update >= limit_time:
                self.queue.put(entity)

    def check_for_worker_presence(self, service_id: str):
        """
        Check if a service_id is known and add it to a list of known ones if it is not.

        :param service_id: The service identifier to look for.
        """
        if service_id not in self.known_workers:
            self.known_workers[service_id] = WorkerStatus(service_id)

    def check_for_job_presence(self, job_id: str):
        """
        Check if a job identifier is known and add it to a list of known ones if it is not.

        :param job_id: The job identifier to look for.
        """
        if job_id not in self.known_jobs:
            self.known_jobs[job_id] = JobStatus(job_id)

    def check_for_command_presence(self, job_id: str, command_id: str):
        """
        Check if a command identifier is known and add it to a list of known ones if it is not.

        :param job_id: The job identifier of the command that we are looking for. (Only used if we need to add the
            command.)
        :param command_id: The command identifier to look for.
        """
        if command_id not in self.known_commands:
            self.known_commands[command_id] = CommandStatus(job_id, command_id)

    def check_for_lost_connections(self):
        """
        Check workers, commands and jobs for the last update time and change the state of these if a timeout has been
        reached.
        """
        now = datetime.now()
        for entity in self._all_entities():
            entity.check_if_outdated(now)
        self.send_status_if_updated(now)

    def prune_dead_entities(self, current_time: datetime):
        """
        Will remove old jobs, workers and commands that have not been updated recently, i.e. whose last update is
        more than DEAD_ENTITY_TIME_LIMIT before current_time.

        :param current_time: The time to compare the last update of each entity against.
        """
        for entities_dictionary in (
            self.known_workers,
            self.known_commands,
            self.known_jobs,
        ):
            for key in list(entities_dictionary.keys()):
                last_update = entities_dictionary[key].last_update
                if last_update + DEAD_ENTITY_TIME_LIMIT < current_time:
                    del entities_dictionary[key]

    def process_answer(self, answer: Response):
        """
        Update workers, jobs and commands based on information in a response message.

        :param answer: The response/answer message to use for status updates.
        """
        self.check_for_worker_presence(answer.service_id)
        self.check_for_job_presence(answer.job_id)
        self.check_for_command_presence(answer.job_id, answer.command_id)
        current_job = self.known_jobs[answer.job_id]
        new_job_state = extract_job_state_from_answer(answer)
        if new_job_state is not None:
            current_job.state = new_job_state
        current_command = self.known_commands[answer.command_id]
        current_command.state = extract_state_from_command_answer(answer)
        current_command.message = answer.message
        # Use the code carried by this answer, not the attribute of the
        # Response class itself.
        current_command.response_code = answer.status_code
        current_job.message = answer.message
        try:
            current_job.service_id = answer.service_id
        except RuntimeError:
            # NOTE(review): presumably raised by the JobStatus.service_id
            # setter (not visible here); deliberately ignored.
            pass

    def process_status(self, status_update: StatusMessage):
        """
        Update workers and jobs based on information in a status message.

        :param status_update: The status message to use for updates.
        :raises RuntimeError: If the worker is writing but the status JSON can not be decoded or lacks the
            "job_id" or "file_being_written" keys.
        """
        self.check_for_worker_presence(status_update.service_id)
        current_state = extract_worker_state_from_status(status_update)
        self.known_workers[status_update.service_id].state = current_state
        if current_state != WorkerState.WRITING:
            return
        try:
            json_data = json.loads(status_update.status_json)
            job_id = json_data["job_id"]
            file_name = json_data["file_being_written"]
        except (KeyError, json.JSONDecodeError) as error:
            raise RuntimeError(
                "Unable to extract JSON data from status message."
            ) from error
        self.check_for_job_presence(job_id)
        current_job = self.known_jobs[job_id]
        current_job.state = JobState.WRITING
        current_job.file_name = file_name
        current_job.metadata = json_data
        # For some jobs, we will only know the service-id when a worker starts working on a job.
        # Thus we need the following statement to update the (known) service-id of a job.
        try:
            current_job.service_id = status_update.service_id
        except RuntimeError:
            pass  # Expected error (i.e. the job is not known), do nothing

    def process_set_stop_time(self, stop_time: RunStopInfo):
        """
        Update commands and jobs based on information in a "set stop time" message.

        :param stop_time: The "stop" message to use for updates.
        """
        self.check_for_command_presence(stop_time.job_id, stop_time.command_id)
        self.known_commands[stop_time.command_id].state = CommandState.WAITING_RESPONSE

    def process_start(self, start: RunStartInfo):
        """
        Update commands and jobs based on information in a "start" message.

        :param start: The "start" message to use for updates.
        """
        self.check_for_job_presence(start.job_id)
        # The start command is registered under the job id itself.
        self.check_for_command_presence(start.job_id, start.job_id)
        self.known_commands[start.job_id].state = CommandState.WAITING_RESPONSE

    def process_stopped(self, stopped: WritingFinished):
        """
        Update workers and jobs based on information in a "has stopped" message.

        :param stopped: The "stopped" message to use for updates.
        """
        self.check_for_job_presence(stopped.job_id)
        self.check_for_worker_presence(stopped.service_id)
        current_job = self.known_jobs[stopped.job_id]
        if stopped.error_encountered:
            current_job.state = JobState.ERROR
        else:
            current_job.state = JobState.DONE
            current_job.metadata = json.loads(stopped.metadata)
        current_job.message = stopped.message
        self.known_workers[stopped.service_id].state = WorkerState.IDLE
|