mccode-plumber 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,102 @@
1
+ from datetime import datetime
2
+
3
+ from file_writer_control.CommandHandler import CommandHandler
4
+ from file_writer_control.JobStatus import JobState
5
+ from file_writer_control.WorkerFinder import WorkerFinder
6
+ from file_writer_control.WriteJob import WriteJob
7
+
8
+
9
class JobHandler:
    """
    A stand in for controlling and checking the state of a job running on a file-writer instance.

    Every query and command is delegated to the WorkerFinder instance given to the constructor, using the job
    identifier that this object currently tracks.
    """

    def __init__(self, worker_finder: WorkerFinder, job_id=""):
        """
        Constructor.
        :param worker_finder: An instance of a class that inherits from WorkerFinder and implements the member function
        try_start_job of that class.
        :param job_id: (Optional) The job identifier of an existing job.
        """
        self._job_id = job_id
        self.worker_finder = worker_finder

    def start_job(self, job: WriteJob) -> CommandHandler:
        """
        Start a write job. This call is not blocking. It does not guarantee that the write job will actually be started.
        :param job: The write job to be started.
        .. note:: Starting a new job replaces the tracked job identifier, so this instance can no longer track or
        control previous jobs.
        :return: A CommandHandler instance that can be used to monitor the outcome of the attempt to start a write job.
        """
        # Switch to the new identifier before handing the job over; all later queries use it.
        self._job_id = job.job_id
        start_handler = self.worker_finder.try_start_job(job)
        return start_handler

    def get_state(self) -> JobState:
        """
        Get the state of the job, as reported by the worker finder for the tracked job identifier.
        """
        finder = self.worker_finder
        return finder.get_job_state(self._job_id)

    def is_done(self) -> bool:
        """
        Check whether the tracked job has finished.
        :return: True if the job state is DONE. False for every other state that is not a failure.
        :raises RuntimeError: If the job state is ERROR (the job message is included) or TIMEOUT.
        """
        state = self.worker_finder.get_job_state(self._job_id)
        if state == JobState.ERROR:
            failure = f'Job failed with error message "{self.get_message()}".'
            raise RuntimeError(failure)
        if state == JobState.TIMEOUT:
            raise RuntimeError("Timed out while trying to start write job.")
        return state == JobState.DONE

    def get_message(self) -> str:
        """
        Get a string describing the error that was encountered when running the job. (If there was an error.)
        :return: The message of the job status, or an empty string if no status is known for the job.
        """
        status = self.worker_finder.get_job_status(self._job_id)
        return "" if status is None else status.message

    def set_stop_time(self, stop_time: datetime) -> CommandHandler:
        """
        Set a new stop time for the file-writing job. There is no guarantee that the stop time will actually be changed.
        This call is not blocking. Calling this member function will have no effect on the stop-time before the write
        job has started.

        :param stop_time: The new stop time of the job.
        :return: A CommandHandler instance that can be used to monitor the outcome of the attempt to set a new stop time.
        """
        status = self.worker_finder.get_job_status(self._job_id)
        # Without a known status there is no service identifier to address; None is sent instead.
        target_service = status.service_id if status else None
        return self.worker_finder.try_send_stop_time(
            target_service, self._job_id, stop_time
        )

    def stop_now(self) -> CommandHandler:
        """
        See the documentation for abort_write_job().
        """
        return self.abort_write_job()

    def abort_write_job(self) -> CommandHandler:
        """
        Tell the file-writer to abort writing. There is no guarantee that will actually happen though.
        This call is not blocking. Calling this member function will have no effect if done before a write job has
        actually started.

        :return: A CommandHandler instance that can be used to monitor the outcome of the attempt to abort the job.
        """
        status = self.worker_finder.get_job_status(self._job_id)
        # Without a known status there is no service identifier to address; None is sent instead.
        target_service = status.service_id if status else None
        return self.worker_finder.try_send_abort(target_service, self._job_id)

    @property
    def job_id(self) -> str:
        """
        The job identifier of the job that this instance of the JobHandler class represents.
        """
        return self._job_id
@@ -0,0 +1,147 @@
1
+ from datetime import datetime, timedelta
2
+ from enum import Enum, auto
3
+ from typing import Dict, Optional
4
+
5
# Default time without any update after which JobStatus.check_if_outdated() flags a job as timed out.
DEFAULT_TIMEOUT = timedelta(seconds=15)


class JobState(Enum):
    """
    The state of a job.
    """

    NO_JOB = auto()
    WAITING = auto()  # Initial state of a newly created JobStatus.
    WRITING = auto()
    TIMEOUT = auto()  # Set locally by JobStatus.check_if_outdated() when updates stop arriving.
    ERROR = auto()
    DONE = auto()
    UNAVAILABLE = auto()


class JobStatus:
    """
    Contains general information about the (execution) of a job.
    """

    def __init__(self, job_id: str, timeout: Optional[timedelta] = DEFAULT_TIMEOUT):
        """
        :param job_id: The (unique) identifier of the job.
        :param timeout: The longest time allowed between updates before check_if_outdated() flags the job as timed
        out. None disables the time out check.
        """
        self._job_id = job_id
        self._timeout = timeout
        self._service_id = ""
        self._file_name = ""
        self._last_update = datetime.now()
        self._state = JobState.WAITING
        self._metadata: Optional[Dict] = None
        self._message = ""

    def update_status(self, new_status: "JobStatus") -> None:
        """
        Updates the status/state of this instance of the JobStatus class, using another instance.
        .. note:: The job identifier of this instance and the other must be identical.
        :param new_status: The other instance of the JobStatus class.
        :raises RuntimeError: If the job identifiers of the two instances differ.
        """
        if new_status.job_id != self.job_id:
            raise RuntimeError(
                f"Job id of status update is not correct ({self.job_id} vs {new_status.job_id})"
            )
        self._state = new_status.state
        # An empty message in the update must not erase a message that was stored earlier.
        if new_status.message:
            self._message = new_status.message
        self._service_id = new_status.service_id
        self._file_name = new_status.file_name
        self._last_update = new_status.last_update
        self._metadata = new_status.metadata

    def check_if_outdated(self, current_time: datetime):
        """
        Given the current time, state and the time of the last update: Have we lost the connection?
        If so, the state is changed to JobState.TIMEOUT and the time of the last update is set to current_time.
        Jobs that are already DONE, in ERROR or in TIMEOUT are left untouched.
        :param current_time: The current time
        """
        if self._timeout is None:
            # No time out configured: comparing a timedelta with None would raise a TypeError, and a job without a
            # time out can never be outdated.
            return
        if self.state in (JobState.DONE, JobState.ERROR, JobState.TIMEOUT):
            return
        if current_time - self.last_update > self._timeout:
            self._state = JobState.TIMEOUT
            self._last_update = current_time

    @property
    def job_id(self) -> str:
        """
        The (unique) job identifier.
        """
        return self._job_id

    @property
    def service_id(self) -> str:
        """
        The (unique) service identifier of the instance of the file-writer that executes the current job.
        """
        return self._service_id

    @service_id.setter
    def service_id(self, new_service_id: str) -> None:
        # The service identifier can be set once; setting the same value again is a no-op, a different one an error.
        if not self._service_id:
            self._service_id = new_service_id
            self._last_update = datetime.now()
        elif self._service_id == new_service_id:
            return
        else:
            raise RuntimeError(
                f'Can not set service_id of job with id "{self._job_id}" to "{new_service_id}" as it has already been set to "{self._service_id}".'
            )

    @property
    def last_update(self) -> datetime:
        """
        The local time stamp of the last update of the status of the job.
        """
        return self._last_update

    @property
    def state(self) -> JobState:
        """
        The current state of the job.
        """
        return self._state

    @state.setter
    def state(self, new_state: JobState) -> None:
        self._state = new_state
        self._last_update = datetime.now()

    @property
    def file_name(self) -> Optional[str]:
        """
        The file name of the job. None if the file name is not known.
        """
        if self._file_name == "":
            return None
        return self._file_name

    @file_name.setter
    def file_name(self, new_file_name: str) -> None:
        self._file_name = new_file_name
        self._last_update = datetime.now()

    @property
    def message(self) -> str:
        """
        Status/state message of the job as received from the file-writer.
        """
        return self._message

    @message.setter
    def message(self, new_message: str) -> None:
        # Empty messages are ignored so that an existing message is never erased.
        if new_message:
            self._message = new_message
            self._last_update = datetime.now()

    @property
    def metadata(self) -> Optional[Dict]:
        """
        The metadata stored for the job. None if no metadata has been set.
        """
        return self._metadata

    @metadata.setter
    def metadata(self, metadata: Dict) -> None:
        self._metadata = metadata
@@ -0,0 +1,22 @@
1
+ import re
2
+
3
+
4
class KafkaTopicUrl:
    """
    Class for extracting address, port and topic name from a Kafka topic url.

    The accepted form is ``[kafka://]host[:port]/topic``, optionally surrounded by white space. The port may have
    at most five digits; when it is left out the default Kafka broker port (9092) is used.

    After construction the parts are available as the attributes ``host``, ``port``, ``host_port`` ("host:port")
    and ``topic``.
    """

    # The ``{1,5}`` quantifier has to bind to ``\d`` itself. Written as ``(\d+){1,5}`` it repeats the whole group,
    # which lets a port of any length through.
    test_regexp = re.compile(
        r"^\s*(?:kafka://)?(?:(?P<host>[^/?#:]+)(?::(?P<port>\d{1,5}))?)/(?P<topic>[a-zA-Z0-9._-]+)\s*$"
    )

    def __init__(self, url: str):
        """
        :param url: The Kafka topic url to take apart.
        :raises RuntimeError: If the url does not have the accepted form.
        """
        result = self.test_regexp.match(url)
        if result is None:
            raise RuntimeError("Unable to match kafka url.")
        self.port = 9092  # Default Kafka broker port
        if result.group("port") is not None:
            self.port = int(result.group("port"))
        self.host = result.group("host")
        self.host_port = f"{self.host}:{self.port}"
        self.topic = result.group("topic")
@@ -0,0 +1,58 @@
1
+ from json import loads
2
+ from typing import Optional
3
+
4
+ from streaming_data_types.action_response_answ import (
5
+ ActionOutcome,
6
+ ActionResponse,
7
+ ActionType,
8
+ )
9
+ from streaming_data_types.status_x5f2 import StatusMessage
10
+
11
+ from file_writer_control.CommandStatus import CommandState
12
+ from file_writer_control.JobStatus import JobState
13
+ from file_writer_control.WorkerStatus import WorkerState
14
+
15
+
16
def extract_worker_state_from_status(status: StatusMessage) -> WorkerState:
    """
    Determine the worker state (i.e. file-writer state) based on a file-writer status message.

    The JSON document in ``status.status_json`` is decoded and its "state" entry is translated: "writing" and
    "idle" map to the matching WorkerState, a missing or unrecognised entry yields WorkerState.UNKNOWN.
    :param status: A status update message from a file-writer.
    :return: The extracted worker state.
    """
    known_states = {"writing": WorkerState.WRITING, "idle": WorkerState.IDLE}
    decoded = loads(status.status_json)
    try:
        # A KeyError here means either no "state" entry or a state name that is not in the table.
        return known_states[decoded["state"]]
    except KeyError:
        return WorkerState.UNKNOWN
29
+
30
+
31
def extract_state_from_command_answer(answer: ActionResponse) -> CommandState:
    """
    Determine the command state from an action response message.

    Only ActionOutcome.Success is reported as a success; a failure and any other outcome are reported as an error.
    :param answer: The action (either "start a job" or "set stop time") response from a file-writer.
    :return: The extracted command state/response.
    """
    outcome_to_state = {
        ActionOutcome.Failure: CommandState.ERROR,
        ActionOutcome.Success: CommandState.SUCCESS,
    }
    # Outcomes that are not in the table are treated as errors.
    return outcome_to_state.get(answer.outcome, CommandState.ERROR)
45
+
46
+
47
def extract_job_state_from_answer(answer: ActionResponse) -> Optional[JobState]:
    """
    Determine the file writing job state from an action response message.

    Only answers to a "start job" action say anything about the job state: a successful start means the job is
    writing, any other outcome means it is in error.
    :param answer: The action (either "start a job" or "set stop time") response from a file-writer.
    :return: The extracted job state, None if job state can not be determined from this answer.
    """
    if answer.action != ActionType.StartJob:
        return None
    started = answer.outcome == ActionOutcome.Success
    return JobState.WRITING if started else JobState.ERROR
@@ -0,0 +1,139 @@
1
+ import uuid
2
+ from datetime import datetime
3
+ from typing import Dict, List, Optional
4
+
5
+ from kafka import KafkaProducer
6
+ from kafka.errors import NoBrokersAvailable
7
+ from streaming_data_types.run_stop_6s4t import serialise_6s4t as serialise_stop
8
+
9
+ from file_writer_control.CommandChannel import CommandChannel
10
+ from file_writer_control.CommandHandler import CommandHandler
11
+ from file_writer_control.CommandStatus import CommandStatus
12
+ from file_writer_control.JobStatus import JobState, JobStatus
13
+ from file_writer_control.KafkaTopicUrl import KafkaTopicUrl
14
+ from file_writer_control.WorkerStatus import WorkerStatus
15
+ from file_writer_control.WriteJob import WriteJob
16
+
17
+
18
class WorkerFinderBase:
    """
    Common functionality for sending commands to file-writers and for looking up the known workers, jobs and commands.

    Starting a job is left to the classes inheriting from this one (see try_start_job()).
    """

    def __init__(
        self,
        command_topic: str,
        command_channel: CommandChannel,
        message_producer: KafkaProducer,
    ):
        """
        Constructor.
        :param command_topic: The name of the topic that commands are sent to.
        :param command_channel: The channel used for registering commands and for looking up workers, jobs and commands.
        :param message_producer: The producer used for sending the commands.
        """
        self.command_topic = command_topic
        self.command_channel = command_channel
        self.message_producer = message_producer

    def send_command(self, message: bytes):
        """
        Send a message (command) to the file-writer "command"-topic.
        :param message: The command/message as binary data.
        """
        producer = self.message_producer
        producer.send(self.command_topic, message)

    def try_start_job(self, job: WriteJob) -> CommandHandler:
        """
        Attempts to start a file-writing job. This function is not blocking. No guarantees are given that the job will
        be successfully started.
        .. note:: This method must be implemented by the classes inheriting from this one.
        :param job: The file-writing job to be started.
        :return: A CommandHandler instance for (more) easily checking the outcome of attempting to start a write job.
        :raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("Not implemented in base class.")

    def try_send_stop_time(
        self, service_id: Optional[str], job_id: str, stop_time: datetime
    ) -> CommandHandler:
        """
        Sends a "set stop time" message to a file-writer running a job as identified by the parameters.
        This function is not blocking. No guarantees are given that this command will be followed.
        :param service_id: The (optional) service identifier of the file-writer to receive the command.
        :param job_id: The job identifier of the currently running file-writer job.
        :param stop_time: The new stop time.
        :return: A CommandHandler instance for (more) easily checking the outcome of setting a new stop time.
        """
        new_command_id = str(uuid.uuid1())
        payload = serialise_stop(
            job_id=job_id,
            service_id=service_id,
            command_id=new_command_id,
            stop_time=stop_time,
        )
        # The command is registered with the channel before it is sent.
        self.command_channel.add_command_id(job_id=job_id, command_id=new_command_id)
        self.send_command(payload)
        return CommandHandler(self.command_channel, new_command_id)

    def try_send_stop_now(
        self, service_id: Optional[str], job_id: str
    ) -> CommandHandler:
        """
        See documentation for `try_send_abort()`.
        """
        return self.try_send_abort(service_id, job_id)

    def try_send_abort(self, service_id: Optional[str], job_id: str) -> CommandHandler:
        """
        Sends an "abort" message to a file-writer running a job as identified by the parameters of this function.
        This function is not blocking. No guarantees are given that this command will be followed.
        :param service_id: The (optional) service identifier of the file-writer to receive the command.
        :param job_id: The job identifier of the currently running file-writer job.
        :return: A CommandHandler instance for (more) easily checking the outcome of the "abort" command.
        """
        # The abort is sent as a "set stop time" command with the stop time 0 (an int, not a datetime).
        # NOTE(review): presumably 0 is understood as "stop immediately" - confirm against the serialiser.
        immediate_stop = 0
        return self.try_send_stop_time(service_id, job_id, immediate_stop)

    def list_known_workers(self) -> List[WorkerStatus]:
        """
        :return: A list of the (known) status of the workers publishing status updates to the configured command topic.
        """
        channel = self.command_channel
        return channel.list_workers()

    def list_known_jobs(self) -> List[JobStatus]:
        """
        :return: A list of the (known) jobs and their status as published on the configured command topic.
        """
        channel = self.command_channel
        return channel.list_jobs()

    def list_known_commands(self) -> List[CommandStatus]:
        """
        :return: A list of the (known) commands and their outcomes as published on the configured command topic.
        """
        channel = self.command_channel
        return channel.list_commands()

    def get_job_state(self, job_id: str) -> JobState:
        """
        Get the state of a specific job.
        :param job_id: The (unique) identifier of the job that we are trying to find the state of.
        :return: The state of the job if known, JobState.UNAVAILABLE if job is not known.
        """
        job = self.command_channel.get_job(job_id)
        return JobState.UNAVAILABLE if job is None else job.state

    def get_job_status(self, job_id: str) -> Optional[JobStatus]:
        """
        Get the full (known) status of a specific job.
        :param job_id: The (unique) identifier of the job that we are trying to find the status of.
        :return: The status of the job if known. None if it is not.
        """
        channel = self.command_channel
        return channel.get_job(job_id)
125
+
126
+
127
class WorkerFinder(WorkerFinderBase):
    """
    A WorkerFinderBase that creates its own command channel and Kafka producer from a command topic url.
    """

    def __init__(
        self, command_topic_url: str, kafka_config: Optional[Dict[str, str]] = None
    ):
        """
        :param command_topic_url: The url of the Kafka topic used for commands, see KafkaTopicUrl for the format.
        :param kafka_config: (Optional) Extra settings that are handed to the command channel and passed as keyword
        arguments to the Kafka producer.
        :raises NoBrokersAvailable: If no Kafka producer could be created for the broker given in the url.
        """
        # A fresh dictionary per call: a ``{}`` default would be one object shared by every instance, and it is
        # handed on to code that could modify it.
        if kafka_config is None:
            kafka_config = {}
        temp_cmd_ch = CommandChannel(command_topic_url, kafka_config=kafka_config)
        command_url = KafkaTopicUrl(command_topic_url)
        try:
            temp_producer = KafkaProducer(
                bootstrap_servers=[command_url.host_port], **kafka_config
            )
        except NoBrokersAvailable as e:
            # Raised again with the broker address included, keeping the original error as the cause.
            raise NoBrokersAvailable(
                f'Unable to find brokers (or connect to brokers) on address: "{command_url.host_port}"'
            ) from e
        super().__init__(command_url.topic, temp_cmd_ch, temp_producer)
@@ -0,0 +1,70 @@
1
+ from typing import Dict
2
+
3
+ from kafka import KafkaProducer
4
+ from kafka.errors import NoBrokersAvailable
5
+
6
+ from file_writer_control.CommandHandler import CommandHandler
7
+ from file_writer_control.CommandStatus import CommandState
8
+ from file_writer_control.KafkaTopicUrl import KafkaTopicUrl
9
+ from file_writer_control.WorkerFinder import WorkerFinder
10
+ from file_writer_control.WriteJob import WriteJob
11
+
12
+
13
class WorkerJobPool(WorkerFinder):
    """
    A child of WorkerFinder intended for use with "worker pool" style of starting a file-writing job.
    """

    def __init__(
        self,
        job_topic_url: str,
        command_topic_url: str,
        max_message_size: int = 1048576 * 200,
        kafka_config: Dict[str, str] = {},
    ):
        """
        :param job_topic_url: The Kafka topic that the available file-writers are listening to for write jobs.
        :param command_topic_url: The Kafka topic that a file-writer uses to send status updates to and receive direct
        commands from.
        :param max_message_size: The maximum message (actually "request") size.
        :param kafka_config: (Optional) Extra settings that are handed to the parent constructor and passed as keyword
        arguments to the Kafka producer of the job pool. It is only read here, never modified.
        :raises NoBrokersAvailable: If no Kafka producer could be created for the broker of the job topic.
        """
        super().__init__(command_topic_url, kafka_config=kafka_config)
        self._job_pool = KafkaTopicUrl(job_topic_url)
        self._max_message_size = max_message_size
        try:
            # Both the request size and the send buffer are sized to the largest message that is allowed.
            self._pool_producer = KafkaProducer(
                bootstrap_servers=[self._job_pool.host_port],
                max_request_size=max_message_size,
                buffer_memory=max_message_size,
                **kafka_config,
            )
        except NoBrokersAvailable as e:
            # Raised again with the broker address included, keeping the original error as the cause.
            raise NoBrokersAvailable(
                f'Unable to find brokers (or connect to brokers) on address: "{self._job_pool.host_port}"'
            ) from e

    def _send_pool_message(self, message: bytes):
        """
        Send a message to the Kafka topic that is configured as the job-pool topic.
        .. note:: If the file-writer has been configured properly, it will only accept start-job messages to this topic.
        :param message: The binary data of the message.
        :raises RuntimeError: If the message is as large as, or larger than, the configured maximum message size.
        """
        message_size = len(message)
        if message_size >= self._max_message_size:
            # Each fragment ends in a space: adjacent string literals are joined without any separator.
            raise RuntimeError(
                f"Unable to send Kafka message as message size is too large ({message_size} vs "
                f"{self._max_message_size} bytes). Increase max message size with the 'max_message_size' "
                f"constructor argument."
            )
        self._pool_producer.send(self._job_pool.topic, message)

    def try_start_job(self, job: WriteJob) -> CommandHandler:
        """
        See base class for documentation.

        The job identifier is also used as the command identifier of the start command. The job and the command are
        registered with the command channel, and the command is marked as waiting for a response, before the start
        message is sent to the job-pool topic.
        """
        self.command_channel.add_job_id(job.job_id)
        self.command_channel.add_command_id(job.job_id, job.job_id)
        start_command = self.command_channel.get_command(job.job_id)
        start_command.state = CommandState.WAITING_RESPONSE
        self._send_pool_message(job.get_start_message())
        return CommandHandler(self.command_channel, job.job_id)
@@ -0,0 +1,88 @@
1
+ from datetime import datetime, timedelta
2
+ from enum import Enum, auto
3
+ from typing import Optional
4
+
5
# Default time without any update after which WorkerStatus.check_if_outdated() flags a worker as unavailable.
DEFAULT_TIMEOUT = timedelta(seconds=15)


class WorkerState(Enum):
    """
    The state of a worker (i.e. a file-writer instance).
    """

    IDLE = auto()
    WRITING = auto()
    UNKNOWN = auto()
    UNAVAILABLE = auto()  # Initial state of a WorkerStatus, and the state set when updates stop arriving.


class WorkerStatus:
    """
    Contains general status information about a worker.

    Two instances are equal when their service identifier and state are equal; the time of the last update is not
    compared. Because equality is defined on values that can change, instances are not hashable.
    """

    def __init__(self, service_id: str, timeout: Optional[timedelta] = DEFAULT_TIMEOUT):
        """
        :param service_id: The service identifier of the worker.
        :param timeout: The longest time allowed between updates before check_if_outdated() flags the worker as
        unavailable. None disables the check.
        """
        self._last_update = datetime.now()
        self._service_id = service_id
        self._timeout = timeout
        self._state = WorkerState.UNAVAILABLE

    def __eq__(self, other_status: object) -> bool:
        if not isinstance(other_status, WorkerStatus):
            # Returning NotImplemented (rather than raising) lets Python fall back to its default comparison, so
            # that e.g. ``status == None`` and ``status in some_list`` give False instead of failing.
            return NotImplemented
        return (
            self.service_id == other_status.service_id
            and self.state == other_status.state
        )

    def update_status(self, new_status: "WorkerStatus"):
        """
        Updates the status/state of this instance of the WorkerStatus class using another instance.
        .. note:: The service identifier of both this instance and the other one must be identical.
        :param new_status: The other instance of the WorkerStatus class.
        :raises RuntimeError: If the service identifiers of the two instances differ.
        """
        if new_status.service_id != self.service_id:
            raise RuntimeError(
                f"Service id of status update is not correct ({self.service_id} vs {new_status.service_id})"
            )
        self._state = new_status.state
        self._last_update = new_status.last_update

    def check_if_outdated(self, current_time: datetime):
        """
        Given the current time, state and the time of the last update: Have we lost the connection?
        If so, the state is changed to WorkerState.UNAVAILABLE and the time of the last update is set to
        current_time.
        :param current_time: The current time
        """
        if self._timeout is None:
            # No time out configured: comparing a timedelta with None would raise a TypeError, and a worker without
            # a time out can never be outdated.
            return
        if self.state == WorkerState.UNAVAILABLE:
            return
        if current_time - self.last_update > self._timeout:
            self._state = WorkerState.UNAVAILABLE
            self._last_update = current_time

    @property
    def state(self) -> WorkerState:
        """
        The current state of the worker.
        """
        return self._state

    @state.setter
    def state(self, new_state: WorkerState):
        self._last_update = datetime.now()
        self._state = new_state

    @property
    def service_id(self) -> str:
        """
        The service identifier of the worker that this instance of the WorkerStatus class represents.
        """
        return self._service_id

    @property
    def last_update(self) -> datetime:
        """
        The local time stamp of the last update of the status of the file-writer instance that this instance of the
        WorkerStatus class represents.
        """
        return self._last_update