cgse-common 2024.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
egse/control.py ADDED
@@ -0,0 +1,429 @@
1
+ """
2
+ This module defines the abstract class for any control server and some convenience functions.
3
+ """
4
+ import abc
5
+ import logging
6
+ import pickle
7
+ import threading
8
+ from typing import Any
9
+
10
+ import zmq
11
+
12
+ from egse.system import time_in_ms
13
+
14
+ try:
15
+ from egse.logger import close_all_zmq_handlers
16
+ except ImportError:
17
+ def close_all_zmq_handlers(): # noqa
18
+ pass
19
+
20
+ from egse.process import ProcessStatus
21
+ from egse.settings import Settings
22
+ from egse.system import do_every
23
+ from egse.system import get_average_execution_time
24
+ from egse.system import get_average_execution_times
25
+ from egse.system import get_full_classname
26
+ from egse.system import get_host_ip
27
+ from egse.system import save_average_execution_time
28
+
29
+ MODULE_LOGGER = logging.getLogger(__name__)
30
+ PROCESS_SETTINGS = Settings.load("PROCESS")
31
+
32
+
33
def is_control_server_active(endpoint: str = None, timeout: float = 0.5) -> bool:
    """
    Check if the control server is running.

    A pickled *Ping* message is sent over a fresh REQ socket and a pickled *Pong* answer
    is expected back within the timeout period.

    Any exception raised while connecting, sending or receiving is logged as a warning
    and reported as "not active"; it is not propagated to the caller.

    Args:
        endpoint (str): the endpoint to connect to, i.e. <protocol>://<address>:<port>
        timeout (float): timeout when waiting for a reply [seconds, default=0.5]
    Returns:
        True if the Control Server is running and replied with the expected answer.
    """
    ctx = zmq.Context.instance()

    return_code = False
    socket = None

    try:
        socket = ctx.socket(zmq.REQ)
        socket.connect(endpoint)
        socket.send(pickle.dumps("Ping"))
        rlist, _, _ = zmq.select([socket], [], [], timeout=timeout)
        if socket in rlist:
            # NOTE(review): pickle.loads() on data received from the network can execute
            # arbitrary code; only use this against trusted endpoints.
            response = pickle.loads(socket.recv())
            return_code = response == "Pong"
    except Exception as exc:
        MODULE_LOGGER.warning(f"Caught an exception while pinging a control server at {endpoint}: {exc}.")
    finally:
        # Always release the socket, also when connecting or sending failed. A socket that
        # is left open stays attached to the shared context; linger=0 discards the unsent
        # Ping so closing never waits for an absent server.
        if socket is not None:
            socket.close(linger=0)

    return return_code
63
+
64
+
65
class Response:
    """Common parent of every reply exchanged in the client-server communication.

    A concrete reply is expressed through one of the subclasses; which subclass is
    used depends on the type of response.
    """

    def __init__(self, message: str):
        # The text that str() returns for this response.
        self.message = message

    def __str__(self):
        return self.message

    @property
    def successful(self):
        """Returns False when this Response is also an Exception, True otherwise."""
        if isinstance(self, Exception):
            return False
        return True
82
+
83
+
84
class Failure(Response, Exception):
    """A response telling the client that something went wrong at the other side.

    The class wraps an Exception that was caught and has to be handed over to the
    client. The intended use is:
    ```
    try:
        # perform some useful action that might raise an Exception
    except SomeException as exc:
        return Failure("Our action failed", exc)
    ```
    The Exception that was originally raised (`SomeException` in the example) is kept
    in the `cause` attribute, where the client can inspect it. When a cause is given,
    its string representation is appended to the message as `"<message>: <cause>"`.

    A Failure is an Exception itself, so its `successful` property is False, which
    makes it easy for the calling method to test the outcome:

    ```
    rc: Response = function_that_returns_a_response()

    if not rc.successful:
        # handle the failure
    else:
        # handle success
    ```

    """

    def __init__(self, message: str, cause: Exception = None):
        if cause is None:
            text = message
        else:
            text = f"{message}: {cause}"
        super().__init__(text)
        self.cause = cause
116
+
117
+
118
class Success(Response):
    """A response telling the client that the request succeeded.

    The return code of the action or function that has to be passed on to the client
    is stored in `return_code`. When a return code is given, it is appended to the
    message as `"<message>: <return_code>"`.

    `Success` is not an `Exception`, so its `successful` property is True.
    """

    def __init__(self, message: str, return_code: Any = None):
        if return_code is None:
            text = message
        else:
            text = f"{message}: {return_code}"
        super().__init__(text)
        self.return_code = return_code
131
+
132
+
133
class Message(Response):
    """A plain message response.

    Use a Message when there is no Failure to report, but no return code either. It is
    the alternative for returning None.

    Message is not an Exception, so its `successful` property is True.
    """
143
+
144
+
145
class ControlServer(metaclass=abc.ABCMeta):
    """
    Abstract base for all device control servers and for the Storage Manager and
    Configuration Manager.

    A Control Server polls its ZeroMQ sockets and hands an incoming command over to the
    `execute()` method of the protocol object that belongs to that socket.

    A sub-class shall take care of the following:

    * create the device protocol -> `self.device_protocol`
    * bind the command socket to the device protocol -> `self.dev_ctrl_cmd_sock`
    * register the command socket in the poll set -> `self.poller`

    """

    def __init__(self):
        from egse.monitoring import MonitoringProtocol
        from egse.services import ServiceProtocol

        self._process_status = ProcessStatus()

        # Daemon thread that hands the process status update to `do_every`, together
        # with the METRICS_INTERVAL setting.

        self._timer_thread = threading.Thread(
            target=do_every,
            args=(PROCESS_SETTINGS.METRICS_INTERVAL, self._process_status.update),
            daemon=True,
        )
        self._timer_thread.start()

        # Default logger, named after the concrete class. A sub-class is expected to
        # replace it; if it does not, the logger name reveals which sub-class that was.

        self.logger = logging.getLogger(get_full_classname(self))

        self.interrupted = False
        self.delay = 1000  # time between publishing status information [milliseconds]
        self.hk_delay = 1000  # time between saving housekeeping information [milliseconds]

        self.zcontext = zmq.Context.instance()
        self.poller = zmq.Poller()

        self.device_protocol = None  # to be assigned by the sub-class
        self.service_protocol = ServiceProtocol(self)
        self.monitoring_protocol = MonitoringProtocol(self)

        # Socket on which service requests arrive

        self.dev_ctrl_service_sock = self.zcontext.socket(zmq.REP)
        self.service_protocol.bind(self.dev_ctrl_service_sock)

        # Socket on which monitoring information is published

        self.dev_ctrl_mon_sock = self.zcontext.socket(zmq.PUB)
        self.monitoring_protocol.bind(self.dev_ctrl_mon_sock)

        # Socket on which device commands arrive. Binding it is left to the device
        # protocol, which is created in the sub-class.

        self.dev_ctrl_cmd_sock = self.zcontext.socket(zmq.REP)

        # Poll set, the sub-class adds the command socket

        self.poller.register(self.dev_ctrl_service_sock, zmq.POLLIN)
        self.poller.register(self.dev_ctrl_mon_sock, zmq.POLLIN)

    @abc.abstractmethod
    def get_communication_protocol(self):
        """Shall be implemented by the sub-class."""

    @abc.abstractmethod
    def get_commanding_port(self):
        """Shall be implemented by the sub-class."""

    @abc.abstractmethod
    def get_service_port(self):
        """Shall be implemented by the sub-class."""

    @abc.abstractmethod
    def get_monitoring_port(self):
        """Shall be implemented by the sub-class."""

    def get_ip_address(self):
        """Returns the result of `get_host_ip()`."""
        return get_host_ip()

    def get_storage_mnemonic(self):
        """Returns the name of the concrete class."""
        return self.__class__.__name__

    def get_process_status(self):
        """Returns the process status as a dictionary."""
        return self._process_status.as_dict()

    def get_average_execution_times(self):
        """Returns the result of the module level `get_average_execution_times()`."""
        return get_average_execution_times()

    def set_delay(self, seconds: float) -> float:
        """
        Sets the delay time for monitoring, i.e. the time between two successive calls of the
        `get_status()` function of the device protocol.

        The delay that is set can be longer than the delay that was requested. The delay is never
        shorter than the average execution time of `get_status()` plus 0.2 seconds, which should
        prevent the server from blocking when a too short delay time is requested.

        Args:
            seconds: the number of seconds between the monitoring calls.
        Returns:
            The delay that was set in milliseconds.
        """
        average = get_average_execution_time(self.device_protocol.get_status)
        requested_ms = seconds * 1000
        minimum_ms = (average + 0.2) * 1000
        self.delay = max(requested_ms, minimum_ms)
        return self.delay

    def set_hk_delay(self, seconds) -> float:
        """
        Sets the delay time for housekeeping, i.e. the time between two successive calls of the
        `get_housekeeping()` function of the device protocol.

        The delay that is set can be longer than the delay that was requested. The delay is never
        shorter than the average execution time of `get_housekeeping()` plus 0.2 seconds, which
        should prevent the server from blocking when a too short delay time is requested.

        Args:
            seconds: the number of seconds between the housekeeping calls.
        Returns:
            The delay that was set in milliseconds.
        """
        average = get_average_execution_time(self.device_protocol.get_housekeeping)
        requested_ms = seconds * 1000
        minimum_ms = (average + 0.2) * 1000
        self.hk_delay = max(requested_ms, minimum_ms)
        return self.hk_delay

    def set_logging_level(self, level):
        """Sets the level of the logger of this control server."""
        self.logger.setLevel(level=level)

    def quit(self):
        """Flags the server as interrupted, `serve()` leaves its loop when it sees the flag."""
        self.interrupted = True

    def before_serve(self):
        """Hook that is called at the start of `serve()`, does nothing by default."""

    def after_serve(self):
        """Hook that is called by `serve()` after the loop has ended, does nothing by default."""

    def is_storage_manager_active(self):
        """
        Shall be implemented by the subclass when information needs to be stored. The default
        implementation returns False.

        Note: you might want to set a specific timeout when checking for the Storage Manager.

        Note: If this method returns True, the following methods shall also be implemented by the subclass:

        * register_to_storage_manager()
        * unregister_from_storage_manager()
        * store_housekeeping_information()

        """
        return False

    def serve(self):
        """
        Runs the main loop of the control server until a quit is requested or the device
        protocol reports that it is no longer alive.

        Each iteration polls the sockets, executes pending device and service commands, and
        sends out status and housekeeping information when their delay has passed. After the
        loop the sockets are closed and the ZeroMQ context is terminated.
        """

        self.before_serve()

        # Find out once if a Storage Manager is available

        has_storage = self.is_storage_manager_active()

        if has_storage:
            self.register_to_storage_manager()

        # This approach is very simplistic and not time efficient
        # We probably want to use a Timer that executes the monitoring and saving actions at
        # dedicated times in the background.

        # FIXME; we shall use the time.perf_counter() here!

        status_sent_at = time_in_ms()
        hk_saved_at = time_in_ms()

        while True:
            try:
                ready = dict(self.poller.poll(50))  # timeout in milliseconds, do not block
            except KeyboardInterrupt:
                self.logger.warning("Keyboard interrupt caught!")
                self.logger.warning(
                    "The ControlServer can not be interrupted with CTRL-C, "
                    "send a quit command to the server."
                )
                continue

            if self.dev_ctrl_cmd_sock in ready:
                self.device_protocol.execute()

            if self.dev_ctrl_service_sock in ready:
                self.service_protocol.execute()

            # Periodic publishing of the status information, every `self.delay` milliseconds.

            if time_in_ms() - status_sent_at >= self.delay:
                status_sent_at = time_in_ms()
                status = save_average_execution_time(self.device_protocol.get_status)
                self.monitoring_protocol.send_status(status)

            # Periodic saving of the housekeeping, every `self.hk_delay` milliseconds. The
            # timestamp is also refreshed when there is no Storage Manager.

            if time_in_ms() - hk_saved_at >= self.hk_delay:
                hk_saved_at = time_in_ms()
                if has_storage:
                    housekeeping = save_average_execution_time(self.device_protocol.get_housekeeping)
                    self.store_housekeeping_information(housekeeping)

            if self.interrupted:
                self.logger.info(
                    f"Quit command received, closing down the {self.__class__.__name__}."
                )
                break

            # Some device protocol subclasses might start a number of threads or processes to
            # support the commanding. Leave the loop when these are no longer alive, so the
            # server terminates gracefully.

            if not self.device_protocol.is_alive():
                self.logger.error(
                    "Some Thread or sub-process that was started by Protocol has "
                    "died, terminating..."
                )
                break

        if has_storage:
            self.unregister_from_storage_manager()

        self.after_serve()

        self.device_protocol.quit()

        self.dev_ctrl_mon_sock.close()
        self.dev_ctrl_service_sock.close()
        self.dev_ctrl_cmd_sock.close()

        close_all_zmq_handlers()

        self.zcontext.term()

    def store_housekeeping_information(self, data: dict):
        """
        Send housekeeping information to the Storage manager. The default implementation
        does nothing.

        Subclasses need to overwrite this method if they want the device housekeeping information to be saved.

        Args:
            data (dict): a dictionary containing parameter name and value of all device housekeeping. There is also
                a timestamp that represents the date/time when the HK was received from the device.
        """

    def register_to_storage_manager(self):
        """
        Register this ControlServer to the Storage Manager so the housekeeping information of the device can be saved.
        The default implementation does nothing.

        Subclasses need to overwrite this method if they have housekeeping information to be stored. The following
        information is required for the registration:

        * origin: can be retrieved from `self.get_storage_mnemonic()`
        * persistence_class: one of the TYPES in egse.storage.persistence
        * prep: depending on the type of the persistence class (see respective documentation)

        The `egse.storage` module provides a convenience method that can be called from the method in the subclass:

        >>> from egse.storage import register_to_storage_manager  # noqa

        Note: the `egse.storage` module might not be available, it is provided by the `cgse-core` package.
        """

    def unregister_from_storage_manager(self):
        """
        Unregister this ControlServer from the Storage manager. The default implementation does nothing.

        Subclasses need to overwrite this method. The following information is required for the registration:

        * origin: can be retrieved from `self.get_storage_mnemonic()`

        The `egse.storage` module provides a convenience method that can be called from the method in the subclass:

        >>> from egse.storage import unregister_from_storage_manager  # noqa

        Note: the `egse.storage` module might not be available, it is provided by the `cgse-core` package.
        """