cgse-core 0.17.2__py3-none-any.whl → 0.17.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cgse_core-0.17.2.dist-info → cgse_core-0.17.3.dist-info}/METADATA +1 -1
- {cgse_core-0.17.2.dist-info → cgse_core-0.17.3.dist-info}/RECORD +11 -17
- egse/connect.py +4 -477
- egse/dummy.py +3 -19
- egse/logger/__init__.py +2 -2
- egse/notifyhub/server.py +1 -3
- egse/registry/client.py +3 -9
- egse/registry/server.py +3 -11
- egse/registry/service.py +5 -11
- egse/async_control.py +0 -1085
- egse/async_control_claude.py +0 -807
- egse/confman/confman_acs.py +0 -35
- egse/metricshub/__init__.py +0 -0
- egse/metricshub/server.py +0 -271
- egse/notifyhub/test.py +0 -303
- {cgse_core-0.17.2.dist-info → cgse_core-0.17.3.dist-info}/WHEEL +0 -0
- {cgse_core-0.17.2.dist-info → cgse_core-0.17.3.dist-info}/entry_points.txt +0 -0
egse/async_control_claude.py
DELETED
@@ -1,807 +0,0 @@
"""
This module defines the abstract class for any Control Server and some convenience functions.
"""

import abc
import asyncio
import datetime
import logging
import pickle
import textwrap
from functools import partial
from typing import Callable
from typing import Coroutine
from typing import Type
from typing import Union

import zmq
import zmq.asyncio

from egse.decorators import retry
from egse.decorators import retry_with_exponential_backoff
from egse.listener import EVENT_ID
from egse.listener import Event
from egse.listener import Listeners
from egse.system import SignalCatcher

try:
    # This function is only available when the cgse-core package is installed
    from egse.logger import close_all_zmq_handlers
except ImportError:

    def close_all_zmq_handlers():  # noqa
        pass


from egse.process import ProcessStatus
from egse.settings import Settings
from egse.system import get_average_execution_time
from egse.system import get_average_execution_times
from egse.system import get_full_classname
from egse.system import get_host_ip
from egse.system import save_average_execution_time

_LOGGER = logging.getLogger(__name__)

PROCESS_SETTINGS = Settings.load("PROCESS")


async def is_control_server_active(endpoint: str = None, timeout: float = 0.5) -> bool:
    """Checks if the Control Server is running.

    This function sends a *Ping* message to the Control Server and expects a *Pong* answer back within the timeout
    period.

    Args:
        endpoint (str): Endpoint to connect to, i.e. <protocol>://<address>:<port>
        timeout (float): Timeout when waiting for a reply [s, default=0.5]

    Returns: True if the Control Server is running and replied with the expected answer; False otherwise.
    """

    if endpoint is None:
        raise ValueError(
            "endpoint argument not provided, please provide a string with this format: '<protocol>://<address>:<port>'"
        )

    ctx = zmq.asyncio.Context.instance()

    return_code = False

    try:
        socket = ctx.socket(zmq.REQ)
        socket.connect(endpoint)
        data = pickle.dumps("Ping")
        await socket.send(data)

        # Use asyncio.wait_for instead of zmq.select
        try:
            data = await asyncio.wait_for(socket.recv(), timeout=timeout)
            response = pickle.loads(data)
            return_code = response == "Pong"
        except asyncio.TimeoutError:
            pass

        socket.close(linger=0)
    except Exception as exc:
        _LOGGER.warning(f"Caught an exception while pinging a control server at {endpoint}: {exc}.")

    return return_code


# Synchronous version for backward compatibility
def is_control_server_active_sync(endpoint: str = None, timeout: float = 0.5) -> bool:
    """Synchronous version of is_control_server_active for backward compatibility.

    This function runs the async version in a new event loop.
    """
    if endpoint is None:
        raise ValueError(
            "endpoint argument not provided, please provide a string with this format: '<protocol>://<address>:<port>'"
        )

    # Create a new event loop for this function call
    loop = asyncio.new_event_loop()
    try:
        return loop.run_until_complete(is_control_server_active(endpoint, timeout))
    finally:
        loop.close()


# Helper function to periodically run a coroutine
async def do_every_async(interval: float, coroutine: Callable[[], Coroutine]) -> None:
    """Run a coroutine every interval seconds"""
    while True:
        await coroutine()
        await asyncio.sleep(interval)


class ControlServer(abc.ABC):
    """Base class for all device control servers and for the Storage Manager and Configuration Manager.

    A Control Server reads commands from a ZeroMQ socket and executes these commands by calling the `execute()` method
    of the commanding protocol class.

    The subclass shall define the following:

    - Define the device protocol class -> `self.device_protocol`
    - Bind the command socket to the device protocol -> `self.dev_ctrl_cmd_sock`
    - Register the command socket in the poll set -> `self.poller`

    """

    def __init__(self):
        """Initialisation of a new Control Server."""

        from egse.monitoring import MonitoringProtocol
        from egse.services import ServiceProtocol

        self._process_status = ProcessStatus()
        self._metrics_task = None  # Will be created in serve()

        # The logger will be overwritten by the subclass, if not, we use this logger with the name of the subclass.
        # That will help us to identify which subclass did not overwrite the logger attribute.

        self.logger = logging.getLogger(get_full_classname(self))

        self.listeners = Listeners()
        self.scheduled_tasks = []

        # Queue for sequential operations that must preserve ordering
        self.sequential_queue = asyncio.Queue()

        self.interrupted = False
        self.mon_delay = 1000  # Delay between publish status information [ms]
        self.hk_delay = 1000  # Delay between saving housekeeping information [ms]
        self.scheduled_task_delay = 10  # delay time between successive executions of scheduled tasks [seconds]

        self.zcontext = zmq.asyncio.Context.instance()

        # No need for explicit poller in asyncio version
        # Instead, we'll use asyncio.gather with tasks for each socket

        self.device_protocol = None  # This will be set in the subclass
        self.service_protocol = ServiceProtocol(self)
        self.monitoring_protocol = MonitoringProtocol(self)

        # Set up the Control Server waiting for service requests
        self.dev_ctrl_service_sock = self.zcontext.socket(zmq.REP)
        self.service_protocol.bind(self.dev_ctrl_service_sock)

        # Set up the Control Server for sending monitoring info
        self.dev_ctrl_mon_sock = self.zcontext.socket(zmq.PUB)
        self.monitoring_protocol.bind(self.dev_ctrl_mon_sock)

        # Set up the Control Server waiting for device commands.
        # The device protocol shall bind the socket in the subclass
        self.dev_ctrl_cmd_sock = self.zcontext.socket(zmq.REP)

        # Tasks will be created in serve()
        self.tasks = []
        self.event_loop = None

    @abc.abstractmethod
    def get_communication_protocol(self) -> str:
        """Returns the communication protocol used by the Control Server.

        Returns:
            Communication protocol used by the Control Server, as specified in the settings.
        """

        pass

    @abc.abstractmethod
    def get_commanding_port(self) -> int:
        """Returns the commanding port used by the Control Server.

        Returns:
            Commanding port used by the Control Server, as specified in the settings.
        """

        pass

    @abc.abstractmethod
    def get_service_port(self) -> int:
        """Returns the service port used by the Control Server.

        Returns:
            Service port used by the Control Server, as specified in the settings.
        """

        pass

    @abc.abstractmethod
    def get_monitoring_port(self) -> int:
        """Returns the monitoring port used by the Control Server.

        Returns:
            Monitoring port used by the Control Server, as specified in the settings.
        """

        pass

    @staticmethod
    def get_ip_address() -> str:
        """Returns the IP address of the current host."""
        return get_host_ip()

    def get_storage_mnemonic(self) -> str:
        """Returns the storage mnemonics used by the Control Server.

        This is a string that will appear in the filename with the housekeeping information of the device, as a way of
        identifying the device. If this is not implemented in the subclass, then the class name will be used.

        Returns:
            Storage mnemonics used by the Control Server, as specified in the settings.
        """

        return self.__class__.__name__

    def get_process_status(self) -> dict:
        """Returns the process status of the Control Server.

        Returns:
            Dictionary with the process status of the Control Server.
        """

        return self._process_status.as_dict()

    def get_average_execution_times(self) -> dict:
        """Returns the average execution times of all functions that have been monitored by this process.

        Returns:
            Dictionary with the average execution times of all functions that have been monitored by this process.
            The dictionary keys are the function names, and the values are the average execution times in ms.
        """

        return get_average_execution_times()

    def set_mon_delay(self, seconds: float) -> float:
        """Sets the delay time for monitoring.

        The delay time is the time between two successive executions of the `get_status()` function of the device
        protocol.

        It might happen that the delay time that is set is longer than what you requested. That is the case when the
        execution of the `get_status()` function takes longer than the requested delay time. That should prevent the
        server from blocking when a too short delay time is requested.

        Args:
            seconds (float): Number of seconds between the monitoring calls

        Returns:
            Delay that was set [ms].
        """

        execution_time = get_average_execution_time(self.device_protocol.get_status)
        self.mon_delay = max(seconds * 1000, (execution_time + 0.2) * 1000)

        return self.mon_delay

    def set_hk_delay(self, seconds: float) -> float:
        """Sets the delay time for housekeeping.

        The delay time is the time between two successive executions of the `get_housekeeping()` function of the device
        protocol.

        It might happen that the delay time that is set is longer than what you requested. That is the case when the
        execution of the `get_housekeeping()` function takes longer than the requested delay time. That should prevent
        the server from blocking when a too short delay time is requested.

        Args:
            seconds (float): Number of seconds between the housekeeping calls

        Returns:
            Delay that was set [ms].
        """

        execution_time = get_average_execution_time(self.device_protocol.get_housekeeping)
        self.hk_delay = max(seconds * 1000, (execution_time + 0.2) * 1000)

        return self.hk_delay

    def set_scheduled_task_delay(self, seconds: float):
        """
        Sets the delay time between successive executions of scheduled tasks.

        Args:
            seconds: the time interval between two successive executions [seconds]

        """
        self.scheduled_task_delay = seconds

    def set_logging_level(self, level: Union[int, str]) -> None:
        """Sets the logging level to the given level.

        Allowed logging levels are:

        - "CRITICAL" or "FATAL" or 50
        - "ERROR" or 40
        - "WARNING" or "WARN" or 30
        - "INFO" or 20
        - "DEBUG" or 10
        - "NOTSET" or 0

        Args:
            level (int | str): Logging level to use, specified as either a string or an integer
        """

        self.logger.setLevel(level=level)

    def quit(self) -> None:
        """Interrupts the Control Server."""

        self.interrupted = True
        if self.event_loop:
            for task in self.tasks:
                if not task.done():
                    task.cancel()

    async def before_serve(self) -> None:
        """
        This method needs to be overridden by the subclass if certain actions need to be executed before the control
        server is activated.
        """

        pass

    async def after_serve(self) -> None:
        """
        This method needs to be overridden by the subclass if certain actions need to be executed after the control
        server has been deactivated.
        """

        pass

    async def is_storage_manager_active(self) -> bool:
        """Checks if the Storage Manager is active.

        This method has to be implemented by the subclass if you need to store information.

        Note: You might want to set a specific timeout when checking for the Storage Manager.

        Note: If this method returns True, the following methods shall also be implemented by the subclass:

        - register_to_storage_manager()
        - unregister_from_storage_manager()
        - store_housekeeping_information()

        Returns:
            True if the Storage Manager is active; False otherwise.
        """

        return False

    async def handle_scheduled_tasks(self):
        """
        Executes or reschedules tasks in the `serve()` event loop.
        """
        self.scheduled_tasks.reverse()
        rescheduled_tasks = []
        while self.scheduled_tasks:
            task_info = self.scheduled_tasks.pop()
            task = task_info["task"]
            task_name = task_info.get("name")

            at = task_info.get("after")
            if at and at > datetime.datetime.now(tz=datetime.timezone.utc):
                rescheduled_tasks.append(task_info)
                continue

            condition = task_info.get("when")
            if condition and not condition():
                self.logger.debug(
                    f"Task {task_name} rescheduled in {self.scheduled_task_delay}s, condition not met...."
                )
                self.logger.info(f"Task {task_name} rescheduled in {self.scheduled_task_delay}s")
                current_time = datetime.datetime.now(tz=datetime.timezone.utc)
                scheduled_time = current_time + datetime.timedelta(seconds=self.scheduled_task_delay)
                task_info["after"] = scheduled_time
                rescheduled_tasks.append(task_info)
                continue

            self.logger.debug(f"Running scheduled task: {task_name}")
            try:
                # Handle both regular functions and coroutines
                if asyncio.iscoroutinefunction(task):
                    await task()
                else:
                    task()
            except Exception as exc:
                self.logger.error(f"Task {task_name} has failed: {exc!r}")
                self.logger.info(f"Task {task_name} rescheduled in {self.scheduled_task_delay}s")
                current_time = datetime.datetime.now(tz=datetime.timezone.utc)
                scheduled_time = current_time + datetime.timedelta(seconds=self.scheduled_task_delay)
                task_info["after"] = scheduled_time
                rescheduled_tasks.append(task_info)
            else:
                self.logger.debug(f"Scheduled task finished: {task_name}")

        if self.scheduled_tasks:
            self.logger.warning(f"There are still {len(self.scheduled_tasks)} scheduled tasks.")

        if rescheduled_tasks:
            self.scheduled_tasks.extend(rescheduled_tasks)

    def schedule_task(self, callback: Union[Callable, Coroutine], after: float = 0.0, when: Callable = None):
        """
        Schedules a task to run in the control server event loop.

        The `callback` function will be executed as soon as possible in the `serve()` event loop.

        Some simple scheduling options are available:

        * after: the task will only execute 'x' seconds after the time of scheduling. I.e.
          the task will be rescheduled until time > scheduled time + 'x' seconds.
        * when: the task will only execute when the condition is True.

        The `after` and the `when` arguments can be combined.

        Note:
            * This function is intended to be used in order to prevent a deadlock.
            * Since the `callback` function is executed in the `serve()` event loop, it shall not block!

        """
        try:
            name = callback.func.__name__ if isinstance(callback, partial) else callback.__name__
        except AttributeError:
            name = "unknown"

        current_time = datetime.datetime.now(tz=datetime.timezone.utc)
        scheduled_time = current_time + datetime.timedelta(seconds=after)

        self.logger.info(f"Task {name} scheduled")

        self.scheduled_tasks.append({"task": callback, "name": name, "after": scheduled_time, "when": when})

    async def process_device_command(self):
        """Handle commands for the device protocol"""
        while not self.interrupted:
            try:
                # Check if there's a command pending with non-blocking recv
                try:
                    # Use poll with a short timeout to check for messages
                    events = await self.dev_ctrl_cmd_sock.poll(timeout=50, flags=zmq.POLLIN)
                    if events == zmq.POLLIN:
                        # If we have a command, we can either:
                        # 1. Process it directly (parallel to other operations)
                        # await self.device_protocol.execute_async()

                        # 2. Or enqueue it for sequential processing if order matters
                        self.enqueue_sequential_operation(self.device_protocol.execute_async)
                except Exception as exc:
                    self.logger.error(f"Error checking for device command: {exc}")
                    await asyncio.sleep(0.05)

                await asyncio.sleep(0.01)  # Short sleep to prevent CPU hogging
            except asyncio.CancelledError:
                break
            except Exception as exc:
                self.logger.error(f"Error in device command processor: {exc}")

    async def process_service_command(self):
        """Handle commands for the service protocol"""
        while not self.interrupted:
            try:
                # Check if there's a command pending with non-blocking recv
                try:
                    # Use poll with a short timeout to check for messages
                    events = await self.dev_ctrl_service_sock.poll(timeout=50, flags=zmq.POLLIN)
                    if events == zmq.POLLIN:
                        # If we have a command, we can either:
                        # 1. Process it directly (parallel to other operations)
                        # await self.service_protocol.execute_async()

                        # 2. Or enqueue it for sequential processing if order matters
                        self.enqueue_sequential_operation(self.service_protocol.execute_async)
                except Exception as exc:
                    self.logger.error(f"Error checking for service command: {exc}")
                    await asyncio.sleep(0.05)

                await asyncio.sleep(0.01)  # Short sleep to prevent CPU hogging
            except asyncio.CancelledError:
                break
            except Exception as exc:
                self.logger.error(f"Error in service command processor: {exc}")

    async def send_status_updates(self):
        """Send periodic status updates"""
        while not self.interrupted:
            try:
                # Convert milliseconds to seconds for asyncio.sleep
                await asyncio.sleep(self.mon_delay / 1000)

                # Create a coroutine for the status update
                async def status_update_operation():
                    try:
                        status = save_average_execution_time(self.device_protocol.get_status)
                        await self.monitoring_protocol.send_status_async(status)
                    except Exception as exc:
                        _LOGGER.error(
                            textwrap.dedent(
                                f"""\
                                An Exception occurred while collecting status info from the control server \
                                {self.__class__.__name__}.
                                This might be a temporary problem, still needs to be looked into:

                                {exc}
                                """
                            )
                        )

                # You can choose to run status updates sequentially if they must be in order
                # with other operations from the polling loop
                self.enqueue_sequential_operation(status_update_operation)

                # Or run them independently if order doesn't matter:
                # await status_update_operation()

            except asyncio.CancelledError:
                break
            except Exception as exc:
                self.logger.error(f"Error sending status: {exc}")

    async def send_housekeeping_updates(self):
        """Send periodic housekeeping updates if storage manager is active"""
        storage_manager = await self.is_storage_manager_active()
        if not storage_manager:
            return

        while not self.interrupted:
            try:
                # Convert milliseconds to seconds for asyncio.sleep
                await asyncio.sleep(self.hk_delay / 1000)

                # Create a coroutine for the housekeeping update
                async def housekeeping_update_operation():
                    try:
                        housekeeping = save_average_execution_time(self.device_protocol.get_housekeeping)
                        await self.store_housekeeping_information(housekeeping)
                    except Exception as exc:
                        _LOGGER.error(
                            textwrap.dedent(
                                f"""\
                                An Exception occurred while collecting housekeeping from the device to be stored in \
                                {self.get_storage_mnemonic()}.
                                This might be a temporary problem, still needs to be looked into:

                                {exc}
                                """
                            )
                        )

                # You can choose to run housekeeping updates sequentially if they must be in order
                # with other operations from the polling loop
                self.enqueue_sequential_operation(housekeeping_update_operation)

                # Or run them independently if order doesn't matter:
                # await housekeeping_update_operation()

            except asyncio.CancelledError:
                break
            except Exception as exc:
                self.logger.error(f"Error sending housekeeping: {exc}")

    async def process_scheduled_tasks(self):
        """Process scheduled tasks periodically"""
        while not self.interrupted:
            try:
                # Create a coroutine for handling scheduled tasks
                async def scheduled_tasks_operation():
                    await self.handle_scheduled_tasks()

                # You can choose to run scheduled tasks sequentially if they must
                # maintain order with other operations from the polling loop
                self.enqueue_sequential_operation(scheduled_tasks_operation)

                # Or run them independently if order doesn't matter:
                # await scheduled_tasks_operation()

                await asyncio.sleep(0.05)  # Small sleep to not hog CPU
            except asyncio.CancelledError:
                break
            except Exception as exc:
                self.logger.error(f"Error processing scheduled tasks: {exc}")

    async def update_metrics(self):
        """Update process metrics periodically"""
        while not self.interrupted:
            try:
                self._process_status.update()
                await asyncio.sleep(PROCESS_SETTINGS.METRICS_INTERVAL)
            except asyncio.CancelledError:
                break
            except Exception as exc:
                self.logger.error(f"Error updating metrics: {exc}")

    async def check_device_protocol_alive(self):
        """Check if device protocol is still alive"""
        while not self.interrupted:
            try:
                await asyncio.sleep(1.0)  # Check every second

                if not self.device_protocol.is_alive():
                    self.logger.error(
                        "Some Thread or sub-process that was started by Protocol has died, terminating..."
                    )
                    self.quit()
                    break
            except asyncio.CancelledError:
                break
            except Exception as exc:
                self.logger.error(f"Error checking if device protocol is alive: {exc}")

    async def process_sequential_queue(self):
        """
        Process operations that need to be executed sequentially.
        This ensures that certain operations maintain the same order as in the original polling loop.
        """
        while not self.interrupted:
            try:
                # Get operation from queue with timeout to allow checking for interruption
                try:
                    operation = await asyncio.wait_for(self.sequential_queue.get(), 0.1)
                    await operation()
                    self.sequential_queue.task_done()
                except asyncio.TimeoutError:
                    continue
            except asyncio.CancelledError:
                break
            except Exception as exc:
                self.logger.error(f"Error processing sequential operation: {exc}")

    def enqueue_sequential_operation(self, coroutine_func):
        """
        Add an operation to the sequential queue.
        This ensures the operation will run in order with other sequential operations.

        Args:
            coroutine_func: A coroutine function (async function) to be executed sequentially
        """
        if self.sequential_queue is not None:  # Check if server is initialized
            self.sequential_queue.put_nowait(coroutine_func)

    async def serve(self) -> None:
        """Activation of the Control Server.

        This comprises the following steps:

        - Executing the `before_serve` method;
        - Checking if the Storage Manager is active and registering the Control Server to it;
        - Start accepting (listening to) commands;
        - Start sending out monitoring information;
        - Start sending out housekeeping information;
        - Start listening for quit commands;
        - After a quit command has been received:
            - Unregister from the Storage Manager;
            - Execute the `after_serve` method;
            - Close all sockets;
            - Clean up all tasks.
        """
        # Store reference to event loop
        self.event_loop = asyncio.get_event_loop()

        # Execute before_serve hook
        await self.before_serve()

        # Check if Storage Manager is available
        storage_manager = await self.is_storage_manager_active()
        if storage_manager:
            await self.register_to_storage_manager()

        # Set up signal handler
        killer = SignalCatcher()

        # Create tasks for each aspect of the control server
        self.tasks = [
            asyncio.create_task(self.process_device_command()),
            asyncio.create_task(self.process_service_command()),
            asyncio.create_task(self.send_status_updates()),
            asyncio.create_task(self.process_scheduled_tasks()),
            asyncio.create_task(self.update_metrics()),
            asyncio.create_task(self.check_device_protocol_alive()),
            asyncio.create_task(self.process_sequential_queue()),  # Add sequential queue processor
        ]

        # Add housekeeping task if storage manager is active
        if storage_manager:
            self.tasks.append(asyncio.create_task(self.send_housekeeping_updates()))

        # Wait for interruption or signal
        try:
            while not self.interrupted and not killer.term_signal_received:
                await asyncio.sleep(0.1)

                if killer.term_signal_received:
                    self.logger.info(f"TERM Signal received, closing down the {self.__class__.__name__}.")
                    break

                if self.interrupted:
                    self.logger.info(f"Quit command received, closing down the {self.__class__.__name__}.")
                    break

        except asyncio.CancelledError:
            self.logger.info("Main server loop cancelled.")
        finally:
            # Cancel all running tasks
            for task in self.tasks:
                if not task.done():
                    task.cancel()

            # Wait for tasks to complete their cancellation
            if self.tasks:
                try:
                    await asyncio.gather(*self.tasks, return_exceptions=True)
                except asyncio.CancelledError:
                    pass

            # Cleanup
            if storage_manager:
                await self.unregister_from_storage_manager()

            await self.after_serve()

            await self.device_protocol.quit_async()

            self.dev_ctrl_mon_sock.close(linger=0)
            self.dev_ctrl_service_sock.close(linger=0)
            self.dev_ctrl_cmd_sock.close(linger=0)

            close_all_zmq_handlers()

            self.zcontext.term()

    async def store_housekeeping_information(self, data: dict) -> None:
        """Sends housekeeping information to the Storage Manager.

        This method has to be overwritten by the subclasses if they want the device housekeeping information to be
        saved.

        Args:
            data (dict): a dictionary containing parameter name and value of all device housekeeping. There is also
                a timestamp that represents the date/time when the HK was received from the device.
        """
        pass

    async def register_to_storage_manager(self) -> None:
        """Registers this Control Server to the Storage Manager.

        By doing so, the housekeeping information of the device will be sent to the Storage Manager, which will store
        the information in a dedicated CSV file.

        This method has to be overwritten by the subclasses if they have housekeeping information that must be stored.

        Subclasses need to overwrite this method if they have housekeeping information to be stored.

        The following information is required for the registration:

        - origin: Storage mnemonic, which can be retrieved from `self.get_storage_mnemonic()`
        - persistence_class: Persistence layer (one of the TYPES in egse.storage.persistence)
        - prep: depending on the type of the persistence class (see respective documentation)

        The `egse.storage` module provides a convenience method that can be called from the method in the subclass:

        >>> from egse.storage import register_to_storage_manager_async  # noqa

        Note:
            the `egse.storage` module might not be available, it is provided by the `cgse-core` package.
        """
        pass

    async def unregister_from_storage_manager(self) -> None:
        """Unregisters the Control Server from the Storage Manager.

        This method has to be overwritten by the subclasses.

        The following information is required for the registration:

        - origin: Storage mnemonic, which can be retrieved from `self.get_storage_mnemonic()`

        The `egse.storage` module provides a convenience method that can be called from the method in the subclass:

        >>> from egse.storage import unregister_from_storage_manager_async  # noqa

        Note:
            the `egse.storage` module might not be available, it is provided by the `cgse-core` package.
        """
        pass