cgse-core 0.17.3__py3-none-any.whl → 0.18.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cgse_core/_stop.py CHANGED
@@ -1,9 +1,8 @@
1
1
  import logging
2
2
  import subprocess
3
- import sys
4
- from pathlib import Path
5
3
 
6
4
  import rich
5
+ import sys
7
6
 
8
7
  from egse.log import logger
9
8
  from egse.process import is_process_running
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cgse-core
3
- Version: 0.17.3
3
+ Version: 0.18.1
4
4
  Summary: Core services for the CGSE framework
5
5
  Author: IvS KU Leuven
6
6
  Maintainer-email: Rik Huygen <rik.huygen@kuleuven.be>, Sara Regibo <sara.regibo@kuleuven.be>
@@ -1,15 +1,15 @@
1
1
  cgse_core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  cgse_core/_start.py,sha256=m0JDrtEVRRmRA3nL8kQhEKq7eWiF5lf5kQ3fq9-JrkM,2126
3
3
  cgse_core/_status.py,sha256=Quz7hNtqSZPWn8xZ9ZmmF2i5hGY_bMUoybN2UNNBsHc,3259
4
- cgse_core/_stop.py,sha256=ZXTzvWjJ3bVNmNcmsP8NP5G6AHvegamyfu_cNn8qa6A,3588
4
+ cgse_core/_stop.py,sha256=MTXtnYjrqJdwPOkQ7h1Zx-C57XeXxqrL6KijYhLL6AE,3563
5
5
  cgse_core/cgse_explore.py,sha256=8jxAxYDsjPUZftUccPCneHaqijRHxBPZuaOo0ESmBUs,460
6
6
  cgse_core/services.py,sha256=_a1d1Dcf9L0F1IF3w6kHrACE5EDnqtNUd8KzrTZDu5U,8595
7
7
  cgse_core/settings.yaml,sha256=6RcZ3aI7IUSB6GaZ8e9b_JEiql0l3l7h-VowDycv5AE,3625
8
8
  egse/_setup_core.py,sha256=ei4a7tGYfDxc16kfhrF11Tm3cOSnl48MsSGfrWu0vVg,5983
9
9
  egse/command.py,sha256=529_T619qG3Xif9UkxZS8mj2ibr6eljfbVsMM-K_AII,21965
10
- egse/connect.py,sha256=BDMzTt4URx7TpNTPyzb1Ye3Ch09enxm5yIzcxMRga_A,1834
11
- egse/control.py,sha256=u4bWsKNW0tl4gNMaSMyUA02xXwvcxCVe2TsdfJpTkMs,28424
12
- egse/dummy.py,sha256=I63wXBdC30WqnP6gz1sJ4YBHjrWumeXhiHbF49oF_ZQ,21909
10
+ egse/connect.py,sha256=QiG5G0qnCI3EeDhmfj-gBlhI9RmKvnd0dGXu0ZBIZ90,21556
11
+ egse/control.py,sha256=pKhQjp3YGkw8J4KyDTO3BUwBePSBJAQnfwqBPsZtpSs,28468
12
+ egse/dummy.py,sha256=N1v9bCOF3saY6J_ip2h1pbgregQNeT4YEpDIhhKUKps,22721
13
13
  egse/listener.py,sha256=kD62oD7w1yBVV0M2Eq5MNEQyVaJvlsl0RyhYCzVkgQs,6493
14
14
  egse/mixin.py,sha256=fklH61C7TtaTnXx7FQjjJCgUQ2PsQv5baw0SVBBdbH0,20265
15
15
  egse/monitoring.py,sha256=0Hbd5SHUU2YWFm8gbzlyPMUMjTezKap_q9yE4T6Vkk0,9716
@@ -28,30 +28,30 @@ egse/icons/simulator-mode.svg,sha256=iDU6i0nTqyeYJOY9NAIj6NvjBQWAo2XSJ__35funhXY
28
28
  egse/icons/start-process-button.svg,sha256=lvOmRzafhldlAbjKtBjuqrRDjBwyaVnd3yK2ees22JM,476
29
29
  egse/icons/stop-process-button.svg,sha256=ZysOJooxOXEbup9zd6077M36OS0S5jQQkny9jfQWBfI,456
30
30
  egse/icons/user-interface.svg,sha256=q_KWgHJ9ATdbw-HFghoaGQIBe52ZyxyAKitHpDaFWm8,2375
31
- egse/logger/__init__.py,sha256=5Nku3dpZVBdawiKZJgNyhuw5-ppm1K9lSb_p5uinp-o,10763
31
+ egse/logger/__init__.py,sha256=IhqNw4Yvm26Q5w7m3NuTAL8ZkywtN9dT4OaTpc3xkuo,10782
32
32
  egse/logger/__main__.py,sha256=_etegNrUM9IWHNTOObDVY1DiLn9SJyf8eHktVBbN-OQ,1765
33
33
  egse/logger/log_cs.py,sha256=3k-HUnfrJY2072cjF0TXBv9TyNd4GzCSzazx03zfJss,14296
34
34
  egse/notifyhub/__init__.py,sha256=pbCy5PHSR7OKcD1heaMIDO6ztkyFlpsvc-i2pyOdCpU,2438
35
35
  egse/notifyhub/client.py,sha256=VOFlxcotEm3JAe5eGMDXpCRejXzDb3-7k4gIPgcgmpY,10536
36
36
  egse/notifyhub/event.py,sha256=Yo8uvnK27uzaCLjJa8IK734Hcg3eLuD_KCd7eZ80XBQ,792
37
- egse/notifyhub/server.py,sha256=_bK45WZhu6KKuJMLLzQdDLBVz6Z_d4n0UFf77ydiDns,14520
37
+ egse/notifyhub/server.py,sha256=XW4lbmz1IUhhPUB3frbgvAWUyAXZy1Wr1ofC10vAoM0,14553
38
38
  egse/notifyhub/services.py,sha256=LpKxJfFszwoQN9Kb5f5lkjjKLbvwCfHSC2mt1WZt1wc,10494
39
39
  egse/procman/__init__.py,sha256=w2ilHHVuznkNCK-Syd9WVayktW8um8_EsKF5JQJDZK0,13946
40
40
  egse/procman/procman.yaml,sha256=miAq3GE4PSKm8QZDfjvmtBrYCt8GyzJDJm-JzzgdznQ,847
41
- egse/procman/procman_cs.py,sha256=lvuhmGdd1GmkdkXYNS9slNT9VJF09OtffZrZmCdy97I,5390
41
+ egse/procman/procman_cs.py,sha256=2FxpDI4h-3312rt84J4rcX1QNBFFst0Q1cLy5Hex3ro,5381
42
42
  egse/procman/procman_protocol.py,sha256=kFUWQb66d5aDE7qHM9xYmsO07UB-6ro2hJ-E7o_OPlE,3778
43
- egse/procman/procman_ui.py,sha256=5pU4-Dtma5aY8k4PaHQpJUe-DSwjpP-V1sJpbuUMlLg,42070
43
+ egse/procman/procman_ui.py,sha256=IzusnQymoBsiQ50lCH7gopbmNoevotC2vmRCeTTR1F8,42184
44
44
  egse/registry/__init__.py,sha256=U7ap_245Bid74nn7Ncd0GXu0aetR95dJHltGRwBCVb8,1349
45
45
  egse/registry/backend.py,sha256=-kRXncO949YETb1S2GFedhYTt1O0teiKYUUPxeQqX9E,22120
46
- egse/registry/client.py,sha256=-fivaDQSTb_SjbZWcir_MpH5hciMSJHVinuSS5RBmMU,42874
47
- egse/registry/server.py,sha256=1Zv-1VkGhpKRo_P3gRRnlN1UuXJbFnYWX3fOVtgKe-g,21869
48
- egse/registry/service.py,sha256=QnKVICWiuHmzESmy2H4VEtJ-tGX8hSV6n6qq6ejkWAY,15125
46
+ egse/registry/client.py,sha256=InUttsnCUDUjWHyFQ5RFAUckmFo8deQOdFshXcUVubs,43395
47
+ egse/registry/server.py,sha256=MTBS-f0kXtxgpxJWGtUNklZL405WEokS4rp_JpmIqQE,22296
48
+ egse/registry/service.py,sha256=r6ThO_ur9WyFb_9JkpE__C5Y2P3VAeK8j_w9SFHiIBw,15375
49
49
  egse/storage/__init__.py,sha256=xtMdHdtPT9-oqTp2bpWw7Os3qUgN8TdqZNuaj3glxn0,43147
50
50
  egse/storage/__main__.py,sha256=LI9fxlsFWmEd5LcWUB0xA8i7Yt6UHgnblB4G0aTi3pI,28
51
51
  egse/storage/persistence.py,sha256=35fvuCPuGTSCc2MfmFLLNU03xYq3CEaJQspot4f-Pvw,18274
52
52
  egse/storage/storage.yaml,sha256=l3HtPx_bAbXoV4f3_PXWAa1tP-fY2S6roSBSBiOHodE,2712
53
53
  egse/storage/storage_cs.py,sha256=172llnKef1fdiDXcnzYllw_q12bVyuGJGh_3XpeDVCU,7377
54
- cgse_core-0.17.3.dist-info/METADATA,sha256=cG8dHirqx3LtMgrrOJkxOHtUJEyCrsU8zs4OfzP1hz0,582
55
- cgse_core-0.17.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
56
- cgse_core-0.17.3.dist-info/entry_points.txt,sha256=IwIG7aGgfUehol29ufcKd559S88t3TJdh1LMJ5YymCE,976
57
- cgse_core-0.17.3.dist-info/RECORD,,
54
+ cgse_core-0.18.1.dist-info/METADATA,sha256=JhEOJtC_SXdr-CnBqg_wP9Q0bkBBO5w8hqZqUYadR1g,582
55
+ cgse_core-0.18.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
56
+ cgse_core-0.18.1.dist-info/entry_points.txt,sha256=IwIG7aGgfUehol29ufcKd559S88t3TJdh1LMJ5YymCE,976
57
+ cgse_core-0.18.1.dist-info/RECORD,,
egse/connect.py CHANGED
@@ -1,5 +1,12 @@
1
+ import random
2
+ import threading
3
+ import time
4
+ from enum import Enum
5
+ from typing import Any
6
+
1
7
  from egse.env import bool_env
2
8
  from egse.log import logging
9
+ from egse.system import type_name
3
10
  from egse.zmq_ser import connect_address
4
11
 
5
12
  logger = logging.getLogger("egse.connect")
@@ -10,7 +17,7 @@ VERBOSE_DEBUG = bool_env("VERBOSE_DEBUG")
10
17
 
11
18
 
12
19
  def get_endpoint(
13
- service_type: str,
20
+ service_type: str | None = None,
14
21
  protocol: str = "tcp",
15
22
  hostname: str = "localhost",
16
23
  port: int = 0,
@@ -41,15 +48,481 @@ def get_endpoint(
41
48
  endpoint = reg.get_endpoint(service_type)
42
49
  if endpoint:
43
50
  if VERBOSE_DEBUG:
44
- logger.debug(f"Endpoint for {service_type} found in registry: {endpoint}")
51
+ logger.debug(f"Endpoint for '{service_type}' found in registry: {endpoint}")
45
52
  else:
46
- logger.warning(f"No endpoint for {service_type} found in registry.")
53
+ logger.warning(f"No endpoint for '{service_type}' found in registry.")
47
54
 
48
55
  if not endpoint:
49
56
  if port == 0:
50
- raise RuntimeError(f"No service registered as {service_type} and no port provided.")
57
+ raise RuntimeError(f"No service registered as '{service_type}' and no port provided.")
51
58
  endpoint = connect_address(protocol, hostname, port)
52
59
  if VERBOSE_DEBUG:
53
60
  logger.debug(f"Endpoint constructed from protocol/hostname/port: {endpoint}")
54
61
 
55
62
  return endpoint
63
+
64
+
65
class ConnectionState(Enum):
    """Lifecycle states tracked by the service connectors in this module."""

    # Not connected; a new attempt may be made once the retry interval allows it.
    DISCONNECTED = "disconnected"
    # A connection attempt is currently in progress.
    CONNECTING = "connecting"
    # The most recent connection attempt succeeded.
    CONNECTED = "connected"
    # Temporarily stopped trying (the circuit breaker is open).
    CIRCUIT_OPEN = "circuit_open"
70
+
71
+
72
class BackoffStrategy(Enum):
    """
    How the delay between successive retry attempts grows.

    Backing off between retries reduces the load on a service that is
    struggling and avoids overwhelming it with reconnection attempts.

    Strategies:
        EXPONENTIAL:
            Each retry waits twice as long as the previous one (e.g., 1s, 2s, 4s, 8s).
            The most widely used approach, since it sheds load from a struggling system quickly.
        LINEAR:
            Each retry waits a fixed amount longer than the previous one (e.g., 1s, 2s, 3s, 4s).
            Reduces the request rate more gradually.
        FIXED:
            Every retry waits the same amount of time.
            Simple, but does not adapt to the condition of the system.

    References:
        - AWS Architecture Blog: Exponential Backoff And Jitter
    """

    EXPONENTIAL = "exponential"
    """Each retry waits twice as long as the previous one (e.g., 1s, 2s, 4s, 8s).
    The most widely used approach, since it sheds load from a struggling system quickly."""
    LINEAR = "linear"
    """Each retry waits a fixed amount longer than the previous one (e.g., 1s, 2s, 3s, 4s).
    Reduces the request rate more gradually."""
    FIXED = "fixed"
    """Every retry waits the same amount of time. Simple, but does not adapt to the condition of the system."""
100
+
101
+
102
class JitterStrategy(Enum):
    """
    How a computed retry interval is randomized (jittered).

    Jitter keeps many clients from retrying in lock-step, which would
    otherwise produce synchronized load spikes on the service.

    Strategies:
        NONE:
            The interval is used as-is; retries are fully deterministic.
        FULL:
            "Full jitter": a value drawn uniformly from [0, interval].
            Gives the widest spread, but may produce very short delays.
        EQUAL:
            "Equal jitter" as described in the AWS Architecture Blog: a value drawn from
            [interval/2, interval], which guarantees a delay of at least half the interval.
            Note: this is not "50% jitter around the interval" (that would be
            [0.5 * interval, 1.5 * interval]).
        PERCENT_10:
            A value drawn from [0.9 * interval, 1.1 * interval], i.e. ±10% around the interval.

    References:
        - AWS Architecture Blog: Exponential Backoff And Jitter
    """

    NONE = "none"
    """The backoff interval is not randomized."""
    FULL = "full"
    """Widest spread, but can be too random and yield very short intervals."""
    EQUAL = "equal"
    """Best balance: keeps the backoff behaviour while still preventing synchronization."""
    PERCENT_10 = "10%"
    """Randomize by ±10% around the base interval."""
132
+
133
+
134
def calculate_retry_interval(
    attempt_number: int,
    base_interval: float,
    max_interval: float,
    backoff_strategy: BackoffStrategy = BackoffStrategy.EXPONENTIAL,
    jitter_strategy: JitterStrategy = JitterStrategy.EQUAL,
) -> float:
    """
    Calculates the next retry interval based on the given backoff and jitter strategies.

    The backoff strategy first determines a deterministic interval, which is capped at
    `max_interval`; the jitter strategy then randomizes that capped interval.

    Args:
        attempt_number (int): The current retry attempt (starting from 0).
        base_interval (float): The initial interval in seconds.
        max_interval (float): The maximum allowed interval in seconds (applied before jitter).
        backoff_strategy (BackoffStrategy): Strategy for increasing the delay (exponential, linear, or fixed).
            Any other value is treated like `BackoffStrategy.FIXED`.
        jitter_strategy (JitterStrategy): Strategy for randomizing the delay to avoid synchronization.
            Any other value is treated like `JitterStrategy.NONE`.

    Returns:
        float: The computed retry interval in seconds.

    Notes:
        - See the docstrings for BackoffStrategy and JitterStrategy for details on each strategy.
        - Based on best practices from the AWS Architecture Blog: Exponential Backoff And Jitter.
        - With `JitterStrategy.PERCENT_10` the result can exceed `max_interval` by up to 10%,
          because the jitter is applied after the cap.
    """

    if backoff_strategy == BackoffStrategy.EXPONENTIAL:
        # 2**1023 is the largest power of two that fits in a float. Without this clamp, a float
        # base_interval multiplied by a huge integer power raises OverflowError for large attempt
        # numbers instead of simply saturating at max_interval.
        exponent = min(attempt_number, 1023)
        interval = min(base_interval * (2**exponent), max_interval)
    elif backoff_strategy == BackoffStrategy.LINEAR:
        # Grow in steps of base_interval (base, 2*base, 3*base, ...) rather than adding the raw
        # attempt count, so the step size is a duration that scales with the configured base.
        interval = min(base_interval * (attempt_number + 1), max_interval)
    else:
        # FIXED (or unknown) strategy: constant delay, still honouring the documented maximum.
        interval = min(base_interval, max_interval)

    if jitter_strategy == JitterStrategy.FULL:
        return random.uniform(0, interval)

    if jitter_strategy == JitterStrategy.EQUAL:
        half = interval / 2
        return half + random.uniform(0, half)

    if jitter_strategy == JitterStrategy.PERCENT_10:
        jitter_amount = interval * 0.1
        return interval + random.uniform(-jitter_amount, jitter_amount)

    # JitterStrategy.NONE or an unrecognized strategy: deterministic interval.
    return interval
177
+
178
+
179
class AsyncServiceConnector:
    """
    Asynchronous base class for robust service connection management with retry, backoff, and circuit breaker logic.

    This class is intended to be subclassed for managing persistent connections to external services
    (such as devices, databases, or remote APIs) that may be unreliable or temporarily unavailable.

    Features:
        - Automatic retry with configurable backoff and jitter strategies.
        - Circuit breaker to prevent repeated connection attempts after multiple failures.
        - Connection state tracking (disconnected, connecting, connected, circuit open).

    Usage:
        1. Subclass `AsyncServiceConnector` and override the `connect_to_service()` coroutine with your
           actual connection logic.
        2. Override `health_check()` as well. The default implementation always reports the service as
           unhealthy, so `attempt_connection()` would disconnect an already CONNECTED service on every call.
        3. Store the actual connection object (e.g., socket, transport) as an instance attribute in your subclass.
        4. Use `attempt_connection()` to initiate connection attempts; it will handle retries and backoff
           automatically.
        5. Use `is_connected()` to check connection status.

    Example:
        class MyConnector(AsyncServiceConnector):
            async def connect_to_service(self):
                self.connection = await create_socket()
                return self.connection is not None

            def get_connection(self):
                return self.connection

    Note:
        The base class does not manage or expose the underlying connection object.
        Your subclass should provide a method or property to access it as needed.
    """

    def __init__(
        self,
        service_name: str,
        backoff_strategy: BackoffStrategy = BackoffStrategy.EXPONENTIAL,
        jitter_strategy: JitterStrategy = JitterStrategy.EQUAL,
    ):
        """
        Args:
            service_name: name of the service, used in log messages.
            backoff_strategy: how the retry interval grows after each failure.
            jitter_strategy: how the retry interval is randomized.
        """
        self.state = ConnectionState.DISCONNECTED
        self.last_attempt = 0  # time.monotonic() timestamp of the last connection attempt
        self.base_interval = 1  # Start with 1 second
        self.retry_interval = self.base_interval
        self.max_retry_interval = 300  # Max 5 minutes
        self.failure_count = 0
        self.max_failures_before_circuit_break = 5
        self.circuit_break_duration = 60  # 1 minute
        self.circuit_opened_at = None  # time.monotonic() timestamp, set when the circuit opens
        self.backoff_strategy = backoff_strategy
        self.jitter_strategy = jitter_strategy

        self.service_name = service_name

    async def connect_to_service(self) -> bool:
        """
        Establish the connection. Subclasses must override this coroutine.

        Returns:
            True when the connection succeeded, False otherwise. This default
            implementation only logs a warning and returns False.
        """
        logger.warning(
            f"The connect_to_service() method is not implemented for {self.service_name}, connection will always fail."
        )
        return False

    async def disconnect_from_service(self) -> None:
        """
        Optional hook to cleanly disconnect / release resources for the service.
        The default implementation only sets the state to DISCONNECTED. Subclasses should override to:
        - close async transports
        - cancel background tasks
        - set state to ConnectionState.DISCONNECTED
        - call device.disconnect()
        """
        logger.debug(f"{self.service_name}: default async disconnect_from_service(): no-op")
        self.state = ConnectionState.DISCONNECTED
        return

    async def health_check(self) -> bool:
        """
        Verify that an established connection is still usable. Subclasses should override.

        Returns:
            True when the connection is healthy. This default implementation only logs
            a warning and returns False.
        """
        logger.warning(
            f"The health_check() method is not implemented for {self.service_name}, check will always return false."
        )
        return False

    def should_attempt_connection(self) -> bool:
        """Return True if we should attempt a new connection."""
        now = time.monotonic()

        # If circuit is open, check if we should close it
        if self.state == ConnectionState.CIRCUIT_OPEN:
            assert self.circuit_opened_at is not None
            circuit_break_open_since = now - self.circuit_opened_at
            logger.debug(f"{circuit_break_open_since=}")
            if circuit_break_open_since > self.circuit_break_duration:
                # Close the circuit and start over with a clean backoff state.
                self.state = ConnectionState.DISCONNECTED
                self.failure_count = 0
                self.retry_interval = self.base_interval
                return True
            return False

        # Regular backoff logic
        return now - self.last_attempt >= self.retry_interval

    async def attempt_connection(self):
        """Try to connect to the service.

        This will execute the `connect_to_service()` that was overridden by the subclass.
        That function shall return True when the connection succeeded, False otherwise.

        Nothing is attempted when the service is connected and healthy, when the circuit
        breaker is open, or when the current retry interval has not yet elapsed.
        """
        if self.state == ConnectionState.CONNECTED:
            # Ensure the CONNECTED state is validated before skipping reconnection attempts.
            # Even if the state is CONNECTED, the underlying connection could be stale, broken
            # or closed externally, and unless the health is checked here, recovery would
            # never be attempted.
            try:
                healthy = await self.health_check()
            except Exception as exc:
                logger.debug(f"health_check raised: {type_name(exc)} – {exc}")
                healthy = False

            if healthy:
                if VERBOSE_DEBUG:
                    logger.debug(f"{self.service_name} already connected and healthy")
                return

            logger.info(
                f"{self.service_name} marked CONNECTED but health_check failed — disconnecting and reconnecting"
            )
            # Set the state here as well, in case an overridden disconnect hook raises before
            # (or without) updating it.
            self.state = ConnectionState.DISCONNECTED
            try:
                await self.disconnect_from_service()
            except Exception as exc:
                # Best effort: a failing disconnect must not prevent the reconnection attempt.
                if VERBOSE_DEBUG:
                    logger.debug(f"Couldn't disconnect from {self.service_name}: {type_name(exc)} – {exc}")

        if not self.should_attempt_connection():
            logger.debug("Not time yet to attempt new connection")
            return

        self.state = ConnectionState.CONNECTING
        self.last_attempt = time.monotonic()

        try:
            success = await self.connect_to_service()
        except Exception as exc:
            logger.warning(f"Failed to connect to service {self.service_name}: {exc}")
            success = False

        if success:
            self.state = ConnectionState.CONNECTED
            self.failure_count = 0
            self.retry_interval = self.base_interval  # Reset backoff
            logger.info(f"Successfully connected to service {self.service_name}")
        else:
            # For a False return value, a warning should have been logged by connect_to_service().
            self.handle_connection_failure()

    def handle_connection_failure(self):
        """Record a failed attempt: either open the circuit breaker or schedule the next retry."""
        self.failure_count += 1

        # Open circuit breaker if too many failures
        if self.failure_count >= self.max_failures_before_circuit_break:
            self.state = ConnectionState.CIRCUIT_OPEN
            self.circuit_opened_at = time.monotonic()
            logger.warning(
                f"Circuit breaker opened for service {self.service_name} after {self.failure_count} failures"
            )
        else:
            self.state = ConnectionState.DISCONNECTED
            self.retry_interval = calculate_retry_interval(
                self.failure_count,
                self.base_interval,
                self.max_retry_interval,
                self.backoff_strategy,
                self.jitter_strategy,
            )
            logger.debug(f"retry_interval={self.retry_interval}")

    def is_connected(self) -> bool:
        """Return True when the tracked state is CONNECTED (no health check is performed)."""
        if VERBOSE_DEBUG:
            logger.debug(f"Checking if {self.service_name} is connected: {self.state.name}")
        return self.state == ConnectionState.CONNECTED

    def get_connection(self) -> Any:
        """
        Optional method to return the underlying connection object.
        Subclasses should override this method to return the actual connection
        (e.g., socket, transport) if needed.
        """
        logger.warning(f"The get_connection() method is not implemented for {self.service_name}, returning None.")
        return None
366
+
367
+
368
class ServiceConnector:
    """
    Synchronous base class for robust service connection management with retry, backoff, and circuit breaker logic.

    This class is intended to be subclassed for managing persistent connections to external services
    (such as devices, databases, or remote APIs) that may be unreliable or temporarily unavailable.

    Features:
        - Automatic retry with configurable backoff and jitter strategies.
        - Circuit breaker to prevent repeated connection attempts after multiple failures.
        - Connection state tracking (disconnected, connecting, connected, circuit open).
        - Connector state is read and updated under a re-entrant lock (`_lock`). The user-supplied hooks
          `connect_to_service()` and `health_check()` are called without holding the lock.

    Usage:
        1. Subclass `ServiceConnector` and override the `connect_to_service()` method with your
           actual connection logic.
        2. Override `health_check()` as well. The default implementation always reports the service as
           unhealthy, so `attempt_connection()` would disconnect an already CONNECTED service on every call.
        3. Store the actual connection object (e.g., socket, transport) as an instance attribute in your subclass.
        4. Use `attempt_connection()` to initiate connection attempts; it will handle retries and backoff
           automatically.
        5. Use `is_connected()` to check connection status.

    Example:
        class MyConnector(ServiceConnector):
            def connect_to_service(self):
                self.connection = create_socket()
                return self.connection is not None

            def get_connection(self):
                return self.connection

    Note:
        The base class does not manage or expose the underlying connection object.
        Your subclass should provide a method or property to access it as needed.
    """

    def __init__(
        self,
        service_name: str,
        backoff_strategy: BackoffStrategy = BackoffStrategy.EXPONENTIAL,
        jitter_strategy: JitterStrategy = JitterStrategy.EQUAL,
    ):
        """
        Args:
            service_name: name of the service, used in log messages.
            backoff_strategy: how the retry interval grows after each failure.
            jitter_strategy: how the retry interval is randomized.
        """
        self.state = ConnectionState.DISCONNECTED
        self.last_attempt = 0  # time.monotonic() timestamp of the last connection attempt
        self.base_interval = 1  # initial retry interval in seconds
        self.retry_interval = self.base_interval
        self.max_retry_interval = 300  # seconds
        self.failure_count = 0
        self.max_failures_before_circuit_break = 5
        self.circuit_break_duration = 60  # seconds
        self.circuit_opened_at = None  # time.monotonic() timestamp, set when the circuit opens
        self.service_name = service_name
        self.backoff_strategy = backoff_strategy
        self.jitter_strategy = jitter_strategy

        # Re-entrant, so methods that hold the lock can call each other safely.
        self._lock = threading.RLock()

    def connect_to_service(self) -> bool:
        """
        Establish the connection. Subclasses must override this method.

        Returns:
            True when the connection succeeded, False otherwise. This default
            implementation only logs a warning and returns False.
        """
        logger.warning(
            f"The connect_to_service() method is not implemented for {self.service_name}, connection will always fail."
        )
        return False

    def disconnect_from_service(self) -> None:
        """
        Optional hook to cleanly disconnect / release resources for the service. The default implementation
        only sets the state to DISCONNECTED.
        Subclasses should override and must be careful about thread-safety; the base class holds _lock which can be
        used.
        """
        with self._lock:
            logger.debug(f"{self.service_name}: default disconnect_from_service(): no-op")
            self.state = ConnectionState.DISCONNECTED
        return

    def health_check(self) -> bool:
        """
        Verify that an established connection is still usable. Subclasses should override.

        Returns:
            True when the connection is healthy. This default implementation only logs
            a warning and returns False.
        """
        logger.warning(
            f"The health_check() method is not implemented for {self.service_name}, check will always return false."
        )
        return False

    def should_attempt_connection(self) -> bool:
        """Return True if a new connection attempt should be made now."""
        now = time.monotonic()
        with self._lock:
            if self.state == ConnectionState.CIRCUIT_OPEN:
                assert self.circuit_opened_at is not None
                if now - self.circuit_opened_at > self.circuit_break_duration:
                    # Close the circuit and start over with a clean backoff state.
                    self.state = ConnectionState.DISCONNECTED
                    self.failure_count = 0
                    self.retry_interval = self.base_interval
                    return True
                return False
            # Regular backoff logic
            return now - self.last_attempt >= self.retry_interval

    def attempt_connection(self):
        """Try to connect to the service.

        This will execute the `connect_to_service()` that was overridden by the subclass.
        That function shall return True when the connection succeeded, False otherwise.

        Nothing is attempted when the service is connected and healthy, when the circuit
        breaker is open, or when the current retry interval has not yet elapsed.
        """
        with self._lock:
            current_state = self.state

        if current_state == ConnectionState.CONNECTED:
            # Ensure the CONNECTED state is validated before skipping reconnection attempts.
            try:
                healthy = self.health_check()
            except Exception as exc:
                logger.debug(f"health_check raised: {type_name(exc)} – {exc}")
                healthy = False

            if healthy:
                logger.debug(f"{self.service_name} already connected and healthy")
                return

            logger.info(
                f"{self.service_name} marked CONNECTED but health_check failed — disconnecting and reconnecting"
            )
            # Set the state here as well, in case an overridden disconnect hook raises before
            # (or without) updating it. Done under the lock like every other state change.
            with self._lock:
                self.state = ConnectionState.DISCONNECTED
            try:
                self.disconnect_from_service()
            except Exception as exc:
                # Best effort: a failing disconnect must not prevent the reconnection attempt.
                if VERBOSE_DEBUG:
                    logger.debug(f"Couldn't disconnect from {self.service_name}: {type_name(exc)} – {exc}")

        # Check-and-claim under one lock acquisition, so only one thread starts an attempt per interval.
        with self._lock:
            if not self.should_attempt_connection():
                return
            self.state = ConnectionState.CONNECTING
            self.last_attempt = time.monotonic()

        # The (possibly slow) connection itself runs without holding the lock.
        try:
            success = self.connect_to_service()
        except Exception as exc:
            logger.error(f"Failed to connect to service {self.service_name}: {exc}")
            success = False

        with self._lock:
            if success:
                self.state = ConnectionState.CONNECTED
                self.failure_count = 0
                self.retry_interval = self.base_interval  # Reset backoff
                logger.debug(f"Successfully connected to service {self.service_name}")
            else:
                self.handle_connection_failure()

    def handle_connection_failure(self):
        """Record a failed attempt: either open the circuit breaker or schedule the next retry."""
        # The lock is re-entrant: safe when called from attempt_connection() (which already
        # holds it) and also protects direct calls from subclasses or other threads.
        with self._lock:
            self.failure_count += 1
            if self.failure_count >= self.max_failures_before_circuit_break:
                self.state = ConnectionState.CIRCUIT_OPEN
                self.circuit_opened_at = time.monotonic()
                logger.warning(
                    f"Circuit breaker opened for service {self.service_name} after {self.failure_count} failures"
                )
            else:
                self.state = ConnectionState.DISCONNECTED
                self.retry_interval = calculate_retry_interval(
                    self.failure_count,
                    self.base_interval,
                    self.max_retry_interval,
                    self.backoff_strategy,
                    self.jitter_strategy,
                )
                logger.debug(f"retry_interval={self.retry_interval}")

    def is_connected(self) -> bool:
        """Return True when the tracked state is CONNECTED (no health check is performed)."""
        with self._lock:
            return self.state == ConnectionState.CONNECTED
egse/control.py CHANGED
@@ -558,7 +558,8 @@ class ControlServer(metaclass=abc.ABCMeta):
558
558
  try:
559
559
  hk_dict = save_average_execution_time(self.device_protocol.get_housekeeping)
560
560
 
561
- self.store_housekeeping_information(hk_dict)
561
+ if storage_manager:
562
+ self.store_housekeeping_information(hk_dict)
562
563
  self.propagate_metrics(hk_dict)
563
564
  except Exception as exc:
564
565
  logger.error(
egse/dummy.py CHANGED
@@ -18,9 +18,9 @@ and stop the server with:
18
18
 
19
19
  Commands that can be used with the proxy:
20
20
 
21
- * info returns an info message from the dummy device, e.g. "Dummy Device <__version__>"
22
- * get_value returns a random float between 0.0 and 1.0
23
- * division returns the result of the division between arguments 'a' and 'b'.
21
+ * info - returns an info message from the dummy device, e.g. "Dummy Device <__version__>"
22
+ * get_value - returns a random float between 0.0 and 1.0
23
+ * division - returns the result of the division between arguments 'a' and 'b'.
24
24
  This can be used also to induce a ZeroDivisionError that should return a Failure
25
25
  object.
26
26
 
@@ -35,6 +35,7 @@ and stopped with:
35
35
 
36
36
  from __future__ import annotations
37
37
 
38
+ import contextlib
38
39
  import multiprocessing
39
40
  import random
40
41
  import select
@@ -52,12 +53,14 @@ from egse.device import DeviceConnectionError
52
53
  from egse.device import DeviceConnectionInterface
53
54
  from egse.device import DeviceTimeoutError
54
55
  from egse.device import DeviceTransport
56
+ from egse.env import bool_env
55
57
  from egse.log import logger
56
58
  from egse.protocol import CommandProtocol
57
59
  from egse.proxy import Proxy
58
60
  from egse.system import SignalCatcher
59
61
  from egse.system import attrdict
60
62
  from egse.system import format_datetime
63
+ from egse.system import type_name
61
64
  from egse.zmq_ser import bind_address
62
65
  from egse.zmq_ser import connect_address
63
66
 
@@ -77,6 +80,9 @@ WRITE_TIMEOUT = 1.0
77
80
  CONNECT_TIMEOUT = 3.0
78
81
  """The maximum time in seconds to wait for establishing a socket connect."""
79
82
 
83
+
84
+ VERBOSE_DEBUG = bool_env("VERBOSE_DEBUG", default=False)
85
+
80
86
  # Especially DummyCommand and DummyController need to be defined in a known module
81
87
  # because those objects are pickled and when de-pickled at the clients side the class
82
88
  # definition must be known.
@@ -116,14 +122,17 @@ def is_dummy_cs_active() -> bool:
116
122
 
117
123
 
118
124
  def is_dummy_dev_active() -> bool:
125
+ if VERBOSE_DEBUG:
126
+ logger.debug("Checking if dummy device is active...")
119
127
  try:
120
128
  dev = DummyDeviceEthernetInterface(DEV_HOST, DEV_PORT)
121
129
  dev.connect()
122
130
  rc = dev.trans("ping\n")
123
131
  dev.disconnect()
124
132
  return rc.decode().strip() == "pong"
125
- except DeviceConnectionError as exc:
126
- # logger.error(f"Caught {type_name(exc)}: {exc}")
133
+ except (DeviceConnectionError, ConnectionResetError, DeviceTimeoutError) as exc:
134
+ if VERBOSE_DEBUG:
135
+ logger.debug(f"Caught {type_name(exc)}: {exc} - returning False")
127
136
  return False
128
137
 
129
138
 
@@ -309,11 +318,10 @@ class DummyDeviceEthernetInterface(DeviceConnectionInterface, DeviceTransport):
309
318
  Args:
310
319
  hostname (str): the IP address or fully qualified hostname of the Dummy Device
311
320
  controller.
312
-
313
321
  port (int): the IP port number to connect to.
314
322
  """
315
323
 
316
- def __init__(self, hostname: str = None, port: int = None):
324
+ def __init__(self, hostname: str | None = None, port: int | None = None):
317
325
  super().__init__()
318
326
 
319
327
  # Basic connection settings, loaded from the configuration YAML file
@@ -350,7 +358,8 @@ class DummyDeviceEthernetInterface(DeviceConnectionInterface, DeviceTransport):
350
358
  try:
351
359
  self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
352
360
  except socket.error as exc:
353
- self.sock.close()
361
+ if self.sock is not None:
362
+ self.sock.close()
354
363
  raise DeviceConnectionError("Dummy Device", "Failed to create socket.") from exc
355
364
 
356
365
  # Attempt to establish a connection to the remote host
@@ -403,7 +412,8 @@ class DummyDeviceEthernetInterface(DeviceConnectionInterface, DeviceTransport):
403
412
  """
404
413
  try:
405
414
  logger.debug(f"Disconnecting from {self.hostname}")
406
- self.sock.close()
415
+ if self.sock is not None:
416
+ self.sock.close()
407
417
  self.is_connection_open = False
408
418
  except Exception as exc:
409
419
  raise DeviceConnectionError(DEV_NAME, f"Could not close socket to {self.hostname}") from exc
@@ -439,6 +449,8 @@ class DummyDeviceEthernetInterface(DeviceConnectionInterface, DeviceTransport):
439
449
  buf_size = 1024 * 10
440
450
  response = bytes()
441
451
 
452
+ assert self.sock is not None
453
+
442
454
  # Set a timeout of READ_TIMEOUT to the socket.recv
443
455
 
444
456
  saved_timeout = self.sock.gettimeout()
@@ -478,6 +490,8 @@ class DummyDeviceEthernetInterface(DeviceConnectionInterface, DeviceTransport):
478
490
  there was a socket related error.
479
491
  """
480
492
 
493
+ assert self.sock is not None
494
+
481
495
  # logger.debug(f"{command.encode() = }")
482
496
 
483
497
  try:
@@ -505,6 +519,9 @@ class DummyDeviceEthernetInterface(DeviceConnectionInterface, DeviceTransport):
505
519
  DeviceTimeoutError: when the sendall() timed out, and a DeviceConnectionError if
506
520
  there was a socket related error.
507
521
  """
522
+
523
+ assert self.sock is not None
524
+
508
525
  # logger.debug(f"{command.encode() = }")
509
526
 
510
527
  try:
@@ -580,11 +597,18 @@ def start_dev():
580
597
  with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
581
598
  s.bind((DEV_HOST, DEV_PORT))
582
599
  s.listen()
600
+ s.settimeout(CONNECT_TIMEOUT)
583
601
  logger.info(f"Ready to accept connection on {DEV_HOST}:{DEV_PORT}...")
584
- conn, addr = s.accept()
602
+ while True:
603
+ with contextlib.suppress(socket.timeout):
604
+ conn, addr = s.accept()
605
+ break
606
+ if killer.term_signal_received:
607
+ return
585
608
  with conn:
586
609
  logger.info(f"Accepted connection from {addr}")
587
610
  conn.sendall(f"Dummy Device {__version__}".encode())
611
+ conn.settimeout(READ_TIMEOUT)
588
612
  try:
589
613
  while True:
590
614
  error_msg = ""
@@ -641,7 +665,8 @@ def process_command(command_string: str) -> str | None:
641
665
 
642
666
  try:
643
667
  action, response = COMMAND_ACTIONS_RESPONSES[command_string]
644
- action and action()
668
+ if action:
669
+ action()
645
670
  if error_msg:
646
671
  return error_msg
647
672
  else:
egse/logger/__init__.py CHANGED
@@ -57,7 +57,7 @@ COMMANDER_PORT = settings.get("COMMANDER_PORT", 0) # dynamically assigned by th
57
57
  _initialised = False # will be set to True in the setup_logging() function
58
58
 
59
59
 
60
- def get_log_file_name():
60
+ def get_log_file_name() -> str:
61
61
  """
62
62
  Returns the filename of the log file as defined in the Settings or return the default name 'general.log'.
63
63
  """
@@ -315,7 +315,7 @@ def send_request(command_request: str):
315
315
  """Sends a request to the Logger Control Server and waits for a response."""
316
316
 
317
317
  if COMMANDER_PORT == 0:
318
- endpoint = get_endpoint_from_registry()
318
+ endpoint = get_endpoint_from_registry(SERVICE_TYPE)
319
319
  else:
320
320
  endpoint = f"{PROTOCOL}://{HOSTNAME}:{COMMANDER_PORT}"
321
321
 
egse/notifyhub/server.py CHANGED
@@ -23,10 +23,12 @@ from egse.notifyhub import SERVICE_TYPE
23
23
  from egse.notifyhub import STATS_INTERVAL
24
24
  from egse.notifyhub.client import AsyncNotificationHubClient
25
25
  from egse.registry import MessageType
26
- from egse.registry.client import AsyncRegistryClient, REQUEST_TIMEOUT
26
+ from egse.registry.client import REQUEST_TIMEOUT
27
+ from egse.registry.client import AsyncRegistryClient
27
28
  from egse.system import TyperAsyncCommand
28
29
  from egse.system import get_host_ip
29
30
  from egse.zmq_ser import get_port_number
31
+
30
32
  from .event import NotificationEvent
31
33
 
32
34
  REQUEST_POLL_TIMEOUT = 1.0
@@ -154,7 +154,7 @@ def stop():
154
154
 
155
155
  if COMMANDING_PORT == 0:
156
156
  with RegistryClient() as reg:
157
- service = reg.discover_service(settings.SERVICE_TYPE)
157
+ service = reg.discover_service(SERVICE_TYPE)
158
158
  rich.print("service = ", service)
159
159
  if service:
160
160
  hostname = service["host"]
@@ -44,6 +44,9 @@ from egse.zmq_ser import set_address_port
44
44
 
45
45
  MAX_SLEEP = 10
46
46
 
47
+ DEVICE_CMD_ENTRY_POINT = "cgse.service.device_command"
48
+ GUI_SCRIPTS_ENTRY_POINT = "gui_scripts"
49
+
47
50
 
48
51
  class ControlServerStatus(Enum):
49
52
  """Status of the Control Server of a device."""
@@ -78,7 +81,7 @@ def get_cgse_cmd(device_proxy: str) -> str:
78
81
 
79
82
  module_name = device_proxy[7:].rsplit(".", 1)[0]
80
83
  entry_point_values = []
81
- for ep in sorted(entry_points("cgse.service"), key=lambda x: x.name):
84
+ for ep in sorted(entry_points(DEVICE_CMD_ENTRY_POINT), key=lambda x: x.name):
82
85
  entry_point_values.append(ep.value)
83
86
 
84
87
  similarity_scores = [
@@ -108,7 +111,7 @@ def get_cgse_ui(device_proxy: str) -> Union[str, None]:
108
111
 
109
112
  module_name = device_proxy[7:].rsplit(".", 1)[0]
110
113
  entry_point_values = []
111
- for ep in sorted(entry_points("gui_scripts"), key=lambda x: x.name):
114
+ for ep in sorted(entry_points(GUI_SCRIPTS_ENTRY_POINT), key=lambda x: x.name):
112
115
  entry_point_values.append(ep.name)
113
116
 
114
117
  similarity_scores = [
egse/registry/client.py CHANGED
@@ -639,6 +639,8 @@ class AsyncRegistryClient:
639
639
  The response from the registry as a dictionary.
640
640
  """
641
641
 
642
+ assert self.req_socket is not None, "REQ socket is not connected, cannot send request."
643
+
642
644
  timeout = timeout or self.timeout
643
645
  try:
644
646
  self.logger.debug(f"Sending request: {request}")
@@ -687,6 +689,8 @@ class AsyncRegistryClient:
687
689
  The response from the registry as a dictionary.
688
690
  """
689
691
 
692
+ assert self.hb_socket is not None, "HB socket is not connected, cannot send heartbeat request."
693
+
690
694
  try:
691
695
  self.logger.debug(f"Sending heartbeat request: {request}")
692
696
  await self.hb_socket.send_string(json.dumps(request))
@@ -867,7 +871,8 @@ class AsyncRegistryClient:
867
871
  await self.reregister()
868
872
 
869
873
  else:
870
- VERBOSE_DEBUG and self.logger.debug(f"Heartbeat succeeded: {response.get('message')}")
874
+ if VERBOSE_DEBUG:
875
+ self.logger.debug(f"Heartbeat succeeded: {response.get('message')}")
871
876
 
872
877
  except Exception as exc:
873
878
  self.logger.error(f"Error in heartbeat loop: {exc}", exc_info=True)
@@ -893,13 +898,15 @@ class AsyncRegistryClient:
893
898
  """Stop the running heartbeat task."""
894
899
 
895
900
  if self._heartbeat_task is None:
896
- VERBOSE_DEBUG and self.logger.debug("Couldn't stop heartbeat, heartbeat_task is None")
901
+ if VERBOSE_DEBUG:
902
+ self.logger.debug("Couldn't stop heartbeat, heartbeat_task is None")
897
903
  return
898
904
 
899
905
  self._heartbeat_task.cancel()
900
906
  try:
901
907
  await self._heartbeat_task
902
908
  except asyncio.CancelledError:
909
+ self.logger.info("Heartbeat task cancelled")
903
910
  pass
904
911
  self._tasks.discard(self._heartbeat_task)
905
912
  self._heartbeat_task = None
@@ -909,7 +916,8 @@ class AsyncRegistryClient:
909
916
  """Stop the running event listener task."""
910
917
 
911
918
  if self._event_listener_task is None:
912
- VERBOSE_DEBUG and self.logger.debug("Couldn't stop event_listener, event_listener_task is None")
919
+ if VERBOSE_DEBUG:
920
+ self.logger.debug("Couldn't stop event_listener, event_listener_task is None")
913
921
  return
914
922
 
915
923
  self._event_listener_task.cancel()
@@ -1142,12 +1150,14 @@ class AsyncRegistryClient:
1142
1150
  self.sub_socket.close()
1143
1151
 
1144
1152
  # We can not terminate the context, because we use a global instance, i.e. a singleton context.
1145
- # When we try to terminate it, even after checking if it was closed,
1153
+ # When we try to terminate it, even after checking if it was closed, it raises an exception.
1146
1154
  if hasattr(self, "context") and self.context:
1147
1155
  self.logger.info(f"{self.context = !r}")
1148
1156
  self.logger.info(f"{self.context._sockets = !r}")
1149
- if not self.context.closed:
1150
- self.context.term()
1157
+ # The zmq context instance is the global singleton instance.
1158
+ # Terminating it here would affect other parts of the application using zmq.
1159
+ # if not self.context.closed:
1160
+ # self.context.term()
1151
1161
  except Exception as exc:
1152
1162
  self.logger.error(f"Error during cleanup: {exc}")
1153
1163
 
egse/registry/server.py CHANGED
@@ -218,13 +218,15 @@ class AsyncRegistryServer:
218
218
  """Task that handles incoming requests."""
219
219
  self.logger.info("Started request handler task")
220
220
 
221
+ assert self.req_socket is not None, "REQ socket is not connected, cannot handle requests."
222
+
221
223
  try:
222
224
  message_parts = None
223
225
  while self._running:
224
226
  try:
225
227
  # Wait for a request with timeout to allow checking if still running
226
228
  try:
227
- # self.logger.info("Waiting for a request with 1s timeout...")
229
+ # self.logger.debug("Waiting for a request with 1s timeout...")
228
230
  message_parts = await asyncio.wait_for(self.req_socket.recv_multipart(), timeout=1.0)
229
231
  except asyncio.TimeoutError:
230
232
  # self.logger.debug("waiting for command request...")
@@ -241,6 +243,9 @@ class AsyncRegistryServer:
241
243
  response = await self._process_request(message_data)
242
244
 
243
245
  await self._send_response(client_id, message_type, response)
246
+ else:
247
+ self.logger.warning("Request handler: message corrupted, check debug messages.")
248
+ self.logger.debug(f"{message_parts=}")
244
249
 
245
250
  except zmq.ZMQError as exc:
246
251
  self.logger.error(f"ZMQ error: {exc}", exc_info=True)
@@ -397,6 +402,8 @@ class AsyncRegistryServer:
397
402
  """Task that handles heartbeat messages."""
398
403
  self.logger.info("Started heartbeats handler task")
399
404
 
405
+ assert self.hb_socket is not None, "HB socket is not connected, cannot handle heartbeat messages."
406
+
400
407
  try:
401
408
  message_parts = None
402
409
  while self._running:
@@ -425,7 +432,8 @@ class AsyncRegistryServer:
425
432
  self.logger.warning("Heartbeat request: message corrupted, check debug messages.")
426
433
 
427
434
  except asyncio.TimeoutError:
428
- VERBOSE_DEBUG and self.logger.debug("waiting for heartbeat...")
435
+ if VERBOSE_DEBUG:
436
+ self.logger.debug("waiting for heartbeat...")
429
437
  continue
430
438
 
431
439
  except Exception as exc:
egse/registry/service.py CHANGED
@@ -2,7 +2,6 @@ from __future__ import annotations
2
2
 
3
3
  import asyncio
4
4
  import json
5
- import logging
6
5
  import time
7
6
  from typing import Any
8
7
  from typing import Callable
@@ -10,13 +9,14 @@ from typing import Callable
10
9
  import zmq
11
10
  import zmq.asyncio
12
11
 
12
+ from egse.log import logging
13
13
  from egse.registry import DEFAULT_RS_PUB_PORT
14
14
  from egse.registry import DEFAULT_RS_REQ_PORT
15
15
  from egse.registry.client import AsyncRegistryClient
16
16
  from egse.system import get_host_ip
17
17
  from egse.zmq_ser import get_port_number
18
18
 
19
- module_module_logger_name = "async_microservice"
19
+ module_module_logger_name = "egse.async_microservice"
20
20
  module_logger = logging.getLogger(module_module_logger_name)
21
21
 
22
22
 
@@ -64,7 +64,7 @@ class ZMQMicroservice:
64
64
  self.registry_sub_endpoint = registry_sub_endpoint or f"tcp://localhost:{DEFAULT_RS_PUB_PORT}"
65
65
  self.metadata = metadata or {}
66
66
 
67
- self.host_ip = get_host_ip()
67
+ self.host_ip = get_host_ip() or "localhost"
68
68
 
69
69
  # Service ID will be set when registered
70
70
  self.service_id = None
@@ -164,6 +164,7 @@ class ZMQMicroservice:
164
164
 
165
165
  if not self.service_id:
166
166
  module_logger.error("Failed to register with the service registry")
167
+ await self._cleanup()
167
168
  return True
168
169
 
169
170
  module_logger.info(f"Registered with service ID: {self.service_id}")
@@ -175,12 +176,17 @@ class ZMQMicroservice:
175
176
  # Start request handler
176
177
  request_task = asyncio.create_task(self._handle_requests())
177
178
  self._tasks.add(request_task)
178
- request_task.add_done_callback(self._tasks.discard)
179
+ # request_task.add_done_callback(self._tasks.discard)
179
180
 
180
181
  # Wait for shutdown signal
181
182
  await self._shutdown.wait()
182
183
 
183
- # Clean shutdown
184
+ # request_task.cancel()
185
+ # try:
186
+ # await request_task
187
+ # except asyncio.CancelledError:
188
+ # module_logger.info("Request handler task cancelled during shutdown")
189
+
184
190
  await self._cleanup()
185
191
 
186
192
  return False