cgse-core 0.17.2__py3-none-any.whl → 0.17.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cgse-core
3
- Version: 0.17.2
3
+ Version: 0.17.3
4
4
  Summary: Core services for the CGSE framework
5
5
  Author: IvS KU Leuven
6
6
  Maintainer-email: Rik Huygen <rik.huygen@kuleuven.be>, Sara Regibo <sara.regibo@kuleuven.be>
@@ -6,12 +6,10 @@ cgse_core/cgse_explore.py,sha256=8jxAxYDsjPUZftUccPCneHaqijRHxBPZuaOo0ESmBUs,460
6
6
  cgse_core/services.py,sha256=_a1d1Dcf9L0F1IF3w6kHrACE5EDnqtNUd8KzrTZDu5U,8595
7
7
  cgse_core/settings.yaml,sha256=6RcZ3aI7IUSB6GaZ8e9b_JEiql0l3l7h-VowDycv5AE,3625
8
8
  egse/_setup_core.py,sha256=ei4a7tGYfDxc16kfhrF11Tm3cOSnl48MsSGfrWu0vVg,5983
9
- egse/async_control.py,sha256=V4cx89EIuohZCJEMNXiwKWQFPDWRY6Oj0w5xpo3wO3A,45274
10
- egse/async_control_claude.py,sha256=oMI_g7euQ6Z1dowmVHfijZImiB_sMQl5sHC84DMX2Fs,31609
11
9
  egse/command.py,sha256=529_T619qG3Xif9UkxZS8mj2ibr6eljfbVsMM-K_AII,21965
12
- egse/connect.py,sha256=QiG5G0qnCI3EeDhmfj-gBlhI9RmKvnd0dGXu0ZBIZ90,21556
10
+ egse/connect.py,sha256=BDMzTt4URx7TpNTPyzb1Ye3Ch09enxm5yIzcxMRga_A,1834
13
11
  egse/control.py,sha256=u4bWsKNW0tl4gNMaSMyUA02xXwvcxCVe2TsdfJpTkMs,28424
14
- egse/dummy.py,sha256=VHa2Y72kXBEYUZ4ztedy2A4XXzteXGjwHeIlvmAsMIA,22503
12
+ egse/dummy.py,sha256=I63wXBdC30WqnP6gz1sJ4YBHjrWumeXhiHbF49oF_ZQ,21909
15
13
  egse/listener.py,sha256=kD62oD7w1yBVV0M2Eq5MNEQyVaJvlsl0RyhYCzVkgQs,6493
16
14
  egse/mixin.py,sha256=fklH61C7TtaTnXx7FQjjJCgUQ2PsQv5baw0SVBBdbH0,20265
17
15
  egse/monitoring.py,sha256=0Hbd5SHUU2YWFm8gbzlyPMUMjTezKap_q9yE4T6Vkk0,9716
@@ -22,7 +20,6 @@ egse/services.yaml,sha256=0VRxQGM0TwExJl4GEXYakWSaE9W5Tav5Og-ZV7vogto,2612
22
20
  egse/confman/__init__.py,sha256=HtLxM7u7v9mc8eJ4KTFfsWiRRAMmyvI0Blo_4Yb2nC0,39993
23
21
  egse/confman/__main__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
22
  egse/confman/confman.yaml,sha256=CaS4potNTiy2-Qqc5Zo2Lx8c5hh4wLYhtxwifh-hP5M,3018
25
- egse/confman/confman_acs.py,sha256=Z7QfCUiAZ8tzj7ujHTpG0Kdw7VA76x-fnsWsOntHU2Y,1044
26
23
  egse/confman/confman_cs.py,sha256=JeAWC1T9RBppPVX-arWG87wS5_QrjKN8KUszQV8XBcQ,9981
27
24
  egse/icons/busy.svg,sha256=fKNR2AxNupsAga7huDQF-kEYTThMkxgqGVZvXbma0G4,2964
28
25
  egse/icons/operational-mode.svg,sha256=P6uhgnlXF5UXiJXIlpawklTcCZQePPEI5fGONtL_Y3o,5199
@@ -31,17 +28,14 @@ egse/icons/simulator-mode.svg,sha256=iDU6i0nTqyeYJOY9NAIj6NvjBQWAo2XSJ__35funhXY
31
28
  egse/icons/start-process-button.svg,sha256=lvOmRzafhldlAbjKtBjuqrRDjBwyaVnd3yK2ees22JM,476
32
29
  egse/icons/stop-process-button.svg,sha256=ZysOJooxOXEbup9zd6077M36OS0S5jQQkny9jfQWBfI,456
33
30
  egse/icons/user-interface.svg,sha256=q_KWgHJ9ATdbw-HFghoaGQIBe52ZyxyAKitHpDaFWm8,2375
34
- egse/logger/__init__.py,sha256=IhqNw4Yvm26Q5w7m3NuTAL8ZkywtN9dT4OaTpc3xkuo,10782
31
+ egse/logger/__init__.py,sha256=5Nku3dpZVBdawiKZJgNyhuw5-ppm1K9lSb_p5uinp-o,10763
35
32
  egse/logger/__main__.py,sha256=_etegNrUM9IWHNTOObDVY1DiLn9SJyf8eHktVBbN-OQ,1765
36
33
  egse/logger/log_cs.py,sha256=3k-HUnfrJY2072cjF0TXBv9TyNd4GzCSzazx03zfJss,14296
37
- egse/metricshub/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
- egse/metricshub/server.py,sha256=2arZ2OOsc7L3_tHnpXNw91joYnVo1nxzqLFuwO-GSzM,8780
39
34
  egse/notifyhub/__init__.py,sha256=pbCy5PHSR7OKcD1heaMIDO6ztkyFlpsvc-i2pyOdCpU,2438
40
35
  egse/notifyhub/client.py,sha256=VOFlxcotEm3JAe5eGMDXpCRejXzDb3-7k4gIPgcgmpY,10536
41
36
  egse/notifyhub/event.py,sha256=Yo8uvnK27uzaCLjJa8IK734Hcg3eLuD_KCd7eZ80XBQ,792
42
- egse/notifyhub/server.py,sha256=XW4lbmz1IUhhPUB3frbgvAWUyAXZy1Wr1ofC10vAoM0,14553
37
+ egse/notifyhub/server.py,sha256=_bK45WZhu6KKuJMLLzQdDLBVz6Z_d4n0UFf77ydiDns,14520
43
38
  egse/notifyhub/services.py,sha256=LpKxJfFszwoQN9Kb5f5lkjjKLbvwCfHSC2mt1WZt1wc,10494
44
- egse/notifyhub/test.py,sha256=61-hEii3KUho_nheTz6zCuUvQtIPZ_zZWt3Gpxmhk8g,9922
45
39
  egse/procman/__init__.py,sha256=w2ilHHVuznkNCK-Syd9WVayktW8um8_EsKF5JQJDZK0,13946
46
40
  egse/procman/procman.yaml,sha256=miAq3GE4PSKm8QZDfjvmtBrYCt8GyzJDJm-JzzgdznQ,847
47
41
  egse/procman/procman_cs.py,sha256=lvuhmGdd1GmkdkXYNS9slNT9VJF09OtffZrZmCdy97I,5390
@@ -49,15 +43,15 @@ egse/procman/procman_protocol.py,sha256=kFUWQb66d5aDE7qHM9xYmsO07UB-6ro2hJ-E7o_O
49
43
  egse/procman/procman_ui.py,sha256=5pU4-Dtma5aY8k4PaHQpJUe-DSwjpP-V1sJpbuUMlLg,42070
50
44
  egse/registry/__init__.py,sha256=U7ap_245Bid74nn7Ncd0GXu0aetR95dJHltGRwBCVb8,1349
51
45
  egse/registry/backend.py,sha256=-kRXncO949YETb1S2GFedhYTt1O0teiKYUUPxeQqX9E,22120
52
- egse/registry/client.py,sha256=VR6v-IRVpY3lUB8IGIZcfLAj6PhwakZNBuX7QPYxzxY,43165
53
- egse/registry/server.py,sha256=-5f-6Rk-z-sBCmDXxHZos7u_e4P0VqsLU5av0qITnzU,22291
54
- egse/registry/service.py,sha256=r6ThO_ur9WyFb_9JkpE__C5Y2P3VAeK8j_w9SFHiIBw,15375
46
+ egse/registry/client.py,sha256=-fivaDQSTb_SjbZWcir_MpH5hciMSJHVinuSS5RBmMU,42874
47
+ egse/registry/server.py,sha256=1Zv-1VkGhpKRo_P3gRRnlN1UuXJbFnYWX3fOVtgKe-g,21869
48
+ egse/registry/service.py,sha256=QnKVICWiuHmzESmy2H4VEtJ-tGX8hSV6n6qq6ejkWAY,15125
55
49
  egse/storage/__init__.py,sha256=xtMdHdtPT9-oqTp2bpWw7Os3qUgN8TdqZNuaj3glxn0,43147
56
50
  egse/storage/__main__.py,sha256=LI9fxlsFWmEd5LcWUB0xA8i7Yt6UHgnblB4G0aTi3pI,28
57
51
  egse/storage/persistence.py,sha256=35fvuCPuGTSCc2MfmFLLNU03xYq3CEaJQspot4f-Pvw,18274
58
52
  egse/storage/storage.yaml,sha256=l3HtPx_bAbXoV4f3_PXWAa1tP-fY2S6roSBSBiOHodE,2712
59
53
  egse/storage/storage_cs.py,sha256=172llnKef1fdiDXcnzYllw_q12bVyuGJGh_3XpeDVCU,7377
60
- cgse_core-0.17.2.dist-info/METADATA,sha256=JLHz7fbDXX5hP1Q9651n2oVnQy7ZZ4rmbu886KhMZ0s,582
61
- cgse_core-0.17.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
62
- cgse_core-0.17.2.dist-info/entry_points.txt,sha256=IwIG7aGgfUehol29ufcKd559S88t3TJdh1LMJ5YymCE,976
63
- cgse_core-0.17.2.dist-info/RECORD,,
54
+ cgse_core-0.17.3.dist-info/METADATA,sha256=cG8dHirqx3LtMgrrOJkxOHtUJEyCrsU8zs4OfzP1hz0,582
55
+ cgse_core-0.17.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
56
+ cgse_core-0.17.3.dist-info/entry_points.txt,sha256=IwIG7aGgfUehol29ufcKd559S88t3TJdh1LMJ5YymCE,976
57
+ cgse_core-0.17.3.dist-info/RECORD,,
egse/connect.py CHANGED
@@ -1,12 +1,5 @@
1
- import random
2
- import threading
3
- import time
4
- from enum import Enum
5
- from typing import Any
6
-
7
1
  from egse.env import bool_env
8
2
  from egse.log import logging
9
- from egse.system import type_name
10
3
  from egse.zmq_ser import connect_address
11
4
 
12
5
  logger = logging.getLogger("egse.connect")
@@ -17,7 +10,7 @@ VERBOSE_DEBUG = bool_env("VERBOSE_DEBUG")
17
10
 
18
11
 
19
12
  def get_endpoint(
20
- service_type: str | None = None,
13
+ service_type: str,
21
14
  protocol: str = "tcp",
22
15
  hostname: str = "localhost",
23
16
  port: int = 0,
@@ -48,481 +41,15 @@ def get_endpoint(
48
41
  endpoint = reg.get_endpoint(service_type)
49
42
  if endpoint:
50
43
  if VERBOSE_DEBUG:
51
- logger.debug(f"Endpoint for '{service_type}' found in registry: {endpoint}")
44
+ logger.debug(f"Endpoint for {service_type} found in registry: {endpoint}")
52
45
  else:
53
- logger.warning(f"No endpoint for '{service_type}' found in registry.")
46
+ logger.warning(f"No endpoint for {service_type} found in registry.")
54
47
 
55
48
  if not endpoint:
56
49
  if port == 0:
57
- raise RuntimeError(f"No service registered as '{service_type}' and no port provided.")
50
+ raise RuntimeError(f"No service registered as {service_type} and no port provided.")
58
51
  endpoint = connect_address(protocol, hostname, port)
59
52
  if VERBOSE_DEBUG:
60
53
  logger.debug(f"Endpoint constructed from protocol/hostname/port: {endpoint}")
61
54
 
62
55
  return endpoint
63
-
64
-
65
- class ConnectionState(Enum):
66
- DISCONNECTED = "disconnected"
67
- CONNECTING = "connecting"
68
- CONNECTED = "connected"
69
- CIRCUIT_OPEN = "circuit_open" # Temporarily stopped trying
70
-
71
-
72
- class BackoffStrategy(Enum):
73
- """
74
- Specifies the strategy for increasing the delay between retry attempts
75
- in backoff algorithms to reduce load and avoid overwhelming services.
76
-
77
- Strategies:
78
- EXPONENTIAL:
79
- The delay doubles with each retry attempt (e.g., 1s, 2s, 4s, 8s).
80
- This is the most widely used approach because it quickly reduces load on struggling systems.
81
- LINEAR:
82
- The delay increases by a fixed amount each time (e.g., 1s, 2s, 3s, 4s).
83
- This provides a more gradual reduction in request rate.
84
- FIXED:
85
- Uses the same delay between all retry attempts.
86
- Simple but less adaptive to system conditions.
87
-
88
- References:
89
- - AWS Architecture Blog: Exponential Backoff And Jitter
90
- """
91
-
92
- EXPONENTIAL = "exponential"
93
- """The delay doubles with each retry attempt (e.g., 1s, 2s, 4s, 8s).
94
- This is the most widely used approach because it quickly reduces load on struggling systems."""
95
- LINEAR = "linear"
96
- """The delay increases by a fixed amount each time (e.g., 1s, 2s, 3s, 4s).
97
- This provides a more gradual reduction in request rate."""
98
- FIXED = "fixed"
99
- """Uses the same delay between all retry attempts. Simple but less adaptive to system conditions."""
100
-
101
-
102
- class JitterStrategy(Enum):
103
- """
104
- Specifies the strategy for applying jitter (randomization) to retry intervals
105
- in backoff algorithms to avoid synchronized retries and reduce load spikes.
106
-
107
- Strategies:
108
- NONE:
109
- No jitter is applied. The retry interval is deterministic.
110
- FULL:
111
- Applies full jitter by selecting a random value uniformly between 0 and the calculated interval.
112
- This maximizes randomness but can result in very short delays.
113
- EQUAL:
114
- Applies "equal jitter" as described in the AWS Architecture Blog.
115
- The interval is randomized within [interval/2, interval], ensuring a minimum delay of half the interval.
116
- Note: This is not the same as "a jitter of 50% around interval" (which would be [0.5 * interval, 1.5 * interval]).
117
- PERCENT_10:
118
- Applies a jitter of ±10% around the base interval, resulting in a random interval within [0.9 * interval, 1.1 * interval].
119
-
120
- References:
121
- - AWS Architecture Blog: Exponential Backoff And Jitter
122
- """
123
-
124
- NONE = "none"
125
- """No jitter is applied to the backoff."""
126
- FULL = "full"
127
- """Maximum distribution but can be too random with very short intervals."""
128
- EQUAL = "equal"
129
- """Best balance, maintains backoff properties while preventing synchronization."""
130
- PERCENT_10 = "10%"
131
- """Add a jitter of 10% around the base interval."""
132
-
133
-
134
- def calculate_retry_interval(
135
- attempt_number,
136
- base_interval,
137
- max_interval,
138
- backoff_strategy: BackoffStrategy = BackoffStrategy.EXPONENTIAL,
139
- jitter_strategy: JitterStrategy = JitterStrategy.EQUAL,
140
- ):
141
- """
142
- Calculates the next retry interval based on the given backoff and jitter strategies.
143
-
144
- Args:
145
- attempt_number (int): The current retry attempt (starting from 0).
146
- base_interval (float): The initial interval in seconds.
147
- max_interval (float): The maximum allowed interval in seconds.
148
- backoff_strategy (BackoffStrategy): Strategy for increasing the delay (exponential, linear, or fixed).
149
- jitter_strategy (JitterStrategy): Strategy for randomizing the delay to avoid synchronization.
150
-
151
- Returns:
152
- float: The computed retry interval in seconds.
153
-
154
- Notes:
155
- - See the docstrings for BackoffStrategy and JitterStrategy for details on each strategy.
156
- - Based on best practices from the AWS Architecture Blog: Exponential Backoff And Jitter.
157
- """
158
-
159
- if backoff_strategy == BackoffStrategy.EXPONENTIAL:
160
- interval = min(base_interval * (2**attempt_number), max_interval)
161
- elif backoff_strategy == BackoffStrategy.LINEAR:
162
- interval = min(base_interval + attempt_number, max_interval)
163
- else:
164
- interval = base_interval
165
-
166
- if jitter_strategy == JitterStrategy.NONE:
167
- return interval
168
- elif jitter_strategy == JitterStrategy.FULL:
169
- return random.uniform(0, interval)
170
- elif jitter_strategy == JitterStrategy.EQUAL:
171
- return interval / 2 + random.uniform(0, interval / 2)
172
- elif jitter_strategy == JitterStrategy.PERCENT_10:
173
- jitter_amount = interval * 0.1
174
- return interval + random.uniform(-jitter_amount, jitter_amount)
175
-
176
- return interval
177
-
178
-
179
- class AsyncServiceConnector:
180
- """
181
- Asynchronous base class for robust service connection management with retry, backoff, and circuit breaker logic.
182
-
183
- This class is intended to be subclassed for managing persistent connections to external services
184
- (such as devices, databases, or remote APIs) that may be unreliable or temporarily unavailable.
185
-
186
- Features:
187
- - Automatic retry with configurable backoff and jitter strategies.
188
- - Circuit breaker to prevent repeated connection attempts after multiple failures.
189
- - Connection state tracking (disconnected, connecting, connected, circuit open).
190
-
191
- Usage:
192
- 1. Subclass `AsyncServiceConnector` and override the `connect_to_service()` coroutine with your
193
- actual connection logic. Optionally, override `health_check()` for custom health verification.
194
- 2. Store the actual connection object (e.g., socket, transport) as an instance attribute in your subclass.
195
- 3. Use `attempt_connection()` to initiate connection attempts; it will handle retries and backoff automatically.
196
- 4. Use `is_connected()` to check connection status.
197
-
198
- Example:
199
- class MyConnector(AsyncServiceConnector):
200
- async def connect_to_service(self):
201
- self.connection = await create_socket()
202
- return self.connection is not None
203
-
204
- def get_connection(self):
205
- return self.connection
206
-
207
- Note:
208
- The base class does not manage or expose the underlying connection object.
209
- Your subclass should provide a method or property to access it as needed.
210
- """
211
-
212
- def __init__(
213
- self,
214
- service_name: str,
215
- backoff_strategy: BackoffStrategy = BackoffStrategy.EXPONENTIAL,
216
- jitter_strategy: JitterStrategy = JitterStrategy.EQUAL,
217
- ):
218
- self.state = ConnectionState.DISCONNECTED
219
- self.last_attempt = 0
220
- self.base_interval = 1
221
- self.retry_interval = 1 # Start with 1 second
222
- self.max_retry_interval = 300 # Max 5 minutes
223
- self.failure_count = 0
224
- self.max_failures_before_circuit_break = 5
225
- self.circuit_break_duration = 60 # 1 minute
226
- self.circuit_opened_at = None
227
- self.backoff_strategy = backoff_strategy
228
- self.jitter_strategy = jitter_strategy
229
-
230
- self.service_name = service_name
231
-
232
- async def connect_to_service(self) -> bool:
233
- logger.warning(
234
- f"The connect_to_service() method is not implemented for {self.service_name}, connection will always fail."
235
- )
236
- return False
237
-
238
- async def disconnect_from_service(self) -> None:
239
- """
240
- Optional hook to cleanly disconnect / release resources for the service.
241
- Default implementation is a no-op. Subclasses should override to:
242
- - close async transports
243
- - cancel background tasks
244
- - set state to ConnectionState.DISCONNECTED
245
- - call device.disconnect()
246
- """
247
- logger.debug(f"{self.service_name}: default async disconnect_from_service(): no-op")
248
- self.state = ConnectionState.DISCONNECTED
249
- return
250
-
251
- async def health_check(self) -> bool:
252
- logger.warning(
253
- f"The health_check() method is not implemented for {self.service_name}, check will always return false."
254
- )
255
- return False
256
-
257
- def should_attempt_connection(self) -> bool:
258
- """Return True if we should attempt a new connection."""
259
- now = time.monotonic()
260
-
261
- # If circuit is open, check if we should close it
262
- if self.state == ConnectionState.CIRCUIT_OPEN:
263
- assert self.circuit_opened_at is not None
264
- circuit_break_open_since = now - self.circuit_opened_at
265
- logger.debug(f"{circuit_break_open_since=}")
266
- if circuit_break_open_since > self.circuit_break_duration:
267
- self.state = ConnectionState.DISCONNECTED
268
- self.failure_count = 0
269
- self.retry_interval = 1
270
- return True
271
- return False
272
-
273
- # Regular backoff logic
274
- return now - self.last_attempt >= self.retry_interval
275
-
276
- async def attempt_connection(self):
277
- """Try to connect to the service.
278
-
279
- This will execute the `connect_to_service()` that was overridden by the subclass.
280
- That function shall return True when the connection succeeded, False otherwise.
281
- """
282
- if self.state == ConnectionState.CONNECTED:
283
- # ensure the CONNECTED state is validated before skipping reconnection attempts
284
- # even is state is CONNECTED, the underlying connection could be stale or broken
285
- # or closed externally and unless you check the health here, you will never attempt
286
- # recovery.
287
- try:
288
- healthy = await self.health_check()
289
- except Exception as exc:
290
- logger.debug(f"health_check raised: {type_name(exc)} – {exc}")
291
- healthy = False
292
-
293
- if healthy:
294
- if VERBOSE_DEBUG:
295
- logger.debug(f"{self.service_name} already connected and healthy")
296
- return
297
-
298
- logger.info(
299
- f"{self.service_name} marked CONNECTED but health_check failed — disconnecting and reconnecting"
300
- )
301
- self.state = ConnectionState.DISCONNECTED
302
- try:
303
- # ensure the state is updated by disconnect hook (disconnect_from_service should set DISCONNECTED)
304
- await self.disconnect_from_service()
305
- except Exception as exc:
306
- if VERBOSE_DEBUG:
307
- logger.debug(f"Couldn't disconnect from {self.service_name}")
308
-
309
- if not self.should_attempt_connection():
310
- logger.debug("Not time yet to attempt new connection")
311
- return
312
-
313
- self.state = ConnectionState.CONNECTING
314
- self.last_attempt = time.monotonic()
315
-
316
- try:
317
- success = await self.connect_to_service()
318
-
319
- if success:
320
- self.state = ConnectionState.CONNECTED
321
- self.failure_count = 0
322
- self.retry_interval = 1 # Reset backoff
323
- logger.info(f"Successfully connected to service {self.service_name}")
324
- else:
325
- # warning should have been logged by the connect_to_service() callable.
326
- self.handle_connection_failure()
327
-
328
- except Exception as exc:
329
- logger.warning(f"Failed to connect to service {self.service_name}: {exc}")
330
- self.handle_connection_failure()
331
-
332
- def handle_connection_failure(self):
333
- self.failure_count += 1
334
-
335
- # Open circuit breaker if too many failures
336
- if self.failure_count >= self.max_failures_before_circuit_break:
337
- self.state = ConnectionState.CIRCUIT_OPEN
338
- self.circuit_opened_at = time.monotonic()
339
- logger.warning(
340
- f"Circuit breaker opened for service {self.service_name} after {self.failure_count} failures"
341
- )
342
- else:
343
- self.state = ConnectionState.DISCONNECTED
344
- self.retry_interval = calculate_retry_interval(
345
- self.failure_count,
346
- self.base_interval,
347
- self.max_retry_interval,
348
- self.backoff_strategy,
349
- self.jitter_strategy,
350
- )
351
- logger.debug(f"retry_interval={self.retry_interval}")
352
-
353
- def is_connected(self) -> bool:
354
- if VERBOSE_DEBUG:
355
- logger.debug(f"Checking if {self.service_name} is connected: {self.state.name}")
356
- return self.state == ConnectionState.CONNECTED
357
-
358
- def get_connection(self) -> Any:
359
- """
360
- Optional method to return the underlying connection object.
361
- Subclasses should override this method to return the actual connection
362
- (e.g., socket, transport) if needed.
363
- """
364
- logger.warning(f"The get_connection() method is not implemented for {self.service_name}, returning None.")
365
- return None
366
-
367
-
368
- class ServiceConnector:
369
- """
370
- Synchronous base class for robust service connection management with retry, backoff, and circuit breaker logic.
371
-
372
- This class is intended to be subclassed for managing persistent connections to external services
373
- (such as devices, databases, or remote APIs) that may be unreliable or temporarily unavailable.
374
-
375
- Features:
376
- - Automatic retry with configurable backoff and jitter strategies.
377
- - Circuit breaker to prevent repeated connection attempts after multiple failures.
378
- - Connection state tracking (disconnected, connecting, connected, circuit open).
379
- - Thread-safe operation using a lock for all state changes.
380
-
381
- Usage:
382
- 1. Subclass `ServiceConnector` and override the `connect_to_service()` method with your
383
- actual connection logic. Optionally, override `health_check()` for custom health verification.
384
- 2. Store the actual connection object (e.g., socket, transport) as an instance attribute in your subclass.
385
- 3. Use `attempt_connection()` to initiate connection attempts; it will handle retries and backoff automatically.
386
- 4. Use `is_connected()` to check connection status.
387
-
388
- Example:
389
- class MyConnector(ServiceConnector):
390
- def connect_to_service(self):
391
- self.connection = create_socket()
392
- return self.connection is not None
393
-
394
- def get_connection(self):
395
- return self.connection
396
-
397
- Note:
398
- The base class does not manage or expose the underlying connection object.
399
- Your subclass should provide a method or property to access it as needed.
400
- """
401
-
402
- def __init__(
403
- self,
404
- service_name: str,
405
- backoff_strategy: BackoffStrategy = BackoffStrategy.EXPONENTIAL,
406
- jitter_strategy: JitterStrategy = JitterStrategy.EQUAL,
407
- ):
408
- self.state = ConnectionState.DISCONNECTED
409
- self.last_attempt = 0
410
- self.base_interval = 1
411
- self.retry_interval = 1
412
- self.max_retry_interval = 300
413
- self.failure_count = 0
414
- self.max_failures_before_circuit_break = 5
415
- self.circuit_break_duration = 60
416
- self.circuit_opened_at = None
417
- self.service_name = service_name
418
- self.backoff_strategy = backoff_strategy
419
- self.jitter_strategy = jitter_strategy
420
-
421
- self._lock = threading.RLock()
422
-
423
- def connect_to_service(self) -> bool:
424
- logger.warning(
425
- f"The connect_to_service() method is not implemented for {self.service_name}, connection will always fail."
426
- )
427
- return False
428
-
429
- def disconnect_from_service(self) -> None:
430
- """
431
- Optional hook to cleanly disconnect / release resources for the service. Default implementation is a no-op.
432
- Subclasses should override and must be careful about thread-safety; the base class holds _lock which can be
433
- used.
434
- """
435
- with self._lock:
436
- logger.debug(f"{self.service_name}: default disconnect_from_service(): no-op")
437
- self.state = ConnectionState.DISCONNECTED
438
- return
439
-
440
- def health_check(self) -> bool:
441
- logger.warning(
442
- f"The health_check() method is not implemented for {self.service_name}, check will always return false."
443
- )
444
- return False
445
-
446
- def should_attempt_connection(self) -> bool:
447
- now = time.monotonic()
448
- with self._lock:
449
- if self.state == ConnectionState.CIRCUIT_OPEN:
450
- assert self.circuit_opened_at is not None
451
- if now - self.circuit_opened_at > self.circuit_break_duration:
452
- self.state = ConnectionState.DISCONNECTED
453
- self.failure_count = 0
454
- self.retry_interval = 1
455
- return True
456
- return False
457
- return now - self.last_attempt >= self.retry_interval
458
-
459
- def attempt_connection(self):
460
- with self._lock:
461
- current_state = self.state
462
-
463
- if current_state == ConnectionState.CONNECTED:
464
- # ensure the CONNECTED state is validated before skipping reconnection attempts
465
- try:
466
- healthy = self.health_check()
467
- except Exception as exc:
468
- logger.debug(f"health_check raised: {type_name(exc)} – {exc}")
469
- healthy = False
470
-
471
- if healthy:
472
- logger.debug(f"{self.service_name} already connected and healthy")
473
- return
474
-
475
- logger.info(
476
- f"{self.service_name} marked CONNECTED but health_check failed — disconnecting and reconnecting"
477
- )
478
- self.state = ConnectionState.DISCONNECTED
479
- try:
480
- # ensure the state is updated by disconnect hook (disconnect_from_service should set DISCONNECTED)
481
- self.disconnect_from_service()
482
- except Exception as exc:
483
- if VERBOSE_DEBUG:
484
- logger.debug(f"Couldn't disconnect from {self.service_name}: {type_name(exc)} – {exc}")
485
-
486
- with self._lock:
487
- if not self.should_attempt_connection():
488
- return
489
- self.state = ConnectionState.CONNECTING
490
- self.last_attempt = time.monotonic()
491
-
492
- try:
493
- success = self.connect_to_service()
494
- with self._lock:
495
- if success:
496
- self.state = ConnectionState.CONNECTED
497
- self.failure_count = 0
498
- self.retry_interval = 1
499
- logger.debug(f"Successfully connected to service {self.service_name}")
500
- else:
501
- self.handle_connection_failure()
502
- except Exception as exc:
503
- logger.error(f"Failed to connect to service {self.service_name}: {exc}")
504
- with self._lock:
505
- self.handle_connection_failure()
506
-
507
- def handle_connection_failure(self):
508
- self.failure_count += 1
509
- if self.failure_count >= self.max_failures_before_circuit_break:
510
- self.state = ConnectionState.CIRCUIT_OPEN
511
- self.circuit_opened_at = time.monotonic()
512
- logger.warning(
513
- f"Circuit breaker opened for service {self.service_name} after {self.failure_count} failures"
514
- )
515
- else:
516
- self.state = ConnectionState.DISCONNECTED
517
- self.retry_interval = calculate_retry_interval(
518
- self.failure_count,
519
- self.base_interval,
520
- self.max_retry_interval,
521
- self.backoff_strategy,
522
- self.jitter_strategy,
523
- )
524
- logger.debug(f"retry_interval={self.retry_interval}")
525
-
526
- def is_connected(self) -> bool:
527
- with self._lock:
528
- return self.state == ConnectionState.CONNECTED
egse/dummy.py CHANGED
@@ -35,7 +35,6 @@ and stopped with:
35
35
 
36
36
  from __future__ import annotations
37
37
 
38
- import contextlib
39
38
  import multiprocessing
40
39
  import random
41
40
  import select
@@ -53,14 +52,12 @@ from egse.device import DeviceConnectionError
53
52
  from egse.device import DeviceConnectionInterface
54
53
  from egse.device import DeviceTimeoutError
55
54
  from egse.device import DeviceTransport
56
- from egse.env import bool_env
57
55
  from egse.log import logger
58
56
  from egse.protocol import CommandProtocol
59
57
  from egse.proxy import Proxy
60
58
  from egse.system import SignalCatcher
61
59
  from egse.system import attrdict
62
60
  from egse.system import format_datetime
63
- from egse.system import type_name
64
61
  from egse.zmq_ser import bind_address
65
62
  from egse.zmq_ser import connect_address
66
63
 
@@ -80,9 +77,6 @@ WRITE_TIMEOUT = 1.0
80
77
  CONNECT_TIMEOUT = 3.0
81
78
  """The maximum time in seconds to wait for establishing a socket connect."""
82
79
 
83
-
84
- VERBOSE_DEBUG = bool_env("VERBOSE_DEBUG", default=False)
85
-
86
80
  # Especially DummyCommand and DummyController need to be defined in a known module
87
81
  # because those objects are pickled and when de-pickled at the clients side the class
88
82
  # definition must be known.
@@ -122,17 +116,14 @@ def is_dummy_cs_active() -> bool:
122
116
 
123
117
 
124
118
  def is_dummy_dev_active() -> bool:
125
- if VERBOSE_DEBUG:
126
- logger.debug("Checking if dummy device is active...")
127
119
  try:
128
120
  dev = DummyDeviceEthernetInterface(DEV_HOST, DEV_PORT)
129
121
  dev.connect()
130
122
  rc = dev.trans("ping\n")
131
123
  dev.disconnect()
132
124
  return rc.decode().strip() == "pong"
133
- except (DeviceConnectionError, ConnectionResetError, DeviceTimeoutError) as exc:
134
- if VERBOSE_DEBUG:
135
- logger.debug(f"Caught {type_name(exc)}: {exc} - returning False")
125
+ except DeviceConnectionError as exc:
126
+ # logger.error(f"Caught {type_name(exc)}: {exc}")
136
127
  return False
137
128
 
138
129
 
@@ -589,18 +580,11 @@ def start_dev():
589
580
  with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
590
581
  s.bind((DEV_HOST, DEV_PORT))
591
582
  s.listen()
592
- s.settimeout(CONNECT_TIMEOUT)
593
583
  logger.info(f"Ready to accept connection on {DEV_HOST}:{DEV_PORT}...")
594
- while True:
595
- with contextlib.suppress(socket.timeout):
596
- conn, addr = s.accept()
597
- break
598
- if killer.term_signal_received:
599
- return
584
+ conn, addr = s.accept()
600
585
  with conn:
601
586
  logger.info(f"Accepted connection from {addr}")
602
587
  conn.sendall(f"Dummy Device {__version__}".encode())
603
- conn.settimeout(READ_TIMEOUT)
604
588
  try:
605
589
  while True:
606
590
  error_msg = ""
egse/logger/__init__.py CHANGED
@@ -57,7 +57,7 @@ COMMANDER_PORT = settings.get("COMMANDER_PORT", 0) # dynamically assigned by th
57
57
  _initialised = False # will be set to True in the setup_logging() function
58
58
 
59
59
 
60
- def get_log_file_name() -> str:
60
+ def get_log_file_name():
61
61
  """
62
62
  Returns the filename of the log file as defined in the Settings or return the default name 'general.log'.
63
63
  """
@@ -315,7 +315,7 @@ def send_request(command_request: str):
315
315
  """Sends a request to the Logger Control Server and waits for a response."""
316
316
 
317
317
  if COMMANDER_PORT == 0:
318
- endpoint = get_endpoint_from_registry(SERVICE_TYPE)
318
+ endpoint = get_endpoint_from_registry()
319
319
  else:
320
320
  endpoint = f"{PROTOCOL}://{HOSTNAME}:{COMMANDER_PORT}"
321
321