cgse-core 0.17.2__py3-none-any.whl → 0.17.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cgse_core-0.17.2.dist-info → cgse_core-0.17.3.dist-info}/METADATA +1 -1
- {cgse_core-0.17.2.dist-info → cgse_core-0.17.3.dist-info}/RECORD +11 -17
- egse/connect.py +4 -477
- egse/dummy.py +3 -19
- egse/logger/__init__.py +2 -2
- egse/notifyhub/server.py +1 -3
- egse/registry/client.py +3 -9
- egse/registry/server.py +3 -11
- egse/registry/service.py +5 -11
- egse/async_control.py +0 -1085
- egse/async_control_claude.py +0 -807
- egse/confman/confman_acs.py +0 -35
- egse/metricshub/__init__.py +0 -0
- egse/metricshub/server.py +0 -271
- egse/notifyhub/test.py +0 -303
- {cgse_core-0.17.2.dist-info → cgse_core-0.17.3.dist-info}/WHEEL +0 -0
- {cgse_core-0.17.2.dist-info → cgse_core-0.17.3.dist-info}/entry_points.txt +0 -0
|
@@ -6,12 +6,10 @@ cgse_core/cgse_explore.py,sha256=8jxAxYDsjPUZftUccPCneHaqijRHxBPZuaOo0ESmBUs,460
|
|
|
6
6
|
cgse_core/services.py,sha256=_a1d1Dcf9L0F1IF3w6kHrACE5EDnqtNUd8KzrTZDu5U,8595
|
|
7
7
|
cgse_core/settings.yaml,sha256=6RcZ3aI7IUSB6GaZ8e9b_JEiql0l3l7h-VowDycv5AE,3625
|
|
8
8
|
egse/_setup_core.py,sha256=ei4a7tGYfDxc16kfhrF11Tm3cOSnl48MsSGfrWu0vVg,5983
|
|
9
|
-
egse/async_control.py,sha256=V4cx89EIuohZCJEMNXiwKWQFPDWRY6Oj0w5xpo3wO3A,45274
|
|
10
|
-
egse/async_control_claude.py,sha256=oMI_g7euQ6Z1dowmVHfijZImiB_sMQl5sHC84DMX2Fs,31609
|
|
11
9
|
egse/command.py,sha256=529_T619qG3Xif9UkxZS8mj2ibr6eljfbVsMM-K_AII,21965
|
|
12
|
-
egse/connect.py,sha256=
|
|
10
|
+
egse/connect.py,sha256=BDMzTt4URx7TpNTPyzb1Ye3Ch09enxm5yIzcxMRga_A,1834
|
|
13
11
|
egse/control.py,sha256=u4bWsKNW0tl4gNMaSMyUA02xXwvcxCVe2TsdfJpTkMs,28424
|
|
14
|
-
egse/dummy.py,sha256=
|
|
12
|
+
egse/dummy.py,sha256=I63wXBdC30WqnP6gz1sJ4YBHjrWumeXhiHbF49oF_ZQ,21909
|
|
15
13
|
egse/listener.py,sha256=kD62oD7w1yBVV0M2Eq5MNEQyVaJvlsl0RyhYCzVkgQs,6493
|
|
16
14
|
egse/mixin.py,sha256=fklH61C7TtaTnXx7FQjjJCgUQ2PsQv5baw0SVBBdbH0,20265
|
|
17
15
|
egse/monitoring.py,sha256=0Hbd5SHUU2YWFm8gbzlyPMUMjTezKap_q9yE4T6Vkk0,9716
|
|
@@ -22,7 +20,6 @@ egse/services.yaml,sha256=0VRxQGM0TwExJl4GEXYakWSaE9W5Tav5Og-ZV7vogto,2612
|
|
|
22
20
|
egse/confman/__init__.py,sha256=HtLxM7u7v9mc8eJ4KTFfsWiRRAMmyvI0Blo_4Yb2nC0,39993
|
|
23
21
|
egse/confman/__main__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
22
|
egse/confman/confman.yaml,sha256=CaS4potNTiy2-Qqc5Zo2Lx8c5hh4wLYhtxwifh-hP5M,3018
|
|
25
|
-
egse/confman/confman_acs.py,sha256=Z7QfCUiAZ8tzj7ujHTpG0Kdw7VA76x-fnsWsOntHU2Y,1044
|
|
26
23
|
egse/confman/confman_cs.py,sha256=JeAWC1T9RBppPVX-arWG87wS5_QrjKN8KUszQV8XBcQ,9981
|
|
27
24
|
egse/icons/busy.svg,sha256=fKNR2AxNupsAga7huDQF-kEYTThMkxgqGVZvXbma0G4,2964
|
|
28
25
|
egse/icons/operational-mode.svg,sha256=P6uhgnlXF5UXiJXIlpawklTcCZQePPEI5fGONtL_Y3o,5199
|
|
@@ -31,17 +28,14 @@ egse/icons/simulator-mode.svg,sha256=iDU6i0nTqyeYJOY9NAIj6NvjBQWAo2XSJ__35funhXY
|
|
|
31
28
|
egse/icons/start-process-button.svg,sha256=lvOmRzafhldlAbjKtBjuqrRDjBwyaVnd3yK2ees22JM,476
|
|
32
29
|
egse/icons/stop-process-button.svg,sha256=ZysOJooxOXEbup9zd6077M36OS0S5jQQkny9jfQWBfI,456
|
|
33
30
|
egse/icons/user-interface.svg,sha256=q_KWgHJ9ATdbw-HFghoaGQIBe52ZyxyAKitHpDaFWm8,2375
|
|
34
|
-
egse/logger/__init__.py,sha256=
|
|
31
|
+
egse/logger/__init__.py,sha256=5Nku3dpZVBdawiKZJgNyhuw5-ppm1K9lSb_p5uinp-o,10763
|
|
35
32
|
egse/logger/__main__.py,sha256=_etegNrUM9IWHNTOObDVY1DiLn9SJyf8eHktVBbN-OQ,1765
|
|
36
33
|
egse/logger/log_cs.py,sha256=3k-HUnfrJY2072cjF0TXBv9TyNd4GzCSzazx03zfJss,14296
|
|
37
|
-
egse/metricshub/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
|
-
egse/metricshub/server.py,sha256=2arZ2OOsc7L3_tHnpXNw91joYnVo1nxzqLFuwO-GSzM,8780
|
|
39
34
|
egse/notifyhub/__init__.py,sha256=pbCy5PHSR7OKcD1heaMIDO6ztkyFlpsvc-i2pyOdCpU,2438
|
|
40
35
|
egse/notifyhub/client.py,sha256=VOFlxcotEm3JAe5eGMDXpCRejXzDb3-7k4gIPgcgmpY,10536
|
|
41
36
|
egse/notifyhub/event.py,sha256=Yo8uvnK27uzaCLjJa8IK734Hcg3eLuD_KCd7eZ80XBQ,792
|
|
42
|
-
egse/notifyhub/server.py,sha256=
|
|
37
|
+
egse/notifyhub/server.py,sha256=_bK45WZhu6KKuJMLLzQdDLBVz6Z_d4n0UFf77ydiDns,14520
|
|
43
38
|
egse/notifyhub/services.py,sha256=LpKxJfFszwoQN9Kb5f5lkjjKLbvwCfHSC2mt1WZt1wc,10494
|
|
44
|
-
egse/notifyhub/test.py,sha256=61-hEii3KUho_nheTz6zCuUvQtIPZ_zZWt3Gpxmhk8g,9922
|
|
45
39
|
egse/procman/__init__.py,sha256=w2ilHHVuznkNCK-Syd9WVayktW8um8_EsKF5JQJDZK0,13946
|
|
46
40
|
egse/procman/procman.yaml,sha256=miAq3GE4PSKm8QZDfjvmtBrYCt8GyzJDJm-JzzgdznQ,847
|
|
47
41
|
egse/procman/procman_cs.py,sha256=lvuhmGdd1GmkdkXYNS9slNT9VJF09OtffZrZmCdy97I,5390
|
|
@@ -49,15 +43,15 @@ egse/procman/procman_protocol.py,sha256=kFUWQb66d5aDE7qHM9xYmsO07UB-6ro2hJ-E7o_O
|
|
|
49
43
|
egse/procman/procman_ui.py,sha256=5pU4-Dtma5aY8k4PaHQpJUe-DSwjpP-V1sJpbuUMlLg,42070
|
|
50
44
|
egse/registry/__init__.py,sha256=U7ap_245Bid74nn7Ncd0GXu0aetR95dJHltGRwBCVb8,1349
|
|
51
45
|
egse/registry/backend.py,sha256=-kRXncO949YETb1S2GFedhYTt1O0teiKYUUPxeQqX9E,22120
|
|
52
|
-
egse/registry/client.py,sha256
|
|
53
|
-
egse/registry/server.py,sha256
|
|
54
|
-
egse/registry/service.py,sha256=
|
|
46
|
+
egse/registry/client.py,sha256=-fivaDQSTb_SjbZWcir_MpH5hciMSJHVinuSS5RBmMU,42874
|
|
47
|
+
egse/registry/server.py,sha256=1Zv-1VkGhpKRo_P3gRRnlN1UuXJbFnYWX3fOVtgKe-g,21869
|
|
48
|
+
egse/registry/service.py,sha256=QnKVICWiuHmzESmy2H4VEtJ-tGX8hSV6n6qq6ejkWAY,15125
|
|
55
49
|
egse/storage/__init__.py,sha256=xtMdHdtPT9-oqTp2bpWw7Os3qUgN8TdqZNuaj3glxn0,43147
|
|
56
50
|
egse/storage/__main__.py,sha256=LI9fxlsFWmEd5LcWUB0xA8i7Yt6UHgnblB4G0aTi3pI,28
|
|
57
51
|
egse/storage/persistence.py,sha256=35fvuCPuGTSCc2MfmFLLNU03xYq3CEaJQspot4f-Pvw,18274
|
|
58
52
|
egse/storage/storage.yaml,sha256=l3HtPx_bAbXoV4f3_PXWAa1tP-fY2S6roSBSBiOHodE,2712
|
|
59
53
|
egse/storage/storage_cs.py,sha256=172llnKef1fdiDXcnzYllw_q12bVyuGJGh_3XpeDVCU,7377
|
|
60
|
-
cgse_core-0.17.
|
|
61
|
-
cgse_core-0.17.
|
|
62
|
-
cgse_core-0.17.
|
|
63
|
-
cgse_core-0.17.
|
|
54
|
+
cgse_core-0.17.3.dist-info/METADATA,sha256=cG8dHirqx3LtMgrrOJkxOHtUJEyCrsU8zs4OfzP1hz0,582
|
|
55
|
+
cgse_core-0.17.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
56
|
+
cgse_core-0.17.3.dist-info/entry_points.txt,sha256=IwIG7aGgfUehol29ufcKd559S88t3TJdh1LMJ5YymCE,976
|
|
57
|
+
cgse_core-0.17.3.dist-info/RECORD,,
|
egse/connect.py
CHANGED
|
@@ -1,12 +1,5 @@
|
|
|
1
|
-
import random
|
|
2
|
-
import threading
|
|
3
|
-
import time
|
|
4
|
-
from enum import Enum
|
|
5
|
-
from typing import Any
|
|
6
|
-
|
|
7
1
|
from egse.env import bool_env
|
|
8
2
|
from egse.log import logging
|
|
9
|
-
from egse.system import type_name
|
|
10
3
|
from egse.zmq_ser import connect_address
|
|
11
4
|
|
|
12
5
|
logger = logging.getLogger("egse.connect")
|
|
@@ -17,7 +10,7 @@ VERBOSE_DEBUG = bool_env("VERBOSE_DEBUG")
|
|
|
17
10
|
|
|
18
11
|
|
|
19
12
|
def get_endpoint(
|
|
20
|
-
service_type: str
|
|
13
|
+
service_type: str,
|
|
21
14
|
protocol: str = "tcp",
|
|
22
15
|
hostname: str = "localhost",
|
|
23
16
|
port: int = 0,
|
|
@@ -48,481 +41,15 @@ def get_endpoint(
|
|
|
48
41
|
endpoint = reg.get_endpoint(service_type)
|
|
49
42
|
if endpoint:
|
|
50
43
|
if VERBOSE_DEBUG:
|
|
51
|
-
logger.debug(f"Endpoint for
|
|
44
|
+
logger.debug(f"Endpoint for {service_type} found in registry: {endpoint}")
|
|
52
45
|
else:
|
|
53
|
-
logger.warning(f"No endpoint for
|
|
46
|
+
logger.warning(f"No endpoint for {service_type} found in registry.")
|
|
54
47
|
|
|
55
48
|
if not endpoint:
|
|
56
49
|
if port == 0:
|
|
57
|
-
raise RuntimeError(f"No service registered as
|
|
50
|
+
raise RuntimeError(f"No service registered as {service_type} and no port provided.")
|
|
58
51
|
endpoint = connect_address(protocol, hostname, port)
|
|
59
52
|
if VERBOSE_DEBUG:
|
|
60
53
|
logger.debug(f"Endpoint constructed from protocol/hostname/port: {endpoint}")
|
|
61
54
|
|
|
62
55
|
return endpoint
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
class ConnectionState(Enum):
|
|
66
|
-
DISCONNECTED = "disconnected"
|
|
67
|
-
CONNECTING = "connecting"
|
|
68
|
-
CONNECTED = "connected"
|
|
69
|
-
CIRCUIT_OPEN = "circuit_open" # Temporarily stopped trying
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
class BackoffStrategy(Enum):
|
|
73
|
-
"""
|
|
74
|
-
Specifies the strategy for increasing the delay between retry attempts
|
|
75
|
-
in backoff algorithms to reduce load and avoid overwhelming services.
|
|
76
|
-
|
|
77
|
-
Strategies:
|
|
78
|
-
EXPONENTIAL:
|
|
79
|
-
The delay doubles with each retry attempt (e.g., 1s, 2s, 4s, 8s).
|
|
80
|
-
This is the most widely used approach because it quickly reduces load on struggling systems.
|
|
81
|
-
LINEAR:
|
|
82
|
-
The delay increases by a fixed amount each time (e.g., 1s, 2s, 3s, 4s).
|
|
83
|
-
This provides a more gradual reduction in request rate.
|
|
84
|
-
FIXED:
|
|
85
|
-
Uses the same delay between all retry attempts.
|
|
86
|
-
Simple but less adaptive to system conditions.
|
|
87
|
-
|
|
88
|
-
References:
|
|
89
|
-
- AWS Architecture Blog: Exponential Backoff And Jitter
|
|
90
|
-
"""
|
|
91
|
-
|
|
92
|
-
EXPONENTIAL = "exponential"
|
|
93
|
-
"""The delay doubles with each retry attempt (e.g., 1s, 2s, 4s, 8s).
|
|
94
|
-
This is the most widely used approach because it quickly reduces load on struggling systems."""
|
|
95
|
-
LINEAR = "linear"
|
|
96
|
-
"""The delay increases by a fixed amount each time (e.g., 1s, 2s, 3s, 4s).
|
|
97
|
-
This provides a more gradual reduction in request rate."""
|
|
98
|
-
FIXED = "fixed"
|
|
99
|
-
"""Uses the same delay between all retry attempts. Simple but less adaptive to system conditions."""
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
class JitterStrategy(Enum):
|
|
103
|
-
"""
|
|
104
|
-
Specifies the strategy for applying jitter (randomization) to retry intervals
|
|
105
|
-
in backoff algorithms to avoid synchronized retries and reduce load spikes.
|
|
106
|
-
|
|
107
|
-
Strategies:
|
|
108
|
-
NONE:
|
|
109
|
-
No jitter is applied. The retry interval is deterministic.
|
|
110
|
-
FULL:
|
|
111
|
-
Applies full jitter by selecting a random value uniformly between 0 and the calculated interval.
|
|
112
|
-
This maximizes randomness but can result in very short delays.
|
|
113
|
-
EQUAL:
|
|
114
|
-
Applies "equal jitter" as described in the AWS Architecture Blog.
|
|
115
|
-
The interval is randomized within [interval/2, interval], ensuring a minimum delay of half the interval.
|
|
116
|
-
Note: This is not the same as "a jitter of 50% around interval" (which would be [0.5 * interval, 1.5 * interval]).
|
|
117
|
-
PERCENT_10:
|
|
118
|
-
Applies a jitter of ±10% around the base interval, resulting in a random interval within [0.9 * interval, 1.1 * interval].
|
|
119
|
-
|
|
120
|
-
References:
|
|
121
|
-
- AWS Architecture Blog: Exponential Backoff And Jitter
|
|
122
|
-
"""
|
|
123
|
-
|
|
124
|
-
NONE = "none"
|
|
125
|
-
"""No jitter is applied to the backoff."""
|
|
126
|
-
FULL = "full"
|
|
127
|
-
"""Maximum distribution but can be too random with very short intervals."""
|
|
128
|
-
EQUAL = "equal"
|
|
129
|
-
"""Best balance, maintains backoff properties while preventing synchronization."""
|
|
130
|
-
PERCENT_10 = "10%"
|
|
131
|
-
"""Add a jitter of 10% around the base interval."""
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
def calculate_retry_interval(
|
|
135
|
-
attempt_number,
|
|
136
|
-
base_interval,
|
|
137
|
-
max_interval,
|
|
138
|
-
backoff_strategy: BackoffStrategy = BackoffStrategy.EXPONENTIAL,
|
|
139
|
-
jitter_strategy: JitterStrategy = JitterStrategy.EQUAL,
|
|
140
|
-
):
|
|
141
|
-
"""
|
|
142
|
-
Calculates the next retry interval based on the given backoff and jitter strategies.
|
|
143
|
-
|
|
144
|
-
Args:
|
|
145
|
-
attempt_number (int): The current retry attempt (starting from 0).
|
|
146
|
-
base_interval (float): The initial interval in seconds.
|
|
147
|
-
max_interval (float): The maximum allowed interval in seconds.
|
|
148
|
-
backoff_strategy (BackoffStrategy): Strategy for increasing the delay (exponential, linear, or fixed).
|
|
149
|
-
jitter_strategy (JitterStrategy): Strategy for randomizing the delay to avoid synchronization.
|
|
150
|
-
|
|
151
|
-
Returns:
|
|
152
|
-
float: The computed retry interval in seconds.
|
|
153
|
-
|
|
154
|
-
Notes:
|
|
155
|
-
- See the docstrings for BackoffStrategy and JitterStrategy for details on each strategy.
|
|
156
|
-
- Based on best practices from the AWS Architecture Blog: Exponential Backoff And Jitter.
|
|
157
|
-
"""
|
|
158
|
-
|
|
159
|
-
if backoff_strategy == BackoffStrategy.EXPONENTIAL:
|
|
160
|
-
interval = min(base_interval * (2**attempt_number), max_interval)
|
|
161
|
-
elif backoff_strategy == BackoffStrategy.LINEAR:
|
|
162
|
-
interval = min(base_interval + attempt_number, max_interval)
|
|
163
|
-
else:
|
|
164
|
-
interval = base_interval
|
|
165
|
-
|
|
166
|
-
if jitter_strategy == JitterStrategy.NONE:
|
|
167
|
-
return interval
|
|
168
|
-
elif jitter_strategy == JitterStrategy.FULL:
|
|
169
|
-
return random.uniform(0, interval)
|
|
170
|
-
elif jitter_strategy == JitterStrategy.EQUAL:
|
|
171
|
-
return interval / 2 + random.uniform(0, interval / 2)
|
|
172
|
-
elif jitter_strategy == JitterStrategy.PERCENT_10:
|
|
173
|
-
jitter_amount = interval * 0.1
|
|
174
|
-
return interval + random.uniform(-jitter_amount, jitter_amount)
|
|
175
|
-
|
|
176
|
-
return interval
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
class AsyncServiceConnector:
|
|
180
|
-
"""
|
|
181
|
-
Asynchronous base class for robust service connection management with retry, backoff, and circuit breaker logic.
|
|
182
|
-
|
|
183
|
-
This class is intended to be subclassed for managing persistent connections to external services
|
|
184
|
-
(such as devices, databases, or remote APIs) that may be unreliable or temporarily unavailable.
|
|
185
|
-
|
|
186
|
-
Features:
|
|
187
|
-
- Automatic retry with configurable backoff and jitter strategies.
|
|
188
|
-
- Circuit breaker to prevent repeated connection attempts after multiple failures.
|
|
189
|
-
- Connection state tracking (disconnected, connecting, connected, circuit open).
|
|
190
|
-
|
|
191
|
-
Usage:
|
|
192
|
-
1. Subclass `AsyncServiceConnector` and override the `connect_to_service()` coroutine with your
|
|
193
|
-
actual connection logic. Optionally, override `health_check()` for custom health verification.
|
|
194
|
-
2. Store the actual connection object (e.g., socket, transport) as an instance attribute in your subclass.
|
|
195
|
-
3. Use `attempt_connection()` to initiate connection attempts; it will handle retries and backoff automatically.
|
|
196
|
-
4. Use `is_connected()` to check connection status.
|
|
197
|
-
|
|
198
|
-
Example:
|
|
199
|
-
class MyConnector(AsyncServiceConnector):
|
|
200
|
-
async def connect_to_service(self):
|
|
201
|
-
self.connection = await create_socket()
|
|
202
|
-
return self.connection is not None
|
|
203
|
-
|
|
204
|
-
def get_connection(self):
|
|
205
|
-
return self.connection
|
|
206
|
-
|
|
207
|
-
Note:
|
|
208
|
-
The base class does not manage or expose the underlying connection object.
|
|
209
|
-
Your subclass should provide a method or property to access it as needed.
|
|
210
|
-
"""
|
|
211
|
-
|
|
212
|
-
def __init__(
|
|
213
|
-
self,
|
|
214
|
-
service_name: str,
|
|
215
|
-
backoff_strategy: BackoffStrategy = BackoffStrategy.EXPONENTIAL,
|
|
216
|
-
jitter_strategy: JitterStrategy = JitterStrategy.EQUAL,
|
|
217
|
-
):
|
|
218
|
-
self.state = ConnectionState.DISCONNECTED
|
|
219
|
-
self.last_attempt = 0
|
|
220
|
-
self.base_interval = 1
|
|
221
|
-
self.retry_interval = 1 # Start with 1 second
|
|
222
|
-
self.max_retry_interval = 300 # Max 5 minutes
|
|
223
|
-
self.failure_count = 0
|
|
224
|
-
self.max_failures_before_circuit_break = 5
|
|
225
|
-
self.circuit_break_duration = 60 # 1 minute
|
|
226
|
-
self.circuit_opened_at = None
|
|
227
|
-
self.backoff_strategy = backoff_strategy
|
|
228
|
-
self.jitter_strategy = jitter_strategy
|
|
229
|
-
|
|
230
|
-
self.service_name = service_name
|
|
231
|
-
|
|
232
|
-
async def connect_to_service(self) -> bool:
|
|
233
|
-
logger.warning(
|
|
234
|
-
f"The connect_to_service() method is not implemented for {self.service_name}, connection will always fail."
|
|
235
|
-
)
|
|
236
|
-
return False
|
|
237
|
-
|
|
238
|
-
async def disconnect_from_service(self) -> None:
|
|
239
|
-
"""
|
|
240
|
-
Optional hook to cleanly disconnect / release resources for the service.
|
|
241
|
-
Default implementation is a no-op. Subclasses should override to:
|
|
242
|
-
- close async transports
|
|
243
|
-
- cancel background tasks
|
|
244
|
-
- set state to ConnectionState.DISCONNECTED
|
|
245
|
-
- call device.disconnect()
|
|
246
|
-
"""
|
|
247
|
-
logger.debug(f"{self.service_name}: default async disconnect_from_service(): no-op")
|
|
248
|
-
self.state = ConnectionState.DISCONNECTED
|
|
249
|
-
return
|
|
250
|
-
|
|
251
|
-
async def health_check(self) -> bool:
|
|
252
|
-
logger.warning(
|
|
253
|
-
f"The health_check() method is not implemented for {self.service_name}, check will always return false."
|
|
254
|
-
)
|
|
255
|
-
return False
|
|
256
|
-
|
|
257
|
-
def should_attempt_connection(self) -> bool:
|
|
258
|
-
"""Return True if we should attempt a new connection."""
|
|
259
|
-
now = time.monotonic()
|
|
260
|
-
|
|
261
|
-
# If circuit is open, check if we should close it
|
|
262
|
-
if self.state == ConnectionState.CIRCUIT_OPEN:
|
|
263
|
-
assert self.circuit_opened_at is not None
|
|
264
|
-
circuit_break_open_since = now - self.circuit_opened_at
|
|
265
|
-
logger.debug(f"{circuit_break_open_since=}")
|
|
266
|
-
if circuit_break_open_since > self.circuit_break_duration:
|
|
267
|
-
self.state = ConnectionState.DISCONNECTED
|
|
268
|
-
self.failure_count = 0
|
|
269
|
-
self.retry_interval = 1
|
|
270
|
-
return True
|
|
271
|
-
return False
|
|
272
|
-
|
|
273
|
-
# Regular backoff logic
|
|
274
|
-
return now - self.last_attempt >= self.retry_interval
|
|
275
|
-
|
|
276
|
-
async def attempt_connection(self):
|
|
277
|
-
"""Try to connect to the service.
|
|
278
|
-
|
|
279
|
-
This will execute the `connect_to_service()` that was overridden by the subclass.
|
|
280
|
-
That function shall return True when the connection succeeded, False otherwise.
|
|
281
|
-
"""
|
|
282
|
-
if self.state == ConnectionState.CONNECTED:
|
|
283
|
-
# ensure the CONNECTED state is validated before skipping reconnection attempts
|
|
284
|
-
# even is state is CONNECTED, the underlying connection could be stale or broken
|
|
285
|
-
# or closed externally and unless you check the health here, you will never attempt
|
|
286
|
-
# recovery.
|
|
287
|
-
try:
|
|
288
|
-
healthy = await self.health_check()
|
|
289
|
-
except Exception as exc:
|
|
290
|
-
logger.debug(f"health_check raised: {type_name(exc)} – {exc}")
|
|
291
|
-
healthy = False
|
|
292
|
-
|
|
293
|
-
if healthy:
|
|
294
|
-
if VERBOSE_DEBUG:
|
|
295
|
-
logger.debug(f"{self.service_name} already connected and healthy")
|
|
296
|
-
return
|
|
297
|
-
|
|
298
|
-
logger.info(
|
|
299
|
-
f"{self.service_name} marked CONNECTED but health_check failed — disconnecting and reconnecting"
|
|
300
|
-
)
|
|
301
|
-
self.state = ConnectionState.DISCONNECTED
|
|
302
|
-
try:
|
|
303
|
-
# ensure the state is updated by disconnect hook (disconnect_from_service should set DISCONNECTED)
|
|
304
|
-
await self.disconnect_from_service()
|
|
305
|
-
except Exception as exc:
|
|
306
|
-
if VERBOSE_DEBUG:
|
|
307
|
-
logger.debug(f"Couldn't disconnect from {self.service_name}")
|
|
308
|
-
|
|
309
|
-
if not self.should_attempt_connection():
|
|
310
|
-
logger.debug("Not time yet to attempt new connection")
|
|
311
|
-
return
|
|
312
|
-
|
|
313
|
-
self.state = ConnectionState.CONNECTING
|
|
314
|
-
self.last_attempt = time.monotonic()
|
|
315
|
-
|
|
316
|
-
try:
|
|
317
|
-
success = await self.connect_to_service()
|
|
318
|
-
|
|
319
|
-
if success:
|
|
320
|
-
self.state = ConnectionState.CONNECTED
|
|
321
|
-
self.failure_count = 0
|
|
322
|
-
self.retry_interval = 1 # Reset backoff
|
|
323
|
-
logger.info(f"Successfully connected to service {self.service_name}")
|
|
324
|
-
else:
|
|
325
|
-
# warning should have been logged by the connect_to_service() callable.
|
|
326
|
-
self.handle_connection_failure()
|
|
327
|
-
|
|
328
|
-
except Exception as exc:
|
|
329
|
-
logger.warning(f"Failed to connect to service {self.service_name}: {exc}")
|
|
330
|
-
self.handle_connection_failure()
|
|
331
|
-
|
|
332
|
-
def handle_connection_failure(self):
|
|
333
|
-
self.failure_count += 1
|
|
334
|
-
|
|
335
|
-
# Open circuit breaker if too many failures
|
|
336
|
-
if self.failure_count >= self.max_failures_before_circuit_break:
|
|
337
|
-
self.state = ConnectionState.CIRCUIT_OPEN
|
|
338
|
-
self.circuit_opened_at = time.monotonic()
|
|
339
|
-
logger.warning(
|
|
340
|
-
f"Circuit breaker opened for service {self.service_name} after {self.failure_count} failures"
|
|
341
|
-
)
|
|
342
|
-
else:
|
|
343
|
-
self.state = ConnectionState.DISCONNECTED
|
|
344
|
-
self.retry_interval = calculate_retry_interval(
|
|
345
|
-
self.failure_count,
|
|
346
|
-
self.base_interval,
|
|
347
|
-
self.max_retry_interval,
|
|
348
|
-
self.backoff_strategy,
|
|
349
|
-
self.jitter_strategy,
|
|
350
|
-
)
|
|
351
|
-
logger.debug(f"retry_interval={self.retry_interval}")
|
|
352
|
-
|
|
353
|
-
def is_connected(self) -> bool:
|
|
354
|
-
if VERBOSE_DEBUG:
|
|
355
|
-
logger.debug(f"Checking if {self.service_name} is connected: {self.state.name}")
|
|
356
|
-
return self.state == ConnectionState.CONNECTED
|
|
357
|
-
|
|
358
|
-
def get_connection(self) -> Any:
|
|
359
|
-
"""
|
|
360
|
-
Optional method to return the underlying connection object.
|
|
361
|
-
Subclasses should override this method to return the actual connection
|
|
362
|
-
(e.g., socket, transport) if needed.
|
|
363
|
-
"""
|
|
364
|
-
logger.warning(f"The get_connection() method is not implemented for {self.service_name}, returning None.")
|
|
365
|
-
return None
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
class ServiceConnector:
|
|
369
|
-
"""
|
|
370
|
-
Synchronous base class for robust service connection management with retry, backoff, and circuit breaker logic.
|
|
371
|
-
|
|
372
|
-
This class is intended to be subclassed for managing persistent connections to external services
|
|
373
|
-
(such as devices, databases, or remote APIs) that may be unreliable or temporarily unavailable.
|
|
374
|
-
|
|
375
|
-
Features:
|
|
376
|
-
- Automatic retry with configurable backoff and jitter strategies.
|
|
377
|
-
- Circuit breaker to prevent repeated connection attempts after multiple failures.
|
|
378
|
-
- Connection state tracking (disconnected, connecting, connected, circuit open).
|
|
379
|
-
- Thread-safe operation using a lock for all state changes.
|
|
380
|
-
|
|
381
|
-
Usage:
|
|
382
|
-
1. Subclass `ServiceConnector` and override the `connect_to_service()` method with your
|
|
383
|
-
actual connection logic. Optionally, override `health_check()` for custom health verification.
|
|
384
|
-
2. Store the actual connection object (e.g., socket, transport) as an instance attribute in your subclass.
|
|
385
|
-
3. Use `attempt_connection()` to initiate connection attempts; it will handle retries and backoff automatically.
|
|
386
|
-
4. Use `is_connected()` to check connection status.
|
|
387
|
-
|
|
388
|
-
Example:
|
|
389
|
-
class MyConnector(ServiceConnector):
|
|
390
|
-
def connect_to_service(self):
|
|
391
|
-
self.connection = create_socket()
|
|
392
|
-
return self.connection is not None
|
|
393
|
-
|
|
394
|
-
def get_connection(self):
|
|
395
|
-
return self.connection
|
|
396
|
-
|
|
397
|
-
Note:
|
|
398
|
-
The base class does not manage or expose the underlying connection object.
|
|
399
|
-
Your subclass should provide a method or property to access it as needed.
|
|
400
|
-
"""
|
|
401
|
-
|
|
402
|
-
def __init__(
|
|
403
|
-
self,
|
|
404
|
-
service_name: str,
|
|
405
|
-
backoff_strategy: BackoffStrategy = BackoffStrategy.EXPONENTIAL,
|
|
406
|
-
jitter_strategy: JitterStrategy = JitterStrategy.EQUAL,
|
|
407
|
-
):
|
|
408
|
-
self.state = ConnectionState.DISCONNECTED
|
|
409
|
-
self.last_attempt = 0
|
|
410
|
-
self.base_interval = 1
|
|
411
|
-
self.retry_interval = 1
|
|
412
|
-
self.max_retry_interval = 300
|
|
413
|
-
self.failure_count = 0
|
|
414
|
-
self.max_failures_before_circuit_break = 5
|
|
415
|
-
self.circuit_break_duration = 60
|
|
416
|
-
self.circuit_opened_at = None
|
|
417
|
-
self.service_name = service_name
|
|
418
|
-
self.backoff_strategy = backoff_strategy
|
|
419
|
-
self.jitter_strategy = jitter_strategy
|
|
420
|
-
|
|
421
|
-
self._lock = threading.RLock()
|
|
422
|
-
|
|
423
|
-
def connect_to_service(self) -> bool:
|
|
424
|
-
logger.warning(
|
|
425
|
-
f"The connect_to_service() method is not implemented for {self.service_name}, connection will always fail."
|
|
426
|
-
)
|
|
427
|
-
return False
|
|
428
|
-
|
|
429
|
-
def disconnect_from_service(self) -> None:
|
|
430
|
-
"""
|
|
431
|
-
Optional hook to cleanly disconnect / release resources for the service. Default implementation is a no-op.
|
|
432
|
-
Subclasses should override and must be careful about thread-safety; the base class holds _lock which can be
|
|
433
|
-
used.
|
|
434
|
-
"""
|
|
435
|
-
with self._lock:
|
|
436
|
-
logger.debug(f"{self.service_name}: default disconnect_from_service(): no-op")
|
|
437
|
-
self.state = ConnectionState.DISCONNECTED
|
|
438
|
-
return
|
|
439
|
-
|
|
440
|
-
def health_check(self) -> bool:
|
|
441
|
-
logger.warning(
|
|
442
|
-
f"The health_check() method is not implemented for {self.service_name}, check will always return false."
|
|
443
|
-
)
|
|
444
|
-
return False
|
|
445
|
-
|
|
446
|
-
def should_attempt_connection(self) -> bool:
|
|
447
|
-
now = time.monotonic()
|
|
448
|
-
with self._lock:
|
|
449
|
-
if self.state == ConnectionState.CIRCUIT_OPEN:
|
|
450
|
-
assert self.circuit_opened_at is not None
|
|
451
|
-
if now - self.circuit_opened_at > self.circuit_break_duration:
|
|
452
|
-
self.state = ConnectionState.DISCONNECTED
|
|
453
|
-
self.failure_count = 0
|
|
454
|
-
self.retry_interval = 1
|
|
455
|
-
return True
|
|
456
|
-
return False
|
|
457
|
-
return now - self.last_attempt >= self.retry_interval
|
|
458
|
-
|
|
459
|
-
def attempt_connection(self):
|
|
460
|
-
with self._lock:
|
|
461
|
-
current_state = self.state
|
|
462
|
-
|
|
463
|
-
if current_state == ConnectionState.CONNECTED:
|
|
464
|
-
# ensure the CONNECTED state is validated before skipping reconnection attempts
|
|
465
|
-
try:
|
|
466
|
-
healthy = self.health_check()
|
|
467
|
-
except Exception as exc:
|
|
468
|
-
logger.debug(f"health_check raised: {type_name(exc)} – {exc}")
|
|
469
|
-
healthy = False
|
|
470
|
-
|
|
471
|
-
if healthy:
|
|
472
|
-
logger.debug(f"{self.service_name} already connected and healthy")
|
|
473
|
-
return
|
|
474
|
-
|
|
475
|
-
logger.info(
|
|
476
|
-
f"{self.service_name} marked CONNECTED but health_check failed — disconnecting and reconnecting"
|
|
477
|
-
)
|
|
478
|
-
self.state = ConnectionState.DISCONNECTED
|
|
479
|
-
try:
|
|
480
|
-
# ensure the state is updated by disconnect hook (disconnect_from_service should set DISCONNECTED)
|
|
481
|
-
self.disconnect_from_service()
|
|
482
|
-
except Exception as exc:
|
|
483
|
-
if VERBOSE_DEBUG:
|
|
484
|
-
logger.debug(f"Couldn't disconnect from {self.service_name}: {type_name(exc)} – {exc}")
|
|
485
|
-
|
|
486
|
-
with self._lock:
|
|
487
|
-
if not self.should_attempt_connection():
|
|
488
|
-
return
|
|
489
|
-
self.state = ConnectionState.CONNECTING
|
|
490
|
-
self.last_attempt = time.monotonic()
|
|
491
|
-
|
|
492
|
-
try:
|
|
493
|
-
success = self.connect_to_service()
|
|
494
|
-
with self._lock:
|
|
495
|
-
if success:
|
|
496
|
-
self.state = ConnectionState.CONNECTED
|
|
497
|
-
self.failure_count = 0
|
|
498
|
-
self.retry_interval = 1
|
|
499
|
-
logger.debug(f"Successfully connected to service {self.service_name}")
|
|
500
|
-
else:
|
|
501
|
-
self.handle_connection_failure()
|
|
502
|
-
except Exception as exc:
|
|
503
|
-
logger.error(f"Failed to connect to service {self.service_name}: {exc}")
|
|
504
|
-
with self._lock:
|
|
505
|
-
self.handle_connection_failure()
|
|
506
|
-
|
|
507
|
-
def handle_connection_failure(self):
|
|
508
|
-
self.failure_count += 1
|
|
509
|
-
if self.failure_count >= self.max_failures_before_circuit_break:
|
|
510
|
-
self.state = ConnectionState.CIRCUIT_OPEN
|
|
511
|
-
self.circuit_opened_at = time.monotonic()
|
|
512
|
-
logger.warning(
|
|
513
|
-
f"Circuit breaker opened for service {self.service_name} after {self.failure_count} failures"
|
|
514
|
-
)
|
|
515
|
-
else:
|
|
516
|
-
self.state = ConnectionState.DISCONNECTED
|
|
517
|
-
self.retry_interval = calculate_retry_interval(
|
|
518
|
-
self.failure_count,
|
|
519
|
-
self.base_interval,
|
|
520
|
-
self.max_retry_interval,
|
|
521
|
-
self.backoff_strategy,
|
|
522
|
-
self.jitter_strategy,
|
|
523
|
-
)
|
|
524
|
-
logger.debug(f"retry_interval={self.retry_interval}")
|
|
525
|
-
|
|
526
|
-
def is_connected(self) -> bool:
|
|
527
|
-
with self._lock:
|
|
528
|
-
return self.state == ConnectionState.CONNECTED
|
egse/dummy.py
CHANGED
|
@@ -35,7 +35,6 @@ and stopped with:
|
|
|
35
35
|
|
|
36
36
|
from __future__ import annotations
|
|
37
37
|
|
|
38
|
-
import contextlib
|
|
39
38
|
import multiprocessing
|
|
40
39
|
import random
|
|
41
40
|
import select
|
|
@@ -53,14 +52,12 @@ from egse.device import DeviceConnectionError
|
|
|
53
52
|
from egse.device import DeviceConnectionInterface
|
|
54
53
|
from egse.device import DeviceTimeoutError
|
|
55
54
|
from egse.device import DeviceTransport
|
|
56
|
-
from egse.env import bool_env
|
|
57
55
|
from egse.log import logger
|
|
58
56
|
from egse.protocol import CommandProtocol
|
|
59
57
|
from egse.proxy import Proxy
|
|
60
58
|
from egse.system import SignalCatcher
|
|
61
59
|
from egse.system import attrdict
|
|
62
60
|
from egse.system import format_datetime
|
|
63
|
-
from egse.system import type_name
|
|
64
61
|
from egse.zmq_ser import bind_address
|
|
65
62
|
from egse.zmq_ser import connect_address
|
|
66
63
|
|
|
@@ -80,9 +77,6 @@ WRITE_TIMEOUT = 1.0
|
|
|
80
77
|
CONNECT_TIMEOUT = 3.0
|
|
81
78
|
"""The maximum time in seconds to wait for establishing a socket connect."""
|
|
82
79
|
|
|
83
|
-
|
|
84
|
-
VERBOSE_DEBUG = bool_env("VERBOSE_DEBUG", default=False)
|
|
85
|
-
|
|
86
80
|
# Especially DummyCommand and DummyController need to be defined in a known module
|
|
87
81
|
# because those objects are pickled and when de-pickled at the clients side the class
|
|
88
82
|
# definition must be known.
|
|
@@ -122,17 +116,14 @@ def is_dummy_cs_active() -> bool:
|
|
|
122
116
|
|
|
123
117
|
|
|
124
118
|
def is_dummy_dev_active() -> bool:
|
|
125
|
-
if VERBOSE_DEBUG:
|
|
126
|
-
logger.debug("Checking if dummy device is active...")
|
|
127
119
|
try:
|
|
128
120
|
dev = DummyDeviceEthernetInterface(DEV_HOST, DEV_PORT)
|
|
129
121
|
dev.connect()
|
|
130
122
|
rc = dev.trans("ping\n")
|
|
131
123
|
dev.disconnect()
|
|
132
124
|
return rc.decode().strip() == "pong"
|
|
133
|
-
except
|
|
134
|
-
|
|
135
|
-
logger.debug(f"Caught {type_name(exc)}: {exc} - returning False")
|
|
125
|
+
except DeviceConnectionError as exc:
|
|
126
|
+
# logger.error(f"Caught {type_name(exc)}: {exc}")
|
|
136
127
|
return False
|
|
137
128
|
|
|
138
129
|
|
|
@@ -589,18 +580,11 @@ def start_dev():
|
|
|
589
580
|
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
|
590
581
|
s.bind((DEV_HOST, DEV_PORT))
|
|
591
582
|
s.listen()
|
|
592
|
-
s.settimeout(CONNECT_TIMEOUT)
|
|
593
583
|
logger.info(f"Ready to accept connection on {DEV_HOST}:{DEV_PORT}...")
|
|
594
|
-
|
|
595
|
-
with contextlib.suppress(socket.timeout):
|
|
596
|
-
conn, addr = s.accept()
|
|
597
|
-
break
|
|
598
|
-
if killer.term_signal_received:
|
|
599
|
-
return
|
|
584
|
+
conn, addr = s.accept()
|
|
600
585
|
with conn:
|
|
601
586
|
logger.info(f"Accepted connection from {addr}")
|
|
602
587
|
conn.sendall(f"Dummy Device {__version__}".encode())
|
|
603
|
-
conn.settimeout(READ_TIMEOUT)
|
|
604
588
|
try:
|
|
605
589
|
while True:
|
|
606
590
|
error_msg = ""
|
egse/logger/__init__.py
CHANGED
|
@@ -57,7 +57,7 @@ COMMANDER_PORT = settings.get("COMMANDER_PORT", 0) # dynamically assigned by th
|
|
|
57
57
|
_initialised = False # will be set to True in the setup_logging() function
|
|
58
58
|
|
|
59
59
|
|
|
60
|
-
def get_log_file_name()
|
|
60
|
+
def get_log_file_name():
|
|
61
61
|
"""
|
|
62
62
|
Returns the filename of the log file as defined in the Settings or return the default name 'general.log'.
|
|
63
63
|
"""
|
|
@@ -315,7 +315,7 @@ def send_request(command_request: str):
|
|
|
315
315
|
"""Sends a request to the Logger Control Server and waits for a response."""
|
|
316
316
|
|
|
317
317
|
if COMMANDER_PORT == 0:
|
|
318
|
-
endpoint = get_endpoint_from_registry(
|
|
318
|
+
endpoint = get_endpoint_from_registry()
|
|
319
319
|
else:
|
|
320
320
|
endpoint = f"{PROTOCOL}://{HOSTNAME}:{COMMANDER_PORT}"
|
|
321
321
|
|