epics-bridge 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epics_bridge/__init__.py +5 -0
- epics_bridge/base_pv_interface.py +83 -0
- epics_bridge/daemon.py +323 -0
- epics_bridge/daemon_status.py +13 -0
- epics_bridge/io.py +130 -0
- epics_bridge/utils.py +39 -0
- epics_bridge-1.0.0.dist-info/METADATA +193 -0
- epics_bridge-1.0.0.dist-info/RECORD +10 -0
- epics_bridge-1.0.0.dist-info/WHEEL +5 -0
- epics_bridge-1.0.0.dist-info/top_level.txt +1 -0
epics_bridge/__init__.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
from dataclasses import dataclass, field, fields
|
|
2
|
+
from typing import Dict
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass
class BasePVInterface:
    """
    A base configuration class for EPICS PV interfaces.

    Every string field other than ``prefixes`` is treated as a PV-name
    template. On construction, each template that contains a ``{`` is
    expanded with ``str.format(**prefixes)`` and the attribute is overwritten
    with the resulting PV name. Subclasses (also decorated with
    ``@dataclass``) add their own template fields and are instantiated with
    device-specific prefixes.

    Attributes:
        prefixes (Dict[str, str]):
            A dictionary of prefixes used to format the PV templates.
            Example: {'sys': 'VAC:01:', 'main': 'PUMP:A:'}
        trigger, heartbeat, busy, task_status, task_duration (str):
            Default PV templates; all of them use the ``{main}`` prefix key.

    Raises:
        ValueError: From ``__post_init__`` when a template references a
            prefix key that was not supplied or cannot be formatted.
    """

    # 1. Configuration: holds the prefixes used for formatting
    prefixes: Dict[str, str] = field(default_factory=dict)

    # 2. Default PV templates (note: these use the '{main}' key)
    trigger: str = "{main}Trigger"
    heartbeat: str = "{main}Heartbeat"
    busy: str = "{main}Busy"
    task_status: str = "{main}TaskStatus"
    task_duration: str = "{main}TaskDuration"

    def __post_init__(self):
        """
        Expand every PV template in place using ``self.prefixes``.

        All problems are collected first and reported together in a single
        ``ValueError`` so a misconfigured interface can be fixed in one pass.
        """
        problems = []

        for f in fields(self):
            # Skip the internal configuration field
            if f.name == "prefixes":
                continue

            # The raw value is the template, e.g. "{sys}Trigger"
            raw_template = getattr(self, f.name)

            # Only process strings that look like templates
            if not (isinstance(raw_template, str) and "{" in raw_template):
                continue

            try:
                formatted_pv = raw_template.format(**self.prefixes)
            except KeyError as e:
                problems.append(f"Field '{f.name}' requires prefix key {e}")
            except (IndexError, ValueError) as e:
                # Positional placeholders ("{}", "{0}") raise IndexError and
                # unbalanced braces raise ValueError; report them with the
                # field name instead of letting a bare exception escape.
                problems.append(
                    f"Field '{f.name}' has an invalid template "
                    f"{raw_template!r}: {e}"
                )
            else:
                setattr(self, f.name, formatted_pv)

        if problems:
            raise ValueError(
                "Configuration Error: Missing prefixes for PV templates.\n"
                + "\n".join(problems)
            )

    @property
    def as_dict(self) -> Dict[str, str]:
        """Returns a dictionary of PVs, excluding the internal 'prefixes'."""
        # Building a new dict from vars(self) leaves the instance untouched.
        return {name: pv for name, pv in vars(self).items() if name != "prefixes"}

    @property
    def pv_to_attr(self) -> Dict[str, str]:
        """
        Returns a reverse mapping: {Formatted_PV_Name: Attribute_Name}.
        Example: {'VAC:01:Trigger': 'trigger'}

        If two attributes resolve to the same PV name, the one that appears
        later in ``as_dict`` wins.
        """
        return {pv: attr for attr, pv in self.as_dict.items()}
|
epics_bridge/daemon.py
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import threading
|
|
4
|
+
import time
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import Tuple
|
|
7
|
+
|
|
8
|
+
from p4p.client.thread import Context
|
|
9
|
+
|
|
10
|
+
from .base_pv_interface import BasePVInterface
|
|
11
|
+
from .daemon_status import TaskStatus
|
|
12
|
+
from .io import BridgeIO
|
|
13
|
+
from .utils import Timer
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class BridgeDaemon(ABC):
    """
    Base class for an EPICS Bridge Daemon with High-Availability features.

    Architecture:
    1. **Main Loop (Control):** Runs on the thread that calls `start()` and
       performs its PV access through `self.io`. Each iteration checks the
       trigger, sets Busy, runs `run_task()`, resets Busy/Trigger and then
       publishes the task status and duration.
    2. **Heartbeat Loop (Monitor):** Runs on a separate daemon thread that
       opens its own p4p `Context`, so heartbeat writes do not go through
       the Main Loop's IO object.

    Safety Features:
    - **Zombie Protection:** The Heartbeat thread acts as a watchdog. It monitors
      the Main Loop's activity timestamp. If the timestamp is older than the
      stall limit, the Heartbeat STOPS pulsing to alert external supervisors.
    - **Suicide Pact:** Every heartbeat tick on which the pulse could not be
      written, or on which the Main Loop was stalled, increments a fault
      counter (a successful pulse resets it). Once the counter reaches
      `max_stuck_cycles` the Heartbeat thread terminates the whole process
      with `os._exit(1)` so a supervisor (Docker/systemd) can restart it.

    Attributes:
        interface (BasePVInterface): Resolved PV names used by the daemon.
        io (BridgeIO): The synchronous IO handler for the Main Loop.
        max_stall_limit (float): Minimum number of seconds without Main Loop
            activity before the loop is considered stalled.
        max_stuck_cycles (int): Fault-counter threshold that triggers the
            hard kill.
    """

    def __init__(self, interface: BasePVInterface):
        self.interface = interface

        # 1. Main IO (Control Logic)
        self.io = BridgeIO()

        # 2. State & Concurrency
        self._running = False
        self._last_main_loop_activity = 0.0  # Timestamp for Zombie Protection

        # 3. Safety Thresholds
        self.max_stall_limit = 10.0  # Seconds of silence tolerated from the Main Loop
        self.max_stuck_cycles = 15  # Counted in heartbeat ticks (the worker sleeps 1 s per tick)
        self._stuck_counter = 0

    # =========================================================================
    # Overridable Hooks
    # =========================================================================

    def get_poll_rate(self) -> float:
        """
        [Overridable] Controls the pacing of the Main Loop.

        The returned value is passed to `time.sleep()` after every cycle, so
        it is the pause in seconds between cycles. Defaults to 1.0.
        It is also used (multiplied by 4) when computing the stall limit.
        """
        return 1.0

    def trigger(self) -> bool:
        """
        [Overridable] Checks if the control cycle should run.

        Default Implementation:
            Reads the trigger PV with a 1 second timeout. `BridgeIO.pvget`
            returns None when the read fails, so a failed read is treated
            the same as "not triggered".

        Returns:
            bool: True to start `run_task()`, False to sleep.
        """
        return bool(self.io.pvget(self.interface.trigger, timeout=1.0))

    @abstractmethod
    def run_task(self) -> TaskStatus:
        """
        [Required Override] Core business logic.

        Called with no arguments once per triggered cycle, after the Busy
        flag has been set. Implementations read and write PVs through
        `self.io`.

        Returns:
            TaskStatus: Result of the cycle; it is written to the
            `task_status` PV by the Main Loop.
        """
        pass

    def reset_cycle(self):
        """
        [Overridable] Resets the handshake/trigger PVs at end of cycle.
        Writes False to both Busy and Trigger, trying up to 3 times.

        Returns:
            bool: True if the reset write was successful.

        Raises:
            RuntimeError: If the write fails after 3 attempts.
        """
        payload = {self.interface.busy: False, self.interface.trigger: False}
        max_attempts = 3

        for attempt in range(1, max_attempts + 1):
            if self.io.pvput(payload, timeout=2.0):
                return True
            logger.warning("Reset cycle failed (Attempt %d/3). Retrying...", attempt)
            if attempt < max_attempts:
                time.sleep(0.5)  # Brief pause to let network/IOC recover

        logger.error("Critical: Failed to reset cycle after 3 attempts.")
        raise RuntimeError("Failed to reset cycle PVs (Busy/Trigger) after 3 attempts.")

    # =========================================================================
    # Lifecycle Management
    # =========================================================================

    def start(self) -> None:
        """Starts the Heartbeat thread and enters the blocking Main Loop."""
        self._running = True
        self._last_main_loop_activity = time.time()  # Prime the watchdog

        # daemon=True so the heartbeat thread never keeps the process alive
        # once the main thread has exited.
        hb_thread = threading.Thread(
            target=self._heartbeat_worker, daemon=True, name="Bridge_Heartbeat_Isolated"
        )
        hb_thread.start()

        # Block the calling thread on the Control Loop
        self._main_loop()

    def stop(self) -> None:
        """
        Signals loops to stop and exits the calling thread with status 0.

        Raises:
            SystemExit: Always, with exit code 0.
        """
        logger.info("Stopping Daemon...")
        self._running = False
        # The `exit()` builtin is injected by the `site` module and is not
        # guaranteed to exist (e.g. `python -S`); raising SystemExit directly
        # has the same effect without that dependency.
        raise SystemExit(0)

    # =========================================================================
    # Main Control Loop (Thread 1)
    # =========================================================================

    def _main_loop(self) -> None:
        """
        The Master Control Loop.
        Orchestrates the cycle: Health -> Attempt -> Report -> Sleep.

        Always ends by calling `stop()`, which raises SystemExit.
        """
        logger.info("Main loop started.")

        try:
            while self._running:
                # 1. Health: refresh the timestamp watched by the heartbeat thread
                self._last_main_loop_activity = time.time()

                # 2. Attempt Cycle (Trigger -> Execute -> Catch Errors)
                status, duration = self._attempt_cycle()

                # 3. Report Status (Best Effort)
                self._update_telemetry(status, duration)

                # 4. Pace the loop
                time.sleep(self.get_poll_rate())

        except Exception as e:
            # Fatal Error Boundary: anything that escaped the per-cycle handler
            logger.critical("Fatal error in main loop: %s", e, exc_info=True)
        finally:
            self.stop()

    def _attempt_cycle(self) -> Tuple[object, float]:
        """
        Runs one cycle if triggered and converts any exception into a status.

        Returns:
            (None, None) when the trigger is not set;
            (status, seconds) when the cycle ran to completion;
            (TaskStatus.EXCEPTION, seconds) when anything raised, where
            seconds is 0.0 if the failure happened before timing started.
        """
        timer = None

        try:
            if not self.trigger():
                return None, None

            with Timer() as timer:
                status = self._execute_cycle()

            return status, timer.duration

        except Exception as e:
            logger.error("Cycle failed: %s", e, exc_info=True)
            # `timer` is still None if the failure came from trigger();
            # getattr covers that case and a timer without a duration.
            return TaskStatus.EXCEPTION, getattr(timer, "duration", 0.0)

    def _execute_cycle(self) -> TaskStatus:
        """
        The Action Sequence: set Busy, run the task, reset the handshake.

        Uses try...finally so `reset_cycle()` runs even when `run_task()`
        raises. An exception raised by `reset_cycle()` propagates to the caller.

        Raises:
            IOError: If the Busy flag could not be set.
        """
        # 1. Set Busy Flag
        if not self.io.pvput({self.interface.busy: True}):
            raise IOError("Failed to set Busy flag at start of cycle.")

        try:
            # 2. Run User Logic
            return self.run_task()
        finally:
            # 3. Cleanup (Always runs, even on error)
            self.reset_cycle()

    def _update_telemetry(self, status: object, duration: float) -> None:
        """
        Writes status and duration to the I/O interface (Best Effort).

        Nothing is written when either value is None (no cycle ran). The
        result of the write is intentionally ignored.
        """
        if status is None or duration is None:
            return

        self.io.pvput(
            {self.interface.task_status: status, self.interface.task_duration: duration}
        )

    # =========================================================================
    # Isolated Heartbeat (Thread 2)
    # =========================================================================

    def _heartbeat_worker(self) -> None:
        """
        Executes the background Heartbeat and Watchdog logic.

        This method runs on a separate daemon thread (`Bridge_Heartbeat_Isolated`).
        It serves two roles:

        1. **Liveness Monitor (External):** While the Main Loop is active it
           toggles the 'heartbeat' PV between 0 and 1 once per second.

        2. **Watchdog Terminator (Internal):** Counts ticks on which either the
           pulse write failed or the Main Loop timestamp was stale. A
           successful pulse resets the count; reaching `max_stuck_cycles`
           calls `_kill_process()`.
        """
        logger.info("Starting Isolated Heartbeat Worker...")

        # Toggle state for the heartbeat (flips between 0 and 1)
        pulse_val = 0

        with Context("pva") as ctx:
            while self._running:
                if self._is_main_loop_active():
                    pulse_val = 1 - pulse_val  # Toggle value

                    if self._send_pulse(ctx, pulse_val):
                        self._stuck_counter = 0  # Reset fault counter on success
                    else:
                        # Main loop is fine, but Network/EPICS layer is failing
                        self._stuck_counter += 1
                else:
                    # The Main Loop has not updated its timestamp within the
                    # stall limit: stop pulsing to alert external monitors.
                    self._stuck_counter += 1

                    logger.warning(
                        "Heartbeat Paused: Main Loop Stalled. Faults: %d/%d",
                        self._stuck_counter,
                        self.max_stuck_cycles,
                    )

                # A stuck Main Thread cannot exit by itself, so this thread
                # has to end the process once the threshold is reached.
                if self._stuck_counter >= self.max_stuck_cycles:
                    self._kill_process()

                # Pace the heartbeat (1 Hz)
                time.sleep(1.0)

        logger.info("Heartbeat Worker stopped cleanly.")

    # =========================================================================
    # Helper Methods (Private)
    # =========================================================================

    def _is_main_loop_active(self) -> bool:
        """Calculates if the main loop has updated its timestamp recently."""
        # Tolerance is the larger of 4x the poll value and `max_stall_limit`
        stall_limit = max(self.get_poll_rate() * 4, self.max_stall_limit)
        time_since_activity = time.time() - self._last_main_loop_activity

        return time_since_activity < stall_limit

    def _send_pulse(self, context: Context, value: int) -> bool:
        """Attempts to write the heartbeat PV. Returns True if successful."""
        try:
            context.put(self.interface.heartbeat, value, wait=True, timeout=1.0)
            return True
        except Exception as e:
            logger.warning("Heartbeat IO Error: %s", e)
            return False

    def _kill_process(self) -> None:
        """
        CRITICAL: Use os._exit(), not sys.exit()
        sys.exit() only raises an exception in THIS thread (the heartbeat thread).
        os._exit() tells the OS to immediately destroy the entire process.
        This bypasses 'finally' blocks and cleanup handlers, which is
        necessary because the Main Thread is likely deadlocked.
        """
        logger.critical(
            "Watchdog Triggered: Main Loop stuck for %d cycles. "
            "initiating HARD KILL via os._exit(1).",
            self._stuck_counter,
        )

        os._exit(1)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from enum import IntEnum
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class TaskStatus(IntEnum):
    """
    Simple status tracking for task completion.
    Maps to an EPICS mbbi record.

    The trailing comments name the mbbi state-string (xxST) and
    alarm-severity (xxSV) fields that correspond to each value.
    """

    SUCCESS = 0  # ZRST="Success", ZRSV=NO_ALARM
    LOGIC_FAILURE = 1  # ONST="Task Fail", ONSV=MINOR (Business logic said no)
    IO_FAILURE = 2  # TWST="IO Failure", TWSV=MINOR (Network/IO operation failed)
    EXCEPTION = 3  # THST="Code Crash", THSV=MAJOR (Python traceback occurred)
|
epics_bridge/io.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any, Dict, List, Optional, Union
|
|
3
|
+
|
|
4
|
+
from p4p.client.thread import Context, Value
|
|
5
|
+
|
|
6
|
+
from .utils import unwrap_p4p_structure
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BridgeIO:
    """
    Handles synchronous Input/Output operations for Process Variables (PVs) via p4p.

    The names passed to `pvget` / `pvput` are handed to the p4p context
    unchanged, so they must be full EPICS PV names
    (e.g., 'IOC:SYS:MOTOR:POS').

    Design Philosophy:
    - **Synchronous Execution:** All methods block until the operation completes
      or times out. This ensures the caller has absolute control over flow.
    - **Fail-Safe:** Errors (disconnects, timeouts) are caught and logged,
      returning safe fallback values (None/False) rather than crashing the thread.

    The object owns a p4p `Context`. Call `close()` (or use the instance in a
    `with` statement) to release it when the IO handler is no longer needed.

    Attributes:
        ctx (Context): The active p4p client context.
    """

    def __init__(self):
        self.ctx = Context("pva")

    def __enter__(self):
        """Allows `with BridgeIO() as io:`; returns the instance itself."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Closes the underlying context; exceptions are not suppressed."""
        self.close()

    def close(self) -> None:
        """Closes the underlying p4p context and the channels it holds."""
        self.ctx.close()

    def pvget(
        self, pv_names: Union[str, List[str]], timeout: float = 5.0, raw: bool = False
    ) -> Union[Any, Dict[str, Optional[Any]]]:
        """
        Reads PVs synchronously. Supports both single-value and batch reads.

        Args:
            pv_names: A single PV name (str) OR a list of PV names (List[str]).
            timeout: Maximum seconds to wait for the network response.
            raw: When False (default), every result is converted to a plain
                Python value and failed reads become None. When True, the
                objects returned by p4p are passed through unchanged, which
                includes Exception instances for PVs that failed.

        Returns:
            - If 'pv_names' is a str:
                Returns the single value (or None on failure).
            - If 'pv_names' is a list:
                Returns {name: value} dict. None values indicate failure.
        """
        # 1. Normalize Input
        is_single_value = isinstance(pv_names, str)
        pv_names_list = [pv_names] if is_single_value else pv_names

        if not pv_names_list:
            return None if is_single_value else {}

        # 2. Execute Read
        try:
            # throw=False returns Exception objects in the list for specific failures
            values: List[Union[Value, Exception]] = self.ctx.get(
                pv_names_list, throw=False, timeout=timeout
            )
        except Exception as e:
            # Catastrophic failure (Context down, etc.)
            logger.error("Batch read failure: %s", e)
            return None if is_single_value else {n: None for n in pv_names_list}

        # 3. Convert to plain Python values unless the caller asked for raw data
        if not raw:
            values = self._convert_epics_to_python_types(values)

        # 4. Return (Polymorphic)
        if is_single_value:
            return values[0]

        return dict(zip(pv_names_list, values))

    def pvput(self, data: Dict[str, Any], timeout: float = 5.0) -> bool:
        """
        Writes to a batch of PVs synchronously.

        The put is issued with wait=True, so the call blocks until p4p
        reports completion or the timeout expires.

        Args:
            data: A dictionary of {pv_name: value_to_write}.
            timeout: Maximum seconds to wait for write confirmation.

        Returns:
            True: If the write completed without raising.
            False: If `data` is empty, the write timed out, or it failed
                for any other reason (the error is logged).
        """
        if not data:
            return False

        # Parallel lists in the same (insertion) order, as ctx.put expects
        pvs = list(data)
        vals = list(data.values())

        try:
            # Block here. wait=True ensures we wait for the server's handshake.
            self.ctx.put(pvs, vals, wait=True, timeout=timeout)
            return True

        except TimeoutError:
            logger.error("IO Timeout: Write operation stuck for >%.1fs.", timeout)
            return False

        except Exception as e:
            # Any other failure reported by p4p for this write
            logger.error("IO Error during write: %s", e)
            return False

    def _convert_epics_to_python_types(self, raw_values):
        """
        Maps each p4p result to a plain Python value.

        Exception entries (per-PV failures) become None; everything else is
        passed through `unwrap_p4p_structure`.
        """
        return [
            None if isinstance(raw_val, Exception) else unwrap_p4p_structure(raw_val)
            for raw_val in raw_values
        ]
|
epics_bridge/utils.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import time
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from p4p.nt.enum import ntenum
|
|
6
|
+
from p4p.nt.ndarray import ntndarray
|
|
7
|
+
from p4p.nt.scalar import ntbool, ntfloat, ntint, ntnumericarray, ntstr, ntstringarray
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Timer:
    """
    Context manager that measures the wall-clock time spent in its block.

    Uses `time.perf_counter()`. After the `with` block exits (normally or
    through an exception) `duration` holds the elapsed seconds. Before that
    point `duration` is 0.0, so it can always be read safely.

    Attributes:
        start (float | None): perf_counter value taken on entry.
        end (float | None): perf_counter value taken on exit.
        duration (float): `end - start` in seconds; 0.0 until the block exits.
    """

    def __init__(self):
        self.start = None
        self.end = None
        self.duration = 0.0

    def __enter__(self):
        self.start = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None, so an exception raised inside the block propagates.
        self.end = time.perf_counter()
        self.duration = self.end - self.start
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def unwrap_p4p_structure(res: Any) -> Any:
    """
    Unwraps P4P structures to get clean Python types.

    Args:
        res: An object returned by a p4p get. The supported wrapper types
            expose the underlying structure through a `raw` attribute.

    Returns:
        - For ntbool, ntint, ntfloat, ntnumericarray, ntstr, ntstringarray
          and ntndarray: the "value" entry of `res.raw.todict()`.
        - For ntenum: the "index" entry of that "value".
        - None for any other type, including objects without a `raw`
          attribute.
    """
    raw_data = getattr(res, "raw", None)
    if raw_data is None:
        # Not one of the wrapper types: report "no value" instead of
        # raising AttributeError into the caller.
        return None

    raw_dict = raw_data.todict()

    if isinstance(
        res,
        (ntbool, ntint, ntfloat, ntnumericarray, ntstr, ntstringarray, ntndarray),
    ):
        return raw_dict["value"]

    if isinstance(res, ntenum):
        return raw_dict["value"]["index"]

    return None
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: epics-bridge
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A generic bridge between EPICS IOCs and Python logic.
|
|
5
|
+
Author-email: Hugo Valim <hugo.valim@ess.eu>
|
|
6
|
+
Requires-Python: >=3.9
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: numpy
|
|
9
|
+
Requires-Dist: p4p
|
|
10
|
+
|
|
11
|
+
# EPICS Bridge
|
|
12
|
+
|
|
13
|
+

|
|
14
|
+

|
|
15
|
+

|
|
16
|
+
|
|
17
|
+
**EPICS Bridge** is a high-availability Python framework designed for implementing a robust EPICS-Python interface. It provides a structured environment for bridging external control logic with the EPICS control system, emphasizing synchronous execution, fault tolerance, and strict process monitoring.
|
|
18
|
+
|
|
19
|
+
This library addresses common reliability challenges, such as preventing silent stalls ("zombie processes") and handling network IO failures deterministically.
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
## System Architecture
|
|
23
|
+
|
|
24
|
+
The core of `epics-bridge` relies on a **Twin-Thread Architecture** that decouples the control logic from the monitoring signal.
|
|
25
|
+
|
|
26
|
+
### 1. Synchronous Control Loop (Main Thread)
|
|
27
|
+
The primary thread executes the user-defined logic in a strict, synchronous cycle:
|
|
28
|
+
1. **Trigger:** Waits for an input event or timer.
|
|
29
|
+
2. **Run Task:** Executes the user-defined task.
|
|
30
|
+
3. **Acknowledge:** Updates the task status and completes the handshake.
|
|
31
|
+
|
|
32
|
+
### 2. Isolated Heartbeat Monitor (Daemon Thread)
|
|
33
|
+
A separate, isolated thread acts as an internal watchdog. It monitors the activity timestamp of the Main Thread.
|
|
34
|
+
* **Operational:** Pulses the `Heartbeat` PV as long as the Main Thread is active.
|
|
35
|
+
* **Stalled (Zombie Protection):** If the Main Thread hangs (e.g., infinite loop, deadlocked IO) for longer than the defined tolerance, the Heartbeat thread ceases pulsing immediately. This alerts external watchdogs (e.g., the IOC or alarm handler) that the process is unresponsive.
|
|
36
|
+
|
|
37
|
+
### 3. Automatic Recovery ("Suicide Pact")
|
|
38
|
+
To support containerized environments (Docker, Kubernetes) or systemd supervisors, the daemon implements a fail-fast mechanism. If heartbeat writes keep failing or the Main Thread stays stalled until the fault counter reaches a configurable threshold (`max_stuck_cycles`), the heartbeat thread forcibly terminates the process (`os._exit(1)`). This allows the external supervisor to restart the service.
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
### 4. Logger
|
|
42
|
+
Important daemon messages are emitted through Python's standard `logging` module, so they can be routed to a configured log file.
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
## Installation
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
# Install the package
|
|
50
|
+
pip install .
|
|
51
|
+
|
|
52
|
+
# Install test dependencies
|
|
53
|
+
pip install -r requirements-test.txt
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
## Project Structure
|
|
60
|
+
|
|
61
|
+
- **epics_bridge.daemon**
|
|
62
|
+
Main control loop, heartbeat logic, and failure handling
|
|
63
|
+
|
|
64
|
+
- **epics_bridge.io**
|
|
65
|
+
Synchronous P4P client wrapper with strict error handling
|
|
66
|
+
|
|
67
|
+
- **epics_bridge.base_pv_interface**
|
|
68
|
+
PV template definitions and prefix validation
|
|
69
|
+
|
|
70
|
+
- **epics_bridge.utils**
|
|
71
|
+
Utilities for converting P4P data into native Python types
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
## Quick Start
|
|
76
|
+
### 1. EPICS Interface
|
|
77
|
+
There should be a standard EPICS database (db) that handles the basic functionality of the daemon, plus any number of specialized dbs that implement the intended functionality.
|
|
78
|
+
|
|
79
|
+
The standard db should always be loaded by the IOC that interfaces with the daemon.
|
|
80
|
+
These are its contents:
|
|
81
|
+
|
|
82
|
+
```epics
|
|
83
|
+
record(bo, "$(P)Trigger") {
|
|
84
|
+
field(DESC, "Start Task")
|
|
85
|
+
field(ZNAM, "Idle")
|
|
86
|
+
field(ONAM, "Run")
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
record(bi, "$(P)Busy") {
|
|
90
|
+
field(DESC, "Task Running Status")
|
|
91
|
+
field(ZNAM, "Idle")
|
|
92
|
+
field(ONAM, "Busy")
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
record(bi, "$(P)Heartbeat") {
|
|
96
|
+
field(DESC, "Daemon Heartbeat")
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
record(mbbi, "$(P)TaskStatus") {
|
|
100
|
+
field(DESC, "Last Cycle Result")
|
|
101
|
+
field(DTYP, "Raw Soft Channel")
|
|
102
|
+
|
|
103
|
+
# State 0: Success (Green)
|
|
104
|
+
field(ZRVL, "0")
|
|
105
|
+
field(ZRST, "Success")
|
|
106
|
+
field(ZRSV, "NO_ALARM")
|
|
107
|
+
|
|
108
|
+
# State 1: Logic Failure (Yellow - e.g. Interlock)
|
|
109
|
+
field(ONVL, "1")
|
|
110
|
+
field(ONST, "Task Fail")
|
|
111
|
+
field(ONSV, "MINOR")
|
|
112
|
+
|
|
113
|
+
# State 2: EPICS IO Failure (Yellow - e.g. PV Read/Write Error)
|
|
114
|
+
field(TWVL, "2")
|
|
115
|
+
field(TWST, "IO Failure")
|
|
116
|
+
field(TWSV, "MINOR")
|
|
117
|
+
|
|
118
|
+
# State 3: Exception (Red - Software/Hardware Crash)
|
|
119
|
+
field(THVL, "3")
|
|
120
|
+
field(THST, "Code Crash")
|
|
121
|
+
field(THSV, "MAJOR")
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
record(ai, "$(P)TaskDuration") {
|
|
125
|
+
field(DESC, "Task duration")
|
|
126
|
+
field(PREC, "2")
|
|
127
|
+
field(EGU, "s")
|
|
128
|
+
}
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
### 2. Define a Python PV Interface
|
|
133
|
+
|
|
134
|
+
Use a dataclass to define EPICS PV templates.
|
|
135
|
+
Standard PVs (trigger, busy, heartbeat, task_status, task_duration) are provided automatically.
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
from dataclasses import dataclass
|
|
139
|
+
from epics_bridge.base_pv_interface import BasePVInterface
|
|
140
|
+
|
|
141
|
+
@dataclass
class MotorInterface(BasePVInterface):
    # Extra PV templates on top of the standard ones inherited from
    # BasePVInterface. Each "{key}" is replaced with the matching entry of
    # the `prefixes` dict when the interface is instantiated.
    position_rbv: str = "{main}Pos:RBV"
    velocity_sp: str = "{main}Vel:SP"
    temperature: str = "{sys}Temp:Mon"
|
|
146
|
+
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### 3. Implement Control Logic
|
|
150
|
+
|
|
151
|
+
Subclass BridgeDaemon and implement the synchronous run_task() method.
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
from epics_bridge.daemon import BridgeDaemon, TaskStatus
|
|
155
|
+
|
|
156
|
+
class MotorControlDaemon(BridgeDaemon):
    def run_task(self, inputs=None) -> TaskStatus:
        # pvget returns None when the read fails
        velocity = self.io.pvget(self.interface.velocity_sp)

        if velocity is None:
            return TaskStatus.IO_FAILURE

        new_position = velocity * 0.5

        # pvput returns False when the write fails or times out
        written = self.io.pvput({
            self.interface.position_rbv: new_position
        })

        if not written:
            return TaskStatus.IO_FAILURE

        return TaskStatus.SUCCESS
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
### 4. Run the Daemon
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
|
|
176
|
+
def main():

    # One entry per "{key}" used by the interface's PV templates
    prefixes = {
        "main": "IOC:MOTOR:01:",
        "sys": "IOC:SYS:"
    }

    # Resolves every template into a full PV name; raises ValueError if a
    # template needs a prefix key that is missing from `prefixes`
    interface = MotorInterface(prefixes=prefixes)

    daemon = MotorControlDaemon(
        interface=interface,
    )

    # Starts the heartbeat thread, then blocks in the main control loop
    daemon.start()
|
|
190
|
+
|
|
191
|
+
if __name__ == "__main__":
|
|
192
|
+
main()
|
|
193
|
+
```
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
epics_bridge/__init__.py,sha256=nOU5u5m8LjRkq_eH69p2OY7OYZF-FBoJyZC7dBO-MVg,179
|
|
2
|
+
epics_bridge/base_pv_interface.py,sha256=7Q_r2y7ChuFmPKw500CODXnjE2iF7kVbj50VEeLASjg,3022
|
|
3
|
+
epics_bridge/daemon.py,sha256=DZfI0Ayhr7m4Rvjyd3g0PVKVC8FhGsH7Yf49ySiDaAo,11802
|
|
4
|
+
epics_bridge/daemon_status.py,sha256=Q9BiV96eHPSo82xovOOm5wJlgCa2bSyyc2Ivna58NCA,446
|
|
5
|
+
epics_bridge/io.py,sha256=QOyALzdwM_Yf2JAn1x-M7g_RUcoLZ9fRPGKnei3383s,4550
|
|
6
|
+
epics_bridge/utils.py,sha256=kdHqm-EwTK3M0TcFXS-nJrOl6fJgn0u139E1OCs3Knk,1085
|
|
7
|
+
epics_bridge-1.0.0.dist-info/METADATA,sha256=lVrwj8YzEejqcSeD9tUgwqruTtm3phpPS2M1KG-krcE,5552
|
|
8
|
+
epics_bridge-1.0.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
9
|
+
epics_bridge-1.0.0.dist-info/top_level.txt,sha256=Ps28BHqERpyKVNk5Bhk-KVFobpGQNQJ-Q7fqFYc9rvo,13
|
|
10
|
+
epics_bridge-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
epics_bridge
|