osiris-agent 0.3.76__tar.gz → 0.3.78__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {osiris_agent-0.3.76 → osiris_agent-0.3.78}/PKG-INFO +1 -1
- {osiris_agent-0.3.76 → osiris_agent-0.3.78}/osiris_agent/__init__.py +1 -1
- osiris_agent-0.3.78/osiris_agent/agent_node.py +867 -0
- {osiris_agent-0.3.76 → osiris_agent-0.3.78}/osiris_agent.egg-info/PKG-INFO +1 -1
- {osiris_agent-0.3.76 → osiris_agent-0.3.78}/setup.py +1 -1
- osiris_agent-0.3.76/osiris_agent/agent_node.py +0 -268
- {osiris_agent-0.3.76 → osiris_agent-0.3.78}/LICENSE +0 -0
- {osiris_agent-0.3.76 → osiris_agent-0.3.78}/README.md +0 -0
- {osiris_agent-0.3.76 → osiris_agent-0.3.78}/osiris_agent/bt_collector.py +0 -0
- {osiris_agent-0.3.76 → osiris_agent-0.3.78}/osiris_agent/ros2_control_collector.py +0 -0
- {osiris_agent-0.3.76 → osiris_agent-0.3.78}/osiris_agent/tf_tree_collector.py +0 -0
- {osiris_agent-0.3.76 → osiris_agent-0.3.78}/osiris_agent.egg-info/SOURCES.txt +0 -0
- {osiris_agent-0.3.76 → osiris_agent-0.3.78}/osiris_agent.egg-info/dependency_links.txt +0 -0
- {osiris_agent-0.3.76 → osiris_agent-0.3.78}/osiris_agent.egg-info/entry_points.txt +0 -0
- {osiris_agent-0.3.76 → osiris_agent-0.3.78}/osiris_agent.egg-info/requires.txt +0 -0
- {osiris_agent-0.3.76 → osiris_agent-0.3.78}/osiris_agent.egg-info/top_level.txt +0 -0
- {osiris_agent-0.3.76 → osiris_agent-0.3.78}/setup.cfg +0 -0
- {osiris_agent-0.3.76 → osiris_agent-0.3.78}/tests/test_agent_node.py +0 -0
|
@@ -0,0 +1,867 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import os
|
|
3
|
+
import random
|
|
4
|
+
import threading
|
|
5
|
+
import time
|
|
6
|
+
from collections import deque
|
|
7
|
+
|
|
8
|
+
import psutil
|
|
9
|
+
import rclpy
|
|
10
|
+
import websockets
|
|
11
|
+
import json
|
|
12
|
+
|
|
13
|
+
from rcl_interfaces.srv import GetParameters, ListParameters
|
|
14
|
+
from rclpy.node import Node
|
|
15
|
+
from rclpy.parameter import parameter_value_to_python
|
|
16
|
+
from rclpy.qos import QoSProfile
|
|
17
|
+
from rosidl_runtime_py import message_to_ordereddict
|
|
18
|
+
from rosidl_runtime_py.utilities import get_message
|
|
19
|
+
|
|
20
|
+
from osiris_agent import __version__ as AGENT_VERSION
|
|
21
|
+
from .ros2_control_collector import Ros2ControlCollector
|
|
22
|
+
from .tf_tree_collector import TfTreeCollector
|
|
23
|
+
|
|
24
|
+
# ──────────────────────────────────────────────
# Constants
# ──────────────────────────────────────────────
# Defaults for the ROS parameters declared in WebBridge.__init__
# ('graph_check_interval', 'topic_batch_size', 'telemetry_interval').
GRAPH_CHECK_INTERVAL = 2.0  # seconds between graph polls
TOPIC_BATCH_SIZE = 10  # max topics enriched (deep-scan) per tick
TELEMETRY_INTERVAL = 1.0  # seconds between telemetry samples
SERVICE_SCAN_INTERVAL = 30.0  # seconds between service graph scans
PARAMETER_REFRESH_INTERVAL = 60.0  # seconds between retries for nodes with no params yet
# Enforced in WebBridge._subscribe_to_topic before a new subscription is created.
MAX_SUBSCRIPTIONS = 100  # hard cap on gateway-requested topic subs
# Bounds of the reconnect backoff used by WebBridge._client_loop_with_reconnect.
RECONNECT_INITIAL_DELAY = 1  # seconds
RECONNECT_MAX_DELAY = 30  # seconds

# Services to suppress from graph output (internal ROS2 plumbing).
# Matched with str.startswith against each service name during the graph poll.
_SUPPRESSED_SERVICE_PREFIXES = ('/ros2cli_daemon',)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class WebBridge(Node):
|
|
41
|
+
|
|
42
|
+
def __init__(self):
    """Create the ``osiris_node`` ROS node and start the gateway client thread.

    Reads ``OSIRIS_AUTH_TOKEN`` (required) and ``OSIRIS_WS_URL`` (optional)
    from the environment, declares the tunable ROS parameters, initialises
    every cache used by the graph/telemetry code, builds the collectors,
    registers the graph-poll timer and finally launches the WebSocket
    client in a daemon thread.

    Raises:
        ValueError: if ``OSIRIS_AUTH_TOKEN`` is unset or empty.
    """
    # Local import: only this method needs it.
    from urllib.parse import quote

    super().__init__('osiris_node')

    auth_token = os.environ.get('OSIRIS_AUTH_TOKEN')
    if not auth_token:
        raise ValueError("OSIRIS_AUTH_TOKEN environment variable must be set")

    # Declare tunable parameters
    self.declare_parameter('graph_check_interval', GRAPH_CHECK_INTERVAL)
    self.declare_parameter('topic_batch_size', TOPIC_BATCH_SIZE)
    self.declare_parameter('telemetry_interval', TELEMETRY_INTERVAL)
    self.declare_parameter('tf_tree_enabled', False)

    base_url = os.environ.get('OSIRIS_WS_URL', 'wss://osiris-gateway.fly.dev')
    # Percent-encode the token: characters such as '+', '&' or '#' would
    # otherwise be reinterpreted as query-string syntax and corrupt it.
    self.ws_url = f"{base_url}?robot=true&token={quote(auth_token, safe='')}"
    # For local development, point OSIRIS_WS_URL at e.g.
    # ws://host.docker.internal:8080

    self.ws = None
    self.loop = None
    self._send_queue: asyncio.Queue | None = None

    # ── Topic subscriptions (gateway-requested) ──────────────────────────
    self._topic_subs: dict[str, rclpy.subscription.Subscription] = {}
    self._topic_subs_lock = threading.Lock()
    self._topic_last_timestamp: dict[str, float] = {}
    self._topic_rate_history: dict[str, deque] = {}
    self._rate_history_depth = 8

    # ── Existence caches (set of fully-qualified names) ───────────────────
    self._active_nodes: set[str] = set()
    self._active_topics: set[str] = set()
    self._active_services: dict[str, str] = {}  # service name → type string
    self._active_actions: set[str] = set()

    # ── Count sentinels (cheap change detection) ─────────────────────────
    self._topic_counts: dict[str, tuple[int, int]] = {}  # topic → (pub_n, sub_n)

    # ── Relation caches (populated by Tier-2 enrichment) ─────────────────
    self._topic_relations: dict[str, dict] = {}

    # ── Enrichment pending queues ─────────────────────────────────────────
    self._pending_topic_enrichment: set[str] = set()

    # ── Parameters (lazy-loaded, async) ──────────────────────────────────
    self._node_parameter_cache: dict[str, dict] = {}
    self._pending_param_fetches: set[str] = set()

    # ── Snapshot & dirty-flag ─────────────────────────────────────────────
    self._last_sent_nodes: dict | None = None
    self._last_sent_topics: dict | None = None
    self._last_sent_actions: dict | None = None
    self._last_sent_services: dict | None = None
    self._graph_dirty = False

    # ── Service scan throttle ─────────────────────────────────────────────
    self._last_service_scan: float = 0.0
    self._service_rescan_ticks: int = 0

    # ── Initial scan synchronization ──────────────────────────────────────
    # Set by the first graph poll; the WebSocket thread waits on it before
    # sending initial_state.
    self._initial_scan_complete = threading.Event()
    self._first_graph_check_done = False

    # ── Telemetry ─────────────────────────────────────────────────────────
    self._telemetry_enabled = True
    self._last_disk_io = None
    self._last_net_io = None
    self._last_io_time: float | None = None
    self._cpu_history: deque = deque(maxlen=900)  # 15 min at 1 Hz
    psutil.cpu_percent(interval=None)  # prime — first call always returns 0.0

    # ── Collectors ────────────────────────────────────────────────────────
    self._ros2_control = Ros2ControlCollector(
        node=self,
        event_callback=self._on_ros2_control_event,
        logger=self.get_logger(),
    )
    _tf_tree_enabled = self.get_parameter('tf_tree_enabled').get_parameter_value().bool_value
    # The TF collector is optional; every user checks for None.
    self._tf_tree = TfTreeCollector(
        node=self,
        event_callback=self._on_tf_tree_event,
        logger=self.get_logger(),
    ) if _tf_tree_enabled else None

    # ── Timers ────────────────────────────────────────────────────────────
    _graph_interval = self.get_parameter('graph_check_interval').get_parameter_value().double_value
    self._topic_batch_size = self.get_parameter('topic_batch_size').get_parameter_value().integer_value
    self.create_timer(_graph_interval, self._check_graph_changes)
    # NOTE: _collect_telemetry and _refresh_empty_param_caches timers are
    # NOT wired yet — held for bisect step 4e (enable polling).

    # ── WebSocket thread ──────────────────────────────────────────────────
    threading.Thread(target=self._run_ws_client, daemon=True).start()

    self.get_logger().info(
        f"🚀 Osiris agent v{AGENT_VERSION} — bisect 4c: state hydration + collectors (inert)"
    )
|
|
138
|
+
|
|
139
|
+
# ──────────────────────────────────────────────
|
|
140
|
+
# WebSocket client
|
|
141
|
+
# ──────────────────────────────────────────────
|
|
142
|
+
|
|
143
|
+
def _run_ws_client(self):
|
|
144
|
+
self.loop = asyncio.new_event_loop()
|
|
145
|
+
asyncio.set_event_loop(self.loop)
|
|
146
|
+
self._send_queue = asyncio.Queue()
|
|
147
|
+
self.loop.run_until_complete(self._client_loop_with_reconnect())
|
|
148
|
+
|
|
149
|
+
async def _client_loop_with_reconnect(self):
    """Run ``_client_loop`` forever, reconnecting with exponential backoff.

    The loop ends when the ROS context is no longer ok.  After every session
    (whether it raised or returned normally, e.g. on an auth failure) the
    coroutine sleeps for the current backoff plus up to one second of random
    jitter, then doubles the backoff up to ``RECONNECT_MAX_DELAY``.

    A session that stayed up for at least ``RECONNECT_MAX_DELAY`` seconds is
    treated as healthy and resets the backoff to ``RECONNECT_INITIAL_DELAY``,
    so one outage long ago does not slow down every later reconnect.
    """
    backoff = RECONNECT_INITIAL_DELAY
    while self.context.ok():
        started = time.monotonic()
        failure = None
        try:
            await self._client_loop()
        except Exception as e:
            failure = e

        # Shutting down: do not log or sleep, just leave.
        if not self.context.ok():
            break

        if time.monotonic() - started >= RECONNECT_MAX_DELAY:
            backoff = RECONNECT_INITIAL_DELAY

        # Jitter is added to the wait only, so it cannot accumulate in the
        # backoff itself, and the logged value is the time actually slept.
        wait = backoff + random.uniform(0, 1)
        if failure is not None:
            self.get_logger().warning(
                f"WebSocket error: {failure}; retrying in {wait:.1f}s"
            )
        await asyncio.sleep(wait)
        backoff = min(backoff * 2, RECONNECT_MAX_DELAY)
|
|
161
|
+
|
|
162
|
+
async def _client_loop(self):
    """Run one gateway session: connect, authenticate, then pump messages.

    Returns normally when the auth response cannot be read, when
    authentication is rejected, or when the receive loop ends.  Exceptions
    raised while connecting or while the session is running propagate to
    the caller.  In every case the sender task is cancelled and ``self.ws``
    is cleared before returning.
    """
    send_task = None
    self.get_logger().info('Connecting to gateway...')
    try:
        async with websockets.connect(self.ws_url) as ws:
            # The first frame from the gateway is expected to be the JSON
            # auth verdict.
            try:
                auth_msg = await ws.recv()
                auth_data = json.loads(auth_msg)
            except Exception:
                self.get_logger().error('Failed to receive auth response from gateway')
                return

            if not auth_data or auth_data.get('type') != 'auth_success':
                error_msg = auth_data.get('message', 'unknown') if auth_data else 'no response'
                self.get_logger().error(f'Authentication failed: {error_msg}')
                return

            self.get_logger().info('Connected and authenticated to gateway')
            # self.ws is published only after successful auth; other
            # methods treat a non-None self.ws as "connected".
            self.ws = ws
            send_task = asyncio.create_task(self._send_loop(ws))

            await self._send_initial_state()
            await self._receive_loop(ws)
    finally:
        # Stop the sender and swallow whatever it ends with; the session is
        # over either way.
        if send_task and not send_task.done():
            send_task.cancel()
            try:
                await send_task
            except (asyncio.CancelledError, Exception):
                pass
        # Only report a disconnect if the session got past authentication.
        if self.ws is not None:
            self.get_logger().warning('Disconnected from gateway')
        self.ws = None
|
|
195
|
+
|
|
196
|
+
async def _send_loop(self, ws):
|
|
197
|
+
while True:
|
|
198
|
+
msg = await self._send_queue.get()
|
|
199
|
+
try:
|
|
200
|
+
await ws.send(msg)
|
|
201
|
+
except Exception as e:
|
|
202
|
+
self.get_logger().error(f"WS send failed: {e}")
|
|
203
|
+
raise
|
|
204
|
+
|
|
205
|
+
async def _receive_loop(self, ws):
|
|
206
|
+
async for raw in ws:
|
|
207
|
+
if not self.context.ok():
|
|
208
|
+
break
|
|
209
|
+
try:
|
|
210
|
+
data = json.loads(raw)
|
|
211
|
+
except json.JSONDecodeError:
|
|
212
|
+
continue
|
|
213
|
+
msg_type = data.get('type')
|
|
214
|
+
if msg_type == 'subscribe':
|
|
215
|
+
topic = data.get('topic')
|
|
216
|
+
if topic:
|
|
217
|
+
self._subscribe_to_topic(topic)
|
|
218
|
+
elif msg_type == 'unsubscribe':
|
|
219
|
+
topic = data.get('topic')
|
|
220
|
+
if topic:
|
|
221
|
+
self._unsubscribe_from_topic(topic)
|
|
222
|
+
elif msg_type == 'start_telemetry':
|
|
223
|
+
self._telemetry_enabled = True
|
|
224
|
+
elif msg_type == 'stop_telemetry':
|
|
225
|
+
self._telemetry_enabled = False
|
|
226
|
+
elif msg_type == 'error':
|
|
227
|
+
self.get_logger().warning(f"Gateway error: {data.get('message', '')}")
|
|
228
|
+
|
|
229
|
+
async def _send_initial_state(self):
    """Queue the post-connect handshake: version, full state, subscriptions.

    Waits (up to 15 s, off the event loop) for the first graph poll, resets
    the delta caches, rebuilds all four graph snapshots and remembers them
    as "last sent", then enqueues ``agent_version``, ``initial_state`` and
    the current bridge subscription list.
    """
    # Wait for the first _check_graph_changes tick to populate all caches.
    scan_done = self._initial_scan_complete
    await asyncio.to_thread(scan_done.wait, 15.0)

    # Forget what was sent on a previous connection so that
    # _flush_graph_snapshots treats everything as "unsent" after this reconnect.
    self._last_sent_nodes = None
    self._last_sent_topics = None
    self._last_sent_actions = None
    self._last_sent_services = None
    self._graph_dirty = True

    nodes = self._get_nodes_with_relations()
    topics = self._get_topics_with_relations()
    actions = self._get_actions_with_relations()
    services = self._get_services_with_relations()

    self._last_sent_nodes = nodes.copy()
    self._last_sent_topics = topics.copy()
    self._last_sent_actions = actions.copy()
    self._last_sent_services = services.copy()

    version_msg = {
        'type': 'agent_version',
        'version': AGENT_VERSION,
    }
    await self._send_queue.put(json.dumps(version_msg))

    tf_collector = self._tf_tree
    state_msg = {
        'type': 'initial_state',
        'timestamp': time.time(),
        'data': {
            'nodes': nodes,
            'topics': topics,
            'actions': actions,
            'services': services,
            'telemetry': self._get_telemetry_snapshot(),
            'controllers': self._ros2_control.get_controllers_snapshot(),
            'hardware': self._ros2_control.get_hardware_snapshot(),
            'tf_tree': tf_collector.get_snapshot() if tf_collector is not None else None,
        },
    }
    await self._send_queue.put(json.dumps(state_msg))

    await self._send_bridge_subscriptions()

    summary = (
        f"Sent initial_state: {len(nodes)} nodes, {len(topics)} topics, "
        f"{len(actions)} actions, {len(services)} services"
    )
    self.get_logger().info(summary)
|
|
277
|
+
|
|
278
|
+
async def _send_bridge_subscriptions(self):
|
|
279
|
+
with self._topic_subs_lock:
|
|
280
|
+
subs = list(self._topic_subs.keys())
|
|
281
|
+
await self._send_queue.put(json.dumps({
|
|
282
|
+
'type': 'bridge_subscriptions',
|
|
283
|
+
'subscriptions': subs,
|
|
284
|
+
'timestamp': time.time(),
|
|
285
|
+
}))
|
|
286
|
+
|
|
287
|
+
# ──────────────────────────────────────────────
|
|
288
|
+
# Tier-1: cheap existence detection
|
|
289
|
+
# ──────────────────────────────────────────────
|
|
290
|
+
|
|
291
|
+
def _check_graph_changes(self):
    """Timer callback: scan the ROS graph once and hydrate every cache.

    In this bisect build only the first invocation does any work; every
    later tick returns immediately.  The first tick queries nodes, topics
    and services, derives the action set from ``*/_action/status`` topics,
    stores the results in the ``_active_*`` caches, runs the full topic
    enrichment, kicks off parameter fetches, polls the collectors and
    finally sets ``_initial_scan_complete``.
    """
    if self._first_graph_check_done:
        return  # BISECT R1: skip all polling after first tick

    # _t0/_t1 bracket the node + topic queries for the timing log lines.
    _t0 = time.time()
    node_pairs = list(self.get_node_names_and_namespaces())
    topic_type_list = self.get_topic_names_and_types()
    _t1 = time.time()

    current_nodes = {self._node_full_name(n, ns) for n, ns in node_pairs}
    current_topics = {t for t, _ in topic_type_list}
    # An action is inferred from the presence of its status topic.
    current_actions = {
        t.replace('/_action/status', '')
        for t in current_topics
        if t.endswith('/_action/status')
    }
    self.get_logger().info(
        f"[poll] node+topic: {(_t1-_t0)*1000:.1f}ms "
        f"({len(current_nodes)} nodes, {len(current_topics)} topics, {len(current_actions)} actions)"
    )

    _ts0 = time.time()
    service_type_list = self.get_service_names_and_types()
    _ts1 = time.time()
    # Map service name → first advertised type, skipping suppressed prefixes.
    current_services = {
        s: types[0] if types else 'unknown'
        for s, types in service_type_list
        if not any(s.startswith(p) for p in _SUPPRESSED_SERVICE_PREFIXES)
    }
    self.get_logger().info(
        f"[poll] service_scan: {(_ts1-_ts0)*1000:.1f}ms ({len(current_services)} services)"
    )

    # The gate flag is flipped before the caches are filled, so an exception
    # further down is not retried on the next tick.
    self._first_graph_check_done = True
    self._active_nodes = current_nodes
    self._active_topics = current_topics
    self._active_services = current_services
    self._active_actions = current_actions
    _te0 = time.time()
    self._do_full_initial_enrichment(topic_type_list, node_pairs)
    _te1 = time.time()
    for fqn in current_nodes:
        self._fetch_node_parameters_async(fqn)
    self._ros2_control.poll()
    if self._tf_tree is not None:
        self._tf_tree.poll(force=True)
    # Unblocks _send_initial_state, which waits on this event.
    self._initial_scan_complete.set()
    self.get_logger().info(
        f"[poll] first tick complete: {len(current_nodes)} nodes, {len(current_topics)} topics, "
        f"{len(current_services)} services, {len(current_actions)} actions — "
        f"node+topic={(_t1-_t0)*1000:.1f}ms enrichment={(_te1-_te0)*1000:.1f}ms"
    )
|
|
343
|
+
|
|
344
|
+
# ──────────────────────────────────────────────
|
|
345
|
+
# Initial full enrichment (called once on first tick)
|
|
346
|
+
# ──────────────────────────────────────────────
|
|
347
|
+
|
|
348
|
+
def _do_full_initial_enrichment(self, topic_type_list, node_pairs):
|
|
349
|
+
topic_type_map = dict(topic_type_list)
|
|
350
|
+
self._pending_topic_enrichment.clear()
|
|
351
|
+
for topic in self._active_topics:
|
|
352
|
+
try:
|
|
353
|
+
pub_infos = self.get_publishers_info_by_topic(topic)
|
|
354
|
+
sub_infos = self.get_subscriptions_info_by_topic(topic)
|
|
355
|
+
except Exception:
|
|
356
|
+
continue
|
|
357
|
+
publishers = {self._node_full_name(p.node_name, p.node_namespace) for p in pub_infos}
|
|
358
|
+
subscribers = {self._node_full_name(s.node_name, s.node_namespace) for s in sub_infos}
|
|
359
|
+
self._topic_relations[topic] = {
|
|
360
|
+
'publishers': publishers,
|
|
361
|
+
'subscribers': subscribers,
|
|
362
|
+
'publisher_infos': pub_infos,
|
|
363
|
+
'subscriber_infos': sub_infos,
|
|
364
|
+
'type': topic_type_map.get(topic, ['unknown'])[0],
|
|
365
|
+
}
|
|
366
|
+
self._topic_counts[topic] = (len(pub_infos), len(sub_infos))
|
|
367
|
+
|
|
368
|
+
# ──────────────────────────────────────────────
|
|
369
|
+
# Tier-2: batched relation enrichment (inert with R1 gate)
|
|
370
|
+
# ──────────────────────────────────────────────
|
|
371
|
+
|
|
372
|
+
def _enrich_pending_relations(self, topic_type_list=None):
|
|
373
|
+
if not self._pending_topic_enrichment:
|
|
374
|
+
return
|
|
375
|
+
|
|
376
|
+
_pending_before = len(self._pending_topic_enrichment)
|
|
377
|
+
_t0 = time.time()
|
|
378
|
+
batch = set(list(self._pending_topic_enrichment)[:self._topic_batch_size])
|
|
379
|
+
self._pending_topic_enrichment -= batch
|
|
380
|
+
self.get_logger().info(
|
|
381
|
+
f"[enrich] batch={len(batch)}, pending_before={_pending_before}, "
|
|
382
|
+
f"remaining={len(self._pending_topic_enrichment)}"
|
|
383
|
+
)
|
|
384
|
+
|
|
385
|
+
if topic_type_list is not None:
|
|
386
|
+
topic_type_map = dict(topic_type_list)
|
|
387
|
+
else:
|
|
388
|
+
topic_type_map = dict(self.get_topic_names_and_types())
|
|
389
|
+
|
|
390
|
+
for topic in batch:
|
|
391
|
+
if topic not in self._active_topics:
|
|
392
|
+
continue
|
|
393
|
+
try:
|
|
394
|
+
pub_infos = self.get_publishers_info_by_topic(topic)
|
|
395
|
+
sub_infos = self.get_subscriptions_info_by_topic(topic)
|
|
396
|
+
except Exception as e:
|
|
397
|
+
self.get_logger().debug(f"Enrichment failed for {topic}: {e}")
|
|
398
|
+
continue
|
|
399
|
+
|
|
400
|
+
publishers = {self._node_full_name(p.node_name, p.node_namespace) for p in pub_infos}
|
|
401
|
+
subscribers = {self._node_full_name(s.node_name, s.node_namespace) for s in sub_infos}
|
|
402
|
+
old = self._topic_relations.get(topic)
|
|
403
|
+
new_rel = {
|
|
404
|
+
'publishers': publishers,
|
|
405
|
+
'subscribers': subscribers,
|
|
406
|
+
'publisher_infos': pub_infos,
|
|
407
|
+
'subscriber_infos': sub_infos,
|
|
408
|
+
'type': topic_type_map.get(topic, ['unknown'])[0],
|
|
409
|
+
}
|
|
410
|
+
self._topic_relations[topic] = new_rel
|
|
411
|
+
self._topic_counts[topic] = (len(pub_infos), len(sub_infos))
|
|
412
|
+
|
|
413
|
+
if old is not None:
|
|
414
|
+
old_subs = old['subscribers']
|
|
415
|
+
for fqn in subscribers - old_subs:
|
|
416
|
+
self._send_event_and_update({
|
|
417
|
+
'type': 'topic_event', 'topic': topic, 'node': fqn,
|
|
418
|
+
'event': 'subscribed', 'timestamp': time.time(),
|
|
419
|
+
})
|
|
420
|
+
for fqn in old_subs - subscribers:
|
|
421
|
+
self._send_event_and_update({
|
|
422
|
+
'type': 'topic_event', 'topic': topic, 'node': fqn,
|
|
423
|
+
'event': 'unsubscribed', 'timestamp': time.time(),
|
|
424
|
+
})
|
|
425
|
+
|
|
426
|
+
self.get_logger().info(f"[enrich] done in {(time.time()-_t0)*1000:.1f}ms")
|
|
427
|
+
|
|
428
|
+
# ──────────────────────────────────────────────
|
|
429
|
+
# Graph snapshot builders
|
|
430
|
+
# ──────────────────────────────────────────────
|
|
431
|
+
|
|
432
|
+
def _get_nodes_with_relations(self) -> dict:
|
|
433
|
+
result = {}
|
|
434
|
+
for fqn in self._active_nodes:
|
|
435
|
+
result[fqn] = {
|
|
436
|
+
'publishes': [],
|
|
437
|
+
'subscribes': [],
|
|
438
|
+
'actions': [],
|
|
439
|
+
'services': [],
|
|
440
|
+
'parameters': self._node_parameter_cache.get(fqn, {}),
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
for topic, rel in self._topic_relations.items():
|
|
444
|
+
pub_infos = rel.get('publisher_infos', [])
|
|
445
|
+
sub_infos = rel.get('subscriber_infos', [])
|
|
446
|
+
for p in pub_infos:
|
|
447
|
+
fqn = self._node_full_name(p.node_name, p.node_namespace)
|
|
448
|
+
if fqn in result:
|
|
449
|
+
result[fqn]['publishes'].append({
|
|
450
|
+
'topic': topic,
|
|
451
|
+
'qos': self._qos_to_dict(p.qos_profile),
|
|
452
|
+
})
|
|
453
|
+
for s in sub_infos:
|
|
454
|
+
fqn = self._node_full_name(s.node_name, s.node_namespace)
|
|
455
|
+
if fqn in result:
|
|
456
|
+
result[fqn]['subscribes'].append({
|
|
457
|
+
'topic': topic,
|
|
458
|
+
'qos': self._qos_to_dict(s.qos_profile),
|
|
459
|
+
})
|
|
460
|
+
|
|
461
|
+
for topic, rel in self._topic_relations.items():
|
|
462
|
+
if topic.endswith('/_action/status') and rel['publishers']:
|
|
463
|
+
action = topic.replace('/_action/status', '')
|
|
464
|
+
for p in rel['publisher_infos']:
|
|
465
|
+
fqn = self._node_full_name(p.node_name, p.node_namespace)
|
|
466
|
+
if fqn in result and action not in result[fqn]['actions']:
|
|
467
|
+
result[fqn]['actions'].append(action)
|
|
468
|
+
|
|
469
|
+
return result
|
|
470
|
+
|
|
471
|
+
def _get_topics_with_relations(self) -> dict:
|
|
472
|
+
result = {}
|
|
473
|
+
for topic, rel in self._topic_relations.items():
|
|
474
|
+
result[topic] = {
|
|
475
|
+
'type': rel.get('type', 'unknown'),
|
|
476
|
+
'publishers': [
|
|
477
|
+
{
|
|
478
|
+
'node': self._node_full_name(p.node_name, p.node_namespace),
|
|
479
|
+
'qos': self._qos_to_dict(p.qos_profile),
|
|
480
|
+
}
|
|
481
|
+
for p in rel.get('publisher_infos', [])
|
|
482
|
+
],
|
|
483
|
+
'subscribers': [
|
|
484
|
+
{
|
|
485
|
+
'node': self._node_full_name(s.node_name, s.node_namespace),
|
|
486
|
+
'qos': self._qos_to_dict(s.qos_profile),
|
|
487
|
+
}
|
|
488
|
+
for s in rel.get('subscriber_infos', [])
|
|
489
|
+
],
|
|
490
|
+
}
|
|
491
|
+
return result
|
|
492
|
+
|
|
493
|
+
def _get_actions_with_relations(self) -> dict:
|
|
494
|
+
result = {}
|
|
495
|
+
for topic, rel in self._topic_relations.items():
|
|
496
|
+
if topic.endswith('/_action/status') and rel['publishers']:
|
|
497
|
+
action = topic.replace('/_action/status', '')
|
|
498
|
+
providers = [
|
|
499
|
+
self._node_full_name(p.node_name, p.node_namespace)
|
|
500
|
+
for p in rel.get('publisher_infos', [])
|
|
501
|
+
]
|
|
502
|
+
result[action] = {'providers': providers}
|
|
503
|
+
return result
|
|
504
|
+
|
|
505
|
+
def _get_services_with_relations(self) -> dict:
|
|
506
|
+
return {
|
|
507
|
+
name: {'type': type_str, 'providers': []}
|
|
508
|
+
for name, type_str in self._active_services.items()
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
# ──────────────────────────────────────────────
|
|
512
|
+
# Delta-send: flush graph snapshots after each tick
|
|
513
|
+
# ──────────────────────────────────────────────
|
|
514
|
+
|
|
515
|
+
def _flush_graph_snapshots(self):
|
|
516
|
+
if not self._graph_dirty or not self.ws or not self.loop:
|
|
517
|
+
return
|
|
518
|
+
self._graph_dirty = False
|
|
519
|
+
self.get_logger().debug("[flush] graph dirty, checking snapshots")
|
|
520
|
+
|
|
521
|
+
nodes = self._get_nodes_with_relations()
|
|
522
|
+
if nodes != self._last_sent_nodes:
|
|
523
|
+
self.get_logger().info(f"[flush] nodes changed ({len(nodes)} nodes)")
|
|
524
|
+
self._last_sent_nodes = nodes.copy()
|
|
525
|
+
self._enqueue({
|
|
526
|
+
'type': 'nodes', 'data': nodes, 'timestamp': time.time(),
|
|
527
|
+
})
|
|
528
|
+
|
|
529
|
+
topics = self._get_topics_with_relations()
|
|
530
|
+
if topics != self._last_sent_topics:
|
|
531
|
+
self.get_logger().info(f"[flush] topics changed ({len(topics)} topics)")
|
|
532
|
+
self._last_sent_topics = topics.copy()
|
|
533
|
+
self._enqueue({
|
|
534
|
+
'type': 'topics', 'data': topics, 'timestamp': time.time(),
|
|
535
|
+
})
|
|
536
|
+
|
|
537
|
+
actions = self._get_actions_with_relations()
|
|
538
|
+
if actions != self._last_sent_actions:
|
|
539
|
+
self.get_logger().info(f"[flush] actions changed ({len(actions)} actions)")
|
|
540
|
+
self._last_sent_actions = actions.copy()
|
|
541
|
+
self._enqueue({
|
|
542
|
+
'type': 'actions', 'data': actions, 'timestamp': time.time(),
|
|
543
|
+
})
|
|
544
|
+
|
|
545
|
+
services = self._get_services_with_relations()
|
|
546
|
+
if services != self._last_sent_services:
|
|
547
|
+
self.get_logger().info(f"[flush] services changed ({len(services)} services)")
|
|
548
|
+
self._last_sent_services = services.copy()
|
|
549
|
+
self._enqueue({
|
|
550
|
+
'type': 'services', 'data': services, 'timestamp': time.time(),
|
|
551
|
+
})
|
|
552
|
+
|
|
553
|
+
# ──────────────────────────────────────────────
|
|
554
|
+
# Topic subscriptions (gateway-requested)
|
|
555
|
+
# ──────────────────────────────────────────────
|
|
556
|
+
|
|
557
|
+
def _subscribe_to_topic(self, topic_name: str):
    """Create a ROS subscription for a topic requested by the gateway.

    Args:
        topic_name: name of the topic to bridge.

    The request is ignored when the name is empty or not a string, when the
    topic is already bridged, when ``MAX_SUBSCRIPTIONS`` is reached, or when
    the topic is not currently present in the graph.  Failure to resolve
    the message type or to create the subscription is logged and the
    request is dropped, so that a bad request cannot raise into the
    gateway receive loop.  On success the updated subscription list is
    reported to the gateway.
    """
    if not topic_name or not isinstance(topic_name, str):
        return
    with self._topic_subs_lock:
        if topic_name in self._topic_subs:
            return
        if len(self._topic_subs) >= MAX_SUBSCRIPTIONS:
            self.get_logger().error(
                f"Subscription limit ({MAX_SUBSCRIPTIONS}) reached; "
                f"cannot subscribe to {topic_name}"
            )
            return

    type_names = dict(self.get_topic_names_and_types()).get(topic_name)
    if not type_names:
        self.get_logger().warning(f"Topic not found: {topic_name}")
        return

    try:
        msg_class = get_message(type_names[0])
        sub = self.create_subscription(
            msg_class,
            topic_name,
            # Bind the topic name now; the callback needs to know its source.
            lambda msg, t=topic_name: self._on_topic_msg(msg, t),
            QoSProfile(depth=10),
        )
    except Exception as e:
        # Boundary handler: the topic/type come from the network and the
        # live graph, so any failure here is reported, not propagated.
        self.get_logger().error(
            f"Failed to subscribe to {topic_name} ({type_names[0]}): {e}"
        )
        return

    with self._topic_subs_lock:
        self._topic_subs[topic_name] = sub

    self.get_logger().info(f"Subscribed to {topic_name}")
    if self.loop:
        asyncio.run_coroutine_threadsafe(
            self._send_bridge_subscriptions(), self.loop
        )
|
|
590
|
+
|
|
591
|
+
def _unsubscribe_from_topic(self, topic_name: str):
|
|
592
|
+
with self._topic_subs_lock:
|
|
593
|
+
sub = self._topic_subs.pop(topic_name, None)
|
|
594
|
+
if sub:
|
|
595
|
+
self.destroy_subscription(sub)
|
|
596
|
+
self.get_logger().info(f"Unsubscribed from {topic_name}")
|
|
597
|
+
if self.loop:
|
|
598
|
+
asyncio.run_coroutine_threadsafe(
|
|
599
|
+
self._send_bridge_subscriptions(), self.loop
|
|
600
|
+
)
|
|
601
|
+
|
|
602
|
+
def _on_topic_msg(self, msg, topic_name: str):
|
|
603
|
+
if not self.ws or not self.loop:
|
|
604
|
+
return
|
|
605
|
+
|
|
606
|
+
ts = time.time()
|
|
607
|
+
last_ts = self._topic_last_timestamp.get(topic_name)
|
|
608
|
+
if last_ts is not None:
|
|
609
|
+
delta = ts - last_ts
|
|
610
|
+
if delta > 0:
|
|
611
|
+
history = self._topic_rate_history.setdefault(
|
|
612
|
+
topic_name, deque(maxlen=self._rate_history_depth)
|
|
613
|
+
)
|
|
614
|
+
history.append(delta)
|
|
615
|
+
self._topic_last_timestamp[topic_name] = ts
|
|
616
|
+
|
|
617
|
+
rate = None
|
|
618
|
+
history = self._topic_rate_history.get(topic_name)
|
|
619
|
+
if history:
|
|
620
|
+
total = sum(history)
|
|
621
|
+
if total > 0:
|
|
622
|
+
rate = len(history) / total
|
|
623
|
+
|
|
624
|
+
asyncio.run_coroutine_threadsafe(
|
|
625
|
+
self._send_queue.put(json.dumps({
|
|
626
|
+
'type': 'topic_data',
|
|
627
|
+
'topic': topic_name,
|
|
628
|
+
'data': message_to_ordereddict(msg),
|
|
629
|
+
'rate_hz': rate,
|
|
630
|
+
'timestamp': ts,
|
|
631
|
+
})),
|
|
632
|
+
self.loop,
|
|
633
|
+
)
|
|
634
|
+
|
|
635
|
+
# ──────────────────────────────────────────────
|
|
636
|
+
# Parameters (async, lazy-loaded)
|
|
637
|
+
# ──────────────────────────────────────────────
|
|
638
|
+
|
|
639
|
+
def _refresh_empty_param_caches(self):
|
|
640
|
+
"""Retry parameter fetch for nodes that don't have cached params yet."""
|
|
641
|
+
for fqn in self._active_nodes:
|
|
642
|
+
if not self._node_parameter_cache.get(fqn):
|
|
643
|
+
self._fetch_node_parameters_async(fqn)
|
|
644
|
+
|
|
645
|
+
def _fetch_node_parameters_async(self, fqn: str):
    """Fetch parameters for *fqn* without blocking the executor.

    Args:
        fqn: fully-qualified name of the node whose parameters to fetch.

    Creates service clients, fires async calls, and stores results in
    ``_node_parameter_cache`` when callbacks fire.  Does nothing if a fetch
    for the node is already in flight or its ``list_parameters`` service is
    not ready.  A failed service call is logged at debug level and releases
    the in-flight marker, so the node can be retried by a later call.
    """
    if fqn in self._pending_param_fetches:
        return

    list_client = self.create_client(ListParameters, f"{fqn}/list_parameters")
    if not list_client.service_is_ready():
        self.destroy_client(list_client)
        return

    self._pending_param_fetches.add(fqn)
    req = ListParameters.Request()
    req.depth = 10
    future = list_client.call_async(req)

    def _on_list(fut):
        # Step 1 finished: we have (or failed to get) the parameter names.
        self.destroy_client(list_client)
        try:
            response = fut.result()
        except Exception as e:
            # result() re-raises a failure of the call; without this the
            # node would stay marked as pending forever.
            self.get_logger().debug(f"[params] list_parameters failed for {fqn}: {e}")
            response = None
        if response is None or not response.result.names:
            self._pending_param_fetches.discard(fqn)
            return

        param_names = list(response.result.names)
        get_client = self.create_client(GetParameters, f"{fqn}/get_parameters")
        get_req = GetParameters.Request()
        get_req.names = param_names
        get_future = get_client.call_async(get_req)

        def _on_get(gfut):
            # Step 2 finished: release the marker first so that every exit
            # path below leaves the node retryable.
            self.destroy_client(get_client)
            self._pending_param_fetches.discard(fqn)
            try:
                get_resp = gfut.result()
            except Exception as e:
                self.get_logger().debug(f"[params] get_parameters failed for {fqn}: {e}")
                return
            if get_resp is None:
                return

            params = {}
            for name, value in zip(param_names, get_resp.values):
                try:
                    params[name] = parameter_value_to_python(value)
                except Exception as e:
                    # Best effort: skip values that cannot be converted.
                    self.get_logger().debug(
                        f"[params] skipping {fqn}:{name}: {e}"
                    )
            self._node_parameter_cache[fqn] = params
            self._graph_dirty = True
            self.get_logger().debug(f"[params] cached {len(params)} params for {fqn}")
            self._flush_graph_snapshots()

        get_future.add_done_callback(_on_get)

    future.add_done_callback(_on_list)
|
|
697
|
+
|
|
698
|
+
# ──────────────────────────────────────────────
|
|
699
|
+
# Telemetry
|
|
700
|
+
# ──────────────────────────────────────────────
|
|
701
|
+
|
|
702
|
+
def _collect_telemetry(self):
|
|
703
|
+
if not self._telemetry_enabled or not self.ws or not self.loop:
|
|
704
|
+
return
|
|
705
|
+
self._enqueue({
|
|
706
|
+
'type': 'telemetry',
|
|
707
|
+
'data': self._get_telemetry_snapshot(),
|
|
708
|
+
'timestamp': time.time(),
|
|
709
|
+
})
|
|
710
|
+
|
|
711
|
+
def _get_telemetry_snapshot(self) -> dict:
    """Sample CPU, RAM, disk, network and CPU temperature via psutil.

    Side effects: appends the current CPU percentage to
    ``self._cpu_history`` and refreshes ``self._last_disk_io``,
    ``self._last_net_io`` and ``self._last_io_time`` so the next call can
    compute throughput deltas.

    Returns:
        A dict with the keys ``cpu``, ``ram``, ``disk``, ``net`` and
        ``temp``. ``load1``/``load5``/``load15`` are the mean of the last
        60/300/900 entries of ``self._cpu_history`` (not the OS load
        average). The ``*_mbps`` values are byte deltas per second divided
        by 1024 * 1024; they stay ``0.0`` on the first call or when the
        counters cannot be read. ``cpu_c`` is ``None`` when none of the
        known sensor keys yields a reading.
    """
    mib = 1024 * 1024
    gib = 1024 ** 3

    cpu_now = round(psutil.cpu_percent(interval=None), 1)
    self._cpu_history.append(cpu_now)
    history = list(self._cpu_history)

    def _mean_of_last(count: int) -> float | None:
        tail = history[-count:]
        if not tail:
            return None
        return round(sum(tail) / len(tail), 1)

    cpu_section = {
        'now': cpu_now,
        'load1': _mean_of_last(60),
        'load5': _mean_of_last(300),
        'load15': _mean_of_last(900),
    }

    memory = psutil.virtual_memory()
    memory_percent = memory.percent

    sampled_at = time.time()
    root_fs = psutil.disk_usage('/')

    # Both throughput sections share the same reference time.
    previous_time = self._last_io_time
    elapsed = None if previous_time is None else sampled_at - previous_time

    def _rate(current, previous) -> float:
        # Counter delta -> per-second rate; negative deltas clamp to zero.
        return round(max(0.0, (current - previous) / elapsed / mib), 2)

    read_rate = 0.0
    write_rate = 0.0
    try:
        disk_now = psutil.disk_io_counters()
        disk_before = self._last_disk_io
        if disk_before is not None and elapsed is not None and elapsed > 0:
            read_rate = _rate(disk_now.read_bytes, disk_before.read_bytes)
            write_rate = _rate(disk_now.write_bytes, disk_before.write_bytes)
        self._last_disk_io = disk_now
    except Exception:
        # Best effort: keep the rates gathered so far.
        pass

    tx_rate = 0.0
    rx_rate = 0.0
    try:
        net_now = psutil.net_io_counters()
        net_before = self._last_net_io
        if net_before is not None and elapsed is not None and elapsed > 0:
            tx_rate = _rate(net_now.bytes_sent, net_before.bytes_sent)
            rx_rate = _rate(net_now.bytes_recv, net_before.bytes_recv)
        self._last_net_io = net_now
    except Exception:
        # Best effort: keep the rates gathered so far.
        pass

    self._last_io_time = sampled_at

    cpu_celsius = None
    try:
        sensors = psutil.sensors_temperatures()
        # First sensor group (in this order) that has entries wins.
        for sensor_key in ('coretemp', 'cpu-thermal', 'acpitz', 'k10temp', 'cpu_thermal'):
            readings = sensors.get(sensor_key)
            if readings:
                cpu_celsius = round(readings[0].current, 1)
                break
    except Exception:
        # Temperature is optional; leave it as None.
        pass

    return {
        'cpu': cpu_section,
        'ram': {
            'percent': round(memory_percent, 1),
            'used_mb': round(memory.used / mib, 1),
            'total_mb': round(memory.total / mib, 1),
        },
        'disk': {
            'percent': round(root_fs.percent, 1),
            'used_gb': round(root_fs.used / gib, 2),
            'total_gb': round(root_fs.total / gib, 2),
            'read_mbps': read_rate,
            'write_mbps': write_rate,
        },
        'net': {
            'tx_mbps': tx_rate,
            'rx_mbps': rx_rate,
        },
        'temp': {
            'cpu_c': cpu_celsius,
        },
    }
|
|
794
|
+
|
|
795
|
+
# ──────────────────────────────────────────────
|
|
796
|
+
# Helpers
|
|
797
|
+
# ──────────────────────────────────────────────
|
|
798
|
+
|
|
799
|
+
@staticmethod
|
|
800
|
+
def _node_full_name(name: str, namespace: str) -> str:
|
|
801
|
+
ns = namespace if namespace.endswith('/') else namespace + '/'
|
|
802
|
+
return ns + name
|
|
803
|
+
|
|
804
|
+
@staticmethod
|
|
805
|
+
def _qos_to_dict(qos) -> dict | None:
|
|
806
|
+
if not qos:
|
|
807
|
+
return None
|
|
808
|
+
return {
|
|
809
|
+
'reliability': qos.reliability.name if hasattr(qos.reliability, 'name') else str(qos.reliability),
|
|
810
|
+
'durability': qos.durability.name if hasattr(qos.durability, 'name') else str(qos.durability),
|
|
811
|
+
'history': qos.history.name if hasattr(qos.history, 'name') else str(qos.history),
|
|
812
|
+
'depth': qos.depth,
|
|
813
|
+
'liveliness': qos.liveliness.name if hasattr(qos.liveliness, 'name') else str(qos.liveliness),
|
|
814
|
+
}
|
|
815
|
+
|
|
816
|
+
def _send_event_and_update(self, event: dict, log: str = ''):
|
|
817
|
+
"""Queue an event to the WS send loop and mark the graph dirty."""
|
|
818
|
+
if log:
|
|
819
|
+
self.get_logger().debug(log)
|
|
820
|
+
if event:
|
|
821
|
+
self._enqueue(event)
|
|
822
|
+
self._graph_dirty = True
|
|
823
|
+
|
|
824
|
+
def _enqueue(self, payload: dict):
|
|
825
|
+
"""Thread-safe enqueue to the asyncio send queue."""
|
|
826
|
+
if self.ws and self.loop:
|
|
827
|
+
asyncio.run_coroutine_threadsafe(
|
|
828
|
+
self._send_queue.put(json.dumps(payload)),
|
|
829
|
+
self.loop,
|
|
830
|
+
)
|
|
831
|
+
|
|
832
|
+
# ──────────────────────────────────────────────
|
|
833
|
+
# Collector event handlers
|
|
834
|
+
# ──────────────────────────────────────────────
|
|
835
|
+
|
|
836
|
+
def _on_ros2_control_event(self, event: dict):
|
|
837
|
+
self._enqueue(event)
|
|
838
|
+
|
|
839
|
+
def _on_tf_tree_event(self, event: dict):
|
|
840
|
+
self._enqueue(event)
|
|
841
|
+
|
|
842
|
+
# ──────────────────────────────────────────────
|
|
843
|
+
# Cleanup
|
|
844
|
+
# ──────────────────────────────────────────────
|
|
845
|
+
|
|
846
|
+
def destroy_node(self):
    """Tear down the collectors, then destroy the underlying node.

    The collectors are destroyed first (the TF-tree collector only when
    one exists). Each step is wrapped in ``try/finally`` so an exception
    raised by a collector cannot prevent the later steps, in particular
    ``super().destroy_node()``, from running. The exception still
    propagates to the caller afterwards.
    """
    try:
        self._ros2_control.destroy()
    finally:
        try:
            if self._tf_tree is not None:
                self._tf_tree.destroy()
        finally:
            super().destroy_node()
|
|
851
|
+
|
|
852
|
+
|
|
853
|
+
def main(args=None):
    """Entry point: initialise rclpy, spin a ``WebBridge`` node, then clean up.

    Args:
        args: Command-line arguments forwarded to ``rclpy.init``.

    The node is constructed inside the ``try`` block so that a failure in
    its constructor still reaches the cleanup path and the rclpy context
    is shut down. ``KeyboardInterrupt`` and ``ExternalShutdownException``
    are treated as a normal stop request.
    """
    rclpy.init(args=args)
    node = None
    try:
        node = WebBridge()
        rclpy.spin(node)
    except (KeyboardInterrupt, rclpy.executors.ExternalShutdownException):
        pass
    finally:
        try:
            if node is not None:
                node.destroy_node()
        finally:
            # Shut down even if destroying the node raised.
            if rclpy.ok():
                rclpy.shutdown()


if __name__ == '__main__':
    main()
|
|
@@ -6,7 +6,7 @@ long_description = (HERE / "README.md").read_text(encoding="utf-8")
|
|
|
6
6
|
|
|
7
7
|
setup(
|
|
8
8
|
name='osiris_agent',
|
|
9
|
-
version='0.3.
|
|
9
|
+
version='0.3.78',
|
|
10
10
|
description='OSIRIS agent for ROS2/Humble',
|
|
11
11
|
long_description=long_description,
|
|
12
12
|
long_description_content_type="text/markdown",
|
|
@@ -1,268 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
import os
|
|
3
|
-
import random
|
|
4
|
-
import threading
|
|
5
|
-
import time
|
|
6
|
-
|
|
7
|
-
import rclpy
|
|
8
|
-
import websockets
|
|
9
|
-
import json
|
|
10
|
-
|
|
11
|
-
from rclpy.node import Node
|
|
12
|
-
|
|
13
|
-
from osiris_agent import __version__ as AGENT_VERSION
|
|
14
|
-
|
|
15
|
-
# ──────────────────────────────────────────────
|
|
16
|
-
# Constants
|
|
17
|
-
# ──────────────────────────────────────────────
|
|
18
|
-
GRAPH_CHECK_INTERVAL = 2.0 # seconds between graph polls
|
|
19
|
-
TOPIC_BATCH_SIZE = 10 # max topics enriched per tick
|
|
20
|
-
RECONNECT_INITIAL_DELAY = 1 # seconds
|
|
21
|
-
RECONNECT_MAX_DELAY = 30 # seconds
|
|
22
|
-
|
|
23
|
-
# Services to suppress from graph output (internal ROS2 plumbing)
|
|
24
|
-
_SUPPRESSED_SERVICE_PREFIXES = ('/ros2cli_daemon',)
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class WebBridge(Node):
    """ROS 2 node that keeps a WebSocket session open to the OSIRIS gateway.

    A daemon thread runs an asyncio loop that connects, authenticates and
    reconnects with exponential backoff. A ROS timer performs a single
    graph scan (nodes, topics, services, actions) on its first tick and
    caches the result; later ticks return immediately (bisect mode).
    """

    # Topic suffix that marks an action's status topic.
    _ACTION_STATUS_SUFFIX = '/_action/status'

    def __init__(self):
        """Read configuration, create the graph timer and start the WS thread.

        Raises:
            ValueError: If ``OSIRIS_AUTH_TOKEN`` is unset or empty.
        """
        super().__init__('osiris_node')

        auth_token = os.environ.get('OSIRIS_AUTH_TOKEN')
        if not auth_token:
            raise ValueError("OSIRIS_AUTH_TOKEN environment variable must be set")

        base_url = os.environ.get('OSIRIS_WS_URL', 'wss://osiris-gateway.fly.dev')
        self.ws_url = f'{base_url}?robot=true&token={auth_token}'

        # Declare tunable parameters
        self.declare_parameter('graph_check_interval', GRAPH_CHECK_INTERVAL)
        self.declare_parameter('topic_batch_size', TOPIC_BATCH_SIZE)

        self.ws = None
        self.loop = None
        self._send_queue: asyncio.Queue | None = None
        # True once the current/last connection attempt passed authentication;
        # used to reset the reconnect backoff.
        self._session_authenticated = False

        # ── Existence caches (set of fully-qualified names) ───────────────────
        self._active_nodes: set[str] = set()
        self._active_topics: set[str] = set()
        self._active_services: dict[str, str] = {}
        self._active_actions: set[str] = set()

        # ── Relation caches (populated by enrichment) ─────────────────────────
        self._topic_relations: dict[str, dict] = {}

        # ── First-tick gate ───────────────────────────────────────────────────
        self._first_graph_check_done = False

        # ── Timers ────────────────────────────────────────────────────────────
        _graph_interval = self.get_parameter('graph_check_interval').get_parameter_value().double_value
        self._topic_batch_size = self.get_parameter('topic_batch_size').get_parameter_value().integer_value
        self.create_timer(_graph_interval, self._check_graph_changes)

        threading.Thread(target=self._run_ws_client, daemon=True).start()

        self.get_logger().info(
            f"🚀 Osiris agent v{AGENT_VERSION} — bisect 4a: first-tick scan enabled"
        )

    # ──────────────────────────────────────────────
    # WebSocket client
    # ──────────────────────────────────────────────

    def _run_ws_client(self):
        """Thread target: create an event loop and run the reconnect loop on it."""
        self.loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self.loop)
        self._send_queue = asyncio.Queue()
        self.loop.run_until_complete(self._client_loop_with_reconnect())

    async def _client_loop_with_reconnect(self):
        """Run ``_client_loop`` repeatedly while the ROS context is valid.

        The wait between attempts doubles (capped at ``RECONNECT_MAX_DELAY``
        plus up to one second of jitter). It is reset to
        ``RECONNECT_INITIAL_DELAY`` whenever the attempt that just ended had
        authenticated, so a drop after a healthy session is retried quickly
        instead of inheriting the backoff from earlier failures.
        """
        delay = RECONNECT_INITIAL_DELAY
        while self.context.ok():
            self._session_authenticated = False
            error = None
            try:
                await self._client_loop()
            except Exception as e:
                error = e

            if self._session_authenticated:
                delay = RECONNECT_INITIAL_DELAY

            if error is not None and self.context.ok():
                self.get_logger().warning(
                    f"WebSocket error: {error}; retrying in {delay:.1f}s"
                )
            await asyncio.sleep(delay)
            delay = min(delay * 2, RECONNECT_MAX_DELAY) + random.uniform(0, 1)

    async def _client_loop(self):
        """Open one gateway connection and serve it until it ends.

        Waits for an ``auth_success`` message, then starts the send task,
        queues the initial state and consumes incoming messages. On exit
        the send task is cancelled and ``self.ws`` is cleared.
        """
        send_task = None
        self.get_logger().info('Connecting to gateway...')
        try:
            async with websockets.connect(self.ws_url) as ws:
                try:
                    auth_msg = await ws.recv()
                    auth_data = json.loads(auth_msg)
                except Exception:
                    self.get_logger().error('Failed to receive auth response from gateway')
                    return

                if not auth_data or auth_data.get('type') != 'auth_success':
                    error_msg = auth_data.get('message', 'unknown') if auth_data else 'no response'
                    self.get_logger().error(f'Authentication failed: {error_msg}')
                    return

                self.get_logger().info('Connected and authenticated to gateway')
                self._session_authenticated = True
                self.ws = ws
                send_task = asyncio.create_task(self._send_loop(ws))

                await self._send_initial_state()
                await self._receive_loop(ws)
        finally:
            if send_task and not send_task.done():
                send_task.cancel()
                try:
                    await send_task
                except (asyncio.CancelledError, Exception):
                    pass
            if self.ws is not None:
                self.get_logger().warning('Disconnected from gateway')
            self.ws = None

    async def _send_loop(self, ws):
        """Forward queued messages to ``ws`` until a send fails or the task is cancelled."""
        while True:
            msg = await self._send_queue.get()
            try:
                await ws.send(msg)
            except Exception as e:
                self.get_logger().error(f"WS send failed: {e}")
                raise

    async def _receive_loop(self, ws):
        """Drain incoming messages (content ignored) until the stream or context ends."""
        async for _raw in ws:
            if not self.context.ok():
                break

    async def _send_initial_state(self):
        """Queue the agent version followed by an empty ``initial_state`` message."""
        await self._send_queue.put(json.dumps({
            'type': 'agent_version',
            'version': AGENT_VERSION,
        }))

        await self._send_queue.put(json.dumps({
            'type': 'initial_state',
            'timestamp': time.time(),
            'data': {
                'nodes': {},
                'topics': {},
                'actions': {},
                'services': {},
                'telemetry': None,
                'controllers': None,
                'hardware': None,
                'tf_tree': None,
            },
        }))
        self.get_logger().info("Sent initial_state (empty — bisect mode)")

    # ──────────────────────────────────────────────
    # Tier-1: cheap existence detection
    # ──────────────────────────────────────────────

    def _check_graph_changes(self):
        """Timer callback: scan the ROS graph once and cache the result.

        Only the first call does any work; it records nodes, topics,
        services and actions, runs the initial enrichment and logs how long
        each step took.
        """
        if self._first_graph_check_done:
            return  # BISECT R1: skip all polling after first tick

        _t0 = time.time()
        node_pairs = list(self.get_node_names_and_namespaces())
        topic_type_list = self.get_topic_names_and_types()
        _t1 = time.time()

        current_nodes = {self._node_full_name(n, ns) for n, ns in node_pairs}
        current_topics = {t for t, _ in topic_type_list}
        suffix = self._ACTION_STATUS_SUFFIX
        # Strip only the trailing suffix; str.replace would also remove an
        # identical fragment occurring earlier in the topic name.
        current_actions = {
            t[:-len(suffix)]
            for t in current_topics
            if t.endswith(suffix)
        }
        self.get_logger().info(
            f"[poll] node+topic: {(_t1-_t0)*1000:.1f}ms "
            f"({len(current_nodes)} nodes, {len(current_topics)} topics, {len(current_actions)} actions)"
        )

        _ts0 = time.time()
        service_type_list = self.get_service_names_and_types()
        _ts1 = time.time()
        current_services = {
            s: types[0] if types else 'unknown'
            for s, types in service_type_list
            if not any(s.startswith(p) for p in _SUPPRESSED_SERVICE_PREFIXES)
        }
        self.get_logger().info(
            f"[poll] service_scan: {(_ts1-_ts0)*1000:.1f}ms ({len(current_services)} services)"
        )

        self._first_graph_check_done = True
        self._active_nodes = current_nodes
        self._active_topics = current_topics
        self._active_services = current_services
        self._active_actions = current_actions
        _te0 = time.time()
        self._do_full_initial_enrichment(topic_type_list, node_pairs)
        _te1 = time.time()
        self.get_logger().info(
            f"[poll] first tick complete: {len(current_nodes)} nodes, {len(current_topics)} topics, "
            f"{len(current_services)} services, {len(current_actions)} actions — "
            f"node+topic={(_t1-_t0)*1000:.1f}ms enrichment={(_te1-_te0)*1000:.1f}ms"
        )

    # ──────────────────────────────────────────────
    # Initial full enrichment (called once on first tick)
    # ──────────────────────────────────────────────

    def _do_full_initial_enrichment(self, topic_type_list, node_pairs):
        """Record publishers, subscribers and type for every active topic.

        Args:
            topic_type_list: ``(topic, types)`` pairs from the graph scan.
            node_pairs: Accepted for signature compatibility; not used here.

        Topics whose endpoint queries raise are skipped. A topic with a
        missing or empty type list is recorded with type ``'unknown'``.
        """
        topic_type_map = dict(topic_type_list)
        for topic in self._active_topics:
            try:
                pub_infos = self.get_publishers_info_by_topic(topic)
                sub_infos = self.get_subscriptions_info_by_topic(topic)
            except Exception:
                continue
            publishers = {self._node_full_name(p.node_name, p.node_namespace) for p in pub_infos}
            subscribers = {self._node_full_name(s.node_name, s.node_namespace) for s in sub_infos}
            # An empty type list would make a bare [0] raise IndexError.
            topic_types = topic_type_map.get(topic) or ['unknown']
            self._topic_relations[topic] = {
                'publishers': publishers,
                'subscribers': subscribers,
                'publisher_infos': pub_infos,
                'subscriber_infos': sub_infos,
                'type': topic_types[0],
            }

    # ──────────────────────────────────────────────
    # Helpers
    # ──────────────────────────────────────────────

    @staticmethod
    def _node_full_name(name: str, namespace: str) -> str:
        """Join ``namespace`` and ``name``, adding a ``/`` only if one is missing."""
        ns = namespace if namespace.endswith('/') else namespace + '/'
        return ns + name

    # ──────────────────────────────────────────────
    # Cleanup
    # ──────────────────────────────────────────────

    def destroy_node(self):
        """Destroy the node; delegates directly to the base class."""
        super().destroy_node()
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
def main(args=None):
    """Entry point: initialise rclpy, spin a ``WebBridge`` node, then clean up.

    Args:
        args: Command-line arguments forwarded to ``rclpy.init``.

    ``WebBridge()`` raises ``ValueError`` when ``OSIRIS_AUTH_TOKEN`` is not
    set, so the node is constructed inside the ``try`` block; the rclpy
    context is then shut down on that path as well. The error itself still
    propagates. ``KeyboardInterrupt`` and ``ExternalShutdownException`` are
    treated as a normal stop request.
    """
    rclpy.init(args=args)
    node = None
    try:
        node = WebBridge()
        rclpy.spin(node)
    except (KeyboardInterrupt, rclpy.executors.ExternalShutdownException):
        pass
    finally:
        try:
            if node is not None:
                node.destroy_node()
        finally:
            # Shut down even if destroying the node raised.
            if rclpy.ok():
                rclpy.shutdown()


if __name__ == '__main__':
    main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|