jarviscore-framework 0.2.1__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/cloud_deployment_example.py +162 -0
- examples/customagent_cognitive_discovery_example.py +343 -0
- examples/fastapi_integration_example.py +570 -0
- jarviscore/__init__.py +19 -5
- jarviscore/cli/smoketest.py +8 -4
- jarviscore/core/agent.py +227 -0
- jarviscore/core/mesh.py +9 -0
- jarviscore/data/examples/cloud_deployment_example.py +162 -0
- jarviscore/data/examples/custom_profile_decorator.py +134 -0
- jarviscore/data/examples/custom_profile_wrap.py +168 -0
- jarviscore/data/examples/customagent_cognitive_discovery_example.py +343 -0
- jarviscore/data/examples/fastapi_integration_example.py +570 -0
- jarviscore/docs/API_REFERENCE.md +283 -3
- jarviscore/docs/CHANGELOG.md +139 -0
- jarviscore/docs/CONFIGURATION.md +1 -1
- jarviscore/docs/CUSTOMAGENT_GUIDE.md +997 -85
- jarviscore/docs/GETTING_STARTED.md +228 -267
- jarviscore/docs/TROUBLESHOOTING.md +1 -1
- jarviscore/docs/USER_GUIDE.md +153 -8
- jarviscore/integrations/__init__.py +16 -0
- jarviscore/integrations/fastapi.py +247 -0
- jarviscore/p2p/broadcaster.py +10 -3
- jarviscore/p2p/coordinator.py +310 -14
- jarviscore/p2p/keepalive.py +45 -23
- jarviscore/p2p/peer_client.py +311 -12
- jarviscore/p2p/swim_manager.py +9 -4
- jarviscore/profiles/__init__.py +7 -1
- jarviscore/profiles/customagent.py +295 -74
- {jarviscore_framework-0.2.1.dist-info → jarviscore_framework-0.3.1.dist-info}/METADATA +66 -18
- {jarviscore_framework-0.2.1.dist-info → jarviscore_framework-0.3.1.dist-info}/RECORD +37 -22
- {jarviscore_framework-0.2.1.dist-info → jarviscore_framework-0.3.1.dist-info}/WHEEL +1 -1
- tests/test_13_dx_improvements.py +554 -0
- tests/test_14_cloud_deployment.py +403 -0
- tests/test_15_llm_cognitive_discovery.py +684 -0
- tests/test_16_unified_dx_flow.py +947 -0
- {jarviscore_framework-0.2.1.dist-info → jarviscore_framework-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {jarviscore_framework-0.2.1.dist-info → jarviscore_framework-0.3.1.dist-info}/top_level.txt +0 -0
jarviscore/p2p/coordinator.py
CHANGED
@@ -63,6 +63,7 @@ class P2PCoordinator:
         self._started = False
         self._capability_map: Dict[str, List[str]] = {}  # capability -> [agent_ids]
         self._agent_peer_clients: Dict[str, Any] = {}  # agent_id -> PeerClient
+        self._remote_agent_registry: Dict[str, Dict[str, Any]] = {}  # agent_id -> agent info
 
     async def start(self):
         """
@@ -139,7 +140,9 @@ class P2PCoordinator:
             "STEP_COMPLETION_NUDGE_RESPONSE": self._handle_nudge_response,
             "STEP_DATA_REQUEST": self._handle_data_request,
             "CAPABILITY_ANNOUNCEMENT": self._handle_capability_announcement,
+            "CAPABILITY_DEANNOUNCEMENT": self._handle_capability_deannouncement,
             "CAPABILITY_QUERY": self._handle_capability_query,
+            "CAPABILITY_REQUEST": self._handle_capability_request,
             "P2P_KEEPALIVE": self.keepalive_manager.handle_keepalive_received,
             "P2P_KEEPALIVE_ACK": self.keepalive_manager.handle_keepalive_ack,
             # Peer-to-peer messaging (PeerClient)
@@ -157,34 +160,228 @@ class P2PCoordinator:
 
         logger.info(f"Registered {len(message_types)} message handlers")
 
+    async def _wait_for_zmq_connections(self, timeout: float = 10.0) -> bool:
+        """
+        Wait for ZMQ connections to alive SWIM members to be established.
+
+        This ensures we don't try to send messages before ZMQ is ready.
+        The ZMQ connection establishment happens asynchronously after
+        SWIM membership changes are detected.
+
+        Args:
+            timeout: Maximum time to wait in seconds
+
+        Returns:
+            True if connections are ready, False if timeout
+        """
+        import asyncio
+        import time
+
+        if not self.swim_manager or not self.swim_manager.zmq_agent:
+            logger.warning("No ZMQ agent available")
+            return False
+
+        swim_node = self.swim_manager.swim_node
+        if not swim_node:
+            logger.warning("No SWIM node available")
+            return False
+
+        conn_mgr = self.swim_manager.zmq_agent.connection_manager
+        start_time = time.time()
+
+        while time.time() - start_time < timeout:
+            # Get alive members (excluding self)
+            alive_members = list(swim_node.members.get_alive_members(exclude_self=True))
+
+            if not alive_members:
+                # No peers to connect to - that's fine
+                logger.debug("No alive peers to wait for")
+                return True
+
+            # Check if all have ZMQ connections ready
+            all_ready = True
+            for member in alive_members:
+                swim_addr = str(member.address)
+                zmq_addr = conn_mgr.get_zmq_address_for_swim(swim_addr)
+
+                if zmq_addr and conn_mgr.can_send_to_node(zmq_addr):
+                    logger.debug(f"ZMQ connection to {swim_addr} is ready")
+                else:
+                    logger.debug(f"ZMQ connection to {swim_addr} not ready yet")
+                    all_ready = False
+                    break
+
+            if all_ready:
+                logger.info(f"All ZMQ connections ready ({len(alive_members)} peers)")
+                return True
+
+            # Wait a bit before checking again
+            await asyncio.sleep(0.2)
+
+        logger.warning(f"Timeout waiting for ZMQ connections after {timeout}s")
+        return False
+
     async def announce_capabilities(self):
         """Broadcast agent capabilities to mesh."""
         if not self._started:
             raise RuntimeError("P2P Coordinator not started")
 
+        # Wait for ZMQ connections to be ready before announcing
+        await self._wait_for_zmq_connections(timeout=5.0)
+
         capabilities = {}
+        agents_info = {}  # Full agent info for remote registry
+
         for agent in self.agents:
             for cap in agent.capabilities:
                 if cap not in capabilities:
                     capabilities[cap] = []
                 capabilities[cap].append(agent.agent_id)
 
-
+            # Collect full agent info for remote visibility
+            agents_info[agent.agent_id] = {
+                'agent_id': agent.agent_id,
+                'role': agent.role,
+                'capabilities': list(agent.capabilities),
+                'description': getattr(agent, 'description', ''),
+                'node_id': self._get_node_id()
+            }
+
+        # Merge local capabilities into the map (preserve remote agents)
+        for cap, agent_ids in capabilities.items():
+            if cap not in self._capability_map:
+                self._capability_map[cap] = []
+            for agent_id in agent_ids:
+                if agent_id not in self._capability_map[cap]:
+                    self._capability_map[cap].append(agent_id)
 
         payload = {
             'node_id': self._get_node_id(),
-            'capabilities': capabilities
+            'capabilities': capabilities,
+            'agents': agents_info  # Include for remote agent registry
        }
 
-        # Broadcast using
-
-
-
-
-
+        # Broadcast directly using CAPABILITY_ANNOUNCEMENT message type
+        # This ensures the handler updates the capability map
+        # Note: _send_p2p_message wraps in 'payload' key, so send payload directly
+        success_count = await self._broadcast_p2p_message(
+            'CAPABILITY_ANNOUNCEMENT',
+            payload
         )
 
-        logger.info(f"Announced capabilities: {list(capabilities.keys())}")
+        logger.info(f"Announced capabilities to {success_count} peers: {list(capabilities.keys())}")
+
+    async def request_peer_capabilities(self):
+        """
+        Request capabilities from all existing peers.
+
+        Called when joining an existing mesh to discover what agents/capabilities
+        already exist. This ensures late-joiners see existing agents.
+        """
+        if not self._started or not self.swim_manager:
+            logger.warning("Cannot request capabilities - coordinator not started")
+            return
+
+        # Wait for ZMQ connections to be ready before requesting
+        await self._wait_for_zmq_connections(timeout=5.0)
+
+        # Get alive peers from SWIM
+        swim_node = self.swim_manager.swim_node
+        if not swim_node:
+            logger.warning("SWIM node not available")
+            return
+
+        try:
+            alive_members = list(swim_node.members.get_alive_members(exclude_self=True))
+            logger.info(f"Requesting capabilities from {len(alive_members)} peers")
+
+            for member in alive_members:
+                # member.address is already a string like "127.0.0.1:9905"
+                peer_addr = str(member.address)
+                try:
+                    # Send capability request - peers should respond with their capabilities
+                    await self._send_p2p_message(
+                        peer_addr,
+                        'CAPABILITY_REQUEST',
+                        {'node_id': self._get_node_id()}
+                    )
+                    logger.debug(f"Sent capability request to {peer_addr}")
+                except Exception as e:
+                    logger.debug(f"Failed to request capabilities from {peer_addr}: {e}")
+
+        except Exception as e:
+            logger.error(f"Error requesting peer capabilities: {e}")
+
+    async def _handle_capability_request(self, sender, message):
+        """Handle capability request from a new joiner - respond with our capabilities."""
+        try:
+            # Get the SWIM ID of the sender from the message (not the ZMQ identity)
+            sender_swim_id = message.get('from_node')
+            if not sender_swim_id:
+                logger.warning(f"Capability request missing from_node, cannot respond")
+                return
+
+            # Re-announce our capabilities to this specific peer
+            capabilities = {}
+            agents_info = {}
+
+            for agent in self.agents:
+                for cap in agent.capabilities:
+                    if cap not in capabilities:
+                        capabilities[cap] = []
+                    capabilities[cap].append(agent.agent_id)
+
+                agents_info[agent.agent_id] = {
+                    'agent_id': agent.agent_id,
+                    'role': agent.role,
+                    'capabilities': list(agent.capabilities),
+                    'description': getattr(agent, 'description', ''),
+                    'node_id': self._get_node_id()
+                }
+
+            response = {
+                'node_id': self._get_node_id(),
+                'capabilities': capabilities,
+                'agents': agents_info
+            }
+
+            # Send to the SWIM address (from_node), not the ZMQ identity (sender)
+            await self._send_p2p_message(sender_swim_id, 'CAPABILITY_ANNOUNCEMENT', response)
+            logger.info(f"Sent capabilities to requesting peer {sender_swim_id}")
+
+        except Exception as e:
+            logger.error(f"Error handling capability request: {e}")
+
+    async def deannounce_capabilities(self):
+        """
+        Broadcast capability removal to mesh.
+
+        Called when agent leaves mesh gracefully to notify other nodes
+        that this agent's capabilities are no longer available.
+        """
+        import time
+
+        if not self._started or not self.swim_manager:
+            return
+
+        node_id = self._get_node_id()
+
+        capabilities = []
+        agent_ids = []
+        for agent in self.agents:
+            capabilities.extend(agent.capabilities)
+            agent_ids.append(agent.agent_id)
+
+        payload = {
+            'type': 'CAPABILITY_DEANNOUNCEMENT',
+            'node_id': node_id,
+            'capabilities': list(set(capabilities)),
+            'agent_ids': agent_ids,
+            'timestamp': time.time()
+        }
+
+        await self._broadcast_p2p_message("CAPABILITY_DEANNOUNCEMENT", payload)
+        logger.info(f"Deannounced capabilities: {capabilities}")
 
     async def query_mesh(self, capability: str) -> List[str]:
         """
@@ -266,13 +463,21 @@ class P2PCoordinator:
                 logger.error("Cannot send P2P message: ZMQ agent not available")
                 return False
 
-
+            import json
+            payload_json = json.dumps(payload)
+            success = await self.swim_manager.zmq_agent.send_message_base(
+                target,
+                msg_type,
+                "payload",
+                payload_json,
+                f"p2p_{msg_type}"
+            )
 
             # Record activity for keepalive suppression
             if self.keepalive_manager:
                 self.keepalive_manager.record_p2p_activity()
 
-            return
+            return success
         except Exception as e:
             logger.error(f"Failed to send P2P message to {target}: {e}")
             return False
@@ -338,10 +543,18 @@ class P2PCoordinator:
 
     async def _handle_capability_announcement(self, sender, message):
         """Handle capability announcement from peer."""
+        import time
+        import json
+
         try:
             payload = message.get('payload', {})
+            # Handle both JSON string and dict payload
+            if isinstance(payload, str):
+                payload = json.loads(payload)
+
             caps = payload.get('capabilities', {})
             node_id = payload.get('node_id')
+            agents_info = payload.get('agents', {})
 
             # Update local capability map
             for cap, agents in caps.items():
@@ -352,20 +565,65 @@ class P2PCoordinator:
                     if agent_id not in self._capability_map[cap]:
                         self._capability_map[cap].append(agent_id)
 
-
+            # Update remote agent registry for visibility
+            for agent_id, info in agents_info.items():
+                self._remote_agent_registry[agent_id] = {
+                    **info,
+                    'node_id': node_id,
+                    'last_seen': time.time()
+                }
+
+            logger.info(
+                f"Updated from {node_id}: caps={list(caps.keys())}, "
+                f"agents={list(agents_info.keys())}"
+            )
         except Exception as e:
             logger.error(f"Error handling capability announcement: {e}")
 
+    async def _handle_capability_deannouncement(self, sender, message):
+        """Handle capability deannouncement from departing node."""
+        import json
+        try:
+            payload = message.get('payload', {})
+            if isinstance(payload, str):
+                payload = json.loads(payload)
+            node_id = payload.get('node_id')
+            agent_ids = payload.get('agent_ids', [])
+
+            # Remove from capability map
+            for cap in list(self._capability_map.keys()):
+                self._capability_map[cap] = [
+                    a for a in self._capability_map[cap]
+                    if a not in agent_ids
+                ]
+                # Clean up empty capabilities
+                if not self._capability_map[cap]:
+                    del self._capability_map[cap]
+
+            # Remove from remote agent registry
+            for agent_id in agent_ids:
+                self._remote_agent_registry.pop(agent_id, None)
+
+            logger.info(f"Node {node_id} departed, removed agents: {agent_ids}")
+        except Exception as e:
+            logger.error(f"Error handling capability deannouncement: {e}")
+
     async def _handle_capability_query(self, sender, message):
         """Handle capability query from peer."""
         try:
+            # Get the SWIM ID from the message (not the ZMQ identity)
+            sender_swim_id = message.get('from_node')
+            if not sender_swim_id:
+                logger.warning("Capability query missing from_node, cannot respond")
+                return
+
             capability = message.get('capability')
             response = {
                 'capability': capability,
                 'agents': self._capability_map.get(capability, [])
             }
-            await self._send_p2p_message(
-            logger.debug(f"Responded to capability query from {
+            await self._send_p2p_message(sender_swim_id, 'CAPABILITY_QUERY_RESPONSE', response)
+            logger.debug(f"Responded to capability query from {sender_swim_id} for {capability}")
         except Exception as e:
             logger.error(f"Error handling capability query: {e}")
 
@@ -387,6 +645,44 @@ class P2PCoordinator:
         self._agent_peer_clients.pop(agent_id, None)
         logger.debug(f"Unregistered PeerClient for agent: {agent_id}")
 
+    def get_remote_agent(self, role_or_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Find a remote agent by role or agent ID.
+
+        Args:
+            role_or_id: Role name or agent_id to search for
+
+        Returns:
+            Agent info dict with node_id, or None if not found
+
+        Example:
+            info = coordinator.get_remote_agent("analyst")
+            if info:
+                print(f"Found analyst at {info['node_id']}")
+        """
+        # Direct agent_id lookup
+        if role_or_id in self._remote_agent_registry:
+            return self._remote_agent_registry[role_or_id]
+
+        # Role lookup
+        for agent_id, info in self._remote_agent_registry.items():
+            if info.get('role') == role_or_id:
+                return {'agent_id': agent_id, **info}
+
+        return None
+
+    def list_remote_agents(self) -> List[Dict[str, Any]]:
+        """
+        List all known remote agents.
+
+        Returns:
+            List of agent info dicts with agent_id, role, capabilities, node_id
+        """
+        return [
+            {'agent_id': aid, **info}
+            for aid, info in self._remote_agent_registry.items()
+        ]
+
     async def _handle_peer_notify(self, sender, message):
         """Handle peer notification message."""
         try:
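Taken together, the coordinator additions form a discovery round-trip: announce local capabilities (now with full agent info), ask existing peers for theirs via CAPABILITY_REQUEST, and read the results back from the remote agent registry. The sketch below is illustrative only: the import path, the pre-built `P2PCoordinator` instance, the "analyst" role, and the fixed one-second pause are assumptions, not framework-prescribed usage.

import asyncio

from jarviscore.p2p.coordinator import P2PCoordinator  # import path assumed for illustration


async def discover_remote_agents(coordinator: P2PCoordinator) -> None:
    # Tell the mesh what this node offers, then ask existing peers what they
    # offer (the CAPABILITY_REQUEST flow covers the late-joiner case).
    await coordinator.announce_capabilities()
    await coordinator.request_peer_capabilities()

    # Arbitrary pause while CAPABILITY_ANNOUNCEMENT responses arrive.
    await asyncio.sleep(1.0)

    # Look up a remote agent by role or agent_id ("analyst" is a made-up role).
    info = coordinator.get_remote_agent("analyst")
    if info:
        print(f"Found {info['agent_id']} on node {info['node_id']}")

    # Enumerate everything the registry currently knows about remote agents.
    for agent in coordinator.list_remote_agents():
        print(agent["agent_id"], agent.get("role"), agent.get("capabilities"))
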
jarviscore/p2p/keepalive.py
CHANGED
@@ -242,62 +242,84 @@ class P2PKeepaliveManager:
         except Exception as e:
             logger.error(f"P2P_KEEPALIVE ({self.agent_id}): Error sending keepalive: {e}")
 
-    async def handle_keepalive_received(self,
+    async def handle_keepalive_received(self, sender_zmq_id: str, message: Dict[str, Any]):
         """
         Handle incoming keepalive message from peer.
-
+
         Args:
-
-
+            sender_zmq_id: ZMQ identity of the sender (not used for response)
+            message: Full message dict containing 'from_node' with SWIM address
         """
         try:
             self.metrics.keepalives_received += 1
-
-
-
+
+            # Extract the SWIM address from the message (not the ZMQ identity)
+            sender_swim_id = message.get('from_node')
+            if not sender_swim_id:
+                logger.warning(f"P2P_KEEPALIVE ({self.agent_id}): Keepalive missing from_node, cannot ACK")
+                return
+
+            logger.debug(f"P2P_KEEPALIVE ({self.agent_id}): Received keepalive from {sender_swim_id}")
+
+            # Extract the nested payload for timestamp
+            payload = message.get('payload', {})
+            if isinstance(payload, str):
+                import json
+                payload = json.loads(payload)
+
+            # Send ACK back to sender using SWIM address
             ack_payload = {
                 'agent_id': self.agent_id,
                 'timestamp': time.time(),
                 'original_timestamp': payload.get('timestamp')
             }
-
+
             # Send ACK using direct message (not broadcast)
             if self.send_p2p_message:
-                success = await self.send_p2p_message(
+                success = await self.send_p2p_message(sender_swim_id, 'P2P_KEEPALIVE_ACK', ack_payload)
                 if success:
-                    logger.debug(f"P2P_KEEPALIVE ({self.agent_id}): Sent ACK to {
+                    logger.debug(f"P2P_KEEPALIVE ({self.agent_id}): Sent ACK to {sender_swim_id}")
                 else:
-                    logger.warning(f"P2P_KEEPALIVE ({self.agent_id}): Failed to send ACK to {
-
+                    logger.warning(f"P2P_KEEPALIVE ({self.agent_id}): Failed to send ACK to {sender_swim_id}")
+
         except Exception as e:
             logger.error(f"P2P_KEEPALIVE ({self.agent_id}): Error handling keepalive: {e}")
 
-    async def handle_keepalive_ack(self,
+    async def handle_keepalive_ack(self, sender_zmq_id: str, message: Dict[str, Any]):
         """
         Handle incoming keepalive ACK from peer.
-
+
         Args:
-
-
+            sender_zmq_id: ZMQ identity of the sender
+            message: Full message dict containing 'from_node' with SWIM address
         """
         try:
             self.metrics.acks_received += 1
             current_time = time.time()
-
+
+            # Extract the SWIM address from the message
+            sender_swim_id = message.get('from_node', sender_zmq_id)
+
+            # Extract the nested payload
+            payload = message.get('payload', {})
+            if isinstance(payload, str):
+                import json
+                payload = json.loads(payload)
+
             # Calculate latency if original timestamp available
             original_timestamp = payload.get('original_timestamp')
             if original_timestamp:
                 latency = current_time - original_timestamp
                 self.metrics.last_keepalive_latency = latency
-                logger.debug(f"P2P_KEEPALIVE ({self.agent_id}): ACK from {
+                logger.debug(f"P2P_KEEPALIVE ({self.agent_id}): ACK from {sender_swim_id}, "
                              f"latency={latency*1000:.1f}ms")
-
+
             self.metrics.last_successful_keepalive = current_time
-
+
             # Remove from pending if tracked
-            if
-            del self.pending_keepalives[
-
+            if sender_swim_id in self.pending_keepalives:
+                del self.pending_keepalives[sender_swim_id]
+
         except Exception as e:
             logger.error(f"P2P_KEEPALIVE ({self.agent_id}): Error handling ACK: {e}")
 
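For context on the keepalive handler changes: replies are now addressed to the SWIM address carried in the message's 'from_node' field rather than the sender's ZMQ identity, and the nested 'payload' may arrive either as a dict or as a JSON-encoded string. The snippet below is a standalone sketch of that assumed message shape with made-up values; it is not framework code.

import json
import time

# Hypothetical keepalive message in the shape the new handlers expect.
message = {
    "from_node": "127.0.0.1:9905",  # SWIM address used to address the ACK
    "payload": json.dumps({"agent_id": "agent-1", "timestamp": time.time()}),
}

sender_swim_id = message.get("from_node")   # reply target, not the ZMQ identity
payload = message.get("payload", {})
if isinstance(payload, str):                # handle JSON-string or dict payloads
    payload = json.loads(payload)

print(sender_swim_id, payload["agent_id"])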