jarviscore-framework 0.2.1__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (37)
  1. examples/cloud_deployment_example.py +162 -0
  2. examples/customagent_cognitive_discovery_example.py +343 -0
  3. examples/fastapi_integration_example.py +570 -0
  4. jarviscore/__init__.py +19 -5
  5. jarviscore/cli/smoketest.py +8 -4
  6. jarviscore/core/agent.py +227 -0
  7. jarviscore/core/mesh.py +9 -0
  8. jarviscore/data/examples/cloud_deployment_example.py +162 -0
  9. jarviscore/data/examples/custom_profile_decorator.py +134 -0
  10. jarviscore/data/examples/custom_profile_wrap.py +168 -0
  11. jarviscore/data/examples/customagent_cognitive_discovery_example.py +343 -0
  12. jarviscore/data/examples/fastapi_integration_example.py +570 -0
  13. jarviscore/docs/API_REFERENCE.md +283 -3
  14. jarviscore/docs/CHANGELOG.md +139 -0
  15. jarviscore/docs/CONFIGURATION.md +1 -1
  16. jarviscore/docs/CUSTOMAGENT_GUIDE.md +997 -85
  17. jarviscore/docs/GETTING_STARTED.md +228 -267
  18. jarviscore/docs/TROUBLESHOOTING.md +1 -1
  19. jarviscore/docs/USER_GUIDE.md +153 -8
  20. jarviscore/integrations/__init__.py +16 -0
  21. jarviscore/integrations/fastapi.py +247 -0
  22. jarviscore/p2p/broadcaster.py +10 -3
  23. jarviscore/p2p/coordinator.py +310 -14
  24. jarviscore/p2p/keepalive.py +45 -23
  25. jarviscore/p2p/peer_client.py +311 -12
  26. jarviscore/p2p/swim_manager.py +9 -4
  27. jarviscore/profiles/__init__.py +7 -1
  28. jarviscore/profiles/customagent.py +295 -74
  29. {jarviscore_framework-0.2.1.dist-info → jarviscore_framework-0.3.1.dist-info}/METADATA +66 -18
  30. {jarviscore_framework-0.2.1.dist-info → jarviscore_framework-0.3.1.dist-info}/RECORD +37 -22
  31. {jarviscore_framework-0.2.1.dist-info → jarviscore_framework-0.3.1.dist-info}/WHEEL +1 -1
  32. tests/test_13_dx_improvements.py +554 -0
  33. tests/test_14_cloud_deployment.py +403 -0
  34. tests/test_15_llm_cognitive_discovery.py +684 -0
  35. tests/test_16_unified_dx_flow.py +947 -0
  36. {jarviscore_framework-0.2.1.dist-info → jarviscore_framework-0.3.1.dist-info}/licenses/LICENSE +0 -0
  37. {jarviscore_framework-0.2.1.dist-info → jarviscore_framework-0.3.1.dist-info}/top_level.txt +0 -0
jarviscore/p2p/coordinator.py

@@ -63,6 +63,7 @@ class P2PCoordinator:
         self._started = False
         self._capability_map: Dict[str, List[str]] = {}  # capability -> [agent_ids]
         self._agent_peer_clients: Dict[str, Any] = {}  # agent_id -> PeerClient
+        self._remote_agent_registry: Dict[str, Dict[str, Any]] = {}  # agent_id -> agent info
 
     async def start(self):
         """
@@ -139,7 +140,9 @@ class P2PCoordinator:
             "STEP_COMPLETION_NUDGE_RESPONSE": self._handle_nudge_response,
             "STEP_DATA_REQUEST": self._handle_data_request,
             "CAPABILITY_ANNOUNCEMENT": self._handle_capability_announcement,
+            "CAPABILITY_DEANNOUNCEMENT": self._handle_capability_deannouncement,
             "CAPABILITY_QUERY": self._handle_capability_query,
+            "CAPABILITY_REQUEST": self._handle_capability_request,
             "P2P_KEEPALIVE": self.keepalive_manager.handle_keepalive_received,
             "P2P_KEEPALIVE_ACK": self.keepalive_manager.handle_keepalive_ack,
             # Peer-to-peer messaging (PeerClient)
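The entries above map message-type strings to coroutine handlers. The dispatch step this table implies is roughly the sketch below (illustrative only; the dispatcher itself is not part of this hunk, and `message_types` is the local name suggested by the log line in the next hunk):

    # Hypothetical dispatch: route an incoming message to its registered handler.
    async def dispatch(message_types, sender, message):
        handler = message_types.get(message.get("type"))
        if handler is not None:
            await handler(sender, message)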
@@ -157,34 +160,228 @@ class P2PCoordinator:
 
         logger.info(f"Registered {len(message_types)} message handlers")
 
+    async def _wait_for_zmq_connections(self, timeout: float = 10.0) -> bool:
+        """
+        Wait for ZMQ connections to alive SWIM members to be established.
+
+        This ensures we don't try to send messages before ZMQ is ready.
+        The ZMQ connection establishment happens asynchronously after
+        SWIM membership changes are detected.
+
+        Args:
+            timeout: Maximum time to wait in seconds
+
+        Returns:
+            True if connections are ready, False if timeout
+        """
+        import asyncio
+        import time
+
+        if not self.swim_manager or not self.swim_manager.zmq_agent:
+            logger.warning("No ZMQ agent available")
+            return False
+
+        swim_node = self.swim_manager.swim_node
+        if not swim_node:
+            logger.warning("No SWIM node available")
+            return False
+
+        conn_mgr = self.swim_manager.zmq_agent.connection_manager
+        start_time = time.time()
+
+        while time.time() - start_time < timeout:
+            # Get alive members (excluding self)
+            alive_members = list(swim_node.members.get_alive_members(exclude_self=True))
+
+            if not alive_members:
+                # No peers to connect to - that's fine
+                logger.debug("No alive peers to wait for")
+                return True
+
+            # Check if all have ZMQ connections ready
+            all_ready = True
+            for member in alive_members:
+                swim_addr = str(member.address)
+                zmq_addr = conn_mgr.get_zmq_address_for_swim(swim_addr)
+
+                if zmq_addr and conn_mgr.can_send_to_node(zmq_addr):
+                    logger.debug(f"ZMQ connection to {swim_addr} is ready")
+                else:
+                    logger.debug(f"ZMQ connection to {swim_addr} not ready yet")
+                    all_ready = False
+                    break
+
+            if all_ready:
+                logger.info(f"All ZMQ connections ready ({len(alive_members)} peers)")
+                return True
+
+            # Wait a bit before checking again
+            await asyncio.sleep(0.2)
+
+        logger.warning(f"Timeout waiting for ZMQ connections after {timeout}s")
+        return False
+
     async def announce_capabilities(self):
         """Broadcast agent capabilities to mesh."""
         if not self._started:
             raise RuntimeError("P2P Coordinator not started")
 
+        # Wait for ZMQ connections to be ready before announcing
+        await self._wait_for_zmq_connections(timeout=5.0)
+
         capabilities = {}
+        agents_info = {}  # Full agent info for remote registry
+
         for agent in self.agents:
             for cap in agent.capabilities:
                 if cap not in capabilities:
                     capabilities[cap] = []
                 capabilities[cap].append(agent.agent_id)
 
-        self._capability_map = capabilities
+            # Collect full agent info for remote visibility
+            agents_info[agent.agent_id] = {
+                'agent_id': agent.agent_id,
+                'role': agent.role,
+                'capabilities': list(agent.capabilities),
+                'description': getattr(agent, 'description', ''),
+                'node_id': self._get_node_id()
+            }
+
+        # Merge local capabilities into the map (preserve remote agents)
+        for cap, agent_ids in capabilities.items():
+            if cap not in self._capability_map:
+                self._capability_map[cap] = []
+            for agent_id in agent_ids:
+                if agent_id not in self._capability_map[cap]:
+                    self._capability_map[cap].append(agent_id)
 
         payload = {
             'node_id': self._get_node_id(),
-            'capabilities': capabilities
+            'capabilities': capabilities,
+            'agents': agents_info  # Include for remote agent registry
         }
 
-        # Broadcast using the broadcaster
-        await self.broadcaster.broadcast_step_result(
-            step_id='capability_announcement',
-            workflow_id='system',
-            output_data=payload,
-            status='success'
+        # Broadcast directly using CAPABILITY_ANNOUNCEMENT message type
+        # This ensures the handler updates the capability map
+        # Note: _send_p2p_message wraps in 'payload' key, so send payload directly
+        success_count = await self._broadcast_p2p_message(
+            'CAPABILITY_ANNOUNCEMENT',
+            payload
         )
 
-        logger.info(f"Announced capabilities: {list(capabilities.keys())}")
+        logger.info(f"Announced capabilities to {success_count} peers: {list(capabilities.keys())}")
+
+    async def request_peer_capabilities(self):
+        """
+        Request capabilities from all existing peers.
+
+        Called when joining an existing mesh to discover what agents/capabilities
+        already exist. This ensures late-joiners see existing agents.
+        """
+        if not self._started or not self.swim_manager:
+            logger.warning("Cannot request capabilities - coordinator not started")
+            return
+
+        # Wait for ZMQ connections to be ready before requesting
+        await self._wait_for_zmq_connections(timeout=5.0)
+
+        # Get alive peers from SWIM
+        swim_node = self.swim_manager.swim_node
+        if not swim_node:
+            logger.warning("SWIM node not available")
+            return
+
+        try:
+            alive_members = list(swim_node.members.get_alive_members(exclude_self=True))
+            logger.info(f"Requesting capabilities from {len(alive_members)} peers")
+
+            for member in alive_members:
+                # member.address is already a string like "127.0.0.1:9905"
+                peer_addr = str(member.address)
+                try:
+                    # Send capability request - peers should respond with their capabilities
+                    await self._send_p2p_message(
+                        peer_addr,
+                        'CAPABILITY_REQUEST',
+                        {'node_id': self._get_node_id()}
+                    )
+                    logger.debug(f"Sent capability request to {peer_addr}")
+                except Exception as e:
+                    logger.debug(f"Failed to request capabilities from {peer_addr}: {e}")
+
+        except Exception as e:
+            logger.error(f"Error requesting peer capabilities: {e}")
+
+    async def _handle_capability_request(self, sender, message):
+        """Handle capability request from a new joiner - respond with our capabilities."""
+        try:
+            # Get the SWIM ID of the sender from the message (not the ZMQ identity)
+            sender_swim_id = message.get('from_node')
+            if not sender_swim_id:
+                logger.warning(f"Capability request missing from_node, cannot respond")
+                return
+
+            # Re-announce our capabilities to this specific peer
+            capabilities = {}
+            agents_info = {}
+
+            for agent in self.agents:
+                for cap in agent.capabilities:
+                    if cap not in capabilities:
+                        capabilities[cap] = []
+                    capabilities[cap].append(agent.agent_id)
+
+                agents_info[agent.agent_id] = {
+                    'agent_id': agent.agent_id,
+                    'role': agent.role,
+                    'capabilities': list(agent.capabilities),
+                    'description': getattr(agent, 'description', ''),
+                    'node_id': self._get_node_id()
+                }
+
+            response = {
+                'node_id': self._get_node_id(),
+                'capabilities': capabilities,
+                'agents': agents_info
+            }
+
+            # Send to the SWIM address (from_node), not the ZMQ identity (sender)
+            await self._send_p2p_message(sender_swim_id, 'CAPABILITY_ANNOUNCEMENT', response)
+            logger.info(f"Sent capabilities to requesting peer {sender_swim_id}")
+
+        except Exception as e:
+            logger.error(f"Error handling capability request: {e}")
+
+    async def deannounce_capabilities(self):
+        """
+        Broadcast capability removal to mesh.
+
+        Called when agent leaves mesh gracefully to notify other nodes
+        that this agent's capabilities are no longer available.
+        """
+        import time
+
+        if not self._started or not self.swim_manager:
+            return
+
+        node_id = self._get_node_id()
+
+        capabilities = []
+        agent_ids = []
+        for agent in self.agents:
+            capabilities.extend(agent.capabilities)
+            agent_ids.append(agent.agent_id)
+
+        payload = {
+            'type': 'CAPABILITY_DEANNOUNCEMENT',
+            'node_id': node_id,
+            'capabilities': list(set(capabilities)),
+            'agent_ids': agent_ids,
+            'timestamp': time.time()
+        }
+
+        await self._broadcast_p2p_message("CAPABILITY_DEANNOUNCEMENT", payload)
+        logger.info(f"Deannounced capabilities: {capabilities}")
 
     async def query_mesh(self, capability: str) -> List[str]:
         """
@@ -266,13 +463,21 @@ class P2PCoordinator:
                 logger.error("Cannot send P2P message: ZMQ agent not available")
                 return False
 
-            await self.swim_manager.zmq_agent.send_message(target, msg_type, payload)
+            import json
+            payload_json = json.dumps(payload)
+            success = await self.swim_manager.zmq_agent.send_message_base(
+                target,
+                msg_type,
+                "payload",
+                payload_json,
+                f"p2p_{msg_type}"
+            )
 
             # Record activity for keepalive suppression
             if self.keepalive_manager:
                 self.keepalive_manager.record_p2p_activity()
 
-            return True
+            return success
         except Exception as e:
             logger.error(f"Failed to send P2P message to {target}: {e}")
             return False
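Given this send path, a message presumably arrives with its business payload JSON-encoded under the 'payload' key and the sender's SWIM address under 'from_node' (field names inferred from the handlers elsewhere in this diff; the values below are illustrative, not a wire capture):

    # Illustrative shape of a received message dict.
    message = {
        "type": "CAPABILITY_REQUEST",
        "from_node": "127.0.0.1:9905",               # sender's SWIM address
        "payload": '{"node_id": "127.0.0.1:9905"}',  # JSON string, not a dict
    }

This is why the handlers below check `isinstance(payload, str)` before calling `json.loads`.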
@@ -338,10 +543,18 @@ class P2PCoordinator:
 
     async def _handle_capability_announcement(self, sender, message):
         """Handle capability announcement from peer."""
+        import time
+        import json
+
         try:
             payload = message.get('payload', {})
+            # Handle both JSON string and dict payload
+            if isinstance(payload, str):
+                payload = json.loads(payload)
+
             caps = payload.get('capabilities', {})
             node_id = payload.get('node_id')
+            agents_info = payload.get('agents', {})
 
             # Update local capability map
             for cap, agents in caps.items():
@@ -352,20 +565,65 @@ class P2PCoordinator:
                 if agent_id not in self._capability_map[cap]:
                     self._capability_map[cap].append(agent_id)
 
-            logger.info(f"Updated capabilities from {node_id}: {list(caps.keys())}")
+            # Update remote agent registry for visibility
+            for agent_id, info in agents_info.items():
+                self._remote_agent_registry[agent_id] = {
+                    **info,
+                    'node_id': node_id,
+                    'last_seen': time.time()
+                }
+
+            logger.info(
+                f"Updated from {node_id}: caps={list(caps.keys())}, "
+                f"agents={list(agents_info.keys())}"
+            )
         except Exception as e:
             logger.error(f"Error handling capability announcement: {e}")
 
+    async def _handle_capability_deannouncement(self, sender, message):
+        """Handle capability deannouncement from departing node."""
+        import json
+        try:
+            payload = message.get('payload', {})
+            if isinstance(payload, str):
+                payload = json.loads(payload)
+            node_id = payload.get('node_id')
+            agent_ids = payload.get('agent_ids', [])
+
+            # Remove from capability map
+            for cap in list(self._capability_map.keys()):
+                self._capability_map[cap] = [
+                    a for a in self._capability_map[cap]
+                    if a not in agent_ids
+                ]
+                # Clean up empty capabilities
+                if not self._capability_map[cap]:
+                    del self._capability_map[cap]
+
+            # Remove from remote agent registry
+            for agent_id in agent_ids:
+                self._remote_agent_registry.pop(agent_id, None)
+
+            logger.info(f"Node {node_id} departed, removed agents: {agent_ids}")
+        except Exception as e:
+            logger.error(f"Error handling capability deannouncement: {e}")
+
     async def _handle_capability_query(self, sender, message):
         """Handle capability query from peer."""
         try:
+            # Get the SWIM ID from the message (not the ZMQ identity)
+            sender_swim_id = message.get('from_node')
+            if not sender_swim_id:
+                logger.warning("Capability query missing from_node, cannot respond")
+                return
+
             capability = message.get('capability')
             response = {
                 'capability': capability,
                 'agents': self._capability_map.get(capability, [])
             }
-            await self._send_p2p_message(sender, 'CAPABILITY_QUERY_RESPONSE', response)
-            logger.debug(f"Responded to capability query from {sender} for {capability}")
+            await self._send_p2p_message(sender_swim_id, 'CAPABILITY_QUERY_RESPONSE', response)
+            logger.debug(f"Responded to capability query from {sender_swim_id} for {capability}")
         except Exception as e:
             logger.error(f"Error handling capability query: {e}")
 
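After an announcement is processed, a registry entry combines the announced agent fields with receipt metadata added by the handler above. An illustrative entry (all values hypothetical):

    self._remote_agent_registry["agent-42"] = {
        "agent_id": "agent-42",
        "role": "analyst",
        "capabilities": ["summarize", "classify"],
        "description": "",
        "node_id": "127.0.0.1:9906",  # node that announced the agent
        "last_seen": 1714000000.0,    # time.time() when the announcement arrived
    }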
@@ -387,6 +645,44 @@ class P2PCoordinator:
         self._agent_peer_clients.pop(agent_id, None)
         logger.debug(f"Unregistered PeerClient for agent: {agent_id}")
 
+    def get_remote_agent(self, role_or_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Find a remote agent by role or agent ID.
+
+        Args:
+            role_or_id: Role name or agent_id to search for
+
+        Returns:
+            Agent info dict with node_id, or None if not found
+
+        Example:
+            info = coordinator.get_remote_agent("analyst")
+            if info:
+                print(f"Found analyst at {info['node_id']}")
+        """
+        # Direct agent_id lookup
+        if role_or_id in self._remote_agent_registry:
+            return self._remote_agent_registry[role_or_id]
+
+        # Role lookup
+        for agent_id, info in self._remote_agent_registry.items():
+            if info.get('role') == role_or_id:
+                return {'agent_id': agent_id, **info}
+
+        return None
+
+    def list_remote_agents(self) -> List[Dict[str, Any]]:
+        """
+        List all known remote agents.
+
+        Returns:
+            List of agent info dicts with agent_id, role, capabilities, node_id
+        """
+        return [
+            {'agent_id': aid, **info}
+            for aid, info in self._remote_agent_registry.items()
+        ]
+
     async def _handle_peer_notify(self, sender, message):
         """Handle peer notification message."""
         try:
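A minimal usage sketch for the two lookup helpers, assuming a started coordinator whose registry has been filled by announcements (`coordinator` and the role name are placeholders):

    # Enumerate everything currently known about remote agents.
    for info in coordinator.list_remote_agents():
        print(info["agent_id"], info.get("role"), info.get("node_id"))

    # Lookup tries a direct agent_id match first, then falls back to role.
    analyst = coordinator.get_remote_agent("analyst")
    if analyst:
        print(f"Found analyst at {analyst['node_id']}")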
jarviscore/p2p/keepalive.py

@@ -242,62 +242,84 @@ class P2PKeepaliveManager:
         except Exception as e:
             logger.error(f"P2P_KEEPALIVE ({self.agent_id}): Error sending keepalive: {e}")
 
-    async def handle_keepalive_received(self, sender_id: str, payload: Dict[str, Any]):
+    async def handle_keepalive_received(self, sender_zmq_id: str, message: Dict[str, Any]):
         """
         Handle incoming keepalive message from peer.
-
+
         Args:
-            sender_id: ID of the peer that sent keepalive
-            payload: Keepalive message payload
+            sender_zmq_id: ZMQ identity of the sender (not used for response)
+            message: Full message dict containing 'from_node' with SWIM address
         """
         try:
             self.metrics.keepalives_received += 1
-            logger.debug(f"P2P_KEEPALIVE ({self.agent_id}): Received keepalive from {sender_id}")
-
-            # Send ACK back to sender
+
+            # Extract the SWIM address from the message (not the ZMQ identity)
+            sender_swim_id = message.get('from_node')
+            if not sender_swim_id:
+                logger.warning(f"P2P_KEEPALIVE ({self.agent_id}): Keepalive missing from_node, cannot ACK")
+                return
+
+            logger.debug(f"P2P_KEEPALIVE ({self.agent_id}): Received keepalive from {sender_swim_id}")
+
+            # Extract the nested payload for timestamp
+            payload = message.get('payload', {})
+            if isinstance(payload, str):
+                import json
+                payload = json.loads(payload)
+
+            # Send ACK back to sender using SWIM address
             ack_payload = {
                 'agent_id': self.agent_id,
                 'timestamp': time.time(),
                 'original_timestamp': payload.get('timestamp')
             }
-
+
             # Send ACK using direct message (not broadcast)
             if self.send_p2p_message:
-                success = await self.send_p2p_message(sender_id, 'P2P_KEEPALIVE_ACK', ack_payload)
+                success = await self.send_p2p_message(sender_swim_id, 'P2P_KEEPALIVE_ACK', ack_payload)
                 if success:
-                    logger.debug(f"P2P_KEEPALIVE ({self.agent_id}): Sent ACK to {sender_id}")
+                    logger.debug(f"P2P_KEEPALIVE ({self.agent_id}): Sent ACK to {sender_swim_id}")
                 else:
-                    logger.warning(f"P2P_KEEPALIVE ({self.agent_id}): Failed to send ACK to {sender_id}")
-
+                    logger.warning(f"P2P_KEEPALIVE ({self.agent_id}): Failed to send ACK to {sender_swim_id}")
+
         except Exception as e:
             logger.error(f"P2P_KEEPALIVE ({self.agent_id}): Error handling keepalive: {e}")
 
-    async def handle_keepalive_ack(self, sender_id: str, payload: Dict[str, Any]):
+    async def handle_keepalive_ack(self, sender_zmq_id: str, message: Dict[str, Any]):
         """
         Handle incoming keepalive ACK from peer.
-
+
         Args:
-            sender_id: ID of the peer that sent ACK
-            payload: ACK message payload
+            sender_zmq_id: ZMQ identity of the sender
+            message: Full message dict containing 'from_node' with SWIM address
         """
         try:
             self.metrics.acks_received += 1
             current_time = time.time()
-
+
+            # Extract the SWIM address from the message
+            sender_swim_id = message.get('from_node', sender_zmq_id)
+
+            # Extract the nested payload
+            payload = message.get('payload', {})
+            if isinstance(payload, str):
+                import json
+                payload = json.loads(payload)
+
             # Calculate latency if original timestamp available
             original_timestamp = payload.get('original_timestamp')
            if original_timestamp:
                latency = current_time - original_timestamp
                 self.metrics.last_keepalive_latency = latency
-                logger.debug(f"P2P_KEEPALIVE ({self.agent_id}): ACK from {sender_id}, "
+                logger.debug(f"P2P_KEEPALIVE ({self.agent_id}): ACK from {sender_swim_id}, "
                              f"latency={latency*1000:.1f}ms")
-
+
             self.metrics.last_successful_keepalive = current_time
-
+
             # Remove from pending if tracked
-            if sender_id in self.pending_keepalives:
-                del self.pending_keepalives[sender_id]
-
+            if sender_swim_id in self.pending_keepalives:
+                del self.pending_keepalives[sender_swim_id]
+
         except Exception as e:
             logger.error(f"P2P_KEEPALIVE ({self.agent_id}): Error handling ACK: {e}")
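A test-style sketch of the new handler contract: the first argument (the ZMQ identity) is ignored for routing, and the ACK is sent to the SWIM address found in 'from_node'. The manager instance and the send hook are assumed; this is illustrative, not a test from the package:

    import json
    import time

    async def demo(manager):
        sent = []

        # Stub the direct-send hook to observe where the ACK is routed.
        async def fake_send(target, msg_type, payload):
            sent.append((target, msg_type, payload))
            return True

        manager.send_p2p_message = fake_send
        await manager.handle_keepalive_received(
            "zmq-identity-abc",  # ZMQ identity, ignored for the response
            {
                "from_node": "127.0.0.1:9905",
                "payload": json.dumps({"timestamp": time.time()}),
            },
        )
        assert sent and sent[0][0] == "127.0.0.1:9905"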