jarviscore-framework 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. examples/calculator_agent_example.py +77 -0
  2. examples/multi_agent_workflow.py +132 -0
  3. examples/research_agent_example.py +76 -0
  4. jarviscore/__init__.py +54 -0
  5. jarviscore/cli/__init__.py +7 -0
  6. jarviscore/cli/__main__.py +33 -0
  7. jarviscore/cli/check.py +404 -0
  8. jarviscore/cli/smoketest.py +371 -0
  9. jarviscore/config/__init__.py +7 -0
  10. jarviscore/config/settings.py +128 -0
  11. jarviscore/core/__init__.py +7 -0
  12. jarviscore/core/agent.py +163 -0
  13. jarviscore/core/mesh.py +463 -0
  14. jarviscore/core/profile.py +64 -0
  15. jarviscore/docs/API_REFERENCE.md +932 -0
  16. jarviscore/docs/CONFIGURATION.md +753 -0
  17. jarviscore/docs/GETTING_STARTED.md +600 -0
  18. jarviscore/docs/TROUBLESHOOTING.md +424 -0
  19. jarviscore/docs/USER_GUIDE.md +983 -0
  20. jarviscore/execution/__init__.py +94 -0
  21. jarviscore/execution/code_registry.py +298 -0
  22. jarviscore/execution/generator.py +268 -0
  23. jarviscore/execution/llm.py +430 -0
  24. jarviscore/execution/repair.py +283 -0
  25. jarviscore/execution/result_handler.py +332 -0
  26. jarviscore/execution/sandbox.py +555 -0
  27. jarviscore/execution/search.py +281 -0
  28. jarviscore/orchestration/__init__.py +18 -0
  29. jarviscore/orchestration/claimer.py +101 -0
  30. jarviscore/orchestration/dependency.py +143 -0
  31. jarviscore/orchestration/engine.py +292 -0
  32. jarviscore/orchestration/status.py +96 -0
  33. jarviscore/p2p/__init__.py +23 -0
  34. jarviscore/p2p/broadcaster.py +353 -0
  35. jarviscore/p2p/coordinator.py +364 -0
  36. jarviscore/p2p/keepalive.py +361 -0
  37. jarviscore/p2p/swim_manager.py +290 -0
  38. jarviscore/profiles/__init__.py +6 -0
  39. jarviscore/profiles/autoagent.py +264 -0
  40. jarviscore/profiles/customagent.py +137 -0
  41. jarviscore_framework-0.1.0.dist-info/METADATA +136 -0
  42. jarviscore_framework-0.1.0.dist-info/RECORD +55 -0
  43. jarviscore_framework-0.1.0.dist-info/WHEEL +5 -0
  44. jarviscore_framework-0.1.0.dist-info/licenses/LICENSE +21 -0
  45. jarviscore_framework-0.1.0.dist-info/top_level.txt +3 -0
  46. tests/conftest.py +44 -0
  47. tests/test_agent.py +165 -0
  48. tests/test_autoagent.py +140 -0
  49. tests/test_autoagent_day4.py +186 -0
  50. tests/test_customagent.py +248 -0
  51. tests/test_integration.py +293 -0
  52. tests/test_llm_fallback.py +185 -0
  53. tests/test_mesh.py +356 -0
  54. tests/test_p2p_integration.py +375 -0
  55. tests/test_remote_sandbox.py +116 -0
@@ -0,0 +1,361 @@
1
+ """
2
+ P2P Keepalive Manager for maintaining active ZMQ connections in agent mesh.
3
+
4
+ Prevents idle connection closure by sending periodic keepalive messages
5
+ while intelligently suppressing when real workflow traffic exists.
6
+ """
7
+
8
import asyncio
import logging
import time
from dataclasses import dataclass
from enum import Enum
from typing import Any, Awaitable, Callable, Dict, Optional
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class CircuitState(Enum):
    """Circuit breaker states for connection health.

    Standard circuit-breaker state set, plus UNKNOWN for the period before
    any state has been reported to the keepalive manager (its initial value).
    The keepalive manager shortens its send interval in HALF_OPEN and OPEN
    to help probe for recovery.
    """
    CLOSED = "CLOSED"        # healthy: normal keepalive interval applies
    HALF_OPEN = "HALF_OPEN"  # probing recovery: aggressive keepalive interval
    OPEN = "OPEN"            # failing: keepalives used to trigger recovery probes
    UNKNOWN = "UNKNOWN"      # no circuit state reported yet (initial default)
24
+
25
+
26
@dataclass
class KeepaliveMetrics:
    """Metrics for P2P keepalive health monitoring.

    Plain counters/timestamps mutated in place by P2PKeepaliveManager and
    exposed through its get_health_status() snapshot.
    """
    keepalives_sent: int = 0            # broadcasts that reached at least one peer
    keepalives_received: int = 0        # keepalive messages received from peers
    acks_received: int = 0              # ACKs received for keepalives we sent
    timeouts: int = 0                   # pending keepalives that exceeded the timeout
    suppressed_count: int = 0           # keepalives skipped due to recent P2P activity
    last_successful_keepalive: float = 0.0  # time.time() of the most recent ACK
    last_keepalive_latency: float = 0.0     # seconds; last measured send->ACK round trip
    circuit_breaker_events: int = 0     # number of circuit state transitions observed
37
+
38
+
39
class P2PKeepaliveManager:
    """
    Manages P2P keepalive messages to prevent ZMQ connection idle closure.

    Features:
    - Periodic keepalive with configurable interval
    - Smart suppression when recent workflow traffic exists
    - Circuit breaker integration for adaptive behavior
    - Health metrics and observability
    - Bidirectional keepalive + ACK pattern
    """

    def __init__(
        self,
        agent_id: str,
        send_p2p_callback: Callable[[str, str, Dict[str, Any]], Awaitable[bool]],
        broadcast_p2p_callback: Optional[Callable[[str, Dict[str, Any]], Awaitable[int]]] = None,
        config: Optional[Dict[str, Any]] = None
    ):
        """
        Initialize P2P Keepalive Manager.

        Args:
            agent_id: Unique identifier for this agent
            send_p2p_callback: Async function to send a P2P message to a
                specific peer. It is awaited (see handle_keepalive_received),
                and must resolve to True on success.
            broadcast_p2p_callback: Optional async function to broadcast to
                all peers. It is awaited (see _send_keepalive) and must
                resolve to the number of peers successfully reached.
            config: Configuration dictionary; unknown keys are ignored.
        """
        self.agent_id = agent_id
        self.send_p2p_message = send_p2p_callback
        self.broadcast_p2p_message = broadcast_p2p_callback

        # Configuration with production defaults
        config = config or {}
        self.enabled = config.get('P2P_KEEPALIVE_ENABLED', True)
        self.interval = config.get('P2P_KEEPALIVE_INTERVAL', 90)  # 90s default
        self.timeout = config.get('P2P_KEEPALIVE_TIMEOUT', 10)  # 10s ACK timeout
        self.activity_suppress_window = config.get('P2P_ACTIVITY_SUPPRESS_WINDOW', 60)  # 60s
        self.circuit_half_open_interval = config.get('P2P_CIRCUIT_HALF_OPEN_INTERVAL', 30)  # 30s aggressive

        # State tracking
        self.last_p2p_activity = time.time()  # timestamp of any P2P activity (for suppression)
        self.last_keepalive_sent = 0.0
        self.pending_keepalives: Dict[str, float] = {}  # peer_id -> sent_time, awaiting ACK
        self.circuit_state = CircuitState.UNKNOWN

        # Metrics
        self.metrics = KeepaliveMetrics()
        # Timestamp of the last periodic metrics log; initialized to "now" so
        # the first log happens roughly one period after startup.
        self._last_metrics_log = time.time()

        # Control
        self._running = False
        self._keepalive_task: Optional[asyncio.Task] = None

        logger.info(f"P2P_KEEPALIVE ({self.agent_id}): Initialized with interval={self.interval}s, "
                    f"suppress_window={self.activity_suppress_window}s, enabled={self.enabled}")

    async def start(self):
        """Start the keepalive loop (no-op if disabled or already running)."""
        if not self.enabled:
            logger.info(f"P2P_KEEPALIVE ({self.agent_id}): Disabled by configuration")
            return

        if self._running:
            logger.warning(f"P2P_KEEPALIVE ({self.agent_id}): Already running")
            return

        self._running = True
        self._keepalive_task = asyncio.create_task(self._keepalive_loop())
        logger.info(f"P2P_KEEPALIVE ({self.agent_id}): Started keepalive loop")

    async def stop(self):
        """Stop the keepalive loop and wait for the task to finish cancelling."""
        self._running = False

        if self._keepalive_task:
            self._keepalive_task.cancel()
            try:
                await self._keepalive_task
            except asyncio.CancelledError:
                pass
            # Clear the handle so a subsequent start() creates a fresh task
            # and stop() is safely idempotent.
            self._keepalive_task = None

        logger.info(f"P2P_KEEPALIVE ({self.agent_id}): Stopped keepalive loop")

    def record_p2p_activity(self):
        """
        Record that P2P activity occurred (workflow message, nudge, broadcast).
        Used for smart suppression of keepalives.
        """
        self.last_p2p_activity = time.time()

    def update_circuit_state(self, state: CircuitState):
        """
        Update circuit breaker state for adaptive keepalive behavior.

        Args:
            state: Current circuit breaker state
        """
        # Only log/count actual transitions, not repeated reports of the
        # same state.
        if state != self.circuit_state:
            logger.info(f"P2P_KEEPALIVE ({self.agent_id}): Circuit state changed: "
                        f"{self.circuit_state.value} -> {state.value}")
            self.circuit_state = state
            self.metrics.circuit_breaker_events += 1

    def _should_send_keepalive(self) -> bool:
        """
        Determine if keepalive should be sent based on smart suppression logic.

        Returns:
            True if keepalive should be sent, False if suppressed
        """
        current_time = time.time()

        # Interval check first (adaptive on circuit state). If a keepalive
        # is not yet due, that is simply "not time" rather than a
        # suppression, so the suppressed_count metric is not inflated by
        # every 10s poll of the loop.
        interval = self._get_adaptive_interval()
        if current_time - self.last_keepalive_sent < interval:
            return False

        # A keepalive is due: suppress it only when recent P2P traffic is
        # already keeping the connections warm.
        time_since_activity = current_time - self.last_p2p_activity
        if time_since_activity < self.activity_suppress_window:
            logger.debug(f"P2P_KEEPALIVE ({self.agent_id}): Suppressed - recent activity "
                         f"{time_since_activity:.1f}s ago")
            self.metrics.suppressed_count += 1
            return False

        return True

    def _get_adaptive_interval(self) -> float:
        """
        Get adaptive keepalive interval based on circuit breaker state.

        Returns:
            Keepalive interval in seconds
        """
        if self.circuit_state == CircuitState.HALF_OPEN:
            # Aggressive keepalives to help circuit recovery
            return self.circuit_half_open_interval
        elif self.circuit_state == CircuitState.OPEN:
            # Try to trigger recovery probes
            return self.circuit_half_open_interval
        else:
            # Normal operation (CLOSED or UNKNOWN)
            return self.interval

    async def _keepalive_loop(self):
        """Main keepalive loop with adaptive timing."""
        logger.info(f"P2P_KEEPALIVE ({self.agent_id}): Keepalive loop started")

        # Initial delay to allow P2P mesh to stabilize
        await asyncio.sleep(30)

        while self._running:
            try:
                if self._should_send_keepalive():
                    await self._send_keepalive()

                # Check for keepalive timeouts
                await self._check_timeouts()

                # Log metrics periodically
                await self._log_metrics()

                # Poll frequently; actual send cadence is governed by
                # _should_send_keepalive()'s interval logic.
                await asyncio.sleep(10)  # Check every 10s, send based on interval

            except Exception as e:
                logger.error(f"P2P_KEEPALIVE ({self.agent_id}): Error in keepalive loop: {e}",
                             exc_info=True)
                await asyncio.sleep(30)  # Back off on error

        logger.info(f"P2P_KEEPALIVE ({self.agent_id}): Keepalive loop stopped")

    async def _send_keepalive(self):
        """Send keepalive message to all peers via the broadcast callback."""
        try:
            current_time = time.time()

            payload = {
                'agent_id': self.agent_id,
                'timestamp': current_time,
                'circuit_state': self.circuit_state.value,
                'metrics': {
                    'sent': self.metrics.keepalives_sent,
                    'received': self.metrics.keepalives_received,
                    'acks': self.metrics.acks_received
                }
            }

            # Broadcast keepalive to all peers
            if self.broadcast_p2p_message:
                success_count = await self.broadcast_p2p_message('P2P_KEEPALIVE', payload)

                if success_count > 0:
                    self.last_keepalive_sent = current_time
                    self.metrics.keepalives_sent += 1
                    # NOTE(review): pending_keepalives is never populated here
                    # (the broadcast callback does not report per-peer ids), so
                    # _check_timeouts only fires for entries added externally —
                    # confirm this is intended.
                    logger.debug(f"P2P_KEEPALIVE ({self.agent_id}): Sent keepalive to {success_count} peers")
                else:
                    logger.warning(f"P2P_KEEPALIVE ({self.agent_id}): Failed to send keepalive to any peer")
            else:
                logger.warning(f"P2P_KEEPALIVE ({self.agent_id}): No broadcast callback available")

        except Exception as e:
            logger.error(f"P2P_KEEPALIVE ({self.agent_id}): Error sending keepalive: {e}")

    async def handle_keepalive_received(self, sender_id: str, payload: Dict[str, Any]):
        """
        Handle incoming keepalive message from peer.

        Args:
            sender_id: ID of the peer that sent keepalive
            payload: Keepalive message payload
        """
        try:
            self.metrics.keepalives_received += 1
            logger.debug(f"P2P_KEEPALIVE ({self.agent_id}): Received keepalive from {sender_id}")

            # Send ACK back to sender; echo the original timestamp so the
            # sender can compute round-trip latency.
            ack_payload = {
                'agent_id': self.agent_id,
                'timestamp': time.time(),
                'original_timestamp': payload.get('timestamp')
            }

            # Send ACK using direct message (not broadcast)
            if self.send_p2p_message:
                success = await self.send_p2p_message(sender_id, 'P2P_KEEPALIVE_ACK', ack_payload)
                if success:
                    logger.debug(f"P2P_KEEPALIVE ({self.agent_id}): Sent ACK to {sender_id}")
                else:
                    logger.warning(f"P2P_KEEPALIVE ({self.agent_id}): Failed to send ACK to {sender_id}")

        except Exception as e:
            logger.error(f"P2P_KEEPALIVE ({self.agent_id}): Error handling keepalive: {e}")

    async def handle_keepalive_ack(self, sender_id: str, payload: Dict[str, Any]):
        """
        Handle incoming keepalive ACK from peer.

        Args:
            sender_id: ID of the peer that sent ACK
            payload: ACK message payload
        """
        try:
            self.metrics.acks_received += 1
            current_time = time.time()

            # Calculate latency if original timestamp available
            original_timestamp = payload.get('original_timestamp')
            if original_timestamp:
                latency = current_time - original_timestamp
                self.metrics.last_keepalive_latency = latency
                logger.debug(f"P2P_KEEPALIVE ({self.agent_id}): ACK from {sender_id}, "
                             f"latency={latency*1000:.1f}ms")

            self.metrics.last_successful_keepalive = current_time

            # Remove from pending if tracked (no-op when absent)
            self.pending_keepalives.pop(sender_id, None)

        except Exception as e:
            logger.error(f"P2P_KEEPALIVE ({self.agent_id}): Error handling ACK: {e}")

    async def _check_timeouts(self):
        """Check for keepalive timeouts and clean up pending requests."""
        current_time = time.time()
        # Collect first, delete after: never mutate the dict while iterating.
        timed_out = [
            peer_id
            for peer_id, sent_time in self.pending_keepalives.items()
            if current_time - sent_time > self.timeout
        ]

        for peer_id in timed_out:
            del self.pending_keepalives[peer_id]
            self.metrics.timeouts += 1
            logger.warning(f"P2P_KEEPALIVE ({self.agent_id}): Keepalive timeout for peer {peer_id}")

    async def _log_metrics(self):
        """Log keepalive metrics at most once every 5 minutes."""
        current_time = time.time()

        # Timestamp-based throttle. The previous wall-clock modulo check
        # (time.time() % 300 < 10) could fire twice in a window or skip a
        # window entirely depending on loop scheduling/backoff; tracking the
        # last log time makes the cadence deterministic.
        if current_time - self._last_metrics_log < 300:
            return
        self._last_metrics_log = current_time

        logger.info(
            f"P2P_KEEPALIVE_METRICS ({self.agent_id}): "
            f"Sent={self.metrics.keepalives_sent}, "
            f"Received={self.metrics.keepalives_received}, "
            f"ACKs={self.metrics.acks_received}, "
            f"Timeouts={self.metrics.timeouts}, "
            f"Suppressed={self.metrics.suppressed_count}, "
            f"Circuit_Events={self.metrics.circuit_breaker_events}, "
            f"Last_Latency={self.metrics.last_keepalive_latency*1000:.1f}ms"
        )

    def get_health_status(self) -> Dict[str, Any]:
        """
        Get current health status and metrics.

        Returns:
            Dictionary with health status and metrics; all "last_*" entries
            are ages in seconds relative to now, not absolute timestamps.
        """
        current_time = time.time()
        time_since_last_success = current_time - self.metrics.last_successful_keepalive

        return {
            'enabled': self.enabled,
            'running': self._running,
            'circuit_state': self.circuit_state.value,
            'last_activity': current_time - self.last_p2p_activity,
            'last_keepalive': current_time - self.last_keepalive_sent,
            'last_success': time_since_last_success,
            'metrics': {
                'sent': self.metrics.keepalives_sent,
                'received': self.metrics.keepalives_received,
                'acks': self.metrics.acks_received,
                'timeouts': self.metrics.timeouts,
                'suppressed': self.metrics.suppressed_count,
                'circuit_events': self.metrics.circuit_breaker_events,
                'latency_ms': self.metrics.last_keepalive_latency * 1000
            }
        }
@@ -0,0 +1,290 @@
1
+ """
2
+ SWIM Thread Manager for JarvisCore Framework
3
+ Runs SWIM protocol in dedicated thread to prevent GIL blocking from CPU-bound workflow operations.
4
+
5
+ Adapted from integration-agent/src/swim_thread_manager.py
6
+ - Updated imports to use jarviscore.config
7
+ - Kept core functionality identical
8
+ """
9
+ import asyncio
10
+ import logging
11
+ import threading
12
+ import time
13
+ from typing import Optional
14
+
15
+ from swim.transport.hybrid import HybridTransport
16
+ from swim.protocol.node import Node
17
+ from swim.config import get_config as get_swim_config, validate_config as validate_swim_config
18
+ from swim.events.dispatcher import EventDispatcher
19
+ from swim.integration.agent import ZMQAgentIntegration
20
+ from swim.main import SWIMZMQBridge, parse_address as swim_parse_address
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ class SWIMThreadManager:
26
+ """Manages SWIM node in a dedicated thread with its own event loop."""
27
+
28
+ def __init__(self, config: Optional[dict] = None):
29
+ """
30
+ Initialize SWIM Thread Manager.
31
+
32
+ Args:
33
+ config: Configuration dictionary (uses defaults if not provided)
34
+ """
35
+ self.config = config or {}
36
+ self.swim_loop: Optional[asyncio.AbstractEventLoop] = None
37
+ self.swim_thread: Optional[threading.Thread] = None
38
+ self.swim_node = None
39
+ self.zmq_agent = None
40
+ self.swim_zmq_bridge = None
41
+ self.event_dispatcher = None
42
+ self.bind_addr = None # Store bind address for node_id access
43
+ self._started = False
44
+ self._initialized = threading.Event()
45
+ self._shutdown_event = threading.Event()
46
+ self._init_error: Optional[str] = None
47
+
48
+ def start_swim_in_thread_simple(self):
49
+ """
50
+ Start SWIM in dedicated thread using configuration.
51
+ """
52
+ if self._started:
53
+ logger.warning("SWIM thread already started")
54
+ return
55
+
56
+ logger.info("Starting SWIM in dedicated thread...")
57
+ self.swim_thread = threading.Thread(
58
+ target=self._run_swim_loop,
59
+ daemon=True,
60
+ name="SWIM-Protocol-Thread"
61
+ )
62
+ self.swim_thread.start()
63
+ self._started = True
64
+ logger.info("SWIM thread started")
65
+
66
+ def _run_swim_loop(self):
67
+ """Run SWIM in dedicated event loop (runs in thread)."""
68
+ try:
69
+ # Create new event loop for this thread
70
+ self.swim_loop = asyncio.new_event_loop()
71
+ asyncio.set_event_loop(self.swim_loop)
72
+
73
+ logger.info("SWIM thread event loop created")
74
+
75
+ # Initialize SWIM
76
+ self.swim_loop.run_until_complete(self._init_swim())
77
+
78
+ if not self.swim_node:
79
+ logger.error("SWIM initialization failed in thread")
80
+ self._init_error = "Failed to create SWIM node"
81
+ self._initialized.set()
82
+ return
83
+
84
+ logger.info("✅ SWIM initialized successfully in dedicated thread")
85
+ logger.info(" SWIM will never be blocked by workflow execution!")
86
+
87
+ # Signal that initialization is complete
88
+ self._initialized.set()
89
+
90
+ # Run event loop until shutdown
91
+ self._run_until_shutdown()
92
+
93
+ except Exception as e:
94
+ logger.error(f"Error in SWIM thread: {e}", exc_info=True)
95
+ self._init_error = str(e)
96
+ self._initialized.set()
97
+ finally:
98
+ if self.swim_loop:
99
+ try:
100
+ self.swim_loop.close()
101
+ except Exception as e:
102
+ logger.error(f"Error closing SWIM loop: {e}")
103
+
104
+ async def _init_swim(self):
105
+ """Initialize SWIM components."""
106
+ try:
107
+ # Get configuration with defaults
108
+ bind_host = self.config.get('bind_host', '127.0.0.1')
109
+ bind_port = self.config.get('bind_port', 7946)
110
+ node_name = self.config.get('node_name', 'jarviscore-node')
111
+ seed_nodes = self.config.get('seed_nodes', '')
112
+ transport_type = self.config.get('transport_type', 'hybrid')
113
+ zmq_port_offset = self.config.get('zmq_port_offset', 1000)
114
+
115
+ # Parse bind address
116
+ self.bind_addr = swim_parse_address(f"{bind_host}:{bind_port}")
117
+ logger.info(f"SWIM bind address: {self.bind_addr}")
118
+
119
+ # Parse seed nodes
120
+ seed_addrs = []
121
+ if seed_nodes:
122
+ for seed in seed_nodes.split(','):
123
+ if seed.strip():
124
+ seed_addrs.append(swim_parse_address(seed.strip()))
125
+ logger.info(f"SWIM seed nodes: {seed_addrs}")
126
+
127
+ # Get SWIM config
128
+ swim_config = get_swim_config()
129
+ swim_config.update({
130
+ "NODE_NAME": node_name,
131
+ "ZMQ_ENABLED": True,
132
+ "SEND_ON_JOIN": True,
133
+ "ZMQ_PORT_OFFSET": zmq_port_offset,
134
+ "TRANSPORT_TYPE": transport_type,
135
+ "STABILITY_TIMEOUT_SECONDS": 3.0
136
+ })
137
+
138
+ # Validate config
139
+ errors = validate_swim_config(swim_config)
140
+ if errors:
141
+ logger.error(f"SWIM config validation errors: {errors}")
142
+ return
143
+
144
+ # Create transport
145
+ transport = HybridTransport(
146
+ udp_max_size=swim_config.get("UDP_MAX_SIZE", 1400),
147
+ tcp_buffer_size=swim_config.get("TCP_BUFFER_SIZE", 65536),
148
+ tcp_max_connections=swim_config.get("TCP_MAX_CONNECTIONS", 128)
149
+ )
150
+
151
+ # Create event dispatcher
152
+ self.event_dispatcher = EventDispatcher(
153
+ max_history_size=swim_config.get("EVENT_HISTORY", 1000),
154
+ enable_history=swim_config.get("EVENTS_ENABLED", True)
155
+ )
156
+
157
+ # Create SWIM node
158
+ logger.info("Creating SWIM node in dedicated thread...")
159
+ self.swim_node = await Node.create(
160
+ bind_addr=self.bind_addr,
161
+ transport=transport,
162
+ seed_addrs=seed_addrs,
163
+ config=swim_config,
164
+ event_dispatcher=self.event_dispatcher,
165
+ validate_ports=True
166
+ )
167
+
168
+ if not self.swim_node:
169
+ logger.error("Failed to create SWIM node")
170
+ return
171
+
172
+ logger.info(f"SWIM node created at {self.bind_addr}")
173
+
174
+ # Setup ZMQ integration
175
+ zmq_port = self.bind_addr[1] + swim_config.get("ZMQ_PORT_OFFSET", zmq_port_offset)
176
+ zmq_addr = f"{self.bind_addr[0]}:{zmq_port}"
177
+ node_id = f"{self.bind_addr[0]}:{self.bind_addr[1]}"
178
+ logger.info(f"Setting up ZMQ integration at {zmq_addr}")
179
+
180
+ self.zmq_agent = ZMQAgentIntegration(
181
+ node_id=node_id,
182
+ bind_address=zmq_addr,
183
+ event_dispatcher=self.event_dispatcher,
184
+ config=swim_config
185
+ )
186
+
187
+ # Start ZMQ agent
188
+ logger.info("Starting ZMQ agent...")
189
+ await self.zmq_agent.start()
190
+ logger.info("ZMQ agent started successfully")
191
+
192
+ # Setup SWIM-ZMQ Bridge
193
+ logger.info("Setting up SWIM-ZMQ Bridge...")
194
+ self.swim_zmq_bridge = SWIMZMQBridge(self.swim_node, self.zmq_agent, swim_config)
195
+ await self.swim_zmq_bridge.start()
196
+ logger.info("SWIM-ZMQ Bridge started successfully")
197
+
198
+ # Start the SWIM protocol
199
+ logger.info("Starting SWIM protocol...")
200
+ await self.swim_node.start()
201
+ logger.info("SWIM node started successfully")
202
+
203
+ except Exception as e:
204
+ logger.error(f"Error initializing SWIM: {e}", exc_info=True)
205
+ raise
206
+
207
+ def _run_until_shutdown(self):
208
+ """Keep SWIM event loop running until shutdown requested."""
209
+ while not self._shutdown_event.is_set():
210
+ try:
211
+ # Process events with timeout so we can check shutdown flag
212
+ self.swim_loop.run_until_complete(asyncio.sleep(0.5))
213
+ except Exception as e:
214
+ logger.error(f"Error in SWIM event loop: {e}")
215
+
216
+ logger.info("SWIM thread shutdown requested")
217
+
218
+ # Cleanup SWIM components
219
+ try:
220
+ if self.swim_zmq_bridge and hasattr(self.swim_zmq_bridge, 'stop'):
221
+ self.swim_loop.run_until_complete(self.swim_zmq_bridge.stop())
222
+ if self.zmq_agent:
223
+ self.swim_loop.run_until_complete(self.zmq_agent.stop())
224
+ if self.swim_node:
225
+ self.swim_loop.run_until_complete(self.swim_node.stop())
226
+ except Exception as e:
227
+ logger.error(f"Error during SWIM shutdown: {e}")
228
+
229
+ def wait_for_init(self, timeout: float = 20.0) -> bool:
230
+ """
231
+ Wait for SWIM to initialize.
232
+
233
+ Args:
234
+ timeout: Maximum time to wait in seconds
235
+
236
+ Returns:
237
+ True if initialized successfully, False if timeout or error
238
+ """
239
+ logger.info(f"Waiting for SWIM initialization (timeout: {timeout}s)...")
240
+
241
+ if self._initialized.wait(timeout=timeout):
242
+ if self._init_error:
243
+ logger.error(f"SWIM initialization failed: {self._init_error}")
244
+ return False
245
+ if self.swim_node and self.zmq_agent:
246
+ logger.info("SWIM initialization confirmed")
247
+ return True
248
+ else:
249
+ logger.error("SWIM initialization incomplete")
250
+ return False
251
+ else:
252
+ logger.error(f"SWIM initialization timeout after {timeout}s")
253
+ return False
254
+
255
+ def is_healthy(self) -> bool:
256
+ """Check if SWIM thread is healthy."""
257
+ return (
258
+ self._started and
259
+ self.swim_thread is not None and
260
+ self.swim_thread.is_alive() and
261
+ self.swim_node is not None and
262
+ self.zmq_agent is not None
263
+ )
264
+
265
+ def get_status(self) -> dict:
266
+ """Get SWIM thread status."""
267
+ return {
268
+ 'started': self._started,
269
+ 'thread_alive': self.swim_thread.is_alive() if self.swim_thread else False,
270
+ 'swim_node': self.swim_node is not None,
271
+ 'zmq_agent': self.zmq_agent is not None,
272
+ 'bridge': self.swim_zmq_bridge is not None,
273
+ 'healthy': self.is_healthy()
274
+ }
275
+
276
+ def shutdown(self, timeout: float = 10.0):
277
+ """Shutdown SWIM thread gracefully."""
278
+ if not self._started:
279
+ return
280
+
281
+ logger.info("Shutting down SWIM thread...")
282
+ self._shutdown_event.set()
283
+
284
+ if self.swim_thread:
285
+ self.swim_thread.join(timeout=timeout)
286
+ if self.swim_thread.is_alive():
287
+ logger.warning("SWIM thread did not exit cleanly")
288
+
289
+ self._started = False
290
+ logger.info("SWIM thread shutdown complete")
@@ -0,0 +1,6 @@
1
+ """Execution profiles for agents."""
2
+
3
+ from .autoagent import AutoAgent
4
+ from .customagent import CustomAgent
5
+
6
+ __all__ = ["AutoAgent", "CustomAgent"]