agentfield 0.1.22rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. agentfield/__init__.py +66 -0
  2. agentfield/agent.py +3569 -0
  3. agentfield/agent_ai.py +1125 -0
  4. agentfield/agent_cli.py +386 -0
  5. agentfield/agent_field_handler.py +494 -0
  6. agentfield/agent_mcp.py +534 -0
  7. agentfield/agent_registry.py +29 -0
  8. agentfield/agent_server.py +1185 -0
  9. agentfield/agent_utils.py +269 -0
  10. agentfield/agent_workflow.py +323 -0
  11. agentfield/async_config.py +278 -0
  12. agentfield/async_execution_manager.py +1227 -0
  13. agentfield/client.py +1447 -0
  14. agentfield/connection_manager.py +280 -0
  15. agentfield/decorators.py +527 -0
  16. agentfield/did_manager.py +337 -0
  17. agentfield/dynamic_skills.py +304 -0
  18. agentfield/execution_context.py +255 -0
  19. agentfield/execution_state.py +453 -0
  20. agentfield/http_connection_manager.py +429 -0
  21. agentfield/litellm_adapters.py +140 -0
  22. agentfield/logger.py +249 -0
  23. agentfield/mcp_client.py +204 -0
  24. agentfield/mcp_manager.py +340 -0
  25. agentfield/mcp_stdio_bridge.py +550 -0
  26. agentfield/memory.py +723 -0
  27. agentfield/memory_events.py +489 -0
  28. agentfield/multimodal.py +173 -0
  29. agentfield/multimodal_response.py +403 -0
  30. agentfield/pydantic_utils.py +227 -0
  31. agentfield/rate_limiter.py +280 -0
  32. agentfield/result_cache.py +441 -0
  33. agentfield/router.py +190 -0
  34. agentfield/status.py +70 -0
  35. agentfield/types.py +710 -0
  36. agentfield/utils.py +26 -0
  37. agentfield/vc_generator.py +464 -0
  38. agentfield/vision.py +198 -0
  39. agentfield-0.1.22rc2.dist-info/METADATA +102 -0
  40. agentfield-0.1.22rc2.dist-info/RECORD +42 -0
  41. agentfield-0.1.22rc2.dist-info/WHEEL +5 -0
  42. agentfield-0.1.22rc2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1185 @@
1
+ import asyncio
2
+ import importlib.util
3
+ import os
4
+ import signal
5
+ import urllib.parse
6
+ from datetime import datetime
7
+ from typing import Optional
8
+
9
+ import uvicorn
10
+ from agentfield.agent_utils import AgentUtils
11
+ from agentfield.logger import log_debug, log_error, log_info, log_success, log_warn
12
+ from agentfield.utils import get_free_port
13
+ from fastapi import Request
14
+ from fastapi.routing import APIRoute
15
+
16
+
17
+ class AgentServer:
18
+ """Server management functionality for AgentField Agent"""
19
+
20
+ def __init__(self, agent_instance):
21
+ """
22
+ Initialize the AgentServer with a reference to the agent instance.
23
+
24
+ Args:
25
+ agent_instance: The Agent instance this server manages
26
+ """
27
+ self.agent = agent_instance
28
+
29
def setup_agentfield_routes(self):
    """
    Register the standard HTTP routes that the AgentField server expects.

    Every handler below is a closure over ``self`` and is attached to the
    agent through its ``get`` / ``post`` route decorators:

    - ``GET  /health``, ``GET /status``, ``GET /info``
    - ``GET  /reasoners``, ``GET /skills``
    - ``POST /shutdown``
    - ``GET  /mcp/status``, ``GET /health/mcp``
    - ``POST /mcp/{alias}/start``, ``/stop``, ``/restart``
    - ``POST /mcp/servers/{alias}/restart``
    - ``GET  /mcp/servers/{alias}/tools``

    The lifecycle and MCP handlers catch exceptions and describe the
    failure in the response body instead of raising.
    """

    # Liveness data, plus a per-server MCP summary when a manager exists.
    @self.agent.get("/health")
    async def health():
        health_response = {
            "status": "healthy",
            "node_id": self.agent.node_id,
            "version": self.agent.version,
            "timestamp": datetime.now().isoformat(),
        }

        # Add MCP server status if manager is available
        if self.agent.mcp_manager:
            try:
                all_status = self.agent.mcp_manager.get_all_status()

                # Calculate summary statistics
                total_servers = len(all_status)
                running_servers = sum(
                    1
                    for server in all_status.values()
                    if server.get("status") == "running"
                )
                failed_servers = sum(
                    1
                    for server in all_status.values()
                    if server.get("status") == "failed"
                )

                # Any failed MCP server downgrades the overall status
                if failed_servers > 0:
                    health_response["status"] = "degraded"

                # Add MCP information to health response
                mcp_server_info = {
                    "total": total_servers,
                    "running": running_servers,
                    "failed": failed_servers,
                    "servers": {},
                }

                # Add individual server details
                for alias, server_process in all_status.items():
                    process = server_process.get("process")
                    server_info = {
                        "status": server_process.get("status"),
                        "port": server_process.get("port"),
                        "pid": process.pid if process else None,
                    }
                    mcp_server_info["servers"][alias] = server_info

                health_response["mcp_servers"] = mcp_server_info

            except Exception as e:
                if self.agent.dev_mode:
                    log_warn(f"Error getting MCP status for health check: {e}")
                # The top-level "status" is left as it was; only the MCP
                # section reports the failure.
                health_response["mcp_servers"] = {
                    "error": "Failed to get MCP status",
                    "total": 0,
                    "running": 0,
                    "failed": 0,
                }

        return health_response

    @self.agent.get("/reasoners")
    async def list_reasoners():
        return {"reasoners": self.agent.reasoners}

    @self.agent.get("/skills")
    async def list_skills():
        return {"skills": self.agent.skills}

    @self.agent.post("/shutdown")
    async def shutdown_agent(request: Request):
        """
        Graceful shutdown endpoint for the agent.

        This endpoint allows the AgentField server to request a graceful shutdown
        instead of using process signals.
        """
        try:
            # Parse request body for shutdown options. Only the media type is
            # compared: clients commonly send parameters such as
            # "application/json; charset=utf-8", and the value is
            # case-insensitive, so an exact string match would silently drop
            # the caller's options.
            content_type = request.headers.get("content-type") or ""
            media_type = content_type.split(";", 1)[0].strip().lower()
            body = await request.json() if media_type == "application/json" else {}
            graceful = body.get("graceful", True)
            timeout_seconds = body.get("timeout_seconds", 30)

            if self.agent.dev_mode:
                log_info(
                    f"Shutdown request received (graceful={graceful}, timeout={timeout_seconds}s)"
                )

            # Set shutdown status (imported here rather than at module top)
            from agentfield.agent import AgentStatus

            self.agent._shutdown_requested = True
            self.agent._current_status = AgentStatus.OFFLINE

            # Notify AgentField server of shutdown initiation. Failures are
            # logged only; they never block the shutdown itself.
            try:
                success = self.agent.client.notify_graceful_shutdown_sync(
                    self.agent.node_id
                )
                if self.agent.dev_mode:
                    state = "sent" if success else "failed"
                    log_info(f"Shutdown notification {state}")
            except Exception as e:
                if self.agent.dev_mode:
                    log_error(f"Shutdown notification error: {e}")

            # Schedule the shutdown in the background so this response can
            # still be returned. The task is stored on the instance because
            # the event loop keeps only weak references to tasks; an
            # unreferenced task may be garbage-collected before it finishes.
            if graceful:
                self._shutdown_task = asyncio.create_task(
                    self._graceful_shutdown(timeout_seconds)
                )

                return {
                    "status": "shutting_down",
                    "graceful": True,
                    "timeout_seconds": timeout_seconds,
                    "estimated_shutdown_time": datetime.now().isoformat(),
                    "message": "Graceful shutdown initiated",
                }
            else:
                # Immediate shutdown
                self._shutdown_task = asyncio.create_task(
                    self._immediate_shutdown()
                )

                return {
                    "status": "shutting_down",
                    "graceful": False,
                    "message": "Immediate shutdown initiated",
                }

        except Exception as e:
            if self.agent.dev_mode:
                log_error(f"Shutdown endpoint error: {e}")
            return {
                "status": "error",
                "message": f"Failed to initiate shutdown: {str(e)}",
            }

    @self.agent.get("/status")
    async def get_agent_status():
        """
        Get detailed agent status information.

        This endpoint provides comprehensive status information about the agent,
        including uptime, resource usage, and current state.
        """
        try:
            import time

            # Optional dependency: ImportError is handled below with a
            # reduced response.
            import psutil

            # Get process info
            process = psutil.Process()

            # Calculate uptime; without a recorded _start_time the uptime is ~0
            start_time = getattr(self.agent, "_start_time", time.time())
            uptime_seconds = time.time() - start_time
            uptime_formatted = self._format_uptime(uptime_seconds)

            status_response = {
                "status": (
                    "running"
                    if not getattr(self.agent, "_shutdown_requested", False)
                    else "stopping"
                ),
                "uptime": uptime_formatted,
                "uptime_seconds": int(uptime_seconds),
                "pid": os.getpid(),
                "version": self.agent.version,
                "node_id": self.agent.node_id,
                "last_activity": datetime.now().isoformat(),
                "resources": {
                    "memory_mb": round(process.memory_info().rss / 1024 / 1024, 2),
                    "cpu_percent": process.cpu_percent(),
                    "threads": process.num_threads(),
                },
            }

            # Add MCP server information if available
            if self.agent.mcp_manager:
                try:
                    all_status = self.agent.mcp_manager.get_all_status()
                    status_response["mcp_servers"] = {
                        "total": len(all_status),
                        "running": sum(
                            1
                            for s in all_status.values()
                            if s.get("status") == "running"
                        ),
                        "servers": all_status,
                    }
                except Exception as e:
                    if self.agent.dev_mode:
                        log_warn(f"Error getting MCP status: {e}")
                    status_response["mcp_servers"] = {"error": str(e)}

            return status_response

        except ImportError:
            # Fallback if psutil is not available
            return {
                "status": (
                    "running"
                    if not getattr(self.agent, "_shutdown_requested", False)
                    else "stopping"
                ),
                "pid": os.getpid(),
                "version": self.agent.version,
                "node_id": self.agent.node_id,
                "last_activity": datetime.now().isoformat(),
                "message": "Limited status info (psutil not available)",
            }
        except Exception as e:
            if self.agent.dev_mode:
                log_error(f"Status endpoint error: {e}")
            return {"status": "error", "message": f"Failed to get status: {str(e)}"}

    @self.agent.get("/info")
    async def node_info():
        # "registered_at" is the time of this request, not a stored value.
        return {
            "node_id": self.agent.node_id,
            "version": self.agent.version,
            "base_url": self.agent.base_url,
            "reasoners": self.agent.reasoners,
            "skills": self.agent.skills,
            "registered_at": datetime.now().isoformat(),
        }

    @self.agent.get("/mcp/status")
    async def mcp_status():
        """Get status of all MCP servers"""
        if not self.agent.mcp_manager:
            return {
                "error": "MCP Manager not available",
                "servers": {},
                "total": 0,
                "running": 0,
                "failed": 0,
            }

        # MCP functionality disabled: even with a manager present this
        # route only reports an empty, error-tagged payload.
        return {
            "error": "MCP functionality disabled - old modules removed",
            "servers": {},
            "total": 0,
            "running": 0,
            "failed": 0,
        }

    @self.agent.post("/mcp/{alias}/start")
    async def start_mcp_server(alias: str):
        """Start a specific MCP server"""
        if not self.agent.mcp_manager:
            return {
                "success": False,
                "error": "MCP Process Manager not available",
                "alias": alias,
            }

        try:
            success = await self.agent.mcp_manager.start_server_by_alias(alias)
            if success:
                # Get updated status
                status = self.agent.mcp_manager.get_server_status(alias)
                return {
                    "success": True,
                    "message": f"MCP server '{alias}' started successfully",
                    "alias": alias,
                    "status": status,
                    "timestamp": datetime.now().isoformat(),
                }
            else:
                return {
                    "success": False,
                    "error": f"Failed to start MCP server '{alias}'",
                    "alias": alias,
                    "timestamp": datetime.now().isoformat(),
                }

        except Exception as e:
            return {
                "success": False,
                "error": f"Error starting MCP server '{alias}': {str(e)}",
                "alias": alias,
                "timestamp": datetime.now().isoformat(),
            }

    @self.agent.post("/mcp/{alias}/stop")
    async def stop_mcp_server(alias: str):
        """Stop a specific MCP server"""
        if not self.agent.mcp_manager:
            return {
                "success": False,
                "error": "MCP Process Manager not available",
                "alias": alias,
            }

        try:
            # Unlike start/restart, stop_server is called without await.
            success = self.agent.mcp_manager.stop_server(alias)
            if success:
                return {
                    "success": True,
                    "message": f"MCP server '{alias}' stopped successfully",
                    "alias": alias,
                    "timestamp": datetime.now().isoformat(),
                }
            else:
                return {
                    "success": False,
                    "error": f"Failed to stop MCP server '{alias}' (may not be running)",
                    "alias": alias,
                    "timestamp": datetime.now().isoformat(),
                }

        except Exception as e:
            return {
                "success": False,
                "error": f"Error stopping MCP server '{alias}': {str(e)}",
                "alias": alias,
                "timestamp": datetime.now().isoformat(),
            }

    @self.agent.post("/mcp/{alias}/restart")
    async def restart_mcp_server(alias: str):
        """Restart a specific MCP server"""
        if not self.agent.mcp_manager:
            return {
                "success": False,
                "error": "MCP Process Manager not available",
                "alias": alias,
            }

        try:
            success = await self.agent.mcp_manager.restart_server(alias)
            if success:
                # Get updated status
                status = self.agent.mcp_manager.get_server_status(alias)
                return {
                    "success": True,
                    "message": f"MCP server '{alias}' restarted successfully",
                    "alias": alias,
                    "status": status,
                    "timestamp": datetime.now().isoformat(),
                }
            else:
                return {
                    "success": False,
                    "error": f"Failed to restart MCP server '{alias}'",
                    "alias": alias,
                    "timestamp": datetime.now().isoformat(),
                }

        except Exception as e:
            return {
                "success": False,
                "error": f"Error restarting MCP server '{alias}': {str(e)}",
                "alias": alias,
                "timestamp": datetime.now().isoformat(),
            }

    @self.agent.get("/health/mcp")
    async def mcp_health():
        """Get MCP health information in the format expected by AgentField server"""
        if not self.agent.mcp_manager:
            # Return empty response when MCP manager is not available
            return {
                "servers": [],
                "summary": {
                    "total_servers": 0,
                    "running_servers": 0,
                    "total_tools": 0,
                    "overall_health": 0.0,
                },
            }

        try:
            # Get all server status from MCP manager
            all_status = self.agent.mcp_manager.get_all_status()
            servers = []
            total_tools = 0
            running_servers = 0

            # Process each server to get detailed health information
            for alias, server_info in all_status.items():
                server_health = {
                    "alias": alias,
                    "status": server_info.get("status", "unknown"),
                    "tool_count": 0,
                    "started_at": None,
                    "last_health_check": datetime.now().isoformat(),
                    "port": server_info.get("port"),
                    "process_id": None,
                }

                # Get process ID if available
                if alias in self.agent.mcp_manager.servers:
                    server_process = self.agent.mcp_manager.servers[alias]
                    if server_process.process:
                        server_health["process_id"] = server_process.process.pid

                # Count running servers
                if server_health["status"] == "running":
                    running_servers += 1

                # Try to get tool count from MCP client; a failure here
                # leaves tool_count at 0 and does not fail the endpoint.
                try:
                    if self.agent.mcp_client_registry:
                        client = self.agent.mcp_client_registry.get_client(alias)
                        if client:
                            tools = await client.list_tools()
                            server_health["tool_count"] = len(tools)
                            total_tools += len(tools)

                            # Set started_at time (approximate): this is the
                            # time of this check, not the real start time.
                            server_health["started_at"] = datetime.now().isoformat()

                except Exception as e:
                    if self.agent.dev_mode:
                        log_warn(f"Failed to get tools for {alias}: {e}")

                servers.append(server_health)

            # Calculate overall health score
            total_servers = len(servers)
            if total_servers == 0:
                overall_health = 0.0
            else:
                # Health score based on running servers ratio
                health_ratio = running_servers / total_servers
                # Adjust for any servers with errors: 20% off per server in
                # the "error" state, clamped to [0.0, 1.0] below.
                error_servers = sum(1 for s in servers if s["status"] == "error")
                if error_servers > 0:
                    health_ratio *= 1 - (error_servers * 0.2)
                overall_health = max(0.0, min(1.0, health_ratio))

            # Build summary
            summary = {
                "total_servers": total_servers,
                "running_servers": running_servers,
                "total_tools": total_tools,
                "overall_health": overall_health,
            }

            return {"servers": servers, "summary": summary}

        except Exception as e:
            if self.agent.dev_mode:
                log_error(f"Error getting MCP health: {e}")

            # Return error response in expected format
            return {
                "servers": [],
                "summary": {
                    "total_servers": 0,
                    "running_servers": 0,
                    "total_tools": 0,
                    "overall_health": 0.0,
                },
            }

    @self.agent.post("/mcp/servers/{alias}/restart")
    async def restart_mcp_server_alt(alias: str):
        """Alternative restart endpoint for AgentField server compatibility"""
        return await restart_mcp_server(alias)

    @self.agent.get("/mcp/servers/{alias}/tools")
    async def get_mcp_server_tools(alias: str):
        """Get tools from a specific MCP server"""
        if not self.agent.mcp_client_registry:
            return {"error": "MCP Client Registry not available", "tools": []}

        try:
            client = self.agent.mcp_client_registry.get_client(alias)
            if not client:
                return {
                    "error": f"MCP server '{alias}' not found or not running",
                    "tools": [],
                }

            tools = await client.list_tools()

            # Transform tools to match expected format
            # (the "inputSchema" key is exposed as "input_schema")
            formatted_tools = []
            for tool in tools:
                formatted_tool = {
                    "name": tool.get("name", ""),
                    "description": tool.get("description", ""),
                    "input_schema": tool.get("inputSchema", {}),
                }
                formatted_tools.append(formatted_tool)

            return {"tools": formatted_tools}

        except Exception as e:
            if self.agent.dev_mode:
                log_error(f"Error getting tools for {alias}: {e}")

            return {
                "error": f"Failed to get tools from MCP server '{alias}': {str(e)}",
                "tools": [],
            }
542
+
543
async def _graceful_shutdown(self, timeout_seconds: int = 30):
    """
    Run the orderly shutdown sequence and then terminate the process.

    Steps, each isolated so that one failure does not stop the next:
    stop MCP servers, stop the heartbeat, clear the agent registry, pause
    one second, then exit with status 0. Any error escaping that sequence
    hands over to ``_immediate_shutdown``.

    Args:
        timeout_seconds: Reported in the dev-mode start message; this
            method does not otherwise use it.
    """
    try:
        agent = self.agent
        if agent.dev_mode:
            log_info(f"Starting graceful shutdown (timeout: {timeout_seconds}s)")

        # 1. MCP servers go down first.
        try:
            mcp_handler = getattr(agent, "mcp_handler", None)
            if mcp_handler:
                mcp_handler._cleanup_mcp_servers()
                if agent.dev_mode:
                    log_info("MCP servers stopped")
        except Exception as mcp_error:
            if agent.dev_mode:
                log_error(f"MCP shutdown error: {mcp_error}")

        # 2. Heartbeat.
        try:
            field_handler = getattr(agent, "agentfield_handler", None)
            if field_handler:
                field_handler.stop_heartbeat()
                if agent.dev_mode:
                    log_debug("Heartbeat stopped")
        except Exception as heartbeat_error:
            if agent.dev_mode:
                log_error(f"Heartbeat stop error: {heartbeat_error}")

        # 3. Agent registry.
        try:
            from agentfield.agent_registry import clear_current_agent

            clear_current_agent()
        except Exception as registry_error:
            if agent.dev_mode:
                log_error(f"Registry clear error: {registry_error}")

        # Short pause so the cleanup above can settle before exiting.
        await asyncio.sleep(1)

        if agent.dev_mode:
            log_success("Graceful shutdown completed")

        # Hard exit of the whole process.
        os._exit(0)

    except Exception as shutdown_error:
        if self.agent.dev_mode:
            log_error(f"Graceful shutdown error: {shutdown_error}")
        # Orderly path failed: fall back to the immediate one.
        await self._immediate_shutdown()
600
+
601
async def _immediate_shutdown(self):
    """
    Terminate the process right away after one best-effort MCP cleanup.

    Exits with status 0 on the normal path, and with status 1 if anything
    outside the cleanup attempt raises.
    """
    try:
        agent = self.agent
        if agent.dev_mode:
            log_warn("Immediate shutdown initiated")

        # Single cleanup attempt; its errors are deliberately ignored so
        # they cannot delay the exit.
        try:
            mcp_handler = getattr(agent, "mcp_handler", None)
            if mcp_handler:
                mcp_handler._cleanup_mcp_servers()
        except Exception:
            pass

        os._exit(0)

    except Exception as shutdown_error:
        if self.agent.dev_mode:
            log_error(f"Immediate shutdown error: {shutdown_error}")
        os._exit(1)
623
+
624
+ def _format_uptime(self, uptime_seconds: float) -> str:
625
+ """
626
+ Format uptime seconds into a human-readable string.
627
+
628
+ Args:
629
+ uptime_seconds: Uptime in seconds
630
+
631
+ Returns:
632
+ Formatted uptime string (e.g., "2h 30m 15s")
633
+ """
634
+ try:
635
+ hours = int(uptime_seconds // 3600)
636
+ minutes = int((uptime_seconds % 3600) // 60)
637
+ seconds = int(uptime_seconds % 60)
638
+
639
+ parts = []
640
+ if hours > 0:
641
+ parts.append(f"{hours}h")
642
+ if minutes > 0:
643
+ parts.append(f"{minutes}m")
644
+ if seconds > 0 or not parts: # Always show seconds if no other parts
645
+ parts.append(f"{seconds}s")
646
+
647
+ return " ".join(parts)
648
+ except Exception:
649
+ return f"{int(uptime_seconds)}s"
650
+
651
+ def _validate_ssl_config(
652
+ self, ssl_keyfile: Optional[str], ssl_certfile: Optional[str]
653
+ ) -> bool:
654
+ """
655
+ Validate SSL configuration files exist and are readable.
656
+
657
+ Args:
658
+ ssl_keyfile: Path to SSL key file
659
+ ssl_certfile: Path to SSL certificate file
660
+
661
+ Returns:
662
+ True if SSL configuration is valid, False otherwise
663
+ """
664
+ if not ssl_keyfile or not ssl_certfile:
665
+ return False
666
+
667
+ try:
668
+ # Check if files exist and are readable
669
+ if not os.path.isfile(ssl_keyfile):
670
+ if self.agent.dev_mode:
671
+ log_error(f"SSL key file not found: {ssl_keyfile}")
672
+ return False
673
+
674
+ if not os.path.isfile(ssl_certfile):
675
+ if self.agent.dev_mode:
676
+ log_error(f"SSL certificate file not found: {ssl_certfile}")
677
+ return False
678
+
679
+ # Check file permissions
680
+ if not os.access(ssl_keyfile, os.R_OK):
681
+ if self.agent.dev_mode:
682
+ log_error(f"SSL key file not readable: {ssl_keyfile}")
683
+ return False
684
+
685
+ if not os.access(ssl_certfile, os.R_OK):
686
+ if self.agent.dev_mode:
687
+ log_error(f"SSL certificate file not readable: {ssl_certfile}")
688
+ return False
689
+
690
+ return True
691
+
692
+ except Exception as e:
693
+ if self.agent.dev_mode:
694
+ log_error(f"SSL validation error: {e}")
695
+ return False
696
+
697
+ def _get_optimal_workers(self, workers: Optional[int] = None) -> Optional[int]:
698
+ """
699
+ Determine optimal number of workers based on system resources.
700
+
701
+ Args:
702
+ workers: Explicitly requested number of workers
703
+
704
+ Returns:
705
+ Optimal number of workers or None for single process
706
+ """
707
+ if workers is not None:
708
+ return workers
709
+
710
+ # Check environment variable
711
+ env_workers = os.getenv("UVICORN_WORKERS")
712
+ if env_workers and env_workers.isdigit():
713
+ return int(env_workers)
714
+
715
+ # Auto-detect based on CPU cores (only in production)
716
+ try:
717
+ import multiprocessing
718
+
719
+ cpu_count = multiprocessing.cpu_count()
720
+
721
+ # Use 2 * CPU cores for I/O bound workloads, but cap at 8
722
+ optimal_workers = min(cpu_count * 2, 8)
723
+
724
+ if self.agent.dev_mode:
725
+ log_debug(
726
+ f"Detected {cpu_count} CPU cores, optimal workers: {optimal_workers}"
727
+ )
728
+
729
+ return optimal_workers
730
+
731
+ except Exception:
732
+ return None
733
+
734
+ def _check_performance_dependencies(self) -> dict:
735
+ """
736
+ Check availability of performance-enhancing dependencies.
737
+
738
+ Returns:
739
+ Dictionary with availability status of optional dependencies
740
+ """
741
+ deps = {
742
+ "uvloop": False,
743
+ "psutil": False,
744
+ "orjson": False,
745
+ }
746
+
747
+ if importlib.util.find_spec("uvloop") is not None:
748
+ deps["uvloop"] = True
749
+
750
+ if importlib.util.find_spec("psutil") is not None:
751
+ deps["psutil"] = True
752
+
753
+ if importlib.util.find_spec("orjson") is not None:
754
+ deps["orjson"] = True
755
+
756
+ return deps
757
+
758
def setup_signal_handlers(self) -> None:
    """
    Install ``self.signal_handler`` for SIGTERM and SIGINT.

    Registration is best-effort: if it fails, the error is logged in dev
    mode and the agent carries on without custom signal handling.
    """
    try:
        # SIGTERM first, then SIGINT; a failure on the first skips the second.
        for shutdown_signal in (signal.SIGTERM, signal.SIGINT):
            signal.signal(shutdown_signal, self.signal_handler)

        if self.agent.dev_mode:
            log_debug("Signal handlers registered for graceful shutdown")

    except Exception as registration_error:
        # Not critical: continue without signal handlers.
        if self.agent.dev_mode:
            log_error(f"Failed to setup signal handlers: {registration_error}")
777
+
778
def signal_handler(self, signum: int, frame) -> None:
    """
    Handle a shutdown signal: clean up MCP servers, then exit the process.

    The process always exits with status 0 once this handler runs. MCP
    cleanup is best-effort, guarded the same way as in
    ``_graceful_shutdown`` and ``_immediate_shutdown``: a missing handler
    or a failing cleanup must not prevent the exit.

    Args:
        signum: Signal number
        frame: Current stack frame (unused)
    """
    try:
        # Resolve the real signal name instead of assuming that anything
        # other than SIGTERM is SIGINT.
        try:
            signal_name = signal.Signals(signum).name
        except ValueError:
            signal_name = f"signal {signum}"

        if self.agent.dev_mode:
            log_warn(f"{signal_name} received, shutting down gracefully...")

        # Perform cleanup
        try:
            mcp_handler = getattr(self.agent, "mcp_handler", None)
            if mcp_handler:
                mcp_handler._cleanup_mcp_servers()
        except Exception as e:
            if self.agent.dev_mode:
                log_error(f"MCP cleanup error during {signal_name} handling: {e}")
    finally:
        # Exit no matter what happened above.
        os._exit(0)
796
+
797
+ def serve(
798
+ self,
799
+ port: Optional[int] = None,
800
+ host: str = "0.0.0.0",
801
+ dev: bool = False,
802
+ heartbeat_interval: int = 2, # Fast heartbeat for real-time detection
803
+ auto_port: bool = False,
804
+ workers: Optional[int] = None,
805
+ ssl_keyfile: Optional[str] = None,
806
+ ssl_certfile: Optional[str] = None,
807
+ log_level: str = "info",
808
+ access_log: bool = True,
809
+ **kwargs,
810
+ ):
811
+ """
812
+ Start the agent node server with intelligent port management and production-ready configuration.
813
+
814
+ This method implements smart port resolution that seamlessly works with AgentField CLI
815
+ or standalone execution. The port selection priority is:
816
+ 1. Explicit port parameter (highest priority)
817
+ 2. PORT environment variable (AgentField CLI integration)
818
+ 3. auto_port=True: find free port automatically
819
+ 4. Default fallback with availability check
820
+
821
+ Args:
822
+ port (int, optional): The port on which the agent server will listen.
823
+ If specified, this takes highest priority.
824
+ host (str): The host address for the agent server. Defaults to "0.0.0.0".
825
+ dev (bool): If True, enables development mode features (e.g., hot reload, debug UI).
826
+ heartbeat_interval (int): The interval in seconds for sending heartbeats to the AgentField server.
827
+ Defaults to 2 seconds (fast detection architecture).
828
+ auto_port (bool): If True, automatically find an available port. Defaults to False.
829
+ workers (int, optional): Number of worker processes for production. If None, uses single process.
830
+ ssl_keyfile (str, optional): Path to SSL key file for HTTPS.
831
+ ssl_certfile (str, optional): Path to SSL certificate file for HTTPS.
832
+ log_level (str): Log level for uvicorn. Defaults to "info".
833
+ access_log (bool): Enable/disable access logging. Defaults to True.
834
+ **kwargs: Additional keyword arguments to pass to `uvicorn.run`.
835
+ """
836
+ # Smart port resolution with priority order
837
+ if port is None:
838
+ # Check for AgentField CLI integration via environment variable
839
+ env_port = os.getenv("PORT")
840
+ if env_port and env_port.isdigit():
841
+ suggested_port = int(env_port)
842
+ if AgentUtils.is_port_available(suggested_port):
843
+ port = suggested_port
844
+ if self.agent.dev_mode:
845
+ log_debug(f"Using port from AgentField CLI: {port}")
846
+ else:
847
+ # AgentField CLI suggested port is taken, find next available
848
+ try:
849
+ port = get_free_port(start_port=suggested_port)
850
+ if self.agent.dev_mode:
851
+ log_debug(
852
+ f"AgentField CLI port {suggested_port} taken, using {port}"
853
+ )
854
+ except RuntimeError:
855
+ port = get_free_port() # Fallback to default range
856
+ if self.agent.dev_mode:
857
+ log_debug(f"Using fallback port: {port}")
858
+ elif auto_port or os.getenv("AGENTFIELD_AUTO_PORT") == "true":
859
+ # Auto-port mode: find any available port
860
+ try:
861
+ port = get_free_port()
862
+ if self.agent.dev_mode:
863
+ log_debug(f"Auto-assigned port: {port}")
864
+ except RuntimeError as e:
865
+ log_error(f"Failed to find free port: {e}")
866
+ port = 8001 # Fallback to default
867
+ else:
868
+ # Default behavior: try 8001, find alternative if taken
869
+ if AgentUtils.is_port_available(8001):
870
+ port = 8001
871
+ else:
872
+ try:
873
+ port = get_free_port()
874
+ if self.agent.dev_mode:
875
+ log_debug(f"Default port 8001 taken, using {port}")
876
+ except RuntimeError:
877
+ port = 8001 # Force use even if taken (will fail gracefully)
878
+ else:
879
+ # Explicit port provided - validate it's available
880
+ if not AgentUtils.is_port_available(port):
881
+ if self.agent.dev_mode:
882
+ log_warn(f"Requested port {port} is not available")
883
+ # Try to find an alternative near the requested port
884
+ try:
885
+ alternative_port = get_free_port(start_port=port)
886
+ if self.agent.dev_mode:
887
+ log_debug(f"Using alternative port: {alternative_port}")
888
+ port = alternative_port
889
+ except RuntimeError:
890
+ if self.agent.dev_mode:
891
+ log_warn(
892
+ f"No alternative ports found, attempting to use {port}"
893
+ )
894
+ # Continue with original port (will fail if truly unavailable)
895
+
896
+ log_info(f"Starting agent node '{self.agent.node_id}' on port {port}")
897
+
898
+ # Set base_url for registration - preserve explicit callback URL if set
899
+ if not self.agent.base_url:
900
+ # Check AGENT_CALLBACK_URL environment variable before defaulting to localhost
901
+ env_callback_url = os.getenv("AGENT_CALLBACK_URL")
902
+ if env_callback_url:
903
+ # Parse the environment variable URL to extract the hostname
904
+ try:
905
+ parsed = urllib.parse.urlparse(env_callback_url)
906
+ if parsed.hostname:
907
+ self.agent.base_url = (
908
+ f"{parsed.scheme or 'http'}://{parsed.hostname}:{port}"
909
+ )
910
+ if self.agent.dev_mode:
911
+ log_debug(
912
+ f"Using AGENT_CALLBACK_URL from environment: {self.agent.base_url}"
913
+ )
914
+ else:
915
+ # Invalid URL in env var, fall back to localhost
916
+ self.agent.base_url = f"http://localhost:{port}"
917
+ except Exception:
918
+ # Failed to parse env var, fall back to localhost
919
+ self.agent.base_url = f"http://localhost:{port}"
920
+ else:
921
+ # No env var set, use localhost
922
+ self.agent.base_url = f"http://localhost:{port}"
923
+ else:
924
+ # Update port in existing base_url if needed
925
+ parsed = urllib.parse.urlparse(self.agent.base_url)
926
+ if parsed.port != port:
927
+ # Update the port in the existing URL, but preserve the hostname
928
+ self.agent.base_url = f"{parsed.scheme}://{parsed.hostname}:{port}"
929
+ if self.agent.dev_mode:
930
+ log_debug(f"Updated port in callback URL: {self.agent.base_url}")
931
+ elif self.agent.dev_mode:
932
+ log_debug(f"Using explicit callback URL: {self.agent.base_url}")
933
+
934
+ # Start heartbeat worker
935
+ self.agent.agentfield_handler.start_heartbeat(heartbeat_interval)
936
+
937
+ log_info(f"Agent server running at http://{host}:{port}")
938
+ log_info("Available endpoints:")
939
+ for route in self.agent.routes:
940
+ # Check if the route is an APIRoute (has .path and .methods)
941
+ if isinstance(route, APIRoute):
942
+ for method in route.methods:
943
+ if method != "HEAD": # Skip HEAD methods
944
+ log_debug(f"Endpoint registered: {method} {route.path}")
945
+
946
+ # Setup fast lifecycle signal handlers
947
+ self.agent.agentfield_handler.setup_fast_lifecycle_signal_handlers()
948
+
949
+ # Add startup event handler for resilient lifecycle
950
@self.agent.on_event("startup")
async def startup_resilient_lifecycle():
    """Resilient lifecycle startup: connection manager handles AgentField server connectivity.

    Steps, in order:
      1. Build a ``ConnectionManager`` for the agent and attach callbacks that
         start/cancel the enhanced heartbeat loop on connect/disconnect.
      2. Start the connection manager and remember whether it reported a
         connection.
      3. Connect the memory event client (if one is configured) and schedule
         background MCP initialization; both run whether or not step 2
         reported a connection.
    """

    # Imported lazily inside the hook, as in the original implementation.
    from agentfield.connection_manager import (
        ConnectionConfig,
        ConnectionManager,
    )

    # Configure connection manager with reasonable retry interval
    config = ConnectionConfig(
        retry_interval=10.0,  # Check every 10 seconds for AgentField server
        health_check_interval=30.0,
        connection_timeout=10.0,
    )

    self.agent.connection_manager = ConnectionManager(self.agent, config)

    # The event loop only keeps weak references to tasks, so fire-and-forget
    # tasks need a strong reference until they finish or they may be
    # garbage-collected mid-execution. This set is kept alive by the
    # callbacks registered on the connection manager below.
    background_tasks = set()

    def _spawn_background(coro):
        """Schedule ``coro`` as a task and hold a reference until it completes."""
        task = asyncio.create_task(coro)
        background_tasks.add(task)
        task.add_done_callback(background_tasks.discard)
        return task

    # Set up callbacks for connection state changes
    def on_connected():
        """Send an immediate heartbeat and ensure the heartbeat loop is running."""
        if self.agent.dev_mode:
            log_info(
                "Connected to AgentField server - full functionality available"
            )

        # Check for a running loop once, *before* creating any coroutine
        # objects: this guards both task creations below and avoids leaving
        # a never-awaited coroutine behind when no loop is available.
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # Event loop not running; the heartbeat worker will recover shortly
            return

        # Kick a heartbeat immediately so the control plane renews the lease
        _spawn_background(self.agent.agentfield_handler.send_enhanced_heartbeat())

        # Start enhanced heartbeat when connected (only if no loop task is
        # currently alive). getattr(..., None) also covers the attribute
        # being present but unset.
        heartbeat_task = getattr(self.agent, "_heartbeat_task", None)
        if heartbeat_task is None or heartbeat_task.done():
            self.agent._heartbeat_task = asyncio.create_task(
                self.agent.agentfield_handler.enhanced_heartbeat_loop(
                    heartbeat_interval
                )
            )

    def on_disconnected():
        """Cancel the heartbeat loop task, if one is still running."""
        if self.agent.dev_mode:
            log_warn("AgentField server disconnected - running in local mode")
        # Cancel heartbeat task when disconnected
        heartbeat_task = getattr(self.agent, "_heartbeat_task", None)
        if heartbeat_task is not None and not heartbeat_task.done():
            heartbeat_task.cancel()

    self.agent.connection_manager.on_connected = on_connected
    self.agent.connection_manager.on_disconnected = on_disconnected

    # Start connection manager; the result is only used below to pick the
    # startup log message.
    connected = await self.agent.connection_manager.start()

    # Always connect memory event client and start MCP initialization
    # These work independently of AgentField server connection
    if self.agent.memory_event_client:
        try:
            await self.agent.memory_event_client.connect()
        except Exception as e:
            # Best-effort: a failed memory event connection must not abort startup.
            if self.agent.dev_mode:
                log_error(f"Memory event client connection failed: {e}")

    # Start background MCP initialization (non-blocking)
    _spawn_background(self.agent.mcp_handler._background_mcp_initialization())

    if self.agent.dev_mode:
        if connected:
            log_info("Agent started with AgentField server connection")
        else:
            log_info(
                "Agent started in local mode - will connect to AgentField server when available"
            )
1030
+
1031
+ # Add shutdown event handler for cleanup
1032
@self.agent.on_event("shutdown")
async def shutdown_cleanup():
    """Cleanup all resources when FastAPI shuts down.

    Each resource is released on a best-effort basis: an exception raised
    while closing one resource is logged (in dev mode) and does not prevent
    the remaining resources from being released. The current-agent registry
    entry is always cleared, even if the handler is interrupted.
    """

    async def _best_effort(cleanup, error_label, success_message=None):
        """Await ``cleanup()``; log instead of raising so later steps still run."""
        try:
            await cleanup()
        except Exception as e:
            if self.agent.dev_mode:
                log_error(f"{error_label}: {e}")
        else:
            if success_message is not None and self.agent.dev_mode:
                log_info(success_message)

    try:
        # Stop connection manager
        if self.agent.connection_manager:
            await _best_effort(
                self.agent.connection_manager.stop,
                "Connection manager shutdown error",
            )

        # Close memory event client
        if self.agent.memory_event_client:
            await _best_effort(
                self.agent.memory_event_client.close,
                "Memory event client shutdown error",
            )

        # Stop MCP servers
        if self.agent.mcp_manager:
            await _best_effort(
                self.agent.mcp_manager.shutdown_all,
                "MCP shutdown error",
                success_message="MCP servers stopped",
            )

        if self.agent.mcp_client_registry:
            await _best_effort(
                self.agent.mcp_client_registry.close_all,
                "MCP client shutdown error",
            )

        # The HTTP client attribute may be absent, hence getattr with a default.
        client = getattr(self.agent, "client", None)
        if client:
            await _best_effort(client.aclose, "AgentField client shutdown error")
    finally:
        # Clear agent from thread-local storage during shutdown
        from agentfield.agent_registry import clear_current_agent

        clear_current_agent()
1072
+
1073
+ # Configure uvicorn parameters based on environment and requirements
1074
+ uvicorn_config = {
1075
+ "host": host,
1076
+ "port": port,
1077
+ "reload": dev
1078
+ and workers is None, # Only enable reload in dev mode with single worker
1079
+ "access_log": access_log,
1080
+ "log_level": log_level,
1081
+ "timeout_graceful_shutdown": 30, # Allow 30 seconds for graceful shutdown
1082
+ **kwargs,
1083
+ }
1084
+
1085
+ # Add SSL configuration if provided and valid
1086
+ if ssl_keyfile and ssl_certfile:
1087
+ if self._validate_ssl_config(ssl_keyfile, ssl_certfile):
1088
+ uvicorn_config.update(
1089
+ {
1090
+ "ssl_keyfile": ssl_keyfile,
1091
+ "ssl_certfile": ssl_certfile,
1092
+ }
1093
+ )
1094
+ if self.agent.dev_mode:
1095
+ log_info("HTTPS enabled with SSL certificates")
1096
+ else:
1097
+ log_error("Invalid SSL configuration, falling back to HTTP")
1098
+ ssl_keyfile = ssl_certfile = None
1099
+
1100
+ # Configure workers for production
1101
+ if workers and workers > 1:
1102
+ uvicorn_config["workers"] = workers
1103
+ if self.agent.dev_mode:
1104
+ log_debug(f"Multi-process mode: {workers} workers")
1105
+ elif self.agent.dev_mode:
1106
+ log_debug("Single-process mode")
1107
+
1108
+ # Performance optimizations for production
1109
+ if not dev:
1110
+ # Add production-specific configurations
1111
+ production_config = {
1112
+ "limit_concurrency": 1000, # Limit concurrent connections
1113
+ "backlog": 2048, # Connection queue size
1114
+ }
1115
+
1116
+ # Only apply request limit for multi-worker deployments
1117
+ # Single-process apps don't benefit from this and it causes unwanted shutdowns
1118
+ if workers and workers > 1:
1119
+ production_config["limit_max_requests"] = (
1120
+ 100000 # Restart workers after N requests
1121
+ )
1122
+
1123
+ uvicorn_config.update(production_config)
1124
+
1125
+ # Try to use uvloop for better performance
1126
+ if importlib.util.find_spec("uvloop") is not None:
1127
+ uvicorn_config["loop"] = "uvloop"
1128
+ if self.agent.dev_mode:
1129
+ log_info("Using uvloop for enhanced performance")
1130
+ elif self.agent.dev_mode:
1131
+ log_warn("uvloop not available, using default asyncio loop")
1132
+
1133
+ # Environment-based log level adjustment
1134
+ env_log_level = os.getenv("UVICORN_LOG_LEVEL", log_level).lower()
1135
+ if env_log_level in ["critical", "error", "warning", "info", "debug", "trace"]:
1136
+ uvicorn_config["log_level"] = env_log_level
1137
+
1138
+ # Disable access log in production if not explicitly enabled
1139
+ if not dev and "access_log" not in kwargs:
1140
+ uvicorn_config["access_log"] = False
1141
+
1142
+ if self.agent.dev_mode:
1143
+ log_debug("Uvicorn configuration:")
1144
+ config_display = {
1145
+ k: v
1146
+ for k, v in uvicorn_config.items()
1147
+ if k not in ["ssl_keyfile", "ssl_certfile"]
1148
+ }
1149
+ for key, value in config_display.items():
1150
+ log_debug(f" {key}: {value}")
1151
+
1152
+ try:
1153
+ # Start FastAPI server with production-ready configuration
1154
+ uvicorn.run(self.agent, **uvicorn_config)
1155
+ except OSError as e:
1156
+ if "Address already in use" in str(e):
1157
+ log_error(
1158
+ f"Port {port} is already in use. Choose a different port or stop the conflicting service."
1159
+ )
1160
+ if self.agent.dev_mode:
1161
+ log_info(
1162
+ "Try using auto_port=True or set a different port explicitly"
1163
+ )
1164
+ else:
1165
+ log_error(f"Failed to start server: {e}")
1166
+ raise
1167
+ except KeyboardInterrupt:
1168
+ if self.agent.dev_mode:
1169
+ log_info("Server stopped by user (Ctrl+C)")
1170
+ except Exception as e:
1171
+ log_error(f"Unexpected server error: {e}")
1172
+ raise
1173
+ finally:
1174
+ # Phase 5: Graceful shutdown - stop heartbeat and MCP servers
1175
+ if self.agent.dev_mode:
1176
+ log_info("Agent shutdown initiated...")
1177
+
1178
+ # Stop heartbeat worker
1179
+ self.agent.agentfield_handler.stop_heartbeat()
1180
+
1181
+ # Stop all MCP servers
1182
+ self.agent.mcp_handler._cleanup_mcp_servers()
1183
+
1184
+ if self.agent.dev_mode:
1185
+ log_success("Agent shutdown complete")