agentfield 0.1.22rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentfield/__init__.py +66 -0
- agentfield/agent.py +3569 -0
- agentfield/agent_ai.py +1125 -0
- agentfield/agent_cli.py +386 -0
- agentfield/agent_field_handler.py +494 -0
- agentfield/agent_mcp.py +534 -0
- agentfield/agent_registry.py +29 -0
- agentfield/agent_server.py +1185 -0
- agentfield/agent_utils.py +269 -0
- agentfield/agent_workflow.py +323 -0
- agentfield/async_config.py +278 -0
- agentfield/async_execution_manager.py +1227 -0
- agentfield/client.py +1447 -0
- agentfield/connection_manager.py +280 -0
- agentfield/decorators.py +527 -0
- agentfield/did_manager.py +337 -0
- agentfield/dynamic_skills.py +304 -0
- agentfield/execution_context.py +255 -0
- agentfield/execution_state.py +453 -0
- agentfield/http_connection_manager.py +429 -0
- agentfield/litellm_adapters.py +140 -0
- agentfield/logger.py +249 -0
- agentfield/mcp_client.py +204 -0
- agentfield/mcp_manager.py +340 -0
- agentfield/mcp_stdio_bridge.py +550 -0
- agentfield/memory.py +723 -0
- agentfield/memory_events.py +489 -0
- agentfield/multimodal.py +173 -0
- agentfield/multimodal_response.py +403 -0
- agentfield/pydantic_utils.py +227 -0
- agentfield/rate_limiter.py +280 -0
- agentfield/result_cache.py +441 -0
- agentfield/router.py +190 -0
- agentfield/status.py +70 -0
- agentfield/types.py +710 -0
- agentfield/utils.py +26 -0
- agentfield/vc_generator.py +464 -0
- agentfield/vision.py +198 -0
- agentfield-0.1.22rc2.dist-info/METADATA +102 -0
- agentfield-0.1.22rc2.dist-info/RECORD +42 -0
- agentfield-0.1.22rc2.dist-info/WHEEL +5 -0
- agentfield-0.1.22rc2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1185 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import importlib.util
|
|
3
|
+
import os
|
|
4
|
+
import signal
|
|
5
|
+
import urllib.parse
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
import uvicorn
|
|
10
|
+
from agentfield.agent_utils import AgentUtils
|
|
11
|
+
from agentfield.logger import log_debug, log_error, log_info, log_success, log_warn
|
|
12
|
+
from agentfield.utils import get_free_port
|
|
13
|
+
from fastapi import Request
|
|
14
|
+
from fastapi.routing import APIRoute
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AgentServer:
|
|
18
|
+
"""Server management functionality for AgentField Agent"""
|
|
19
|
+
|
|
20
|
+
def __init__(self, agent_instance):
|
|
21
|
+
"""
|
|
22
|
+
Initialize the AgentServer with a reference to the agent instance.
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
agent_instance: The Agent instance this server manages
|
|
26
|
+
"""
|
|
27
|
+
self.agent = agent_instance
|
|
28
|
+
|
|
29
|
+
    def setup_agentfield_routes(self):
        """Setup standard routes that AgentField server expects.

        Registers health/introspection endpoints (/health, /status, /info,
        /reasoners, /skills), a cooperative /shutdown endpoint, and MCP
        server management endpoints (/mcp/*). All handlers close over
        ``self.agent`` and return plain dicts (FastAPI serializes them).
        """

        @self.agent.get("/health")
        async def health():
            """Basic liveness probe, enriched with MCP server status when available."""
            health_response = {
                "status": "healthy",
                "node_id": self.agent.node_id,
                "version": self.agent.version,
                "timestamp": datetime.now().isoformat(),
            }

            # Add MCP server status if manager is available
            if self.agent.mcp_manager:
                try:
                    all_status = self.agent.mcp_manager.get_all_status()

                    # Calculate summary statistics
                    total_servers = len(all_status)
                    running_servers = sum(
                        1
                        for server in all_status.values()
                        if server.get("status") == "running"
                    )
                    failed_servers = sum(
                        1
                        for server in all_status.values()
                        if server.get("status") == "failed"
                    )

                    # Determine overall health status: any failed MCP server
                    # downgrades the agent from "healthy" to "degraded".
                    if failed_servers > 0:
                        health_response["status"] = "degraded"

                    # Add MCP information to health response
                    mcp_server_info = {
                        "total": total_servers,
                        "running": running_servers,
                        "failed": failed_servers,
                        "servers": {},
                    }

                    # Add individual server details
                    for alias, server_process in all_status.items():
                        process = server_process.get("process")
                        server_info = {
                            "status": server_process.get("status"),
                            "port": server_process.get("port"),
                            "pid": process.pid if process else None,
                        }
                        mcp_server_info["servers"][alias] = server_info

                    health_response["mcp_servers"] = mcp_server_info

                except Exception as e:
                    # MCP status failure must not fail the health probe itself.
                    if self.agent.dev_mode:
                        log_warn(f"Error getting MCP status for health check: {e}")
                    health_response["mcp_servers"] = {
                        "error": "Failed to get MCP status",
                        "total": 0,
                        "running": 0,
                        "failed": 0,
                    }

            return health_response

        @self.agent.get("/reasoners")
        async def list_reasoners():
            """List the reasoners registered on this agent."""
            return {"reasoners": self.agent.reasoners}

        @self.agent.get("/skills")
        async def list_skills():
            """List the skills registered on this agent."""
            return {"skills": self.agent.skills}

        @self.agent.post("/shutdown")
        async def shutdown_agent(request: Request):
            """
            Graceful shutdown endpoint for the agent.

            This endpoint allows the AgentField server to request a graceful shutdown
            instead of using process signals.
            """
            try:
                # Parse request body for shutdown options
                body = (
                    await request.json()
                    if request.headers.get("content-type") == "application/json"
                    else {}
                )
                graceful = body.get("graceful", True)
                # NOTE(review): timeout_seconds is echoed in the response and
                # passed to _graceful_shutdown, which currently does not use it.
                timeout_seconds = body.get("timeout_seconds", 30)

                if self.agent.dev_mode:
                    log_info(
                        f"Shutdown request received (graceful={graceful}, timeout={timeout_seconds}s)"
                    )

                # Set shutdown status (import here to avoid a circular import
                # between agent.py and this module at load time).
                from agentfield.agent import AgentStatus

                self.agent._shutdown_requested = True
                self.agent._current_status = AgentStatus.OFFLINE

                # Notify AgentField server of shutdown initiation; best-effort,
                # failure is logged (dev mode) and does not abort the shutdown.
                try:
                    success = self.agent.client.notify_graceful_shutdown_sync(
                        self.agent.node_id
                    )
                    if self.agent.dev_mode:
                        state = "sent" if success else "failed"
                        log_info(f"Shutdown notification {state}")
                except Exception as e:
                    if self.agent.dev_mode:
                        log_error(f"Shutdown notification error: {e}")

                # Schedule graceful shutdown as a background task so this
                # handler can still return an HTTP response first.
                if graceful:
                    asyncio.create_task(self._graceful_shutdown(timeout_seconds))

                    return {
                        "status": "shutting_down",
                        "graceful": True,
                        "timeout_seconds": timeout_seconds,
                        "estimated_shutdown_time": datetime.now().isoformat(),
                        "message": "Graceful shutdown initiated",
                    }
                else:
                    # Immediate shutdown
                    asyncio.create_task(self._immediate_shutdown())

                    return {
                        "status": "shutting_down",
                        "graceful": False,
                        "message": "Immediate shutdown initiated",
                    }

            except Exception as e:
                if self.agent.dev_mode:
                    log_error(f"Shutdown endpoint error: {e}")
                return {
                    "status": "error",
                    "message": f"Failed to initiate shutdown: {str(e)}",
                }

        @self.agent.get("/status")
        async def get_agent_status():
            """
            Get detailed agent status information.

            This endpoint provides comprehensive status information about the agent,
            including uptime, resource usage, and current state.
            """
            try:
                import time

                # psutil is optional; ImportError falls through to the
                # limited-info response below.
                import psutil

                # Get process info
                process = psutil.Process()

                # Calculate uptime; _start_time is set elsewhere on the agent,
                # falling back to "now" (uptime 0) if missing.
                start_time = getattr(self.agent, "_start_time", time.time())
                uptime_seconds = time.time() - start_time
                uptime_formatted = self._format_uptime(uptime_seconds)

                status_response = {
                    "status": (
                        "running"
                        if not getattr(self.agent, "_shutdown_requested", False)
                        else "stopping"
                    ),
                    "uptime": uptime_formatted,
                    "uptime_seconds": int(uptime_seconds),
                    "pid": os.getpid(),
                    "version": self.agent.version,
                    "node_id": self.agent.node_id,
                    "last_activity": datetime.now().isoformat(),
                    "resources": {
                        "memory_mb": round(process.memory_info().rss / 1024 / 1024, 2),
                        "cpu_percent": process.cpu_percent(),
                        "threads": process.num_threads(),
                    },
                }

                # Add MCP server information if available
                if self.agent.mcp_manager:
                    try:
                        all_status = self.agent.mcp_manager.get_all_status()
                        status_response["mcp_servers"] = {
                            "total": len(all_status),
                            "running": sum(
                                1
                                for s in all_status.values()
                                if s.get("status") == "running"
                            ),
                            "servers": all_status,
                        }
                    except Exception as e:
                        if self.agent.dev_mode:
                            log_warn(f"Error getting MCP status: {e}")
                        status_response["mcp_servers"] = {"error": str(e)}

                return status_response

            except ImportError:
                # Fallback if psutil is not available
                return {
                    "status": (
                        "running"
                        if not getattr(self.agent, "_shutdown_requested", False)
                        else "stopping"
                    ),
                    "pid": os.getpid(),
                    "version": self.agent.version,
                    "node_id": self.agent.node_id,
                    "last_activity": datetime.now().isoformat(),
                    "message": "Limited status info (psutil not available)",
                }
            except Exception as e:
                if self.agent.dev_mode:
                    log_error(f"Status endpoint error: {e}")
                return {"status": "error", "message": f"Failed to get status: {str(e)}"}

        @self.agent.get("/info")
        async def node_info():
            """Static identification info used by the AgentField server at registration."""
            return {
                "node_id": self.agent.node_id,
                "version": self.agent.version,
                "base_url": self.agent.base_url,
                "reasoners": self.agent.reasoners,
                "skills": self.agent.skills,
                "registered_at": datetime.now().isoformat(),
            }

        @self.agent.get("/mcp/status")
        async def mcp_status():
            """Get status of all MCP servers"""
            if not self.agent.mcp_manager:
                return {
                    "error": "MCP Manager not available",
                    "servers": {},
                    "total": 0,
                    "running": 0,
                    "failed": 0,
                }

            # MCP functionality disabled
            # NOTE(review): this return is unconditional, so when an MCP
            # manager IS available this endpoint still reports "disabled".
            # Inconsistent with /health/mcp and /mcp/{alias}/start below,
            # which do use the manager — confirm whether this is intentional.
            return {
                "error": "MCP functionality disabled - old modules removed",
                "servers": {},
                "total": 0,
                "running": 0,
                "failed": 0,
            }

        @self.agent.post("/mcp/{alias}/start")
        async def start_mcp_server(alias: str):
            """Start a specific MCP server"""
            if not self.agent.mcp_manager:
                return {
                    "success": False,
                    "error": "MCP Process Manager not available",
                    "alias": alias,
                }

            try:
                success = await self.agent.mcp_manager.start_server_by_alias(alias)
                if success:
                    # Get updated status
                    status = self.agent.mcp_manager.get_server_status(alias)
                    return {
                        "success": True,
                        "message": f"MCP server '{alias}' started successfully",
                        "alias": alias,
                        "status": status,
                        "timestamp": datetime.now().isoformat(),
                    }
                else:
                    return {
                        "success": False,
                        "error": f"Failed to start MCP server '{alias}'",
                        "alias": alias,
                        "timestamp": datetime.now().isoformat(),
                    }

            except Exception as e:
                return {
                    "success": False,
                    "error": f"Error starting MCP server '{alias}': {str(e)}",
                    "alias": alias,
                    "timestamp": datetime.now().isoformat(),
                }

        @self.agent.post("/mcp/{alias}/stop")
        async def stop_mcp_server(alias: str):
            """Stop a specific MCP server"""
            if not self.agent.mcp_manager:
                return {
                    "success": False,
                    "error": "MCP Process Manager not available",
                    "alias": alias,
                }

            try:
                # stop_server is synchronous (unlike start/restart, which are awaited).
                success = self.agent.mcp_manager.stop_server(alias)
                if success:
                    return {
                        "success": True,
                        "message": f"MCP server '{alias}' stopped successfully",
                        "alias": alias,
                        "timestamp": datetime.now().isoformat(),
                    }
                else:
                    return {
                        "success": False,
                        "error": f"Failed to stop MCP server '{alias}' (may not be running)",
                        "alias": alias,
                        "timestamp": datetime.now().isoformat(),
                    }

            except Exception as e:
                return {
                    "success": False,
                    "error": f"Error stopping MCP server '{alias}': {str(e)}",
                    "alias": alias,
                    "timestamp": datetime.now().isoformat(),
                }

        @self.agent.post("/mcp/{alias}/restart")
        async def restart_mcp_server(alias: str):
            """Restart a specific MCP server"""
            if not self.agent.mcp_manager:
                return {
                    "success": False,
                    "error": "MCP Process Manager not available",
                    "alias": alias,
                }

            try:
                success = await self.agent.mcp_manager.restart_server(alias)
                if success:
                    # Get updated status
                    status = self.agent.mcp_manager.get_server_status(alias)
                    return {
                        "success": True,
                        "message": f"MCP server '{alias}' restarted successfully",
                        "alias": alias,
                        "status": status,
                        "timestamp": datetime.now().isoformat(),
                    }
                else:
                    return {
                        "success": False,
                        "error": f"Failed to restart MCP server '{alias}'",
                        "alias": alias,
                        "timestamp": datetime.now().isoformat(),
                    }

            except Exception as e:
                return {
                    "success": False,
                    "error": f"Error restarting MCP server '{alias}': {str(e)}",
                    "alias": alias,
                    "timestamp": datetime.now().isoformat(),
                }

        @self.agent.get("/health/mcp")
        async def mcp_health():
            """Get MCP health information in the format expected by AgentField server"""
            if not self.agent.mcp_manager:
                # Return empty response when MCP manager is not available
                return {
                    "servers": [],
                    "summary": {
                        "total_servers": 0,
                        "running_servers": 0,
                        "total_tools": 0,
                        "overall_health": 0.0,
                    },
                }

            try:
                # Get all server status from MCP manager
                all_status = self.agent.mcp_manager.get_all_status()
                servers = []
                total_tools = 0
                running_servers = 0

                # Process each server to get detailed health information
                for alias, server_info in all_status.items():
                    server_health = {
                        "alias": alias,
                        "status": server_info.get("status", "unknown"),
                        "tool_count": 0,
                        "started_at": None,
                        "last_health_check": datetime.now().isoformat(),
                        "port": server_info.get("port"),
                        "process_id": None,
                    }

                    # Get process ID if available
                    if alias in self.agent.mcp_manager.servers:
                        server_process = self.agent.mcp_manager.servers[alias]
                        if server_process.process:
                            server_health["process_id"] = server_process.process.pid

                    # Count running servers
                    if server_health["status"] == "running":
                        running_servers += 1

                    # Try to get tool count from MCP client
                    try:
                        if self.agent.mcp_client_registry:
                            client = self.agent.mcp_client_registry.get_client(
                                alias
                            )
                            if client:
                                tools = await client.list_tools()
                                server_health["tool_count"] = len(tools)
                                total_tools += len(tools)

                                # Set started_at time (approximate)
                                server_health["started_at"] = (
                                    datetime.now().isoformat()
                                )

                    except Exception as e:
                        # Tool enumeration is best-effort; the server entry is
                        # still reported with tool_count 0.
                        if self.agent.dev_mode:
                            log_warn(f"Failed to get tools for {alias}: {e}")

                    servers.append(server_health)

                # Calculate overall health score
                total_servers = len(servers)
                if total_servers == 0:
                    overall_health = 0.0
                else:
                    # Health score based on running servers ratio
                    health_ratio = running_servers / total_servers
                    # Adjust for any servers with errors
                    error_servers = sum(1 for s in servers if s["status"] == "error")
                    if error_servers > 0:
                        health_ratio *= 1 - (
                            error_servers * 0.2
                        )  # Reduce health for errors
                    # Clamp to [0.0, 1.0] after the error penalty.
                    overall_health = max(0.0, min(1.0, health_ratio))

                # Build summary
                summary = {
                    "total_servers": total_servers,
                    "running_servers": running_servers,
                    "total_tools": total_tools,
                    "overall_health": overall_health,
                }

                return {"servers": servers, "summary": summary}

            except Exception as e:
                if self.agent.dev_mode:
                    log_error(f"Error getting MCP health: {e}")

                # Return error response in expected format
                return {
                    "servers": [],
                    "summary": {
                        "total_servers": 0,
                        "running_servers": 0,
                        "total_tools": 0,
                        "overall_health": 0.0,
                    },
                }

        @self.agent.post("/mcp/servers/{alias}/restart")
        async def restart_mcp_server_alt(alias: str):
            """Alternative restart endpoint for AgentField server compatibility"""
            return await restart_mcp_server(alias)

        @self.agent.get("/mcp/servers/{alias}/tools")
        async def get_mcp_server_tools(alias: str):
            """Get tools from a specific MCP server"""
            if not self.agent.mcp_client_registry:
                return {"error": "MCP Client Registry not available", "tools": []}

            try:
                client = self.agent.mcp_client_registry.get_client(alias)
                if not client:
                    return {
                        "error": f"MCP server '{alias}' not found or not running",
                        "tools": [],
                    }

                tools = await client.list_tools()

                # Transform tools to match expected format (MCP uses camelCase
                # "inputSchema"; this API exposes snake_case "input_schema").
                formatted_tools = []
                for tool in tools:
                    formatted_tool = {
                        "name": tool.get("name", ""),
                        "description": tool.get("description", ""),
                        "input_schema": tool.get("inputSchema", {}),
                    }
                    formatted_tools.append(formatted_tool)

                return {"tools": formatted_tools}

            except Exception as e:
                if self.agent.dev_mode:
                    log_error(f"Error getting tools for {alias}: {e}")

                return {
                    "error": f"Failed to get tools from MCP server '{alias}': {str(e)}",
                    "tools": [],
                }
|
|
542
|
+
|
|
543
|
+
    async def _graceful_shutdown(self, timeout_seconds: int = 30):
        """
        Perform graceful shutdown with cleanup.

        Cleanup order: MCP servers -> heartbeat -> agent registry, then a
        short pause before terminating the process with ``os._exit(0)``.
        Each step is individually wrapped so one failure cannot block the
        remaining steps.

        Args:
            timeout_seconds: Maximum time to wait for graceful shutdown.
                NOTE(review): currently unused by this implementation —
                shutdown proceeds immediately after a fixed 1s sleep.
        """
        try:
            if self.agent.dev_mode:
                log_info(f"Starting graceful shutdown (timeout: {timeout_seconds}s)")

            # Stop MCP servers first
            try:
                if hasattr(self.agent, "mcp_handler") and self.agent.mcp_handler:
                    self.agent.mcp_handler._cleanup_mcp_servers()
                    if self.agent.dev_mode:
                        log_info("MCP servers stopped")
            except Exception as e:
                if self.agent.dev_mode:
                    log_error(f"MCP shutdown error: {e}")

            # Stop heartbeat
            try:
                if (
                    hasattr(self.agent, "agentfield_handler")
                    and self.agent.agentfield_handler
                ):
                    self.agent.agentfield_handler.stop_heartbeat()
                    if self.agent.dev_mode:
                        log_debug("Heartbeat stopped")
            except Exception as e:
                if self.agent.dev_mode:
                    log_error(f"Heartbeat stop error: {e}")

            # Clear agent registry (local import avoids a circular import)
            try:
                from agentfield.agent_registry import clear_current_agent

                clear_current_agent()
            except Exception as e:
                if self.agent.dev_mode:
                    log_error(f"Registry clear error: {e}")

            # Wait a moment for cleanup to complete
            await asyncio.sleep(1)

            if self.agent.dev_mode:
                log_success("Graceful shutdown completed")

            # Exit the process (os._exit bypasses atexit handlers; cleanup
            # above is all the teardown this process performs)
            os._exit(0)

        except Exception as e:
            if self.agent.dev_mode:
                log_error(f"Graceful shutdown error: {e}")
            # Fallback to immediate shutdown
            await self._immediate_shutdown()
|
|
600
|
+
|
|
601
|
+
async def _immediate_shutdown(self):
|
|
602
|
+
"""
|
|
603
|
+
Perform immediate shutdown without cleanup.
|
|
604
|
+
"""
|
|
605
|
+
try:
|
|
606
|
+
if self.agent.dev_mode:
|
|
607
|
+
log_warn("Immediate shutdown initiated")
|
|
608
|
+
|
|
609
|
+
# Quick cleanup attempt
|
|
610
|
+
try:
|
|
611
|
+
if hasattr(self.agent, "mcp_handler") and self.agent.mcp_handler:
|
|
612
|
+
self.agent.mcp_handler._cleanup_mcp_servers()
|
|
613
|
+
except Exception:
|
|
614
|
+
pass # Ignore errors in immediate shutdown
|
|
615
|
+
|
|
616
|
+
# Exit immediately
|
|
617
|
+
os._exit(0)
|
|
618
|
+
|
|
619
|
+
except Exception as e:
|
|
620
|
+
if self.agent.dev_mode:
|
|
621
|
+
log_error(f"Immediate shutdown error: {e}")
|
|
622
|
+
os._exit(1)
|
|
623
|
+
|
|
624
|
+
def _format_uptime(self, uptime_seconds: float) -> str:
|
|
625
|
+
"""
|
|
626
|
+
Format uptime seconds into a human-readable string.
|
|
627
|
+
|
|
628
|
+
Args:
|
|
629
|
+
uptime_seconds: Uptime in seconds
|
|
630
|
+
|
|
631
|
+
Returns:
|
|
632
|
+
Formatted uptime string (e.g., "2h 30m 15s")
|
|
633
|
+
"""
|
|
634
|
+
try:
|
|
635
|
+
hours = int(uptime_seconds // 3600)
|
|
636
|
+
minutes = int((uptime_seconds % 3600) // 60)
|
|
637
|
+
seconds = int(uptime_seconds % 60)
|
|
638
|
+
|
|
639
|
+
parts = []
|
|
640
|
+
if hours > 0:
|
|
641
|
+
parts.append(f"{hours}h")
|
|
642
|
+
if minutes > 0:
|
|
643
|
+
parts.append(f"{minutes}m")
|
|
644
|
+
if seconds > 0 or not parts: # Always show seconds if no other parts
|
|
645
|
+
parts.append(f"{seconds}s")
|
|
646
|
+
|
|
647
|
+
return " ".join(parts)
|
|
648
|
+
except Exception:
|
|
649
|
+
return f"{int(uptime_seconds)}s"
|
|
650
|
+
|
|
651
|
+
def _validate_ssl_config(
|
|
652
|
+
self, ssl_keyfile: Optional[str], ssl_certfile: Optional[str]
|
|
653
|
+
) -> bool:
|
|
654
|
+
"""
|
|
655
|
+
Validate SSL configuration files exist and are readable.
|
|
656
|
+
|
|
657
|
+
Args:
|
|
658
|
+
ssl_keyfile: Path to SSL key file
|
|
659
|
+
ssl_certfile: Path to SSL certificate file
|
|
660
|
+
|
|
661
|
+
Returns:
|
|
662
|
+
True if SSL configuration is valid, False otherwise
|
|
663
|
+
"""
|
|
664
|
+
if not ssl_keyfile or not ssl_certfile:
|
|
665
|
+
return False
|
|
666
|
+
|
|
667
|
+
try:
|
|
668
|
+
# Check if files exist and are readable
|
|
669
|
+
if not os.path.isfile(ssl_keyfile):
|
|
670
|
+
if self.agent.dev_mode:
|
|
671
|
+
log_error(f"SSL key file not found: {ssl_keyfile}")
|
|
672
|
+
return False
|
|
673
|
+
|
|
674
|
+
if not os.path.isfile(ssl_certfile):
|
|
675
|
+
if self.agent.dev_mode:
|
|
676
|
+
log_error(f"SSL certificate file not found: {ssl_certfile}")
|
|
677
|
+
return False
|
|
678
|
+
|
|
679
|
+
# Check file permissions
|
|
680
|
+
if not os.access(ssl_keyfile, os.R_OK):
|
|
681
|
+
if self.agent.dev_mode:
|
|
682
|
+
log_error(f"SSL key file not readable: {ssl_keyfile}")
|
|
683
|
+
return False
|
|
684
|
+
|
|
685
|
+
if not os.access(ssl_certfile, os.R_OK):
|
|
686
|
+
if self.agent.dev_mode:
|
|
687
|
+
log_error(f"SSL certificate file not readable: {ssl_certfile}")
|
|
688
|
+
return False
|
|
689
|
+
|
|
690
|
+
return True
|
|
691
|
+
|
|
692
|
+
except Exception as e:
|
|
693
|
+
if self.agent.dev_mode:
|
|
694
|
+
log_error(f"SSL validation error: {e}")
|
|
695
|
+
return False
|
|
696
|
+
|
|
697
|
+
def _get_optimal_workers(self, workers: Optional[int] = None) -> Optional[int]:
|
|
698
|
+
"""
|
|
699
|
+
Determine optimal number of workers based on system resources.
|
|
700
|
+
|
|
701
|
+
Args:
|
|
702
|
+
workers: Explicitly requested number of workers
|
|
703
|
+
|
|
704
|
+
Returns:
|
|
705
|
+
Optimal number of workers or None for single process
|
|
706
|
+
"""
|
|
707
|
+
if workers is not None:
|
|
708
|
+
return workers
|
|
709
|
+
|
|
710
|
+
# Check environment variable
|
|
711
|
+
env_workers = os.getenv("UVICORN_WORKERS")
|
|
712
|
+
if env_workers and env_workers.isdigit():
|
|
713
|
+
return int(env_workers)
|
|
714
|
+
|
|
715
|
+
# Auto-detect based on CPU cores (only in production)
|
|
716
|
+
try:
|
|
717
|
+
import multiprocessing
|
|
718
|
+
|
|
719
|
+
cpu_count = multiprocessing.cpu_count()
|
|
720
|
+
|
|
721
|
+
# Use 2 * CPU cores for I/O bound workloads, but cap at 8
|
|
722
|
+
optimal_workers = min(cpu_count * 2, 8)
|
|
723
|
+
|
|
724
|
+
if self.agent.dev_mode:
|
|
725
|
+
log_debug(
|
|
726
|
+
f"Detected {cpu_count} CPU cores, optimal workers: {optimal_workers}"
|
|
727
|
+
)
|
|
728
|
+
|
|
729
|
+
return optimal_workers
|
|
730
|
+
|
|
731
|
+
except Exception:
|
|
732
|
+
return None
|
|
733
|
+
|
|
734
|
+
def _check_performance_dependencies(self) -> dict:
|
|
735
|
+
"""
|
|
736
|
+
Check availability of performance-enhancing dependencies.
|
|
737
|
+
|
|
738
|
+
Returns:
|
|
739
|
+
Dictionary with availability status of optional dependencies
|
|
740
|
+
"""
|
|
741
|
+
deps = {
|
|
742
|
+
"uvloop": False,
|
|
743
|
+
"psutil": False,
|
|
744
|
+
"orjson": False,
|
|
745
|
+
}
|
|
746
|
+
|
|
747
|
+
if importlib.util.find_spec("uvloop") is not None:
|
|
748
|
+
deps["uvloop"] = True
|
|
749
|
+
|
|
750
|
+
if importlib.util.find_spec("psutil") is not None:
|
|
751
|
+
deps["psutil"] = True
|
|
752
|
+
|
|
753
|
+
if importlib.util.find_spec("orjson") is not None:
|
|
754
|
+
deps["orjson"] = True
|
|
755
|
+
|
|
756
|
+
return deps
|
|
757
|
+
|
|
758
|
+
def setup_signal_handlers(self) -> None:
|
|
759
|
+
"""
|
|
760
|
+
Setup signal handlers for graceful shutdown.
|
|
761
|
+
|
|
762
|
+
This method registers signal handlers for SIGTERM and SIGINT
|
|
763
|
+
to ensure MCP servers are properly stopped when the agent shuts down.
|
|
764
|
+
"""
|
|
765
|
+
try:
|
|
766
|
+
# Register signal handlers for graceful shutdown
|
|
767
|
+
signal.signal(signal.SIGTERM, self.signal_handler)
|
|
768
|
+
signal.signal(signal.SIGINT, self.signal_handler)
|
|
769
|
+
|
|
770
|
+
if self.agent.dev_mode:
|
|
771
|
+
log_debug("Signal handlers registered for graceful shutdown")
|
|
772
|
+
|
|
773
|
+
except Exception as e:
|
|
774
|
+
if self.agent.dev_mode:
|
|
775
|
+
log_error(f"Failed to setup signal handlers: {e}")
|
|
776
|
+
# Continue without signal handlers - not critical
|
|
777
|
+
|
|
778
|
+
def signal_handler(self, signum: int, frame) -> None:
|
|
779
|
+
"""
|
|
780
|
+
Handle shutdown signals gracefully.
|
|
781
|
+
|
|
782
|
+
Args:
|
|
783
|
+
signum: Signal number
|
|
784
|
+
frame: Current stack frame
|
|
785
|
+
"""
|
|
786
|
+
signal_name = "SIGTERM" if signum == signal.SIGTERM else "SIGINT"
|
|
787
|
+
|
|
788
|
+
if self.agent.dev_mode:
|
|
789
|
+
log_warn(f"{signal_name} received, shutting down gracefully...")
|
|
790
|
+
|
|
791
|
+
# Perform cleanup
|
|
792
|
+
self.agent.mcp_handler._cleanup_mcp_servers()
|
|
793
|
+
|
|
794
|
+
# Exit gracefully
|
|
795
|
+
os._exit(0)
|
|
796
|
+
|
|
797
|
+
def serve(
    self,
    port: Optional[int] = None,
    host: str = "0.0.0.0",
    dev: bool = False,
    heartbeat_interval: int = 2,  # Fast heartbeat for real-time detection
    auto_port: bool = False,
    workers: Optional[int] = None,
    ssl_keyfile: Optional[str] = None,
    ssl_certfile: Optional[str] = None,
    log_level: str = "info",
    access_log: bool = True,
    **kwargs,
) -> None:
    """
    Start the agent node server with intelligent port management and production-ready configuration.

    This method implements smart port resolution that seamlessly works with AgentField CLI
    or standalone execution. The port selection priority is:
    1. Explicit port parameter (highest priority)
    2. PORT environment variable (AgentField CLI integration)
    3. auto_port=True: find free port automatically
    4. Default fallback with availability check

    Blocks until the uvicorn server exits; heartbeat and MCP cleanup run
    in the ``finally`` clause regardless of how the server stopped.

    Args:
        port (int, optional): The port on which the agent server will listen.
            If specified, this takes highest priority.
        host (str): The host address for the agent server. Defaults to "0.0.0.0".
        dev (bool): If True, enables development mode features (e.g., hot reload, debug UI).
        heartbeat_interval (int): The interval in seconds for sending heartbeats to the AgentField server.
            Defaults to 2 seconds (fast detection architecture).
        auto_port (bool): If True, automatically find an available port. Defaults to False.
        workers (int, optional): Number of worker processes for production. If None, uses single process.
        ssl_keyfile (str, optional): Path to SSL key file for HTTPS.
        ssl_certfile (str, optional): Path to SSL certificate file for HTTPS.
        log_level (str): Log level for uvicorn. Defaults to "info".
        access_log (bool): Enable/disable access logging. Defaults to True.
        **kwargs: Additional keyword arguments to pass to `uvicorn.run`.
    """
    # Smart port resolution with priority order
    if port is None:
        # Check for AgentField CLI integration via environment variable
        env_port = os.getenv("PORT")
        if env_port and env_port.isdigit():
            suggested_port = int(env_port)
            if AgentUtils.is_port_available(suggested_port):
                port = suggested_port
                if self.agent.dev_mode:
                    log_debug(f"Using port from AgentField CLI: {port}")
            else:
                # AgentField CLI suggested port is taken, find next available
                try:
                    port = get_free_port(start_port=suggested_port)
                    if self.agent.dev_mode:
                        log_debug(
                            f"AgentField CLI port {suggested_port} taken, using {port}"
                        )
                except RuntimeError:
                    port = get_free_port()  # Fallback to default range
                    if self.agent.dev_mode:
                        log_debug(f"Using fallback port: {port}")
        elif auto_port or os.getenv("AGENTFIELD_AUTO_PORT") == "true":
            # Auto-port mode: find any available port
            try:
                port = get_free_port()
                if self.agent.dev_mode:
                    log_debug(f"Auto-assigned port: {port}")
            except RuntimeError as e:
                log_error(f"Failed to find free port: {e}")
                port = 8001  # Fallback to default
        else:
            # Default behavior: try 8001, find alternative if taken
            if AgentUtils.is_port_available(8001):
                port = 8001
            else:
                try:
                    port = get_free_port()
                    if self.agent.dev_mode:
                        log_debug(f"Default port 8001 taken, using {port}")
                except RuntimeError:
                    port = 8001  # Force use even if taken (will fail gracefully)
    else:
        # Explicit port provided - validate it's available
        if not AgentUtils.is_port_available(port):
            if self.agent.dev_mode:
                log_warn(f"Requested port {port} is not available")
            # Try to find an alternative near the requested port
            try:
                alternative_port = get_free_port(start_port=port)
                if self.agent.dev_mode:
                    log_debug(f"Using alternative port: {alternative_port}")
                port = alternative_port
            except RuntimeError:
                if self.agent.dev_mode:
                    log_warn(
                        f"No alternative ports found, attempting to use {port}"
                    )
                # Continue with original port (will fail if truly unavailable)

    log_info(f"Starting agent node '{self.agent.node_id}' on port {port}")

    # Set base_url for registration - preserve explicit callback URL if set
    if not self.agent.base_url:
        # Check AGENT_CALLBACK_URL environment variable before defaulting to localhost
        env_callback_url = os.getenv("AGENT_CALLBACK_URL")
        if env_callback_url:
            # Parse the environment variable URL to extract the hostname
            # (only the scheme and hostname are kept; the resolved port
            # above always wins over any port in the env URL).
            try:
                parsed = urllib.parse.urlparse(env_callback_url)
                if parsed.hostname:
                    self.agent.base_url = (
                        f"{parsed.scheme or 'http'}://{parsed.hostname}:{port}"
                    )
                    if self.agent.dev_mode:
                        log_debug(
                            f"Using AGENT_CALLBACK_URL from environment: {self.agent.base_url}"
                        )
                else:
                    # Invalid URL in env var, fall back to localhost
                    self.agent.base_url = f"http://localhost:{port}"
            except Exception:
                # Failed to parse env var, fall back to localhost
                self.agent.base_url = f"http://localhost:{port}"
        else:
            # No env var set, use localhost
            self.agent.base_url = f"http://localhost:{port}"
    else:
        # Update port in existing base_url if needed
        parsed = urllib.parse.urlparse(self.agent.base_url)
        if parsed.port != port:
            # Update the port in the existing URL, but preserve the hostname
            self.agent.base_url = f"{parsed.scheme}://{parsed.hostname}:{port}"
            if self.agent.dev_mode:
                log_debug(f"Updated port in callback URL: {self.agent.base_url}")
        elif self.agent.dev_mode:
            log_debug(f"Using explicit callback URL: {self.agent.base_url}")

    # Start heartbeat worker
    self.agent.agentfield_handler.start_heartbeat(heartbeat_interval)

    log_info(f"Agent server running at http://{host}:{port}")
    log_info("Available endpoints:")
    for route in self.agent.routes:
        # Check if the route is an APIRoute (has .path and .methods)
        if isinstance(route, APIRoute):
            for method in route.methods:
                if method != "HEAD":  # Skip HEAD methods
                    log_debug(f"Endpoint registered: {method} {route.path}")

    # Setup fast lifecycle signal handlers
    self.agent.agentfield_handler.setup_fast_lifecycle_signal_handlers()

    # Add startup event handler for resilient lifecycle
    @self.agent.on_event("startup")
    async def startup_resilient_lifecycle():
        """Resilient lifecycle startup: connection manager handles AgentField server connectivity"""

        # Initialize connection manager (imported lazily to avoid import
        # cycles at module load time).
        from agentfield.connection_manager import (
            ConnectionConfig,
            ConnectionManager,
        )

        # Configure connection manager with reasonable retry interval
        config = ConnectionConfig(
            retry_interval=10.0,  # Check every 10 seconds for AgentField server
            health_check_interval=30.0,
            connection_timeout=10.0,
        )

        self.agent.connection_manager = ConnectionManager(self.agent, config)

        # Set up callbacks for connection state changes
        def on_connected():
            # Called by the connection manager whenever the AgentField
            # server becomes reachable (including reconnects).
            if self.agent.dev_mode:
                log_info(
                    "Connected to AgentField server - full functionality available"
                )
            # Kick a heartbeat immediately so the control plane renews the lease
            try:
                asyncio.create_task(
                    self.agent.agentfield_handler.send_enhanced_heartbeat()
                )
            except RuntimeError:
                # Event loop not running; the heartbeat worker will recover shortly
                pass
            # Start enhanced heartbeat when connected (only if no live task
            # already exists, so reconnects don't stack duplicate loops).
            if (
                not hasattr(self.agent, "_heartbeat_task")
                or self.agent._heartbeat_task.done()
            ):
                self.agent._heartbeat_task = asyncio.create_task(
                    self.agent.agentfield_handler.enhanced_heartbeat_loop(
                        heartbeat_interval
                    )
                )

        def on_disconnected():
            # Called by the connection manager when the AgentField server
            # becomes unreachable; the agent keeps serving locally.
            if self.agent.dev_mode:
                log_warn("AgentField server disconnected - running in local mode")
            # Cancel heartbeat task when disconnected
            if (
                hasattr(self.agent, "_heartbeat_task")
                and not self.agent._heartbeat_task.done()
            ):
                self.agent._heartbeat_task.cancel()

        self.agent.connection_manager.on_connected = on_connected
        self.agent.connection_manager.on_disconnected = on_disconnected

        # Start connection manager (non-blocking)
        connected = await self.agent.connection_manager.start()

        # Always connect memory event client and start MCP initialization
        # These work independently of AgentField server connection
        if self.agent.memory_event_client:
            try:
                await self.agent.memory_event_client.connect()
            except Exception as e:
                if self.agent.dev_mode:
                    log_error(f"Memory event client connection failed: {e}")

        # Start background MCP initialization (non-blocking)
        asyncio.create_task(self.agent.mcp_handler._background_mcp_initialization())

        if connected:
            if self.agent.dev_mode:
                log_info("Agent started with AgentField server connection")
        else:
            if self.agent.dev_mode:
                log_info(
                    "Agent started in local mode - will connect to AgentField server when available"
                )

    # Add shutdown event handler for cleanup
    @self.agent.on_event("shutdown")
    async def shutdown_cleanup():
        """Cleanup all resources when FastAPI shuts down"""

        # Stop connection manager
        if self.agent.connection_manager:
            await self.agent.connection_manager.stop()

        # Close memory event client
        if self.agent.memory_event_client:
            await self.agent.memory_event_client.close()

        # Stop MCP servers (each cleanup step is isolated so one failure
        # does not prevent the others from running)
        if self.agent.mcp_manager:
            try:
                await self.agent.mcp_manager.shutdown_all()
                if self.agent.dev_mode:
                    log_info("MCP servers stopped")
            except Exception as e:
                if self.agent.dev_mode:
                    log_error(f"MCP shutdown error: {e}")

        if self.agent.mcp_client_registry:
            try:
                await self.agent.mcp_client_registry.close_all()
            except Exception as e:
                if self.agent.dev_mode:
                    log_error(f"MCP client shutdown error: {e}")

        if getattr(self.agent, "client", None):
            try:
                await self.agent.client.aclose()
            except Exception as e:
                if self.agent.dev_mode:
                    log_error(f"AgentField client shutdown error: {e}")

        # Clear agent from thread-local storage during shutdown
        from agentfield.agent_registry import clear_current_agent

        clear_current_agent()

    # Configure uvicorn parameters based on environment and requirements
    uvicorn_config = {
        "host": host,
        "port": port,
        "reload": dev
        and workers is None,  # Only enable reload in dev mode with single worker
        "access_log": access_log,
        "log_level": log_level,
        "timeout_graceful_shutdown": 30,  # Allow 30 seconds for graceful shutdown
        **kwargs,
    }

    # Add SSL configuration if provided and valid
    if ssl_keyfile and ssl_certfile:
        if self._validate_ssl_config(ssl_keyfile, ssl_certfile):
            uvicorn_config.update(
                {
                    "ssl_keyfile": ssl_keyfile,
                    "ssl_certfile": ssl_certfile,
                }
            )
            if self.agent.dev_mode:
                log_info("HTTPS enabled with SSL certificates")
        else:
            log_error("Invalid SSL configuration, falling back to HTTP")
            ssl_keyfile = ssl_certfile = None

    # Configure workers for production
    if workers and workers > 1:
        uvicorn_config["workers"] = workers
        if self.agent.dev_mode:
            log_debug(f"Multi-process mode: {workers} workers")
    elif self.agent.dev_mode:
        log_debug("Single-process mode")

    # Performance optimizations for production
    if not dev:
        # Add production-specific configurations
        production_config = {
            "limit_concurrency": 1000,  # Limit concurrent connections
            "backlog": 2048,  # Connection queue size
        }

        # Only apply request limit for multi-worker deployments
        # Single-process apps don't benefit from this and it causes unwanted shutdowns
        if workers and workers > 1:
            production_config["limit_max_requests"] = (
                100000  # Restart workers after N requests
            )

        uvicorn_config.update(production_config)

        # Try to use uvloop for better performance
        if importlib.util.find_spec("uvloop") is not None:
            uvicorn_config["loop"] = "uvloop"
            if self.agent.dev_mode:
                log_info("Using uvloop for enhanced performance")
        elif self.agent.dev_mode:
            log_warn("uvloop not available, using default asyncio loop")

    # Environment-based log level adjustment
    env_log_level = os.getenv("UVICORN_LOG_LEVEL", log_level).lower()
    if env_log_level in ["critical", "error", "warning", "info", "debug", "trace"]:
        uvicorn_config["log_level"] = env_log_level

    # Disable access log in production if not explicitly enabled
    if not dev and "access_log" not in kwargs:
        uvicorn_config["access_log"] = False

    if self.agent.dev_mode:
        log_debug("Uvicorn configuration:")
        # Hide SSL file paths from the debug dump
        config_display = {
            k: v
            for k, v in uvicorn_config.items()
            if k not in ["ssl_keyfile", "ssl_certfile"]
        }
        for key, value in config_display.items():
            log_debug(f" {key}: {value}")

    try:
        # Start FastAPI server with production-ready configuration
        uvicorn.run(self.agent, **uvicorn_config)
    except OSError as e:
        if "Address already in use" in str(e):
            log_error(
                f"Port {port} is already in use. Choose a different port or stop the conflicting service."
            )
            if self.agent.dev_mode:
                log_info(
                    "Try using auto_port=True or set a different port explicitly"
                )
        else:
            log_error(f"Failed to start server: {e}")
            raise
    except KeyboardInterrupt:
        if self.agent.dev_mode:
            log_info("Server stopped by user (Ctrl+C)")
    except Exception as e:
        log_error(f"Unexpected server error: {e}")
        raise
    finally:
        # Phase 5: Graceful shutdown - stop heartbeat and MCP servers
        if self.agent.dev_mode:
            log_info("Agent shutdown initiated...")

        # Stop heartbeat worker
        self.agent.agentfield_handler.stop_heartbeat()

        # Stop all MCP servers
        self.agent.mcp_handler._cleanup_mcp_servers()

        if self.agent.dev_mode:
            log_success("Agent shutdown complete")
|