kubectl-mcp-server 1.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kubectl_mcp_server-1.12.0.dist-info/METADATA +711 -0
- kubectl_mcp_server-1.12.0.dist-info/RECORD +45 -0
- kubectl_mcp_server-1.12.0.dist-info/WHEEL +5 -0
- kubectl_mcp_server-1.12.0.dist-info/entry_points.txt +3 -0
- kubectl_mcp_server-1.12.0.dist-info/licenses/LICENSE +21 -0
- kubectl_mcp_server-1.12.0.dist-info/top_level.txt +2 -0
- kubectl_mcp_tool/__init__.py +21 -0
- kubectl_mcp_tool/__main__.py +46 -0
- kubectl_mcp_tool/auth/__init__.py +13 -0
- kubectl_mcp_tool/auth/config.py +71 -0
- kubectl_mcp_tool/auth/scopes.py +148 -0
- kubectl_mcp_tool/auth/verifier.py +82 -0
- kubectl_mcp_tool/cli/__init__.py +9 -0
- kubectl_mcp_tool/cli/__main__.py +10 -0
- kubectl_mcp_tool/cli/cli.py +111 -0
- kubectl_mcp_tool/diagnostics.py +355 -0
- kubectl_mcp_tool/k8s_config.py +289 -0
- kubectl_mcp_tool/mcp_server.py +530 -0
- kubectl_mcp_tool/prompts/__init__.py +5 -0
- kubectl_mcp_tool/prompts/prompts.py +823 -0
- kubectl_mcp_tool/resources/__init__.py +5 -0
- kubectl_mcp_tool/resources/resources.py +305 -0
- kubectl_mcp_tool/tools/__init__.py +28 -0
- kubectl_mcp_tool/tools/browser.py +371 -0
- kubectl_mcp_tool/tools/cluster.py +315 -0
- kubectl_mcp_tool/tools/core.py +421 -0
- kubectl_mcp_tool/tools/cost.py +680 -0
- kubectl_mcp_tool/tools/deployments.py +381 -0
- kubectl_mcp_tool/tools/diagnostics.py +174 -0
- kubectl_mcp_tool/tools/helm.py +1561 -0
- kubectl_mcp_tool/tools/networking.py +296 -0
- kubectl_mcp_tool/tools/operations.py +501 -0
- kubectl_mcp_tool/tools/pods.py +582 -0
- kubectl_mcp_tool/tools/security.py +333 -0
- kubectl_mcp_tool/tools/storage.py +133 -0
- kubectl_mcp_tool/utils/__init__.py +17 -0
- kubectl_mcp_tool/utils/helpers.py +80 -0
- tests/__init__.py +9 -0
- tests/conftest.py +379 -0
- tests/test_auth.py +256 -0
- tests/test_browser.py +349 -0
- tests/test_prompts.py +536 -0
- tests/test_resources.py +343 -0
- tests/test_server.py +384 -0
- tests/test_tools.py +659 -0
|
@@ -0,0 +1,582 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import shlex
|
|
4
|
+
import subprocess
|
|
5
|
+
from typing import Any, Callable, Dict, List, Optional
|
|
6
|
+
|
|
7
|
+
from mcp.types import ToolAnnotations
|
|
8
|
+
|
|
9
|
+
# Module-level logger; uses the shared "mcp-server" logger name so pod-tool
# messages appear alongside the rest of the server's log output.
logger = logging.getLogger("mcp-server")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def register_pod_tools(
    server,
    non_destructive: bool
):
    """Register all Pod-related tools with the MCP server.

    Args:
        server: FastMCP server instance; each nested function below is
            registered via its ``tool`` decorator.
        non_destructive: If True, destructive operations (pod deletion via
            ``cleanup_pods``) are blocked and return an error dict instead.

    Every tool returns a dict with a ``success`` key; on failure the dict is
    ``{"success": False, "error": <message>}`` rather than raising, so the
    MCP client always receives a structured result.
    """

    def _core_v1():
        """Load the local kubeconfig and return a CoreV1Api client.

        The kubernetes import stays function-local (as in each tool
        originally) so the package is only required when a tool is invoked,
        and the config is re-loaded on every call.
        """
        from kubernetes import client, config
        config.load_kube_config()
        return client.CoreV1Api()

    def _pod_matches_states(pod: Dict[str, Any], states: List[str]) -> bool:
        """Return True if a pod (kubectl JSON dict) is in any requested state.

        State names mirror what ``kubectl get pods`` shows in its STATUS
        column:
          - "Evicted": status.reason == "Evicted"
          - "Completed": phase "Succeeded"
          - "Error": phase "Failed" that is not an eviction
          - "ContainerStatusUnknown": a container terminated with that reason
        """
        status = pod.get("status", {})
        reason = status.get("reason")
        phase = status.get("phase")
        for state in states:
            if state == "Evicted" and reason == "Evicted":
                return True
            if state == "Completed" and phase == "Succeeded":
                return True
            # An evicted pod also has phase Failed; don't double-count it
            # under "Error" so the two states stay distinguishable.
            if state == "Error" and phase == "Failed" and reason != "Evicted":
                return True
            if state == "ContainerStatusUnknown":
                for cs in status.get("containerStatuses", []) or []:
                    terminated = (cs.get("state", {}) or {}).get("terminated") or {}
                    if terminated.get("reason") == "ContainerStatusUnknown":
                        return True
        return False

    @server.tool(
        annotations=ToolAnnotations(
            title="Get Pods",
            readOnlyHint=True,
        ),
    )
    def get_pods(namespace: Optional[str] = None) -> Dict[str, Any]:
        """Get all pods in the specified namespace.

        Args:
            namespace: Namespace to list; all namespaces when None.

        Returns:
            {"success": True, "pods": [{name, namespace, status, ip}, ...]}
            or {"success": False, "error": <message>}.
        """
        try:
            v1 = _core_v1()

            if namespace:
                pods = v1.list_namespaced_pod(namespace)
            else:
                pods = v1.list_pod_for_all_namespaces()

            return {
                "success": True,
                "pods": [
                    {
                        "name": pod.metadata.name,
                        "namespace": pod.metadata.namespace,
                        "status": pod.status.phase,
                        "ip": pod.status.pod_ip
                    }
                    for pod in pods.items
                ]
            }
        except Exception as e:
            logger.error(f"Error getting pods: {e}")
            return {"success": False, "error": str(e)}

    @server.tool(
        annotations=ToolAnnotations(
            title="Get Logs",
            readOnlyHint=True,
        ),
    )
    def get_logs(
        pod_name: str,
        namespace: Optional[str] = "default",
        container: Optional[str] = None,
        tail: Optional[int] = None
    ) -> Dict[str, Any]:
        """Get logs from a pod.

        Args:
            pod_name: Name of the pod.
            namespace: Pod namespace (default "default").
            container: Container name; required only for multi-container pods.
            tail: If set, return only the last N lines.

        Returns:
            {"success": True, "logs": <str>} or {"success": False, "error": ...}.
        """
        try:
            v1 = _core_v1()

            logs = v1.read_namespaced_pod_log(
                name=pod_name,
                namespace=namespace,
                container=container,
                tail_lines=tail
            )

            return {
                "success": True,
                "logs": logs
            }
        except Exception as e:
            logger.error(f"Error getting logs: {e}")
            return {"success": False, "error": str(e)}

    @server.tool(
        annotations=ToolAnnotations(
            title="Get Pod Events",
            readOnlyHint=True,
        ),
    )
    def get_pod_events(pod_name: str, namespace: str = "default") -> Dict[str, Any]:
        """Get events for a specific pod.

        Uses a field selector on involvedObject.name to filter the namespace's
        events down to those that reference this pod.

        Returns:
            {"success": True, "events": [{name, type, reason, message,
            timestamp}, ...]} or {"success": False, "error": ...}.
        """
        try:
            v1 = _core_v1()
            field_selector = f"involvedObject.name={pod_name}"
            events = v1.list_namespaced_event(namespace, field_selector=field_selector)
            return {
                "success": True,
                "events": [
                    {
                        "name": event.metadata.name,
                        "type": event.type,
                        "reason": event.reason,
                        "message": event.message,
                        # last_timestamp can be None for aggregated events.
                        "timestamp": event.last_timestamp.isoformat() if event.last_timestamp else None
                    } for event in events.items
                ]
            }
        except Exception as e:
            logger.error(f"Error getting pod events: {e}")
            return {"success": False, "error": str(e)}

    @server.tool(
        annotations=ToolAnnotations(
            title="Check Pod Health",
            readOnlyHint=True,
        ),
    )
    def check_pod_health(pod_name: str, namespace: str = "default") -> Dict[str, Any]:
        """Check the health status of a pod.

        Returns:
            {"success": True, "phase": <phase>, "conditions": [<type>, ...]}
            or {"success": False, "error": ...}.
        """
        try:
            v1 = _core_v1()
            pod = v1.read_namespaced_pod(pod_name, namespace)
            status = pod.status
            return {
                "success": True,
                "phase": status.phase,
                "conditions": [c.type for c in status.conditions] if status.conditions else []
            }
        except Exception as e:
            logger.error(f"Error checking pod health: {e}")
            return {"success": False, "error": str(e)}

    @server.tool(
        annotations=ToolAnnotations(
            title="Exec in Pod",
            destructiveHint=True,
        ),
    )
    def exec_in_pod(
        pod_name: str,
        command: str,
        namespace: Optional[str] = "default",
        container: Optional[str] = None
    ) -> Dict[str, Any]:
        """Execute a command inside a pod.

        The command string is split with shlex and passed to ``kubectl exec``
        as an argument list (no shell), which avoids shell-injection through
        the command text.

        NOTE(review): this tool is annotated destructiveHint=True but is not
        gated by ``non_destructive`` (only cleanup_pods is) — confirm whether
        exec should also be blocked in non-destructive mode.

        Returns:
            {"success": <rc == 0>, "stdout", "stderr", "exit_code"} or
            {"success": False, "error": ...}.
        """
        try:
            cmd = ["kubectl", "exec", pod_name, "-n", namespace]
            if container:
                cmd.extend(["-c", container])
            cmd.append("--")
            cmd.extend(shlex.split(command))

            result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)

            return {
                "success": result.returncode == 0,
                "stdout": result.stdout,
                "stderr": result.stderr,
                "exit_code": result.returncode
            }
        except subprocess.TimeoutExpired:
            # Surface the timeout explicitly instead of a generic exception
            # message (mirrors get_previous_logs' handling).
            return {"success": False, "error": "Command timed out after 60 seconds"}
        except Exception as e:
            logger.error(f"Error executing in pod: {e}")
            return {"success": False, "error": str(e)}

    @server.tool(
        annotations=ToolAnnotations(
            title="Cleanup Pods",
            destructiveHint=True,
        ),
    )
    def cleanup_pods(
        namespace: Optional[str] = None,
        states: Optional[List[str]] = None
    ) -> Dict[str, Any]:
        """Clean up pods in problematic states (Evicted, Error, Completed, etc.).

        Args:
            namespace: Namespace to clean; all namespaces when None.
            states: Pod states to delete; defaults to ["Evicted", "Error",
                "Completed", "ContainerStatusUnknown"].

        Returns:
            {"success": True, "deleted_count": N, "deleted_pods": [...]} (the
            pod list is truncated to the first 20 entries) or
            {"success": False, "error": ...}.
        """
        if non_destructive:
            return {"success": False, "error": "Blocked: non-destructive mode"}
        try:
            if states is None:
                states = ["Evicted", "Error", "Completed", "ContainerStatusUnknown"]

            ns_flag = ["-n", namespace] if namespace else ["--all-namespaces"]

            # Fetch the pod list once and match every requested state against
            # it.  (Previously only "Evicted" was ever acted on; Error/
            # Completed/ContainerStatusUnknown were silently ignored.)
            cmd = ["kubectl", "get", "pods"] + ns_flag + ["-o", "json"]
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)

            deleted_pods = []
            if result.returncode == 0:
                try:
                    items = json.loads(result.stdout).get("items", [])
                except json.JSONDecodeError:
                    items = []
                for pod in items:
                    if _pod_matches_states(pod, states):
                        pod_name = pod["metadata"]["name"]
                        pod_ns = pod["metadata"]["namespace"]
                        del_cmd = ["kubectl", "delete", "pod", pod_name, "-n", pod_ns]
                        subprocess.run(del_cmd, capture_output=True, timeout=10)
                        deleted_pods.append(f"{pod_ns}/{pod_name}")

            return {
                "success": True,
                "deleted_count": len(deleted_pods),
                "deleted_pods": deleted_pods[:20]
            }
        except Exception as e:
            logger.error(f"Error cleaning up pods: {e}")
            return {"success": False, "error": str(e)}

    @server.tool(
        annotations=ToolAnnotations(
            title="Get Pod Conditions Detailed",
            readOnlyHint=True,
        ),
    )
    def get_pod_conditions(pod_name: str, namespace: str = "default") -> Dict[str, Any]:
        """Get detailed pod conditions breakdown.

        Returns:
            {"success": True, "pod", "namespace", "phaseAnalysis",
            "conditions", "containerStatuses"} or
            {"success": False, "error": ...}.
        """
        try:
            v1 = _core_v1()

            pod = v1.read_namespaced_pod(pod_name, namespace)

            # Pod-level conditions (PodScheduled, Ready, etc.).
            conditions = []
            for c in (pod.status.conditions or []):
                conditions.append({
                    "type": c.type,
                    "status": c.status,
                    "reason": c.reason,
                    "message": c.message,
                    "lastTransitionTime": str(c.last_transition_time) if c.last_transition_time else None,
                    "lastProbeTime": str(c.last_probe_time) if c.last_probe_time else None
                })

            # Per-container status, flattening the running/waiting/terminated
            # state union into plain keys.
            container_statuses = []
            for cs in (pod.status.container_statuses or []):
                status = {
                    "name": cs.name,
                    "ready": cs.ready,
                    "started": cs.started,
                    "restartCount": cs.restart_count,
                    "image": cs.image,
                    "containerID": cs.container_id
                }
                if cs.state:
                    if cs.state.running:
                        status["state"] = "running"
                        status["startedAt"] = str(cs.state.running.started_at)
                    elif cs.state.waiting:
                        status["state"] = "waiting"
                        status["waitingReason"] = cs.state.waiting.reason
                    elif cs.state.terminated:
                        status["state"] = "terminated"
                        status["terminatedReason"] = cs.state.terminated.reason
                        status["exitCode"] = cs.state.terminated.exit_code
                container_statuses.append(status)

            phase_analysis = {
                "phase": pod.status.phase,
                "reason": pod.status.reason,
                "message": pod.status.message,
                "hostIP": pod.status.host_ip,
                "podIP": pod.status.pod_ip,
                "startTime": str(pod.status.start_time) if pod.status.start_time else None,
                "qosClass": pod.status.qos_class
            }

            return {
                "success": True,
                "pod": pod_name,
                "namespace": namespace,
                "phaseAnalysis": phase_analysis,
                "conditions": conditions,
                "containerStatuses": container_statuses
            }
        except Exception as e:
            logger.error(f"Error getting pod conditions: {e}")
            return {"success": False, "error": str(e)}

    @server.tool(
        annotations=ToolAnnotations(
            title="Get Container Logs Previous",
            readOnlyHint=True,
        ),
    )
    def get_previous_logs(
        pod_name: str,
        namespace: str = "default",
        container: Optional[str] = None,
        tail: int = 100
    ) -> Dict[str, Any]:
        """Get logs from the previous container instance (useful for crash debugging).

        Returns:
            {"success": True, "pod", "namespace", "container", "logs",
            "lineCount"} or {"success": False, "error": ...}.
        """
        try:
            cmd = ["kubectl", "logs", pod_name, "-n", namespace, "--previous", f"--tail={tail}"]
            if container:
                cmd.extend(["-c", container])

            result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)

            if result.returncode != 0:
                # kubectl's stderr distinguishes "never crashed" from other
                # failures; translate that case to a friendlier message.
                if "previous terminated container" in result.stderr.lower():
                    return {"success": False, "error": "No previous container instance found (container hasn't crashed)"}
                return {"success": False, "error": result.stderr.strip()}

            return {
                "success": True,
                "pod": pod_name,
                "namespace": namespace,
                "container": container,
                "logs": result.stdout,
                "lineCount": len(result.stdout.split("\n"))
            }
        except subprocess.TimeoutExpired:
            return {"success": False, "error": "Log retrieval timed out"}
        except Exception as e:
            logger.error(f"Error getting previous logs: {e}")
            return {"success": False, "error": str(e)}

    @server.tool(
        annotations=ToolAnnotations(
            title="Diagnose Pod Crash",
            readOnlyHint=True,
        ),
    )
    def diagnose_pod_crash(pod_name: str, namespace: str = "default") -> Dict[str, Any]:
        """Automated diagnosis of pod crash loops and failures.

        Inspects container states for well-known failure reasons
        (CrashLoopBackOff, ImagePullBackOff, CreateContainerConfigError,
        non-zero exits, OOMKilled) and collects Warning events, producing a
        diagnosis dict with issues and recommendations.

        Returns:
            {"success": True, "diagnosis": {...}} or
            {"success": False, "error": ...}.
        """
        try:
            v1 = _core_v1()

            pod = v1.read_namespaced_pod(pod_name, namespace)

            diagnosis = {
                "pod": pod_name,
                "namespace": namespace,
                "phase": pod.status.phase,
                "issues": [],
                "recommendations": [],
                "containerStatuses": [],
                "events": []
            }

            for cs in (pod.status.container_statuses or []):
                container_info = {
                    "name": cs.name,
                    "ready": cs.ready,
                    "restartCount": cs.restart_count,
                    "state": None,
                    "lastState": None
                }

                if cs.state:
                    if cs.state.waiting:
                        container_info["state"] = {
                            "status": "waiting",
                            "reason": cs.state.waiting.reason,
                            "message": cs.state.waiting.message
                        }
                        if cs.state.waiting.reason == "CrashLoopBackOff":
                            diagnosis["issues"].append({
                                "container": cs.name,
                                "issue": "CrashLoopBackOff",
                                "severity": "critical",
                                "description": "Container is crashing repeatedly"
                            })
                            diagnosis["recommendations"].append("Check container logs for error messages")
                            diagnosis["recommendations"].append("Verify the container command and args are correct")
                        elif cs.state.waiting.reason == "ImagePullBackOff":
                            diagnosis["issues"].append({
                                "container": cs.name,
                                "issue": "ImagePullBackOff",
                                "severity": "critical",
                                "description": "Unable to pull container image"
                            })
                            diagnosis["recommendations"].append("Verify the image name and tag exist")
                            diagnosis["recommendations"].append("Check imagePullSecrets if using private registry")
                        elif cs.state.waiting.reason == "CreateContainerConfigError":
                            diagnosis["issues"].append({
                                "container": cs.name,
                                "issue": "CreateContainerConfigError",
                                "severity": "critical",
                                "description": "Container configuration error"
                            })
                            diagnosis["recommendations"].append("Check ConfigMaps and Secrets referenced by the container")
                    elif cs.state.running:
                        container_info["state"] = {"status": "running", "startedAt": str(cs.state.running.started_at)}
                    elif cs.state.terminated:
                        container_info["state"] = {
                            "status": "terminated",
                            "exitCode": cs.state.terminated.exit_code,
                            "reason": cs.state.terminated.reason,
                            "message": cs.state.terminated.message
                        }
                        if cs.state.terminated.exit_code != 0:
                            diagnosis["issues"].append({
                                "container": cs.name,
                                "issue": f"Exited with code {cs.state.terminated.exit_code}",
                                "severity": "error",
                                "reason": cs.state.terminated.reason
                            })
                            if cs.state.terminated.reason == "OOMKilled":
                                diagnosis["recommendations"].append(f"Increase memory limit for container '{cs.name}'")
                            elif cs.state.terminated.reason == "Error":
                                diagnosis["recommendations"].append(f"Check logs for container '{cs.name}' to identify the error")

                # The previous termination is often more informative than the
                # current state when the container has already restarted.
                if cs.last_state and cs.last_state.terminated:
                    container_info["lastState"] = {
                        "status": "terminated",
                        "exitCode": cs.last_state.terminated.exit_code,
                        "reason": cs.last_state.terminated.reason,
                        "finishedAt": str(cs.last_state.terminated.finished_at)
                    }

                diagnosis["containerStatuses"].append(container_info)

            # Only Warning events are relevant to a crash diagnosis.
            events = v1.list_namespaced_event(namespace, field_selector=f"involvedObject.name={pod_name}")
            for event in events.items:
                if event.type == "Warning":
                    diagnosis["events"].append({
                        "type": event.type,
                        "reason": event.reason,
                        "message": event.message,
                        "count": event.count
                    })

            return {"success": True, "diagnosis": diagnosis}
        except Exception as e:
            logger.error(f"Error diagnosing pod crash: {e}")
            return {"success": False, "error": str(e)}

    @server.tool(
        annotations=ToolAnnotations(
            title="Detect Pending Pods",
            readOnlyHint=True,
        ),
    )
    def detect_pending_pods(namespace: Optional[str] = None) -> Dict[str, Any]:
        """Find pending pods and explain why they are not scheduled.

        For each Pending pod, collects the failed PodScheduled condition and
        scheduling-related events, and classifies common causes (insufficient
        CPU/memory, node-selector mismatch, unbound PVC) from event messages.

        Returns:
            {"success": True, "pendingCount": N, "pendingPods": [...]} or
            {"success": False, "error": ...}.
        """
        try:
            v1 = _core_v1()

            if namespace:
                pods = v1.list_namespaced_pod(namespace, field_selector="status.phase=Pending")
            else:
                pods = v1.list_pod_for_all_namespaces(field_selector="status.phase=Pending")

            pending_pods = []
            for pod in pods.items:
                pod_info = {
                    "name": pod.metadata.name,
                    "namespace": pod.metadata.namespace,
                    "createdAt": str(pod.metadata.creation_timestamp),
                    "reasons": [],
                    "events": []
                }

                for condition in (pod.status.conditions or []):
                    if condition.type == "PodScheduled" and condition.status == "False":
                        pod_info["reasons"].append({
                            "type": "SchedulingFailed",
                            "reason": condition.reason,
                            "message": condition.message
                        })

                events = v1.list_namespaced_event(
                    pod.metadata.namespace,
                    field_selector=f"involvedObject.name={pod.metadata.name}"
                )
                for event in events.items:
                    if event.reason in ["FailedScheduling", "FailedAttachVolume", "FailedMount"]:
                        pod_info["events"].append({
                            "reason": event.reason,
                            "message": event.message,
                            "count": event.count
                        })
                        # Classify well-known scheduler message fragments into
                        # structured reasons.
                        msg = event.message or ""
                        if "Insufficient cpu" in msg:
                            pod_info["reasons"].append({
                                "type": "InsufficientCPU",
                                "message": "Not enough CPU available on any node"
                            })
                        elif "Insufficient memory" in msg:
                            pod_info["reasons"].append({
                                "type": "InsufficientMemory",
                                "message": "Not enough memory available on any node"
                            })
                        elif "node(s) didn't match node selector" in msg:
                            pod_info["reasons"].append({
                                "type": "NodeSelectorMismatch",
                                "message": "No nodes match the pod's nodeSelector"
                            })
                        elif "PersistentVolumeClaim" in msg:
                            pod_info["reasons"].append({
                                "type": "PVCPending",
                                "message": "PersistentVolumeClaim is not bound"
                            })

                pending_pods.append(pod_info)

            return {
                "success": True,
                "pendingCount": len(pending_pods),
                "pendingPods": pending_pods
            }
        except Exception as e:
            logger.error(f"Error detecting pending pods: {e}")
            return {"success": False, "error": str(e)}

    @server.tool(
        annotations=ToolAnnotations(
            title="Get Evicted Pods",
            readOnlyHint=True,
        ),
    )
    def get_evicted_pods(namespace: Optional[str] = None) -> Dict[str, Any]:
        """Find evicted pods with their eviction reasons.

        Groups evictions by cause (DiskPressure / MemoryPressure / Other)
        inferred from the eviction message, and emits a recommendation per
        observed cause.

        Returns:
            {"success": True, "summary": {...}, "evictedPods": [...],
            "recommendations": [...]} or {"success": False, "error": ...}.
        """
        try:
            v1 = _core_v1()

            if namespace:
                pods = v1.list_namespaced_pod(namespace)
            else:
                pods = v1.list_pod_for_all_namespaces()

            evicted = []
            for pod in pods.items:
                # Evicted pods are left in phase Failed with reason Evicted.
                if pod.status.phase == "Failed" and pod.status.reason == "Evicted":
                    evicted.append({
                        "name": pod.metadata.name,
                        "namespace": pod.metadata.namespace,
                        "reason": pod.status.reason,
                        "message": pod.status.message,
                        "nodeName": pod.spec.node_name,
                        "evictedAt": str(pod.status.start_time) if pod.status.start_time else None
                    })

            # Bucket evictions by cause keyword in the eviction message.
            by_reason = {}
            for pod in evicted:
                msg = pod.get("message", "Unknown") or ""
                if "ephemeral-storage" in msg.lower():
                    reason = "DiskPressure"
                elif "memory" in msg.lower():
                    reason = "MemoryPressure"
                else:
                    reason = "Other"

                by_reason.setdefault(reason, []).append(pod["name"])

            recommendations = [
                rec for rec in (
                    "DiskPressure: Clean up disk space or increase ephemeral-storage limits" if "DiskPressure" in by_reason else None,
                    "MemoryPressure: Increase memory limits or add more nodes" if "MemoryPressure" in by_reason else None,
                )
                if rec is not None  # drop null placeholders from the output
            ]

            return {
                "success": True,
                "summary": {
                    "totalEvicted": len(evicted),
                    "byReason": {k: len(v) for k, v in by_reason.items()}
                },
                "evictedPods": evicted,
                "recommendations": recommendations
            }
        except Exception as e:
            logger.error(f"Error getting evicted pods: {e}")
            return {"success": False, "error": str(e)}
|