kubectl-mcp-server 1.12.0 (kubectl_mcp_server-1.12.0-py3-none-any.whl)
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between versions as they appear in their respective public registries.
- kubectl_mcp_server-1.12.0.dist-info/METADATA +711 -0
- kubectl_mcp_server-1.12.0.dist-info/RECORD +45 -0
- kubectl_mcp_server-1.12.0.dist-info/WHEEL +5 -0
- kubectl_mcp_server-1.12.0.dist-info/entry_points.txt +3 -0
- kubectl_mcp_server-1.12.0.dist-info/licenses/LICENSE +21 -0
- kubectl_mcp_server-1.12.0.dist-info/top_level.txt +2 -0
- kubectl_mcp_tool/__init__.py +21 -0
- kubectl_mcp_tool/__main__.py +46 -0
- kubectl_mcp_tool/auth/__init__.py +13 -0
- kubectl_mcp_tool/auth/config.py +71 -0
- kubectl_mcp_tool/auth/scopes.py +148 -0
- kubectl_mcp_tool/auth/verifier.py +82 -0
- kubectl_mcp_tool/cli/__init__.py +9 -0
- kubectl_mcp_tool/cli/__main__.py +10 -0
- kubectl_mcp_tool/cli/cli.py +111 -0
- kubectl_mcp_tool/diagnostics.py +355 -0
- kubectl_mcp_tool/k8s_config.py +289 -0
- kubectl_mcp_tool/mcp_server.py +530 -0
- kubectl_mcp_tool/prompts/__init__.py +5 -0
- kubectl_mcp_tool/prompts/prompts.py +823 -0
- kubectl_mcp_tool/resources/__init__.py +5 -0
- kubectl_mcp_tool/resources/resources.py +305 -0
- kubectl_mcp_tool/tools/__init__.py +28 -0
- kubectl_mcp_tool/tools/browser.py +371 -0
- kubectl_mcp_tool/tools/cluster.py +315 -0
- kubectl_mcp_tool/tools/core.py +421 -0
- kubectl_mcp_tool/tools/cost.py +680 -0
- kubectl_mcp_tool/tools/deployments.py +381 -0
- kubectl_mcp_tool/tools/diagnostics.py +174 -0
- kubectl_mcp_tool/tools/helm.py +1561 -0
- kubectl_mcp_tool/tools/networking.py +296 -0
- kubectl_mcp_tool/tools/operations.py +501 -0
- kubectl_mcp_tool/tools/pods.py +582 -0
- kubectl_mcp_tool/tools/security.py +333 -0
- kubectl_mcp_tool/tools/storage.py +133 -0
- kubectl_mcp_tool/utils/__init__.py +17 -0
- kubectl_mcp_tool/utils/helpers.py +80 -0
- tests/__init__.py +9 -0
- tests/conftest.py +379 -0
- tests/test_auth.py +256 -0
- tests/test_browser.py +349 -0
- tests/test_prompts.py +536 -0
- tests/test_resources.py +343 -0
- tests/test_server.py +384 -0
- tests/test_tools.py +659 -0
kubectl_mcp_tool/tools/cost.py (new file)

@@ -0,0 +1,680 @@

```python
import logging
import subprocess
import re
from datetime import datetime
from typing import Any, Dict, Optional

from mcp.types import ToolAnnotations

logger = logging.getLogger("mcp-server")


def _parse_cpu(cpu_str: str) -> int:
    """Parse CPU string to millicores."""
    try:
        cpu_str = str(cpu_str)
        if cpu_str.endswith("m"):
            return int(cpu_str[:-1])
        elif cpu_str.endswith("n"):
            return int(cpu_str[:-1]) // 1000000
        else:
            return int(float(cpu_str) * 1000)
    except:
        return 0


def _parse_memory(mem_str: str) -> int:
    """Parse memory string to bytes."""
    try:
        mem_str = str(mem_str)
        if mem_str.endswith("Ki"):
            return int(mem_str[:-2]) * 1024
        elif mem_str.endswith("Mi"):
            return int(mem_str[:-2]) * 1024 * 1024
        elif mem_str.endswith("Gi"):
            return int(mem_str[:-2]) * 1024 * 1024 * 1024
        elif mem_str.endswith("K"):
            return int(mem_str[:-1]) * 1000
        elif mem_str.endswith("M"):
            return int(mem_str[:-1]) * 1000000
        elif mem_str.endswith("G"):
            return int(mem_str[:-1]) * 1000000000
        else:
            return int(mem_str)
    except:
        return 0


def _calculate_available(hard: str, used: str) -> str:
    """Calculate available resources from hard and used values."""
    try:
        hard_num = int(re.sub(r'[^\d]', '', str(hard)) or 0)
        used_num = int(re.sub(r'[^\d]', '', str(used)) or 0)
        suffix = re.sub(r'[\d]', '', str(hard))
        return f"{max(0, hard_num - used_num)}{suffix}"
    except:
        return "N/A"


def register_cost_tools(server, non_destructive: bool):
    """Register cost and resource optimization tools."""

    @server.tool(
        annotations=ToolAnnotations(
            title="Get Resource Recommendations",
            readOnlyHint=True,
        ),
    )
    def get_resource_recommendations(
        namespace: Optional[str] = None,
        resource_type: str = "all"
    ) -> Dict[str, Any]:
        """Analyze resource usage and provide optimization recommendations for pods/deployments."""
        try:
            from kubernetes import client, config
            config.load_kube_config()
            v1 = client.CoreV1Api()

            recommendations = []

            if namespace:
                pods = v1.list_namespaced_pod(namespace).items
            else:
                pods = v1.list_pod_for_all_namespaces().items

            for pod in pods:
                if pod.status.phase != "Running":
                    continue

                for container in pod.spec.containers:
                    issues = []
                    suggestions = []

                    resources = container.resources or client.V1ResourceRequirements()
                    requests = resources.requests or {}
                    limits = resources.limits or {}

                    if not requests:
                        issues.append("No resource requests defined")
                        suggestions.append("Set CPU/memory requests for better scheduling")

                    if not limits:
                        issues.append("No resource limits defined")
                        suggestions.append("Set CPU/memory limits to prevent resource exhaustion")

                    if requests and limits:
                        cpu_req = requests.get("cpu", "0")
                        cpu_lim = limits.get("cpu", "0")
                        mem_req = requests.get("memory", "0")
                        mem_lim = limits.get("memory", "0")

                        if cpu_req == cpu_lim and mem_req == mem_lim:
                            issues.append("Requests equal limits (Guaranteed QoS)")
                            suggestions.append("Consider Burstable QoS for non-critical workloads")

                    if not container.liveness_probe:
                        issues.append("No liveness probe")
                        suggestions.append("Add liveness probe for automatic recovery")

                    if not container.readiness_probe:
                        issues.append("No readiness probe")
                        suggestions.append("Add readiness probe for traffic management")

                    if issues:
                        recommendations.append({
                            "pod": pod.metadata.name,
                            "namespace": pod.metadata.namespace,
                            "container": container.name,
                            "issues": issues,
                            "suggestions": suggestions,
                            "currentResources": {
                                "requests": requests,
                                "limits": limits
                            }
                        })

            return {
                "success": True,
                "totalAnalyzed": len(pods),
                "issuesFound": len(recommendations),
                "recommendations": recommendations[:50]
            }
        except Exception as e:
            logger.error(f"Error getting resource recommendations: {e}")
            return {"success": False, "error": str(e)}

    @server.tool(
        annotations=ToolAnnotations(
            title="Get Idle Resources",
            readOnlyHint=True,
        ),
    )
    def get_idle_resources(
        namespace: Optional[str] = None,
        cpu_threshold: float = 10.0,
        memory_threshold: float = 10.0
    ) -> Dict[str, Any]:
        """Find underutilized pods using less than threshold percentage of requested resources."""
        try:
            cmd = ["kubectl", "top", "pods", "--no-headers"]
            if namespace:
                cmd.extend(["-n", namespace])
            else:
                cmd.append("-A")

            result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
            if result.returncode != 0:
                return {"success": False, "error": result.stderr.strip() or "Metrics server not available"}

            idle_pods = []
            lines = result.stdout.strip().split("\n")

            for line in lines:
                if not line.strip():
                    continue
                parts = line.split()
                if len(parts) >= 3:
                    if namespace:
                        pod_name, cpu_usage, mem_usage = parts[0], parts[1], parts[2]
                        ns = namespace
                    elif len(parts) >= 4:
                        ns, pod_name, cpu_usage, mem_usage = parts[0], parts[1], parts[2], parts[3]
                    else:
                        continue

                    cpu_val = int(re.sub(r'[^\d]', '', cpu_usage) or 0)
                    mem_val = int(re.sub(r'[^\d]', '', mem_usage) or 0)

                    if cpu_val < cpu_threshold or mem_val < memory_threshold:
                        idle_pods.append({
                            "namespace": ns,
                            "pod": pod_name,
                            "cpuUsage": cpu_usage,
                            "memoryUsage": mem_usage,
                            "recommendation": "Consider scaling down or consolidating"
                        })

            return {
                "success": True,
                "thresholds": {
                    "cpu": f"{cpu_threshold}%",
                    "memory": f"{memory_threshold}%"
                },
                "idleCount": len(idle_pods),
                "idlePods": idle_pods[:50]
            }
        except subprocess.TimeoutExpired:
            return {"success": False, "error": "Metrics retrieval timed out"}
        except Exception as e:
            logger.error(f"Error finding idle resources: {e}")
            return {"success": False, "error": str(e)}

    @server.tool(
        annotations=ToolAnnotations(
            title="Get Resource Quotas Usage",
            readOnlyHint=True,
        ),
    )
    def get_resource_quotas_usage(namespace: Optional[str] = None) -> Dict[str, Any]:
        """Show resource quota usage and availability across namespaces."""
        try:
            from kubernetes import client, config
            config.load_kube_config()
            v1 = client.CoreV1Api()

            if namespace:
                quotas = v1.list_namespaced_resource_quota(namespace).items
            else:
                quotas = v1.list_resource_quota_for_all_namespaces().items

            quota_usage = []
            for quota in quotas:
                hard = quota.status.hard or {}
                used = quota.status.used or {}

                resources = []
                for resource_name, hard_val in hard.items():
                    used_val = used.get(resource_name, "0")
                    resources.append({
                        "resource": resource_name,
                        "hard": hard_val,
                        "used": used_val,
                        "available": _calculate_available(hard_val, used_val)
                    })

                quota_usage.append({
                    "name": quota.metadata.name,
                    "namespace": quota.metadata.namespace,
                    "resources": resources
                })

            return {
                "success": True,
                "count": len(quota_usage),
                "quotas": quota_usage
            }
        except Exception as e:
            logger.error(f"Error getting quota usage: {e}")
            return {"success": False, "error": str(e)}

    @server.tool(
        annotations=ToolAnnotations(
            title="Get Cost Analysis",
            readOnlyHint=True,
        ),
    )
    def get_cost_analysis(namespace: Optional[str] = None) -> Dict[str, Any]:
        """Analyze resource costs by namespace and workload based on resource requests."""
        try:
            from kubernetes import client, config
            config.load_kube_config()
            v1 = client.CoreV1Api()

            if namespace:
                pods = v1.list_namespaced_pod(namespace).items
            else:
                pods = v1.list_pod_for_all_namespaces().items

            namespace_costs = {}
            workload_costs = []

            for pod in pods:
                if pod.status.phase != "Running":
                    continue

                ns = pod.metadata.namespace
                if ns not in namespace_costs:
                    namespace_costs[ns] = {"cpu": 0, "memory": 0, "pods": 0}

                pod_cpu = 0
                pod_memory = 0

                for container in pod.spec.containers:
                    if container.resources and container.resources.requests:
                        cpu = container.resources.requests.get("cpu", "0")
                        memory = container.resources.requests.get("memory", "0")
                        pod_cpu += _parse_cpu(cpu)
                        pod_memory += _parse_memory(memory)

                namespace_costs[ns]["cpu"] += pod_cpu
                namespace_costs[ns]["memory"] += pod_memory
                namespace_costs[ns]["pods"] += 1

                owner_kind = "standalone"
                if pod.metadata.owner_references:
                    owner_kind = pod.metadata.owner_references[0].kind

                workload_costs.append({
                    "namespace": ns,
                    "pod": pod.metadata.name,
                    "ownerKind": owner_kind,
                    "cpuMillicores": pod_cpu,
                    "memoryMi": round(pod_memory / (1024 * 1024), 2)
                })

            ns_summary = []
            for ns, costs in namespace_costs.items():
                ns_summary.append({
                    "namespace": ns,
                    "totalCpuMillicores": costs["cpu"],
                    "totalMemoryMi": round(costs["memory"] / (1024 * 1024), 2),
                    "podCount": costs["pods"]
                })

            ns_summary.sort(key=lambda x: x["totalCpuMillicores"], reverse=True)

            return {
                "success": True,
                "note": "Cost estimates based on resource requests. Integrate with cloud billing for actual costs.",
                "byNamespace": ns_summary,
                "topWorkloads": sorted(workload_costs, key=lambda x: x["cpuMillicores"], reverse=True)[:20]
            }
        except Exception as e:
            logger.error(f"Error analyzing costs: {e}")
            return {"success": False, "error": str(e)}

    @server.tool(
        annotations=ToolAnnotations(
            title="Get Overprovisioned Resources",
            readOnlyHint=True,
        ),
    )
    def get_overprovisioned_resources(
        namespace: Optional[str] = None,
        threshold: float = 50.0
    ) -> Dict[str, Any]:
        """Find pods using significantly less resources than requested (over-provisioned)."""
        try:
            from kubernetes import client, config
            config.load_kube_config()
            v1 = client.CoreV1Api()

            cmd = ["kubectl", "top", "pods", "--no-headers"]
            if namespace:
                cmd.extend(["-n", namespace])
            else:
                cmd.append("-A")

            result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
            if result.returncode != 0:
                return {"success": False, "error": result.stderr.strip() or "Metrics server not available"}

            usage_map = {}
            for line in result.stdout.strip().split("\n"):
                if not line.strip():
                    continue
                parts = line.split()
                if namespace and len(parts) >= 3:
                    usage_map[(namespace, parts[0])] = {"cpu": parts[1], "memory": parts[2]}
                elif len(parts) >= 4:
                    usage_map[(parts[0], parts[1])] = {"cpu": parts[2], "memory": parts[3]}

            if namespace:
                pods = v1.list_namespaced_pod(namespace).items
            else:
                pods = v1.list_pod_for_all_namespaces().items

            overprovisioned = []
            for pod in pods:
                if pod.status.phase != "Running":
                    continue

                key = (pod.metadata.namespace, pod.metadata.name)
                if key not in usage_map:
                    continue

                usage = usage_map[key]
                total_cpu_req = 0
                total_mem_req = 0

                for container in pod.spec.containers:
                    if container.resources and container.resources.requests:
                        total_cpu_req += _parse_cpu(container.resources.requests.get("cpu", "0"))
                        total_mem_req += _parse_memory(container.resources.requests.get("memory", "0"))

                if total_cpu_req == 0 and total_mem_req == 0:
                    continue

                cpu_used = _parse_cpu(usage["cpu"])
                mem_used = _parse_memory(usage["memory"])

                cpu_util = (cpu_used / total_cpu_req * 100) if total_cpu_req > 0 else 0
                mem_util = (mem_used / total_mem_req * 100) if total_mem_req > 0 else 0

                if cpu_util < threshold or mem_util < threshold:
                    overprovisioned.append({
                        "namespace": pod.metadata.namespace,
                        "pod": pod.metadata.name,
                        "cpuRequested": f"{total_cpu_req}m",
                        "cpuUsed": usage["cpu"],
                        "cpuUtilization": f"{cpu_util:.1f}%",
                        "memoryRequested": f"{total_mem_req // (1024*1024)}Mi",
                        "memoryUsed": usage["memory"],
                        "memoryUtilization": f"{mem_util:.1f}%",
                        "recommendation": "Consider reducing resource requests"
                    })

            overprovisioned.sort(key=lambda x: float(x["cpuUtilization"].rstrip("%")))

            return {
                "success": True,
                "threshold": f"{threshold}%",
                "count": len(overprovisioned),
                "overprovisioned": overprovisioned[:50]
            }
        except subprocess.TimeoutExpired:
            return {"success": False, "error": "Metrics retrieval timed out"}
        except Exception as e:
            logger.error(f"Error finding overprovisioned resources: {e}")
            return {"success": False, "error": str(e)}

    @server.tool(
        annotations=ToolAnnotations(
            title="Get Resource Trends",
            readOnlyHint=True,
        ),
    )
    def get_resource_trends(
        namespace: Optional[str] = None,
        resource_type: str = "pods"
    ) -> Dict[str, Any]:
        """Get current resource usage snapshot for trend analysis (requires metrics-server)."""
        try:
            if resource_type == "nodes":
                cmd = ["kubectl", "top", "nodes", "--no-headers"]
            else:
                cmd = ["kubectl", "top", "pods", "--no-headers"]
                if namespace:
                    cmd.extend(["-n", namespace])
                else:
                    cmd.append("-A")

            result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
            if result.returncode != 0:
                return {"success": False, "error": result.stderr.strip() or "Metrics server not available"}

            metrics = []
            total_cpu = 0
            total_memory = 0

            for line in result.stdout.strip().split("\n"):
                if not line.strip():
                    continue
                parts = line.split()

                if resource_type == "nodes" and len(parts) >= 5:
                    cpu_val = _parse_cpu(parts[1])
                    mem_bytes = _parse_memory(parts[3])
                    total_cpu += cpu_val
                    total_memory += mem_bytes
                    metrics.append({
                        "node": parts[0],
                        "cpuUsage": parts[1],
                        "cpuPercent": parts[2],
                        "memoryUsage": parts[3],
                        "memoryPercent": parts[4]
                    })
                elif len(parts) >= 3:
                    if namespace:
                        cpu_val = _parse_cpu(parts[1])
                        mem_bytes = _parse_memory(parts[2])
                        metrics.append({
                            "namespace": namespace,
                            "pod": parts[0],
                            "cpuUsage": parts[1],
                            "memoryUsage": parts[2]
                        })
                    elif len(parts) >= 4:
                        cpu_val = _parse_cpu(parts[2])
                        mem_bytes = _parse_memory(parts[3])
                        metrics.append({
                            "namespace": parts[0],
                            "pod": parts[1],
                            "cpuUsage": parts[2],
                            "memoryUsage": parts[3]
                        })
                    else:
                        cpu_val = 0
                        mem_bytes = 0
                    total_cpu += cpu_val
                    total_memory += mem_bytes

            return {
                "success": True,
                "timestamp": datetime.utcnow().isoformat() + "Z",
                "resourceType": resource_type,
                "summary": {
                    "totalCpuMillicores": total_cpu,
                    "totalMemoryMi": round(total_memory / (1024 * 1024), 2),
                    "resourceCount": len(metrics)
                },
                "metrics": metrics[:100],
                "note": "Store snapshots over time for trend analysis"
            }
        except subprocess.TimeoutExpired:
            return {"success": False, "error": "Metrics retrieval timed out"}
        except Exception as e:
            logger.error(f"Error getting resource trends: {e}")
            return {"success": False, "error": str(e)}

    @server.tool(
        annotations=ToolAnnotations(
            title="Get Namespace Cost Allocation",
            readOnlyHint=True,
        ),
    )
    def get_namespace_cost_allocation() -> Dict[str, Any]:
        """Calculate resource allocation percentages across all namespaces."""
        try:
            from kubernetes import client, config
            config.load_kube_config()
            v1 = client.CoreV1Api()

            pods = v1.list_pod_for_all_namespaces().items

            ns_allocation = {}
            total_cpu = 0
            total_memory = 0

            for pod in pods:
                if pod.status.phase != "Running":
                    continue

                ns = pod.metadata.namespace
                if ns not in ns_allocation:
                    ns_allocation[ns] = {"cpu": 0, "memory": 0, "pods": 0}

                for container in pod.spec.containers:
                    if container.resources and container.resources.requests:
                        cpu = _parse_cpu(container.resources.requests.get("cpu", "0"))
                        memory = _parse_memory(container.resources.requests.get("memory", "0"))
                        ns_allocation[ns]["cpu"] += cpu
                        ns_allocation[ns]["memory"] += memory
                        total_cpu += cpu
                        total_memory += memory

                ns_allocation[ns]["pods"] += 1

            allocations = []
            for ns, alloc in ns_allocation.items():
                cpu_pct = (alloc["cpu"] / total_cpu * 100) if total_cpu > 0 else 0
                mem_pct = (alloc["memory"] / total_memory * 100) if total_memory > 0 else 0

                allocations.append({
                    "namespace": ns,
                    "cpuMillicores": alloc["cpu"],
                    "cpuPercent": f"{cpu_pct:.1f}%",
                    "memoryMi": round(alloc["memory"] / (1024 * 1024), 2),
                    "memoryPercent": f"{mem_pct:.1f}%",
                    "podCount": alloc["pods"]
                })

            allocations.sort(key=lambda x: x["cpuMillicores"], reverse=True)

            return {
                "success": True,
                "clusterTotals": {
                    "totalCpuMillicores": total_cpu,
                    "totalMemoryMi": round(total_memory / (1024 * 1024), 2),
                    "namespaceCount": len(allocations)
                },
                "allocations": allocations
            }
        except Exception as e:
            logger.error(f"Error calculating namespace allocation: {e}")
            return {"success": False, "error": str(e)}

    @server.tool(
        annotations=ToolAnnotations(
            title="Optimize Resource Requests",
            readOnlyHint=True,
        ),
    )
    def optimize_resource_requests(
        namespace: str,
        deployment_name: Optional[str] = None
    ) -> Dict[str, Any]:
        """Suggest optimal resource requests based on current usage patterns."""
        try:
            from kubernetes import client, config
            config.load_kube_config()
            apps = client.AppsV1Api()
            v1 = client.CoreV1Api()

            if deployment_name:
                deployments = [apps.read_namespaced_deployment(deployment_name, namespace)]
            else:
                deployments = apps.list_namespaced_deployment(namespace).items

            cmd = ["kubectl", "top", "pods", "-n", namespace, "--no-headers"]
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)

            usage_map = {}
            if result.returncode == 0:
                for line in result.stdout.strip().split("\n"):
                    if not line.strip():
                        continue
                    parts = line.split()
                    if len(parts) >= 3:
                        usage_map[parts[0]] = {
                            "cpu": _parse_cpu(parts[1]),
                            "memory": _parse_memory(parts[2])
                        }

            suggestions = []
            for deploy in deployments:
                pods = v1.list_namespaced_pod(
                    namespace,
                    label_selector=",".join([f"{k}={v}" for k, v in (deploy.spec.selector.match_labels or {}).items()])
                ).items

                for container_spec in deploy.spec.template.spec.containers:
                    current_cpu = 0
                    current_mem = 0
                    if container_spec.resources and container_spec.resources.requests:
                        current_cpu = _parse_cpu(container_spec.resources.requests.get("cpu", "0"))
                        current_mem = _parse_memory(container_spec.resources.requests.get("memory", "0"))

                    max_cpu_used = 0
                    max_mem_used = 0
                    for pod in pods:
                        if pod.metadata.name in usage_map:
                            max_cpu_used = max(max_cpu_used, usage_map[pod.metadata.name]["cpu"])
                            max_mem_used = max(max_mem_used, usage_map[pod.metadata.name]["memory"])

                    if max_cpu_used > 0 or max_mem_used > 0:
                        suggested_cpu = int(max_cpu_used * 1.2)
                        suggested_mem = int(max_mem_used * 1.2)

                        suggestions.append({
                            "deployment": deploy.metadata.name,
                            "container": container_spec.name,
                            "current": {
                                "cpu": f"{current_cpu}m",
                                "memory": f"{current_mem // (1024*1024)}Mi"
                            },
                            "observed": {
                                "maxCpu": f"{max_cpu_used}m",
                                "maxMemory": f"{max_mem_used // (1024*1024)}Mi"
                            },
                            "suggested": {
                                "cpu": f"{suggested_cpu}m",
                                "memory": f"{suggested_mem // (1024*1024)}Mi"
                            },
                            "potentialSavings": {
                                "cpu": f"{max(0, current_cpu - suggested_cpu)}m",
                                "memory": f"{max(0, (current_mem - suggested_mem) // (1024*1024))}Mi"
                            }
                        })

            return {
                "success": True,
                "namespace": namespace,
                "note": "Suggestions based on current usage + 20% buffer. Monitor over time for accuracy.",
                "suggestions": suggestions
            }
        except subprocess.TimeoutExpired:
            return {"success": False, "error": "Metrics retrieval timed out"}
        except Exception as e:
            logger.error(f"Error optimizing resources: {e}")
            return {"success": False, "error": str(e)}
```