@zerry_jin/k8s-doctor-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,678 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * K8s Doctor MCP Server
4
+ *
5
+ * MCP server for AI-powered Kubernetes cluster diagnosis and problem solving.
6
+ * Goes beyond simple queries - analyzes error logs, identifies root causes, and suggests solutions.
7
+ *
8
+ * @author zerry
9
+ * @license MIT
10
+ */
11
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
12
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
13
+ import * as z from 'zod';
14
+ import { loadK8sConfig, createK8sClients } from './utils/k8s-client.js';
15
+ import { diagnosePod, diagnoseCrashLoop } from './diagnostics/pod-diagnostics.js';
16
+ import { analyzeLogs } from './analyzers/log-analyzer.js';
17
+ import { diagnoseClusterHealth } from './diagnostics/cluster-health.js';
18
+ import { formatIssues, formatBytes, formatCPU, getHealthEmoji, createTable } from './utils/formatters.js';
19
+ import { MemoryCache, getOrCompute } from './utils/cache.js';
20
// --------------------------------------------
// Module-level state shared by every tool handler
// --------------------------------------------

// Name and version handed to the McpServer constructor.
const serverInfo = { name: 'k8s-doctor', version: '1.0.0' };
const server = new McpServer(serverInfo);

// Both slots stay null until getK8sClients() populates them on first use.
let k8sClients = null;
let k8sConfig = null;

// Caches used by the list-namespaces and list-pods tools. The value is passed
// straight to MemoryCache; the original comment describes it as a 30 second TTL.
const LIST_CACHE_TTL_MS = 30000;
const namespaceCache = new MemoryCache(LIST_CACHE_TTL_MS);
const podListCache = new MemoryCache(LIST_CACHE_TTL_MS);
33
/**
 * Returns the shared Kubernetes client bundle, creating it on first use.
 *
 * When either module-level slot (`k8sClients`, `k8sConfig`) is empty, the
 * config is loaded with loadK8sConfig() and handed to createK8sClients().
 * Both slots are written only after both steps succeed, so a failed attempt
 * leaves no half-initialized state and the next call retries from scratch.
 *
 * Progress is reported with console.error (stderr).
 *
 * @returns {*} The value produced by createK8sClients().
 * @throws {Error} "Cannot connect to Kubernetes: ..." when loading the config
 *   or building the clients fails; the underlying error is attached as `cause`.
 */
function getK8sClients() {
    if (k8sClients && k8sConfig) {
        return k8sClients;
    }
    try {
        const config = loadK8sConfig();
        const clients = createK8sClients(config);
        k8sConfig = config;
        k8sClients = clients;
        console.error('✅ Kubernetes connection established');
    }
    catch (error) {
        // Non-Error values can be thrown too; never print "undefined" as the reason.
        const reason = error instanceof Error ? error.message : String(error);
        console.error('❌ Kubernetes connection failed:', reason);
        throw new Error(`Cannot connect to Kubernetes: ${reason}\nPlease verify kubectl is configured.`, { cause: error });
    }
    return k8sClients;
}
50
/**
 * 'diagnose-pod' tool: comprehensive pod diagnostics.
 *
 * Runs diagnosePod() for a single pod and renders the result as a Markdown
 * report: header, summary, container table, CPU/memory figures, the issue
 * list and up to five events. Failures are returned as a text result that
 * includes a kubectl hint; nothing is thrown to the caller.
 *
 * NOTE(review): the `detailed` input is declared in the schema but the
 * handler never reads it, so it currently has no effect — confirm whether
 * diagnosePod() is supposed to receive it.
 */
server.registerTool('diagnose-pod', {
    title: 'Comprehensive pod diagnostics',
    description: 'Analyzes pod status, logs, and events to identify root causes and suggest solutions',
    inputSchema: {
        namespace: z.string().describe('Namespace'),
        podName: z.string().describe('Pod name'),
        detailed: z.boolean().default(true).describe('Enable detailed analysis (includes logs)'),
    },
}, async ({ namespace, podName }) => {
    try {
        const clients = getK8sClients();
        const diagnostics = await diagnosePod(clients.core, namespace, podName, clients.metrics);
        const { podInfo, resources } = diagnostics;
        let result = `# 🏥 Pod Diagnosis Report\n\n`;
        result += `**Pod**: ${podInfo.name}\n`;
        result += `**Namespace**: ${podInfo.namespace}\n`;
        result += `**Status**: ${podInfo.phase}\n`;
        result += `**Node**: ${podInfo.nodeName || 'N/A'}\n`;
        result += `**Health**: ${getHealthEmoji(diagnostics.healthScore)} ${diagnostics.healthScore}/100\n\n`;
        // Summary
        result += `## 📊 Summary\n\n${diagnostics.summary}\n\n`;
        // Container Status
        result += `## 🐳 Container Status\n\n`;
        const containerRows = diagnostics.containers.map((c) => [
            c.name,
            c.ready ? '✅' : '❌',
            c.restartCount.toString(),
            describeContainerState(c.state),
        ]);
        result += createTable(['Name', 'Ready', 'Restarts', 'State'], containerRows);
        result += '\n\n';
        // Resource usage
        result += `## 💾 Resources\n\n`;
        result += renderPodCpuSection(resources.cpu);
        result += renderPodMemorySection(resources.memory);
        // Only suggest Metrics Server when no usage figure exists at all; a
        // reported usage of 0 is still a real measurement.
        if (resources.cpu.current == null && resources.memory.current == null) {
            result += `\n💡 **Tip**: Install Metrics Server to see real-time usage:\n`;
            result += '```bash\nkubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml\n```\n';
        }
        result += '\n\n';
        // Issues
        result += formatIssues(diagnostics.issues);
        // Recent Events
        if (diagnostics.events.length > 0) {
            result += `## 📋 Recent Events (last 5)\n\n`;
            for (const event of diagnostics.events.slice(0, 5)) {
                const icon = event.type === 'Warning' ? '⚠️' : 'ℹ️';
                // A missing count is shown as a single occurrence, as in check-events.
                result += `${icon} **${event.reason}** (${event.count ?? 1} times)\n`;
                result += ` ${event.message}\n\n`;
            }
        }
        return { content: [{ type: 'text', text: result }] };
    }
    catch (error) {
        return {
            content: [{
                    type: 'text',
                    text: `❌ Pod diagnosis failed: ${error.message}\n\nVerify pod exists:\n\`\`\`bash\nkubectl get pod ${podName} -n ${namespace}\n\`\`\``,
                }],
        };
    }
});
/** Turns a container state object into the label shown in the State column. */
function describeContainerState(state) {
    if (state.running) {
        return 'Running';
    }
    if (state.waiting) {
        return `Waiting: ${state.waiting.reason}`;
    }
    if (state.terminated) {
        return `Terminated: ${state.terminated.reason}`;
    }
    return 'Unknown';
}
/** Renders the "**CPU**" bullet list; the usage percentage is flagged from 80% upwards. */
function renderPodCpuSection(cpu) {
    let text = `**CPU**:\n`;
    if (cpu.current != null) {
        text += ` - Current: ${formatCPU(cpu.current)}`;
        if (cpu.usagePercent != null) {
            const flag = cpu.usagePercent >= 80 ? ' ⚠️' : '';
            text += ` (${cpu.usagePercent.toFixed(1)}%${flag})`;
        }
        text += '\n';
    }
    if (cpu.requested) {
        text += ` - Requested: ${formatCPU(cpu.requested)}\n`;
    }
    if (cpu.limit) {
        text += ` - Limit: ${formatCPU(cpu.limit)}\n`;
    }
    if (cpu.isThrottled) {
        text += ` - ⚠️ **WARNING**: CPU usage is high (>80%)\n`;
    }
    return text;
}
/** Renders the "**Memory**" bullet list; usage is flagged at 80% (warning) and 90% (critical). */
function renderPodMemorySection(memory) {
    let text = `\n**Memory**:\n`;
    if (memory.current != null) {
        text += ` - Current: ${formatBytes(memory.current)}`;
        if (memory.usagePercent != null) {
            let flag = '';
            if (memory.usagePercent >= 90) {
                flag = ' 🔴';
            }
            else if (memory.usagePercent >= 80) {
                flag = ' ⚠️';
            }
            text += ` (${memory.usagePercent.toFixed(1)}%${flag})`;
        }
        text += '\n';
    }
    if (memory.requested) {
        text += ` - Requested: ${formatBytes(memory.requested)}\n`;
    }
    if (memory.limit) {
        text += ` - Limit: ${formatBytes(memory.limit)}\n`;
    }
    if (memory.isOOMRisk) {
        text += ` - 🔴 **CRITICAL**: OOM risk detected (>90%)\n`;
    }
    return text;
}
157
/**
 * 'debug-crashloop' tool: specialized CrashLoopBackOff diagnostics.
 *
 * Delegates the analysis to diagnoseCrashLoop() and renders the returned
 * issues with formatIssues(), followed by a block of kubectl commands for
 * manual follow-up. When `containerName` is given, the suggested
 * `kubectl logs` commands target that container with `-c`, so they also work
 * on multi-container pods. Failures are returned as a text result.
 */
server.registerTool('debug-crashloop', {
    title: 'CrashLoopBackOff Diagnostics',
    description: 'Analyzes pods in CrashLoop state by examining exit codes, logs, and events to find the root cause',
    inputSchema: {
        namespace: z.string().describe('Namespace'),
        podName: z.string().describe('Pod name'),
        containerName: z.string().optional().describe('Container name (optional)'),
    },
}, async ({ namespace, podName, containerName }) => {
    try {
        const clients = getK8sClients();
        const issues = await diagnoseCrashLoop(clients.core, clients.log, namespace, podName, containerName);
        let result = `# 🔍 CrashLoopBackOff Diagnostics\n\n`;
        result += `**Pod**: ${podName}\n`;
        result += `**Namespace**: ${namespace}\n\n`;
        result += issues.length === 0
            ? '✅ No CrashLoop issues detected.\n'
            : formatIssues(issues);
        // Additional debugging commands
        const containerFlag = containerName ? ` -c ${containerName}` : '';
        const target = `${podName} -n ${namespace}`;
        result += `\n## 🛠️ Additional Debugging Commands\n\n`;
        result += [
            '```bash\n',
            `# Check previous logs (important!)\n`,
            `kubectl logs ${target}${containerFlag} --previous\n\n`,
            `# Check current logs\n`,
            `kubectl logs ${target}${containerFlag}\n\n`,
            `# Check events\n`,
            `kubectl describe pod ${target}\n\n`,
            `# Check pod YAML\n`,
            `kubectl get pod ${target} -o yaml\n`,
            '```\n',
        ].join('');
        return { content: [{ type: 'text', text: result }] };
    }
    catch (error) {
        return {
            content: [{
                    type: 'text',
                    text: `❌ CrashLoop diagnostics failed: ${error.message}`,
                }],
        };
    }
});
206
/**
 * 'analyze-logs' tool: smart log analysis.
 *
 * Calls analyzeLogs() and renders its result as Markdown: the summary, each
 * detected error pattern (causes, solutions, first five matched line
 * numbers), up to five repeated errors, the recommendations and the last ten
 * error lines. Failures are returned as a text result.
 */
server.registerTool('analyze-logs', {
    title: 'Smart Log Analysis',
    description: 'Detects error patterns in logs and suggests causes and solutions (Connection Refused, OOM, DB errors, etc.)',
    inputSchema: {
        namespace: z.string().describe('Namespace'),
        podName: z.string().describe('Pod name'),
        containerName: z.string().optional().describe('Container name (optional)'),
        tailLines: z.number().default(500).describe('Number of recent lines to analyze'),
    },
}, async ({ namespace, podName, containerName, tailLines }) => {
    try {
        const analysis = await analyzeLogs(getK8sClients().log, namespace, podName, containerName, tailLines);
        let result = `# 📝 Log Analysis Results\n\n`;
        result += `${analysis.summary}\n\n`;
        // Detected patterns
        if (analysis.patterns.length > 0) {
            result += `## 🎯 Detected Error Patterns\n\n`;
            for (const pattern of analysis.patterns) {
                const { matchedLines } = pattern;
                result += `### ${pattern.name} (${matchedLines.length} occurrences)\n\n`;
                result += `**Description**: ${pattern.description}\n\n`;
                result += `**Possible Causes**:\n`;
                for (const cause of pattern.possibleCauses) {
                    result += ` - ${cause}\n`;
                }
                result += `\n**Solutions**:\n`;
                for (const solution of pattern.solutions) {
                    result += ` - ${solution}\n`;
                }
                // List at most five line numbers and summarize the remainder.
                result += `\n**Locations**: lines ${matchedLines.slice(0, 5).join(', ')}`;
                if (matchedLines.length > 5) {
                    result += ` and ${matchedLines.length - 5} more`;
                }
                result += '\n\n---\n\n';
            }
        }
        // Repeated errors
        if (analysis.repeatedErrors.length > 0) {
            result += `## 🔁 Repeated Errors\n\n`;
            for (const repeated of analysis.repeatedErrors.slice(0, 5)) {
                result += `- **${repeated.message}** (${repeated.count} times)\n`;
                result += ` Lines ${repeated.firstLine} ~ ${repeated.lastLine}\n\n`;
            }
        }
        // Recommendations
        result += `## 💡 Recommendations\n\n`;
        for (const rec of analysis.recommendations) {
            result += `${rec}\n\n`;
        }
        // Error log samples
        if (analysis.errorLines.length > 0) {
            result += `\n## ❌ Error Log Samples (last 10)\n\n\`\`\`\n`;
            for (const line of analysis.errorLines.slice(-10)) {
                result += `${line.lineNumber}: ${line.content}\n`;
            }
            result += '```\n';
        }
        return { content: [{ type: 'text', text: result }] };
    }
    catch (error) {
        return {
            content: [{
                    type: 'text',
                    text: `❌ Log analysis failed: ${error.message}`,
                }],
        };
    }
});
279
/**
 * 'check-resources' tool: resource usage check.
 *
 * Reads one pod (when `podName` is given) or every pod in the namespace,
 * sums the CPU and memory limits of each pod's containers, and, when pod
 * metrics can be fetched, compares live usage against those limits.
 * CPU is handled in millicores and memory in bytes. Usage is flagged at
 * 80% (CPU and memory warning) and 90% (memory critical).
 *
 * Fetching metrics is best-effort: if it fails, the report falls back to the
 * values from the pod specs. Other failures are returned as a text result.
 */
server.registerTool('check-resources', {
    title: 'Resource Usage Check',
    description: 'Compares pod CPU/Memory usage against limits to check for threshold violations',
    inputSchema: {
        namespace: z.string().describe('Namespace'),
        podName: z.string().optional().describe('Specific pod (optional, entire namespace if empty)'),
    },
}, async ({ namespace, podName }) => {
    try {
        const clients = getK8sClients();
        const pods = podName
            ? [await clients.core.readNamespacedPod({ name: podName, namespace })]
            : (await clients.core.listNamespacedPod({ namespace })).items;
        // Try to get metrics
        let usageByPod = new Map();
        try {
            const metrics = await clients.metrics.getPodMetrics(namespace);
            usageByPod = collectPodUsage(metrics.items || []);
        }
        catch (e) {
            // Deliberate best-effort: without metrics the report still shows spec values.
            console.error('Pod metrics unavailable:', e instanceof Error ? e.message : e);
        }
        const metricsAvailable = usageByPod.size > 0;
        let result = `# 💾 Resource Usage Check\n\n`;
        result += metricsAvailable
            ? `✅ **Real-time metrics available**\n\n`
            : `⚠️ **Metrics Server not available** - showing only spec values\n\n`;
        for (const pod of pods) {
            const containers = pod.spec?.containers || [];
            const usage = usageByPod.get(pod.metadata?.name || '');
            const { cpuLimit, memLimit } = sumContainerLimits(containers);
            // A percentage only makes sense with both live usage and a non-zero limit.
            const cpuPercent = usage && cpuLimit > 0 ? (usage.cpu / cpuLimit) * 100 : undefined;
            const memPercent = usage && memLimit > 0 ? (usage.memory / memLimit) * 100 : undefined;
            result += `## Pod: ${pod.metadata?.name}\n\n`;
            // Show current usage if available
            if (usage) {
                result += `**Current Usage**:\n`;
                result += ` - CPU: ${formatCPU(usage.cpu)}`;
                if (cpuPercent !== undefined) {
                    result += ` (${cpuPercent.toFixed(1)}%${cpuPercent >= 80 ? ' ⚠️' : ''})`;
                }
                result += '\n';
                result += ` - Memory: ${formatBytes(usage.memory)}`;
                if (memPercent !== undefined) {
                    const flag = memPercent >= 90 ? ' 🔴' : memPercent >= 80 ? ' ⚠️' : '';
                    result += ` (${memPercent.toFixed(1)}%${flag})`;
                }
                result += '\n\n';
            }
            const rows = containers.map((container) => {
                const requests = container.resources?.requests || {};
                const limits = container.resources?.limits || {};
                return [
                    container.name,
                    requests.cpu || 'N/A',
                    limits.cpu || '⚠️ None',
                    requests.memory || 'N/A',
                    limits.memory || '⚠️ None',
                ];
            });
            result += `**Resource Specs**:\n`;
            result += createTable(['Container', 'CPU Request', 'CPU Limit', 'Memory Request', 'Memory Limit'], rows);
            result += '\n';
            // Warnings
            const noLimits = containers.filter((c) => !c.resources?.limits);
            if (noLimits.length > 0) {
                result += `\n⚠️ **Warning**: ${noLimits.length} container(s) have no resource limits set\n`;
                result += `This can lead to unlimited resource consumption.\n\n`;
            }
            // Threshold warnings
            if (cpuPercent !== undefined && cpuPercent >= 80) {
                result += `⚠️ **CPU Warning**: Usage is high (${cpuPercent.toFixed(1)}%)\n`;
            }
            if (memPercent !== undefined) {
                if (memPercent >= 90) {
                    result += `🔴 **Memory Critical**: OOM risk detected (${memPercent.toFixed(1)}%)\n`;
                }
                else if (memPercent >= 80) {
                    result += `⚠️ **Memory Warning**: Usage is high (${memPercent.toFixed(1)}%)\n`;
                }
            }
        }
        if (!metricsAvailable) {
            result += `\n💡 **Tip**: Install Metrics Server to see real-time usage:\n`;
            result += '```bash\nkubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml\n```\n';
        }
        return { content: [{ type: 'text', text: result }] };
    }
    catch (error) {
        return {
            content: [{
                    type: 'text',
                    text: `❌ Resource check failed: ${error.message}`,
                }],
        };
    }
});
/**
 * Converts a CPU quantity string to millicores.
 *
 * Handles the suffixes `n` (nanocores), `u` (microcores) and `m`
 * (millicores); a value without suffix is treated as whole cores.
 * Returns 0 for a missing or unparseable quantity so sums stay finite.
 */
function parseCpuMillicores(quantity) {
    if (quantity == null) {
        return 0;
    }
    const text = String(quantity).trim();
    const value = Number.parseFloat(text);
    if (!Number.isFinite(value)) {
        return 0;
    }
    switch (text.slice(-1)) {
        case 'n':
            return value / 1_000_000;
        case 'u':
            return value / 1000;
        case 'm':
            return value;
        default:
            return value * 1000;
    }
}
/** Parses a memory quantity to bytes via parseMemoryValue(); missing or unparseable input counts as 0. */
function parseMemoryBytesOrZero(quantity) {
    if (quantity == null) {
        return 0;
    }
    const bytes = parseMemoryValue(String(quantity));
    return Number.isFinite(bytes) ? bytes : 0;
}
/** Builds a Map of pod name -> { cpu (millicores), memory (bytes) } by summing each pod's container usage. */
function collectPodUsage(podMetricsList) {
    const usageByPod = new Map();
    for (const podMetric of podMetricsList) {
        const name = podMetric.metadata?.name;
        if (!name) {
            continue;
        }
        let cpu = 0;
        let memory = 0;
        for (const container of podMetric.containers || []) {
            cpu += parseCpuMillicores(container.usage?.cpu);
            memory += parseMemoryBytesOrZero(container.usage?.memory);
        }
        usageByPod.set(name, { cpu, memory });
    }
    return usageByPod;
}
/** Sums the CPU (millicores) and memory (bytes) limits declared by the given containers. */
function sumContainerLimits(containers) {
    let cpuLimit = 0;
    let memLimit = 0;
    for (const container of containers) {
        const limits = container.resources?.limits || {};
        cpuLimit += parseCpuMillicores(limits.cpu);
        memLimit += parseMemoryBytesOrZero(limits.memory);
    }
    return { cpuLimit, memLimit };
}
429
/**
 * 'full-diagnosis' tool: cluster-wide health diagnosis.
 *
 * Calls diagnoseClusterHealth() (optionally restricted to one namespace) and
 * renders node counts, pod counts, critical issues and recommendations as
 * Markdown. Zero-valued problem counters (not ready, pending, failed,
 * crash-looping) are omitted from the report.
 *
 * On failure the error is logged to stderr and returned as a text result
 * that includes the stack trace (or a JSON dump when no stack is present).
 */
server.registerTool('full-diagnosis', {
    title: 'Cluster-wide Health Diagnosis',
    description: 'Comprehensively analyzes cluster nodes, pods, and resources to evaluate health',
    inputSchema: {
        namespace: z.string().optional().describe('Specific namespace only (optional, all if empty)'),
    },
}, async ({ namespace }) => {
    try {
        const health = await diagnoseClusterHealth(getK8sClients().core, namespace);
        const { nodeHealth, podHealth } = health;
        let result = `# 🏥 Cluster Health Diagnosis\n\n`;
        result += `${health.summary}\n\n`;
        // Node Health
        result += `## 🖥️ Node Status\n\n`;
        result += `- Total: ${nodeHealth.total}\n`;
        result += `- Ready: ${nodeHealth.ready} ✅\n`;
        if (nodeHealth.notReady > 0) {
            result += `- Not Ready: ${nodeHealth.notReady} ❌\n`;
        }
        result += '\n';
        // Pod Health
        result += `## 🐳 Pod Status\n\n`;
        result += `- Total: ${podHealth.total}\n`;
        result += `- Running: ${podHealth.running} ✅\n`;
        if (podHealth.pending > 0) {
            result += `- Pending: ${podHealth.pending} ⏳\n`;
        }
        if (podHealth.failed > 0) {
            result += `- Failed: ${podHealth.failed} ❌\n`;
        }
        if (podHealth.crashLooping > 0) {
            result += `- CrashLoop: ${podHealth.crashLooping} 🔥\n`;
        }
        result += '\n';
        // Critical issues
        if (health.criticalIssues.length > 0) {
            result += `## 🔴 Critical Issues\n\n`;
            result += formatIssues(health.criticalIssues);
        }
        // Recommendations
        result += `## 💡 Recommendations\n\n`;
        for (const rec of health.recommendations) {
            result += `${rec}\n\n`;
        }
        return { content: [{ type: 'text', text: result }] };
    }
    catch (error) {
        console.error('Cluster diagnosis error:', error);
        return {
            content: [{
                    type: 'text',
                    text: `❌ Cluster diagnosis failed: ${error.message}\n\nDetails: ${error.stack || JSON.stringify(error, null, 2)}`,
                }],
        };
    }
});
489
/**
 * 'check-events' tool: event query and analysis.
 *
 * Lists the events of a namespace (optionally filtered to one involved
 * object name through a field selector), orders them newest first and
 * reports up to 20 Warning events. With `showNormal`, the 10 most recent
 * Normal events are appended. Failures are returned as a text result.
 */
server.registerTool('check-events', {
    title: 'Event Query and Analysis',
    description: 'Queries events for specific resources or namespaces and analyzes Warning events',
    inputSchema: {
        namespace: z.string().describe('Namespace'),
        resourceName: z.string().optional().describe('Resource name (optional, entire namespace if empty)'),
        showNormal: z.boolean().default(false).describe('Show Normal events too'),
    },
}, async ({ namespace, resourceName, showNormal }) => {
    try {
        const eventsResponse = await getK8sClients().core.listNamespacedEvent({
            namespace,
            fieldSelector: resourceName ? `involvedObject.name=${resourceName}` : undefined,
        });
        // Sort by time, newest first. Sorting a copy leaves the response object untouched.
        const events = [...eventsResponse.items].sort((a, b) => eventTimeMs(b) - eventTimeMs(a));
        let result = `# 📋 Event Analysis\n\n`;
        result += `**Namespace**: ${namespace}\n`;
        if (resourceName) {
            result += `**Resource**: ${resourceName}\n`;
        }
        result += `\n`;
        const warnings = events.filter((e) => e.type === 'Warning');
        const normals = events.filter((e) => e.type === 'Normal');
        result += `Total ${events.length} events (Warning: ${warnings.length}, Normal: ${normals.length})\n\n`;
        // Warning events
        if (warnings.length > 0) {
            result += `## ⚠️ Warning Events\n\n`;
            for (const event of warnings.slice(0, 20)) {
                result += `**${event.reason}** (${event.count || 1} times)\n`;
                result += ` - ${event.message}\n`;
                result += ` - Target: ${event.involvedObject?.kind}/${event.involvedObject?.name}\n`;
                result += ` - Time: ${event.lastTimestamp || event.eventTime || event.metadata?.creationTimestamp}\n\n`;
            }
        }
        else {
            result += `✅ No Warning events!\n\n`;
        }
        // Normal events (optional)
        if (showNormal && normals.length > 0) {
            result += `## ℹ️ Normal Events (last 10)\n\n`;
            for (const event of normals.slice(0, 10)) {
                result += `- **${event.reason}**: ${event.message}\n`;
            }
            result += '\n';
        }
        return { content: [{ type: 'text', text: result }] };
    }
    catch (error) {
        return {
            content: [{
                    type: 'text',
                    text: `❌ Event query failed: ${error.message}`,
                }],
        };
    }
});
/**
 * Returns an event's most relevant timestamp in epoch milliseconds.
 *
 * Prefers `lastTimestamp`, then `eventTime`, then the creation timestamp.
 * Events without a usable timestamp yield 0, so the sort comparator never
 * receives NaN and such events end up last.
 */
function eventTimeMs(event) {
    const stamp = event.lastTimestamp || event.eventTime || event.metadata?.creationTimestamp;
    if (!stamp) {
        return 0;
    }
    const ms = new Date(stamp).getTime();
    return Number.isNaN(ms) ? 0 : ms;
}
553
/**
 * 'list-namespaces' tool.
 *
 * Utility tool that reports every namespace with its phase. The namespace
 * list is obtained through getOrCompute() with `namespaceCache` under the
 * key 'all-namespaces'; the loader passed to it calls listNamespace().
 * Failures are returned as a text result.
 */
server.registerTool('list-namespaces', {
    title: 'List Namespaces',
    description: 'Lists all namespaces in the cluster',
    inputSchema: {},
}, async () => {
    // Loader handed to getOrCompute(); it performs the actual API call.
    const fetchNamespaces = async () => {
        const nsResponse = await getK8sClients().core.listNamespace();
        return nsResponse.items;
    };
    try {
        const namespaces = await getOrCompute(namespaceCache, 'all-namespaces', fetchNamespaces);
        const entries = namespaces.map((ns) => {
            const phase = ns.status?.phase || 'Unknown';
            const marker = phase === 'Active' ? '✅' : '❌';
            return `${marker} **${ns.metadata?.name}** (${phase})\n`;
        });
        const text = [
            `# 📁 Namespace List\n\n`,
            `Total: ${namespaces.length}\n\n`,
            ...entries,
        ].join('');
        return { content: [{ type: 'text', text }] };
    }
    catch (error) {
        const failure = { type: 'text', text: `❌ Namespace query failed: ${error.message}` };
        return { content: [failure] };
    }
});
587
/**
 * 'list-pods' tool.
 *
 * Utility tool that lists the pods of a namespace as a table. The pod list
 * is obtained through getOrCompute() with `podListCache`, keyed per
 * namespace. By default only problematic pods are shown: pods whose phase is
 * neither Running nor Succeeded, or pods with at least one container
 * restart. With `showAll`, every pod is listed. Failures are returned as a
 * text result.
 */
server.registerTool('list-pods', {
    title: 'List Pods',
    description: 'Lists all pods in a specific namespace',
    inputSchema: {
        namespace: z.string().describe('Namespace'),
        showAll: z.boolean().default(false).describe('Show all pods (default shows only problematic pods)'),
    },
}, async ({ namespace, showAll }) => {
    try {
        // Use cache for pod list per namespace
        const pods = await getOrCompute(podListCache, `pods-${namespace}`, async () => {
            const podsResponse = await getK8sClients().core.listNamespacedPod({ namespace });
            return podsResponse.items;
        });
        let result = `# 🐳 Pod List (${namespace})\n\n`;
        const rows = [];
        for (const pod of pods) {
            const phase = pod.status?.phase || 'Unknown';
            const statuses = pod.status?.containerStatuses || [];
            const restarts = statuses.reduce((sum, c) => sum + (c.restartCount || 0), 0);
            const ready = statuses.filter((c) => c.ready).length;
            const total = statuses.length;
            // Succeeded pods (for example finished Jobs) completed normally and
            // must not be reported as problems.
            const phaseIsHealthy = phase === 'Running' || phase === 'Succeeded';
            const hasProblem = !phaseIsHealthy || restarts > 0;
            if (!showAll && !hasProblem) {
                continue;
            }
            let statusIcon = '⚠️';
            if (!hasProblem) {
                statusIcon = '✅';
            }
            else if (phase === 'Pending') {
                statusIcon = '⏳';
            }
            else if (phase === 'Failed') {
                statusIcon = '❌';
            }
            else if (restarts > 5) {
                statusIcon = '🔥';
            }
            rows.push([
                statusIcon,
                pod.metadata?.name || '',
                phase,
                `${ready}/${total}`,
                restarts.toString(),
                pod.spec?.nodeName || 'N/A',
            ]);
        }
        if (pods.length === 0) {
            // An empty namespace is not the same as "everything is healthy".
            result += 'No pods found in this namespace.\n';
        }
        else if (rows.length === 0) {
            result += '✅ All pods are healthy!\n';
        }
        else {
            result += createTable(['Status', 'Name', 'Phase', 'Ready', 'Restarts', 'Node'], rows);
        }
        return { content: [{ type: 'text', text: result }] };
    }
    catch (error) {
        return {
            content: [{
                    type: 'text',
                    text: `❌ Pod list query failed: ${error.message}`,
                }],
        };
    }
});
647
// ============================================
// Helper functions
// ============================================
/**
 * Converts a Kubernetes memory quantity string to a number of bytes.
 *
 * Supported suffixes:
 *  - binary: Ki, Mi, Gi, Ti, Pi, Ei (powers of 1024)
 *  - decimal: k, M, G, T, P, E (powers of 1000); uppercase `K` is also
 *    accepted as 1000
 *  - `m` (milli, one thousandth of a byte)
 * A value without a recognized suffix is parsed as plain bytes, which also
 * covers exponent notation such as "1e3".
 *
 * @param {string} mem - Quantity such as "128Mi", "1G" or "1048576".
 * @returns {number} Size in bytes, or NaN when the numeric part cannot be parsed.
 */
function parseMemoryValue(mem) {
    // Kept inside the function so the helper works no matter when it is first called.
    const multipliers = [
        ['Ki', 1024],
        ['Mi', 1024 ** 2],
        ['Gi', 1024 ** 3],
        ['Ti', 1024 ** 4],
        ['Pi', 1024 ** 5],
        ['Ei', 1024 ** 6],
        ['k', 1e3],
        ['K', 1e3],
        ['M', 1e6],
        ['G', 1e9],
        ['T', 1e12],
        ['P', 1e15],
        ['E', 1e18],
    ];
    const text = String(mem).trim();
    for (const [suffix, multiplier] of multipliers) {
        if (text.endsWith(suffix)) {
            return Number.parseFloat(text.slice(0, -suffix.length)) * multiplier;
        }
    }
    if (text.endsWith('m')) {
        // Divide rather than multiply by 0.001 to avoid floating-point noise.
        return Number.parseFloat(text) / 1000;
    }
    return Number.parseFloat(text);
}
666
// ============================================
// Server startup
// ============================================
/**
 * Connects the MCP server to a stdio transport and logs a startup banner.
 *
 * The banner is written with console.error (stderr).
 *
 * @returns {Promise<void>} Resolves once server.connect() has completed.
 */
async function main() {
    const transport = new StdioServerTransport();
    await server.connect(transport);
    console.error('🏥 K8s Doctor MCP Server started');
    console.error(' Available in environments where kubectl commands work');
}
// Any startup failure is fatal: log it and exit with a non-zero status.
main().catch((error) => {
    console.error('Fatal error:', error);
    process.exit(1);
});