mcp-server-kubernetes 3.2.1 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -171,6 +171,73 @@ gemini extensions install https://github.com/Flux159/mcp-server-kubernetes
171
171
  - Guides through a systematic Kubernetes troubleshooting flow for pods based on a keyword and optional namespace.
172
172
  - [x] Non-destructive mode for read and create/update-only access to clusters
173
173
  - [x] Secrets masking for security (masks sensitive data in `kubectl get secrets` commands, does not affect logs)
174
+ - [x] **OpenTelemetry Observability** (opt-in)
175
+ - Distributed tracing for all tool calls
176
+ - Export to Jaeger, Tempo, Grafana, or any OTLP backend
177
+ - Configurable sampling strategies
178
+ - Rich span attributes (tool name, duration, K8s context, errors)
179
+ - See [docs/OBSERVABILITY.md](docs/OBSERVABILITY.md) for details
180
+
181
+ ## Observability
182
+
183
+ The MCP Kubernetes server includes optional **OpenTelemetry integration** for comprehensive observability. This feature is disabled by default and can be enabled via environment variables or Helm configuration.
184
+
185
+ ### Quick Start
186
+
187
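+ Enable observability by setting both environment variables below (telemetry stays disabled unless `ENABLE_TELEMETRY` is `true` or `1` **and** `OTEL_EXPORTER_OTLP_ENDPOINT` is set):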
188
+
189
+ ```bash
190
+ export ENABLE_TELEMETRY=true
191
+ export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317
192
+
193
+ npx mcp-server-kubernetes
194
+ ```
195
+
196
+ ### What Gets Traced
197
+
198
+ - **All tool calls**: kubectl_get, kubectl_apply, kubectl_logs, etc.
199
+ - **Execution duration**: How long each operation takes
200
+ - **Success/failure status**: Automatic error tracking
201
+ - **Kubernetes context**: Namespace, context, resource type
202
+ - **Rich metadata**: Host, process, and custom attributes
203
+
204
+ ### Backends Supported
205
+
206
+ Works with any OTLP-compatible backend:
207
+ - **Jaeger** (open source)
208
+ - **Grafana Tempo** (open source)
209
+ - **Grafana Cloud** (commercial)
210
+ - **Datadog**, **New Relic**, **Honeycomb**, **Lightstep**, **AWS X-Ray**
211
+
212
+ ### Configuration
213
+
214
+ See **[docs/OBSERVABILITY.md](docs/OBSERVABILITY.md)** for comprehensive documentation including:
215
+ - Configuration options
216
+ - Deployment examples (Kubernetes, Helm, Claude Code)
217
+ - Sampling strategies
218
+ - Production best practices
219
+ - Troubleshooting guide
220
+
221
+ ### Example with Jaeger
222
+
223
+ ```bash
224
+ # Start Jaeger
225
+ docker run -d --name jaeger \
226
+ -e COLLECTOR_OTLP_ENABLED=true \
227
+ -p 16686:16686 \
228
+ -p 4317:4317 \
229
+ jaegertracing/all-in-one:latest
230
+
231
+ # Enable telemetry
232
+ export ENABLE_TELEMETRY=true
233
+ export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317
234
+ export OTEL_TRACES_SAMPLER=always_on
235
+
236
+ # Run server
237
+ npx mcp-server-kubernetes
238
+
239
+ # View traces: http://localhost:16686
240
+ ```
174
241
 
175
242
  ## Prompts
176
243
 
@@ -0,0 +1,30 @@
1
+ import { NodeSDK } from "@opentelemetry/sdk-node";
2
+ /**
3
+ * Telemetry configuration for OpenTelemetry integration
4
+ * Supports environment variable configuration for flexible deployment
5
+ */
6
+ export interface TelemetryConfig {
7
+ enabled: boolean;
8
+ endpoint?: string;
9
+ serviceName: string;
10
+ serviceVersion: string;
11
+ resourceAttributes: Record<string, string>;
12
+ sampler?: {
13
+ type: "always_on" | "always_off" | "traceidratio";
14
+ arg?: number;
15
+ };
16
+ captureResponseMetadata: boolean;
17
+ }
18
+ /**
19
+ * Get telemetry configuration from environment variables
20
+ */
21
+ export declare function getTelemetryConfig(): TelemetryConfig;
22
+ /**
23
+ * Initialize OpenTelemetry SDK with configuration
24
+ * Call this before starting the MCP server
25
+ */
26
+ export declare function initializeTelemetry(): NodeSDK | null;
27
+ /**
28
+ * Get telemetry configuration summary for logging
29
+ */
30
+ export declare function getTelemetryConfigSummary(): string;
@@ -0,0 +1,155 @@
1
+ import { NodeSDK, resources } from "@opentelemetry/sdk-node";
2
+ import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-grpc";
3
+ import { SEMRESATTRS_SERVICE_NAME, SEMRESATTRS_SERVICE_VERSION, } from "@opentelemetry/semantic-conventions";
4
+ import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
5
+ import { serverConfig } from "./server-config.js";
6
/**
 * Parse OpenTelemetry sampling configuration from environment variables.
 *
 * Reads OTEL_TRACES_SAMPLER (sampler type) and OTEL_TRACES_SAMPLER_ARG
 * (sampling ratio). The ratio is attached only for ratio-based samplers,
 * i.e. any type containing "traceidratio" (which also matches
 * "parentbased_traceidratio"), and only when it is a plain number in [0, 1].
 * An invalid or out-of-range ratio is ignored rather than rejected.
 *
 * @returns `{ type }` or `{ type, arg }`, or `undefined` when
 *          OTEL_TRACES_SAMPLER is unset or empty.
 */
function parseSamplerConfig() {
    const samplerType = process.env.OTEL_TRACES_SAMPLER;
    if (!samplerType) {
        return undefined;
    }
    const rawArg = (process.env.OTEL_TRACES_SAMPLER_ARG ?? "").trim();
    if (rawArg === "" || !samplerType.includes("traceidratio")) {
        return { type: samplerType };
    }
    // Number() rejects trailing garbage ("0.5abc" -> NaN), whereas parseFloat()
    // would silently accept it as 0.5 and hide a misconfiguration.
    const ratio = Number(rawArg);
    const isValidRatio = Number.isFinite(ratio) && ratio >= 0 && ratio <= 1;
    return isValidRatio
        ? { type: samplerType, arg: ratio }
        : { type: samplerType };
}
26
/**
 * Parse resource attributes from the OTEL_RESOURCE_ATTRIBUTES environment variable.
 * Format: "key1=value1,key2=value2"
 *
 * Each pair is split on its FIRST "=" only, so a value that itself contains
 * "=" (for example base64 padding) is kept intact instead of being truncated.
 * Values are percent-decoded; a value that is not valid percent-encoding is
 * kept verbatim. Pairs with an empty key or an empty value are skipped, and a
 * repeated key keeps its last value.
 *
 * @returns Plain object mapping attribute names to string values
 *          (empty when the variable is unset or empty).
 */
function parseResourceAttributes() {
    const raw = process.env.OTEL_RESOURCE_ATTRIBUTES;
    const attrs = new Map();
    if (!raw) {
        return Object.fromEntries(attrs);
    }
    for (const pair of raw.split(",")) {
        const separator = pair.indexOf("=");
        if (separator === -1) {
            continue;
        }
        const key = pair.slice(0, separator).trim();
        const rawValue = pair.slice(separator + 1).trim();
        if (!key || !rawValue) {
            continue;
        }
        let value = rawValue;
        try {
            value = decodeURIComponent(rawValue);
        }
        catch (_err) {
            // Not valid percent-encoding (e.g. a bare "%"): keep the raw text.
        }
        attrs.set(key, value);
    }
    return Object.fromEntries(attrs);
}
44
/**
 * Build the telemetry configuration from environment variables.
 *
 * Telemetry is opt-in: it is enabled only when ENABLE_TELEMETRY is "true" or
 * "1" AND OTEL_EXPORTER_OTLP_ENDPOINT is set. Response metadata capture is on
 * unless OTEL_CAPTURE_RESPONSE_METADATA is "false" or "0".
 *
 * @returns The resolved telemetry configuration.
 */
export function getTelemetryConfig() {
    const env = process.env;
    const endpoint = env.OTEL_EXPORTER_OTLP_ENDPOINT;
    const telemetryFlag = env.ENABLE_TELEMETRY;
    const optedIn = telemetryFlag === "1" || telemetryFlag === "true";
    const captureFlag = env.OTEL_CAPTURE_RESPONSE_METADATA;
    const captureOptedOut = captureFlag === "false" || captureFlag === "0";
    return {
        // Both the explicit opt-in and an endpoint are required.
        enabled: optedIn && Boolean(endpoint),
        endpoint,
        serviceName: env.OTEL_SERVICE_NAME || serverConfig.name,
        serviceVersion: env.OTEL_SERVICE_VERSION || serverConfig.version,
        resourceAttributes: parseResourceAttributes(),
        sampler: parseSamplerConfig(),
        // Enabled by default; disable with OTEL_CAPTURE_RESPONSE_METADATA=false
        captureResponseMetadata: !captureOptedOut,
    };
}
69
/**
 * Initialize the OpenTelemetry SDK from the environment-derived configuration.
 * Call this before starting the MCP server.
 *
 * @returns The started NodeSDK instance, or `null` when telemetry is disabled,
 *          misconfigured, or the SDK failed to start.
 */
export function initializeTelemetry() {
    const config = getTelemetryConfig();
    if (!config.enabled) {
        const flag = process.env.ENABLE_TELEMETRY;
        const optedOut = !flag || flag === "false" || flag === "0";
        // Only warn when the user asked for telemetry but gave no endpoint;
        // the default (disabled) case stays silent.
        if (!optedOut && !process.env.OTEL_EXPORTER_OTLP_ENDPOINT) {
            console.error("OpenTelemetry: ENABLE_TELEMETRY=true but OTEL_EXPORTER_OTLP_ENDPOINT not set");
        }
        return null;
    }
    console.error(`Initializing OpenTelemetry: endpoint=${config.endpoint}, service=${config.serviceName}`);
    // OTLP trace exporter pointed at the configured endpoint
    const traceExporter = new OTLPTraceExporter({ url: config.endpoint });
    // Default resource merged with service metadata and user-supplied attributes
    const serviceResource = resources.resourceFromAttributes({
        [SEMRESATTRS_SERVICE_NAME]: config.serviceName,
        [SEMRESATTRS_SERVICE_VERSION]: config.serviceVersion,
        ...config.resourceAttributes,
    });
    const resource = resources.defaultResource().merge(serviceResource);
    // Auto-instrumentation with the (too verbose) fs instrumentation switched off
    const instrumentations = [
        getNodeAutoInstrumentations({
            "@opentelemetry/instrumentation-fs": { enabled: false },
        }),
    ];
    const sdk = new NodeSDK({ resource, traceExporter, instrumentations });
    try {
        sdk.start();
        console.error("OpenTelemetry SDK initialized successfully");
        // Graceful shutdown on process termination
        const shutdownOnSigterm = async () => {
            try {
                await sdk.shutdown();
                console.error("OpenTelemetry SDK shut down successfully");
            }
            catch (error) {
                console.error("Error shutting down OpenTelemetry SDK:", error);
            }
        };
        process.on("SIGTERM", shutdownOnSigterm);
        return sdk;
    }
    catch (error) {
        console.error("Failed to initialize OpenTelemetry SDK:", error);
        return null;
    }
}
134
/**
 * Produce a one-line, human-readable summary of the telemetry configuration
 * for logging.
 *
 * @returns "Telemetry: Disabled", or a comma-separated list of the endpoint,
 *          service, and (when present) sampler and resource attribute count.
 */
export function getTelemetryConfigSummary() {
    const { enabled, endpoint, serviceName, serviceVersion, sampler, resourceAttributes } = getTelemetryConfig();
    if (!enabled) {
        return "Telemetry: Disabled";
    }
    const samplerArg = sampler && sampler.arg !== undefined ? `(${sampler.arg})` : "";
    const attributeTotal = Object.keys(resourceAttributes).length;
    return [
        `Telemetry: Enabled`,
        `Endpoint: ${endpoint}`,
        `Service: ${serviceName}@${serviceVersion}`,
        ...(sampler ? [`Sampler: ${sampler.type}${samplerArg}`] : []),
        ...(attributeTotal > 0 ? [`Resource Attributes: ${attributeTotal}`] : []),
    ].join(", ");
}
package/dist/index.d.ts CHANGED
@@ -393,6 +393,17 @@ declare const allTools: ({
393
393
  };
394
394
  readonly required: readonly ["operation"];
395
395
  };
396
+ } | {
397
+ readonly name: "kubectl_reconnect";
398
+ readonly description: "Reconnect to the Kubernetes API server by recreating all API clients. Use this after cluster upgrades (e.g., EKS control plane upgrades that rotate ENIs/IPs) to force fresh DNS resolution and new TCP connections.";
399
+ readonly annotations: {
400
+ readonly readOnlyHint: false;
401
+ };
402
+ readonly inputSchema: {
403
+ readonly type: "object";
404
+ readonly properties: {};
405
+ readonly required: readonly [];
406
+ };
396
407
  } | {
397
408
  readonly name: "kubectl_get";
398
409
  readonly description: "Get or list Kubernetes resources by resource type, name, and optionally namespace";
package/dist/index.js CHANGED
@@ -1,4 +1,8 @@
1
1
  #!/usr/bin/env node
2
+ // Initialize OpenTelemetry before any other imports
3
+ // This must be done first to ensure proper instrumentation
4
+ import { initializeTelemetry, getTelemetryConfigSummary } from "./config/telemetry-config.js";
5
+ const telemetrySdk = initializeTelemetry();
2
6
  import { Server } from "@modelcontextprotocol/sdk/server/index.js";
3
7
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
8
  import { installHelmChart, installHelmChartSchema, upgradeHelmChart, upgradeHelmChartSchema, uninstallHelmChart, uninstallHelmChartSchema, } from "./tools/helm-operations.js";
@@ -14,6 +18,7 @@ import { startSSEServer } from "./utils/sse.js";
14
18
  import { startPortForward, PortForwardSchema, stopPortForward, StopPortForwardSchema, } from "./tools/port_forward.js";
15
19
  import { kubectlScale, kubectlScaleSchema } from "./tools/kubectl-scale.js";
16
20
  import { kubectlContext, kubectlContextSchema, } from "./tools/kubectl-context.js";
21
+ import { kubectlReconnect, kubectlReconnectSchema, } from "./tools/kubectl-reconnect.js";
17
22
  import { kubectlGet, kubectlGetSchema } from "./tools/kubectl-get.js";
18
23
  import { kubectlDescribe, kubectlDescribeSchema, } from "./tools/kubectl-describe.js";
19
24
  import { kubectlApply, kubectlApplySchema } from "./tools/kubectl-apply.js";
@@ -26,6 +31,7 @@ import { kubectlRollout, kubectlRolloutSchema, } from "./tools/kubectl-rollout.j
26
31
  import { registerPromptHandlers } from "./prompts/index.js";
27
32
  import { ping, pingSchema } from "./tools/ping.js";
28
33
  import { startStreamableHTTPServer } from "./utils/streamable-http.js";
34
+ import { withTelemetry } from "./middleware/telemetry-middleware.js";
29
35
  // Check environment variables for tool filtering
30
36
  const allowOnlyReadonlyTools = process.env.ALLOW_ONLY_READONLY_TOOLS === "true";
31
37
  const allowedToolsEnv = process.env.ALLOWED_TOOLS;
@@ -36,6 +42,7 @@ const readonlyTools = [
36
42
  kubectlDescribeSchema,
37
43
  kubectlLogsSchema,
38
44
  kubectlContextSchema,
45
+ kubectlReconnectSchema,
39
46
  explainResourceSchema,
40
47
  listApiResourcesSchema,
41
48
  pingSchema,
@@ -64,6 +71,7 @@ const allTools = [
64
71
  kubectlRolloutSchema,
65
72
  // Kubernetes context management
66
73
  kubectlContextSchema,
74
+ kubectlReconnectSchema,
67
75
  // Special operations that aren't covered by simple kubectl commands
68
76
  explainResourceSchema,
69
77
  // Helm operations
@@ -117,13 +125,16 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
117
125
  }
118
126
  return { tools };
119
127
  });
120
- server.setRequestHandler(CallToolRequestSchema, async (request) => {
128
+ server.setRequestHandler(CallToolRequestSchema, withTelemetry(async (request) => {
121
129
  try {
122
130
  const { name, arguments: input = {} } = request.params;
123
131
  // Handle new kubectl-style commands
124
132
  if (name === "kubectl_context") {
125
133
  return await kubectlContext(k8sManager, input);
126
134
  }
135
+ if (name === "kubectl_reconnect") {
136
+ return await kubectlReconnect(k8sManager);
137
+ }
127
138
  if (name === "kubectl_get") {
128
139
  return await kubectlGet(k8sManager, input);
129
140
  }
@@ -219,7 +230,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
219
230
  throw error;
220
231
  throw new McpError(ErrorCode.InternalError, `Tool execution failed: ${error}`);
221
232
  }
222
- });
233
+ }));
223
234
  // Start the server
224
235
  if (process.env.ENABLE_UNSAFE_SSE_TRANSPORT) {
225
236
  startSSEServer(server);
@@ -232,6 +243,7 @@ else if (process.env.ENABLE_UNSAFE_STREAMABLE_HTTP_TRANSPORT) {
232
243
  else {
233
244
  const transport = new StdioServerTransport();
234
245
  console.error(`Starting Kubernetes MCP server v${serverConfig.version}, handling commands...`);
246
+ console.error(getTelemetryConfigSummary());
235
247
  server.connect(transport);
236
248
  }
237
249
  ["SIGINT", "SIGTERM"].forEach((signal) => {
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Tool call handler function type
3
+ */
4
+ type ToolCallHandler = (request: {
5
+ params: {
6
+ name: string;
7
+ _meta?: any;
8
+ arguments?: Record<string, any>;
9
+ };
10
+ method: string;
11
+ }) => Promise<any>;
12
+ /**
13
+ * Wrap a tool call handler with OpenTelemetry tracing
14
+ * Creates a span for each tool invocation with detailed attributes
15
+ *
16
+ * @param handler - The original tool call handler function
17
+ * @returns Wrapped handler with tracing instrumentation
18
+ */
19
+ export declare function withTelemetry(handler: ToolCallHandler): ToolCallHandler;
20
+ /**
21
+ * Create a manual span for non-tool operations
22
+ * Useful for tracing other server operations outside of tool calls
23
+ *
24
+ * @param name - Span name
25
+ * @param fn - Function to execute within the span
26
+ * @returns Result of the function
27
+ */
28
+ export declare function withSpan<T>(name: string, attributes: Record<string, string | number | boolean>, fn: () => Promise<T>): Promise<T>;
29
+ /**
30
+ * Add custom attributes to the current active span
31
+ * Useful for adding context during tool execution
32
+ *
33
+ * @param attributes - Key-value pairs to add to the span
34
+ */
35
+ export declare function addSpanAttributes(attributes: Record<string, string | number | boolean>): void;
36
+ /**
37
+ * Record an event on the current active span
38
+ * Useful for tracking significant moments during tool execution
39
+ *
40
+ * @param name - Event name
41
+ * @param attributes - Optional event attributes
42
+ */
43
+ export declare function recordSpanEvent(name: string, attributes?: Record<string, string | number | boolean>): void;
44
+ export {};
@@ -0,0 +1,178 @@
1
+ import { trace, SpanStatusCode } from "@opentelemetry/api";
2
+ import { getTelemetryConfig } from "../config/telemetry-config.js";
3
+ /**
4
+ * Telemetry middleware for MCP tool call tracing
5
+ * Wraps tool handlers with OpenTelemetry spans to provide automatic instrumentation
6
+ */
7
+ // Get tracer instance
8
+ const tracer = trace.getTracer("mcp-server-kubernetes", "0.1.0");
9
/**
 * Wrap a tool call handler with OpenTelemetry tracing.
 *
 * Creates one span per tool invocation, named `tools/call <tool name>`, and
 * records argument metadata, duration, success/failure status and (unless
 * disabled via configuration) response metadata. Only metadata is recorded,
 * never argument values or response payloads, apart from the well-known
 * `context`, `namespace` and `resourceType` arguments.
 *
 * The handler's result is returned unchanged and its errors are re-thrown
 * unchanged; a failure while collecting response metadata never fails the call.
 *
 * @param handler - The original tool call handler function
 * @returns Wrapped handler with tracing instrumentation
 */
export function withTelemetry(handler) {
    return async (request) => {
        const { name: toolName, arguments: args } = request.params;
        // Create span for this tool call
        return await tracer.startActiveSpan(`tools/call ${toolName}`, {
            attributes: {
                "mcp.method.name": "tools/call",
                "gen_ai.tool.name": toolName,
                "gen_ai.operation.name": "execute_tool",
                "network.transport": detectNetworkTransport(),
            },
        }, async (span) => {
            const startTime = Date.now();
            try {
                recordArgumentMetadata(span, args);
                // Execute the actual tool handler
                const result = await handler(request);
                span.setAttribute("tool.duration_ms", Date.now() - startTime);
                span.setStatus({ code: SpanStatusCode.OK });
                // Can be disabled with OTEL_CAPTURE_RESPONSE_METADATA=false for privacy
                const telemetryConfig = getTelemetryConfig();
                if (result && telemetryConfig.captureResponseMetadata) {
                    recordResponseMetadata(span, result);
                }
                return result;
            }
            catch (error) {
                recordToolFailure(span, error, Date.now() - startTime);
                // Re-throw to maintain original error behavior
                throw error;
            }
            finally {
                span.end();
            }
        });
    };
}
/**
 * Pick the `network.transport` value for the running server: "tcp" when one of
 * the HTTP transports is enabled through its environment variable (the same
 * variables the server entry point checks at start-up), otherwise "pipe" for STDIO.
 */
function detectNetworkTransport() {
    const overHttp = process.env.ENABLE_UNSAFE_SSE_TRANSPORT ||
        process.env.ENABLE_UNSAFE_STREAMABLE_HTTP_TRANSPORT;
    return overHttp ? "tcp" : "pipe";
}
/**
 * Record argument names/count and the common Kubernetes arguments on the span.
 * Argument values other than context/namespace/resourceType are not recorded.
 */
function recordArgumentMetadata(span, args) {
    if (!args) {
        return;
    }
    const argKeys = Object.keys(args);
    span.setAttribute("tool.argument_count", argKeys.length);
    span.setAttribute("tool.argument_keys", argKeys.join(","));
    if (args.context) {
        span.setAttribute("k8s.context", args.context);
    }
    if (args.namespace) {
        span.setAttribute("k8s.namespace", args.namespace);
    }
    if (args.resourceType) {
        span.setAttribute("k8s.resource_type", args.resourceType);
    }
}
/**
 * Record response metadata (sizes, counts, success flags) on the span.
 * Best-effort: any unexpected response shape is swallowed here so that
 * instrumentation can never turn a successful tool call into a failure.
 */
function recordResponseMetadata(span, result) {
    try {
        // MCP response format: { content: [...] }
        if (Array.isArray(result.content)) {
            span.setAttribute("response.content_items", result.content.length);
            const firstItem = result.content[0];
            if (firstItem) {
                span.setAttribute("response.content_type", firstItem.type || "unknown");
                if (firstItem.type === "text" && typeof firstItem.text === "string" && firstItem.text) {
                    // Byte length, not UTF-16 code units, so the attribute name is accurate.
                    span.setAttribute("response.text_size_bytes", Buffer.byteLength(firstItem.text, "utf8"));
                    recordParsedTextMetadata(span, firstItem.text);
                }
            }
        }
        // Direct success indicator on the result itself
        if (typeof result.success === "boolean") {
            span.setAttribute("response.success", result.success);
        }
    }
    catch (_err) {
        // Metadata capture is optional; ignore malformed responses.
    }
}
/**
 * If the text payload is JSON, record list sizes and a `success` flag found in it.
 * Non-JSON text is expected and silently ignored.
 */
function recordParsedTextMetadata(span, text) {
    let parsed;
    try {
        parsed = JSON.parse(text);
    }
    catch (_err) {
        // Not JSON, that's fine - the text size has already been captured.
        return;
    }
    if (parsed === null || typeof parsed !== "object") {
        return;
    }
    // Kubernetes list response
    if (Array.isArray(parsed.items)) {
        span.setAttribute("response.k8s_items_count", parsed.items.length);
        span.setAttribute("response.k8s_kind", parsed.kind || "unknown");
    }
    // MCP list response
    if (Array.isArray(parsed)) {
        span.setAttribute("response.items_count", parsed.length);
    }
    if (parsed.success !== undefined) {
        span.setAttribute("response.success", parsed.success);
    }
}
/**
 * Mark the span as failed. Reads `message`/`code` defensively because a
 * handler may reject with a non-Error value (string, null, undefined).
 */
function recordToolFailure(span, error, durationMs) {
    const message = typeof error === "string" ? error : error?.message;
    const code = error?.code;
    span.setAttribute("tool.duration_ms", durationMs);
    span.setAttribute("error.type", "tool_error");
    if (message) {
        span.setAttribute("error.message", message);
    }
    if (code) {
        span.setAttribute("error.code", code);
    }
    span.setStatus({
        code: SpanStatusCode.ERROR,
        message: message || "Tool execution failed",
    });
}
121
/**
 * Create a manual span for non-tool operations.
 * Useful for tracing other server operations outside of tool calls.
 *
 * The span is marked OK when `fn` resolves and ERROR when it rejects; the
 * rejection is re-thrown unchanged. The span is always ended.
 *
 * @param name - Span name
 * @param attributes - Attributes set on the span when it is created
 * @param fn - Function to execute within the span
 * @returns Result of the function
 */
export async function withSpan(name, attributes, fn) {
    return await tracer.startActiveSpan(name, { attributes }, async (span) => {
        try {
            const result = await fn();
            span.setStatus({ code: SpanStatusCode.OK });
            return result;
        }
        catch (error) {
            // `fn` may reject with a non-Error value (string, null, undefined);
            // read the message defensively so the original rejection is what propagates.
            const message = typeof error === "string" ? error : error?.message;
            span.setAttribute("error.type", "operation_error");
            if (message) {
                span.setAttribute("error.message", message);
            }
            span.setStatus({
                code: SpanStatusCode.ERROR,
                message: message || "Operation failed",
            });
            throw error;
        }
        finally {
            span.end();
        }
    });
}
152
/**
 * Attach custom attributes to the currently active span, if there is one.
 * Does nothing when no span is active.
 *
 * @param attributes - Key-value pairs to add to the span
 */
export function addSpanAttributes(attributes) {
    const activeSpan = trace.getActiveSpan();
    if (!activeSpan) {
        return;
    }
    Object.entries(attributes).forEach(([attrName, attrValue]) => {
        activeSpan.setAttribute(attrName, attrValue);
    });
}
166
/**
 * Add a named event to the currently active span, if there is one.
 * Does nothing when no span is active.
 *
 * @param name - Event name
 * @param attributes - Optional event attributes
 */
export function recordSpanEvent(name, attributes) {
    const activeSpan = trace.getActiveSpan();
    if (!activeSpan) {
        return;
    }
    activeSpan.addEvent(name, attributes);
}
@@ -1,11 +1,12 @@
1
1
  /**
2
2
  * Tool: exec_in_pod
3
3
  * Execute a command in a Kubernetes pod or container and return the output.
4
- * Uses the official Kubernetes client-node Exec API for native execution.
4
+ * Uses kubectl exec for consistency with other kubectl-based tools.
5
5
  *
6
6
  * SECURITY: Only accepts commands as an array of strings. This prevents command
7
- * injection attacks by executing directly without shell interpretation.
8
- * Shell operators (pipes, redirects, etc.) are intentionally not supported.
7
+ * injection attacks by using execFileSync which executes directly without shell
8
+ * interpretation. Shell operators (pipes, redirects, etc.) are intentionally
9
+ * not supported.
9
10
  */
10
11
  import { KubernetesManager } from "../types.js";
11
12
  /**
@@ -58,12 +59,12 @@ export declare const execInPodSchema: {
58
59
  };
59
60
  };
60
61
  /**
61
- * Execute a command in a Kubernetes pod or container using the Kubernetes client-node Exec API.
62
+ * Execute a command in a Kubernetes pod or container using kubectl exec.
62
63
  * Returns the stdout output as a text response.
63
64
  * Throws McpError on failure.
64
65
  *
65
- * SECURITY: Command must be an array of strings. This executes directly via the
66
- * Kubernetes exec API without shell interpretation, preventing command injection.
66
+ * SECURITY: Command must be an array of strings. execFileSync does not invoke
67
+ * a shell, preventing command injection.
67
68
  */
68
69
  export declare function execInPod(k8sManager: KubernetesManager, input: {
69
70
  name: string;
@@ -1,15 +1,16 @@
1
1
  /**
2
2
  * Tool: exec_in_pod
3
3
  * Execute a command in a Kubernetes pod or container and return the output.
4
- * Uses the official Kubernetes client-node Exec API for native execution.
4
+ * Uses kubectl exec for consistency with other kubectl-based tools.
5
5
  *
6
6
  * SECURITY: Only accepts commands as an array of strings. This prevents command
7
- * injection attacks by executing directly without shell interpretation.
8
- * Shell operators (pipes, redirects, etc.) are intentionally not supported.
7
+ * injection attacks by using execFileSync which executes directly without shell
8
+ * interpretation. Shell operators (pipes, redirects, etc.) are intentionally
9
+ * not supported.
9
10
  */
10
- import * as k8s from "@kubernetes/client-node";
11
+ import { execFileSync } from "child_process";
11
12
  import { McpError, ErrorCode } from "@modelcontextprotocol/sdk/types.js";
12
- import { Writable } from "stream";
13
+ import { getSpawnMaxBuffer } from "../config/max-buffer.js";
13
14
  import { contextParameter, namespaceParameter } from "../models/common-parameters.js";
14
15
  /**
15
16
  * Schema for exec_in_pod tool.
@@ -51,12 +52,12 @@ export const execInPodSchema = {
51
52
  },
52
53
  };
53
54
  /**
54
- * Execute a command in a Kubernetes pod or container using the Kubernetes client-node Exec API.
55
+ * Execute a command in a Kubernetes pod or container using kubectl exec.
55
56
  * Returns the stdout output as a text response.
56
57
  * Throws McpError on failure.
57
58
  *
58
- * SECURITY: Command must be an array of strings. This executes directly via the
59
- * Kubernetes exec API without shell interpretation, preventing command injection.
59
+ * SECURITY: Command must be an array of strings. execFileSync does not invoke
60
+ * a shell, preventing command injection.
60
61
  */
61
62
  export async function execInPod(k8sManager, input) {
62
63
  const namespace = input.namespace || "default";
@@ -73,94 +74,35 @@ export async function execInPod(k8sManager, input) {
73
74
  throw new McpError(ErrorCode.InvalidParams, `Command array element at index ${i} must be a string`);
74
75
  }
75
76
  }
76
- const commandArr = input.command;
77
- // Prepare buffers to capture stdout and stderr
78
- let stdout = "";
79
- let stderr = "";
80
- // Use Node.js Writable streams to collect output
81
- const stdoutStream = new Writable({
82
- write(chunk, _encoding, callback) {
83
- stdout += chunk.toString();
84
- callback();
85
- }
86
- });
87
- const stderrStream = new Writable({
88
- write(chunk, _encoding, callback) {
89
- stderr += chunk.toString();
90
- callback();
91
- }
92
- });
93
- // Add a dummy stdin stream
94
- const stdinStream = new Writable({
95
- write(_chunk, _encoding, callback) {
96
- callback();
97
- }
98
- });
99
77
  try {
100
- // Set context if provided
78
+ const args = ["exec", input.name, "-n", namespace];
79
+ if (input.container) {
80
+ args.push("-c", input.container);
81
+ }
101
82
  if (input.context) {
102
- k8sManager.setCurrentContext(input.context);
83
+ args.push("--context", input.context);
103
84
  }
104
- // Use the Kubernetes client-node Exec API for native exec
105
- const kc = k8sManager.getKubeConfig();
106
- const exec = new k8s.Exec(kc);
107
- // Add a timeout to avoid hanging forever if exec never returns
108
- await new Promise((resolve, reject) => {
109
- let finished = false;
110
- const timeoutMs = input.timeout || 60000;
111
- const timeout = setTimeout(() => {
112
- if (!finished) {
113
- finished = true;
114
- reject(new McpError(ErrorCode.InternalError, "Exec operation timed out (possible networking, RBAC, or cluster issue)"));
115
- }
116
- }, timeoutMs);
117
- console.log("[exec_in_pod] Calling exec.exec with params:", {
118
- namespace,
119
- pod: input.name,
120
- container: input.container ?? "",
121
- commandArr,
122
- stdoutStreamType: typeof stdoutStream,
123
- stderrStreamType: typeof stderrStream,
124
- });
125
- exec.exec(namespace, input.name, input.container ?? "", commandArr, stdoutStream, stderrStream, stdinStream, // use dummy stdin
126
- true, // set tty to true
127
- (status) => {
128
- console.log("[exec_in_pod] exec.exec callback called. Status:", status);
129
- if (finished)
130
- return;
131
- finished = true;
132
- clearTimeout(timeout);
133
- // Always resolve; handle errors based on stderr or thrown errors
134
- resolve();
135
- }).catch((err) => {
136
- console.log("[exec_in_pod] exec.exec threw error:", err);
137
- if (!finished) {
138
- finished = true;
139
- clearTimeout(timeout);
140
- reject(new McpError(ErrorCode.InternalError, `Exec threw error: ${err?.message || err}`));
141
- }
142
- });
85
+ args.push("--", ...input.command);
86
+ const timeoutMs = input.timeout || 60000;
87
+ const result = execFileSync("kubectl", args, {
88
+ encoding: "utf8",
89
+ maxBuffer: getSpawnMaxBuffer(),
90
+ timeout: timeoutMs,
91
+ env: { ...process.env, KUBECONFIG: process.env.KUBECONFIG },
143
92
  });
144
- // Return the collected stdout as the result
145
- // If there is stderr output or no output at all, treat as error
146
- if (stderr || (!stdout && !stderr)) {
147
- throw new McpError(ErrorCode.InternalError, `Failed to execute command in pod: ${stderr || "No output"}`);
148
- }
149
93
  return {
150
94
  content: [
151
95
  {
152
96
  type: "text",
153
- text: stdout,
97
+ text: result,
154
98
  },
155
99
  ],
156
100
  };
157
101
  }
158
102
  catch (error) {
159
- // Collect error message and stderr output if available
160
- let message = error.message || "Unknown error";
161
- if (stderr) {
162
- message += "\n" + stderr;
103
+ if (error.killed || error.signal === "SIGTERM") {
104
+ throw new McpError(ErrorCode.InternalError, "Exec operation timed out (possible networking, RBAC, or cluster issue)");
163
105
  }
164
- throw new McpError(ErrorCode.InternalError, `Failed to execute command in pod: ${message}`);
106
+ throw new McpError(ErrorCode.InternalError, `Failed to execute command in pod: ${error.stderr || error.message}`);
165
107
  }
166
108
  }
@@ -0,0 +1,19 @@
1
+ import { KubernetesManager } from "../types.js";
2
+ export declare const kubectlReconnectSchema: {
3
+ readonly name: "kubectl_reconnect";
4
+ readonly description: "Reconnect to the Kubernetes API server by recreating all API clients. Use this after cluster upgrades (e.g., EKS control plane upgrades that rotate ENIs/IPs) to force fresh DNS resolution and new TCP connections.";
5
+ readonly annotations: {
6
+ readonly readOnlyHint: false;
7
+ };
8
+ readonly inputSchema: {
9
+ readonly type: "object";
10
+ readonly properties: {};
11
+ readonly required: readonly [];
12
+ };
13
+ };
14
+ export declare function kubectlReconnect(k8sManager: KubernetesManager): Promise<{
15
+ content: {
16
+ type: string;
17
+ text: string;
18
+ }[];
19
+ }>;
@@ -0,0 +1,32 @@
1
+ import { McpError, ErrorCode } from "@modelcontextprotocol/sdk/types.js";
2
+ export const kubectlReconnectSchema = {
3
+ name: "kubectl_reconnect",
4
+ description: "Reconnect to the Kubernetes API server by recreating all API clients. Use this after cluster upgrades (e.g., EKS control plane upgrades that rotate ENIs/IPs) to force fresh DNS resolution and new TCP connections.",
5
+ annotations: {
6
+ readOnlyHint: false,
7
+ },
8
+ inputSchema: {
9
+ type: "object",
10
+ properties: {},
11
+ required: [],
12
+ },
13
+ };
14
/**
 * Handle the `kubectl_reconnect` tool: ask the manager to recreate its API
 * clients and report success as a JSON text payload.
 *
 * @param k8sManager - Manager whose `refreshApiClients()` is invoked
 * @returns MCP text content containing `{ success: true, message }` as JSON
 * @throws McpError (InternalError) when refreshing the clients throws
 */
export async function kubectlReconnect(k8sManager) {
    try {
        k8sManager.refreshApiClients();
        const payload = {
            success: true,
            message: "API clients refreshed. DNS will be re-resolved on the next request.",
        };
        return {
            content: [
                {
                    type: "text",
                    text: JSON.stringify(payload, null, 2),
                },
            ],
        };
    }
    catch (error) {
        // A thrown value is not guaranteed to be an Error; avoid reporting
        // "undefined" (or crashing on null) when building the message.
        const reason = error instanceof Error ? error.message : String(error);
        throw new McpError(ErrorCode.InternalError, `Failed to reconnect: ${reason}`);
    }
}
@@ -1,3 +1,15 @@
1
+ import { timingSafeEqual } from "crypto";
2
/**
 * Constant-time string comparison that prevents timing attacks (CWE-208).
 *
 * `timingSafeEqual` requires equal-length buffers, so when the lengths differ
 * the first buffer is compared against itself: the same amount of work is
 * done, and the result is then forced to `false`.
 *
 * @param a - First string (UTF-8 encoded for comparison)
 * @param b - Second string
 * @returns `true` only when both strings encode to identical bytes
 */
function timingSafeCompare(a, b) {
    const left = Buffer.from(a);
    const right = Buffer.from(b);
    const sameLength = left.length === right.length;
    const bytesMatch = timingSafeEqual(left, sameLength ? right : left);
    return sameLength && bytesMatch;
}
1
13
  /**
2
14
  * Authentication middleware for MCP HTTP transports.
3
15
  *
@@ -31,7 +43,19 @@ export function createAuthMiddleware() {
31
43
  });
32
44
  return;
33
45
  }
34
- if (providedToken !== authToken) {
46
+ // Reject array-valued headers (e.g. duplicate X-MCP-AUTH)
47
+ if (Array.isArray(providedToken)) {
48
+ res.status(401).json({
49
+ jsonrpc: "2.0",
50
+ error: {
51
+ code: -32001,
52
+ message: "Unauthorized: Only single X-MCP-AUTH header is allowed",
53
+ },
54
+ id: null,
55
+ });
56
+ return;
57
+ }
58
+ if (!timingSafeCompare(providedToken, authToken)) {
35
59
  res.status(403).json({
36
60
  jsonrpc: "2.0",
37
61
  error: {
@@ -51,6 +51,7 @@ export declare class KubernetesManager {
51
51
  *
52
52
  * @param contextName
53
53
  */
54
+ refreshApiClients(): void;
54
55
  setCurrentContext(contextName: string): void;
55
56
  cleanup(): Promise<void>;
56
57
  trackResource(kind: string, name: string, namespace: string): void;
@@ -186,6 +186,11 @@ export class KubernetesManager {
186
186
  *
187
187
  * @param contextName
188
188
  */
189
+ refreshApiClients() {
190
+ this.k8sApi = this.kc.makeApiClient(k8s.CoreV1Api);
191
+ this.k8sAppsApi = this.kc.makeApiClient(k8s.AppsV1Api);
192
+ this.k8sBatchApi = this.kc.makeApiClient(k8s.BatchV1Api);
193
+ }
189
194
  setCurrentContext(contextName) {
190
195
  // Get all available contexts
191
196
  const contexts = this.kc.getContexts();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mcp-server-kubernetes",
3
- "version": "3.2.1",
3
+ "version": "3.4.0",
4
4
  "description": "MCP server for interacting with Kubernetes clusters via kubectl",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -39,18 +39,24 @@
39
39
  "dependencies": {
40
40
  "@kubernetes/client-node": "1.3.0",
41
41
  "@modelcontextprotocol/sdk": "1.26.0",
42
+ "@opentelemetry/api": "^1.9.0",
43
+ "@opentelemetry/auto-instrumentations-node": "^0.69.0",
44
+ "@opentelemetry/exporter-trace-otlp-grpc": "^0.211.0",
45
+ "@opentelemetry/resources": "^2.5.0",
46
+ "@opentelemetry/sdk-node": "^0.211.0",
47
+ "@opentelemetry/semantic-conventions": "^1.39.0",
42
48
  "express": "4.21.2",
43
49
  "js-yaml": "4.1.1",
44
50
  "yaml": "2.7.0",
45
51
  "zod": "3.25.76"
46
52
  },
47
53
  "devDependencies": {
54
+ "@anthropic-ai/mcpb": "1.1.0",
48
55
  "@types/express": "5.0.1",
49
56
  "@types/js-yaml": "4.0.9",
50
57
  "@types/node": "22.9.3",
51
58
  "shx": "0.3.4",
52
59
  "typescript": "5.6.2",
53
- "vitest": "2.1.9",
54
- "@anthropic-ai/mcpb": "1.1.0"
60
+ "vitest": "2.1.9"
55
61
  }
56
62
  }