kubectl-mcp-server 1.16.0__py3-none-any.whl → 1.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,255 @@
1
+ """
2
+ Runtime statistics collection for kubectl-mcp-server.
3
+
4
+ Provides a singleton StatsCollector that tracks:
5
+ - tool_calls_total: Total number of tool invocations
6
+ - tool_errors_total: Total number of tool errors
7
+ - tool_calls_by_name: Breakdown of calls by tool name
8
+ - http_requests_total: Total HTTP requests (for SSE/HTTP transports)
9
+ - uptime: Server uptime in seconds
10
+ """
11
+
12
+ import time
13
+ import threading
14
+ from collections import defaultdict
15
+ from dataclasses import dataclass, field
16
+ from typing import Dict, Any, Optional
17
+
18
+
19
@dataclass
class ToolStats:
    """Per-tool counters kept by the statistics collector."""

    # Number of recorded invocations of the tool.
    calls: int = field(default=0)
    # Number of those invocations that were recorded as failures.
    errors: int = field(default=0)
    # Sum of the reported call durations, in seconds.
    total_duration: float = field(default=0.0)
    # time.time() stamp of the most recent call; None until the first call.
    last_call_time: Optional[float] = field(default=None)
    # time.time() stamp of the most recent failure; None until the first one.
    last_error_time: Optional[float] = field(default=None)
27
+
28
+
29
class StatsCollector:
    """
    Singleton class for collecting runtime statistics.

    Every ``StatsCollector()`` call returns the same object. Counter state is
    guarded by an instance lock so that recording and reading can happen
    from several threads.

    Usage:
        stats = get_stats_collector()
        stats.record_tool_call("get_pods", success=True, duration=0.5)

        # Get current stats
        data = stats.get_stats()
    """

    _instance: Optional["StatsCollector"] = None
    # Guards creation and one-time initialisation of the singleton.
    _lock = threading.Lock()

    def __new__(cls) -> "StatsCollector":
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    instance = super().__new__(cls)
                    # Set the flag *before* publishing the instance so that a
                    # concurrent caller taking the lock-free fast path never
                    # sees an object without ``_initialized``.
                    instance._initialized = False
                    cls._instance = instance
        return cls._instance

    def __init__(self):
        """Initialize the stats collector (only once)."""
        if self._initialized:
            return

        # ``__init__`` runs on every ``StatsCollector()`` call. Without the
        # lock two racing first callers could both initialise, replacing
        # ``_stats_lock`` and zeroing counters already recorded.
        with type(self)._lock:
            if self._initialized:
                return

            self._stats_lock = threading.Lock()
            self._start_time = time.time()

            # Core counters
            self._tool_calls_total = 0
            self._tool_errors_total = 0
            self._http_requests_total = 0

            # Per-tool statistics
            self._tool_stats: Dict[str, ToolStats] = defaultdict(ToolStats)

            # HTTP request breakdown
            self._http_requests_by_endpoint: Dict[str, int] = defaultdict(int)
            self._http_requests_by_method: Dict[str, int] = defaultdict(int)

            self._initialized = True

    @staticmethod
    def _derive_rates(stats) -> tuple:
        """Return ``(average_duration, error_rate)`` for one tool's counters.

        Both values are 0.0 when the tool has no recorded calls.
        """
        if stats.calls > 0:
            return stats.total_duration / stats.calls, stats.errors / stats.calls
        return 0.0, 0.0

    def record_tool_call(
        self,
        tool_name: str,
        success: bool = True,
        duration: float = 0.0
    ) -> None:
        """
        Record a tool call.

        Args:
            tool_name: Name of the tool that was called
            success: Whether the call succeeded
            duration: Call duration in seconds
        """
        with self._stats_lock:
            # One timestamp per call so that a failed call reports the same
            # instant for both last_call_time and last_error_time.
            now = time.time()

            self._tool_calls_total += 1

            stats = self._tool_stats[tool_name]
            stats.calls += 1
            stats.total_duration += duration
            stats.last_call_time = now

            if not success:
                self._tool_errors_total += 1
                stats.errors += 1
                stats.last_error_time = now

    def record_tool_error(self, tool_name: str) -> None:
        """
        Record a tool error (shorthand for failed call).

        Args:
            tool_name: Name of the tool that errored
        """
        self.record_tool_call(tool_name, success=False)

    def record_http_request(
        self,
        endpoint: str = "/",
        method: str = "POST"
    ) -> None:
        """
        Record an HTTP request.

        Args:
            endpoint: Request endpoint path
            method: HTTP method (GET, POST, etc.)
        """
        with self._stats_lock:
            self._http_requests_total += 1
            self._http_requests_by_endpoint[endpoint] += 1
            self._http_requests_by_method[method] += 1

    @property
    def uptime(self) -> float:
        """Get seconds elapsed since construction or the last reset()."""
        return time.time() - self._start_time

    @property
    def tool_calls_total(self) -> int:
        """Get total tool calls."""
        with self._stats_lock:
            return self._tool_calls_total

    @property
    def tool_errors_total(self) -> int:
        """Get total tool errors."""
        with self._stats_lock:
            return self._tool_errors_total

    @property
    def http_requests_total(self) -> int:
        """Get total HTTP requests."""
        with self._stats_lock:
            return self._http_requests_total

    def get_tool_stats(self, tool_name: str) -> Optional[Dict[str, Any]]:
        """
        Get statistics for a specific tool.

        Args:
            tool_name: Name of the tool

        Returns:
            Dictionary with tool statistics or None if not found
        """
        with self._stats_lock:
            # Membership test first: indexing the defaultdict would create
            # an empty entry for a tool that was never called.
            if tool_name not in self._tool_stats:
                return None

            stats = self._tool_stats[tool_name]
            avg_duration, error_rate = self._derive_rates(stats)

            return {
                "calls": stats.calls,
                "errors": stats.errors,
                "error_rate": error_rate,
                "total_duration_seconds": stats.total_duration,
                "average_duration_seconds": avg_duration,
                "last_call_time": stats.last_call_time,
                "last_error_time": stats.last_error_time,
            }

    def get_stats(self) -> Dict[str, Any]:
        """
        Get all statistics as a JSON-serializable dictionary.

        Returns:
            Dictionary containing all collected statistics; the per-tool
            breakdown is ordered by call count, highest first.
        """
        with self._stats_lock:
            # Calculate tool-level stats
            tool_stats_dict = {}
            for tool_name, stats in self._tool_stats.items():
                avg_duration, error_rate = self._derive_rates(stats)
                tool_stats_dict[tool_name] = {
                    "calls": stats.calls,
                    "errors": stats.errors,
                    "error_rate": error_rate,
                    "average_duration_seconds": round(avg_duration, 4),
                }

            # Sort tools by call count (descending)
            sorted_tools = dict(
                sorted(
                    tool_stats_dict.items(),
                    key=lambda item: item[1]["calls"],
                    reverse=True,
                )
            )

            total_calls = self._tool_calls_total
            total_errors = self._tool_errors_total

            return {
                # ``uptime`` does not take the stats lock, so reading it
                # here cannot deadlock.
                "uptime_seconds": round(self.uptime, 2),
                "tool_calls_total": total_calls,
                "tool_errors_total": total_errors,
                "tool_error_rate": (
                    total_errors / total_calls if total_calls > 0 else 0.0
                ),
                "http_requests_total": self._http_requests_total,
                "http_requests_by_endpoint": dict(self._http_requests_by_endpoint),
                "http_requests_by_method": dict(self._http_requests_by_method),
                "tool_calls_by_name": sorted_tools,
                "unique_tools_called": len(self._tool_stats),
            }

    def reset(self) -> None:
        """Reset all statistics and restart the uptime clock (useful for testing)."""
        with self._stats_lock:
            self._start_time = time.time()
            self._tool_calls_total = 0
            self._tool_errors_total = 0
            self._http_requests_total = 0
            self._tool_stats.clear()
            self._http_requests_by_endpoint.clear()
            self._http_requests_by_method.clear()
239
+
240
+
241
# Module-level cache of the collector handed out by get_stats_collector()
_stats_collector: Optional[StatsCollector] = None


def get_stats_collector() -> StatsCollector:
    """
    Return the shared StatsCollector, creating it on the first call.

    Returns:
        The global StatsCollector instance
    """
    global _stats_collector

    collector = _stats_collector
    if collector is None:
        collector = StatsCollector()
        _stats_collector = collector
    return collector
@@ -0,0 +1,335 @@
1
+ """
2
+ OpenTelemetry tracing for kubectl-mcp-server.
3
+
4
+ Provides distributed tracing with OTLP export for production observability.
5
+
6
+ Environment Variables:
7
+ OTEL_EXPORTER_OTLP_ENDPOINT: OTLP endpoint URL (e.g., http://localhost:4317)
8
+ OTEL_EXPORTER_OTLP_HEADERS: Optional headers for OTLP exporter
9
+ OTEL_TRACES_SAMPLER: Sampler type (always_on, always_off, traceidratio, parentbased_always_on)
10
+ OTEL_TRACES_SAMPLER_ARG: Sampler argument (e.g., 0.5 for 50% sampling)
11
+ OTEL_SERVICE_NAME: Service name (default: kubectl-mcp-server)
12
+ OTEL_RESOURCE_ATTRIBUTES: Additional resource attributes
13
+
14
+ Requires: opentelemetry-api, opentelemetry-sdk, opentelemetry-exporter-otlp (optional dependencies)
15
+ """
16
+
17
+ import os
18
+ import logging
19
+ from contextlib import contextmanager
20
+ from typing import Optional, Generator, Any, Dict
21
+
22
logger = logging.getLogger(__name__)

# Module state: whether the OpenTelemetry packages imported, plus the tracer
# and provider that init_tracing() fills in later.
_otel_available = False
_tracer = None
_tracer_provider = None

try:
    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider, Span
    from opentelemetry.sdk.trace.export import (
        BatchSpanProcessor,
        ConsoleSpanExporter,
    )
    from opentelemetry.sdk.resources import Resource, SERVICE_NAME
    from opentelemetry.trace import Status, StatusCode
    from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
except ImportError:
    # Optional dependency missing: leave the flag False so the helpers in
    # this module turn into no-ops.
    logger.debug(
        "OpenTelemetry not installed. Tracing disabled. "
        "Install with: pip install kubectl-mcp-server[observability]"
    )
else:
    _otel_available = True
    logger.debug("OpenTelemetry tracing modules available")
48
+
49
+
50
def is_tracing_available() -> bool:
    """Report whether the OpenTelemetry modules were importable at load time."""
    available = _otel_available
    return available
53
+
54
+
55
def _get_sampler():
    """
    Get the configured sampler based on environment variables.

    Reads OTEL_TRACES_SAMPLER (default ``parentbased_always_on``) and
    OTEL_TRACES_SAMPLER_ARG (default ``1.0``).

    Supports:
    - always_on: Always sample
    - always_off: Never sample
    - traceidratio: Sample based on ratio (OTEL_TRACES_SAMPLER_ARG)
    - parentbased_always_on: Parent-based with always_on root sampler
    - parentbased_always_off: Parent-based with always_off root sampler
    - parentbased_traceidratio: Parent-based with ratio root sampler

    Returns:
        A sampler instance, or None when OpenTelemetry is not installed.
        Unknown sampler names fall back to parentbased_always_on.
    """
    if not _otel_available:
        return None

    from opentelemetry.sdk.trace.sampling import (
        ALWAYS_ON,
        ALWAYS_OFF,
        ParentBased,
        TraceIdRatioBased,
        ParentBasedTraceIdRatio,
    )

    sampler_type = (
        os.environ.get("OTEL_TRACES_SAMPLER", "parentbased_always_on").strip().lower()
    )
    sampler_arg = os.environ.get("OTEL_TRACES_SAMPLER_ARG", "1.0")

    try:
        ratio = float(sampler_arg)
    except ValueError:
        ratio = None

    # Reject out-of-range values (and NaN, which fails both comparisons)
    # here; otherwise the ratio sampler constructor raises and the whole
    # tracing initialisation is aborted.
    if ratio is None or not (0.0 <= ratio <= 1.0):
        logger.warning(f"Invalid OTEL_TRACES_SAMPLER_ARG: {sampler_arg}, using 1.0")
        ratio = 1.0

    if sampler_type == "always_on":
        return ALWAYS_ON
    if sampler_type == "always_off":
        return ALWAYS_OFF
    if sampler_type == "traceidratio":
        return TraceIdRatioBased(ratio)
    if sampler_type == "parentbased_traceidratio":
        return ParentBasedTraceIdRatio(ratio)
    if sampler_type == "parentbased_always_off":
        return ParentBased(ALWAYS_OFF)

    if sampler_type != "parentbased_always_on":
        logger.warning(f"Unknown sampler type: {sampler_type}, using parentbased_always_on")

    # The always-on variant must ignore OTEL_TRACES_SAMPLER_ARG; routing it
    # through the ratio sampler would silently down-sample root spans.
    return ParentBased(ALWAYS_ON)
95
+
96
+
97
def _parse_key_value_pairs(raw: str) -> Dict[str, str]:
    """Parse a ``k1=v1,k2=v2`` string into a dict, skipping malformed items."""
    pairs: Dict[str, str] = {}
    for item in raw.split(","):
        key, sep, value = item.partition("=")
        key = key.strip()
        # Items without "=" or with an empty key carry no usable information.
        if sep and key:
            pairs[key] = value.strip()
    return pairs


def _add_span_exporter(provider: Any, otlp_endpoint: Optional[str]) -> None:
    """Attach the span exporter selected by the environment to *provider*."""
    from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter

    if not otlp_endpoint:
        if os.environ.get("OTEL_TRACES_EXPORTER") == "console":
            # Explicitly use console exporter
            provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
            logger.info("Using console span exporter")
        else:
            # No exporter configured, log a message
            logger.debug(
                "No OTEL_EXPORTER_OTLP_ENDPOINT set, tracing spans will not be exported. "
                "Set OTEL_TRACES_EXPORTER=console for debug output."
            )
        return

    # Parsed once so that the gRPC and HTTP exporters receive the same headers.
    headers = _parse_key_value_pairs(
        os.environ.get("OTEL_EXPORTER_OTLP_HEADERS", "")
    ) or None

    # Preferred: gRPC OTLP exporter
    try:
        from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
    except ImportError:
        pass
    else:
        exporter = OTLPSpanExporter(endpoint=otlp_endpoint, headers=headers)
        provider.add_span_processor(BatchSpanProcessor(exporter))
        logger.info(f"OpenTelemetry OTLP exporter configured: {otlp_endpoint}")
        return

    # Fallback: HTTP OTLP exporter
    try:
        from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
            OTLPSpanExporter as HTTPOTLPSpanExporter,
        )
    except ImportError:
        logger.warning(
            "OTLP exporter not available. "
            "Install with: pip install opentelemetry-exporter-otlp"
        )
        # Fall back to console exporter for debugging
        provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
        logger.info("Using console span exporter (OTLP exporter not available)")
        return

    # Normalise the URL: a trailing slash would give "//v1/traces", and an
    # endpoint that already names the traces path must not get it twice.
    base_url = otlp_endpoint.rstrip("/")
    traces_url = base_url if base_url.endswith("/v1/traces") else f"{base_url}/v1/traces"
    exporter = HTTPOTLPSpanExporter(endpoint=traces_url, headers=headers)
    provider.add_span_processor(BatchSpanProcessor(exporter))
    logger.info(f"OpenTelemetry HTTP OTLP exporter configured: {otlp_endpoint}")


def init_tracing(
    service_name: Optional[str] = None,
    service_version: Optional[str] = None,
) -> bool:
    """
    Initialize OpenTelemetry tracing.

    Builds a tracer provider (resource, sampler, exporter) from the arguments
    and the OTEL_* environment variables, installs it as the global provider
    and stores the module tracer. Calling it again after a successful
    initialisation is a no-op that returns True.

    Args:
        service_name: Service name (default from OTEL_SERVICE_NAME or kubectl-mcp-server)
        service_version: Service version (default from package version)

    Returns:
        True if tracing was initialized, False otherwise. Errors during
        initialisation are logged, never raised.
    """
    global _tracer, _tracer_provider

    if not _otel_available:
        logger.debug("OpenTelemetry not available, skipping tracing init")
        return False

    # Already initialized
    if _tracer is not None:
        return True

    # Built in a local and published only on success, so a failed attempt
    # never leaves a half-configured provider in the module globals.
    provider = None

    try:
        from opentelemetry import trace
        from opentelemetry.sdk.trace import TracerProvider
        from opentelemetry.sdk.resources import Resource, SERVICE_NAME

        # Get service name
        if service_name is None:
            service_name = os.environ.get("OTEL_SERVICE_NAME", "kubectl-mcp-server")

        # Get service version
        if service_version is None:
            try:
                from kubectl_mcp_tool import __version__
                service_version = __version__
            except ImportError:
                service_version = "unknown"

        resource_attrs = {
            SERVICE_NAME: service_name,
            "service.version": service_version,
        }
        # Custom attributes from the environment are applied last, so they
        # override the two defaults above.
        resource_attrs.update(
            _parse_key_value_pairs(os.environ.get("OTEL_RESOURCE_ATTRIBUTES", ""))
        )

        # Create tracer provider with sampler
        provider = TracerProvider(
            resource=Resource.create(resource_attrs),
            sampler=_get_sampler(),
        )

        # Add exporter based on environment
        _add_span_exporter(provider, os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"))

        # Set the global tracer provider
        trace.set_tracer_provider(provider)

        # Create tracer
        tracer = trace.get_tracer(
            "kubectl-mcp-server",
            service_version,
        )

    except Exception as e:
        logger.error(f"Failed to initialize OpenTelemetry tracing: {e}")
        if provider is not None:
            # Release span processors already attached to the abandoned
            # provider instead of leaking them.
            try:
                provider.shutdown()
            except Exception:
                logger.debug("Error cleaning up tracer provider", exc_info=True)
        return False

    _tracer_provider = provider
    _tracer = tracer

    logger.info(f"OpenTelemetry tracing initialized for {service_name} v{service_version}")
    return True
229
+
230
+
231
def get_tracer():
    """
    Return the module-level OpenTelemetry tracer.

    Returns:
        The tracer instance, or None if not initialized
    """
    tracer = _tracer
    return tracer
239
+
240
+
241
def shutdown_tracing() -> None:
    """Shut down the tracer provider, if any, and clear the module tracer state."""
    global _tracer, _tracer_provider

    provider = _tracer_provider
    if provider is not None:
        try:
            provider.shutdown()
            logger.debug("OpenTelemetry tracing shut down")
        except Exception as exc:
            # A failing shutdown is logged only; the state is still cleared below.
            logger.error(f"Error shutting down tracing: {exc}")

    _tracer = _tracer_provider = None
254
+
255
+
256
class _NoOpSpan:
    """Inert stand-in yielded by traced_tool_call when tracing is disabled.

    It accepts the common span calls and ignores them, so instrumented code
    can call ``span.set_attribute(...)`` unconditionally. It is falsy, so
    callers that guard with ``if span:`` keep taking the same branch as when
    ``None`` was yielded.
    """

    __slots__ = ()

    def __bool__(self) -> bool:
        return False

    def is_recording(self) -> bool:
        return False

    def set_attribute(self, key: str, value: Any) -> None:
        return None

    def set_attributes(self, attributes: Any) -> None:
        return None

    def add_event(self, name: str, *args: Any, **kwargs: Any) -> None:
        return None

    def set_status(self, *args: Any, **kwargs: Any) -> None:
        return None

    def record_exception(self, exception: BaseException, *args: Any, **kwargs: Any) -> None:
        return None

    def update_name(self, name: str) -> None:
        return None


_NOOP_SPAN = _NoOpSpan()


@contextmanager
def traced_tool_call(
    tool_name: str,
    attributes: Optional[Dict[str, Any]] = None,
) -> Generator[Any, None, None]:
    """
    Context manager for tracing a tool call.

    Creates a span named ``mcp.tool.<tool_name>``, marks it OK when the body
    completes and ERROR (with the exception recorded once) when the body
    raises an ``Exception``; the exception is always re-raised.

    Args:
        tool_name: Name of the tool being called
        attributes: Optional additional span attributes; only str, int,
            float and bool values are set, under the ``mcp.tool.`` prefix

    Yields:
        The span object, or a falsy no-op span if tracing is disabled

    Example:
        with traced_tool_call("get_pods", {"namespace": "default"}) as span:
            result = await get_pods(namespace="default")
            span.set_attribute("pod_count", len(result))
    """
    # Read the global once so a concurrent shutdown_tracing() cannot turn it
    # into None between the check and the use.
    tracer = _tracer
    if not _otel_available or tracer is None:
        yield _NOOP_SPAN
        return

    from opentelemetry import trace
    from opentelemetry.trace import Status, StatusCode

    with tracer.start_as_current_span(
        f"mcp.tool.{tool_name}",
        kind=trace.SpanKind.INTERNAL,
        # Status and exception are handled explicitly below; leaving the
        # automatic handling enabled would record every exception twice.
        record_exception=False,
        set_status_on_exception=False,
    ) as span:
        # Set base attributes
        span.set_attribute("mcp.tool.name", tool_name)

        # Set additional attributes
        if attributes:
            for key, value in attributes.items():
                if isinstance(value, (str, int, float, bool)):
                    span.set_attribute(f"mcp.tool.{key}", value)

        try:
            yield span
            span.set_status(Status(StatusCode.OK))
        except Exception as e:
            span.set_status(Status(StatusCode.ERROR, str(e)))
            span.record_exception(e)
            raise
305
+
306
+
307
def add_span_attribute(key: str, value: Any) -> None:
    """
    Set one attribute on the currently active span.

    Does nothing when OpenTelemetry is unavailable or when the value is not
    one of the accepted primitive types.

    Args:
        key: Attribute key
        value: Attribute value (must be str, int, float, or bool)
    """
    if _otel_available:
        current = trace.get_current_span()
        if current is None or not isinstance(value, (str, int, float, bool)):
            return
        current.set_attribute(key, value)
321
+
322
+
323
def record_span_exception(exception: Exception) -> None:
    """
    Attach an exception to the currently active span.

    Does nothing when OpenTelemetry is unavailable.

    Args:
        exception: The exception to record
    """
    if _otel_available:
        current = trace.get_current_span()
        if current is not None:
            current.record_exception(exception)
@@ -1,5 +1,48 @@
1
1
  from .prompts import register_prompts
2
+ from .custom import (
3
+ CustomPrompt,
4
+ PromptArgument,
5
+ PromptMessage,
6
+ render_prompt,
7
+ load_prompts_from_config,
8
+ load_prompts_from_toml_file,
9
+ validate_prompt_args,
10
+ apply_defaults,
11
+ get_prompt_schema,
12
+ )
13
+ from .builtin import (
14
+ BUILTIN_PROMPTS,
15
+ get_builtin_prompts,
16
+ get_builtin_prompt_by_name,
17
+ CLUSTER_HEALTH_CHECK,
18
+ DEBUG_WORKLOAD,
19
+ RESOURCE_USAGE,
20
+ SECURITY_POSTURE,
21
+ DEPLOYMENT_CHECKLIST,
22
+ INCIDENT_RESPONSE,
23
+ )
2
24
 
3
25
__all__ = [
    # Registration entry point
    "register_prompts",

    # Custom prompts: types, rendering, loading and validation helpers
    "CustomPrompt",
    "PromptArgument",
    "PromptMessage",
    "render_prompt",
    "load_prompts_from_config",
    "load_prompts_from_toml_file",
    "validate_prompt_args",
    "apply_defaults",
    "get_prompt_schema",

    # Built-in prompts: registry, lookups and the individual prompt constants
    "BUILTIN_PROMPTS",
    "get_builtin_prompts",
    "get_builtin_prompt_by_name",
    "CLUSTER_HEALTH_CHECK",
    "DEBUG_WORKLOAD",
    "RESOURCE_USAGE",
    "SECURITY_POSTURE",
    "DEPLOYMENT_CHECKLIST",
    "INCIDENT_RESPONSE",
]