@rishibhushan/jenkins-mcp-server 1.0.7 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +188 -10
- package/package.json +1 -1
- package/requirements.txt +1 -0
- package/src/jenkins_mcp_server/__init__.py +19 -11
- package/src/jenkins_mcp_server/cache.py +310 -0
- package/src/jenkins_mcp_server/config.py +54 -1
- package/src/jenkins_mcp_server/jenkins_client.py +69 -80
- package/src/jenkins_mcp_server/metrics.py +358 -0
- package/src/jenkins_mcp_server/server.py +1015 -108
|
@@ -5,6 +5,8 @@ Handles loading Jenkins connection settings from multiple sources:
|
|
|
5
5
|
1. VS Code settings.json (highest priority)
|
|
6
6
|
2. Environment variables / .env file
|
|
7
7
|
3. Direct instantiation with parameters
|
|
8
|
+
|
|
9
|
+
Enhanced with timeout configuration support.
|
|
8
10
|
"""
|
|
9
11
|
|
|
10
12
|
import json
|
|
@@ -50,6 +52,50 @@ class JenkinsSettings(BaseSettings):
|
|
|
50
52
|
description="Jenkins API token (preferred over password)"
|
|
51
53
|
)
|
|
52
54
|
|
|
55
|
+
# Timeout settings (High Priority Issue #4)
|
|
56
|
+
timeout: int = Field(
|
|
57
|
+
default=30,
|
|
58
|
+
description="Default timeout for Jenkins API calls in seconds",
|
|
59
|
+
ge=5,
|
|
60
|
+
le=300
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
connect_timeout: int = Field(
|
|
64
|
+
default=10,
|
|
65
|
+
description="Connection timeout in seconds",
|
|
66
|
+
ge=2,
|
|
67
|
+
le=60
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
read_timeout: int = Field(
|
|
71
|
+
default=30,
|
|
72
|
+
description="Read timeout for API responses in seconds",
|
|
73
|
+
ge=5,
|
|
74
|
+
le=300
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
# Retry settings
|
|
78
|
+
max_retries: int = Field(
|
|
79
|
+
default=3,
|
|
80
|
+
description="Maximum number of retry attempts for failed requests",
|
|
81
|
+
ge=0,
|
|
82
|
+
le=10
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
# Console output settings (High Priority Issue #5)
|
|
86
|
+
console_max_lines: int = Field(
|
|
87
|
+
default=1000,
|
|
88
|
+
description="Default maximum lines to return from console output",
|
|
89
|
+
ge=10,
|
|
90
|
+
le=50000
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
# SSL verification
|
|
94
|
+
verify_ssl: bool = Field(
|
|
95
|
+
default=True,
|
|
96
|
+
description="Whether to verify SSL certificates"
|
|
97
|
+
)
|
|
98
|
+
|
|
53
99
|
model_config = SettingsConfigDict(
|
|
54
100
|
env_prefix="JENKINS_",
|
|
55
101
|
env_file_encoding="utf-8",
|
|
@@ -92,6 +138,12 @@ class JenkinsSettings(BaseSettings):
|
|
|
92
138
|
logger.info("Jenkins Configuration:")
|
|
93
139
|
logger.info(f" URL: {self.url or 'Not configured'}")
|
|
94
140
|
logger.info(f" Username: {self.username or 'Not configured'}")
|
|
141
|
+
logger.info(f" Timeout: {self.timeout}s")
|
|
142
|
+
logger.info(f" Connect Timeout: {self.connect_timeout}s")
|
|
143
|
+
logger.info(f" Read Timeout: {self.read_timeout}s")
|
|
144
|
+
logger.info(f" Max Retries: {self.max_retries}")
|
|
145
|
+
logger.info(f" Console Max Lines: {self.console_max_lines}")
|
|
146
|
+
logger.info(f" Verify SSL: {self.verify_ssl}")
|
|
95
147
|
|
|
96
148
|
if hide_sensitive:
|
|
97
149
|
logger.info(f" Authentication: {self.auth_method}")
|
|
@@ -244,7 +296,8 @@ def load_settings(
|
|
|
244
296
|
print(f"=== VS Code settings loaded: {vscode_settings is not None} ===", file=sys.stderr, flush=True)
|
|
245
297
|
if vscode_settings:
|
|
246
298
|
# Merge VS Code settings into our settings object
|
|
247
|
-
for key in ['url', 'username', 'password', 'token'
|
|
299
|
+
for key in ['url', 'username', 'password', 'token', 'timeout', 'connect_timeout',
|
|
300
|
+
'read_timeout', 'max_retries', 'console_max_lines', 'verify_ssl']:
|
|
248
301
|
vscode_value = vscode_settings.get(key)
|
|
249
302
|
if vscode_value is not None:
|
|
250
303
|
setattr(settings, key, vscode_value)
|
|
@@ -3,6 +3,8 @@ Jenkins MCP Server Client Module
|
|
|
3
3
|
|
|
4
4
|
Provides a clean interface to Jenkins API operations with automatic fallback
|
|
5
5
|
between python-jenkins library and direct REST API calls.
|
|
6
|
+
|
|
7
|
+
Enhanced with configurable timeout support.
|
|
6
8
|
"""
|
|
7
9
|
|
|
8
10
|
import logging
|
|
@@ -34,7 +36,8 @@ class JenkinsClient:
|
|
|
34
36
|
Client for interacting with Jenkins API.
|
|
35
37
|
|
|
36
38
|
Supports both python-jenkins library and direct REST API calls
|
|
37
|
-
with automatic fallback for reliability.
|
|
39
|
+
with automatic fallback for reliability. Enhanced with configurable
|
|
40
|
+
timeout support.
|
|
38
41
|
"""
|
|
39
42
|
|
|
40
43
|
def __init__(self, settings: Optional[JenkinsSettings] = None):
|
|
@@ -60,6 +63,12 @@ class JenkinsClient:
|
|
|
60
63
|
username, auth_value = self.settings.get_credentials()
|
|
61
64
|
self.auth = HTTPBasicAuth(username, auth_value)
|
|
62
65
|
|
|
66
|
+
# Store timeout settings (High Priority Issue #4)
|
|
67
|
+
self.timeout = self.settings.timeout
|
|
68
|
+
self.connect_timeout = self.settings.connect_timeout
|
|
69
|
+
self.read_timeout = self.settings.read_timeout
|
|
70
|
+
self.verify_ssl = self.settings.verify_ssl
|
|
71
|
+
|
|
63
72
|
# Cache for python-jenkins server instance
|
|
64
73
|
self._server: Optional[jenkins.Jenkins] = None
|
|
65
74
|
|
|
@@ -67,14 +76,14 @@ class JenkinsClient:
|
|
|
67
76
|
self._test_connection()
|
|
68
77
|
|
|
69
78
|
def _test_connection(self) -> None:
|
|
70
|
-
"""Test connection to Jenkins server (with
|
|
79
|
+
"""Test connection to Jenkins server (with configurable timeout)"""
|
|
71
80
|
try:
|
|
72
|
-
# Quick connection test with
|
|
81
|
+
# Quick connection test with configured timeout for MCP compatibility
|
|
73
82
|
response = requests.get(
|
|
74
83
|
f"{self.base_url}/api/json",
|
|
75
84
|
auth=self.auth,
|
|
76
|
-
verify=False,
|
|
77
|
-
timeout=
|
|
85
|
+
verify=self.verify_ssl if self.verify_ssl else False,
|
|
86
|
+
timeout=self.connect_timeout # Use configured connect timeout
|
|
78
87
|
)
|
|
79
88
|
response.raise_for_status()
|
|
80
89
|
|
|
@@ -83,7 +92,7 @@ class JenkinsClient:
|
|
|
83
92
|
logger.debug(f"Jenkins version: {data.get('_class', 'unknown')}")
|
|
84
93
|
|
|
85
94
|
except requests.Timeout:
|
|
86
|
-
logger.warning(f"Connection to Jenkins timed out (server may be slow)")
|
|
95
|
+
logger.warning(f"Connection to Jenkins timed out after {self.connect_timeout}s (server may be slow)")
|
|
87
96
|
# Don't fail - let actual operations fail if there's a real problem
|
|
88
97
|
except requests.RequestException as e:
|
|
89
98
|
logger.warning(f"Could not verify Jenkins connection: {e}")
|
|
@@ -91,19 +100,20 @@ class JenkinsClient:
|
|
|
91
100
|
|
|
92
101
|
@property
|
|
93
102
|
def server(self) -> jenkins.Jenkins:
|
|
94
|
-
"""Get or create python-jenkins server instance (lazy initialization)"""
|
|
103
|
+
"""Get or create python-jenkins server instance (lazy initialization with timeout)"""
|
|
95
104
|
if self._server is None:
|
|
96
105
|
username, password = self.settings.get_credentials()
|
|
97
106
|
self._server = jenkins.Jenkins(
|
|
98
107
|
self.base_url,
|
|
99
108
|
username=username,
|
|
100
|
-
password=password
|
|
109
|
+
password=password,
|
|
110
|
+
timeout=self.timeout # Use configured timeout
|
|
101
111
|
)
|
|
102
112
|
return self._server
|
|
103
113
|
|
|
104
114
|
def _api_call(self, method: str, endpoint: str, **kwargs) -> requests.Response:
|
|
105
115
|
"""
|
|
106
|
-
Make a direct REST API call to Jenkins.
|
|
116
|
+
Make a direct REST API call to Jenkins with configured timeout.
|
|
107
117
|
|
|
108
118
|
Args:
|
|
109
119
|
method: HTTP method (GET, POST, etc.)
|
|
@@ -115,8 +125,11 @@ class JenkinsClient:
|
|
|
115
125
|
"""
|
|
116
126
|
url = f"{self.base_url}{endpoint}"
|
|
117
127
|
kwargs.setdefault('auth', self.auth)
|
|
118
|
-
kwargs.setdefault('verify', False)
|
|
119
|
-
|
|
128
|
+
kwargs.setdefault('verify', self.verify_ssl if self.verify_ssl else False)
|
|
129
|
+
|
|
130
|
+
# Use configured timeout (can be overridden per call)
|
|
131
|
+
if 'timeout' not in kwargs:
|
|
132
|
+
kwargs['timeout'] = (self.connect_timeout, self.read_timeout)
|
|
120
133
|
|
|
121
134
|
response = requests.request(method, url, **kwargs)
|
|
122
135
|
response.raise_for_status()
|
|
@@ -220,82 +233,46 @@ class JenkinsClient:
|
|
|
220
233
|
Returns:
|
|
221
234
|
Dict with 'queue_id' and 'build_number' (if wait_for_start=True)
|
|
222
235
|
"""
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
queue_id = self.server.build_job(job_name, parameters or {})
|
|
226
|
-
queue_id = int(queue_id) if queue_id else None
|
|
227
|
-
|
|
228
|
-
build_number = None
|
|
229
|
-
if wait_for_start and queue_id:
|
|
230
|
-
build_number = self._wait_for_build_start(
|
|
231
|
-
queue_id, timeout, poll_interval
|
|
232
|
-
)
|
|
233
|
-
|
|
234
|
-
return {"queue_id": queue_id, "build_number": build_number}
|
|
236
|
+
# Get the last build number before triggering
|
|
237
|
+
last_build_num = self.get_last_build_number(job_name) or 0
|
|
235
238
|
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
239
|
+
# Trigger the build
|
|
240
|
+
if parameters:
|
|
241
|
+
self._api_call(
|
|
242
|
+
'POST',
|
|
243
|
+
f'/job/{job_name}/buildWithParameters',
|
|
244
|
+
params=parameters
|
|
240
245
|
)
|
|
246
|
+
else:
|
|
247
|
+
self._api_call('POST', f'/job/{job_name}/build')
|
|
241
248
|
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
job_name
|
|
245
|
-
|
|
246
|
-
wait_for_start: bool,
|
|
247
|
-
timeout: int,
|
|
248
|
-
poll_interval: float
|
|
249
|
-
) -> Dict[str, Optional[int]]:
|
|
250
|
-
"""Build job using REST API (fallback method)"""
|
|
251
|
-
endpoint = f'/job/{job_name}/buildWithParameters' if parameters else f'/job/{job_name}/build'
|
|
252
|
-
|
|
253
|
-
response = self._api_call('POST', endpoint, params=parameters)
|
|
249
|
+
# Get queue ID from response
|
|
250
|
+
queue_id = self._extract_queue_id_from_location(
|
|
251
|
+
f'/job/{job_name}/build'
|
|
252
|
+
)
|
|
254
253
|
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
254
|
+
result = {
|
|
255
|
+
'queue_id': queue_id,
|
|
256
|
+
'build_number': None
|
|
257
|
+
}
|
|
258
258
|
|
|
259
|
-
|
|
260
|
-
if wait_for_start
|
|
261
|
-
|
|
259
|
+
# Optionally wait for build to start
|
|
260
|
+
if wait_for_start:
|
|
261
|
+
start_time = time.time()
|
|
262
|
+
while time.time() - start_time < timeout:
|
|
263
|
+
time.sleep(poll_interval)
|
|
262
264
|
|
|
263
|
-
|
|
265
|
+
# Check if a new build has started
|
|
266
|
+
current_build_num = self.get_last_build_number(job_name)
|
|
267
|
+
if current_build_num and current_build_num > last_build_num:
|
|
268
|
+
result['build_number'] = current_build_num
|
|
269
|
+
logger.info(f"Build {job_name} #{current_build_num} started")
|
|
270
|
+
break
|
|
264
271
|
|
|
265
|
-
|
|
266
|
-
self,
|
|
267
|
-
queue_id: int,
|
|
268
|
-
timeout: int,
|
|
269
|
-
poll_interval: float
|
|
270
|
-
) -> Optional[int]:
|
|
271
|
-
"""Wait for a queued build to start and return its build number"""
|
|
272
|
-
elapsed = 0.0
|
|
273
|
-
|
|
274
|
-
while elapsed < timeout:
|
|
275
|
-
try:
|
|
276
|
-
# Try python-jenkins first
|
|
277
|
-
item = self.server.get_queue_item(queue_id)
|
|
278
|
-
if item and item.get('executable'):
|
|
279
|
-
return int(item['executable']['number'])
|
|
280
|
-
except Exception:
|
|
281
|
-
# Fall back to REST API
|
|
282
|
-
try:
|
|
283
|
-
response = self._api_call('GET', f'/queue/item/{queue_id}/api/json')
|
|
284
|
-
item = response.json()
|
|
285
|
-
if item.get('executable'):
|
|
286
|
-
return int(item['executable']['number'])
|
|
287
|
-
except Exception:
|
|
288
|
-
pass
|
|
289
|
-
|
|
290
|
-
time.sleep(poll_interval)
|
|
291
|
-
elapsed += poll_interval
|
|
292
|
-
|
|
293
|
-
logger.warning(f"Timeout waiting for build {queue_id} to start")
|
|
294
|
-
return None
|
|
272
|
+
return result
|
|
295
273
|
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
"""Extract queue ID from Jenkins Location header"""
|
|
274
|
+
def _extract_queue_id_from_location(self, location: str) -> Optional[int]:
|
|
275
|
+
"""Extract queue ID from Location header"""
|
|
299
276
|
if not location:
|
|
300
277
|
return None
|
|
301
278
|
|
|
@@ -480,6 +457,18 @@ class JenkinsClient:
|
|
|
480
457
|
response = self._api_call('GET', f'/computer/{node_name}/api/json')
|
|
481
458
|
return response.json()
|
|
482
459
|
|
|
460
|
+
# ==================== Additional Helper Methods ====================
|
|
461
|
+
|
|
462
|
+
def get_whoami(self) -> Dict[str, Any]:
    """
    Fetch details about the currently authenticated user.

    Issues a GET to ``/me/api/json`` through ``self._api_call``.

    Returns:
        The decoded JSON body of the response.
    """
    return self._api_call('GET', '/me/api/json').json()
|
|
466
|
+
|
|
467
|
+
def get_version(self) -> str:
    """
    Report the Jenkins version.

    Issues a GET to ``/api/json`` through ``self._api_call`` and reads the
    ``X-Jenkins`` response header.

    Returns:
        The header value, or ``'Unknown'`` when the header is absent.
    """
    response_headers = self._api_call('GET', '/api/json').headers
    return response_headers.get('X-Jenkins', 'Unknown')
|
|
471
|
+
|
|
483
472
|
|
|
484
473
|
# ==================== Client Factory ====================
|
|
485
474
|
|
|
@@ -515,4 +504,4 @@ def get_jenkins_client(settings: Optional[JenkinsSettings] = None) -> JenkinsCli
|
|
|
515
504
|
def reset_default_client() -> None:
|
|
516
505
|
"""Reset the default client (useful for testing or reconfiguration)"""
|
|
517
506
|
global _default_client
|
|
518
|
-
_default_client = None
|
|
507
|
+
_default_client = None
|
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Metrics and Telemetry Module for Jenkins MCP Server
|
|
3
|
+
|
|
4
|
+
Tracks tool usage, performance, and errors for monitoring
|
|
5
|
+
and optimization purposes.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import logging
|
|
10
|
+
import time
|
|
11
|
+
from collections import defaultdict
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from typing import Any, Dict, List, Optional
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class ToolMetric:
    """Record of one tool invocation: what ran, how long it took, how it ended."""

    tool_name: str
    execution_time_ms: float
    success: bool
    error_message: Optional[str] = None
    # Filled in with datetime.now() at construction when not supplied.
    timestamp: datetime = field(default_factory=datetime.now)
    args: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """
        Render the metric as a plain dictionary.

        Returns:
            A dict with the duration rounded to two decimals and the
            timestamp in ISO-8601 form; the remaining fields are passed
            through unchanged.
        """
        rounded_ms = round(self.execution_time_ms, 2)
        iso_stamp = self.timestamp.isoformat()
        payload: Dict[str, Any] = {}
        payload["tool_name"] = self.tool_name
        payload["execution_time_ms"] = rounded_ms
        payload["success"] = self.success
        payload["error_message"] = self.error_message
        payload["timestamp"] = iso_stamp
        payload["args"] = self.args
        return payload
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class ToolStats:
    """Running totals for one tool, updated one execution at a time."""

    total_calls: int = 0
    successful_calls: int = 0
    failed_calls: int = 0
    total_time_ms: float = 0.0
    # Starts at +inf so the first recorded duration always becomes the minimum.
    min_time_ms: float = float('inf')
    max_time_ms: float = 0.0

    @property
    def avg_time_ms(self) -> float:
        """Mean execution time in milliseconds; 0.0 when nothing was recorded."""
        return self.total_time_ms / self.total_calls if self.total_calls else 0.0

    @property
    def success_rate(self) -> float:
        """Share of successful calls as a percentage; 0.0 when nothing was recorded."""
        if not self.total_calls:
            return 0.0
        return (self.successful_calls / self.total_calls) * 100

    def add_metric(self, metric: ToolMetric) -> None:
        """
        Fold one execution into the totals.

        Args:
            metric: The execution to account for; its ``execution_time_ms``
                and ``success`` attributes are read.
        """
        elapsed = metric.execution_time_ms

        self.total_calls += 1
        self.total_time_ms += elapsed

        if metric.success:
            self.successful_calls += 1
        else:
            self.failed_calls += 1

        if elapsed < self.min_time_ms:
            self.min_time_ms = elapsed
        if elapsed > self.max_time_ms:
            self.max_time_ms = elapsed

    def to_dict(self) -> Dict[str, Any]:
        """
        Render the statistics as a plain dictionary.

        Returns:
            Counters plus rounded timings; ``min_time_ms`` is reported as 0
            while it still holds its initial +inf value.
        """
        if self.min_time_ms == float('inf'):
            fastest = 0
        else:
            fastest = round(self.min_time_ms, 2)

        return {
            "total_calls": self.total_calls,
            "successful_calls": self.successful_calls,
            "failed_calls": self.failed_calls,
            "success_rate_percent": round(self.success_rate, 2),
            "avg_time_ms": round(self.avg_time_ms, 2),
            "min_time_ms": fastest,
            "max_time_ms": round(self.max_time_ms, 2),
            "total_time_ms": round(self.total_time_ms, 2),
        }
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class MetricsCollector:
    """
    Collects and aggregates metrics for tool executions.

    Features:
    - Per-tool statistics
    - Recent execution history
    - Error tracking
    - Performance monitoring

    Locking: each public coroutine acquires ``self._lock`` once and then
    delegates to a synchronous ``_*_unlocked`` helper. ``asyncio.Lock`` is
    not reentrant, so code that already holds the lock must call the helpers
    rather than awaiting another public coroutine.
    """

    def __init__(self, max_history: int = 1000):
        """
        Initialize metrics collector.

        Args:
            max_history: Maximum number of recent metrics to keep
        """
        self.max_history = max_history
        self._metrics: List[ToolMetric] = []
        self._tool_stats: Dict[str, ToolStats] = defaultdict(ToolStats)
        self._lock = asyncio.Lock()
        self._start_time = datetime.now()

        logger.info("Metrics collector initialized (max_history=%s)", max_history)

    async def record_execution(
        self,
        tool_name: str,
        execution_time_ms: float,
        success: bool,
        error_message: Optional[str] = None,
        args: Optional[Dict[str, Any]] = None
    ) -> None:
        """
        Record a tool execution.

        Args:
            tool_name: Name of the tool
            execution_time_ms: Execution time in milliseconds
            success: Whether execution was successful
            error_message: Error message if failed
            args: Tool arguments (optional, for debugging)
        """
        metric = ToolMetric(
            tool_name=tool_name,
            execution_time_ms=execution_time_ms,
            success=success,
            error_message=error_message,
            args=args
        )

        async with self._lock:
            self._metrics.append(metric)

            # Drop the oldest entries in place. Counting the overflow (rather
            # than slicing with [-max_history:]) also works for
            # max_history == 0, where a [-0:] slice would keep everything.
            overflow = len(self._metrics) - self.max_history
            if overflow > 0:
                del self._metrics[:overflow]

            # Aggregated stats are cumulative and are not trimmed with history.
            self._tool_stats[tool_name].add_metric(metric)

        if success:
            logger.debug(
                "Metric recorded: %s completed in %.2fms",
                tool_name, execution_time_ms
            )
        else:
            logger.warning(
                "Metric recorded: %s failed after %.2fms - %s",
                tool_name, execution_time_ms, error_message
            )

    async def get_tool_stats(self, tool_name: Optional[str] = None) -> Dict[str, Any]:
        """
        Get statistics for a specific tool or all tools.

        Args:
            tool_name: Specific tool name, or None for all tools

        Returns:
            Dictionary with tool statistics
        """
        async with self._lock:
            return self._tool_stats_unlocked(tool_name)

    async def get_recent_metrics(self, limit: int = 100) -> List[Dict[str, Any]]:
        """
        Get recent metrics.

        Args:
            limit: Maximum number of metrics to return; values <= 0 yield
                an empty list

        Returns:
            List of recent metrics
        """
        async with self._lock:
            return self._recent_unlocked(limit)

    async def get_failed_executions(self, limit: int = 50) -> List[Dict[str, Any]]:
        """
        Get recent failed executions.

        Args:
            limit: Maximum number of failures to return; values <= 0 yield
                an empty list

        Returns:
            List of failed execution metrics
        """
        async with self._lock:
            return self._failed_unlocked(limit)

    async def get_slow_executions(
        self,
        threshold_ms: float = 1000,
        limit: int = 50
    ) -> List[Dict[str, Any]]:
        """
        Get executions that exceeded a time threshold.

        Args:
            threshold_ms: Threshold in milliseconds
            limit: Maximum number of results; values <= 0 yield an empty list

        Returns:
            List of slow execution metrics
        """
        async with self._lock:
            return self._slow_unlocked(threshold_ms, limit)

    async def get_summary(self) -> Dict[str, Any]:
        """
        Get overall metrics summary.

        Returns:
            Dictionary with summary statistics
        """
        async with self._lock:
            return self._summary_unlocked()

    def _get_most_used_tool(self) -> Optional[str]:
        """Get the most frequently used tool"""
        if not self._tool_stats:
            return None

        return max(
            self._tool_stats.items(),
            key=lambda x: x[1].total_calls
        )[0]

    def _get_slowest_tool(self) -> Optional[str]:
        """Get the tool with highest average execution time"""
        if not self._tool_stats:
            return None

        return max(
            self._tool_stats.items(),
            key=lambda x: x[1].avg_time_ms
        )[0]

    async def reset(self) -> None:
        """Reset all metrics"""
        async with self._lock:
            self._metrics.clear()
            self._tool_stats.clear()
            self._start_time = datetime.now()
            logger.info("Metrics reset")

    async def export_metrics(self) -> Dict[str, Any]:
        """
        Export all metrics data.

        The lock is taken once and every section is built through the
        unlocked helpers, so the export is a single consistent snapshot and
        never tries to re-acquire the non-reentrant lock.

        Returns:
            Complete metrics export
        """
        async with self._lock:
            return {
                "summary": self._summary_unlocked(),
                "tool_stats": self._tool_stats_unlocked(),
                "recent_metrics": self._recent_unlocked(100),
                "failed_executions": self._failed_unlocked(50),
                "slow_executions": self._slow_unlocked(1000, 50)
            }

    # ---- helpers below assume the caller already holds self._lock ----

    @staticmethod
    def _tail(items: List[Any], limit: int) -> List[Any]:
        """Return the last ``limit`` items, or an empty list when limit <= 0."""
        # items[-0:] is the whole list, so non-positive limits need a guard.
        return items[-limit:] if limit > 0 else []

    def _tool_stats_unlocked(self, tool_name: Optional[str] = None) -> Dict[str, Any]:
        """Build the get_tool_stats() result without touching the lock."""
        if tool_name:
            # .get() rather than indexing: the mapping is a defaultdict and
            # indexing would insert an empty entry for an unknown tool.
            stats = self._tool_stats.get(tool_name)
            if stats is None:
                stats = ToolStats()
            return {
                "tool_name": tool_name,
                "stats": stats.to_dict()
            }

        return {
            name: stats.to_dict()
            for name, stats in self._tool_stats.items()
        }

    def _recent_unlocked(self, limit: int) -> List[Dict[str, Any]]:
        """Build the get_recent_metrics() result without touching the lock."""
        return [m.to_dict() for m in self._tail(self._metrics, limit)]

    def _failed_unlocked(self, limit: int) -> List[Dict[str, Any]]:
        """Build the get_failed_executions() result without touching the lock."""
        failures = [m for m in self._metrics if not m.success]
        return [m.to_dict() for m in self._tail(failures, limit)]

    def _slow_unlocked(self, threshold_ms: float, limit: int) -> List[Dict[str, Any]]:
        """Build the get_slow_executions() result without touching the lock."""
        slow = [m for m in self._metrics if m.execution_time_ms > threshold_ms]
        return [m.to_dict() for m in self._tail(slow, limit)]

    def _summary_unlocked(self) -> Dict[str, Any]:
        """Build the get_summary() result without touching the lock."""
        # Totals here cover only the retained history window (self._metrics),
        # not the cumulative per-tool counters.
        total_executions = len(self._metrics)
        successful = sum(1 for m in self._metrics if m.success)
        failed = total_executions - successful

        if total_executions > 0:
            avg_time = sum(m.execution_time_ms for m in self._metrics) / total_executions
            success_rate = (successful / total_executions) * 100
        else:
            avg_time = 0.0
            success_rate = 0.0

        uptime = datetime.now() - self._start_time

        return {
            "uptime_seconds": uptime.total_seconds(),
            "uptime_human": str(uptime).split('.')[0],  # Remove microseconds
            "total_executions": total_executions,
            "successful_executions": successful,
            "failed_executions": failed,
            "success_rate_percent": round(success_rate, 2),
            "avg_execution_time_ms": round(avg_time, 2),
            "unique_tools_used": len(self._tool_stats),
            "most_used_tool": self._get_most_used_tool(),
            "slowest_tool": self._get_slowest_tool()
        }
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
# Global metrics collector instance
|
|
322
|
+
_metrics_collector: Optional[MetricsCollector] = None
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def get_metrics_collector() -> MetricsCollector:
    """Return the module-wide collector, building it on the first call."""
    global _metrics_collector
    collector = _metrics_collector
    if collector is None:
        collector = MetricsCollector()
        _metrics_collector = collector
    return collector
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
# Convenience functions
|
|
334
|
+
async def record_tool_execution(
    tool_name: str,
    execution_time_ms: float,
    success: bool,
    error_message: Optional[str] = None,
    args: Optional[Dict[str, Any]] = None
) -> None:
    """
    Forward one tool execution to the global metrics collector.

    Args:
        tool_name: Name of the tool
        execution_time_ms: Execution time in milliseconds
        success: Whether execution was successful
        error_message: Error message if failed
        args: Tool arguments (optional)
    """
    collector = get_metrics_collector()
    await collector.record_execution(
        tool_name, execution_time_ms, success, error_message, args
    )
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
async def get_metrics_summary() -> Dict[str, Any]:
    """Return the summary produced by the global metrics collector."""
    collector = get_metrics_collector()
    summary = await collector.get_summary()
    return summary
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
async def get_tool_metrics(tool_name: Optional[str] = None) -> Dict[str, Any]:
    """
    Return tool statistics from the global metrics collector.

    Args:
        tool_name: Tool to look up; passed through unchanged to the
            collector's ``get_tool_stats``.
    """
    collector = get_metrics_collector()
    stats = await collector.get_tool_stats(tool_name)
    return stats
|