chuk-tool-processor 0.6__py3-none-any.whl → 0.6.2__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of chuk-tool-processor might be problematic. More details are available through the link on the original registry page.
- chuk_tool_processor/execution/strategies/inprocess_strategy.py +107 -8
- chuk_tool_processor/execution/strategies/subprocess_strategy.py +110 -13
- chuk_tool_processor/mcp/mcp_tool.py +351 -149
- chuk_tool_processor/mcp/register_mcp_tools.py +80 -33
- chuk_tool_processor/mcp/stream_manager.py +319 -65
- chuk_tool_processor-0.6.2.dist-info/METADATA +697 -0
- {chuk_tool_processor-0.6.dist-info → chuk_tool_processor-0.6.2.dist-info}/RECORD +9 -9
- chuk_tool_processor-0.6.dist-info/METADATA +0 -830
- {chuk_tool_processor-0.6.dist-info → chuk_tool_processor-0.6.2.dist-info}/WHEEL +0 -0
- {chuk_tool_processor-0.6.dist-info → chuk_tool_processor-0.6.2.dist-info}/top_level.txt +0 -0
|
@@ -1,15 +1,22 @@
|
|
|
1
1
|
#!/usr/bin/env python
|
|
2
2
|
# chuk_tool_processor/mcp/mcp_tool.py
|
|
3
3
|
"""
|
|
4
|
-
MCP tool shim that delegates execution to a StreamManager
|
|
5
|
-
handling its own lazy bootstrap when needed.
|
|
4
|
+
MCP tool shim that delegates execution to a StreamManager.
|
|
6
5
|
|
|
7
|
-
FIXED:
|
|
6
|
+
FIXED: Removed config file management - MCPTool should only handle execution,
|
|
7
|
+
not configuration or bootstrapping. Configuration is handled at registration time.
|
|
8
|
+
|
|
9
|
+
CORE PRINCIPLE: MCPTool wraps a StreamManager and delegates calls to it.
|
|
10
|
+
If the StreamManager becomes unavailable, return graceful errors rather than
|
|
11
|
+
trying to recreate it with config files.
|
|
8
12
|
"""
|
|
9
13
|
from __future__ import annotations
|
|
10
14
|
|
|
11
15
|
import asyncio
|
|
16
|
+
import time
|
|
17
|
+
from enum import Enum
|
|
12
18
|
from typing import Any, Dict, List, Optional
|
|
19
|
+
from dataclasses import dataclass
|
|
13
20
|
|
|
14
21
|
from chuk_tool_processor.logging import get_logger
|
|
15
22
|
from chuk_tool_processor.mcp.stream_manager import StreamManager
|
|
@@ -17,98 +24,101 @@ from chuk_tool_processor.mcp.stream_manager import StreamManager
|
|
|
17
24
|
logger = get_logger("chuk_tool_processor.mcp.mcp_tool")
|
|
18
25
|
|
|
19
26
|
|
|
27
|
+
class ConnectionState(Enum):
|
|
28
|
+
"""Connection states for the MCP tool."""
|
|
29
|
+
HEALTHY = "healthy"
|
|
30
|
+
DEGRADED = "degraded"
|
|
31
|
+
DISCONNECTED = "disconnected"
|
|
32
|
+
FAILED = "failed"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
|
|
36
|
+
class RecoveryConfig:
|
|
37
|
+
"""Configuration for connection recovery behavior."""
|
|
38
|
+
max_retries: int = 3
|
|
39
|
+
initial_backoff: float = 1.0
|
|
40
|
+
max_backoff: float = 30.0
|
|
41
|
+
backoff_multiplier: float = 2.0
|
|
42
|
+
circuit_breaker_threshold: int = 5
|
|
43
|
+
circuit_breaker_timeout: float = 60.0
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
|
|
47
|
+
class ConnectionStats:
|
|
48
|
+
"""Statistics for connection monitoring."""
|
|
49
|
+
total_calls: int = 0
|
|
50
|
+
successful_calls: int = 0
|
|
51
|
+
failed_calls: int = 0
|
|
52
|
+
connection_errors: int = 0
|
|
53
|
+
last_success_time: Optional[float] = None
|
|
54
|
+
last_failure_time: Optional[float] = None
|
|
55
|
+
|
|
56
|
+
|
|
20
57
|
class MCPTool:
|
|
21
58
|
"""
|
|
22
59
|
Wrap a remote MCP tool so it can be called like a local tool.
|
|
23
|
-
|
|
24
|
-
You may pass an existing ``StreamManager`` *positionally* (for legacy
|
|
25
|
-
code) or via the named parameter.
|
|
26
|
-
|
|
27
|
-
If no ``StreamManager`` is supplied the class will start one on first
|
|
28
|
-
use via ``setup_mcp_stdio``.
|
|
29
60
|
|
|
30
|
-
|
|
61
|
+
SIMPLIFIED: This class now focuses only on execution delegation.
|
|
62
|
+
It does NOT handle configuration files or StreamManager bootstrapping.
|
|
31
63
|
"""
|
|
32
64
|
|
|
33
|
-
# ------------------------------------------------------------------ #
|
|
34
65
|
def __init__(
|
|
35
66
|
self,
|
|
36
67
|
tool_name: str = "",
|
|
37
68
|
stream_manager: Optional[StreamManager] = None,
|
|
38
69
|
*,
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
namespace: str = "stdio",
|
|
43
|
-
default_timeout: Optional[float] = None
|
|
70
|
+
default_timeout: Optional[float] = None,
|
|
71
|
+
enable_resilience: bool = True,
|
|
72
|
+
recovery_config: Optional[RecoveryConfig] = None,
|
|
44
73
|
) -> None:
|
|
45
74
|
if not tool_name:
|
|
46
|
-
raise ValueError(
|
|
47
|
-
"MCPTool requires a tool_name. "
|
|
48
|
-
"This error usually occurs during subprocess serialization. "
|
|
49
|
-
"Make sure the tool is properly registered with a name."
|
|
50
|
-
)
|
|
75
|
+
raise ValueError("MCPTool requires a tool_name")
|
|
51
76
|
|
|
52
77
|
self.tool_name = tool_name
|
|
53
78
|
self._sm: Optional[StreamManager] = stream_manager
|
|
54
|
-
self.default_timeout = default_timeout
|
|
55
|
-
|
|
56
|
-
# Boot-strap parameters (only needed if _sm is None)
|
|
57
|
-
self._cfg_file = cfg_file
|
|
58
|
-
self._servers = servers or []
|
|
59
|
-
self._server_names = server_names or {}
|
|
60
|
-
self._namespace = namespace
|
|
61
|
-
|
|
62
|
-
# Create lock only when needed (not during deserialization)
|
|
63
|
-
self._sm_lock: Optional[asyncio.Lock] = None
|
|
79
|
+
self.default_timeout = default_timeout or 30.0
|
|
64
80
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
81
|
+
# Resilience features
|
|
82
|
+
self.enable_resilience = enable_resilience
|
|
83
|
+
self.recovery_config = recovery_config or RecoveryConfig()
|
|
84
|
+
|
|
85
|
+
# State tracking (only if resilience enabled)
|
|
86
|
+
if self.enable_resilience:
|
|
87
|
+
self.connection_state = ConnectionState.HEALTHY if stream_manager else ConnectionState.DISCONNECTED
|
|
88
|
+
self.stats = ConnectionStats()
|
|
89
|
+
|
|
90
|
+
# Circuit breaker state
|
|
91
|
+
self._circuit_open = False
|
|
92
|
+
self._circuit_open_time: Optional[float] = None
|
|
93
|
+
self._consecutive_failures = 0
|
|
70
94
|
|
|
71
95
|
# ------------------------------------------------------------------ #
|
|
72
96
|
# Serialization support for subprocess execution
|
|
73
97
|
# ------------------------------------------------------------------ #
|
|
74
98
|
def __getstate__(self) -> Dict[str, Any]:
|
|
75
99
|
"""
|
|
76
|
-
|
|
100
|
+
Serialize for subprocess execution.
|
|
77
101
|
|
|
78
|
-
|
|
79
|
-
The
|
|
102
|
+
SIMPLIFIED: Only preserve essential execution state, not configuration.
|
|
103
|
+
The StreamManager will be None after deserialization - that's expected.
|
|
80
104
|
"""
|
|
81
105
|
state = self.__dict__.copy()
|
|
82
106
|
|
|
83
107
|
# Remove non-serializable items
|
|
84
|
-
state['_sm'] = None # StreamManager will be
|
|
85
|
-
state['_sm_lock'] = None # Lock will be recreated when needed
|
|
86
|
-
|
|
87
|
-
# Ensure we have the necessary configuration for subprocess
|
|
88
|
-
# If no servers specified, default to the tool name (common pattern)
|
|
89
|
-
if not state.get('_servers'):
|
|
90
|
-
# Extract server name from tool_name (e.g., "get_current_time" -> "time")
|
|
91
|
-
# This is a heuristic - adjust based on your naming convention
|
|
92
|
-
if 'time' in self.tool_name.lower():
|
|
93
|
-
state['_servers'] = ['time']
|
|
94
|
-
state['_server_names'] = {0: 'time'}
|
|
95
|
-
else:
|
|
96
|
-
# Default fallback - use the tool name itself
|
|
97
|
-
state['_servers'] = [self.tool_name]
|
|
98
|
-
state['_server_names'] = {0: self.tool_name}
|
|
108
|
+
state['_sm'] = None # StreamManager will be None in subprocess
|
|
99
109
|
|
|
100
|
-
#
|
|
101
|
-
if
|
|
102
|
-
state['
|
|
110
|
+
# Reset connection state for subprocess
|
|
111
|
+
if self.enable_resilience:
|
|
112
|
+
state['connection_state'] = ConnectionState.DISCONNECTED
|
|
103
113
|
|
|
104
|
-
logger.debug(f"Serializing MCPTool '{self.tool_name}' for subprocess
|
|
114
|
+
logger.debug(f"Serializing MCPTool '{self.tool_name}' for subprocess")
|
|
105
115
|
return state
|
|
106
116
|
|
|
107
117
|
def __setstate__(self, state: Dict[str, Any]) -> None:
|
|
108
118
|
"""
|
|
109
|
-
|
|
119
|
+
Deserialize after subprocess execution.
|
|
110
120
|
|
|
111
|
-
|
|
121
|
+
SIMPLIFIED: Just restore state. StreamManager will be None and that's fine.
|
|
112
122
|
"""
|
|
113
123
|
self.__dict__.update(state)
|
|
114
124
|
|
|
@@ -116,102 +126,246 @@ class MCPTool:
|
|
|
116
126
|
if not hasattr(self, 'tool_name') or not self.tool_name:
|
|
117
127
|
raise ValueError("Invalid MCPTool state: missing tool_name")
|
|
118
128
|
|
|
119
|
-
#
|
|
129
|
+
# StreamManager will be None in subprocess - that's expected
|
|
120
130
|
self._sm = None
|
|
121
|
-
|
|
131
|
+
|
|
132
|
+
# Initialize resilience state if enabled
|
|
133
|
+
if self.enable_resilience:
|
|
134
|
+
if not hasattr(self, 'connection_state'):
|
|
135
|
+
self.connection_state = ConnectionState.DISCONNECTED
|
|
136
|
+
if not hasattr(self, 'stats'):
|
|
137
|
+
self.stats = ConnectionStats()
|
|
122
138
|
|
|
123
139
|
logger.debug(f"Deserialized MCPTool '{self.tool_name}' in subprocess")
|
|
124
140
|
|
|
125
141
|
# ------------------------------------------------------------------ #
|
|
126
|
-
async def _ensure_stream_manager(self) -> StreamManager:
|
|
127
|
-
"""
|
|
128
|
-
Lazily create / attach a StreamManager.
|
|
129
|
-
|
|
130
|
-
Importing ``setup_mcp_stdio`` *inside* this function prevents the
|
|
131
|
-
circular-import seen earlier. ★
|
|
132
|
-
"""
|
|
133
|
-
if self._sm is not None:
|
|
134
|
-
return self._sm
|
|
135
|
-
|
|
136
|
-
# Use the lock, creating it if needed
|
|
137
|
-
async with self._ensure_lock():
|
|
138
|
-
if self._sm is None: # re-check inside lock
|
|
139
|
-
logger.info(
|
|
140
|
-
"Boot-strapping MCP stdio transport for '%s'", self.tool_name
|
|
141
|
-
)
|
|
142
|
-
|
|
143
|
-
# ★ local import avoids circular dependency
|
|
144
|
-
from chuk_tool_processor.mcp.setup_mcp_stdio import setup_mcp_stdio
|
|
145
|
-
|
|
146
|
-
_, self._sm = await setup_mcp_stdio(
|
|
147
|
-
config_file=self._cfg_file,
|
|
148
|
-
servers=self._servers,
|
|
149
|
-
server_names=self._server_names,
|
|
150
|
-
namespace=self._namespace,
|
|
151
|
-
)
|
|
152
|
-
|
|
153
|
-
return self._sm # type: ignore[return-value]
|
|
154
|
-
|
|
155
142
|
async def execute(self, timeout: Optional[float] = None, **kwargs: Any) -> Any:
|
|
156
143
|
"""
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
timeout : float | None
|
|
162
|
-
If provided, forward this to StreamManager. Otherwise fall back
|
|
163
|
-
to ``self.default_timeout``.
|
|
164
|
-
**kwargs
|
|
165
|
-
Arguments forwarded to the tool.
|
|
166
|
-
|
|
167
|
-
Returns
|
|
168
|
-
-------
|
|
169
|
-
Any
|
|
170
|
-
The ``content`` of the remote tool response.
|
|
171
|
-
|
|
172
|
-
Raises
|
|
173
|
-
------
|
|
174
|
-
RuntimeError
|
|
175
|
-
The remote tool returned an error payload.
|
|
176
|
-
asyncio.TimeoutError
|
|
177
|
-
The call exceeded the chosen timeout.
|
|
144
|
+
Execute the tool, returning graceful errors if StreamManager unavailable.
|
|
145
|
+
|
|
146
|
+
SIMPLIFIED: If no StreamManager, return a structured error response
|
|
147
|
+
instead of trying to bootstrap one.
|
|
178
148
|
"""
|
|
179
|
-
|
|
149
|
+
# Check if we have a StreamManager
|
|
150
|
+
if self._sm is None:
|
|
151
|
+
return {
|
|
152
|
+
"error": f"Tool '{self.tool_name}' is not available (no stream manager)",
|
|
153
|
+
"tool_name": self.tool_name,
|
|
154
|
+
"available": False,
|
|
155
|
+
"reason": "disconnected"
|
|
156
|
+
}
|
|
180
157
|
|
|
181
|
-
#
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
158
|
+
# If resilience is disabled, use simple execution
|
|
159
|
+
if not self.enable_resilience:
|
|
160
|
+
return await self._simple_execute(timeout, **kwargs)
|
|
161
|
+
|
|
162
|
+
# Resilient execution
|
|
163
|
+
return await self._resilient_execute(timeout, **kwargs)
|
|
164
|
+
|
|
165
|
+
async def _simple_execute(self, timeout: Optional[float] = None, **kwargs: Any) -> Any:
|
|
166
|
+
"""Simple execution without resilience features."""
|
|
167
|
+
effective_timeout = timeout if timeout is not None else self.default_timeout
|
|
185
168
|
|
|
186
|
-
call_kwargs
|
|
169
|
+
call_kwargs = {
|
|
187
170
|
"tool_name": self.tool_name,
|
|
188
171
|
"arguments": kwargs,
|
|
189
172
|
}
|
|
190
173
|
if effective_timeout is not None:
|
|
191
174
|
call_kwargs["timeout"] = effective_timeout
|
|
192
|
-
logger.debug(
|
|
193
|
-
"Forwarding timeout=%ss to StreamManager for tool '%s'",
|
|
194
|
-
effective_timeout,
|
|
195
|
-
self.tool_name,
|
|
196
|
-
)
|
|
197
175
|
|
|
198
176
|
try:
|
|
199
|
-
result = await
|
|
177
|
+
result = await self._sm.call_tool(**call_kwargs)
|
|
200
178
|
except asyncio.TimeoutError:
|
|
201
|
-
logger.warning(
|
|
202
|
-
"MCP tool '%s' timed out after %ss",
|
|
203
|
-
self.tool_name,
|
|
204
|
-
effective_timeout,
|
|
205
|
-
)
|
|
179
|
+
logger.warning(f"MCP tool '{self.tool_name}' timed out after {effective_timeout}s")
|
|
206
180
|
raise
|
|
207
181
|
|
|
208
182
|
if result.get("isError"):
|
|
209
183
|
err = result.get("error", "Unknown error")
|
|
210
|
-
logger.error("Remote MCP error from '
|
|
184
|
+
logger.error(f"Remote MCP error from '{self.tool_name}': {err}")
|
|
211
185
|
raise RuntimeError(err)
|
|
212
186
|
|
|
213
187
|
return result.get("content")
|
|
214
188
|
|
|
189
|
+
async def _resilient_execute(self, timeout: Optional[float] = None, **kwargs: Any) -> Any:
|
|
190
|
+
"""Resilient execution with circuit breaker and health checks."""
|
|
191
|
+
# Check circuit breaker
|
|
192
|
+
if self._is_circuit_open():
|
|
193
|
+
return {
|
|
194
|
+
"error": f"Circuit breaker open for tool '{self.tool_name}' - too many recent failures",
|
|
195
|
+
"tool_name": self.tool_name,
|
|
196
|
+
"available": False,
|
|
197
|
+
"reason": "circuit_breaker"
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
effective_timeout = timeout if timeout is not None else self.default_timeout
|
|
201
|
+
self.stats.total_calls += 1
|
|
202
|
+
|
|
203
|
+
# Check if StreamManager is healthy
|
|
204
|
+
if not await self._is_stream_manager_healthy():
|
|
205
|
+
await self._record_failure(is_connection_error=True)
|
|
206
|
+
return {
|
|
207
|
+
"error": f"Tool '{self.tool_name}' is not available (unhealthy connection)",
|
|
208
|
+
"tool_name": self.tool_name,
|
|
209
|
+
"available": False,
|
|
210
|
+
"reason": "unhealthy"
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
# Try execution with retries
|
|
214
|
+
max_attempts = self.recovery_config.max_retries + 1
|
|
215
|
+
backoff = self.recovery_config.initial_backoff
|
|
216
|
+
|
|
217
|
+
for attempt in range(max_attempts):
|
|
218
|
+
try:
|
|
219
|
+
result = await self._execute_with_timeout(effective_timeout, **kwargs)
|
|
220
|
+
await self._record_success()
|
|
221
|
+
return result
|
|
222
|
+
|
|
223
|
+
except asyncio.TimeoutError:
|
|
224
|
+
error_msg = f"Tool '{self.tool_name}' timed out after {effective_timeout}s"
|
|
225
|
+
logger.warning(error_msg)
|
|
226
|
+
await self._record_failure()
|
|
227
|
+
|
|
228
|
+
if attempt == max_attempts - 1:
|
|
229
|
+
return {
|
|
230
|
+
"error": error_msg,
|
|
231
|
+
"tool_name": self.tool_name,
|
|
232
|
+
"available": False,
|
|
233
|
+
"reason": "timeout"
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
except Exception as e:
|
|
237
|
+
error_str = str(e)
|
|
238
|
+
is_connection_error = self._is_connection_error(e)
|
|
239
|
+
|
|
240
|
+
logger.warning(f"Tool '{self.tool_name}' attempt {attempt + 1} failed: {error_str}")
|
|
241
|
+
await self._record_failure(is_connection_error)
|
|
242
|
+
|
|
243
|
+
if attempt == max_attempts - 1:
|
|
244
|
+
return {
|
|
245
|
+
"error": f"Tool execution failed after {max_attempts} attempts: {error_str}",
|
|
246
|
+
"tool_name": self.tool_name,
|
|
247
|
+
"available": False,
|
|
248
|
+
"reason": "execution_failed"
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
# Exponential backoff
|
|
252
|
+
if attempt < max_attempts - 1:
|
|
253
|
+
logger.debug(f"Waiting {backoff:.1f}s before retry {attempt + 2}")
|
|
254
|
+
await asyncio.sleep(backoff)
|
|
255
|
+
backoff = min(backoff * self.recovery_config.backoff_multiplier, self.recovery_config.max_backoff)
|
|
256
|
+
|
|
257
|
+
# Should never reach here
|
|
258
|
+
return {
|
|
259
|
+
"error": f"Tool '{self.tool_name}' failed after all attempts",
|
|
260
|
+
"tool_name": self.tool_name,
|
|
261
|
+
"available": False,
|
|
262
|
+
"reason": "exhausted_retries"
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
async def _execute_with_timeout(self, timeout: float, **kwargs: Any) -> Any:
|
|
266
|
+
"""Execute the tool with timeout."""
|
|
267
|
+
call_kwargs = {
|
|
268
|
+
"tool_name": self.tool_name,
|
|
269
|
+
"arguments": kwargs,
|
|
270
|
+
}
|
|
271
|
+
if timeout is not None:
|
|
272
|
+
call_kwargs["timeout"] = timeout
|
|
273
|
+
|
|
274
|
+
try:
|
|
275
|
+
result = await asyncio.wait_for(
|
|
276
|
+
self._sm.call_tool(**call_kwargs),
|
|
277
|
+
timeout=(timeout + 5.0) if timeout else None
|
|
278
|
+
)
|
|
279
|
+
|
|
280
|
+
if result.get("isError"):
|
|
281
|
+
error = result.get("error", "Unknown error")
|
|
282
|
+
raise RuntimeError(f"Tool execution failed: {error}")
|
|
283
|
+
|
|
284
|
+
return result.get("content")
|
|
285
|
+
|
|
286
|
+
except asyncio.TimeoutError:
|
|
287
|
+
self.connection_state = ConnectionState.DEGRADED
|
|
288
|
+
raise
|
|
289
|
+
except Exception as e:
|
|
290
|
+
if self._is_connection_error(e):
|
|
291
|
+
self.connection_state = ConnectionState.DISCONNECTED
|
|
292
|
+
else:
|
|
293
|
+
self.connection_state = ConnectionState.DEGRADED
|
|
294
|
+
raise
|
|
295
|
+
|
|
296
|
+
async def _is_stream_manager_healthy(self) -> bool:
|
|
297
|
+
"""Check if the StreamManager is healthy."""
|
|
298
|
+
if self._sm is None:
|
|
299
|
+
return False
|
|
300
|
+
|
|
301
|
+
try:
|
|
302
|
+
ping_results = await asyncio.wait_for(self._sm.ping_servers(), timeout=3.0)
|
|
303
|
+
healthy_count = sum(1 for result in ping_results if result.get("ok", False))
|
|
304
|
+
return healthy_count > 0
|
|
305
|
+
except Exception as e:
|
|
306
|
+
logger.debug(f"Health check failed for '{self.tool_name}': {e}")
|
|
307
|
+
return False
|
|
308
|
+
|
|
309
|
+
def _is_connection_error(self, exception: Exception) -> bool:
|
|
310
|
+
"""Determine if an exception indicates a connection problem."""
|
|
311
|
+
error_str = str(exception).lower()
|
|
312
|
+
connection_indicators = [
|
|
313
|
+
"connection lost", "connection closed", "connection refused",
|
|
314
|
+
"broken pipe", "timeout", "eof", "pipe closed", "process died",
|
|
315
|
+
"no route to host", "no server found"
|
|
316
|
+
]
|
|
317
|
+
return any(indicator in error_str for indicator in connection_indicators)
|
|
318
|
+
|
|
319
|
+
async def _record_success(self) -> None:
|
|
320
|
+
"""Record a successful execution."""
|
|
321
|
+
self.stats.successful_calls += 1
|
|
322
|
+
self.stats.last_success_time = time.time()
|
|
323
|
+
self._consecutive_failures = 0
|
|
324
|
+
|
|
325
|
+
# Close circuit breaker if it was open
|
|
326
|
+
if self._circuit_open:
|
|
327
|
+
self._circuit_open = False
|
|
328
|
+
self._circuit_open_time = None
|
|
329
|
+
self.connection_state = ConnectionState.HEALTHY
|
|
330
|
+
logger.info(f"Circuit breaker closed for tool '{self.tool_name}' after successful execution")
|
|
331
|
+
|
|
332
|
+
async def _record_failure(self, is_connection_error: bool = False) -> None:
|
|
333
|
+
"""Record a failed execution."""
|
|
334
|
+
self.stats.failed_calls += 1
|
|
335
|
+
self.stats.last_failure_time = time.time()
|
|
336
|
+
|
|
337
|
+
if is_connection_error:
|
|
338
|
+
self.stats.connection_errors += 1
|
|
339
|
+
self.connection_state = ConnectionState.DISCONNECTED
|
|
340
|
+
else:
|
|
341
|
+
self.connection_state = ConnectionState.DEGRADED
|
|
342
|
+
|
|
343
|
+
self._consecutive_failures += 1
|
|
344
|
+
|
|
345
|
+
# Check if we should open the circuit breaker
|
|
346
|
+
if (self._consecutive_failures >= self.recovery_config.circuit_breaker_threshold and
|
|
347
|
+
not self._circuit_open):
|
|
348
|
+
self._circuit_open = True
|
|
349
|
+
self._circuit_open_time = time.time()
|
|
350
|
+
self.connection_state = ConnectionState.FAILED
|
|
351
|
+
logger.error(f"Circuit breaker opened for tool '{self.tool_name}' after {self._consecutive_failures} consecutive failures")
|
|
352
|
+
|
|
353
|
+
def _is_circuit_open(self) -> bool:
|
|
354
|
+
"""Check if the circuit breaker is currently open."""
|
|
355
|
+
if not self._circuit_open:
|
|
356
|
+
return False
|
|
357
|
+
|
|
358
|
+
# Check if enough time has passed to close the circuit
|
|
359
|
+
if (self._circuit_open_time and
|
|
360
|
+
time.time() - self._circuit_open_time >= self.recovery_config.circuit_breaker_timeout):
|
|
361
|
+
self._circuit_open = False
|
|
362
|
+
self._circuit_open_time = None
|
|
363
|
+
self.connection_state = ConnectionState.HEALTHY
|
|
364
|
+
logger.info(f"Circuit breaker reset for tool '{self.tool_name}' after timeout")
|
|
365
|
+
return False
|
|
366
|
+
|
|
367
|
+
return True
|
|
368
|
+
|
|
215
369
|
# ------------------------------------------------------------------ #
|
|
216
370
|
# Legacy method name support
|
|
217
371
|
async def _aexecute(self, timeout: Optional[float] = None, **kwargs: Any) -> Any:
|
|
@@ -219,25 +373,73 @@ class MCPTool:
|
|
|
219
373
|
return await self.execute(timeout=timeout, **kwargs)
|
|
220
374
|
|
|
221
375
|
# ------------------------------------------------------------------ #
|
|
222
|
-
# Utility
|
|
376
|
+
# Utility and monitoring methods
|
|
223
377
|
# ------------------------------------------------------------------ #
|
|
224
|
-
def
|
|
225
|
-
"""Check if this tool
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
return True
|
|
230
|
-
except Exception:
|
|
231
|
-
return False
|
|
378
|
+
def is_available(self) -> bool:
|
|
379
|
+
"""Check if this tool is currently available."""
|
|
380
|
+
return (self._sm is not None and
|
|
381
|
+
not self._is_circuit_open() and
|
|
382
|
+
self.connection_state in [ConnectionState.HEALTHY, ConnectionState.DEGRADED])
|
|
232
383
|
|
|
233
|
-
def
|
|
234
|
-
"""Get
|
|
235
|
-
|
|
384
|
+
def get_stats(self) -> Dict[str, Any]:
|
|
385
|
+
"""Get connection and execution statistics."""
|
|
386
|
+
if not self.enable_resilience:
|
|
387
|
+
return {
|
|
388
|
+
"tool_name": self.tool_name,
|
|
389
|
+
"resilience_enabled": False,
|
|
390
|
+
"available": self._sm is not None
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
success_rate = 0.0
|
|
394
|
+
if self.stats.total_calls > 0:
|
|
395
|
+
success_rate = (self.stats.successful_calls / self.stats.total_calls) * 100
|
|
396
|
+
|
|
236
397
|
return {
|
|
237
|
-
"tool_name":
|
|
238
|
-
"
|
|
239
|
-
"
|
|
240
|
-
"
|
|
241
|
-
"
|
|
242
|
-
"
|
|
243
|
-
|
|
398
|
+
"tool_name": self.tool_name,
|
|
399
|
+
"resilience_enabled": True,
|
|
400
|
+
"available": self.is_available(),
|
|
401
|
+
"state": self.connection_state.value,
|
|
402
|
+
"circuit_open": self._circuit_open,
|
|
403
|
+
"total_calls": self.stats.total_calls,
|
|
404
|
+
"successful_calls": self.stats.successful_calls,
|
|
405
|
+
"failed_calls": self.stats.failed_calls,
|
|
406
|
+
"connection_errors": self.stats.connection_errors,
|
|
407
|
+
"success_rate": success_rate,
|
|
408
|
+
"consecutive_failures": self._consecutive_failures,
|
|
409
|
+
"has_stream_manager": self._sm is not None,
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
def reset_circuit_breaker(self) -> None:
|
|
413
|
+
"""Manually reset the circuit breaker."""
|
|
414
|
+
if not self.enable_resilience:
|
|
415
|
+
return
|
|
416
|
+
|
|
417
|
+
self._circuit_open = False
|
|
418
|
+
self._circuit_open_time = None
|
|
419
|
+
self._consecutive_failures = 0
|
|
420
|
+
self.connection_state = ConnectionState.HEALTHY
|
|
421
|
+
logger.info(f"Circuit breaker manually reset for tool '{self.tool_name}'")
|
|
422
|
+
|
|
423
|
+
def disable_resilience(self) -> None:
|
|
424
|
+
"""Disable resilience features for this tool instance."""
|
|
425
|
+
self.enable_resilience = False
|
|
426
|
+
logger.info(f"Resilience features disabled for tool '{self.tool_name}'")
|
|
427
|
+
|
|
428
|
+
def set_stream_manager(self, stream_manager: Optional[StreamManager]) -> None:
|
|
429
|
+
"""
|
|
430
|
+
Set or update the StreamManager for this tool.
|
|
431
|
+
|
|
432
|
+
This can be used by external systems to reconnect tools after
|
|
433
|
+
StreamManager recovery at a higher level.
|
|
434
|
+
"""
|
|
435
|
+
self._sm = stream_manager
|
|
436
|
+
if stream_manager is not None:
|
|
437
|
+
self.connection_state = ConnectionState.HEALTHY
|
|
438
|
+
if self._circuit_open:
|
|
439
|
+
self._circuit_open = False
|
|
440
|
+
self._circuit_open_time = None
|
|
441
|
+
logger.info(f"Circuit breaker closed for tool '{self.tool_name}' due to new stream manager")
|
|
442
|
+
else:
|
|
443
|
+
self.connection_state = ConnectionState.DISCONNECTED
|
|
444
|
+
|
|
445
|
+
logger.debug(f"StreamManager {'set' if stream_manager else 'cleared'} for tool '{self.tool_name}'")
|