chuk-tool-processor 0.6__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

@@ -1,15 +1,22 @@
 #!/usr/bin/env python
 # chuk_tool_processor/mcp/mcp_tool.py
 """
-MCP tool shim that delegates execution to a StreamManager,
-handling its own lazy bootstrap when needed.
+MCP tool shim that delegates execution to a StreamManager.
 
-FIXED: Added subprocess serialization support by implementing __getstate__ and __setstate__
+FIXED: Removed config file management - MCPTool should only handle execution,
+not configuration or bootstrapping. Configuration is handled at registration time.
+
+CORE PRINCIPLE: MCPTool wraps a StreamManager and delegates calls to it.
+If the StreamManager becomes unavailable, return graceful errors rather than
+trying to recreate it with config files.
 """
 from __future__ import annotations
 
 import asyncio
+import time
+from enum import Enum
 from typing import Any, Dict, List, Optional
+from dataclasses import dataclass
 
 from chuk_tool_processor.logging import get_logger
 from chuk_tool_processor.mcp.stream_manager import StreamManager
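Per the new module docstring, an MCPTool is handed an already-initialised StreamManager at registration time and delegates every call to it. A minimal usage sketch under that assumption (`sm` stands in for a StreamManager created by the registration layer; the `timezone` argument is illustrative, not part of the package):

    from chuk_tool_processor.mcp.mcp_tool import MCPTool

    async def call_time_tool(sm) -> None:
        # Wrap the remote tool; kwargs passed to execute() become the MCP arguments.
        tool = MCPTool("get_current_time", stream_manager=sm, default_timeout=10.0)
        result = await tool.execute(timezone="UTC")
        print(result)  # tool content, or a structured error dict if sm is unavailable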
@@ -17,98 +24,101 @@ from chuk_tool_processor.mcp.stream_manager import StreamManager
 logger = get_logger("chuk_tool_processor.mcp.mcp_tool")
 
 
+class ConnectionState(Enum):
+    """Connection states for the MCP tool."""
+    HEALTHY = "healthy"
+    DEGRADED = "degraded"
+    DISCONNECTED = "disconnected"
+    FAILED = "failed"
+
+
+@dataclass
+class RecoveryConfig:
+    """Configuration for connection recovery behavior."""
+    max_retries: int = 3
+    initial_backoff: float = 1.0
+    max_backoff: float = 30.0
+    backoff_multiplier: float = 2.0
+    circuit_breaker_threshold: int = 5
+    circuit_breaker_timeout: float = 60.0
+
+
+@dataclass
+class ConnectionStats:
+    """Statistics for connection monitoring."""
+    total_calls: int = 0
+    successful_calls: int = 0
+    failed_calls: int = 0
+    connection_errors: int = 0
+    last_success_time: Optional[float] = None
+    last_failure_time: Optional[float] = None
+
+
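With the defaults above, a failing call is attempted up to max_retries + 1 = 4 times, waiting 1 s, 2 s and 4 s between attempts (doubling each time, capped at max_backoff = 30 s); after 5 consecutive failures the circuit breaker opens and calls are refused for 60 s. A custom policy can be supplied per tool, for example (illustrative values, reusing the `sm` placeholder from the sketch above):

    # Tighter policy for a latency-sensitive deployment.
    config = RecoveryConfig(
        max_retries=1,                 # one retry after the initial attempt
        initial_backoff=0.5,           # wait 0.5 s before that retry
        circuit_breaker_threshold=3,   # open the circuit after 3 consecutive failures
        circuit_breaker_timeout=30.0,  # allow calls again 30 s after it opens
    )
    tool = MCPTool("get_current_time", stream_manager=sm, recovery_config=config)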
 class MCPTool:
     """
     Wrap a remote MCP tool so it can be called like a local tool.
-
-    You may pass an existing ``StreamManager`` *positionally* (for legacy
-    code) or via the named parameter.
-
-    If no ``StreamManager`` is supplied the class will start one on first
-    use via ``setup_mcp_stdio``.
 
-    FIXED: Added serialization support for subprocess execution.
+    SIMPLIFIED: This class now focuses only on execution delegation.
+    It does NOT handle configuration files or StreamManager bootstrapping.
     """
 
-    # ------------------------------------------------------------------ #
     def __init__(
         self,
         tool_name: str = "",
         stream_manager: Optional[StreamManager] = None,
         *,
-        cfg_file: str = "",
-        servers: Optional[List[str]] = None,
-        server_names: Optional[Dict[int, str]] = None,
-        namespace: str = "stdio",
-        default_timeout: Optional[float] = None
+        default_timeout: Optional[float] = None,
+        enable_resilience: bool = True,
+        recovery_config: Optional[RecoveryConfig] = None,
     ) -> None:
         if not tool_name:
-            raise ValueError(
-                "MCPTool requires a tool_name. "
-                "This error usually occurs during subprocess serialization. "
-                "Make sure the tool is properly registered with a name."
-            )
+            raise ValueError("MCPTool requires a tool_name")
 
         self.tool_name = tool_name
         self._sm: Optional[StreamManager] = stream_manager
-        self.default_timeout = default_timeout
-
-        # Boot-strap parameters (only needed if _sm is None)
-        self._cfg_file = cfg_file
-        self._servers = servers or []
-        self._server_names = server_names or {}
-        self._namespace = namespace
-
-        # Create lock only when needed (not during deserialization)
-        self._sm_lock: Optional[asyncio.Lock] = None
+        self.default_timeout = default_timeout or 30.0
 
-    def _ensure_lock(self) -> asyncio.Lock:
-        """Ensure the lock exists, creating it if necessary."""
-        if self._sm_lock is None:
-            self._sm_lock = asyncio.Lock()
-        return self._sm_lock
+        # Resilience features
+        self.enable_resilience = enable_resilience
+        self.recovery_config = recovery_config or RecoveryConfig()
+
+        # State tracking (only if resilience enabled)
+        if self.enable_resilience:
+            self.connection_state = ConnectionState.HEALTHY if stream_manager else ConnectionState.DISCONNECTED
+            self.stats = ConnectionStats()
+
+            # Circuit breaker state
+            self._circuit_open = False
+            self._circuit_open_time: Optional[float] = None
+            self._consecutive_failures = 0
 
     # ------------------------------------------------------------------ #
     # Serialization support for subprocess execution
     # ------------------------------------------------------------------ #
     def __getstate__(self) -> Dict[str, Any]:
         """
-        Custom serialization for pickle support.
+        Serialize for subprocess execution.
 
-        Excludes non-serializable async components and stream manager.
-        The subprocess will recreate these as needed.
+        SIMPLIFIED: Only preserve essential execution state, not configuration.
+        The StreamManager will be None after deserialization - that's expected.
         """
         state = self.__dict__.copy()
 
         # Remove non-serializable items
-        state['_sm'] = None  # StreamManager will be recreated in subprocess
-        state['_sm_lock'] = None  # Lock will be recreated when needed
-
-        # Ensure we have the necessary configuration for subprocess
-        # If no servers specified, default to the tool name (common pattern)
-        if not state.get('_servers'):
-            # Extract server name from tool_name (e.g., "get_current_time" -> "time")
-            # This is a heuristic - adjust based on your naming convention
-            if 'time' in self.tool_name.lower():
-                state['_servers'] = ['time']
-                state['_server_names'] = {0: 'time'}
-            else:
-                # Default fallback - use the tool name itself
-                state['_servers'] = [self.tool_name]
-                state['_server_names'] = {0: self.tool_name}
+        state['_sm'] = None  # StreamManager will be None in subprocess
 
-        # Ensure we have a config file path
-        if not state.get('_cfg_file'):
-            state['_cfg_file'] = 'server_config.json'
+        # Reset connection state for subprocess
+        if self.enable_resilience:
+            state['connection_state'] = ConnectionState.DISCONNECTED
 
-        logger.debug(f"Serializing MCPTool '{self.tool_name}' for subprocess with servers: {state['_servers']}")
+        logger.debug(f"Serializing MCPTool '{self.tool_name}' for subprocess")
 
         return state
 
     def __setstate__(self, state: Dict[str, Any]) -> None:
         """
-        Custom deserialization for pickle support.
+        Deserialize after subprocess execution.
 
-        Restores the object state and ensures required fields are set.
+        SIMPLIFIED: Just restore state. StreamManager will be None and that's fine.
         """
         self.__dict__.update(state)
 
@@ -116,102 +126,246 @@ class MCPTool:
         if not hasattr(self, 'tool_name') or not self.tool_name:
             raise ValueError("Invalid MCPTool state: missing tool_name")
 
-        # Initialize transient fields
+        # StreamManager will be None in subprocess - that's expected
         self._sm = None
-        self._sm_lock = None
+
+        # Initialize resilience state if enabled
+        if self.enable_resilience:
+            if not hasattr(self, 'connection_state'):
+                self.connection_state = ConnectionState.DISCONNECTED
+            if not hasattr(self, 'stats'):
+                self.stats = ConnectionStats()
 
         logger.debug(f"Deserialized MCPTool '{self.tool_name}' in subprocess")
 
     # ------------------------------------------------------------------ #
-    async def _ensure_stream_manager(self) -> StreamManager:
-        """
-        Lazily create / attach a StreamManager.
-
-        Importing ``setup_mcp_stdio`` *inside* this function prevents the
-        circular-import seen earlier. ★
-        """
-        if self._sm is not None:
-            return self._sm
-
-        # Use the lock, creating it if needed
-        async with self._ensure_lock():
-            if self._sm is None:  # re-check inside lock
-                logger.info(
-                    "Boot-strapping MCP stdio transport for '%s'", self.tool_name
-                )
-
-                # ★ local import avoids circular dependency
-                from chuk_tool_processor.mcp.setup_mcp_stdio import setup_mcp_stdio
-
-                _, self._sm = await setup_mcp_stdio(
-                    config_file=self._cfg_file,
-                    servers=self._servers,
-                    server_names=self._server_names,
-                    namespace=self._namespace,
-                )
-
-        return self._sm  # type: ignore[return-value]
-
     async def execute(self, timeout: Optional[float] = None, **kwargs: Any) -> Any:
         """
-        Invoke the remote MCP tool, guaranteeing that *one* timeout is enforced.
-
-        Parameters
-        ----------
-        timeout : float | None
-            If provided, forward this to StreamManager. Otherwise fall back
-            to ``self.default_timeout``.
-        **kwargs
-            Arguments forwarded to the tool.
-
-        Returns
-        -------
-        Any
-            The ``content`` of the remote tool response.
-
-        Raises
-        ------
-        RuntimeError
-            The remote tool returned an error payload.
-        asyncio.TimeoutError
-            The call exceeded the chosen timeout.
+        Execute the tool, returning graceful errors if StreamManager unavailable.
+
+        SIMPLIFIED: If no StreamManager, return a structured error response
+        instead of trying to bootstrap one.
         """
-        sm = await self._ensure_stream_manager()
+        # Check if we have a StreamManager
+        if self._sm is None:
+            return {
+                "error": f"Tool '{self.tool_name}' is not available (no stream manager)",
+                "tool_name": self.tool_name,
+                "available": False,
+                "reason": "disconnected"
+            }
 
-        # Pick the timeout we will enforce (may be None = no limit).
-        effective_timeout: Optional[float] = (
-            timeout if timeout is not None else self.default_timeout
-        )
+        # If resilience is disabled, use simple execution
+        if not self.enable_resilience:
+            return await self._simple_execute(timeout, **kwargs)
+
+        # Resilient execution
+        return await self._resilient_execute(timeout, **kwargs)
+
+    async def _simple_execute(self, timeout: Optional[float] = None, **kwargs: Any) -> Any:
+        """Simple execution without resilience features."""
+        effective_timeout = timeout if timeout is not None else self.default_timeout
 
-        call_kwargs: dict[str, Any] = {
+        call_kwargs = {
             "tool_name": self.tool_name,
             "arguments": kwargs,
         }
         if effective_timeout is not None:
             call_kwargs["timeout"] = effective_timeout
-            logger.debug(
-                "Forwarding timeout=%ss to StreamManager for tool '%s'",
-                effective_timeout,
-                self.tool_name,
-            )
 
         try:
-            result = await sm.call_tool(**call_kwargs)
+            result = await self._sm.call_tool(**call_kwargs)
         except asyncio.TimeoutError:
-            logger.warning(
-                "MCP tool '%s' timed out after %ss",
-                self.tool_name,
-                effective_timeout,
-            )
+            logger.warning(f"MCP tool '{self.tool_name}' timed out after {effective_timeout}s")
             raise
 
         if result.get("isError"):
             err = result.get("error", "Unknown error")
-            logger.error("Remote MCP error from '%s': %s", self.tool_name, err)
+            logger.error(f"Remote MCP error from '{self.tool_name}': {err}")
             raise RuntimeError(err)
 
         return result.get("content")
 
+    async def _resilient_execute(self, timeout: Optional[float] = None, **kwargs: Any) -> Any:
+        """Resilient execution with circuit breaker and health checks."""
+        # Check circuit breaker
+        if self._is_circuit_open():
+            return {
+                "error": f"Circuit breaker open for tool '{self.tool_name}' - too many recent failures",
+                "tool_name": self.tool_name,
+                "available": False,
+                "reason": "circuit_breaker"
+            }
+
+        effective_timeout = timeout if timeout is not None else self.default_timeout
+        self.stats.total_calls += 1
+
+        # Check if StreamManager is healthy
+        if not await self._is_stream_manager_healthy():
+            await self._record_failure(is_connection_error=True)
+            return {
+                "error": f"Tool '{self.tool_name}' is not available (unhealthy connection)",
+                "tool_name": self.tool_name,
+                "available": False,
+                "reason": "unhealthy"
+            }
+
+        # Try execution with retries
+        max_attempts = self.recovery_config.max_retries + 1
+        backoff = self.recovery_config.initial_backoff
+
+        for attempt in range(max_attempts):
+            try:
+                result = await self._execute_with_timeout(effective_timeout, **kwargs)
+                await self._record_success()
+                return result
+
+            except asyncio.TimeoutError:
+                error_msg = f"Tool '{self.tool_name}' timed out after {effective_timeout}s"
+                logger.warning(error_msg)
+                await self._record_failure()
+
+                if attempt == max_attempts - 1:
+                    return {
+                        "error": error_msg,
+                        "tool_name": self.tool_name,
+                        "available": False,
+                        "reason": "timeout"
+                    }
+
+            except Exception as e:
+                error_str = str(e)
+                is_connection_error = self._is_connection_error(e)
+
+                logger.warning(f"Tool '{self.tool_name}' attempt {attempt + 1} failed: {error_str}")
+                await self._record_failure(is_connection_error)
+
+                if attempt == max_attempts - 1:
+                    return {
+                        "error": f"Tool execution failed after {max_attempts} attempts: {error_str}",
+                        "tool_name": self.tool_name,
+                        "available": False,
+                        "reason": "execution_failed"
+                    }
+
+            # Exponential backoff
+            if attempt < max_attempts - 1:
+                logger.debug(f"Waiting {backoff:.1f}s before retry {attempt + 2}")
+                await asyncio.sleep(backoff)
+                backoff = min(backoff * self.recovery_config.backoff_multiplier, self.recovery_config.max_backoff)
+
+        # Should never reach here
+        return {
+            "error": f"Tool '{self.tool_name}' failed after all attempts",
+            "tool_name": self.tool_name,
+            "available": False,
+            "reason": "exhausted_retries"
+        }
+
+    async def _execute_with_timeout(self, timeout: float, **kwargs: Any) -> Any:
+        """Execute the tool with timeout."""
+        call_kwargs = {
+            "tool_name": self.tool_name,
+            "arguments": kwargs,
+        }
+        if timeout is not None:
+            call_kwargs["timeout"] = timeout
+
+        try:
+            result = await asyncio.wait_for(
+                self._sm.call_tool(**call_kwargs),
+                timeout=(timeout + 5.0) if timeout else None
+            )
+
+            if result.get("isError"):
+                error = result.get("error", "Unknown error")
+                raise RuntimeError(f"Tool execution failed: {error}")
+
+            return result.get("content")
+
+        except asyncio.TimeoutError:
+            self.connection_state = ConnectionState.DEGRADED
+            raise
+        except Exception as e:
+            if self._is_connection_error(e):
+                self.connection_state = ConnectionState.DISCONNECTED
+            else:
+                self.connection_state = ConnectionState.DEGRADED
+            raise
+
+    async def _is_stream_manager_healthy(self) -> bool:
+        """Check if the StreamManager is healthy."""
+        if self._sm is None:
+            return False
+
+        try:
+            ping_results = await asyncio.wait_for(self._sm.ping_servers(), timeout=3.0)
+            healthy_count = sum(1 for result in ping_results if result.get("ok", False))
+            return healthy_count > 0
+        except Exception as e:
+            logger.debug(f"Health check failed for '{self.tool_name}': {e}")
+            return False
+
+    def _is_connection_error(self, exception: Exception) -> bool:
+        """Determine if an exception indicates a connection problem."""
+        error_str = str(exception).lower()
+        connection_indicators = [
+            "connection lost", "connection closed", "connection refused",
+            "broken pipe", "timeout", "eof", "pipe closed", "process died",
+            "no route to host", "no server found"
+        ]
+        return any(indicator in error_str for indicator in connection_indicators)
+
+    async def _record_success(self) -> None:
+        """Record a successful execution."""
+        self.stats.successful_calls += 1
+        self.stats.last_success_time = time.time()
+        self._consecutive_failures = 0
+
+        # Close circuit breaker if it was open
+        if self._circuit_open:
+            self._circuit_open = False
+            self._circuit_open_time = None
+            self.connection_state = ConnectionState.HEALTHY
+            logger.info(f"Circuit breaker closed for tool '{self.tool_name}' after successful execution")
+
+    async def _record_failure(self, is_connection_error: bool = False) -> None:
+        """Record a failed execution."""
+        self.stats.failed_calls += 1
+        self.stats.last_failure_time = time.time()
+
+        if is_connection_error:
+            self.stats.connection_errors += 1
+            self.connection_state = ConnectionState.DISCONNECTED
+        else:
+            self.connection_state = ConnectionState.DEGRADED
+
+        self._consecutive_failures += 1
+
+        # Check if we should open the circuit breaker
+        if (self._consecutive_failures >= self.recovery_config.circuit_breaker_threshold and
+                not self._circuit_open):
+            self._circuit_open = True
+            self._circuit_open_time = time.time()
+            self.connection_state = ConnectionState.FAILED
+            logger.error(f"Circuit breaker opened for tool '{self.tool_name}' after {self._consecutive_failures} consecutive failures")
+
+    def _is_circuit_open(self) -> bool:
+        """Check if the circuit breaker is currently open."""
+        if not self._circuit_open:
+            return False
+
+        # Check if enough time has passed to close the circuit
+        if (self._circuit_open_time and
+                time.time() - self._circuit_open_time >= self.recovery_config.circuit_breaker_timeout):
+            self._circuit_open = False
+            self._circuit_open_time = None
+            self.connection_state = ConnectionState.HEALTHY
+            logger.info(f"Circuit breaker reset for tool '{self.tool_name}' after timeout")
+            return False
+
+        return True
+
     # ------------------------------------------------------------------ #
     # Legacy method name support
     async def _aexecute(self, timeout: Optional[float] = None, **kwargs: Any) -> Any:
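Because execute() now reports unavailability by returning a structured dict (with "available": False and a "reason" of disconnected, circuit_breaker, unhealthy, timeout, execution_failed or exhausted_retries) rather than raising, callers need to distinguish that payload from normal tool output. A small sketch of such a check (the `city` argument is illustrative):

    async def call_and_check(tool: MCPTool) -> None:
        result = await tool.execute(city="Berlin")
        if isinstance(result, dict) and result.get("available") is False:
            # Graceful-error payload produced by the resilience paths above.
            print(f"{result['tool_name']} unavailable: {result['reason']}")
        else:
            print("Tool output:", result)

Note that with resilience disabled, _simple_execute still raises asyncio.TimeoutError or RuntimeError; only the missing-StreamManager case takes the dict form there.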
@@ -219,25 +373,73 @@ class MCPTool:
         return await self.execute(timeout=timeout, **kwargs)
 
     # ------------------------------------------------------------------ #
-    # Utility methods for debugging
+    # Utility and monitoring methods
     # ------------------------------------------------------------------ #
-    def is_serializable(self) -> bool:
-        """Check if this tool can be serialized (for debugging)."""
-        try:
-            import pickle
-            pickle.dumps(self)
-            return True
-        except Exception:
-            return False
+    def is_available(self) -> bool:
+        """Check if this tool is currently available."""
+        return (self._sm is not None and
+                not self._is_circuit_open() and
+                self.connection_state in [ConnectionState.HEALTHY, ConnectionState.DEGRADED])
 
-    def get_serialization_info(self) -> Dict[str, Any]:
-        """Get information about what would be serialized."""
-        state = self.__getstate__()
+    def get_stats(self) -> Dict[str, Any]:
+        """Get connection and execution statistics."""
+        if not self.enable_resilience:
+            return {
+                "tool_name": self.tool_name,
+                "resilience_enabled": False,
+                "available": self._sm is not None
+            }
+
+        success_rate = 0.0
+        if self.stats.total_calls > 0:
+            success_rate = (self.stats.successful_calls / self.stats.total_calls) * 100
+
         return {
-            "tool_name": state.get("tool_name"),
-            "namespace": state.get("_namespace"),
-            "servers": state.get("_servers"),
-            "cfg_file": state.get("_cfg_file"),
-            "has_stream_manager": state.get("_sm") is not None,
-            "serializable_size": len(str(state))
-        }
+            "tool_name": self.tool_name,
+            "resilience_enabled": True,
+            "available": self.is_available(),
+            "state": self.connection_state.value,
+            "circuit_open": self._circuit_open,
+            "total_calls": self.stats.total_calls,
+            "successful_calls": self.stats.successful_calls,
+            "failed_calls": self.stats.failed_calls,
+            "connection_errors": self.stats.connection_errors,
+            "success_rate": success_rate,
+            "consecutive_failures": self._consecutive_failures,
+            "has_stream_manager": self._sm is not None,
+        }
+
+    def reset_circuit_breaker(self) -> None:
+        """Manually reset the circuit breaker."""
+        if not self.enable_resilience:
+            return
+
+        self._circuit_open = False
+        self._circuit_open_time = None
+        self._consecutive_failures = 0
+        self.connection_state = ConnectionState.HEALTHY
+        logger.info(f"Circuit breaker manually reset for tool '{self.tool_name}'")
+
+    def disable_resilience(self) -> None:
+        """Disable resilience features for this tool instance."""
+        self.enable_resilience = False
+        logger.info(f"Resilience features disabled for tool '{self.tool_name}'")
+
+    def set_stream_manager(self, stream_manager: Optional[StreamManager]) -> None:
+        """
+        Set or update the StreamManager for this tool.
+
+        This can be used by external systems to reconnect tools after
+        StreamManager recovery at a higher level.
+        """
+        self._sm = stream_manager
+        if stream_manager is not None:
+            self.connection_state = ConnectionState.HEALTHY
+            if self._circuit_open:
+                self._circuit_open = False
+                self._circuit_open_time = None
+                logger.info(f"Circuit breaker closed for tool '{self.tool_name}' due to new stream manager")
+        else:
+            self.connection_state = ConnectionState.DISCONNECTED
+
+        logger.debug(f"StreamManager {'set' if stream_manager else 'cleared'} for tool '{self.tool_name}'")
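The monitoring surface added in this last hunk lets whatever owns the tool observe its health and, after recovering a connection at a higher level, re-attach a StreamManager without rebuilding the MCPTool. A sketch of that supervision step (assumes the caller holds the tool plus a freshly recovered StreamManager named `new_sm`):

    def supervise(tool: MCPTool, new_sm) -> None:
        stats = tool.get_stats()
        print(f"{stats['tool_name']}: available={stats['available']}, "
              f"success_rate={stats.get('success_rate', 0):.1f}%")
        if not stats["available"]:
            tool.set_stream_manager(new_sm)   # re-attach; closes an open circuit breaker
            tool.reset_circuit_breaker()      # also clears the consecutive-failure count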