chuk_tool_processor-0.6.13-py3-none-any.whl → chuk_tool_processor-0.9.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of chuk-tool-processor might be problematic.
Files changed (35)
  1. chuk_tool_processor/core/__init__.py +31 -0
  2. chuk_tool_processor/core/exceptions.py +218 -12
  3. chuk_tool_processor/core/processor.py +38 -7
  4. chuk_tool_processor/execution/strategies/__init__.py +6 -0
  5. chuk_tool_processor/execution/strategies/subprocess_strategy.py +2 -1
  6. chuk_tool_processor/execution/wrappers/__init__.py +42 -0
  7. chuk_tool_processor/execution/wrappers/caching.py +48 -13
  8. chuk_tool_processor/execution/wrappers/circuit_breaker.py +370 -0
  9. chuk_tool_processor/execution/wrappers/rate_limiting.py +31 -1
  10. chuk_tool_processor/execution/wrappers/retry.py +93 -53
  11. chuk_tool_processor/logging/metrics.py +2 -2
  12. chuk_tool_processor/mcp/mcp_tool.py +5 -5
  13. chuk_tool_processor/mcp/setup_mcp_http_streamable.py +44 -2
  14. chuk_tool_processor/mcp/setup_mcp_sse.py +44 -2
  15. chuk_tool_processor/mcp/setup_mcp_stdio.py +2 -0
  16. chuk_tool_processor/mcp/stream_manager.py +130 -75
  17. chuk_tool_processor/mcp/transport/__init__.py +10 -0
  18. chuk_tool_processor/mcp/transport/http_streamable_transport.py +193 -108
  19. chuk_tool_processor/mcp/transport/models.py +100 -0
  20. chuk_tool_processor/mcp/transport/sse_transport.py +155 -59
  21. chuk_tool_processor/mcp/transport/stdio_transport.py +58 -10
  22. chuk_tool_processor/models/__init__.py +20 -0
  23. chuk_tool_processor/models/tool_call.py +34 -1
  24. chuk_tool_processor/models/tool_spec.py +350 -0
  25. chuk_tool_processor/models/validated_tool.py +22 -2
  26. chuk_tool_processor/observability/__init__.py +30 -0
  27. chuk_tool_processor/observability/metrics.py +312 -0
  28. chuk_tool_processor/observability/setup.py +105 -0
  29. chuk_tool_processor/observability/tracing.py +345 -0
  30. chuk_tool_processor/plugins/discovery.py +1 -1
  31. chuk_tool_processor-0.9.7.dist-info/METADATA +1813 -0
  32. {chuk_tool_processor-0.6.13.dist-info → chuk_tool_processor-0.9.7.dist-info}/RECORD +34 -27
  33. chuk_tool_processor-0.6.13.dist-info/METADATA +0 -698
  34. {chuk_tool_processor-0.6.13.dist-info → chuk_tool_processor-0.9.7.dist-info}/WHEEL +0 -0
  35. {chuk_tool_processor-0.6.13.dist-info → chuk_tool_processor-0.9.7.dist-info}/top_level.txt +0 -0

chuk_tool_processor/execution/wrappers/circuit_breaker.py (new file)

@@ -0,0 +1,370 @@
+# chuk_tool_processor/execution/wrappers/circuit_breaker.py
+"""
+Circuit breaker pattern for tool execution.
+
+Prevents cascading failures by tracking failure rates and temporarily
+blocking calls to failing tools. Implements a state machine:
+
+    CLOSED → OPEN → HALF_OPEN → CLOSED (or back to OPEN)
+
+States:
+- CLOSED: Normal operation, requests pass through
+- OPEN: Too many failures, requests blocked immediately
+- HALF_OPEN: Testing if service recovered, limited requests allowed
+"""
+
+from __future__ import annotations
+
+import asyncio
+import time
+from datetime import UTC, datetime
+from enum import Enum
+from typing import Any
+
+from chuk_tool_processor.core.exceptions import ToolCircuitOpenError
+from chuk_tool_processor.logging import get_logger
+from chuk_tool_processor.models.tool_call import ToolCall
+from chuk_tool_processor.models.tool_result import ToolResult
+
+logger = get_logger("chuk_tool_processor.execution.wrappers.circuit_breaker")
+
+# Optional observability imports
+try:
+    from chuk_tool_processor.observability.metrics import get_metrics
+    from chuk_tool_processor.observability.tracing import trace_circuit_breaker
+
+    _observability_available = True
+except ImportError:
+    _observability_available = False
+
+    # No-op functions when observability not available
+    def get_metrics():
+        return None
+
+    def trace_circuit_breaker(*_args, **_kwargs):
+        from contextlib import nullcontext
+
+        return nullcontext()
+
+
+# --------------------------------------------------------------------------- #
+# Circuit breaker state
+# --------------------------------------------------------------------------- #
+class CircuitState(str, Enum):
+    """Circuit breaker states."""
+
+    CLOSED = "closed"  # Normal operation
+    OPEN = "open"  # Blocking requests due to failures
+    HALF_OPEN = "half_open"  # Testing recovery with limited requests
+
+
+class CircuitBreakerConfig:
+    """Configuration for circuit breaker behavior."""
+
+    def __init__(
+        self,
+        failure_threshold: int = 5,
+        success_threshold: int = 2,
+        reset_timeout: float = 60.0,
+        half_open_max_calls: int = 1,
+        timeout_threshold: float | None = None,
+    ):
+        """
+        Initialize circuit breaker configuration.
+
+        Args:
+            failure_threshold: Number of failures before opening circuit
+            success_threshold: Number of successes in HALF_OPEN to close circuit
+            reset_timeout: Seconds to wait before trying HALF_OPEN
+            half_open_max_calls: Max concurrent calls in HALF_OPEN state
+            timeout_threshold: Optional timeout (s) to consider as failure
+        """
+        self.failure_threshold = failure_threshold
+        self.success_threshold = success_threshold
+        self.reset_timeout = reset_timeout
+        self.half_open_max_calls = half_open_max_calls
+        self.timeout_threshold = timeout_threshold
+
+
+class CircuitBreakerState:
+    """Per-tool circuit breaker state tracking."""
+
+    def __init__(self, config: CircuitBreakerConfig):
+        self.config = config
+        self.state = CircuitState.CLOSED
+        self.failure_count = 0
+        self.success_count = 0
+        self.last_failure_time: float | None = None
+        self.opened_at: float | None = None
+        self.half_open_calls = 0
+        self._lock = asyncio.Lock()
+
+    async def record_success(self) -> None:
+        """Record a successful call."""
+        async with self._lock:
+            if self.state == CircuitState.HALF_OPEN:
+                self.success_count += 1
+                logger.debug(f"Circuit HALF_OPEN: success {self.success_count}/{self.config.success_threshold}")
+
+                # Enough successes? Close the circuit
+                if self.success_count >= self.config.success_threshold:
+                    logger.info("Circuit breaker: Transitioning to CLOSED (service recovered)")
+                    self.state = CircuitState.CLOSED
+                    self.failure_count = 0
+                    self.success_count = 0
+                    self.opened_at = None
+                    self.half_open_calls = 0
+            else:
+                # In CLOSED state, just reset failure count
+                self.failure_count = 0
+
+    async def record_failure(self) -> None:
+        """Record a failed call."""
+        async with self._lock:
+            self.failure_count += 1
+            self.last_failure_time = time.monotonic()
+            logger.debug(f"Circuit: failure {self.failure_count}/{self.config.failure_threshold}")
+
+            if self.state == CircuitState.CLOSED:
+                # Check if we should open
+                if self.failure_count >= self.config.failure_threshold:
+                    logger.warning(f"Circuit breaker: OPENING after {self.failure_count} failures")
+                    self.state = CircuitState.OPEN
+                    self.opened_at = time.monotonic()
+            elif self.state == CircuitState.HALF_OPEN:
+                # Failed during test → back to OPEN
+                logger.warning("Circuit breaker: Back to OPEN (test failed)")
+                self.state = CircuitState.OPEN
+                self.success_count = 0
+                self.opened_at = time.monotonic()
+                self.half_open_calls = 0
+
+    async def can_execute(self) -> bool:
+        """Check if a call should be allowed through."""
+        async with self._lock:
+            if self.state == CircuitState.CLOSED:
+                return True
+
+            if self.state == CircuitState.HALF_OPEN:
+                # Limit concurrent calls in HALF_OPEN
+                if self.half_open_calls < self.config.half_open_max_calls:
+                    self.half_open_calls += 1
+                    return True
+                return False
+
+            # OPEN state: check if we should try HALF_OPEN
+            if self.opened_at is not None:
+                elapsed = time.monotonic() - self.opened_at
+                if elapsed >= self.config.reset_timeout:
+                    logger.info("Circuit breaker: Transitioning to HALF_OPEN (testing recovery)")
+                    self.state = CircuitState.HALF_OPEN
+                    self.half_open_calls = 1
+                    self.success_count = 0
+                    return True
+
+            return False
+
+    async def release_half_open_slot(self) -> None:
+        """Release a HALF_OPEN slot after call completes."""
+        async with self._lock:
+            if self.state == CircuitState.HALF_OPEN:
+                self.half_open_calls = max(0, self.half_open_calls - 1)
+
+    def get_state(self) -> dict[str, Any]:
+        """Get current state as dict."""
+        return {
+            "state": self.state.value,
+            "failure_count": self.failure_count,
+            "success_count": self.success_count,
+            "opened_at": self.opened_at,
+            "time_until_half_open": (
+                max(0, self.config.reset_timeout - (time.monotonic() - self.opened_at))
+                if self.opened_at and self.state == CircuitState.OPEN
+                else None
+            ),
+        }
+
+
+# --------------------------------------------------------------------------- #
+# Circuit breaker executor wrapper
+# --------------------------------------------------------------------------- #
+class CircuitBreakerExecutor:
+    """
+    Executor wrapper that implements circuit breaker pattern.
+
+    Tracks failures per tool and opens circuit breakers to prevent
+    cascading failures when tools are consistently failing.
+    """
+
+    def __init__(
+        self,
+        executor: Any,
+        *,
+        default_config: CircuitBreakerConfig | None = None,
+        tool_configs: dict[str, CircuitBreakerConfig] | None = None,
+    ):
+        """
+        Initialize circuit breaker executor.
+
+        Args:
+            executor: Underlying executor to wrap
+            default_config: Default circuit breaker configuration
+            tool_configs: Per-tool circuit breaker configurations
+        """
+        self.executor = executor
+        self.default_config = default_config or CircuitBreakerConfig()
+        self.tool_configs = tool_configs or {}
+        self._states: dict[str, CircuitBreakerState] = {}
+        self._states_lock = asyncio.Lock()
+
+    async def _get_state(self, tool: str) -> CircuitBreakerState:
+        """Get or create circuit breaker state for a tool."""
+        if tool not in self._states:
+            async with self._states_lock:
+                if tool not in self._states:
+                    config = self.tool_configs.get(tool, self.default_config)
+                    self._states[tool] = CircuitBreakerState(config)
+        return self._states[tool]
+
+    async def execute(
+        self,
+        calls: list[ToolCall],
+        *,
+        timeout: float | None = None,
+        use_cache: bool = True,
+    ) -> list[ToolResult]:
+        """
+        Execute tool calls with circuit breaker protection.
+
+        Args:
+            calls: List of tool calls to execute
+            timeout: Optional timeout for execution
+            use_cache: Whether to use cached results
+
+        Returns:
+            List of tool results
+        """
+        if not calls:
+            return []
+
+        results: list[ToolResult] = []
+
+        for call in calls:
+            state = await self._get_state(call.tool)
+
+            # Record circuit breaker state
+            metrics = get_metrics()
+            if metrics:
+                metrics.record_circuit_breaker_state(call.tool, state.state.value)
+
+            # Check if circuit allows execution with tracing
+            with trace_circuit_breaker(call.tool, state.state.value):
+                can_execute = await state.can_execute()
+
+            if not can_execute:
+                # Circuit is OPEN - reject immediately
+                state_info = state.get_state()
+                logger.warning(f"Circuit breaker OPEN for {call.tool} (failures: {state.failure_count})")
+
+                reset_time = state_info.get("time_until_half_open")
+                error = ToolCircuitOpenError(
+                    tool_name=call.tool,
+                    failure_count=state.failure_count,
+                    reset_timeout=reset_time,
+                )
+
+                now = datetime.now(UTC)
+                results.append(
+                    ToolResult(
+                        tool=call.tool,
+                        result=None,
+                        error=str(error),
+                        start_time=now,
+                        end_time=now,
+                        machine="circuit_breaker",
+                        pid=0,
+                    )
+                )
+                continue
+
+            # Execute the call
+            start_time = time.monotonic()
+            try:
+                # Execute single call
+                executor_kwargs = {"timeout": timeout}
+                if hasattr(self.executor, "use_cache"):
+                    executor_kwargs["use_cache"] = use_cache
+
+                result_list = await self.executor.execute([call], **executor_kwargs)
+                result = result_list[0]
+
+                # Check if successful
+                duration = time.monotonic() - start_time
+
+                # Determine success/failure
+                is_timeout = state.config.timeout_threshold is not None and duration > state.config.timeout_threshold
+                is_error = result.error is not None
+
+                if is_error or is_timeout:
+                    await state.record_failure()
+                    # Record circuit breaker failure metric
+                    if metrics:
+                        metrics.record_circuit_breaker_failure(call.tool)
+                else:
+                    await state.record_success()
+
+                results.append(result)
+
+            except Exception as e:
+                # Exception during execution
+                await state.record_failure()
+
+                now = datetime.now(UTC)
+                results.append(
+                    ToolResult(
+                        tool=call.tool,
+                        result=None,
+                        error=f"Circuit breaker caught exception: {str(e)}",
+                        start_time=now,
+                        end_time=now,
+                        machine="circuit_breaker",
+                        pid=0,
+                    )
+                )

+            finally:
+                # Release HALF_OPEN slot if applicable
+                if state.state == CircuitState.HALF_OPEN:
+                    await state.release_half_open_slot()
+
+        return results
+
+    async def get_circuit_states(self) -> dict[str, dict[str, Any]]:
+        """
+        Get current state of all circuit breakers.
+
+        Returns:
+            Dict mapping tool name to state info
+        """
+        states = {}
+        async with self._states_lock:
+            for tool, state in self._states.items():
+                states[tool] = state.get_state()
+        return states
+
+    async def reset_circuit(self, tool: str) -> None:
+        """
+        Manually reset a circuit breaker.
+
+        Args:
+            tool: Tool name to reset
+        """
+        if tool in self._states:
+            state = self._states[tool]
+            async with state._lock:
+                state.state = CircuitState.CLOSED
+                state.failure_count = 0
+                state.success_count = 0
+                state.opened_at = None
+                state.half_open_calls = 0
+            logger.info(f"Manually reset circuit breaker for {tool}")
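Taken as a whole, the new module wraps any executor that exposes an async execute(calls, *, timeout=None) method. Below is a minimal end-to-end sketch, not part of the diff: AlwaysFailingExecutor and FakeCall are invented stand-ins that rely only on the .tool attribute and the execute contract used by the wrapper above, and it assumes chuk-tool-processor 0.9.7 is installed.

import asyncio
from dataclasses import dataclass
from datetime import UTC, datetime

from chuk_tool_processor.execution.wrappers.circuit_breaker import (
    CircuitBreakerConfig,
    CircuitBreakerExecutor,
)
from chuk_tool_processor.models.tool_result import ToolResult


@dataclass
class FakeCall:
    # Stand-in for ToolCall; only the .tool attribute is read by the wrapper.
    tool: str


class AlwaysFailingExecutor:
    # Invented stub honoring the duck-typed contract: async execute(calls, *, timeout=None).
    async def execute(self, calls, *, timeout=None):
        now = datetime.now(UTC)
        return [
            ToolResult(tool=c.tool, result=None, error="boom",
                       start_time=now, end_time=now, machine="stub", pid=0)
            for c in calls
        ]


async def main() -> None:
    executor = CircuitBreakerExecutor(
        AlwaysFailingExecutor(),
        default_config=CircuitBreakerConfig(failure_threshold=2, reset_timeout=30.0),
    )
    for _ in range(3):
        [result] = await executor.execute([FakeCall("weather")])
        # Two "boom" failures open the circuit; the third call is rejected immediately.
        print(result.error)


asyncio.run(main())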
chuk_tool_processor/execution/wrappers/rate_limiting.py

@@ -25,6 +25,24 @@ from chuk_tool_processor.models.tool_result import ToolResult
 
 logger = get_logger("chuk_tool_processor.execution.wrappers.rate_limiting")
 
+# Optional observability imports
+try:
+    from chuk_tool_processor.observability.metrics import get_metrics
+    from chuk_tool_processor.observability.tracing import trace_rate_limit
+
+    _observability_available = True
+except ImportError:
+    _observability_available = False
+
+    # No-op functions when observability not available
+    def get_metrics():
+        return None
+
+    def trace_rate_limit(*_args, **_kwargs):
+        from contextlib import nullcontext
+
+        return nullcontext()
+
 
 # --------------------------------------------------------------------------- #
 # Core limiter
@@ -220,8 +238,20 @@ class RateLimitedToolExecutor:
             return []
 
         # Block for each call *before* dispatching to the wrapped executor
+        metrics = get_metrics()
+
         for c in calls:
-            await self.limiter.wait(c.tool)
+            # Check limits first for metrics
+            global_limited, tool_limited = await self.limiter.check_limits(c.tool)
+            allowed = not (global_limited or tool_limited)
+
+            # Trace rate limit check
+            with trace_rate_limit(c.tool, allowed):
+                await self.limiter.wait(c.tool)
+
+            # Record metrics
+            if metrics:
+                metrics.record_rate_limit_check(c.tool, allowed)
 
         # Check if the executor has a use_cache parameter
         if hasattr(self.executor, "execute"):
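All three wrappers dispatch to the inner object's execute(calls, timeout=..., use_cache=...) coroutine, so they compose by nesting. A hedged sketch follows: the constructor signatures of RetryableToolExecutor and RateLimitedToolExecutor are not shown in this diff, so passing the wrapped executor as the first positional argument is an assumption modeled on CircuitBreakerExecutor above, and base_executor stands in for whatever strategy executor the application already uses.

# Hedged sketch (not part of the diff): nesting the wrappers added or extended in 0.9.7.
from chuk_tool_processor.execution.wrappers.circuit_breaker import (
    CircuitBreakerConfig,
    CircuitBreakerExecutor,
)
from chuk_tool_processor.execution.wrappers.rate_limiting import RateLimitedToolExecutor
from chuk_tool_processor.execution.wrappers.retry import RetryableToolExecutor


def build_pipeline(base_executor):
    # Innermost: retries. Then rate limiting. The circuit breaker sits outermost so an
    # OPEN circuit rejects calls before any rate-limit tokens or retry budget are spent.
    retrying = RetryableToolExecutor(base_executor)   # assumed signature, not shown in this diff
    limited = RateLimitedToolExecutor(retrying)       # assumed signature, not shown in this diff
    return CircuitBreakerExecutor(
        limited,
        default_config=CircuitBreakerConfig(failure_threshold=5, reset_timeout=60.0),
    )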
chuk_tool_processor/execution/wrappers/retry.py

@@ -21,6 +21,24 @@ from chuk_tool_processor.models.tool_result import ToolResult
 
 logger = get_logger("chuk_tool_processor.execution.wrappers.retry")
 
+# Optional observability imports
+try:
+    from chuk_tool_processor.observability.metrics import get_metrics
+    from chuk_tool_processor.observability.tracing import trace_retry_attempt
+
+    _observability_available = True
+except ImportError:
+    _observability_available = False
+
+    # No-op functions when observability not available
+    def get_metrics():
+        return None
+
+    def trace_retry_attempt(*_args, **_kwargs):
+        from contextlib import nullcontext
+
+        return nullcontext()
+
 
 # --------------------------------------------------------------------------- #
 # Retry configuration
@@ -36,6 +54,7 @@ class RetryConfig:
         jitter: bool = True,
         retry_on_exceptions: list[type[Exception]] | None = None,
         retry_on_error_substrings: list[str] | None = None,
+        skip_retry_on_error_substrings: list[str] | None = None,
     ):
         if max_retries < 0:
             raise ValueError("max_retries cannot be negative")
@@ -45,6 +64,7 @@ class RetryConfig:
         self.jitter = jitter
         self.retry_on_exceptions = retry_on_exceptions or []
         self.retry_on_error_substrings = retry_on_error_substrings or []
+        self.skip_retry_on_error_substrings = skip_retry_on_error_substrings or []
 
    # --------------------------------------------------------------------- #
    # Decision helpers
@@ -60,6 +80,14 @@ class RetryConfig:
         if attempt >= self.max_retries:
             return False
 
+        # Check skip list first - these errors should never be retried
+        # (e.g., OAuth errors that need to be handled at transport layer)
+        if error_str and self.skip_retry_on_error_substrings:
+            error_lower = error_str.lower()
+            if any(skip_pattern.lower() in error_lower for skip_pattern in self.skip_retry_on_error_substrings):
+                logger.debug(f"Skipping retry for error matching skip pattern: {error_str[:100]}")
+                return False
+
         # Nothing specified → always retry until max_retries reached
         if not self.retry_on_exceptions and not self.retry_on_error_substrings:
             return True
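The effect of the new skip list on the decision helper, as a small sketch that is not part of the diff. The error strings are invented, and the sketch assumes the pre-existing substring matching (outside this hunk) treats a retry_on_error_substrings match as retryable.

from chuk_tool_processor.execution.wrappers.retry import RetryConfig

cfg = RetryConfig(
    max_retries=3,
    retry_on_error_substrings=["timeout"],
    skip_retry_on_error_substrings=["invalid_grant", "unauthorized"],
)

# Skip-list match: never retried, even though the attempt budget remains.
assert cfg.should_retry(0, error_str="OAuth refresh failed: invalid_grant") is False

# Retryable substring and no skip match: retried.
assert cfg.should_retry(0, error_str="upstream timeout after 30s") is True

# Attempt budget exhausted: never retried, regardless of the error text.
assert cfg.should_retry(3, error_str="upstream timeout after 30s") is False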
@@ -167,63 +195,73 @@ class RetryableToolExecutor:
            # Execute one attempt
            # ---------------------------------------------------------------- #
            start_time = datetime.now(UTC)
-            try:
-                kwargs = {"timeout": remaining} if remaining is not None else {}
-                if hasattr(self.executor, "use_cache"):
-                    kwargs["use_cache"] = use_cache
 
-                result = (await self.executor.execute([call], **kwargs))[0]
-                pid = result.pid
-                machine = result.machine
-
-                # Success?
-                if not result.error:
+            # Trace retry attempt
+            with trace_retry_attempt(call.tool, attempt, cfg.max_retries):
+                try:
+                    kwargs = {"timeout": remaining} if remaining is not None else {}
+                    if hasattr(self.executor, "use_cache"):
+                        kwargs["use_cache"] = use_cache
+
+                    result = (await self.executor.execute([call], **kwargs))[0]
+                    pid = result.pid
+                    machine = result.machine
+
+                    # Record retry metrics
+                    metrics = get_metrics()
+                    success = result.error is None
+
+                    if metrics:
+                        metrics.record_retry_attempt(call.tool, attempt, success)
+
+                    # Success?
+                    if success:
+                        result.attempts = attempt + 1
+                        return result
+
+                    # Error: decide on retry
+                    last_error = result.error
+                    if cfg.should_retry(attempt, error_str=result.error):
+                        delay = cfg.get_delay(attempt)
+                        # never overshoot the deadline
+                        if deadline is not None:
+                            delay = min(delay, max(deadline - time.monotonic(), 0))
+                        if delay:
+                            await asyncio.sleep(delay)
+                        attempt += 1
+                        continue
+
+                    # No more retries wanted
+                    result.error = self._wrap_error(last_error, attempt, cfg)
                     result.attempts = attempt + 1
                     return result
 
-                # Error: decide on retry
-                last_error = result.error
-                if cfg.should_retry(attempt, error_str=result.error):
-                    delay = cfg.get_delay(attempt)
-                    # never overshoot the deadline
-                    if deadline is not None:
-                        delay = min(delay, max(deadline - time.monotonic(), 0))
-                    if delay:
-                        await asyncio.sleep(delay)
-                    attempt += 1
-                    continue
-
-                # No more retries wanted
-                result.error = self._wrap_error(last_error, attempt, cfg)
-                result.attempts = attempt + 1
-                return result
-
-            # ---------------------------------------------------------------- #
-            # Exception path
-            # ---------------------------------------------------------------- #
-            except Exception as exc:  # noqa: BLE001
-                err_str = str(exc)
-                last_error = err_str
-                if cfg.should_retry(attempt, error=exc):
-                    delay = cfg.get_delay(attempt)
-                    if deadline is not None:
-                        delay = min(delay, max(deadline - time.monotonic(), 0))
-                    if delay:
-                        await asyncio.sleep(delay)
-                    attempt += 1
-                    continue
-
-                end_time = datetime.now(UTC)
-                return ToolResult(
-                    tool=call.tool,
-                    result=None,
-                    error=self._wrap_error(err_str, attempt, cfg),
-                    start_time=start_time,
-                    end_time=end_time,
-                    machine=machine,
-                    pid=pid,
-                    attempts=attempt + 1,
-                )
+                # ---------------------------------------------------------------- #
+                # Exception path
+                # ---------------------------------------------------------------- #
+                except Exception as exc:  # noqa: BLE001
+                    err_str = str(exc)
+                    last_error = err_str
+                    if cfg.should_retry(attempt, error=exc, error_str=err_str):
+                        delay = cfg.get_delay(attempt)
+                        if deadline is not None:
+                            delay = min(delay, max(deadline - time.monotonic(), 0))
+                        if delay:
+                            await asyncio.sleep(delay)
+                        attempt += 1
+                        continue
+
+                    end_time = datetime.now(UTC)
+                    return ToolResult(
+                        tool=call.tool,
+                        result=None,
+                        error=self._wrap_error(err_str, attempt, cfg),
+                        start_time=start_time,
+                        end_time=end_time,
+                        machine=machine,
+                        pid=pid,
+                        attempts=attempt + 1,
+                    )
 
    # --------------------------------------------------------------------- #
    # Helpers
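The deadline handling above caps every backoff sleep at whatever budget the caller has left. A small standalone sketch of that cap follows; get_delay()'s exact formula is outside this hunk, and an exponential shape is only assumed here.

import time


def capped_delay(proposed: float, deadline: float | None) -> float:
    # Mirrors the line above: delay = min(delay, max(deadline - time.monotonic(), 0))
    if deadline is None:
        return proposed
    return min(proposed, max(deadline - time.monotonic(), 0))


deadline = time.monotonic() + 1.5     # 1.5 s of overall budget left
print(capped_delay(4.0, deadline))    # ≈1.5: the proposed backoff is truncated to the budget
print(capped_delay(0.5, deadline))    # 0.5: fits inside the remaining budget, unchanged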
@@ -246,6 +284,7 @@ def retryable(
    jitter: bool = True,
    retry_on_exceptions: list[type[Exception]] | None = None,
    retry_on_error_substrings: list[str] | None = None,
+   skip_retry_on_error_substrings: list[str] | None = None,
):
    """
    Class decorator that attaches a :class:`RetryConfig` to a *tool* class.
@@ -267,6 +306,7 @@
            jitter=jitter,
            retry_on_exceptions=retry_on_exceptions,
            retry_on_error_substrings=retry_on_error_substrings,
+           skip_retry_on_error_substrings=skip_retry_on_error_substrings,
        )
        return cls
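Usage of the extended decorator, as an illustrative sketch that is not part of the diff. The tool class body and error strings are invented, and max_retries is assumed to be among the decorator's parameters since its full signature is not shown in this hunk.

from chuk_tool_processor.execution.wrappers.retry import retryable


@retryable(
    max_retries=2,
    retry_on_error_substrings=["timeout", "temporarily unavailable"],
    # New in 0.9.7: never spend retries on errors the transport layer must resolve.
    skip_retry_on_error_substrings=["invalid_grant", "401 unauthorized"],
)
class FlakyWeatherTool:
    async def execute(self, city: str) -> dict:
        ...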
chuk_tool_processor/logging/metrics.py

@@ -45,7 +45,7 @@ class MetricsLogger:
             cached: Whether the result was retrieved from cache
             attempts: Number of execution attempts
         """
-        self.logger.info(
+        self.logger.debug(
             f"Tool execution metric: {tool}",
             extra={
                 "context": {
@@ -76,7 +76,7 @@ class MetricsLogger:
             duration: Parsing duration in seconds
             num_calls: Number of tool calls parsed
         """
-        self.logger.info(
+        self.logger.debug(
             f"Parser metric: {parser}",
             extra={
                 "context": {
chuk_tool_processor/mcp/mcp_tool.py

@@ -370,7 +370,7 @@ class MCPTool:
            self._circuit_open = False
            self._circuit_open_time = None
            self.connection_state = ConnectionState.HEALTHY
-           logger.info(f"Circuit breaker closed for tool '{self.tool_name}' after successful execution")
+           logger.debug(f"Circuit breaker closed for tool '{self.tool_name}' after successful execution")
 
    async def _record_failure(self, is_connection_error: bool = False) -> None:
        """Record a failed execution."""
@@ -407,7 +407,7 @@ class MCPTool:
                self._circuit_open = False
                self._circuit_open_time = None
                self.connection_state = ConnectionState.HEALTHY
-               logger.info(f"Circuit breaker reset for tool '{self.tool_name}' after timeout")
+               logger.debug(f"Circuit breaker reset for tool '{self.tool_name}' after timeout")
                return False
 
            return True
@@ -462,12 +462,12 @@ class MCPTool:
        self._circuit_open_time = None
        self._consecutive_failures = 0
        self.connection_state = ConnectionState.HEALTHY
-       logger.info(f"Circuit breaker manually reset for tool '{self.tool_name}'")
+       logger.debug(f"Circuit breaker manually reset for tool '{self.tool_name}'")
 
    def disable_resilience(self) -> None:
        """Disable resilience features for this tool instance."""
        self.enable_resilience = False
-       logger.info(f"Resilience features disabled for tool '{self.tool_name}'")
+       logger.debug(f"Resilience features disabled for tool '{self.tool_name}'")
 
    def set_stream_manager(self, stream_manager: StreamManager | None) -> None:
        """
@@ -482,7 +482,7 @@ class MCPTool:
            if self._circuit_open:
                self._circuit_open = False
                self._circuit_open_time = None
-               logger.info(f"Circuit breaker closed for tool '{self.tool_name}' due to new stream manager")
+               logger.debug(f"Circuit breaker closed for tool '{self.tool_name}' due to new stream manager")
        else:
            self.connection_state = ConnectionState.DISCONNECTED