code-puppy 0.0.127-py3-none-any.whl → 0.0.128-py3-none-any.whl
This diff shows the changes between package versions that have been publicly released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- code_puppy/__init__.py +1 -0
- code_puppy/agent.py +65 -69
- code_puppy/agents/agent_code_puppy.py +0 -3
- code_puppy/agents/runtime_manager.py +212 -0
- code_puppy/command_line/command_handler.py +56 -25
- code_puppy/command_line/mcp_commands.py +1298 -0
- code_puppy/command_line/meta_command_handler.py +3 -2
- code_puppy/command_line/model_picker_completion.py +21 -8
- code_puppy/main.py +52 -157
- code_puppy/mcp/__init__.py +23 -0
- code_puppy/mcp/async_lifecycle.py +237 -0
- code_puppy/mcp/circuit_breaker.py +218 -0
- code_puppy/mcp/config_wizard.py +437 -0
- code_puppy/mcp/dashboard.py +291 -0
- code_puppy/mcp/error_isolation.py +360 -0
- code_puppy/mcp/examples/retry_example.py +208 -0
- code_puppy/mcp/health_monitor.py +549 -0
- code_puppy/mcp/managed_server.py +346 -0
- code_puppy/mcp/manager.py +701 -0
- code_puppy/mcp/registry.py +412 -0
- code_puppy/mcp/retry_manager.py +321 -0
- code_puppy/mcp/server_registry_catalog.py +751 -0
- code_puppy/mcp/status_tracker.py +355 -0
- code_puppy/messaging/spinner/textual_spinner.py +6 -2
- code_puppy/model_factory.py +19 -4
- code_puppy/models.json +8 -6
- code_puppy/tui/app.py +19 -27
- code_puppy/tui/tests/test_agent_command.py +22 -15
- {code_puppy-0.0.127.data → code_puppy-0.0.128.data}/data/code_puppy/models.json +8 -6
- {code_puppy-0.0.127.dist-info → code_puppy-0.0.128.dist-info}/METADATA +2 -3
- {code_puppy-0.0.127.dist-info → code_puppy-0.0.128.dist-info}/RECORD +34 -18
- {code_puppy-0.0.127.dist-info → code_puppy-0.0.128.dist-info}/WHEEL +0 -0
- {code_puppy-0.0.127.dist-info → code_puppy-0.0.128.dist-info}/entry_points.txt +0 -0
- {code_puppy-0.0.127.dist-info → code_puppy-0.0.128.dist-info}/licenses/LICENSE +0 -0
code_puppy/mcp/retry_manager.py (new file)
@@ -0,0 +1,321 @@
+"""
+Retry manager for MCP server communication with various backoff strategies.
+
+This module provides retry logic for handling transient failures in MCP server
+communication with intelligent backoff strategies to prevent overwhelming failed servers.
+"""
+
+import asyncio
+import logging
+import random
+from dataclasses import dataclass, field
+from datetime import datetime, timedelta
+from typing import Any, Callable, Dict, Optional
+from collections import defaultdict
+import httpx
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class RetryStats:
+    """Statistics for retry operations per server."""
+    total_retries: int = 0
+    successful_retries: int = 0
+    failed_retries: int = 0
+    average_attempts: float = 0.0
+    last_retry: Optional[datetime] = None
+
+    def calculate_average(self, new_attempts: int) -> None:
+        """Update the average attempts calculation."""
+        if self.total_retries == 0:
+            self.average_attempts = new_attempts
+        else:
+            total_attempts = (self.average_attempts * self.total_retries) + new_attempts
+            self.average_attempts = total_attempts / (self.total_retries + 1)
+
+
+class RetryManager:
+    """
+    Manages retry logic for MCP server operations with various backoff strategies.
+
+    Supports different backoff strategies and intelligent retry decisions based on
+    error types. Tracks retry statistics per server for monitoring.
+    """
+
+    def __init__(self):
+        """Initialize the retry manager."""
+        self._stats: Dict[str, RetryStats] = defaultdict(RetryStats)
+        self._lock = asyncio.Lock()
+
+    async def retry_with_backoff(
+        self,
+        func: Callable,
+        max_attempts: int = 3,
+        strategy: str = "exponential",
+        server_id: str = "unknown"
+    ) -> Any:
+        """
+        Execute a function with retry logic and backoff strategy.
+
+        Args:
+            func: The async function to execute
+            max_attempts: Maximum number of retry attempts
+            strategy: Backoff strategy ('fixed', 'linear', 'exponential', 'exponential_jitter')
+            server_id: ID of the server for tracking stats
+
+        Returns:
+            The result of the function call
+
+        Raises:
+            The last exception encountered if all retries fail
+        """
+        last_exception = None
+
+        for attempt in range(max_attempts):
+            try:
+                result = await func()
+
+                # Record successful retry if this wasn't the first attempt
+                if attempt > 0:
+                    await self.record_retry(server_id, attempt + 1, success=True)
+
+                return result
+
+            except Exception as e:
+                last_exception = e
+
+                # Check if this error is retryable
+                if not self.should_retry(e):
+                    logger.info(
+                        f"Non-retryable error for server {server_id}: {type(e).__name__}: {e}"
+                    )
+                    await self.record_retry(server_id, attempt + 1, success=False)
+                    raise e
+
+                # If this is the last attempt, don't wait
+                if attempt == max_attempts - 1:
+                    await self.record_retry(server_id, max_attempts, success=False)
+                    break
+
+                # Calculate backoff delay
+                delay = self.calculate_backoff(attempt + 1, strategy)
+
+                logger.warning(
+                    f"Attempt {attempt + 1}/{max_attempts} failed for server {server_id}: "
+                    f"{type(e).__name__}: {e}. Retrying in {delay:.2f}s"
+                )
+
+                # Wait before retrying
+                await asyncio.sleep(delay)
+
+        # All attempts failed
+        logger.error(
+            f"All {max_attempts} attempts failed for server {server_id}. "
+            f"Last error: {type(last_exception).__name__}: {last_exception}"
+        )
+        raise last_exception
+
+    def calculate_backoff(self, attempt: int, strategy: str) -> float:
+        """
+        Calculate backoff delay based on attempt number and strategy.
+
+        Args:
+            attempt: The current attempt number (1-based)
+            strategy: The backoff strategy to use
+
+        Returns:
+            Delay in seconds
+        """
+        if strategy == "fixed":
+            return 1.0
+
+        elif strategy == "linear":
+            return float(attempt)
+
+        elif strategy == "exponential":
+            return 2.0 ** (attempt - 1)
+
+        elif strategy == "exponential_jitter":
+            base_delay = 2.0 ** (attempt - 1)
+            jitter = random.uniform(-0.25, 0.25)  # ±25% jitter
+            return max(0.1, base_delay * (1 + jitter))
+
+        else:
+            logger.warning(f"Unknown backoff strategy: {strategy}, using exponential")
+            return 2.0 ** (attempt - 1)
+
+    def should_retry(self, error: Exception) -> bool:
+        """
+        Determine if an error is retryable.
+
+        Args:
+            error: The exception to evaluate
+
+        Returns:
+            True if the error is retryable, False otherwise
+        """
+        # Network timeouts and connection errors are retryable
+        if isinstance(error, (asyncio.TimeoutError, ConnectionError, OSError)):
+            return True
+
+        # HTTP errors
+        if isinstance(error, httpx.HTTPError):
+            if isinstance(error, httpx.TimeoutException):
+                return True
+            elif isinstance(error, httpx.ConnectError):
+                return True
+            elif isinstance(error, httpx.ReadError):
+                return True
+            elif hasattr(error, 'response') and error.response is not None:
+                status_code = error.response.status_code
+                # 5xx server errors are retryable
+                if 500 <= status_code < 600:
+                    return True
+                # Rate limit errors are retryable (with longer backoff)
+                if status_code == 429:
+                    return True
+                # 4xx client errors are generally not retryable
+                # except for specific cases like 408 (timeout)
+                if status_code == 408:
+                    return True
+                return False
+
+        # JSON decode errors might be transient
+        if isinstance(error, ValueError) and "json" in str(error).lower():
+            return True
+
+        # Authentication and authorization errors are not retryable
+        error_str = str(error).lower()
+        if any(term in error_str for term in ["unauthorized", "forbidden", "authentication", "permission"]):
+            return False
+
+        # Schema validation errors are not retryable
+        if "schema" in error_str or "validation" in error_str:
+            return False
+
+        # By default, consider other errors as potentially retryable
+        # This is conservative but helps handle unknown transient issues
+        return True
+
+    async def record_retry(self, server_id: str, attempts: int, success: bool) -> None:
+        """
+        Record retry statistics for a server.
+
+        Args:
+            server_id: ID of the server
+            attempts: Number of attempts made
+            success: Whether the retry was successful
+        """
+        async with self._lock:
+            stats = self._stats[server_id]
+            stats.total_retries += 1
+            stats.last_retry = datetime.now()
+
+            if success:
+                stats.successful_retries += 1
+            else:
+                stats.failed_retries += 1
+
+            stats.calculate_average(attempts)
+
+    async def get_retry_stats(self, server_id: str) -> RetryStats:
+        """
+        Get retry statistics for a server.
+
+        Args:
+            server_id: ID of the server
+
+        Returns:
+            RetryStats object with current statistics
+        """
+        async with self._lock:
+            # Return a copy to avoid external modification
+            stats = self._stats[server_id]
+            return RetryStats(
+                total_retries=stats.total_retries,
+                successful_retries=stats.successful_retries,
+                failed_retries=stats.failed_retries,
+                average_attempts=stats.average_attempts,
+                last_retry=stats.last_retry
+            )
+
+    async def get_all_stats(self) -> Dict[str, RetryStats]:
+        """
+        Get retry statistics for all servers.
+
+        Returns:
+            Dictionary mapping server IDs to their retry statistics
+        """
+        async with self._lock:
+            return {
+                server_id: RetryStats(
+                    total_retries=stats.total_retries,
+                    successful_retries=stats.successful_retries,
+                    failed_retries=stats.failed_retries,
+                    average_attempts=stats.average_attempts,
+                    last_retry=stats.last_retry
+                )
+                for server_id, stats in self._stats.items()
+            }
+
+    async def clear_stats(self, server_id: str) -> None:
+        """
+        Clear retry statistics for a server.
+
+        Args:
+            server_id: ID of the server
+        """
+        async with self._lock:
+            if server_id in self._stats:
+                del self._stats[server_id]
+
+    async def clear_all_stats(self) -> None:
+        """Clear retry statistics for all servers."""
+        async with self._lock:
+            self._stats.clear()
+
+
+# Global retry manager instance
+_retry_manager_instance: Optional[RetryManager] = None
+
+
+def get_retry_manager() -> RetryManager:
+    """
+    Get the global retry manager instance (singleton pattern).
+
+    Returns:
+        The global RetryManager instance
+    """
+    global _retry_manager_instance
+    if _retry_manager_instance is None:
+        _retry_manager_instance = RetryManager()
+    return _retry_manager_instance
+
+
+# Convenience function for common retry patterns
+async def retry_mcp_call(
+    func: Callable,
+    server_id: str,
+    max_attempts: int = 3,
+    strategy: str = "exponential_jitter"
+) -> Any:
+    """
+    Convenience function for retrying MCP calls with sensible defaults.
+
+    Args:
+        func: The async function to execute
+        server_id: ID of the server for tracking
+        max_attempts: Maximum retry attempts
+        strategy: Backoff strategy
+
+    Returns:
+        The result of the function call
+    """
+    retry_manager = get_retry_manager()
+    return await retry_manager.retry_with_backoff(
+        func=func,
+        max_attempts=max_attempts,
+        strategy=strategy,
+        server_id=server_id
+    )
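
For orientation, here is a minimal usage sketch of the new retry helper. Only retry_mcp_call, its import path, and its defaults (3 attempts, exponential backoff with jitter) come from the module above; the flaky fetch_tool_list coroutine and the "example-server" ID are hypothetical stand-ins for a real MCP call.

import asyncio

from code_puppy.mcp.retry_manager import retry_mcp_call

# Hypothetical MCP call that fails twice with a retryable error, then succeeds.
_attempts = {"count": 0}


async def fetch_tool_list() -> list:
    _attempts["count"] += 1
    if _attempts["count"] < 3:
        raise ConnectionError("transient network failure")
    return ["list_files", "read_file"]


async def main() -> None:
    # Uses the module defaults: max_attempts=3, strategy="exponential_jitter".
    # Waits roughly 1s, then 2s (each ±25% jitter) between attempts and records
    # the retries under the "example-server" stats entry.
    tools = await retry_mcp_call(fetch_tool_list, server_id="example-server")
    print(tools)


if __name__ == "__main__":
    asyncio.run(main())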