hud-python 0.4.23__py3-none-any.whl → 0.4.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic.
- hud/agents/base.py +48 -20
- hud/agents/claude.py +5 -1
- hud/clients/mcp_use.py +9 -1
- hud/clients/tests/test_mcp_use_retry.py +378 -0
- hud/clients/utils/__init__.py +25 -0
- hud/clients/utils/mcp_use_retry.py +201 -0
- hud/clients/utils/retry.py +186 -0
- hud/datasets/execution/parallel.py +25 -8
- hud/otel/config.py +19 -2
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.23.dist-info → hud_python-0.4.25.dist-info}/METADATA +1 -1
- {hud_python-0.4.23.dist-info → hud_python-0.4.25.dist-info}/RECORD +16 -13
- {hud_python-0.4.23.dist-info → hud_python-0.4.25.dist-info}/WHEEL +0 -0
- {hud_python-0.4.23.dist-info → hud_python-0.4.25.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.23.dist-info → hud_python-0.4.25.dist-info}/licenses/LICENSE +0 -0
hud/agents/base.py
CHANGED
@@ -207,6 +207,7 @@ class MCPAgent(ABC):
             else:
                 raise TypeError(f"prompt_or_task must be str or Task, got {type(prompt_or_task)}")
         except Exception as e:
+            # Always return a Trace object for any exception
             if self._is_connection_error(e):
                 # Return error trace for connection failures
                 return Trace(

@@ -215,7 +216,15 @@ class MCPAgent(ABC):
                     content=self._get_connection_error_message(e),
                     isError=True,
                 )
-
+            else:
+                # Return error trace for any other exception
+                return Trace(
+                    reward=0.0,
+                    done=True,
+                    content=f"Task failed with error: {e}",
+                    isError=True,
+                    info={"error": str(e)},
+                )
         finally:
             # Cleanup auto-created resources
             await self._cleanup()

@@ -262,34 +271,53 @@ class MCPAgent(ABC):
             prompt_result = Trace(reward=0.0, done=True, content=str(e), isError=True)
             prompt_result.populate_from_context()

-        # Always evaluate if we have
-        if
+        # Always evaluate if we have evaluate tool, regardless of errors
+        if task.evaluate_tool is not None:
             try:
                 self.console.progress_log(f"Evaluating tool phase: {task.evaluate_tool}")
                 results = await self.call_tools(task.evaluate_tool)

                 if any(result.isError for result in results):
                     [removed lines 272-286 are not shown in the source diff]
+                    self.console.warning_log(f"Evaluate tool returned error: {results}")
+                    # Still extract what we can from the error response
+                    if prompt_result is None:
+                        prompt_result = Trace(
+                            reward=0.0,
+                            done=True,
+                            content="Task failed before evaluation",
+                            isError=True,
+                        )
+                    prompt_result.reward = 0.0  # Default to 0 on error
+                else:
+                    # Extract reward and content from evaluation
+                    if results:
+                        reward = find_reward(results[0])
+                        eval_content = find_content(results[0])
+
+                        # Update the prompt result with evaluation reward
+                        if prompt_result is None:
+                            prompt_result = Trace(
+                                reward=reward, done=True, content=eval_content or "", isError=False
+                            )
                         else:
-                            prompt_result.
+                            prompt_result.reward = reward
+
+                        # Update the prompt result with evaluation content (if available)
+                        if eval_content:
+                            # Prompt result may already have final response content,
+                            # so we append to it
+                            if prompt_result.content:
+                                prompt_result.content += "\n\n" + eval_content
+                            else:
+                                prompt_result.content = eval_content

             except Exception as e:
                 self.console.error_log(f"Evaluation phase failed: {e}")
-                #
+                # Ensure we have a result even if evaluation failed
+                if prompt_result is None:
+                    prompt_result = Trace(
+                        reward=0.0, done=True, content=f"Evaluation failed: {e}", isError=True
+                    )

         return (
             prompt_result
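Note: the practical effect of these hunks is that the run and evaluation paths always produce a Trace, even for non-connection errors. A minimal sketch of how a caller might rely on that (the agent and task objects are placeholders, not names from the diff):

# Sketch only: `agent` stands in for a concrete MCPAgent subclass instance
# and `task` for a Task; with 0.4.25, unexpected task failures are reported
# through the returned Trace instead of propagating as exceptions.
async def run_and_report(agent, task) -> None:
    trace = await agent.run(task)
    if trace.isError:
        print("Task failed:", trace.content, (trace.info or {}).get("error"))
    else:
        print("Reward:", trace.reward)
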
hud/agents/claude.py
CHANGED
@@ -196,7 +196,11 @@ class ClaudeAgent(MCPAgent):
                 response = await self.anthropic_client.beta.messages.create(**create_kwargs)
                 break
             except BadRequestError as e:
-                if
+                if (
+                    "prompt is too long" in str(e)
+                    or "request_too_large" in str(e)
+                    or e.status_code == 413
+                ):
                     logger.warning("Prompt too long, truncating message history")
                     # Keep first message and last 20 messages
                     if len(current_messages) > 21:
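The truncation path now triggers on any of three oversize signals: the "prompt is too long" message, the "request_too_large" error code, or HTTP status 413. A hypothetical helper mirroring that condition (`err` stands in for the SDK's BadRequestError):

# Illustrative only, not part of the package.
def should_truncate_history(err: Exception) -> bool:
    return (
        "prompt is too long" in str(err)
        or "request_too_large" in str(err)
        or getattr(err, "status_code", None) == 413
    )
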
hud/clients/mcp_use.py
CHANGED
@@ -15,6 +15,7 @@ from hud.types import MCPToolCall, MCPToolResult
 from hud.version import __version__ as hud_version

 from .base import BaseHUDClient
+from .utils.mcp_use_retry import patch_all_sessions

 logger = logging.getLogger(__name__)


@@ -63,6 +64,10 @@ class MCPUseHUDClient(BaseHUDClient):
         self._sessions = await self._client.create_all_sessions()
         logger.info("Created %d MCP sessions", len(self._sessions))

+        # Patch all sessions with retry logic
+        patch_all_sessions(self._sessions)
+        logger.debug("Applied retry logic to all MCP sessions")
+
         # Configure validation for all sessions based on client setting
         try:
             for session in self._sessions.values():

@@ -127,7 +132,7 @@ class MCPUseHUDClient(BaseHUDClient):
                 logger.warning("Client session not initialized for %s", server_name)
                 continue

-            # List tools
+            # List tools (retry logic is handled at transport level)
             tools_result = await session.connector.client_session.list_tools()

             logger.info(

@@ -202,6 +207,7 @@ class MCPUseHUDClient(BaseHUDClient):
         if session.connector.client_session is None:
             raise ValueError(f"Client session not initialized for {server_name}")

+        # Call tool (retry logic is handled at transport level)
         result = await session.connector.client_session.call_tool(
             name=original_tool.name,  # Use original tool name, not prefixed
             arguments=tool_call.arguments or {},

@@ -232,6 +238,7 @@ class MCPUseHUDClient(BaseHUDClient):
                 continue
             # Prefer standard method name if available
             if hasattr(session.connector.client_session, "list_resources"):
+                # List resources (retry logic is handled at transport level)
                 resources = await session.connector.client_session.list_resources()
             else:
                 # If the client doesn't support resource listing, skip

@@ -262,6 +269,7 @@ class MCPUseHUDClient(BaseHUDClient):
         resource_uri = AnyUrl(uri) if isinstance(uri, str) else uri
         # Prefer read_resource; fall back to list_resources if needed
         if hasattr(session.connector.client_session, "read_resource"):
+            # Read resource (retry logic is handled at transport level)
             result = await session.connector.client_session.read_resource(resource_uri)
         else:
             # Fallback path for older clients: not supported in strict typing
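Because patch_all_sessions() runs once inside initialize(), callers of the client API do not change; transient 502/503/504 responses on tool calls and resource reads are retried at the transport level. A sketch under that assumption (the server URL and tool name are invented; the config shape matches the tests below):

# Sketch only: "setup" is an assumed tool name, not part of the package.
from hud.clients.mcp_use import MCPUseHUDClient
from hud.types import MCPToolCall

async def call_with_transport_retries() -> None:
    client = MCPUseHUDClient({"test_server": {"url": "http://localhost:8080"}})
    await client.initialize()  # sessions are created and patched with retries here
    result = await client.call_tool(MCPToolCall(name="setup", arguments={}))
    print(result.isError)
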
hud/clients/tests/test_mcp_use_retry.py
ADDED

@@ -0,0 +1,378 @@
"""Tests for MCP-use client retry functionality."""

from __future__ import annotations

from unittest.mock import AsyncMock, Mock, patch

import pytest
import requests
from mcp import types

from hud.clients.mcp_use import MCPUseHUDClient
from hud.clients.utils.mcp_use_retry import (
    create_async_retry_wrapper,
    create_retry_session,
    patch_all_sessions,
    patch_mcp_session_http_client,
)
from hud.types import MCPToolCall


class TestRetrySession:
    """Test the retry session creation."""

    def test_create_retry_session(self):
        """Test that retry session is configured correctly."""
        session = create_retry_session(
            max_retries=5,
            retry_status_codes=(500, 502, 503, 504),
            retry_delay=0.5,
            backoff_factor=2.0,
        )

        # Check that session has adapters mounted
        assert "http://" in session.adapters
        assert "https://" in session.adapters

        # Check adapter configuration
        adapter = session.adapters["http://"]
        assert adapter.max_retries.total == 5
        assert 500 in adapter.max_retries.status_forcelist
        assert 502 in adapter.max_retries.status_forcelist
        assert adapter.max_retries.backoff_factor == 2.0

    def test_retry_session_default_values(self):
        """Test retry session with default values."""
        session = create_retry_session()

        adapter = session.adapters["https://"]
        assert adapter.max_retries.total == 3
        assert 502 in adapter.max_retries.status_forcelist
        assert 503 in adapter.max_retries.status_forcelist
        assert 504 in adapter.max_retries.status_forcelist


class TestAsyncRetryWrapper:
    """Test the async retry wrapper functionality."""

    @pytest.mark.asyncio
    async def test_retry_on_error_status_codes(self):
        """Test that async wrapper retries on specific status codes."""
        call_count = 0

        async def mock_func(*args, **kwargs):
            nonlocal call_count
            call_count += 1

            # First two calls fail, third succeeds
            if call_count < 3:
                result = Mock()
                result.status_code = 503  # Service unavailable
                return result

            result = Mock()
            result.status_code = 200
            return result

        wrapped = create_async_retry_wrapper(
            mock_func,
            max_retries=3,
            retry_status_codes=(503,),
            retry_delay=0.01,  # Short delay for testing
        )

        result = await wrapped()
        assert call_count == 3
        assert result.status_code == 200

    @pytest.mark.asyncio
    async def test_retry_on_exception(self):
        """Test that async wrapper retries on exceptions with status codes."""
        call_count = 0

        async def mock_func(*args, **kwargs):
            nonlocal call_count
            call_count += 1

            if call_count < 3:
                raise Exception("HTTP 503 Service Unavailable")

            return Mock(status_code=200)

        wrapped = create_async_retry_wrapper(
            mock_func,
            max_retries=3,
            retry_status_codes=(503,),
            retry_delay=0.01,
        )

        result = await wrapped()
        assert call_count == 3
        assert result.status_code == 200

    @pytest.mark.asyncio
    async def test_no_retry_on_success(self):
        """Test that successful calls don't trigger retries."""
        call_count = 0

        async def mock_func(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            return Mock(status_code=200)

        wrapped = create_async_retry_wrapper(mock_func)

        result = await wrapped()
        assert call_count == 1
        assert result.status_code == 200

    @pytest.mark.asyncio
    async def test_max_retries_exceeded(self):
        """Test that retries stop after max attempts."""
        call_count = 0

        async def mock_func(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            raise Exception("HTTP 503 Service Unavailable")

        wrapped = create_async_retry_wrapper(
            mock_func,
            max_retries=2,
            retry_status_codes=(503,),
            retry_delay=0.01,
        )

        with pytest.raises(Exception) as exc_info:
            await wrapped()

        assert "503" in str(exc_info.value)
        assert call_count == 3  # Initial + 2 retries


class TestSessionPatching:
    """Test the session patching functionality."""

    def test_patch_sync_session(self):
        """Test patching a synchronous session."""
        # Create mock session with connector
        mock_session = Mock()
        mock_session.connector = Mock()
        mock_session.connector._connection_manager = Mock()
        mock_session.connector._connection_manager._session = requests.Session()

        # Patch the session
        patch_mcp_session_http_client(mock_session)

        # Verify the session was replaced with retry-enabled one
        patched_session = mock_session.connector._connection_manager._session
        assert "http://" in patched_session.adapters
        assert "https://" in patched_session.adapters

        # Check that it has retry configuration
        adapter = patched_session.adapters["http://"]
        assert hasattr(adapter, "max_retries")

    @pytest.mark.asyncio
    async def test_patch_async_session(self):
        """Test patching an async session."""
        # Create mock async session
        mock_session = Mock()
        mock_session.connector = Mock()
        mock_session.connector.client_session = Mock()

        async def mock_send_request(*args, **kwargs):
            return Mock(status_code=200)

        mock_session.connector.client_session._send_request = mock_send_request

        # Patch the session
        patch_mcp_session_http_client(mock_session)

        # Verify _send_request was wrapped
        wrapped_func = mock_session.connector.client_session._send_request
        assert wrapped_func != mock_send_request  # Function was replaced

        # Test that wrapped function still works
        result = await wrapped_func()
        assert result.status_code == 200

    def test_patch_all_sessions(self):
        """Test patching multiple sessions."""
        # Create mock sessions
        session1 = Mock()
        session1.connector = Mock()
        session1.connector._connection_manager = Mock()
        session1.connector._connection_manager.session = requests.Session()

        session2 = Mock()
        session2.connector = Mock()
        session2.connector.client_session = Mock()
        session2.connector.client_session._send_request = AsyncMock()

        sessions = {"server1": session1, "server2": session2}

        # Patch all sessions
        patch_all_sessions(sessions)

        # Verify both were patched
        assert "http://" in session1.connector._connection_manager.session.adapters
        assert session2.connector.client_session._send_request != AsyncMock


class TestMCPUseClientRetry:
    """Test retry functionality integrated into MCPUseHUDClient."""

    @pytest.mark.asyncio
    async def test_client_applies_retry_on_connect(self):
        """Test that MCPUseHUDClient applies retry logic during connection."""
        config = {"test_server": {"url": "http://localhost:8080"}}
        client = MCPUseHUDClient(config)

        # Mock the MCPUseClient and session creation
        with patch("hud.clients.mcp_use.MCPUseClient") as MockMCPUseClient:
            mock_client = Mock()
            MockMCPUseClient.from_dict.return_value = mock_client

            # Create mock session
            mock_session = Mock()
            mock_session.connector = Mock()
            mock_session.connector.client_session = Mock()
            mock_session.connector.client_session._send_request = AsyncMock()
            mock_session.connector.client_session.list_tools = AsyncMock(
                return_value=Mock(tools=[])
            )

            mock_client.create_all_sessions = AsyncMock(return_value={"test_server": mock_session})

            # Initialize client (which applies retry logic)
            await client.initialize()

            # Verify session was created and patched
            assert len(client._sessions) == 1
            assert "test_server" in client._sessions

    @pytest.mark.asyncio
    async def test_tool_call_with_retry(self):
        """Test that tool calls work with retry logic."""
        config = {"test_server": {"url": "http://localhost:8080"}}
        client = MCPUseHUDClient(config)

        with patch("hud.clients.mcp_use.MCPUseClient") as MockMCPUseClient:
            mock_client = Mock()
            MockMCPUseClient.from_dict.return_value = mock_client

            # Create mock session
            mock_session = Mock()
            mock_session.connector = Mock()
            mock_session.connector.client_session = Mock()

            # Mock tool listing
            test_tool = types.Tool(
                name="test_tool",
                description="Test tool",
                inputSchema={"type": "object"},
            )
            mock_session.connector.client_session.list_tools = AsyncMock(
                return_value=Mock(tools=[test_tool])
            )

            # Mock tool call with simulated retry
            call_count = 0

            async def mock_call_tool(name, arguments):
                nonlocal call_count
                call_count += 1

                # First call fails, second succeeds
                if call_count == 1:
                    raise Exception("HTTP 503 Service Unavailable")

                return Mock(
                    content=[types.TextContent(type="text", text="Success")],
                    isError=False,
                    structuredContent=None,
                )

            mock_session.connector.client_session.call_tool = mock_call_tool
            mock_session.connector.client_session._send_request = AsyncMock()

            mock_client.create_all_sessions = AsyncMock(return_value={"test_server": mock_session})

            # Initialize and call tool
            await client.initialize()

            # Wrap call_tool with retry for this test
            original_call = mock_session.connector.client_session.call_tool
            mock_session.connector.client_session.call_tool = create_async_retry_wrapper(
                original_call,
                max_retries=2,
                retry_status_codes=(503,),
                retry_delay=0.01,
            )

            result = await client.call_tool(MCPToolCall(name="test_tool", arguments={}))

            # Verify retry worked
            assert call_count == 2  # Failed once, then succeeded
            assert not result.isError
            assert result.content[0].text == "Success"

    @pytest.mark.asyncio
    async def test_resource_read_with_retry(self):
        """Test that resource reading works with retry logic."""
        config = {"test_server": {"url": "http://localhost:8080"}}
        client = MCPUseHUDClient(config)

        with patch("hud.clients.mcp_use.MCPUseClient") as MockMCPUseClient:
            mock_client = Mock()
            MockMCPUseClient.from_dict.return_value = mock_client

            # Create mock session
            mock_session = Mock()
            mock_session.connector = Mock()
            mock_session.connector.client_session = Mock()
            mock_session.connector.client_session.list_tools = AsyncMock(
                return_value=Mock(tools=[])
            )

            # Mock resource read with simulated retry
            call_count = 0

            async def mock_read_resource(uri):
                nonlocal call_count
                call_count += 1

                # First call fails, second succeeds
                if call_count == 1:
                    raise Exception("HTTP 502 Bad Gateway")

                return Mock(contents=[Mock(text='{"status": "ok"}')])

            mock_session.connector.client_session.read_resource = mock_read_resource
            mock_session.connector.client_session._send_request = AsyncMock()

            mock_client.create_all_sessions = AsyncMock(return_value={"test_server": mock_session})

            # Initialize
            await client.initialize()

            # Wrap read_resource with retry for this test
            original_read = mock_session.connector.client_session.read_resource
            mock_session.connector.client_session.read_resource = create_async_retry_wrapper(
                original_read,
                max_retries=2,
                retry_status_codes=(502,),
                retry_delay=0.01,
            )

            result = await client.read_resource("test://resource")

            # Verify retry worked
            assert call_count == 2  # Failed once, then succeeded
            assert result is not None
            assert result.contents[0].text == '{"status": "ok"}'


if __name__ == "__main__":
    pytest.main([__file__, "-v"])
hud/clients/utils/__init__.py
CHANGED
@@ -1 +1,26 @@
 """HUD MCP client utilities."""
+
+from __future__ import annotations
+
+from .retry import (
+    DEFAULT_BACKOFF_FACTOR,
+    DEFAULT_MAX_RETRIES,
+    DEFAULT_RETRY_DELAY,
+    DEFAULT_RETRY_STATUS_CODES,
+    is_retryable_error,
+    retry_with_backoff,
+    with_retry,
+)
+from .retry_transport import RetryTransport, create_retry_httpx_client
+
+__all__ = [
+    "DEFAULT_BACKOFF_FACTOR",
+    "DEFAULT_MAX_RETRIES",
+    "DEFAULT_RETRY_DELAY",
+    "DEFAULT_RETRY_STATUS_CODES",
+    "RetryTransport",
+    "create_retry_httpx_client",
+    "is_retryable_error",
+    "retry_with_backoff",
+    "with_retry",
+]
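With these re-exports, the retry helpers are importable directly from hud.clients.utils. A sketch of the decorator usage (the decorated coroutine, its argument, and the resource URI are illustrative, not part of the package):

# Sketch only, assuming a client object with an async read_resource method.
from hud.clients.utils import with_retry

@with_retry(max_retries=3, retry_delay=1.0)
async def fetch_status(client):
    # Errors recognized as 502/503/504 are retried with exponential backoff;
    # any other exception propagates immediately.
    return await client.read_resource("telemetry://status")
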
hud/clients/utils/mcp_use_retry.py
ADDED

@@ -0,0 +1,201 @@
"""Retry wrapper for MCP-use HTTP transport.

This module provides a transport-level retry mechanism for MCP-use,
similar to the approach used in FastMCP.
"""

from __future__ import annotations

import asyncio
import logging
from typing import TYPE_CHECKING, Any, TypeVar

if TYPE_CHECKING:
    from collections.abc import Callable

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

logger = logging.getLogger(__name__)

T = TypeVar("T")


def create_retry_session(
    max_retries: int = 3,
    retry_status_codes: tuple[int, ...] = (502, 503, 504),
    retry_delay: float = 1.0,
    backoff_factor: float = 2.0,
) -> requests.Session:
    """
    Create a requests session with retry logic.

    Args:
        max_retries: Maximum number of retry attempts
        retry_status_codes: HTTP status codes to retry
        retry_delay: Initial delay between retries in seconds
        backoff_factor: Multiplier for exponential backoff

    Returns:
        Configured requests.Session with retry logic
    """
    session = requests.Session()

    # Configure retry strategy
    retry = Retry(
        total=max_retries,
        backoff_factor=backoff_factor,
        status_forcelist=list(retry_status_codes),
        # Allow retries on all methods
        allowed_methods=["HEAD", "GET", "OPTIONS", "POST", "PUT", "DELETE", "PATCH"],
        # Respect Retry-After header if present
        respect_retry_after_header=True,
    )

    # Create adapter with retry strategy
    adapter = HTTPAdapter(max_retries=retry)

    # Mount adapter for both HTTP and HTTPS
    session.mount("http://", adapter)
    session.mount("https://", adapter)

    logger.debug(
        "Created retry session with max_retries=%d, status_codes=%s, backoff_factor=%.1f",
        max_retries,
        retry_status_codes,
        backoff_factor,
    )

    return session


def patch_mcp_session_http_client(session: Any) -> None:
    """
    Patch an MCP-use session to use HTTP retry logic.

    This function attempts to replace the HTTP client used by an MCP session
    with one that has retry logic enabled.

    Args:
        session: MCP-use session to patch
    """
    try:
        # Check if session has a connector with an HTTP client
        if hasattr(session, "connector"):
            connector = session.connector

            # For HTTP connectors, patch the underlying HTTP client
            if hasattr(connector, "_connection_manager"):
                manager = connector._connection_manager

                # If it's using requests, replace the session
                if hasattr(manager, "_session") or hasattr(manager, "session"):
                    retry_session = create_retry_session()

                    # Try different attribute names
                    if hasattr(manager, "_session"):
                        manager._session = retry_session
                        logger.debug("Patched connection manager's _session with retry logic")
                    elif hasattr(manager, "session"):
                        manager.session = retry_session
                        logger.debug("Patched connection manager's session with retry logic")

            # Also check for client_session (async variant)
            if hasattr(connector, "client_session") and connector.client_session:
                client = connector.client_session

                # Wrap the async HTTP methods with retry logic
                if hasattr(client, "_send_request"):
                    original_send = client._send_request
                    client._send_request = create_async_retry_wrapper(original_send)
                    logger.debug("Wrapped client_session._send_request with retry logic")

    except Exception as e:
        logger.warning("Could not patch MCP session with retry logic: %s", e)


def create_async_retry_wrapper(
    func: Callable[..., Any],
    max_retries: int = 3,
    retry_status_codes: tuple[int, ...] = (502, 503, 504),
    retry_delay: float = 1.0,
    backoff_factor: float = 2.0,
) -> Callable[..., Any]:
    """
    Create an async wrapper that adds retry logic to a function.

    Args:
        func: The async function to wrap
        max_retries: Maximum number of retry attempts
        retry_status_codes: HTTP status codes to retry
        retry_delay: Initial delay between retries
        backoff_factor: Multiplier for exponential backoff

    Returns:
        Wrapped function with retry logic
    """

    async def wrapper(*args: Any, **kwargs: Any) -> Any:
        last_exception = None
        delay = retry_delay

        for attempt in range(max_retries + 1):
            try:
                result = await func(*args, **kwargs)

                # Check if result has a status code that should trigger retry
                if (
                    hasattr(result, "status_code")
                    and result.status_code in retry_status_codes
                    and attempt < max_retries
                ):
                    logger.warning(
                        "HTTP %d error (attempt %d/%d), retrying in %.1fs",
                        result.status_code,
                        attempt + 1,
                        max_retries + 1,
                        delay,
                    )
                    await asyncio.sleep(delay)
                    delay *= backoff_factor
                    continue

                return result

            except Exception as e:
                # Check if it's an HTTP error that should be retried
                error_str = str(e)
                should_retry = any(str(code) in error_str for code in retry_status_codes)

                if should_retry and attempt < max_retries:
                    logger.warning(
                        "Error '%s' (attempt %d/%d), retrying in %.1fs",
                        e,
                        attempt + 1,
                        max_retries + 1,
                        delay,
                    )
                    await asyncio.sleep(delay)
                    delay *= backoff_factor
                    last_exception = e
                else:
                    raise

        # If we exhausted retries, raise the last exception
        if last_exception:
            raise last_exception

    return wrapper


def patch_all_sessions(sessions: dict[str, Any]) -> None:
    """
    Apply retry logic to all MCP sessions.

    Args:
        sessions: Dictionary of session name to session object
    """
    for name, session in sessions.items():
        logger.debug("Patching session '%s' with retry logic", name)
        patch_mcp_session_http_client(session)
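A standalone sketch of the wrapper's retry-on-exception path, mirroring the tests above; the flaky coroutine is invented for the example:

import asyncio
from hud.clients.utils.mcp_use_retry import create_async_retry_wrapper

calls = 0

async def flaky() -> str:
    global calls
    calls += 1
    if calls < 3:
        raise Exception("HTTP 503 Service Unavailable")  # "503" makes it retryable
    return "ok"

wrapped = create_async_retry_wrapper(flaky, max_retries=3, retry_delay=0.1)
print(asyncio.run(wrapped()))  # prints "ok" after two retried failures
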
hud/clients/utils/retry.py
ADDED

@@ -0,0 +1,186 @@
"""Shared retry utilities for MCP client operations."""

from __future__ import annotations

import asyncio
import logging
from functools import wraps
from typing import TYPE_CHECKING, Any, TypeVar

if TYPE_CHECKING:
    from collections.abc import Callable

from httpx import HTTPStatusError
from mcp.shared.exceptions import McpError

logger = logging.getLogger(__name__)

T = TypeVar("T")

# Default retry configuration matching requests.py
DEFAULT_MAX_RETRIES = 4
DEFAULT_RETRY_DELAY = 2.0
DEFAULT_RETRY_STATUS_CODES = {502, 503, 504}
DEFAULT_BACKOFF_FACTOR = 2.0


def is_retryable_error(error: Exception, retry_status_codes: set[int]) -> bool:
    """
    Check if an error is retryable based on status codes.

    Args:
        error: The exception to check
        retry_status_codes: Set of HTTP status codes to retry on

    Returns:
        True if the error is retryable, False otherwise
    """
    # Check for HTTP status errors with retryable status codes
    if isinstance(error, HTTPStatusError):
        return error.response.status_code in retry_status_codes

    # Check for MCP errors that might wrap HTTP errors
    if isinstance(error, McpError):
        error_msg = str(error).lower()
        # Check for common gateway error patterns in the message
        for code in retry_status_codes:
            if str(code) in error_msg:
                return True
        # Check for gateway error keywords
        if any(
            keyword in error_msg
            for keyword in ["bad gateway", "service unavailable", "gateway timeout"]
        ):
            return True

    # Check for generic errors with status codes in the message
    error_msg = str(error)
    for code in retry_status_codes:
        if f"{code}" in error_msg or f"status {code}" in error_msg.lower():
            return True

    return False


async def retry_with_backoff(
    func: Callable[..., Any],
    *args: Any,
    max_retries: int = DEFAULT_MAX_RETRIES,
    retry_delay: float = DEFAULT_RETRY_DELAY,
    retry_status_codes: set[int] | None = None,
    backoff_factor: float = DEFAULT_BACKOFF_FACTOR,
    operation_name: str | None = None,
    **kwargs: Any,
) -> Any:
    """
    Execute an async function with retry logic and exponential backoff.

    This matches the retry behavior in requests.py but can be applied
    to any async function, particularly MCP client operations.

    Args:
        func: The async function to retry
        *args: Positional arguments for the function
        max_retries: Maximum number of retry attempts
        retry_delay: Initial delay between retries in seconds
        retry_status_codes: HTTP status codes to retry on
        backoff_factor: Multiplier for exponential backoff
        operation_name: Name of the operation for logging
        **kwargs: Keyword arguments for the function

    Returns:
        The result of the function call

    Raises:
        The last exception if all retries are exhausted
    """
    if retry_status_codes is None:
        retry_status_codes = DEFAULT_RETRY_STATUS_CODES

    operation = operation_name or func.__name__
    last_error = None

    for attempt in range(max_retries + 1):
        try:
            result = await func(*args, **kwargs)
            return result
        except Exception as e:
            last_error = e

            # Check if this is a retryable error
            if not is_retryable_error(e, retry_status_codes):
                # Not retryable, raise immediately
                raise

            # Don't retry if we've exhausted attempts
            if attempt >= max_retries:
                logger.debug(
                    "Operation '%s' failed after %d retries: %s",
                    operation,
                    max_retries,
                    e,
                )
                raise

            # Calculate backoff delay (exponential backoff)
            delay = retry_delay * (backoff_factor**attempt)

            logger.warning(
                "Operation '%s' failed with retryable error, "
                "retrying in %.2f seconds (attempt %d/%d): %s",
                operation,
                delay,
                attempt + 1,
                max_retries,
                e,
            )

            await asyncio.sleep(delay)

    # This should never be reached, but just in case
    if last_error:
        raise last_error
    raise RuntimeError(f"Unexpected retry loop exit for operation '{operation}'")


def with_retry(
    max_retries: int = DEFAULT_MAX_RETRIES,
    retry_delay: float = DEFAULT_RETRY_DELAY,
    retry_status_codes: set[int] | None = None,
    backoff_factor: float = DEFAULT_BACKOFF_FACTOR,
) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """
    Decorator to add retry logic to async methods.

    Usage:
        @with_retry(max_retries=3)
        async def my_method(self, ...):
            ...

    Args:
        max_retries: Maximum number of retry attempts
        retry_delay: Initial delay between retries
        retry_status_codes: HTTP status codes to retry on
        backoff_factor: Multiplier for exponential backoff

    Returns:
        Decorated function with retry logic
    """

    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(func)
        async def wrapper(*args: Any, **kwargs: Any) -> Any:
            return await retry_with_backoff(
                func,
                *args,
                max_retries=max_retries,
                retry_delay=retry_delay,
                retry_status_codes=retry_status_codes,
                backoff_factor=backoff_factor,
                operation_name=func.__name__,
                **kwargs,
            )

        return wrapper

    return decorator
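Worked example of the backoff schedule retry_with_backoff produces with the module defaults above (retry_delay 2.0 s, backoff_factor 2.0, 4 retries), where delay = retry_delay * backoff_factor ** attempt:

# Pure arithmetic derived from the code above; nothing package-specific.
delays = [2.0 * 2.0**attempt for attempt in range(4)]
print(delays)  # [2.0, 4.0, 8.0, 16.0] seconds of waiting before retries 1 through 4
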
hud/datasets/execution/parallel.py
CHANGED

@@ -4,6 +4,7 @@ from __future__ import annotations

 import asyncio
 import logging
+import multiprocessing
 import os
 import traceback
 from concurrent.futures import ProcessPoolExecutor, as_completed

@@ -191,18 +192,15 @@ def _process_worker(
         # Run the async batch processing
         results = loop.run_until_complete(process_batch())

-        #
-        #
-        # 1. The trace context's __exit__ already called _update_task_status_sync (blocking)
-        # 2. But spans are buffered in BatchSpanProcessor and need explicit flush
+        # Ensure telemetry is fully sent before process exits
+        # Spans are buffered in BatchSpanProcessor and need explicit flush

         from opentelemetry import trace as otel_trace

         provider = otel_trace.get_tracer_provider()
         if provider and hasattr(provider, "force_flush"):
-            #
-
-            success = provider.force_flush(timeout_millis=5000)  # 5 second timeout  # type: ignore
+            # Flush of buffered spans
+            success = provider.force_flush(timeout_millis=2000)  # type: ignore[arg-type]
             if not success:
                 logger.warning("Worker %s: Telemetry flush timed out", worker_id)

@@ -230,6 +228,18 @@ def _process_worker(
         logger.error("Worker %s batch processing failed: %s", worker_id, e)
         return [(idx, {"error": str(e), "isError": True}) for idx, _ in task_batch]
     finally:
+        try:
+            from opentelemetry import trace as otel_trace
+
+            provider = otel_trace.get_tracer_provider()
+            if provider and hasattr(provider, "force_flush"):
+                # Flush buffered spans with reasonable timeout
+                success = provider.force_flush(timeout_millis=2000)  # type: ignore[arg-type]
+                if not success:
+                    logger.warning("Worker %s: Telemetry flush timed out", worker_id)
+        except Exception as e:
+            logger.warning("Worker %s: Failed to flush telemetry: %s", worker_id, e)
+
         # Clean up the event loop
         try:
             loop.close()

@@ -342,6 +352,8 @@ async def run_dataset_parallel_manual(
         for task_dict in task_dicts:
             if "system_prompt" not in task_dict:
                 task_dict["system_prompt"] = custom_system_prompt
+            else:
+                task_dict["system_prompt"] += "\n" + custom_system_prompt

     # Prepare job metadata
     job_metadata = metadata or {}

@@ -366,6 +378,8 @@ async def run_dataset_parallel_manual(
     except Exception:
         logger.warning("Failed to extract dataset verification info")

+    # task_dicts = task_dicts[:10]
+
     # Create job context
     with hud.job(name, metadata=job_metadata, dataset_link=dataset_link) as job_obj:
         # Prepare agent class info for pickling

@@ -410,7 +424,10 @@ async def run_dataset_parallel_manual(
         )

         # Process batches in parallel using ProcessPoolExecutor
-        executor = ProcessPoolExecutor(
+        executor = ProcessPoolExecutor(
+            max_workers=max_workers,
+            mp_context=multiprocessing.get_context("spawn"),
+        )
         try:
             # Submit all batches to workers
             future_to_batch = {
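The executor now pins the "spawn" start method, so each worker begins in a fresh interpreter rather than inheriting forked state (a common source of trouble when the parent already holds event loops or telemetry threads). A minimal sketch of the same construction; worker_fn and the batches are placeholders, not names from the package:

import multiprocessing
from concurrent.futures import ProcessPoolExecutor

def worker_fn(batch: list[int]) -> int:
    # Stand-in for a real batch worker.
    return len(batch)

if __name__ == "__main__":
    ctx = multiprocessing.get_context("spawn")
    with ProcessPoolExecutor(max_workers=2, mp_context=ctx) as executor:
        print(list(executor.map(worker_fn, [[1, 2], [3]])))
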
hud/otel/config.py
CHANGED
@@ -97,7 +97,16 @@ def configure_telemetry(
         exporter = HudSpanExporter(
             telemetry_url=settings.hud_telemetry_url, api_key=settings.api_key
         )
-
+        # Export more continuously to avoid big end flushes
+        provider.add_span_processor(
+            BatchSpanProcessor(
+                exporter,
+                schedule_delay_millis=1000,
+                max_queue_size=8192,
+                max_export_batch_size=256,
+                export_timeout_millis=30000,
+            )
+        )
     elif settings.telemetry_enabled and not settings.api_key and not enable_otlp:
         # Error if no exporters are configured
         raise ValueError(

@@ -127,7 +136,15 @@ def configure_telemetry(
                 otlp_config["headers"] = otlp_headers

             otlp_exporter = OTLPSpanExporter(**otlp_config)
-            provider.add_span_processor(
+            provider.add_span_processor(
+                BatchSpanProcessor(
+                    otlp_exporter,
+                    schedule_delay_millis=1000,
+                    max_queue_size=8192,
+                    max_export_batch_size=256,
+                    export_timeout_millis=30000,
+                )
+            )
             logger.info("OTLP HTTP exporter enabled - endpoint: %s", otlp_config["endpoint"])
         except ImportError:
             logger.warning(
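A sketch showing the same BatchSpanProcessor tuning against a console exporter; only the processor arguments come from the diff above, and the comparisons to the SDK defaults are stated as an assumption about opentelemetry-sdk:

from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter

provider = TracerProvider()
provider.add_span_processor(
    BatchSpanProcessor(
        ConsoleSpanExporter(),
        schedule_delay_millis=1000,   # assumed SDK default: 5000
        max_queue_size=8192,          # assumed SDK default: 2048
        max_export_batch_size=256,    # assumed SDK default: 512
        export_timeout_millis=30000,  # assumed SDK default: 30000
    )
)
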
hud/utils/tests/test_version.py
CHANGED
hud/version.py
CHANGED
{hud_python-0.4.23.dist-info → hud_python-0.4.25.dist-info}/RECORD
CHANGED

@@ -2,10 +2,10 @@ hud/__init__.py,sha256=KU7G-_Mj6Mjf7trXA6X0ufN6QUmqhVi19NKbnNIvD74,532
 hud/__main__.py,sha256=YR8Dq8OhINOsVfQ55PmRXXg4fEK84Rt_-rMtJ5rvhWo,145
 hud/settings.py,sha256=bgq_zstNGlan7UDZOiElY8Aw5ZU4xL7Ds5HP_Xzph1A,2535
 hud/types.py,sha256=jta4Hgj_rBdIMcf7mz0tsdgfA1pssp327bxcT6Mfp00,6107
-hud/version.py,sha256=
+hud/version.py,sha256=9XXnK9XmJJQ0fZ1VqYaKnR6Lp3ekC6Wp0RRaPTJwdJ8,105
 hud/agents/__init__.py,sha256=UoIkljWdbq4bM0LD-mSaw6w826EqdEjOk7r6glNYwYQ,286
-hud/agents/base.py,sha256=
-hud/agents/claude.py,sha256=
+hud/agents/base.py,sha256=Ui4FC4FPCTrJ6UZBObuY2mzA99veKzbWFCsFFSVOIJc,32406
+hud/agents/claude.py,sha256=yTVbSNMwNjF9qZAnwDSsgrscaenJpMGnGdW_4rFW_pY,15603
 hud/agents/grounded_openai.py,sha256=U-FHjB2Nh1_o0gmlxY5F17lWJ3oHsNRIB2a7z-IKB64,11231
 hud/agents/langchain.py,sha256=1EgCy8jfjunsWxlPC5XfvfLS6_XZVrIF1ZjtHcrvhYw,9584
 hud/agents/openai.py,sha256=ovARRWNuHqKkZ2Q_OCYSVCIZckrh8XY2jUB2p2x1m88,14259

@@ -70,18 +70,21 @@ hud/clients/README.md,sha256=XNE3mch95ozDgVqfwCGcrhlHY9CwT1GKfNANNboowto,3826
 hud/clients/__init__.py,sha256=N5M_gZv4nP7dLRwpAiaqqaxyaLieGW6397FszeG7JGw,364
 hud/clients/base.py,sha256=vt9mRMZwZg2DtVxQmccR_VZZGamXhx3dvlFJPulbOd8,14131
 hud/clients/fastmcp.py,sha256=KJGi8bmds0Q6rHnkTXb_Hw9ZqWmSo0OfjW05SSuyEJU,9182
-hud/clients/mcp_use.py,sha256=
+hud/clients/mcp_use.py,sha256=TsIIItqmt93hcdW1hj2HlMPJppVo4JDoUuCqQ-ZBoBI,13521
 hud/clients/tests/__init__.py,sha256=sKOtJFFa4mDIXh1U6O8ZUHjigE8CiRMQ2PzJTIBZuVE,33
 hud/clients/tests/test_client_integration.py,sha256=kohU6jfCNfwSnAushHeB1_CmDlRfQc7VBL0GEdJYSeI,4198
 hud/clients/tests/test_fastmcp.py,sha256=4q3TzDjuieTZa89taiNJIrzbUncNkYOG4MaubypA21k,13030
+hud/clients/tests/test_mcp_use_retry.py,sha256=v9vwKa9nrW0EYiWzkq_vpyju40NaOm3SBiJh8VJYX2E,13103
 hud/clients/tests/test_protocol.py,sha256=aK4CS4g3j1D5jPo83ykzZuHUvcZFAulYtIq9T9Hb_fQ,6640
-hud/clients/utils/__init__.py,sha256
+hud/clients/utils/__init__.py,sha256=-zZjcKIWGj2tXbVDOW45UgoGghhLJzFQVZ6miKenuA4,595
+hud/clients/utils/mcp_use_retry.py,sha256=sBCjtgnAXiXASjzFF_AtBEtmizay0Fi0nPL6sVoooeI,6675
+hud/clients/utils/retry.py,sha256=mMs2T_mAlb8AYhSqMR4AmCw7838gqCC4mdG3zjMAYM4,5744
 hud/clients/utils/retry_transport.py,sha256=Rsq25eiKKt_pM1bas78QEZvO0illK97X_3opmaS3A3w,6809
 hud/datasets/__init__.py,sha256=74T4mrjELKtE04XkZKwU8QAJcg2wjqXLqRO9s4GlPr4,678
 hud/datasets/task.py,sha256=bDVLy4EOBfjiU4i8hrFqcQ3dc077vzVNRXIbyNXFnp8,3916
 hud/datasets/utils.py,sha256=3hKvZTkZuCRkTeITB86nNdA1dtHZAqFfAdSPMtcTUhs,4275
 hud/datasets/execution/__init__.py,sha256=4m1AEpMQaUSJFVN_iAXvY6zFttVgZKwE6oQtC0Rrk7U,330
-hud/datasets/execution/parallel.py,sha256=
+hud/datasets/execution/parallel.py,sha256=ZEMMmH018QXOL1QoD_AyspQkGs_41F-GB_mTgdXh6J4,25780
 hud/datasets/execution/runner.py,sha256=EEvb90vvAqFXXx8NyVKLfK5p-gtsfJqiFJAoqSjyfXg,4695
 hud/misc/__init__.py,sha256=m_pprQQ-G-Y0Sd0NEiR8MtAMbElnuFZ2OWT8TXrw7c4,43
 hud/misc/claude_plays_pokemon.py,sha256=IthAkjDVr2Q-GNvX-QLJyMzN7-0pHqqJbagGNv2m7yo,10453

@@ -92,7 +95,7 @@ hud/native/tests/test_comparator.py,sha256=x1gFLXEDRIiJhH8tg5Rd3ptY-modYaHgSm6-h
 hud/native/tests/test_native_init.py,sha256=Is8fcDZimp1Oi2Bv4zavqM3KrpS86_DUXFnqc0AsCH0,2736
 hud/otel/__init__.py,sha256=ii17ayoWiS5vAhA7UAmZ8TkmP52gs2pWyHsD46-uYbE,1003
 hud/otel/collector.py,sha256=jLZymZ8r7xt2VDuWexfbnT7PY1-0aiyLMgjBy8KDY1M,4497
-hud/otel/config.py,sha256=
+hud/otel/config.py,sha256=BPEyINdlxjsTj-IOO7F_JXAqrbMEWNMuhi2GDGqB5Kk,6785
 hud/otel/context.py,sha256=C9MvO99cRSNNDEDC7ehO3eoTPnb6J7AemUYvEp57yEU,17774
 hud/otel/exporters.py,sha256=RLAjWa8b2DJEU21740Idq4fmeIuabLEqGGUspcFDcH4,14331
 hud/otel/instrumentation.py,sha256=bBWxQ5vkiP-2WZ_2ztR5LAw0Wu02SZNKUgs5f-rU-ro,3734

@@ -174,10 +177,10 @@ hud/utils/tests/test_init.py,sha256=2QLQSGgyP9wJhOvPCusm_zjJad0qApOZi1BXpxcdHXQ,
 hud/utils/tests/test_mcp.py,sha256=0pUa16mL-bqbZDXp5NHBnt1gO5o10BOg7zTMHZ1DNPM,4023
 hud/utils/tests/test_progress.py,sha256=QSF7Kpi03Ff_l3mAeqW9qs1nhK50j9vBiSobZq7T4f4,7394
 hud/utils/tests/test_telemetry.py,sha256=5jl7bEx8C8b-FfFUko5pf4UY-mPOR-9HaeL98dGtVHM,2781
-hud/utils/tests/test_version.py,sha256=
+hud/utils/tests/test_version.py,sha256=hizll2edGZxX6BgcLfQx2WKR2sU8TTyUdgkCY0hpMMQ,160
 hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hud_python-0.4.
-hud_python-0.4.
-hud_python-0.4.
-hud_python-0.4.
-hud_python-0.4.
+hud_python-0.4.25.dist-info/METADATA,sha256=cV33nGpP96-h1DTaoqkI19o_9ow69_tiEOd6a3yL-Wo,20239
+hud_python-0.4.25.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hud_python-0.4.25.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
+hud_python-0.4.25.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
+hud_python-0.4.25.dist-info/RECORD,,
{hud_python-0.4.23.dist-info → hud_python-0.4.25.dist-info}/WHEEL
File without changes

{hud_python-0.4.23.dist-info → hud_python-0.4.25.dist-info}/entry_points.txt
File without changes

{hud_python-0.4.23.dist-info → hud_python-0.4.25.dist-info}/licenses/LICENSE
File without changes