hud-python 0.4.24__py3-none-any.whl → 0.4.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/agents/base.py +36 -17
- hud/agents/misc/response_agent.py +2 -1
- hud/clients/mcp_use.py +13 -19
- hud/clients/tests/test_mcp_use_retry.py +378 -0
- hud/clients/utils/mcp_use_retry.py +201 -0
- hud/datasets/execution/parallel.py +56 -64
- hud/otel/config.py +19 -2
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.24.dist-info → hud_python-0.4.26.dist-info}/METADATA +1 -1
- {hud_python-0.4.24.dist-info → hud_python-0.4.26.dist-info}/RECORD +14 -12
- {hud_python-0.4.24.dist-info → hud_python-0.4.26.dist-info}/WHEEL +0 -0
- {hud_python-0.4.24.dist-info → hud_python-0.4.26.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.24.dist-info → hud_python-0.4.26.dist-info}/licenses/LICENSE +0 -0
hud/agents/base.py
CHANGED
|
@@ -556,26 +556,11 @@ class MCPAgent(ABC):
|
|
|
556
556
|
|
|
557
557
|
all_tools = await self.mcp_client.list_tools()
|
|
558
558
|
|
|
559
|
-
# Filter tools
|
|
560
|
-
self._available_tools = []
|
|
561
|
-
self._tool_map = {}
|
|
562
559
|
|
|
563
|
-
# Track response tools by server
|
|
564
560
|
response_tools_by_server: dict[str, str] = {} # server_name -> tool_name
|
|
565
|
-
|
|
566
561
|
for tool in all_tools:
|
|
567
|
-
# Check if tool should be included
|
|
568
|
-
if self.allowed_tools and tool.name not in self.allowed_tools:
|
|
569
|
-
continue
|
|
570
|
-
if tool.name in self.disallowed_tools:
|
|
571
|
-
continue
|
|
572
|
-
|
|
573
|
-
self._available_tools.append(tool)
|
|
574
|
-
# Simplified mapping - just tool name to tool
|
|
575
|
-
self._tool_map[tool.name] = tool
|
|
576
|
-
|
|
577
|
-
# Track response tools
|
|
578
562
|
if "response" in tool.name or tool.name == "response":
|
|
563
|
+
self.console.debug(f"Found response tool: '{tool.name}'")
|
|
579
564
|
# Extract server name from tool name (e.g., "grader_response" -> "grader")
|
|
580
565
|
if "_" in tool.name:
|
|
581
566
|
server_name = tool.name.split("_", 1)[0]
|
|
@@ -583,27 +568,61 @@ class MCPAgent(ABC):
|
|
|
583
568
|
else:
|
|
584
569
|
response_tools_by_server["_default"] = tool.name
|
|
585
570
|
|
|
586
|
-
#
|
|
571
|
+
# Add response tool to lifecycle tools BEFORE filtering
|
|
587
572
|
if response_tools_by_server and hasattr(self.mcp_client, "mcp_config"):
|
|
588
573
|
# Get server names in order from mcp_config
|
|
589
574
|
server_names = list(self.mcp_client.mcp_config.keys())
|
|
575
|
+
self.console.debug(f"Server names: {server_names}")
|
|
590
576
|
|
|
591
577
|
# Try to find response tool from last server first
|
|
592
578
|
response_tool_name = None
|
|
593
579
|
for server_name in reversed(server_names):
|
|
594
580
|
if server_name in response_tools_by_server:
|
|
595
581
|
response_tool_name = response_tools_by_server[server_name]
|
|
582
|
+
self.console.debug(f"Found response tool '{response_tool_name}' from server '{server_name}'")
|
|
596
583
|
break
|
|
597
584
|
|
|
598
585
|
# Fallback to any response tool
|
|
599
586
|
if not response_tool_name and response_tools_by_server:
|
|
600
587
|
response_tool_name = next(iter(response_tools_by_server.values()))
|
|
588
|
+
self.console.debug(f"Using fallback response tool '{response_tool_name}'")
|
|
601
589
|
|
|
602
590
|
# Add to lifecycle tools if found
|
|
603
591
|
if response_tool_name and response_tool_name not in self.lifecycle_tools:
|
|
604
592
|
self.console.debug(f"Auto-detected '{response_tool_name}' tool as a lifecycle tool")
|
|
605
593
|
self.response_tool_name = response_tool_name
|
|
606
594
|
self.lifecycle_tools.append(response_tool_name)
|
|
595
|
+
elif response_tool_name:
|
|
596
|
+
self.console.debug(f"Response tool '{response_tool_name}' already in lifecycle_tools")
|
|
597
|
+
self.response_tool_name = response_tool_name
|
|
598
|
+
else:
|
|
599
|
+
self.console.debug(f"No response tools found or no mcp_config")
|
|
600
|
+
|
|
601
|
+
# Filter tools
|
|
602
|
+
self._available_tools = []
|
|
603
|
+
self._tool_map = {}
|
|
604
|
+
|
|
605
|
+
self.console.debug(f"All tools: {[t.name for t in all_tools]}")
|
|
606
|
+
self.console.debug(f"Allowed tools: {self.allowed_tools}")
|
|
607
|
+
self.console.debug(f"Disallowed tools: {self.disallowed_tools}")
|
|
608
|
+
self.console.debug(f"Lifecycle tools: {self.lifecycle_tools}")
|
|
609
|
+
|
|
610
|
+
for tool in all_tools:
|
|
611
|
+
# Lifecycle tools (setup, evaluate, response) should always be included
|
|
612
|
+
is_lifecycle = tool.name in self.lifecycle_tools
|
|
613
|
+
|
|
614
|
+
# Check if tool should be included
|
|
615
|
+
if not is_lifecycle:
|
|
616
|
+
if self.allowed_tools and tool.name not in self.allowed_tools:
|
|
617
|
+
self.console.debug(f"Skipping tool '{tool.name}' - not in allowed_tools")
|
|
618
|
+
continue
|
|
619
|
+
if tool.name in self.disallowed_tools:
|
|
620
|
+
self.console.debug(f"Skipping tool '{tool.name}' - in disallowed_tools")
|
|
621
|
+
continue
|
|
622
|
+
|
|
623
|
+
self.console.debug(f"Adding tool '{tool.name}' to available tools (lifecycle={is_lifecycle})")
|
|
624
|
+
self._available_tools.append(tool)
|
|
625
|
+
self._tool_map[tool.name] = tool
|
|
607
626
|
|
|
608
627
|
# Check if all required tools are available
|
|
609
628
|
if self.required_tools:
|
|
@@ -34,7 +34,8 @@ class ResponseAgent:
|
|
|
34
34
|
|
|
35
35
|
- STOP: If the agent indicates it has successfully completed a task, even if phrased as a question
|
|
36
36
|
like "I have entered the right values into this form. Would you like me to do anything else?"
|
|
37
|
-
or "Here is the website. Is there any other information you need?"
|
|
37
|
+
or "Here is the website. Is there any other information you need?" or if the agent has
|
|
38
|
+
strongly determined it wants to stop the task.
|
|
38
39
|
|
|
39
40
|
- CONTINUE: If the agent is asking for clarification before proceeding with a task
|
|
40
41
|
like "I'm about to clear cookies from this website. Would you like me to proceed?"
|
hud/clients/mcp_use.py
CHANGED
|
@@ -15,7 +15,7 @@ from hud.types import MCPToolCall, MCPToolResult
|
|
|
15
15
|
from hud.version import __version__ as hud_version
|
|
16
16
|
|
|
17
17
|
from .base import BaseHUDClient
|
|
18
|
-
from .utils.
|
|
18
|
+
from .utils.mcp_use_retry import patch_all_sessions
|
|
19
19
|
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
21
21
|
|
|
@@ -64,6 +64,10 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
64
64
|
self._sessions = await self._client.create_all_sessions()
|
|
65
65
|
logger.info("Created %d MCP sessions", len(self._sessions))
|
|
66
66
|
|
|
67
|
+
# Patch all sessions with retry logic
|
|
68
|
+
patch_all_sessions(self._sessions)
|
|
69
|
+
logger.debug("Applied retry logic to all MCP sessions")
|
|
70
|
+
|
|
67
71
|
# Configure validation for all sessions based on client setting
|
|
68
72
|
try:
|
|
69
73
|
for session in self._sessions.values():
|
|
@@ -128,11 +132,8 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
128
132
|
logger.warning("Client session not initialized for %s", server_name)
|
|
129
133
|
continue
|
|
130
134
|
|
|
131
|
-
# List tools
|
|
132
|
-
tools_result = await
|
|
133
|
-
session.connector.client_session.list_tools,
|
|
134
|
-
operation_name=f"list_tools_{server_name}",
|
|
135
|
-
)
|
|
135
|
+
# List tools (retry logic is handled at transport level)
|
|
136
|
+
tools_result = await session.connector.client_session.list_tools()
|
|
136
137
|
|
|
137
138
|
logger.info(
|
|
138
139
|
"Discovered %d tools from '%s': %s",
|
|
@@ -206,12 +207,10 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
206
207
|
if session.connector.client_session is None:
|
|
207
208
|
raise ValueError(f"Client session not initialized for {server_name}")
|
|
208
209
|
|
|
209
|
-
# Call tool
|
|
210
|
-
result = await
|
|
211
|
-
session.connector.client_session.call_tool,
|
|
210
|
+
# Call tool (retry logic is handled at transport level)
|
|
211
|
+
result = await session.connector.client_session.call_tool(
|
|
212
212
|
name=original_tool.name, # Use original tool name, not prefixed
|
|
213
213
|
arguments=tool_call.arguments or {},
|
|
214
|
-
operation_name=f"call_tool_{original_tool.name}",
|
|
215
214
|
)
|
|
216
215
|
|
|
217
216
|
if self.verbose:
|
|
@@ -239,10 +238,8 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
239
238
|
continue
|
|
240
239
|
# Prefer standard method name if available
|
|
241
240
|
if hasattr(session.connector.client_session, "list_resources"):
|
|
242
|
-
resources
|
|
243
|
-
|
|
244
|
-
operation_name=f"list_resources_{server_name}",
|
|
245
|
-
)
|
|
241
|
+
# List resources (retry logic is handled at transport level)
|
|
242
|
+
resources = await session.connector.client_session.list_resources()
|
|
246
243
|
else:
|
|
247
244
|
# If the client doesn't support resource listing, skip
|
|
248
245
|
continue
|
|
@@ -272,11 +269,8 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
272
269
|
resource_uri = AnyUrl(uri) if isinstance(uri, str) else uri
|
|
273
270
|
# Prefer read_resource; fall back to list_resources if needed
|
|
274
271
|
if hasattr(session.connector.client_session, "read_resource"):
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
resource_uri,
|
|
278
|
-
operation_name=f"read_resource_{server_name}",
|
|
279
|
-
)
|
|
272
|
+
# Read resource (retry logic is handled at transport level)
|
|
273
|
+
result = await session.connector.client_session.read_resource(resource_uri)
|
|
280
274
|
else:
|
|
281
275
|
# Fallback path for older clients: not supported in strict typing
|
|
282
276
|
raise AttributeError("read_resource not available")
|
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
"""Tests for MCP-use client retry functionality."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from unittest.mock import AsyncMock, Mock, patch
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
import requests
|
|
9
|
+
from mcp import types
|
|
10
|
+
|
|
11
|
+
from hud.clients.mcp_use import MCPUseHUDClient
|
|
12
|
+
from hud.clients.utils.mcp_use_retry import (
|
|
13
|
+
create_async_retry_wrapper,
|
|
14
|
+
create_retry_session,
|
|
15
|
+
patch_all_sessions,
|
|
16
|
+
patch_mcp_session_http_client,
|
|
17
|
+
)
|
|
18
|
+
from hud.types import MCPToolCall
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class TestRetrySession:
    """Checks on the requests session built by ``create_retry_session``."""

    def test_create_retry_session(self):
        """Explicit arguments must show up on the mounted adapter's retry policy."""
        retry_session = create_retry_session(
            max_retries=5,
            retry_status_codes=(500, 502, 503, 504),
            retry_delay=0.5,
            backoff_factor=2.0,
        )

        # An adapter has to be mounted for both schemes.
        for scheme in ("http://", "https://"):
            assert scheme in retry_session.adapters

        # Inspect the retry policy carried by the HTTP adapter.
        policy = retry_session.adapters["http://"].max_retries
        assert policy.total == 5
        for code in (500, 502):
            assert code in policy.status_forcelist
        assert policy.backoff_factor == 2.0

    def test_retry_session_default_values(self):
        """Called without arguments: 3 total retries on 502, 503 and 504."""
        policy = create_retry_session().adapters["https://"].max_retries
        assert policy.total == 3
        for code in (502, 503, 504):
            assert code in policy.status_forcelist
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class TestAsyncRetryWrapper:
    """Behaviour of ``create_async_retry_wrapper`` around coroutine functions."""

    @pytest.mark.asyncio
    async def test_retry_on_error_status_codes(self):
        """A returned object with a retryable ``status_code`` triggers another attempt."""
        attempts = []

        async def flaky(*args, **kwargs):
            attempts.append(None)
            response = Mock()
            # Attempts one and two report 503 (service unavailable); the third reports 200.
            response.status_code = 503 if len(attempts) < 3 else 200
            return response

        wrapped = create_async_retry_wrapper(
            flaky,
            max_retries=3,
            retry_status_codes=(503,),
            retry_delay=0.01,  # keep the test fast
        )

        outcome = await wrapped()
        assert len(attempts) == 3
        assert outcome.status_code == 200

    @pytest.mark.asyncio
    async def test_retry_on_exception(self):
        """An exception whose message names a retryable status code is retried."""
        attempts = []

        async def flaky(*args, **kwargs):
            attempts.append(None)
            if len(attempts) < 3:
                raise Exception("HTTP 503 Service Unavailable")
            return Mock(status_code=200)

        wrapped = create_async_retry_wrapper(
            flaky,
            max_retries=3,
            retry_status_codes=(503,),
            retry_delay=0.01,
        )

        outcome = await wrapped()
        assert len(attempts) == 3
        assert outcome.status_code == 200

    @pytest.mark.asyncio
    async def test_no_retry_on_success(self):
        """A call that succeeds immediately is made exactly once."""
        attempts = []

        async def always_ok(*args, **kwargs):
            attempts.append(None)
            return Mock(status_code=200)

        wrapped = create_async_retry_wrapper(always_ok)

        outcome = await wrapped()
        assert len(attempts) == 1
        assert outcome.status_code == 200

    @pytest.mark.asyncio
    async def test_max_retries_exceeded(self):
        """Once the retry budget is spent the last error propagates to the caller."""
        attempts = []

        async def always_failing(*args, **kwargs):
            attempts.append(None)
            raise Exception("HTTP 503 Service Unavailable")

        wrapped = create_async_retry_wrapper(
            always_failing,
            max_retries=2,
            retry_status_codes=(503,),
            retry_delay=0.01,
        )

        with pytest.raises(Exception) as exc_info:
            await wrapped()

        assert "503" in str(exc_info.value)
        assert len(attempts) == 3  # Initial + 2 retries
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class TestSessionPatching:
    """Test the session patching functionality.

    The assertions check the retry *policy* on the adapters rather than the
    mere presence of adapters: a stock ``requests.Session`` already mounts
    ``http://`` and ``https://`` adapters, so a presence check cannot tell a
    patched session from an unpatched one.
    """

    def test_patch_sync_session(self):
        """A requests session stored as ``_session`` ends up with retrying adapters."""
        mock_session = Mock()
        mock_session.connector = Mock()
        mock_session.connector._connection_manager = Mock()
        mock_session.connector._connection_manager._session = requests.Session()

        patch_mcp_session_http_client(mock_session)

        patched_session = mock_session.connector._connection_manager._session
        for scheme in ("http://", "https://"):
            policy = patched_session.adapters[scheme].max_retries
            # 3 is the default retry budget of create_retry_session(); an
            # unpatched requests adapter would not carry it.
            assert policy.total == 3
            assert 502 in policy.status_forcelist

    @pytest.mark.asyncio
    async def test_patch_async_session(self):
        """``client_session._send_request`` is replaced by a working wrapper."""
        mock_session = Mock()
        mock_session.connector = Mock()
        mock_session.connector.client_session = Mock()

        async def mock_send_request(*args, **kwargs):
            return Mock(status_code=200)

        mock_session.connector.client_session._send_request = mock_send_request

        patch_mcp_session_http_client(mock_session)

        # The attribute must now point at a different callable ...
        wrapped_func = mock_session.connector.client_session._send_request
        assert wrapped_func is not mock_send_request

        # ... which still forwards to the original on success.
        result = await wrapped_func()
        assert result.status_code == 200

    def test_patch_all_sessions(self):
        """Every session in the mapping is patched."""
        # spec=["session"] stops Mock from auto-creating a ``_session``
        # attribute, so the public ``session`` attribute is the one that is
        # actually exercised here.
        manager = Mock(spec=["session"])
        manager.session = requests.Session()
        session1 = Mock()
        session1.connector = Mock()
        session1.connector._connection_manager = manager

        # Keep a handle on the original so replacement can be asserted by
        # identity (comparing against the AsyncMock class is always unequal).
        original_send = AsyncMock()
        session2 = Mock()
        session2.connector = Mock()
        session2.connector.client_session = Mock()
        session2.connector.client_session._send_request = original_send

        patch_all_sessions({"server1": session1, "server2": session2})

        assert manager.session.adapters["http://"].max_retries.total == 3
        assert session2.connector.client_session._send_request is not original_send
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
class TestMCPUseClientRetry:
    """Exercise MCPUseHUDClient together with the retry helpers."""

    @staticmethod
    def _fake_session(tools):
        """Build a Mock MCP session whose ``client_session.list_tools`` yields ``tools``."""
        fake = Mock()
        fake.connector = Mock()
        fake.connector.client_session = Mock()
        fake.connector.client_session.list_tools = AsyncMock(return_value=Mock(tools=tools))
        fake.connector.client_session._send_request = AsyncMock()
        return fake

    @pytest.mark.asyncio
    async def test_client_applies_retry_on_connect(self):
        """Initializing the client keeps the (patched) session registered."""
        client = MCPUseHUDClient({"test_server": {"url": "http://localhost:8080"}})

        # Replace MCPUseClient so no real session is created.
        with patch("hud.clients.mcp_use.MCPUseClient") as MockMCPUseClient:
            backend = Mock()
            MockMCPUseClient.from_dict.return_value = backend

            fake = self._fake_session([])
            backend.create_all_sessions = AsyncMock(return_value={"test_server": fake})

            # initialize() is where the retry logic gets applied.
            await client.initialize()

            assert len(client._sessions) == 1
            assert "test_server" in client._sessions

    @pytest.mark.asyncio
    async def test_tool_call_with_retry(self):
        """A tool call that fails once with a 503 message succeeds on the retry."""
        client = MCPUseHUDClient({"test_server": {"url": "http://localhost:8080"}})

        with patch("hud.clients.mcp_use.MCPUseClient") as MockMCPUseClient:
            backend = Mock()
            MockMCPUseClient.from_dict.return_value = backend

            # The session advertises a single tool.
            tool = types.Tool(
                name="test_tool",
                description="Test tool",
                inputSchema={"type": "object"},
            )
            fake = self._fake_session([tool])
            inner = fake.connector.client_session

            invocations = []

            async def flaky_call_tool(name, arguments):
                invocations.append(name)
                # Only the very first invocation fails.
                if len(invocations) == 1:
                    raise Exception("HTTP 503 Service Unavailable")
                return Mock(
                    content=[types.TextContent(type="text", text="Success")],
                    isError=False,
                    structuredContent=None,
                )

            inner.call_tool = flaky_call_tool
            backend.create_all_sessions = AsyncMock(return_value={"test_server": fake})

            await client.initialize()

            # For this test the retry wrapper is put directly around call_tool.
            inner.call_tool = create_async_retry_wrapper(
                inner.call_tool,
                max_retries=2,
                retry_status_codes=(503,),
                retry_delay=0.01,
            )

            result = await client.call_tool(MCPToolCall(name="test_tool", arguments={}))

            assert len(invocations) == 2  # Failed once, then succeeded
            assert not result.isError
            assert result.content[0].text == "Success"

    @pytest.mark.asyncio
    async def test_resource_read_with_retry(self):
        """A resource read that fails once with a 502 message succeeds on the retry."""
        client = MCPUseHUDClient({"test_server": {"url": "http://localhost:8080"}})

        with patch("hud.clients.mcp_use.MCPUseClient") as MockMCPUseClient:
            backend = Mock()
            MockMCPUseClient.from_dict.return_value = backend

            fake = self._fake_session([])
            inner = fake.connector.client_session

            reads = []

            async def flaky_read_resource(uri):
                reads.append(uri)
                # Only the very first read fails.
                if len(reads) == 1:
                    raise Exception("HTTP 502 Bad Gateway")
                return Mock(contents=[Mock(text='{"status": "ok"}')])

            inner.read_resource = flaky_read_resource
            backend.create_all_sessions = AsyncMock(return_value={"test_server": fake})

            await client.initialize()

            # For this test the retry wrapper is put directly around read_resource.
            inner.read_resource = create_async_retry_wrapper(
                inner.read_resource,
                max_retries=2,
                retry_status_codes=(502,),
                retry_delay=0.01,
            )

            result = await client.read_resource("test://resource")

            assert len(reads) == 2  # Failed once, then succeeded
            assert result is not None
            assert result.contents[0].text == '{"status": "ok"}'
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
if __name__ == "__main__":
|
|
378
|
+
pytest.main([__file__, "-v"])
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
"""Retry wrapper for MCP-use HTTP transport.
|
|
2
|
+
|
|
3
|
+
This module provides a transport-level retry mechanism for MCP-use,
|
|
4
|
+
similar to the approach used in FastMCP.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import logging
|
|
11
|
+
from typing import TYPE_CHECKING, Any, TypeVar
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from collections.abc import Callable
|
|
15
|
+
|
|
16
|
+
import requests
|
|
17
|
+
from requests.adapters import HTTPAdapter
|
|
18
|
+
from urllib3.util.retry import Retry
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
T = TypeVar("T")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def create_retry_session(
    max_retries: int = 3,
    retry_status_codes: tuple[int, ...] = (502, 503, 504),
    retry_delay: float = 1.0,
    backoff_factor: float = 2.0,
) -> requests.Session:
    """
    Build a ``requests.Session`` whose HTTP and HTTPS adapters retry requests.

    Args:
        max_retries: Total number of retries handed to urllib3's ``Retry``
        retry_status_codes: Response status codes that force a retry
        retry_delay: Accepted as part of the signature, but not read anywhere
            in this function body
        backoff_factor: Passed unchanged to ``Retry`` as its ``backoff_factor``

    Returns:
        A new session with one retrying ``HTTPAdapter`` mounted for both schemes
    """
    retry_policy = Retry(
        total=max_retries,
        backoff_factor=backoff_factor,
        status_forcelist=list(retry_status_codes),
        # Every listed verb may be retried, non-idempotent ones (POST, PATCH) included.
        allowed_methods=["HEAD", "GET", "OPTIONS", "POST", "PUT", "DELETE", "PATCH"],
        # Honour a Retry-After header when the server sends one.
        respect_retry_after_header=True,
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    # The same adapter instance serves both schemes.
    new_session = requests.Session()
    for scheme in ("http://", "https://"):
        new_session.mount(scheme, retrying_adapter)

    logger.debug(
        "Created retry session with max_retries=%d, status_codes=%s, backoff_factor=%.1f",
        max_retries,
        retry_status_codes,
        backoff_factor,
    )

    return new_session
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def patch_mcp_session_http_client(session: Any) -> None:
    """
    Patch an MCP-use session to use HTTP retry logic.

    Two independent, best-effort patches are attempted on ``session.connector``:

    * If the connector's ``_connection_manager`` holds a ``requests.Session``
      under ``_session`` or ``session``, retrying adapters are mounted on that
      existing session. The session object itself is kept, so anything already
      configured on it survives, and attributes that are not requests sessions
      are left alone.
    * If the connector has a truthy ``client_session`` with a ``_send_request``
      attribute, that callable is wrapped with ``create_async_retry_wrapper``.
      A callable already wrapped by this function is not wrapped again, so
      patching twice does not stack retry loops.

    Any exception raised while patching is logged as a warning and swallowed;
    the session is then left in whatever state had been reached.

    Args:
        session: MCP-use session to patch
    """
    try:
        connector = getattr(session, "connector", None)
        if connector is None:
            return

        # Synchronous path: requests-based connection manager.
        manager = getattr(connector, "_connection_manager", None)
        if manager is not None:
            for attr in ("_session", "session"):
                http_session = getattr(manager, attr, None)
                if not isinstance(http_session, requests.Session):
                    continue
                # Borrow the retrying adapters from a freshly configured session
                # and mount them in place of the existing ones.
                for prefix, adapter in create_retry_session().adapters.items():
                    http_session.mount(prefix, adapter)
                logger.debug("Patched connection manager's %s with retry logic", attr)
                break

        # Asynchronous path: wrap the client session's request sender.
        client = getattr(connector, "client_session", None)
        if client and hasattr(client, "_send_request"):
            original_send = client._send_request
            # ``is True`` (not truthiness) so auto-created Mock attributes in
            # tests are not mistaken for the marker.
            if getattr(original_send, "_hud_retry_wrapped", False) is not True:
                wrapped_send = create_async_retry_wrapper(original_send)
                wrapped_send._hud_retry_wrapped = True
                client._send_request = wrapped_send
                logger.debug("Wrapped client_session._send_request with retry logic")

    except Exception as e:
        # Deliberately broad: patching is an optional enhancement and must
        # never prevent the session from being used.
        logger.warning("Could not patch MCP session with retry logic: %s", e)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def create_async_retry_wrapper(
    func: Callable[..., Any],
    max_retries: int = 3,
    retry_status_codes: tuple[int, ...] = (502, 503, 504),
    retry_delay: float = 1.0,
    backoff_factor: float = 2.0,
) -> Callable[..., Any]:
    """
    Create an async wrapper that adds retry logic to a function.

    The wrapped coroutine function is awaited up to ``max_retries + 1`` times.
    Another attempt is made when, and only when, attempts remain and either

    * the returned object has a ``status_code`` attribute whose value is in
      ``retry_status_codes``, or
    * the call raised an ``Exception`` that carries a retryable status code:
      in its ``status_code`` attribute, in ``response.status_code``, or as a
      standalone number in its message (``"HTTP 503 ..."`` matches 503, while
      ``"15030 bytes"`` does not).

    Between attempts the wrapper sleeps ``retry_delay`` seconds, multiplying
    the delay by ``backoff_factor`` after every sleep.

    Args:
        func: The async function to wrap
        max_retries: Maximum number of retry attempts; negative values are
            treated as 0, so ``func`` is always called at least once
        retry_status_codes: HTTP status codes to retry
        retry_delay: Initial delay between retries in seconds
        backoff_factor: Multiplier for exponential backoff

    Returns:
        Wrapped function with retry logic. On the final attempt it returns
        whatever ``func`` returned (even with a retryable status code) or
        lets the exception raised by ``func`` propagate unchanged.
    """
    import re  # local import: the module's import block is left untouched

    # A negative budget would otherwise make the attempt loop empty and the
    # wrapper would return None without ever calling ``func``.
    retry_budget = max(0, max_retries)

    # Status codes must appear as whole numbers in an error message, not as
    # part of a longer digit run.
    code_pattern = None
    if retry_status_codes:
        alternatives = "|".join(re.escape(str(code)) for code in retry_status_codes)
        code_pattern = re.compile(r"(?<!\d)(?:" + alternatives + r")(?!\d)")

    def _is_retryable_error(error: Exception) -> bool:
        """Return True when ``error`` carries one of the retryable status codes."""
        status = getattr(error, "status_code", None)
        if status is None:
            status = getattr(getattr(error, "response", None), "status_code", None)
        if isinstance(status, int) and status in retry_status_codes:
            return True
        return code_pattern is not None and code_pattern.search(str(error)) is not None

    async def wrapper(*args: Any, **kwargs: Any) -> Any:
        delay = retry_delay

        for attempt in range(retry_budget + 1):
            final_attempt = attempt == retry_budget
            try:
                result = await func(*args, **kwargs)
            except Exception as e:
                # Out of attempts, or not an error we know how to retry:
                # re-raise with the original traceback.
                if final_attempt or not _is_retryable_error(e):
                    raise
                logger.warning(
                    "Error '%s' (attempt %d/%d), retrying in %.1fs",
                    e,
                    attempt + 1,
                    retry_budget + 1,
                    delay,
                )
            else:
                retryable_result = (
                    hasattr(result, "status_code")
                    and result.status_code in retry_status_codes
                )
                if final_attempt or not retryable_result:
                    return result
                logger.warning(
                    "HTTP %d error (attempt %d/%d), retrying in %.1fs",
                    result.status_code,
                    attempt + 1,
                    retry_budget + 1,
                    delay,
                )

            # Only reached when another attempt will follow.
            await asyncio.sleep(delay)
            delay *= backoff_factor

        # Not reachable: the final attempt always returns or raises above.

    return wrapper
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def patch_all_sessions(sessions: dict[str, Any]) -> None:
    """
    Run ``patch_mcp_session_http_client`` over every session in a mapping.

    Args:
        sessions: Mapping from session name to session object; the name is
            only used for the debug log line
    """
    for server_name, mcp_session in sessions.items():
        logger.debug("Patching session '%s' with retry logic", server_name)
        patch_mcp_session_http_client(mcp_session)
|
|
@@ -4,6 +4,7 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import asyncio
|
|
6
6
|
import logging
|
|
7
|
+
import multiprocessing
|
|
7
8
|
import os
|
|
8
9
|
import traceback
|
|
9
10
|
from concurrent.futures import ProcessPoolExecutor, as_completed
|
|
@@ -114,58 +115,36 @@ def _process_worker(
|
|
|
114
115
|
task_name = task_dict.get("prompt") or f"Task {index}"
|
|
115
116
|
|
|
116
117
|
# Use the job_id to group all tasks under the same job
|
|
117
|
-
with hud.trace(
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
agent =
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
)
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
)
|
|
148
|
-
|
|
149
|
-
return (index, result)
|
|
150
|
-
except Exception as e:
|
|
151
|
-
# Even if there's an exception, ensure we have a proper result
|
|
152
|
-
logger.error(
|
|
153
|
-
"[Worker %s] Task %s failed during execution: %s",
|
|
154
|
-
worker_id,
|
|
155
|
-
index,
|
|
156
|
-
str(e)[:200],
|
|
157
|
-
)
|
|
158
|
-
# Create a proper Trace result for errors
|
|
159
|
-
from hud.types import Trace
|
|
160
|
-
|
|
161
|
-
error_result = Trace(
|
|
162
|
-
reward=0.0,
|
|
163
|
-
done=True,
|
|
164
|
-
content=f"Task execution failed: {e}",
|
|
165
|
-
isError=True,
|
|
166
|
-
info={"error": str(e), "traceback": traceback.format_exc()},
|
|
167
|
-
)
|
|
168
|
-
return (index, error_result)
|
|
118
|
+
with hud.trace(task_name, job_id=job_id, task_id=task_dict.get("id")):
|
|
119
|
+
# Convert dict to Task
|
|
120
|
+
task = Task(**task_dict)
|
|
121
|
+
|
|
122
|
+
# Create agent instance
|
|
123
|
+
agent = agent_class(**(agent_config or {}))
|
|
124
|
+
|
|
125
|
+
if auto_respond:
|
|
126
|
+
agent.response_agent = ResponseAgent()
|
|
127
|
+
|
|
128
|
+
# Run the task
|
|
129
|
+
result = await agent.run(task, max_steps=max_steps)
|
|
130
|
+
|
|
131
|
+
# Extract and print evaluation score for visibility
|
|
132
|
+
reward = getattr(result, "reward", "N/A")
|
|
133
|
+
logger.info(
|
|
134
|
+
"[Worker %s] Task %s: ✓ Completed (reward: %s)",
|
|
135
|
+
worker_id,
|
|
136
|
+
index,
|
|
137
|
+
reward,
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
logger.info(
|
|
141
|
+
"[Worker %s] Completed task %s (reward: %s)",
|
|
142
|
+
worker_id,
|
|
143
|
+
index,
|
|
144
|
+
reward,
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
return (index, result)
|
|
169
148
|
|
|
170
149
|
except Exception as e:
|
|
171
150
|
error_msg = f"Worker {worker_id}: Task {index} failed: {e}"
|
|
@@ -212,6 +191,19 @@ def _process_worker(
|
|
|
212
191
|
try:
|
|
213
192
|
# Run the async batch processing
|
|
214
193
|
results = loop.run_until_complete(process_batch())
|
|
194
|
+
|
|
195
|
+
# Ensure telemetry is fully sent before process exits
|
|
196
|
+
# Spans are buffered in BatchSpanProcessor and need explicit flush
|
|
197
|
+
|
|
198
|
+
from opentelemetry import trace as otel_trace
|
|
199
|
+
|
|
200
|
+
provider = otel_trace.get_tracer_provider()
|
|
201
|
+
if provider and hasattr(provider, "force_flush"):
|
|
202
|
+
# Flush of buffered spans
|
|
203
|
+
success = provider.force_flush(timeout_millis=2000) # type: ignore[arg-type]
|
|
204
|
+
if not success:
|
|
205
|
+
logger.warning("Worker %s: Telemetry flush timed out", worker_id)
|
|
206
|
+
|
|
215
207
|
return results
|
|
216
208
|
except KeyboardInterrupt:
|
|
217
209
|
logger.info("Worker %s: Interrupted by user, stopping gracefully", worker_id)
|
|
@@ -236,24 +228,17 @@ def _process_worker(
|
|
|
236
228
|
logger.error("Worker %s batch processing failed: %s", worker_id, e)
|
|
237
229
|
return [(idx, {"error": str(e), "isError": True}) for idx, _ in task_batch]
|
|
238
230
|
finally:
|
|
239
|
-
# CRITICAL: Always ensure telemetry is fully sent before process exits
|
|
240
|
-
# This must happen in finally block to ensure it runs even on errors
|
|
241
231
|
try:
|
|
242
232
|
from opentelemetry import trace as otel_trace
|
|
243
233
|
|
|
244
234
|
provider = otel_trace.get_tracer_provider()
|
|
245
235
|
if provider and hasattr(provider, "force_flush"):
|
|
246
|
-
#
|
|
247
|
-
|
|
248
|
-
success = provider.force_flush(
|
|
249
|
-
timeout_millis=10000
|
|
250
|
-
) # 10 second timeout # type: ignore
|
|
236
|
+
# Flush buffered spans with reasonable timeout
|
|
237
|
+
success = provider.force_flush(timeout_millis=2000) # type: ignore[arg-type]
|
|
251
238
|
if not success:
|
|
252
239
|
logger.warning("Worker %s: Telemetry flush timed out", worker_id)
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
except Exception as flush_error:
|
|
256
|
-
logger.error("Worker %s: Failed to flush telemetry: %s", worker_id, flush_error)
|
|
240
|
+
except Exception as e:
|
|
241
|
+
logger.warning("Worker %s: Failed to flush telemetry: %s", worker_id, e)
|
|
257
242
|
|
|
258
243
|
# Clean up the event loop
|
|
259
244
|
try:
|
|
@@ -367,6 +352,8 @@ async def run_dataset_parallel_manual(
|
|
|
367
352
|
for task_dict in task_dicts:
|
|
368
353
|
if "system_prompt" not in task_dict:
|
|
369
354
|
task_dict["system_prompt"] = custom_system_prompt
|
|
355
|
+
else:
|
|
356
|
+
task_dict["system_prompt"] += "\n" + custom_system_prompt
|
|
370
357
|
|
|
371
358
|
# Prepare job metadata
|
|
372
359
|
job_metadata = metadata or {}
|
|
@@ -391,6 +378,8 @@ async def run_dataset_parallel_manual(
|
|
|
391
378
|
except Exception:
|
|
392
379
|
logger.warning("Failed to extract dataset verification info")
|
|
393
380
|
|
|
381
|
+
# task_dicts = task_dicts[:10]
|
|
382
|
+
|
|
394
383
|
# Create job context
|
|
395
384
|
with hud.job(name, metadata=job_metadata, dataset_link=dataset_link) as job_obj:
|
|
396
385
|
# Prepare agent class info for pickling
|
|
@@ -435,7 +424,10 @@ async def run_dataset_parallel_manual(
|
|
|
435
424
|
)
|
|
436
425
|
|
|
437
426
|
# Process batches in parallel using ProcessPoolExecutor
|
|
438
|
-
executor = ProcessPoolExecutor(
|
|
427
|
+
executor = ProcessPoolExecutor(
|
|
428
|
+
max_workers=max_workers,
|
|
429
|
+
mp_context=multiprocessing.get_context("spawn"),
|
|
430
|
+
)
|
|
439
431
|
try:
|
|
440
432
|
# Submit all batches to workers
|
|
441
433
|
future_to_batch = {
|
hud/otel/config.py
CHANGED
|
@@ -97,7 +97,16 @@ def configure_telemetry(
|
|
|
97
97
|
exporter = HudSpanExporter(
|
|
98
98
|
telemetry_url=settings.hud_telemetry_url, api_key=settings.api_key
|
|
99
99
|
)
|
|
100
|
-
|
|
100
|
+
# Export more continuously to avoid big end flushes
|
|
101
|
+
provider.add_span_processor(
|
|
102
|
+
BatchSpanProcessor(
|
|
103
|
+
exporter,
|
|
104
|
+
schedule_delay_millis=1000,
|
|
105
|
+
max_queue_size=8192,
|
|
106
|
+
max_export_batch_size=256,
|
|
107
|
+
export_timeout_millis=30000,
|
|
108
|
+
)
|
|
109
|
+
)
|
|
101
110
|
elif settings.telemetry_enabled and not settings.api_key and not enable_otlp:
|
|
102
111
|
# Error if no exporters are configured
|
|
103
112
|
raise ValueError(
|
|
@@ -127,7 +136,15 @@ def configure_telemetry(
|
|
|
127
136
|
otlp_config["headers"] = otlp_headers
|
|
128
137
|
|
|
129
138
|
otlp_exporter = OTLPSpanExporter(**otlp_config)
|
|
130
|
-
provider.add_span_processor(
|
|
139
|
+
provider.add_span_processor(
|
|
140
|
+
BatchSpanProcessor(
|
|
141
|
+
otlp_exporter,
|
|
142
|
+
schedule_delay_millis=1000,
|
|
143
|
+
max_queue_size=8192,
|
|
144
|
+
max_export_batch_size=256,
|
|
145
|
+
export_timeout_millis=30000,
|
|
146
|
+
)
|
|
147
|
+
)
|
|
131
148
|
logger.info("OTLP HTTP exporter enabled - endpoint: %s", otlp_config["endpoint"])
|
|
132
149
|
except ImportError:
|
|
133
150
|
logger.warning(
|
hud/utils/tests/test_version.py
CHANGED
hud/version.py
CHANGED
|
@@ -2,16 +2,16 @@ hud/__init__.py,sha256=KU7G-_Mj6Mjf7trXA6X0ufN6QUmqhVi19NKbnNIvD74,532
|
|
|
2
2
|
hud/__main__.py,sha256=YR8Dq8OhINOsVfQ55PmRXXg4fEK84Rt_-rMtJ5rvhWo,145
|
|
3
3
|
hud/settings.py,sha256=bgq_zstNGlan7UDZOiElY8Aw5ZU4xL7Ds5HP_Xzph1A,2535
|
|
4
4
|
hud/types.py,sha256=jta4Hgj_rBdIMcf7mz0tsdgfA1pssp327bxcT6Mfp00,6107
|
|
5
|
-
hud/version.py,sha256=
|
|
5
|
+
hud/version.py,sha256=EbQzTU9GpvP2361rMenfZ5YkA5new-xa4K7myfCFFh4,105
|
|
6
6
|
hud/agents/__init__.py,sha256=UoIkljWdbq4bM0LD-mSaw6w826EqdEjOk7r6glNYwYQ,286
|
|
7
|
-
hud/agents/base.py,sha256=
|
|
7
|
+
hud/agents/base.py,sha256=rlHnzRxz1jER97pYKkpg3rDBkbt94C3lLgwuxopFQ8k,33711
|
|
8
8
|
hud/agents/claude.py,sha256=yTVbSNMwNjF9qZAnwDSsgrscaenJpMGnGdW_4rFW_pY,15603
|
|
9
9
|
hud/agents/grounded_openai.py,sha256=U-FHjB2Nh1_o0gmlxY5F17lWJ3oHsNRIB2a7z-IKB64,11231
|
|
10
10
|
hud/agents/langchain.py,sha256=1EgCy8jfjunsWxlPC5XfvfLS6_XZVrIF1ZjtHcrvhYw,9584
|
|
11
11
|
hud/agents/openai.py,sha256=ovARRWNuHqKkZ2Q_OCYSVCIZckrh8XY2jUB2p2x1m88,14259
|
|
12
12
|
hud/agents/openai_chat_generic.py,sha256=BDtMQnf9ddYN4fKy-CN9IotLyYvHGdZqjju5PODEnU4,10683
|
|
13
13
|
hud/agents/misc/__init__.py,sha256=BYi4Ytp9b_vycpZFXnr5Oyw6ncKLNNGml8Jrb7bWUb4,136
|
|
14
|
-
hud/agents/misc/response_agent.py,sha256=
|
|
14
|
+
hud/agents/misc/response_agent.py,sha256=Kx7RRF2KE3i_t82bDkcyp49z7cmqtRoY48Vv0UCDFXs,3228
|
|
15
15
|
hud/agents/tests/__init__.py,sha256=W-O-_4i34d9TTyEHV-O_q1Ai1gLhzwDaaPo02_TWQIY,34
|
|
16
16
|
hud/agents/tests/test_base.py,sha256=F39ajSqASGUbPyPoWSY9KARFav62qNTK74W11Tr1Tg4,28970
|
|
17
17
|
hud/agents/tests/test_claude.py,sha256=wqEKlzEvx8obz1sSm4NY0j-Zyt1qWNfDOmRqYIuAEd0,13069
|
|
@@ -70,19 +70,21 @@ hud/clients/README.md,sha256=XNE3mch95ozDgVqfwCGcrhlHY9CwT1GKfNANNboowto,3826
|
|
|
70
70
|
hud/clients/__init__.py,sha256=N5M_gZv4nP7dLRwpAiaqqaxyaLieGW6397FszeG7JGw,364
|
|
71
71
|
hud/clients/base.py,sha256=vt9mRMZwZg2DtVxQmccR_VZZGamXhx3dvlFJPulbOd8,14131
|
|
72
72
|
hud/clients/fastmcp.py,sha256=KJGi8bmds0Q6rHnkTXb_Hw9ZqWmSo0OfjW05SSuyEJU,9182
|
|
73
|
-
hud/clients/mcp_use.py,sha256=
|
|
73
|
+
hud/clients/mcp_use.py,sha256=TsIIItqmt93hcdW1hj2HlMPJppVo4JDoUuCqQ-ZBoBI,13521
|
|
74
74
|
hud/clients/tests/__init__.py,sha256=sKOtJFFa4mDIXh1U6O8ZUHjigE8CiRMQ2PzJTIBZuVE,33
|
|
75
75
|
hud/clients/tests/test_client_integration.py,sha256=kohU6jfCNfwSnAushHeB1_CmDlRfQc7VBL0GEdJYSeI,4198
|
|
76
76
|
hud/clients/tests/test_fastmcp.py,sha256=4q3TzDjuieTZa89taiNJIrzbUncNkYOG4MaubypA21k,13030
|
|
77
|
+
hud/clients/tests/test_mcp_use_retry.py,sha256=v9vwKa9nrW0EYiWzkq_vpyju40NaOm3SBiJh8VJYX2E,13103
|
|
77
78
|
hud/clients/tests/test_protocol.py,sha256=aK4CS4g3j1D5jPo83ykzZuHUvcZFAulYtIq9T9Hb_fQ,6640
|
|
78
79
|
hud/clients/utils/__init__.py,sha256=-zZjcKIWGj2tXbVDOW45UgoGghhLJzFQVZ6miKenuA4,595
|
|
80
|
+
hud/clients/utils/mcp_use_retry.py,sha256=sBCjtgnAXiXASjzFF_AtBEtmizay0Fi0nPL6sVoooeI,6675
|
|
79
81
|
hud/clients/utils/retry.py,sha256=mMs2T_mAlb8AYhSqMR4AmCw7838gqCC4mdG3zjMAYM4,5744
|
|
80
82
|
hud/clients/utils/retry_transport.py,sha256=Rsq25eiKKt_pM1bas78QEZvO0illK97X_3opmaS3A3w,6809
|
|
81
83
|
hud/datasets/__init__.py,sha256=74T4mrjELKtE04XkZKwU8QAJcg2wjqXLqRO9s4GlPr4,678
|
|
82
84
|
hud/datasets/task.py,sha256=bDVLy4EOBfjiU4i8hrFqcQ3dc077vzVNRXIbyNXFnp8,3916
|
|
83
85
|
hud/datasets/utils.py,sha256=3hKvZTkZuCRkTeITB86nNdA1dtHZAqFfAdSPMtcTUhs,4275
|
|
84
86
|
hud/datasets/execution/__init__.py,sha256=4m1AEpMQaUSJFVN_iAXvY6zFttVgZKwE6oQtC0Rrk7U,330
|
|
85
|
-
hud/datasets/execution/parallel.py,sha256=
|
|
87
|
+
hud/datasets/execution/parallel.py,sha256=ZEMMmH018QXOL1QoD_AyspQkGs_41F-GB_mTgdXh6J4,25780
|
|
86
88
|
hud/datasets/execution/runner.py,sha256=EEvb90vvAqFXXx8NyVKLfK5p-gtsfJqiFJAoqSjyfXg,4695
|
|
87
89
|
hud/misc/__init__.py,sha256=m_pprQQ-G-Y0Sd0NEiR8MtAMbElnuFZ2OWT8TXrw7c4,43
|
|
88
90
|
hud/misc/claude_plays_pokemon.py,sha256=IthAkjDVr2Q-GNvX-QLJyMzN7-0pHqqJbagGNv2m7yo,10453
|
|
@@ -93,7 +95,7 @@ hud/native/tests/test_comparator.py,sha256=x1gFLXEDRIiJhH8tg5Rd3ptY-modYaHgSm6-h
|
|
|
93
95
|
hud/native/tests/test_native_init.py,sha256=Is8fcDZimp1Oi2Bv4zavqM3KrpS86_DUXFnqc0AsCH0,2736
|
|
94
96
|
hud/otel/__init__.py,sha256=ii17ayoWiS5vAhA7UAmZ8TkmP52gs2pWyHsD46-uYbE,1003
|
|
95
97
|
hud/otel/collector.py,sha256=jLZymZ8r7xt2VDuWexfbnT7PY1-0aiyLMgjBy8KDY1M,4497
|
|
96
|
-
hud/otel/config.py,sha256=
|
|
98
|
+
hud/otel/config.py,sha256=BPEyINdlxjsTj-IOO7F_JXAqrbMEWNMuhi2GDGqB5Kk,6785
|
|
97
99
|
hud/otel/context.py,sha256=C9MvO99cRSNNDEDC7ehO3eoTPnb6J7AemUYvEp57yEU,17774
|
|
98
100
|
hud/otel/exporters.py,sha256=RLAjWa8b2DJEU21740Idq4fmeIuabLEqGGUspcFDcH4,14331
|
|
99
101
|
hud/otel/instrumentation.py,sha256=bBWxQ5vkiP-2WZ_2ztR5LAw0Wu02SZNKUgs5f-rU-ro,3734
|
|
@@ -175,10 +177,10 @@ hud/utils/tests/test_init.py,sha256=2QLQSGgyP9wJhOvPCusm_zjJad0qApOZi1BXpxcdHXQ,
|
|
|
175
177
|
hud/utils/tests/test_mcp.py,sha256=0pUa16mL-bqbZDXp5NHBnt1gO5o10BOg7zTMHZ1DNPM,4023
|
|
176
178
|
hud/utils/tests/test_progress.py,sha256=QSF7Kpi03Ff_l3mAeqW9qs1nhK50j9vBiSobZq7T4f4,7394
|
|
177
179
|
hud/utils/tests/test_telemetry.py,sha256=5jl7bEx8C8b-FfFUko5pf4UY-mPOR-9HaeL98dGtVHM,2781
|
|
178
|
-
hud/utils/tests/test_version.py,sha256=
|
|
180
|
+
hud/utils/tests/test_version.py,sha256=GYpR9LSDJ5o46gRfZeSnywz0JY_LafuZBJObj7oU0y8,160
|
|
179
181
|
hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
180
|
-
hud_python-0.4.
|
|
181
|
-
hud_python-0.4.
|
|
182
|
-
hud_python-0.4.
|
|
183
|
-
hud_python-0.4.
|
|
184
|
-
hud_python-0.4.
|
|
182
|
+
hud_python-0.4.26.dist-info/METADATA,sha256=dYgepch672ATiMJJXpFsqse1nuw9Zp-T9gWVyvKLBHM,20239
|
|
183
|
+
hud_python-0.4.26.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
184
|
+
hud_python-0.4.26.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
|
|
185
|
+
hud_python-0.4.26.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
|
|
186
|
+
hud_python-0.4.26.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|