hud-python 0.4.27__py3-none-any.whl → 0.4.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +2 -1
- hud/agents/base.py +73 -45
- hud/agents/claude.py +8 -4
- hud/agents/openai_chat_generic.py +65 -40
- hud/agents/tests/test_base.py +0 -4
- hud/agents/tests/test_openai.py +1 -1
- hud/cli/__init__.py +182 -52
- hud/cli/dev.py +8 -9
- hud/cli/eval.py +317 -119
- hud/cli/flows/__init__.py +0 -0
- hud/cli/flows/tasks.py +0 -0
- hud/cli/get.py +160 -0
- hud/cli/rl/__init__.py +563 -71
- hud/cli/rl/config.py +94 -0
- hud/cli/rl/display.py +133 -0
- hud/cli/rl/gpu.py +63 -0
- hud/cli/rl/gpu_utils.py +318 -0
- hud/cli/rl/presets.py +96 -0
- hud/cli/rl/remote_runner.py +348 -0
- hud/cli/rl/rl_api.py +150 -0
- hud/cli/rl/vllm.py +177 -0
- hud/cli/tests/test_analyze_metadata.py +0 -1
- hud/cli/utils/tasks.py +26 -0
- hud/clients/base.py +21 -23
- hud/clients/mcp_use.py +36 -44
- hud/clients/tests/test_mcp_use_retry.py +10 -10
- hud/datasets/__init__.py +4 -3
- hud/datasets/{execution/parallel.py → parallel.py} +1 -1
- hud/datasets/{execution/runner.py → runner.py} +1 -1
- hud/datasets/utils.py +1 -1
- hud/native/tests/test_native_init.py +1 -1
- hud/otel/config.py +1 -1
- hud/otel/instrumentation.py +35 -0
- hud/rl/README.md +31 -0
- hud/rl/__init__.py +1 -0
- hud/rl/actor.py +174 -0
- hud/rl/buffer.py +371 -0
- hud/rl/chat_template.jinja +101 -0
- hud/rl/config.py +184 -0
- hud/rl/distributed.py +95 -0
- hud/rl/learner.py +586 -0
- hud/rl/tests/__init__.py +1 -0
- hud/rl/tests/test_learner.py +171 -0
- hud/rl/train.py +354 -0
- hud/rl/types.py +101 -0
- hud/rl/utils/start_vllm_server.sh +30 -0
- hud/rl/utils.py +524 -0
- hud/rl/vllm_adapter.py +125 -0
- hud/settings.py +6 -0
- hud/telemetry/__init__.py +2 -1
- hud/telemetry/job.py +46 -3
- hud/telemetry/tests/test_trace.py +3 -3
- hud/telemetry/trace.py +85 -13
- hud/tools/computer/hud.py +4 -4
- hud/tools/tests/test_computer.py +3 -3
- hud/tools/tests/test_computer_actions.py +1 -1
- hud/types.py +123 -2
- hud/utils/group_eval.py +223 -0
- hud/utils/hud_console.py +113 -13
- hud/utils/tasks.py +119 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/METADATA +20 -2
- {hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/RECORD +67 -47
- hud/cli/hf.py +0 -406
- hud/cli/rl/README.md +0 -243
- hud/cli/rl/init.py +0 -370
- hud/cli/rl/pod.py +0 -501
- hud/cli/rl/ssh.py +0 -322
- hud/cli/rl/train.py +0 -562
- hud/cli/rl/utils.py +0 -165
- hud/datasets/execution/__init__.py +0 -13
- hud/datasets/task.py +0 -116
- {hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/WHEEL +0 -0
- {hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/licenses/LICENSE +0 -0
hud/cli/rl/vllm.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""vLLM server management utilities."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
import os
|
|
8
|
+
import subprocess
|
|
9
|
+
import time
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
import httpx
|
|
13
|
+
from rich.console import Console
|
|
14
|
+
|
|
15
|
+
from hud.utils.hud_console import HUDConsole
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
hud_console = HUDConsole(logger)
|
|
19
|
+
|
|
20
|
+
console = Console()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_vllm_args(model_name: str, chat_template_path: Path | None = None) -> list[str]:
|
|
24
|
+
"""Get common vLLM server arguments for both local and remote deployments."""
|
|
25
|
+
args = [
|
|
26
|
+
"serve",
|
|
27
|
+
model_name,
|
|
28
|
+
"--api-key",
|
|
29
|
+
"token-abc123",
|
|
30
|
+
"--host",
|
|
31
|
+
"0.0.0.0", # noqa: S104
|
|
32
|
+
"--port",
|
|
33
|
+
"8000",
|
|
34
|
+
"--tensor-parallel-size",
|
|
35
|
+
"1",
|
|
36
|
+
"--trust-remote-code",
|
|
37
|
+
"--max-model-len",
|
|
38
|
+
"16384",
|
|
39
|
+
"--enable-lora",
|
|
40
|
+
"--max-lora-rank",
|
|
41
|
+
"64",
|
|
42
|
+
"--max-cpu-loras",
|
|
43
|
+
"4",
|
|
44
|
+
"--enable-auto-tool-choice",
|
|
45
|
+
"--tool-call-parser",
|
|
46
|
+
"hermes",
|
|
47
|
+
"--disable-log-requests",
|
|
48
|
+
"--dtype",
|
|
49
|
+
"auto",
|
|
50
|
+
]
|
|
51
|
+
|
|
52
|
+
# Add chat template if provided
|
|
53
|
+
if chat_template_path and chat_template_path.exists():
|
|
54
|
+
args.extend(["--chat-template", str(chat_template_path.absolute())])
|
|
55
|
+
|
|
56
|
+
return args
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def check_vllm_server() -> bool:
    """Check if vLLM server is running.

    Returns True only when the local health endpoint answers with HTTP 200;
    any connection error or timeout is treated as "not running".
    """
    try:
        # A 200 from /health means the server finished loading the model.
        return httpx.get("http://localhost:8000/health", timeout=2.0).status_code == 200
    except Exception:
        # Connection refused / timed out: nothing is listening yet.
        return False
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def kill_vllm_server() -> None:
    """Kill any running vLLM server processes.

    Three escalating sweeps: the PID recorded at startup, any process whose
    command line matches vLLM, and finally whatever still holds port 8000.
    """
    try:
        # Sweep 1: the PID file written by start_vllm_server, if present.
        recorded = Path("/tmp/vllm_server.pid")  # noqa: S108
        if recorded.exists():
            try:
                server_pid = int(recorded.read_text().strip())
                # Graceful TERM first, then a hard kill after a grace period.
                subprocess.run(["kill", "-TERM", str(server_pid)], check=False)  # noqa: S603, S607
                time.sleep(2)
                subprocess.run(["kill", "-9", str(server_pid)], check=False)  # noqa: S603, S607
                recorded.unlink()
            except Exception as e:
                hud_console.error(f"Failed to kill vLLM server: {e}")

        # Sweep 2: match by command line in case the PID file is stale/missing.
        for pattern in ("vllm serve", "vllm.entrypoints.openai.api_server"):
            subprocess.run(["pkill", "-f", pattern], check=False)  # noqa: S607
        time.sleep(2)

        # Sweep 3: reap anything still bound to port 8000.
        lsof = subprocess.run(
            ["lsof", "-ti:8000"], capture_output=True, text=True, check=False  # noqa: S607
        )
        leftovers = lsof.stdout.strip()
        if leftovers:
            for stale_pid in leftovers.split("\n"):
                try:
                    subprocess.run(["kill", "-9", stale_pid], check=False)  # noqa: S603, S607
                except Exception as e:
                    hud_console.error(f"Failed to kill vLLM server: {e}")

        console.print("[yellow]Killed existing vLLM server processes[/yellow]")
    except Exception as e:
        hud_console.error(f"Error killing vLLM server: {e}")
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def start_vllm_server(model_name: str, gpu_index: int = 1, restart: bool = False) -> None:
    """Start vLLM server in the background with dynamic GPU selection.

    Args:
        model_name: Model id/path passed through to ``vllm serve``.
        gpu_index: CUDA device the server is pinned to via
            ``CUDA_VISIBLE_DEVICES``.
        restart: When True, any existing server is killed before starting.
    """
    if restart:
        kill_vllm_server()
        time.sleep(3)  # give the port time to be released

    # Check if already running — nothing to do in that case.
    if check_vllm_server():
        console.print("[green]vLLM server is already running[/green]")
        return

    console.print(f"[cyan]Starting vLLM server with {model_name} on GPU {gpu_index}...[/cyan]")

    # Set up environment variables: pin the GPU, allow runtime LoRA swaps,
    # and make CUDA launches synchronous for clearer error attribution.
    env = os.environ.copy()
    env.update(
        {
            "CUDA_VISIBLE_DEVICES": str(gpu_index),
            "VLLM_ALLOW_RUNTIME_LORA_UPDATING": "True",
            "TOKENIZERS_PARALLELISM": "false",
            "VLLM_LOGGING_LEVEL": "INFO",  # Changed from DEBUG to reduce noise
            "CUDA_LAUNCH_BLOCKING": "1",  # Better error messages
        }
    )

    # Get the path to chat template (ships with the hud.rl package)
    chat_template_path = Path(__file__).parent.parent.parent / "rl" / "chat_template.jinja"

    # Build the vLLM command
    vllm_args = get_vllm_args(model_name, chat_template_path)
    cmd = ["uv", "run", "vllm", *vllm_args]

    # Start the server in the background, detached in its own session.
    # start_new_session=True replaces preexec_fn=os.setpgrp: preexec_fn is
    # documented as unsafe in multithreaded programs, while start_new_session
    # performs setsid() in the child and also detaches it from our group.
    with open("/tmp/vllm_server.log", "w") as log_file:  # noqa: S108
        process = subprocess.Popen(  # noqa: S603
            cmd,
            env=env,
            stdout=log_file,
            stderr=subprocess.STDOUT,
            start_new_session=True,
            cwd=Path.cwd(),  # Use current working directory
        )

    console.print("[yellow]vLLM server starting in background...[/yellow]")
    console.print(f"[yellow]Process ID: {process.pid}[/yellow]")
    console.print("[yellow]Check logs at: /tmp/vllm_server.log[/yellow]")

    # Save PID for later management (kill_vllm_server reads this file)
    pid_file = Path("/tmp/vllm_server.pid")  # noqa: S108
    pid_file.write_text(str(process.pid))
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
async def wait_for_vllm_server(timeout: int = 360) -> bool:  # noqa: ASYNC109
    """Wait for vLLM server to be ready.

    Polls the local health endpoint every 2 seconds until it answers with
    HTTP 200 or *timeout* seconds elapse.

    Args:
        timeout: Maximum seconds to wait (default 360 = 6 minutes, matching
            typical model-load times).

    Returns:
        True once the server responds healthy, False on timeout.
    """
    start_time = time.time()
    console.print("[yellow]Waiting for vLLM server to be ready (up to 6 minutes)...[/yellow]")

    async with httpx.AsyncClient() as client:
        while time.time() - start_time < timeout:
            try:
                response = await client.get("http://localhost:8000/health", timeout=2.0)
                if response.status_code == 200:
                    console.print("[green]✅ vLLM server is ready![/green]")
                    return True
            except Exception as e:
                # Connection errors are expected while the model is still
                # loading — log at debug instead of spamming errors each poll.
                hud_console.debug(f"Failed to connect to vLLM server: {e}")

            await asyncio.sleep(2)
            elapsed = int(time.time() - start_time)
            console.print(f"[yellow]Waiting... ({elapsed}s / {timeout}s)[/yellow]", end="\r")

    console.print("\n[red]❌ vLLM server failed to start within timeout[/red]")
    console.print("[yellow]Check /tmp/vllm_server.log for details[/yellow]")
    return False
|
|
@@ -214,7 +214,6 @@ class TestAnalyzeFromMetadata:
|
|
|
214
214
|
|
|
215
215
|
@mock.patch("hud.cli.utils.metadata.check_local_cache")
|
|
216
216
|
@mock.patch("hud.cli.utils.metadata.fetch_lock_from_registry")
|
|
217
|
-
@mock.patch("hud.cli.utils.metadata.design")
|
|
218
217
|
@mock.patch("hud.cli.utils.metadata.console")
|
|
219
218
|
async def test_analyze_not_found(self, mock_console, mock_hud_console, mock_fetch, mock_check):
|
|
220
219
|
"""Test when environment not found anywhere."""
|
hud/cli/utils/tasks.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from hud.utils.hud_console import hud_console
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def find_tasks_file(tasks_file: str | None, msg: str = "Select a tasks file") -> str:
|
|
9
|
+
"""Find tasks file."""
|
|
10
|
+
if tasks_file:
|
|
11
|
+
return tasks_file
|
|
12
|
+
|
|
13
|
+
# Get current directory and find all .json and .jsonl files
|
|
14
|
+
current_dir = Path.cwd()
|
|
15
|
+
all_files = list(current_dir.glob("*.json")) + list(current_dir.glob("*.jsonl"))
|
|
16
|
+
all_files = [
|
|
17
|
+
str(file).replace(str(current_dir), "").lstrip("/").lstrip("\\") for file in all_files
|
|
18
|
+
]
|
|
19
|
+
all_files = [file for file in all_files if file[0] != "."] # Remove all config files
|
|
20
|
+
|
|
21
|
+
if len(all_files) == 1:
|
|
22
|
+
return str(all_files[0])
|
|
23
|
+
|
|
24
|
+
else:
|
|
25
|
+
# Prompt user to select a file
|
|
26
|
+
return hud_console.select(msg, choices=all_files)
|
hud/clients/base.py
CHANGED
|
@@ -11,18 +11,16 @@ from mcp.types import Implementation
|
|
|
11
11
|
|
|
12
12
|
from hud.shared.exceptions import HudAuthenticationError, HudException
|
|
13
13
|
from hud.types import MCPToolCall, MCPToolResult
|
|
14
|
+
from hud.utils.hud_console import HUDConsole
|
|
14
15
|
from hud.utils.mcp import setup_hud_telemetry
|
|
15
16
|
from hud.version import __version__ as hud_version
|
|
16
17
|
|
|
17
18
|
if TYPE_CHECKING:
|
|
18
19
|
import mcp.types as types
|
|
19
|
-
|
|
20
|
-
else:
|
|
21
|
-
pass
|
|
22
|
-
|
|
23
|
-
|
|
24
20
|
logger = logging.getLogger(__name__)
|
|
25
21
|
|
|
22
|
+
hud_console = HUDConsole(logger=logger)
|
|
23
|
+
|
|
26
24
|
|
|
27
25
|
@runtime_checkable
|
|
28
26
|
class AgentMCPClient(Protocol):
|
|
@@ -113,7 +111,7 @@ class BaseHUDClient(AgentMCPClient):
|
|
|
113
111
|
async def initialize(self, mcp_config: dict[str, dict[str, Any]] | None = None) -> None:
|
|
114
112
|
"""Initialize connection and fetch tools."""
|
|
115
113
|
if self._initialized:
|
|
116
|
-
|
|
114
|
+
hud_console.warning(
|
|
117
115
|
"Client already connected, if you want to reconnect or change the configuration, "
|
|
118
116
|
"call shutdown() first. This is especially important if you are using an agent."
|
|
119
117
|
)
|
|
@@ -130,7 +128,7 @@ class BaseHUDClient(AgentMCPClient):
|
|
|
130
128
|
|
|
131
129
|
self._auto_trace_cm = setup_hud_telemetry(self._mcp_config, auto_trace=self._auto_trace)
|
|
132
130
|
|
|
133
|
-
|
|
131
|
+
hud_console.debug("Initializing MCP client...")
|
|
134
132
|
|
|
135
133
|
try:
|
|
136
134
|
# Check if API key is set for HUD API
|
|
@@ -155,7 +153,6 @@ class BaseHUDClient(AgentMCPClient):
|
|
|
155
153
|
await self._fetch_telemetry()
|
|
156
154
|
|
|
157
155
|
self._initialized = True
|
|
158
|
-
logger.info("Client initialized")
|
|
159
156
|
|
|
160
157
|
async def shutdown(self) -> None:
|
|
161
158
|
"""Disconnect from the MCP server."""
|
|
@@ -163,9 +160,9 @@ class BaseHUDClient(AgentMCPClient):
|
|
|
163
160
|
if self._auto_trace_cm:
|
|
164
161
|
try:
|
|
165
162
|
self._auto_trace_cm.__exit__(None, None, None)
|
|
166
|
-
|
|
163
|
+
hud_console.info("Closed auto-created trace")
|
|
167
164
|
except Exception as e:
|
|
168
|
-
|
|
165
|
+
hud_console.warning(f"Failed to close auto-created trace: {e}")
|
|
169
166
|
finally:
|
|
170
167
|
self._auto_trace_cm = None
|
|
171
168
|
|
|
@@ -173,9 +170,9 @@ class BaseHUDClient(AgentMCPClient):
|
|
|
173
170
|
if self._initialized:
|
|
174
171
|
await self._disconnect()
|
|
175
172
|
self._initialized = False
|
|
176
|
-
|
|
173
|
+
hud_console.info("Shutdown completed")
|
|
177
174
|
else:
|
|
178
|
-
|
|
175
|
+
hud_console.debug("Client was not initialized, skipping disconnect")
|
|
179
176
|
|
|
180
177
|
@overload
|
|
181
178
|
async def call_tool(self, tool_call: MCPToolCall, /) -> MCPToolResult: ...
|
|
@@ -280,27 +277,28 @@ class BaseHUDClient(AgentMCPClient):
|
|
|
280
277
|
telemetry_data = json.loads(result.contents[0].text) # type: ignore
|
|
281
278
|
self._telemetry_data = telemetry_data
|
|
282
279
|
|
|
283
|
-
logger.info("📡 Telemetry data fetched:")
|
|
284
280
|
if "live_url" in telemetry_data:
|
|
285
|
-
|
|
281
|
+
hud_console.info(f" 🖥️ Live URL: {telemetry_data['live_url']}")
|
|
286
282
|
if "vnc_url" in telemetry_data:
|
|
287
|
-
|
|
283
|
+
hud_console.info(f" 🖥️ VNC URL: {telemetry_data['vnc_url']}")
|
|
288
284
|
if "cdp_url" in telemetry_data:
|
|
289
|
-
|
|
285
|
+
hud_console.info(f" 🦾 CDP URL: {telemetry_data['cdp_url']}")
|
|
290
286
|
if "status" in telemetry_data:
|
|
291
|
-
|
|
287
|
+
hud_console.debug(f" 📊 Status: {telemetry_data['status']}")
|
|
292
288
|
if "services" in telemetry_data:
|
|
293
|
-
|
|
289
|
+
hud_console.debug(" 📋 Services:")
|
|
294
290
|
for service, status in telemetry_data["services"].items():
|
|
295
291
|
status_icon = "✅" if status == "running" else "❌"
|
|
296
|
-
|
|
292
|
+
hud_console.debug(f" {status_icon} {service}: {status}")
|
|
297
293
|
|
|
298
294
|
if self.verbose:
|
|
299
|
-
|
|
295
|
+
hud_console.debug(
|
|
296
|
+
f"Full telemetry data:\n{json.dumps(telemetry_data, indent=2)}"
|
|
297
|
+
)
|
|
300
298
|
except Exception as e:
|
|
301
299
|
# Telemetry is optional
|
|
302
300
|
if self.verbose:
|
|
303
|
-
|
|
301
|
+
hud_console.debug(f"No telemetry available: {e}")
|
|
304
302
|
|
|
305
303
|
async def analyze_environment(self) -> dict[str, Any]:
|
|
306
304
|
"""Complete analysis of the MCP environment.
|
|
@@ -363,7 +361,7 @@ class BaseHUDClient(AgentMCPClient):
|
|
|
363
361
|
analysis["resources"].append(resource_info)
|
|
364
362
|
except Exception as e:
|
|
365
363
|
if self.verbose:
|
|
366
|
-
|
|
364
|
+
hud_console.debug(f"Could not list resources: {e}")
|
|
367
365
|
|
|
368
366
|
return analysis
|
|
369
367
|
|
|
@@ -387,5 +385,5 @@ class BaseHUDClient(AgentMCPClient):
|
|
|
387
385
|
return functions
|
|
388
386
|
except Exception as e:
|
|
389
387
|
if self.verbose:
|
|
390
|
-
|
|
388
|
+
hud_console.debug(f"Could not read hub functions for '{hub_name}': {e}")
|
|
391
389
|
return []
|
hud/clients/mcp_use.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
|
+
import traceback
|
|
6
7
|
from typing import Any
|
|
7
8
|
|
|
8
9
|
from mcp import Implementation, types
|
|
@@ -12,12 +13,14 @@ from mcp_use.session import MCPSession as MCPUseSession
|
|
|
12
13
|
from pydantic import AnyUrl
|
|
13
14
|
|
|
14
15
|
from hud.types import MCPToolCall, MCPToolResult
|
|
16
|
+
from hud.utils.hud_console import HUDConsole
|
|
15
17
|
from hud.version import __version__ as hud_version
|
|
16
18
|
|
|
17
19
|
from .base import BaseHUDClient
|
|
18
20
|
from .utils.mcp_use_retry import patch_all_sessions
|
|
19
21
|
|
|
20
22
|
logger = logging.getLogger(__name__)
|
|
23
|
+
hud_console = HUDConsole(logger=logger)
|
|
21
24
|
|
|
22
25
|
|
|
23
26
|
class MCPUseHUDClient(BaseHUDClient):
|
|
@@ -62,11 +65,11 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
62
65
|
try:
|
|
63
66
|
assert self._client is not None # noqa: S101
|
|
64
67
|
self._sessions = await self._client.create_all_sessions()
|
|
65
|
-
|
|
68
|
+
hud_console.info(f"Created {len(self._sessions)} MCP sessions")
|
|
66
69
|
|
|
67
70
|
# Patch all sessions with retry logic
|
|
68
71
|
patch_all_sessions(self._sessions)
|
|
69
|
-
|
|
72
|
+
hud_console.debug("Applied retry logic to all MCP sessions")
|
|
70
73
|
|
|
71
74
|
# Configure validation for all sessions based on client setting
|
|
72
75
|
try:
|
|
@@ -86,21 +89,21 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
86
89
|
# Log session details in verbose mode
|
|
87
90
|
if self.verbose and self._sessions:
|
|
88
91
|
for name, session in self._sessions.items():
|
|
89
|
-
|
|
92
|
+
hud_console.debug(f" - {name}: {type(session).__name__}")
|
|
90
93
|
|
|
91
94
|
except McpError as e:
|
|
92
95
|
# Protocol error - the server is reachable but rejecting our request
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
96
|
+
hud_console.warning(f"MCP protocol error: {e}")
|
|
97
|
+
hud_console.warning("This typically means:")
|
|
98
|
+
hud_console.warning("- Invalid or missing initialization parameters")
|
|
99
|
+
hud_console.warning("- Incompatible protocol version")
|
|
100
|
+
hud_console.warning("- Server-side configuration issues")
|
|
98
101
|
raise
|
|
99
102
|
except Exception as e:
|
|
100
103
|
# Transport or other errors
|
|
101
|
-
|
|
104
|
+
hud_console.error(f"Failed to create sessions: {e}")
|
|
102
105
|
if self.verbose:
|
|
103
|
-
|
|
106
|
+
hud_console.info("Check that the MCP server is running and accessible")
|
|
104
107
|
raise
|
|
105
108
|
|
|
106
109
|
# Populate tool map during initialization
|
|
@@ -129,17 +132,14 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
129
132
|
await session.initialize()
|
|
130
133
|
|
|
131
134
|
if session.connector.client_session is None:
|
|
132
|
-
|
|
135
|
+
hud_console.warning(f"Client session not initialized for {server_name}")
|
|
133
136
|
continue
|
|
134
137
|
|
|
135
138
|
# List tools (retry logic is handled at transport level)
|
|
136
139
|
tools_result = await session.connector.client_session.list_tools()
|
|
137
140
|
|
|
138
|
-
|
|
139
|
-
"Discovered
|
|
140
|
-
len(tools_result.tools),
|
|
141
|
-
server_name,
|
|
142
|
-
[tool.name for tool in tools_result.tools],
|
|
141
|
+
hud_console.info(
|
|
142
|
+
f"Discovered {len(tools_result.tools)} tools from '{server_name}': {', '.join([tool.name for tool in tools_result.tools])}", # noqa: E501
|
|
143
143
|
)
|
|
144
144
|
|
|
145
145
|
# Add to collections with optional prefix
|
|
@@ -167,16 +167,15 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
167
167
|
if self.verbose:
|
|
168
168
|
for tool in tools_result.tools:
|
|
169
169
|
description = tool.description or ""
|
|
170
|
-
|
|
171
|
-
" Tool '
|
|
172
|
-
tool.name,
|
|
173
|
-
description[:100] + "..." if len(description) > 100 else description,
|
|
170
|
+
hud_console.debug(
|
|
171
|
+
f" Tool '{tool.name}': {description[:100] + '...' if len(description) > 100 else description}", # noqa: E501
|
|
174
172
|
)
|
|
175
173
|
|
|
176
174
|
except Exception as e:
|
|
177
|
-
|
|
175
|
+
hud_console.error(f"Error discovering tools from '{server_name}': {e}")
|
|
178
176
|
if self.verbose:
|
|
179
|
-
|
|
177
|
+
hud_console.error("Full error details:")
|
|
178
|
+
traceback.print_exc()
|
|
180
179
|
|
|
181
180
|
return all_tools
|
|
182
181
|
|
|
@@ -196,12 +195,8 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
196
195
|
session = self._sessions[server_name]
|
|
197
196
|
|
|
198
197
|
if self.verbose:
|
|
199
|
-
|
|
200
|
-
"Calling tool '
|
|
201
|
-
tool_call.name,
|
|
202
|
-
original_tool.name,
|
|
203
|
-
server_name,
|
|
204
|
-
tool_call.arguments,
|
|
198
|
+
hud_console.debug(
|
|
199
|
+
f"Calling tool '{tool_call.name}' (original: '{original_tool.name}') on server '{server_name}' with arguments: {tool_call.arguments}" # noqa: E501
|
|
205
200
|
)
|
|
206
201
|
|
|
207
202
|
if session.connector.client_session is None:
|
|
@@ -214,7 +209,7 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
214
209
|
)
|
|
215
210
|
|
|
216
211
|
if self.verbose:
|
|
217
|
-
|
|
212
|
+
hud_console.debug(f"Tool '{tool_call.name}' result: {result}")
|
|
218
213
|
|
|
219
214
|
# MCP-use already returns the correct type, but we need to ensure it's MCPToolResult
|
|
220
215
|
return MCPToolResult(
|
|
@@ -246,7 +241,7 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
246
241
|
return resources.resources
|
|
247
242
|
except Exception as e:
|
|
248
243
|
if self.verbose:
|
|
249
|
-
|
|
244
|
+
hud_console.debug(f"Could not list resources from server '{server_name}': {e}")
|
|
250
245
|
continue
|
|
251
246
|
return []
|
|
252
247
|
|
|
@@ -276,8 +271,8 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
276
271
|
raise AttributeError("read_resource not available")
|
|
277
272
|
|
|
278
273
|
if self.verbose:
|
|
279
|
-
|
|
280
|
-
"Successfully read resource '
|
|
274
|
+
hud_console.debug(
|
|
275
|
+
f"Successfully read resource '{uri}' from server '{server_name}'"
|
|
281
276
|
)
|
|
282
277
|
|
|
283
278
|
return result
|
|
@@ -285,24 +280,21 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
285
280
|
except McpError as e:
|
|
286
281
|
# McpError is expected for unsupported resources
|
|
287
282
|
if "telemetry://" in str(uri):
|
|
288
|
-
|
|
289
|
-
"Telemetry resource not supported by server '
|
|
283
|
+
hud_console.debug(
|
|
284
|
+
f"Telemetry resource not supported by server '{server_name}': {e}"
|
|
290
285
|
)
|
|
291
286
|
elif self.verbose:
|
|
292
|
-
|
|
293
|
-
"MCP resource error for '
|
|
287
|
+
hud_console.debug(
|
|
288
|
+
f"MCP resource error for '{uri}' from server '{server_name}': {e}"
|
|
294
289
|
)
|
|
295
290
|
continue
|
|
296
291
|
except Exception as e:
|
|
297
292
|
# Other errors might be more serious
|
|
298
293
|
if "telemetry://" in str(uri):
|
|
299
|
-
|
|
294
|
+
hud_console.debug(f"Failed to fetch telemetry from server '{server_name}': {e}")
|
|
300
295
|
else:
|
|
301
|
-
|
|
302
|
-
"Unexpected error reading resource '
|
|
303
|
-
uri,
|
|
304
|
-
server_name,
|
|
305
|
-
e,
|
|
296
|
+
hud_console.warning(
|
|
297
|
+
f"Unexpected error reading resource '{uri}' from server '{server_name}': {e}" # noqa: E501
|
|
306
298
|
)
|
|
307
299
|
continue
|
|
308
300
|
|
|
@@ -311,14 +303,14 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
311
303
|
async def _disconnect(self) -> None:
|
|
312
304
|
"""Close all active sessions."""
|
|
313
305
|
if self._client is None:
|
|
314
|
-
|
|
306
|
+
hud_console.warning("Client is not connected, cannot close")
|
|
315
307
|
return
|
|
316
308
|
|
|
317
309
|
await self._client.close_all_sessions()
|
|
318
310
|
self._sessions = {}
|
|
319
311
|
self._tool_map = {}
|
|
320
312
|
self._initialized = False
|
|
321
|
-
|
|
313
|
+
hud_console.debug("MCP-use client disconnected")
|
|
322
314
|
|
|
323
315
|
# Legacy compatibility methods (limited; tests should not rely on these)
|
|
324
316
|
def get_sessions(self) -> dict[str, Any]:
|
|
@@ -36,20 +36,20 @@ class TestRetrySession:
|
|
|
36
36
|
|
|
37
37
|
# Check adapter configuration
|
|
38
38
|
adapter = session.adapters["http://"]
|
|
39
|
-
assert adapter.max_retries.total == 5
|
|
40
|
-
assert 500 in adapter.max_retries.status_forcelist
|
|
41
|
-
assert 502 in adapter.max_retries.status_forcelist
|
|
42
|
-
assert adapter.max_retries.backoff_factor == 2.0
|
|
39
|
+
assert hasattr(adapter, "max_retries") and adapter.max_retries.total == 5 # type: ignore
|
|
40
|
+
assert 500 in adapter.max_retries.status_forcelist # type: ignore
|
|
41
|
+
assert 502 in adapter.max_retries.status_forcelist # type: ignore
|
|
42
|
+
assert adapter.max_retries.backoff_factor == 2.0 # type: ignore
|
|
43
43
|
|
|
44
44
|
def test_retry_session_default_values(self):
|
|
45
45
|
"""Test retry session with default values."""
|
|
46
46
|
session = create_retry_session()
|
|
47
47
|
|
|
48
48
|
adapter = session.adapters["https://"]
|
|
49
|
-
assert adapter.max_retries.total == 3
|
|
50
|
-
assert 502 in adapter.max_retries.status_forcelist
|
|
51
|
-
assert 503 in adapter.max_retries.status_forcelist
|
|
52
|
-
assert 504 in adapter.max_retries.status_forcelist
|
|
49
|
+
assert adapter.max_retries.total == 3 # type: ignore
|
|
50
|
+
assert 502 in adapter.max_retries.status_forcelist # type: ignore
|
|
51
|
+
assert 503 in adapter.max_retries.status_forcelist # type: ignore
|
|
52
|
+
assert 504 in adapter.max_retries.status_forcelist # type: ignore
|
|
53
53
|
|
|
54
54
|
|
|
55
55
|
class TestAsyncRetryWrapper:
|
|
@@ -316,7 +316,7 @@ class TestMCPUseClientRetry:
|
|
|
316
316
|
# Verify retry worked
|
|
317
317
|
assert call_count == 2 # Failed once, then succeeded
|
|
318
318
|
assert not result.isError
|
|
319
|
-
assert result.content[0].text == "Success"
|
|
319
|
+
assert result.content[0].text == "Success" # type: ignore
|
|
320
320
|
|
|
321
321
|
@pytest.mark.asyncio
|
|
322
322
|
async def test_resource_read_with_retry(self):
|
|
@@ -371,7 +371,7 @@ class TestMCPUseClientRetry:
|
|
|
371
371
|
# Verify retry worked
|
|
372
372
|
assert call_count == 2 # Failed once, then succeeded
|
|
373
373
|
assert result is not None
|
|
374
|
-
assert result.contents[0].text == '{"status": "ok"}'
|
|
374
|
+
assert result.contents[0].text == '{"status": "ok"}' # type: ignore
|
|
375
375
|
|
|
376
376
|
|
|
377
377
|
if __name__ == "__main__":
|
hud/datasets/__init__.py
CHANGED
|
@@ -7,13 +7,14 @@ Provides data models, utilities, and execution functions for working with HUD da
|
|
|
7
7
|
# Execution functions
|
|
8
8
|
from __future__ import annotations
|
|
9
9
|
|
|
10
|
-
from .
|
|
10
|
+
from hud.types import Task
|
|
11
|
+
|
|
12
|
+
from .parallel import (
|
|
11
13
|
calculate_optimal_workers,
|
|
12
|
-
run_dataset,
|
|
13
14
|
run_dataset_parallel,
|
|
14
15
|
run_dataset_parallel_manual,
|
|
15
16
|
)
|
|
16
|
-
from .
|
|
17
|
+
from .runner import run_dataset
|
|
17
18
|
|
|
18
19
|
# Utilities
|
|
19
20
|
from .utils import fetch_system_prompt_from_dataset, save_tasks
|
|
@@ -65,8 +65,8 @@ def _process_worker(
|
|
|
65
65
|
|
|
66
66
|
import hud
|
|
67
67
|
from hud.agents.misc.response_agent import ResponseAgent
|
|
68
|
-
from hud.datasets.task import Task
|
|
69
68
|
from hud.otel import configure_telemetry
|
|
69
|
+
from hud.types import Task
|
|
70
70
|
|
|
71
71
|
# Ensure stdout is not buffered for immediate output
|
|
72
72
|
try:
|
|
@@ -9,7 +9,7 @@ from typing import TYPE_CHECKING, Any, cast
|
|
|
9
9
|
from datasets import Dataset, load_dataset
|
|
10
10
|
|
|
11
11
|
from hud.agents.misc import ResponseAgent
|
|
12
|
-
from hud.
|
|
12
|
+
from hud.types import Task
|
|
13
13
|
|
|
14
14
|
if TYPE_CHECKING:
|
|
15
15
|
from hud.agents import MCPAgent
|
hud/datasets/utils.py
CHANGED
hud/otel/config.py
CHANGED
|
@@ -111,7 +111,7 @@ def configure_telemetry(
|
|
|
111
111
|
# Error if no exporters are configured
|
|
112
112
|
raise ValueError(
|
|
113
113
|
"No telemetry backend configured. Either:\n"
|
|
114
|
-
"1. Set HUD_API_KEY environment variable for HUD telemetry\n"
|
|
114
|
+
"1. Set HUD_API_KEY environment variable for HUD telemetry (https://app.hud.so)\n"
|
|
115
115
|
"2. Use enable_otlp=True with configure_telemetry() for alternative backends (e.g., Jaeger)\n" # noqa: E501
|
|
116
116
|
)
|
|
117
117
|
elif not settings.telemetry_enabled:
|