hud-python 0.4.35__py3-none-any.whl → 0.4.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/agents/__init__.py +2 -0
- hud/agents/lite_llm.py +72 -0
- hud/agents/openai_chat_generic.py +21 -7
- hud/agents/tests/test_claude.py +32 -7
- hud/agents/tests/test_openai.py +29 -6
- hud/cli/__init__.py +228 -79
- hud/cli/build.py +26 -6
- hud/cli/dev.py +21 -40
- hud/cli/eval.py +96 -15
- hud/cli/flows/tasks.py +198 -65
- hud/cli/init.py +222 -629
- hud/cli/pull.py +6 -0
- hud/cli/push.py +11 -1
- hud/cli/rl/__init__.py +14 -4
- hud/cli/rl/celebrate.py +187 -0
- hud/cli/rl/config.py +15 -8
- hud/cli/rl/local_runner.py +44 -20
- hud/cli/rl/remote_runner.py +166 -87
- hud/cli/rl/viewer.py +141 -0
- hud/cli/rl/wait_utils.py +89 -0
- hud/cli/tests/test_build.py +3 -27
- hud/cli/tests/test_mcp_server.py +1 -12
- hud/cli/utils/config.py +85 -0
- hud/cli/utils/docker.py +21 -39
- hud/cli/utils/env_check.py +196 -0
- hud/cli/utils/environment.py +4 -3
- hud/cli/utils/interactive.py +2 -1
- hud/cli/utils/local_runner.py +204 -0
- hud/cli/utils/metadata.py +3 -1
- hud/cli/utils/package_runner.py +292 -0
- hud/cli/utils/remote_runner.py +4 -1
- hud/cli/utils/source_hash.py +108 -0
- hud/clients/base.py +1 -1
- hud/clients/fastmcp.py +1 -1
- hud/clients/mcp_use.py +30 -7
- hud/datasets/parallel.py +3 -1
- hud/datasets/runner.py +4 -1
- hud/otel/config.py +1 -1
- hud/otel/context.py +40 -6
- hud/rl/buffer.py +3 -0
- hud/rl/tests/test_learner.py +1 -1
- hud/rl/vllm_adapter.py +1 -1
- hud/server/server.py +234 -7
- hud/server/tests/test_add_tool.py +60 -0
- hud/server/tests/test_context.py +128 -0
- hud/server/tests/test_mcp_server_handlers.py +44 -0
- hud/server/tests/test_mcp_server_integration.py +405 -0
- hud/server/tests/test_mcp_server_more.py +247 -0
- hud/server/tests/test_run_wrapper.py +53 -0
- hud/server/tests/test_server_extra.py +166 -0
- hud/server/tests/test_sigterm_runner.py +78 -0
- hud/settings.py +38 -0
- hud/shared/hints.py +2 -2
- hud/telemetry/job.py +2 -2
- hud/types.py +9 -2
- hud/utils/tasks.py +32 -24
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.35.dist-info → hud_python-0.4.37.dist-info}/METADATA +43 -23
- {hud_python-0.4.35.dist-info → hud_python-0.4.37.dist-info}/RECORD +63 -46
- {hud_python-0.4.35.dist-info → hud_python-0.4.37.dist-info}/WHEEL +0 -0
- {hud_python-0.4.35.dist-info → hud_python-0.4.37.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.35.dist-info → hud_python-0.4.37.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
"""Run Python modules or commands as MCP servers.
|
|
2
|
+
|
|
3
|
+
This module handles direct execution of MCP servers, including:
|
|
4
|
+
- Python modules with an 'mcp' attribute
|
|
5
|
+
- External commands via FastMCP proxy
|
|
6
|
+
- Auto-reload functionality for development
|
|
7
|
+
|
|
8
|
+
For Docker container execution, see hud dev command.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import importlib
|
|
14
|
+
import logging
|
|
15
|
+
import os
|
|
16
|
+
import shlex
|
|
17
|
+
import signal
|
|
18
|
+
import subprocess
|
|
19
|
+
import sys
|
|
20
|
+
import threading
|
|
21
|
+
import time
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Any
|
|
24
|
+
|
|
25
|
+
from fastmcp import FastMCP
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _is_external_command(command: str | list[str]) -> bool:
    """Return True when *command* must be launched as a process rather than imported."""
    if not isinstance(command, str):
        # Argument lists are always treated as external commands.
        return True
    stripped = command.strip()
    # A dotted module name never contains whitespace, so any space means a
    # command line (e.g. "uv run python -m controller"). Otherwise fall back
    # to recognising common launchers and filesystem paths.
    return " " in stripped or stripped.startswith(
        ("python", "uv ", "docker", "./", "/", ".\\", "C:\\")
    )


async def run_package_as_mcp(
    command: str | list[str],
    transport: str = "stdio",
    port: int = 8765,
    verbose: bool = False,
    reload: bool = False,
    watch_paths: list[str] | None = None,
    server_attr: str = "mcp",
    **extra_kwargs: Any,
) -> None:
    """Run a command as an MCP server.

    Can run:
    - Python modules: 'controller' (imports and looks for the server attribute)
    - Python -m commands: 'python -m controller'
    - Docker commands: 'docker run -it my-mcp-server'
    - Any executable: './my-mcp-binary'

    The same module-vs-command detection is used with and without ``reload``,
    so a command line such as 'uv run python -m controller' is never handed
    to ``importlib``.

    Args:
        command: Command to run (string or list)
        transport: Transport type ("stdio" or "http")
        port: Port for HTTP transport
        verbose: Enable verbose logging
        reload: Enable auto-reload on file changes
        watch_paths: Paths to watch for changes (defaults to ['.'])
        server_attr: Name of the module attribute holding the server instance
        **extra_kwargs: Additional arguments forwarded to ``run_async``
    """
    # Set up logging
    logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)

    is_external_cmd = _is_external_command(command)

    # Handle reload mode
    if reload:
        if watch_paths is None:
            watch_paths = ["."]

        if is_external_cmd:
            # External command - pass command list directly
            cmd_list = shlex.split(command) if isinstance(command, str) else list(command)
            run_with_reload(cmd_list, watch_paths, verbose)
        else:
            # Python module - the child is re-launched from sys.argv
            run_with_reload(None, watch_paths, verbose)
        return

    if not is_external_cmd:
        # Treat as Python module for backwards compatibility
        module_name = command.strip() if isinstance(command, str) else command
        logger.info("Importing module: %s", module_name)
        module = importlib.import_module(module_name)

        # Look for server attribute in the module
        if not hasattr(module, server_attr):
            logger.error(
                "Module '%s' does not have an '%s' attribute (MCPServer instance)",
                command,
                server_attr,
            )
            sys.exit(1)

        server = getattr(module, server_attr)

        # Configure server options
        run_kwargs: dict[str, Any] = {
            "transport": transport,
            "show_banner": False,
        }

        if transport == "http":
            # FastMCP expects port/path directly
            run_kwargs["port"] = port
            run_kwargs["path"] = "/mcp"

        # Merge any extra kwargs (caller-supplied values win)
        run_kwargs.update(extra_kwargs)

        # Run the server
        logger.info("Running %s on %s transport", server.name, transport)
        await server.run_async(**run_kwargs)
        return

    # Run as external command through a FastMCP proxy.
    # Copy list input so the interpreter substitution below never mutates
    # the caller's list.
    cmd_list = shlex.split(command) if isinstance(command, str) else list(command)
    if not cmd_list:
        logger.error("No command given to run as an MCP server")
        sys.exit(1)

    # Replace 'python' with the current interpreter to preserve venv
    if cmd_list[0] == "python":
        cmd_list[0] = sys.executable
        logger.info("Replaced 'python' with: %s", sys.executable)

    logger.info("Running command: %s", " ".join(cmd_list))

    # Create MCP config for the command
    config = {
        "mcpServers": {
            "default": {
                "command": cmd_list[0],
                "args": cmd_list[1:],
                # transport defaults to stdio
            }
        }
    }

    # Create proxy server
    proxy = FastMCP.as_proxy(config, name=f"HUD Run - {cmd_list[0]}")

    # Run the proxy; unknown transports are passed as None
    await proxy.run_async(
        transport=transport if transport in ("http", "stdio") else None,
        port=port if transport == "http" else None,
        show_banner=False,
        **extra_kwargs,
    )
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def run_with_reload(
    target_func: Any,
    watch_paths: list[str],
    verbose: bool = False,
) -> None:
    """Run a function or command with file watching and auto-reload.

    Args:
        target_func: ``None`` to re-launch ``python -m hud run ...`` from
            ``sys.argv`` (without ``--reload``), a command list to spawn as a
            subprocess, or a synchronous callable to invoke in-process.
        watch_paths: Paths to watch for changes
        verbose: Enable verbose logging
    """
    try:
        import watchfiles
    except ImportError:
        logger.error("watchfiles is required for --reload. Install with: pip install watchfiles")
        sys.exit(1)

    # Resolve watch paths; a file path is replaced by its containing directory
    resolved_paths = []
    for path_str in watch_paths:
        path = Path(path_str).resolve()
        resolved_paths.append(str(path.parent if path.is_file() else path))

    watched_suffixes = (".py", ".json", ".toml", ".yaml")

    def _relevant(changes: Any) -> list[Any]:
        """Keep only changes to watched file types outside caches and dotfiles."""
        return [
            (change_type, path)
            for change_type, path in changes
            if path.endswith(watched_suffixes)
            and "__pycache__" not in path
            and not Path(path).name.startswith(".")
        ]

    def _wait_for_relevant_change(stop_event: threading.Event) -> bool:
        """Block until a relevant change occurs (True) or stop_event is set (False)."""
        for changes in watchfiles.watch(*resolved_paths, stop_event=stop_event):
            logger.info("Raw changes detected: %s", changes)
            relevant_changes = _relevant(changes)
            if relevant_changes:
                logger.info("File changes detected, restarting server...")
                if verbose:
                    for change_type, path in relevant_changes:
                        logger.debug("  %s: %s", change_type, path)
                return True
            logger.debug("Changes detected but filtered out: %s", changes)
        return False

    def _stop_process(proc: subprocess.Popen[bytes]) -> None:
        """Terminate proc, escalating to kill if it does not exit within 5 seconds."""
        proc.terminate()
        try:
            proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()

    def _wait_and_set(stop_event: threading.Event, proc: subprocess.Popen[bytes]) -> None:
        """Set stop_event once proc exits so the file watcher stops blocking."""
        try:
            proc.wait()
        finally:
            stop_event.set()

    def run_and_restart() -> None:
        """Run the target function in a loop, restarting on file changes."""

        process = None

        def handle_signal(signum: int, frame: Any) -> None:
            """Handle signals by terminating the subprocess."""
            if process:
                process.terminate()
            sys.exit(0)

        signal.signal(signal.SIGTERM, handle_signal)
        signal.signal(signal.SIGINT, handle_signal)

        while True:
            # Run the target function or command
            if target_func is None:
                # Use sys.argv approach for Python modules
                child_args = [a for a in sys.argv[1:] if a != "--reload"]
                # If first arg is already 'run', don't inject it again
                if child_args and child_args[0] == "run":
                    cmd = [sys.executable, "-m", "hud", *child_args]
                else:
                    cmd = [sys.executable, "-m", "hud", "run", *child_args]
            elif isinstance(target_func, list):
                # It's a command list
                cmd = target_func
            else:
                # It's a callable - run it directly, then block until a
                # relevant file change before running it again. The event is
                # never set, so only a file change (or an interrupt) ends
                # the wait.
                target_func()
                try:
                    _wait_for_relevant_change(threading.Event())
                except KeyboardInterrupt:
                    break
                continue

            if verbose:
                logger.info("Starting process: %s", " ".join(cmd))

            process = subprocess.Popen(cmd, env=os.environ)  # noqa: S603

            # A fresh event per child: it is set when that child exits, which
            # makes watchfiles stop watching.
            stop_event = threading.Event()
            threading.Thread(
                target=_wait_and_set, args=(stop_event, process), daemon=True
            ).start()

            try:
                changed = _wait_for_relevant_change(stop_event)
            except KeyboardInterrupt:
                # Handle Ctrl+C gracefully
                process.terminate()
                process.wait()
                break

            if changed:
                _stop_process(process)
                # Brief pause before restart
                time.sleep(0.1)
            else:
                # The child exited on its own. Pause before respawning so a
                # child that crashes on startup does not spin in a tight loop.
                time.sleep(1.0)

    # Always act as the parent. The child is launched without --reload,
    # so it won't re-enter this function.
    run_and_restart()
|
hud/cli/utils/remote_runner.py
CHANGED
|
@@ -293,7 +293,10 @@ def run_remote_server(
|
|
|
293
293
|
if not api_key:
|
|
294
294
|
api_key = settings.api_key
|
|
295
295
|
if not api_key:
|
|
296
|
-
click.echo(
|
|
296
|
+
click.echo(
|
|
297
|
+
"❌ API key required. Set HUD_API_KEY in your environment or run: hud set HUD_API_KEY=your-key-here", # noqa: E501
|
|
298
|
+
err=True,
|
|
299
|
+
)
|
|
297
300
|
sys.exit(1)
|
|
298
301
|
|
|
299
302
|
# Build headers
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Utilities to compute a fast, deterministic source hash for environments.
|
|
2
|
+
|
|
3
|
+
This intentionally focuses on the typical HUD environment layout and aims to be fast:
|
|
4
|
+
- Always include: Dockerfile, pyproject.toml
|
|
5
|
+
- Include directories: controller/, environment/ (src/ is not currently listed in INCLUDE_DIRS)
|
|
6
|
+
- Exclude common build/runtime caches and lock files
|
|
7
|
+
|
|
8
|
+
Note: This is not a full Docker build context hash and does not parse .dockerignore.
|
|
9
|
+
It is sufficient to detect meaningful changes for HUD environments quickly.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import hashlib
|
|
15
|
+
import os
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import TYPE_CHECKING
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from collections.abc import Iterable
|
|
21
|
+
|
|
22
|
+
# Directory names pruned while walking the included directories.
EXCLUDE_DIRS = {
    ".git",
    ".venv",
    "dist",
    "build",
    "node_modules",
    "__pycache__",
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
}

# File-name suffixes skipped inside the included directories.
EXCLUDE_FILE_SUFFIXES = {
    ".pyc",
    ".log",
}

# Exact file names skipped inside the included directories.
EXCLUDE_FILES = {
    "hud.lock.yaml",
}

# Top-level files that are hashed whenever they exist under the root.
INCLUDE_FILES = {"Dockerfile", "pyproject.toml"}
# Top-level directories that are walked recursively.
# NOTE(review): the module docstring also lists src/, which is absent here;
# confirm which of the two is intended.
INCLUDE_DIRS = {"controller", "environment"}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def iter_source_files(root: Path) -> Iterable[Path]:
    """Lazily produce every file that contributes to the source hash.

    Top-level files named in INCLUDE_FILES come first (when they exist),
    followed by the contents of each INCLUDE_DIRS directory with excluded
    directories, file names and suffixes filtered out. No ordering is
    promised; sort the result before hashing.
    """
    # Top-level files that are always part of the hash when present
    yield from (root / name for name in INCLUDE_FILES if (root / name).is_file())

    skipped_suffixes = tuple(EXCLUDE_FILE_SUFFIXES)

    # Recursively walk each known source directory
    for dir_name in INCLUDE_DIRS:
        base = root / dir_name
        if base.exists():
            for current, subdirs, names in os.walk(base):
                # In-place assignment stops os.walk from descending into excluded dirs
                subdirs[:] = [sub for sub in subdirs if sub not in EXCLUDE_DIRS]
                parent = Path(current)
                for name in names:
                    if name in EXCLUDE_FILES or name.endswith(skipped_suffixes):
                        continue
                    yield parent / name
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def list_source_files(root: Path) -> list[Path]:
    """Collect the hash inputs under *root* in a deterministic order.

    Files are ordered by their path relative to the resolved root, with
    backslashes normalised to forward slashes.
    """
    base = root.resolve()

    def _relative_key(candidate: Path) -> str:
        # Normalise separators so the ordering does not depend on the path style
        return str(candidate.resolve().relative_to(base)).replace("\\", "/")

    return sorted(iter_source_files(base), key=_relative_key)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def compute_source_hash(directory: str | Path) -> str:
    """Hash the relevant source files of an environment with SHA-256.

    For every file returned by ``list_source_files`` the digest is fed the
    file's root-relative path (forward slashes, UTF-8) followed by its raw
    bytes.

    Args:
        directory: Environment directory root.

    Returns:
        Hex digest string.
    """
    base = Path(directory).resolve()
    digest = hashlib.sha256()

    for source in list_source_files(base):
        relative = str(source.resolve().relative_to(base)).replace("\\", "/")
        digest.update(relative.encode("utf-8"))
        with open(source, "rb") as handle:
            # Stream in 8 KiB chunks so large files are never fully loaded
            for block in iter(lambda: handle.read(8192), b""):
                digest.update(block)

    return digest.hexdigest()
|
hud/clients/base.py
CHANGED
|
@@ -139,7 +139,7 @@ class BaseHUDClient(AgentMCPClient):
|
|
|
139
139
|
raise HudAuthenticationError(
|
|
140
140
|
f'Sending authorization "{headers.get("Authorization", "")}", which may'
|
|
141
141
|
" be incomplete. Ensure HUD_API_KEY environment variable is set or send it"
|
|
142
|
-
" as a header. You can get an API key at https://
|
|
142
|
+
" as a header. You can get an API key at https://hud.so"
|
|
143
143
|
)
|
|
144
144
|
# Subclasses implement connection
|
|
145
145
|
await self._connect(self._mcp_config)
|
hud/clients/fastmcp.py
CHANGED
|
@@ -95,7 +95,7 @@ class FastMCPHUDClient(BaseHUDClient):
|
|
|
95
95
|
raise RuntimeError(
|
|
96
96
|
"Authentication failed for HUD API. "
|
|
97
97
|
"Please ensure your HUD_API_KEY environment variable is set correctly." # noqa: E501
|
|
98
|
-
"You can get an API key at https://
|
|
98
|
+
"You can get an API key at https://hud.so"
|
|
99
99
|
) from e
|
|
100
100
|
# Generic 401 error
|
|
101
101
|
raise RuntimeError(
|
hud/clients/mcp_use.py
CHANGED
|
@@ -5,19 +5,22 @@ from __future__ import annotations
|
|
|
5
5
|
import logging
|
|
6
6
|
import traceback
|
|
7
7
|
from typing import Any
|
|
8
|
+
from urllib.parse import urlparse
|
|
8
9
|
|
|
9
10
|
from mcp import Implementation, types
|
|
10
11
|
from mcp.shared.exceptions import McpError
|
|
11
12
|
from mcp_use.client import MCPClient as MCPUseClient
|
|
12
13
|
from mcp_use.session import MCPSession as MCPUseSession
|
|
14
|
+
from mcp_use.types.http import HttpOptions
|
|
13
15
|
from pydantic import AnyUrl
|
|
14
16
|
|
|
17
|
+
from hud.settings import settings
|
|
15
18
|
from hud.types import MCPToolCall, MCPToolResult
|
|
16
19
|
from hud.utils.hud_console import HUDConsole
|
|
17
20
|
from hud.version import __version__ as hud_version
|
|
18
21
|
|
|
19
22
|
from .base import BaseHUDClient
|
|
20
|
-
from .utils.
|
|
23
|
+
from .utils.retry_transport import create_retry_httpx_client
|
|
21
24
|
|
|
22
25
|
logger = logging.getLogger(__name__)
|
|
23
26
|
hud_console = HUDConsole(logger=logger)
|
|
@@ -30,7 +33,11 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
30
33
|
name="hud-mcp-use", title="hud MCP-use Client", version=hud_version
|
|
31
34
|
)
|
|
32
35
|
|
|
33
|
-
def __init__(
|
|
36
|
+
def __init__(
|
|
37
|
+
self,
|
|
38
|
+
mcp_config: dict[str, dict[str, Any]] | None = None,
|
|
39
|
+
**kwargs: Any,
|
|
40
|
+
) -> None:
|
|
34
41
|
"""
|
|
35
42
|
Initialize MCP-use client.
|
|
36
43
|
|
|
@@ -51,6 +58,12 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
51
58
|
str, tuple[str, types.Tool, types.Tool]
|
|
52
59
|
] = {} # server_name, original_tool, prefixed_tool
|
|
53
60
|
self._client: Any | None = None # Will be MCPUseClient when available
|
|
61
|
+
# Transport options for MCP-use (disable_sse_fallback, httpx_client_factory, etc.)
|
|
62
|
+
# Default to retry-enabled HTTPX client if factory not provided
|
|
63
|
+
self._http_options: HttpOptions = HttpOptions(
|
|
64
|
+
httpx_client_factory=create_retry_httpx_client,
|
|
65
|
+
disable_sse_fallback=True,
|
|
66
|
+
)
|
|
54
67
|
|
|
55
68
|
async def _connect(self, mcp_config: dict[str, dict[str, Any]]) -> None:
|
|
56
69
|
"""Create all sessions for MCP-use client."""
|
|
@@ -58,19 +71,29 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
58
71
|
logger.warning("Client is already connected, cannot connect again")
|
|
59
72
|
return
|
|
60
73
|
|
|
74
|
+
# If a server target matches HUD's MCP host and no auth is provided,
|
|
75
|
+
# inject the HUD API key as a Bearer token to avoid OAuth browser flow.
|
|
76
|
+
try:
|
|
77
|
+
hud_mcp_host = urlparse(settings.hud_mcp_url).netloc
|
|
78
|
+
if mcp_config and settings.api_key and hud_mcp_host:
|
|
79
|
+
for server_cfg in mcp_config.values():
|
|
80
|
+
server_url = server_cfg.get("url")
|
|
81
|
+
if not server_url:
|
|
82
|
+
continue
|
|
83
|
+
if urlparse(server_url).netloc == hud_mcp_host and not server_cfg.get("auth"):
|
|
84
|
+
server_cfg["auth"] = settings.api_key
|
|
85
|
+
except Exception:
|
|
86
|
+
logger.warning("Failed to parse HUD MCP URL")
|
|
87
|
+
|
|
61
88
|
config = {"mcpServers": mcp_config}
|
|
62
89
|
if MCPUseClient is None:
|
|
63
90
|
raise ImportError("MCPUseClient is not available")
|
|
64
|
-
self._client = MCPUseClient.from_dict(config)
|
|
91
|
+
self._client = MCPUseClient.from_dict(config, http_options=self._http_options)
|
|
65
92
|
try:
|
|
66
93
|
assert self._client is not None # noqa: S101
|
|
67
94
|
self._sessions = await self._client.create_all_sessions()
|
|
68
95
|
hud_console.info(f"Created {len(self._sessions)} MCP sessions")
|
|
69
96
|
|
|
70
|
-
# Patch all sessions with retry logic
|
|
71
|
-
patch_all_sessions(self._sessions)
|
|
72
|
-
hud_console.debug("Applied retry logic to all MCP sessions")
|
|
73
|
-
|
|
74
97
|
# Configure validation for all sessions based on client setting
|
|
75
98
|
try:
|
|
76
99
|
for session in self._sessions.values():
|
hud/datasets/parallel.py
CHANGED
|
@@ -115,7 +115,9 @@ def _process_worker(
|
|
|
115
115
|
task_name = task_dict.get("prompt") or f"Task {index}"
|
|
116
116
|
|
|
117
117
|
# Use the job_id to group all tasks under the same job
|
|
118
|
-
|
|
118
|
+
raw_task_id = task_dict.get("id")
|
|
119
|
+
safe_task_id = str(raw_task_id) if raw_task_id is not None else None
|
|
120
|
+
with hud.trace(task_name, job_id=job_id, task_id=safe_task_id):
|
|
119
121
|
# Convert dict to Task
|
|
120
122
|
task = Task(**task_dict)
|
|
121
123
|
|
hud/datasets/runner.py
CHANGED
|
@@ -104,7 +104,10 @@ async def run_dataset(
|
|
|
104
104
|
task_name = task_dict.get("prompt") or f"Task {index}"
|
|
105
105
|
if custom_system_prompt and "system_prompt" not in task_dict:
|
|
106
106
|
task_dict["system_prompt"] = custom_system_prompt
|
|
107
|
-
|
|
107
|
+
# Ensure task_id is a string for baggage propagation
|
|
108
|
+
raw_task_id = task_dict.get("id")
|
|
109
|
+
safe_task_id = str(raw_task_id) if raw_task_id is not None else None
|
|
110
|
+
with hud.trace(task_name, job_id=job_obj.id, task_id=safe_task_id):
|
|
108
111
|
# Convert dict to Task here, at trace level
|
|
109
112
|
task = Task(**task_dict)
|
|
110
113
|
|
hud/otel/config.py
CHANGED
|
@@ -111,7 +111,7 @@ def configure_telemetry(
|
|
|
111
111
|
# Error if no exporters are configured
|
|
112
112
|
raise ValueError(
|
|
113
113
|
"No telemetry backend configured. Either:\n"
|
|
114
|
-
"1. Set HUD_API_KEY environment variable for HUD telemetry (https://
|
|
114
|
+
"1. Set HUD_API_KEY environment variable for HUD telemetry (https://hud.so)\n"
|
|
115
115
|
"2. Use enable_otlp=True with configure_telemetry() for alternative backends (e.g., Jaeger)\n" # noqa: E501
|
|
116
116
|
)
|
|
117
117
|
elif not settings.telemetry_enabled:
|
hud/otel/context.py
CHANGED
|
@@ -239,8 +239,25 @@ async def _update_task_status_async(
|
|
|
239
239
|
|
|
240
240
|
try:
|
|
241
241
|
data: dict[str, Any] = {"status": status}
|
|
242
|
-
|
|
243
|
-
|
|
242
|
+
|
|
243
|
+
# Resolve effective job_id from explicit param, OTel baggage, or current job context
|
|
244
|
+
effective_job_id: str | None = job_id
|
|
245
|
+
if not effective_job_id:
|
|
246
|
+
bj = baggage.get_baggage("hud.job_id")
|
|
247
|
+
if isinstance(bj, str) and bj:
|
|
248
|
+
effective_job_id = bj
|
|
249
|
+
if not effective_job_id:
|
|
250
|
+
try:
|
|
251
|
+
from hud.telemetry.job import get_current_job # Local import to avoid cycles
|
|
252
|
+
|
|
253
|
+
current_job = get_current_job()
|
|
254
|
+
if current_job:
|
|
255
|
+
effective_job_id = current_job.id
|
|
256
|
+
except Exception:
|
|
257
|
+
effective_job_id = None
|
|
258
|
+
|
|
259
|
+
if effective_job_id:
|
|
260
|
+
data["job_id"] = effective_job_id
|
|
244
261
|
if error_message:
|
|
245
262
|
data["error_message"] = error_message
|
|
246
263
|
|
|
@@ -302,8 +319,25 @@ def _update_task_status_sync(
|
|
|
302
319
|
|
|
303
320
|
try:
|
|
304
321
|
data: dict[str, Any] = {"status": status}
|
|
305
|
-
|
|
306
|
-
|
|
322
|
+
|
|
323
|
+
# Resolve effective job_id from explicit param, OTel baggage, or current job context
|
|
324
|
+
effective_job_id: str | None = job_id
|
|
325
|
+
if not effective_job_id:
|
|
326
|
+
bj = baggage.get_baggage("hud.job_id")
|
|
327
|
+
if isinstance(bj, str) and bj:
|
|
328
|
+
effective_job_id = bj
|
|
329
|
+
if not effective_job_id:
|
|
330
|
+
try:
|
|
331
|
+
from hud.telemetry.job import get_current_job # Local import to avoid cycles
|
|
332
|
+
|
|
333
|
+
current_job = get_current_job()
|
|
334
|
+
if current_job:
|
|
335
|
+
effective_job_id = current_job.id
|
|
336
|
+
except Exception:
|
|
337
|
+
effective_job_id = None
|
|
338
|
+
|
|
339
|
+
if effective_job_id:
|
|
340
|
+
data["job_id"] = effective_job_id
|
|
307
341
|
if error_message:
|
|
308
342
|
data["error_message"] = error_message
|
|
309
343
|
|
|
@@ -342,7 +376,7 @@ def _print_trace_url(task_run_id: str) -> None:
|
|
|
342
376
|
if not (settings.telemetry_enabled and settings.api_key):
|
|
343
377
|
return
|
|
344
378
|
|
|
345
|
-
url = f"https://
|
|
379
|
+
url = f"https://hud.so/trace/{task_run_id}"
|
|
346
380
|
header = "🚀 See your agent live at:"
|
|
347
381
|
|
|
348
382
|
# ANSI color codes
|
|
@@ -381,7 +415,7 @@ def _print_trace_complete_url(task_run_id: str, error_occurred: bool = False) ->
|
|
|
381
415
|
if not (settings.telemetry_enabled and settings.api_key):
|
|
382
416
|
return
|
|
383
417
|
|
|
384
|
-
url = f"https://
|
|
418
|
+
url = f"https://hud.so/trace/{task_run_id}"
|
|
385
419
|
|
|
386
420
|
# ANSI color codes
|
|
387
421
|
GREEN = "\033[92m"
|
hud/rl/buffer.py
CHANGED
|
@@ -155,6 +155,9 @@ class DatasetBuffer(Buffer[Task]):
|
|
|
155
155
|
f"This is because the number of training steps ({self.training_steps}) is not a multiple of the dataset size ({self.dataset_size})" # noqa: E501
|
|
156
156
|
)
|
|
157
157
|
|
|
158
|
+
if config.verbose:
|
|
159
|
+
hud_console.info(f"Sample task: {tasks[0]}")
|
|
160
|
+
|
|
158
161
|
self.add_fill(tasks, self.number_of_tasks, config.training.shuffle_dataset)
|
|
159
162
|
|
|
160
163
|
def _validate_tasks(self, tasks: list[Task]) -> list[Task]:
|
hud/rl/tests/test_learner.py
CHANGED
|
@@ -163,7 +163,7 @@ def test_skip_update_when_zero_adv(monkeypatch, learner_stub: GRPOLearner):
|
|
|
163
163
|
# Return a zero scalar loss that *depends* on params so backward works,
|
|
164
164
|
# but has zero gradients (no update signal).
|
|
165
165
|
def _zero_loss(self, sample) -> torch.Tensor:
|
|
166
|
-
return sum(p.sum() for p in self.policy.parameters()) * 0.0
|
|
166
|
+
return sum(p.sum() for p in self.policy.parameters()) * 0.0 # type: ignore
|
|
167
167
|
|
|
168
168
|
monkeypatch.setattr(GRPOLearner, "compute_loss", _zero_loss, raising=True)
|
|
169
169
|
|
hud/rl/vllm_adapter.py
CHANGED
|
@@ -36,7 +36,7 @@ class VLLMAdapter:
|
|
|
36
36
|
headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
|
|
37
37
|
payload = {"lora_name": adapter_name, "lora_path": adapter_path}
|
|
38
38
|
# Implement exponential backoff for retrying the adapter load request.
|
|
39
|
-
max_retries =
|
|
39
|
+
max_retries = 8
|
|
40
40
|
backoff_factor = 2
|
|
41
41
|
delay = 1 # initial delay in seconds
|
|
42
42
|
|