hud-python 0.4.28__py3-none-any.whl → 0.4.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic.

Files changed (77)
  1. hud/__init__.py +2 -1
  2. hud/agents/base.py +81 -45
  3. hud/agents/claude.py +8 -4
  4. hud/agents/openai_chat_generic.py +66 -40
  5. hud/agents/tests/test_base.py +0 -4
  6. hud/agents/tests/test_openai.py +1 -1
  7. hud/cli/__init__.py +182 -52
  8. hud/cli/dev.py +8 -9
  9. hud/cli/eval.py +317 -119
  10. hud/cli/flows/__init__.py +0 -0
  11. hud/cli/flows/tasks.py +0 -0
  12. hud/cli/get.py +160 -0
  13. hud/cli/rl/__init__.py +567 -71
  14. hud/cli/rl/config.py +94 -0
  15. hud/cli/rl/display.py +133 -0
  16. hud/cli/rl/gpu.py +63 -0
  17. hud/cli/rl/gpu_utils.py +318 -0
  18. hud/cli/rl/presets.py +96 -0
  19. hud/cli/rl/remote_runner.py +347 -0
  20. hud/cli/rl/rl_api.py +150 -0
  21. hud/cli/rl/vllm.py +177 -0
  22. hud/cli/tests/test_analyze_metadata.py +0 -1
  23. hud/cli/utils/tasks.py +26 -0
  24. hud/clients/base.py +21 -23
  25. hud/clients/mcp_use.py +36 -44
  26. hud/clients/tests/test_mcp_use_retry.py +10 -10
  27. hud/datasets/__init__.py +4 -3
  28. hud/datasets/{execution/parallel.py → parallel.py} +1 -1
  29. hud/datasets/{execution/runner.py → runner.py} +1 -1
  30. hud/datasets/utils.py +1 -1
  31. hud/native/comparator.py +6 -6
  32. hud/native/tests/test_comparator.py +8 -8
  33. hud/native/tests/test_native_init.py +13 -11
  34. hud/otel/config.py +1 -1
  35. hud/otel/instrumentation.py +35 -0
  36. hud/rl/README.md +30 -0
  37. hud/rl/__init__.py +1 -0
  38. hud/rl/actor.py +174 -0
  39. hud/rl/buffer.py +371 -0
  40. hud/rl/chat_template.jinja +101 -0
  41. hud/rl/config.py +184 -0
  42. hud/rl/distributed.py +95 -0
  43. hud/rl/learner.py +589 -0
  44. hud/rl/tests/__init__.py +1 -0
  45. hud/rl/tests/test_learner.py +171 -0
  46. hud/rl/train.py +354 -0
  47. hud/rl/types.py +101 -0
  48. hud/rl/utils/start_vllm_server.sh +30 -0
  49. hud/rl/utils.py +524 -0
  50. hud/rl/vllm_adapter.py +125 -0
  51. hud/settings.py +6 -0
  52. hud/telemetry/__init__.py +2 -1
  53. hud/telemetry/job.py +46 -3
  54. hud/telemetry/tests/test_trace.py +3 -3
  55. hud/telemetry/trace.py +85 -13
  56. hud/tools/tests/test_computer.py +3 -3
  57. hud/tools/tests/test_computer_actions.py +1 -1
  58. hud/types.py +123 -2
  59. hud/utils/group_eval.py +223 -0
  60. hud/utils/hud_console.py +113 -13
  61. hud/utils/tasks.py +119 -0
  62. hud/utils/tests/test_version.py +1 -1
  63. hud/version.py +1 -1
  64. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/METADATA +20 -2
  65. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/RECORD +68 -48
  66. hud/cli/hf.py +0 -406
  67. hud/cli/rl/README.md +0 -243
  68. hud/cli/rl/init.py +0 -370
  69. hud/cli/rl/pod.py +0 -501
  70. hud/cli/rl/ssh.py +0 -322
  71. hud/cli/rl/train.py +0 -562
  72. hud/cli/rl/utils.py +0 -165
  73. hud/datasets/execution/__init__.py +0 -13
  74. hud/datasets/task.py +0 -116
  75. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/WHEEL +0 -0
  76. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/entry_points.txt +0 -0
  77. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/licenses/LICENSE +0 -0
hud/cli/rl/vllm.py ADDED
@@ -0,0 +1,177 @@
+"""vLLM server management utilities."""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import os
+import subprocess
+import time
+from pathlib import Path
+
+import httpx
+from rich.console import Console
+
+from hud.utils.hud_console import HUDConsole
+
+logger = logging.getLogger(__name__)
+hud_console = HUDConsole(logger)
+
+console = Console()
+
+
+def get_vllm_args(model_name: str, chat_template_path: Path | None = None) -> list[str]:
+    """Get common vLLM server arguments for both local and remote deployments."""
+    args = [
+        "serve",
+        model_name,
+        "--api-key",
+        "token-abc123",
+        "--host",
+        "0.0.0.0",  # noqa: S104
+        "--port",
+        "8000",
+        "--tensor-parallel-size",
+        "1",
+        "--trust-remote-code",
+        "--max-model-len",
+        "16384",
+        "--enable-lora",
+        "--max-lora-rank",
+        "64",
+        "--max-cpu-loras",
+        "4",
+        "--enable-auto-tool-choice",
+        "--tool-call-parser",
+        "hermes",
+        "--disable-log-requests",
+        "--dtype",
+        "auto",
+    ]
+
+    # Add chat template if provided
+    if chat_template_path and chat_template_path.exists():
+        args.extend(["--chat-template", str(chat_template_path.absolute())])
+
+    return args
+
+
+def check_vllm_server() -> bool:
+    """Check if vLLM server is running."""
+    try:
+        response = httpx.get("http://localhost:8000/health", timeout=2.0)
+        return response.status_code == 200
+    except Exception:
+        return False
+
+
+def kill_vllm_server() -> None:
+    """Kill any running vLLM server processes."""
+    try:
+        # Check for PID file first
+        pid_file = Path("/tmp/vllm_server.pid")  # noqa: S108
+        if pid_file.exists():
+            try:
+                pid = int(pid_file.read_text().strip())
+                subprocess.run(["kill", "-TERM", str(pid)], check=False)  # noqa: S603, S607
+                time.sleep(2)
+                # Force kill if still running
+                subprocess.run(["kill", "-9", str(pid)], check=False)  # noqa: S603, S607
+                pid_file.unlink()
+            except Exception as e:
+                hud_console.error(f"Failed to kill vLLM server: {e}")
+
+        # Also try to kill by process name
+        subprocess.run(["pkill", "-f", "vllm serve"], check=False)  # noqa: S607
+        subprocess.run(["pkill", "-f", "vllm.entrypoints.openai.api_server"], check=False)  # noqa: S607
+        time.sleep(2)
+
+        # Check for any process using port 8000
+        result = subprocess.run(["lsof", "-ti:8000"], capture_output=True, text=True, check=False)  # noqa: S607
+
+        if result.stdout.strip():
+            for pid in result.stdout.strip().split("\n"):
+                try:
+                    subprocess.run(["kill", "-9", pid], check=False)  # noqa: S603, S607
+                except Exception as e:
+                    hud_console.error(f"Failed to kill vLLM server: {e}")
+
+        console.print("[yellow]Killed existing vLLM server processes[/yellow]")
+    except Exception as e:
+        hud_console.error(f"Error killing vLLM server: {e}")
+
+
+def start_vllm_server(model_name: str, gpu_index: int = 1, restart: bool = False) -> None:
+    """Start vLLM server in the background with dynamic GPU selection."""
+    if restart:
+        kill_vllm_server()
+        time.sleep(3)
+
+    # Check if already running
+    if check_vllm_server():
+        console.print("[green]vLLM server is already running[/green]")
+        return
+
+    console.print(f"[cyan]Starting vLLM server with {model_name} on GPU {gpu_index}...[/cyan]")
+
+    # Set up environment variables
+    env = os.environ.copy()
+    env.update(
+        {
+            "CUDA_VISIBLE_DEVICES": str(gpu_index),
+            "VLLM_ALLOW_RUNTIME_LORA_UPDATING": "True",
+            "TOKENIZERS_PARALLELISM": "false",
+            "VLLM_LOGGING_LEVEL": "INFO",  # Changed from DEBUG to reduce noise
+            "CUDA_LAUNCH_BLOCKING": "1",  # Better error messages
+        }
+    )
+
+    # Get the path to chat template
+    chat_template_path = Path(__file__).parent.parent.parent / "rl" / "chat_template.jinja"
+
+    # Build the vLLM command
+    vllm_args = get_vllm_args(model_name, chat_template_path)
+    cmd = ["uv", "run", "vllm", *vllm_args]
+
+    # Start the server in the background
+    with open("/tmp/vllm_server.log", "w") as log_file:  # noqa: S108
+        process = subprocess.Popen(  # noqa: S603
+            cmd,
+            env=env,
+            stdout=log_file,
+            stderr=subprocess.STDOUT,
+            preexec_fn=os.setpgrp,  # type: ignore
+            cwd=Path.cwd(),  # Use current working directory
+        )
+
+    console.print("[yellow]vLLM server starting in background...[/yellow]")
+    console.print(f"[yellow]Process ID: {process.pid}[/yellow]")
+    console.print("[yellow]Check logs at: /tmp/vllm_server.log[/yellow]")
+
+    # Save PID for later management
+    pid_file = Path("/tmp/vllm_server.pid")  # noqa: S108
+    pid_file.write_text(str(process.pid))
+
+
+async def wait_for_vllm_server(timeout: int = 360) -> bool:  # noqa: ASYNC109
+    """Wait for vLLM server to be ready."""
+    start_time = time.time()
+    console.print("[yellow]Waiting for vLLM server to be ready (up to 6 minutes)...[/yellow]")
+
+    async with httpx.AsyncClient() as client:
+        while time.time() - start_time < timeout:
+            try:
+                response = await client.get("http://localhost:8000/health", timeout=2.0)
+                if response.status_code == 200:
+                    console.print("[green]✅ vLLM server is ready![/green]")
+                    return True
+            except Exception as e:
+                hud_console.error(f"Failed to connect to vLLM server: {e}")
+
+            await asyncio.sleep(2)
+            elapsed = int(time.time() - start_time)
+            console.print(f"[yellow]Waiting... ({elapsed}s / {timeout}s)[/yellow]", end="\r")
+
+    console.print("\n[red]❌ vLLM server failed to start within timeout[/red]")
+    console.print("[yellow]Check /tmp/vllm_server.log for details[/yellow]")
+    return False
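The helpers above are designed to be composed: start the server in the background, poll /health until it responds, and tear it down afterwards. A rough usage sketch follows; the import path is inferred from the file location above, and the model name is only an illustrative assumption.

# Hypothetical driver for the new helpers in hud/cli/rl/vllm.py (not part of the diff).
import asyncio

from hud.cli.rl.vllm import (
    check_vllm_server,
    kill_vllm_server,
    start_vllm_server,
    wait_for_vllm_server,
)


async def main() -> None:
    # Background-launch vLLM on GPU 0; the model name here is just an example.
    start_vllm_server("Qwen/Qwen2.5-3B-Instruct", gpu_index=0, restart=True)

    # Poll http://localhost:8000/health for up to 6 minutes.
    if await wait_for_vllm_server(timeout=360):
        print("healthy:", check_vllm_server())
    else:
        # Give up and clean up any stray vLLM processes.
        kill_vllm_server()


if __name__ == "__main__":
    asyncio.run(main())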
hud/cli/tests/test_analyze_metadata.py CHANGED
@@ -214,7 +214,6 @@ class TestAnalyzeFromMetadata:
 
     @mock.patch("hud.cli.utils.metadata.check_local_cache")
    @mock.patch("hud.cli.utils.metadata.fetch_lock_from_registry")
-    @mock.patch("hud.cli.utils.metadata.design")
     @mock.patch("hud.cli.utils.metadata.console")
     async def test_analyze_not_found(self, mock_console, mock_hud_console, mock_fetch, mock_check):
         """Test when environment not found anywhere."""
hud/cli/utils/tasks.py ADDED
@@ -0,0 +1,26 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from hud.utils.hud_console import hud_console
+
+
+def find_tasks_file(tasks_file: str | None, msg: str = "Select a tasks file") -> str:
+    """Find tasks file."""
+    if tasks_file:
+        return tasks_file
+
+    # Get current directory and find all .json and .jsonl files
+    current_dir = Path.cwd()
+    all_files = list(current_dir.glob("*.json")) + list(current_dir.glob("*.jsonl"))
+    all_files = [
+        str(file).replace(str(current_dir), "").lstrip("/").lstrip("\\") for file in all_files
+    ]
+    all_files = [file for file in all_files if file[0] != "."]  # Remove all config files
+
+    if len(all_files) == 1:
+        return str(all_files[0])
+
+    else:
+        # Prompt user to select a file
+        return hud_console.select(msg, choices=all_files)
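A short sketch of how this helper is meant to be called from the CLI flows; only find_tasks_file comes from the diff, the surrounding wiring is assumed.

# Hypothetical call site: resolve a tasks file before running an evaluation.
from hud.cli.utils.tasks import find_tasks_file

# An explicit path is returned unchanged; otherwise the current directory is
# scanned for *.json / *.jsonl files and hud_console.select() prompts when
# more than one candidate is found.
tasks_path = find_tasks_file(None, msg="Select a tasks file to evaluate")
print(f"Using tasks from {tasks_path}")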
hud/clients/base.py CHANGED
@@ -11,18 +11,16 @@ from mcp.types import Implementation
 
 from hud.shared.exceptions import HudAuthenticationError, HudException
 from hud.types import MCPToolCall, MCPToolResult
+from hud.utils.hud_console import HUDConsole
 from hud.utils.mcp import setup_hud_telemetry
 from hud.version import __version__ as hud_version
 
 if TYPE_CHECKING:
     import mcp.types as types
-
-else:
-    pass
-
-
 logger = logging.getLogger(__name__)
 
+hud_console = HUDConsole(logger=logger)
+
 
 @runtime_checkable
 class AgentMCPClient(Protocol):
@@ -113,7 +111,7 @@ class BaseHUDClient(AgentMCPClient):
     async def initialize(self, mcp_config: dict[str, dict[str, Any]] | None = None) -> None:
         """Initialize connection and fetch tools."""
         if self._initialized:
-            logger.warning(
+            hud_console.warning(
                 "Client already connected, if you want to reconnect or change the configuration, "
                 "call shutdown() first. This is especially important if you are using an agent."
             )
@@ -130,7 +128,7 @@ class BaseHUDClient(AgentMCPClient):
 
         self._auto_trace_cm = setup_hud_telemetry(self._mcp_config, auto_trace=self._auto_trace)
 
-        logger.debug("Initializing MCP client...")
+        hud_console.debug("Initializing MCP client...")
 
         try:
             # Check if API key is set for HUD API
@@ -155,7 +153,6 @@ class BaseHUDClient(AgentMCPClient):
         await self._fetch_telemetry()
 
         self._initialized = True
-        logger.info("Client initialized")
     async def shutdown(self) -> None:
         """Disconnect from the MCP server."""
  """Disconnect from the MCP server."""
@@ -163,9 +160,9 @@ class BaseHUDClient(AgentMCPClient):
         if self._auto_trace_cm:
             try:
                 self._auto_trace_cm.__exit__(None, None, None)
-                logger.info("Closed auto-created trace")
+                hud_console.info("Closed auto-created trace")
             except Exception as e:
-                logger.warning("Failed to close auto-created trace: %s", e)
+                hud_console.warning(f"Failed to close auto-created trace: {e}")
             finally:
                 self._auto_trace_cm = None
 
@@ -173,9 +170,9 @@ class BaseHUDClient(AgentMCPClient):
         if self._initialized:
             await self._disconnect()
             self._initialized = False
-            logger.info("Client disconnected")
+            hud_console.info("Shutdown completed")
         else:
-            logger.debug("Client was not initialized, skipping disconnect")
+            hud_console.debug("Client was not initialized, skipping disconnect")
 
     @overload
     async def call_tool(self, tool_call: MCPToolCall, /) -> MCPToolResult: ...
@@ -280,27 +277,28 @@ class BaseHUDClient(AgentMCPClient):
                 telemetry_data = json.loads(result.contents[0].text)  # type: ignore
                 self._telemetry_data = telemetry_data
 
-                logger.info("📡 Telemetry data fetched:")
                 if "live_url" in telemetry_data:
-                    logger.info(" 🖥️ Live URL: %s", telemetry_data["live_url"])
+                    hud_console.info(f" 🖥️ Live URL: {telemetry_data['live_url']}")
                 if "vnc_url" in telemetry_data:
-                    logger.info(" 🖥️ VNC URL: %s", telemetry_data["vnc_url"])
+                    hud_console.info(f" 🖥️ VNC URL: {telemetry_data['vnc_url']}")
                 if "cdp_url" in telemetry_data:
-                    logger.info(" 🦾 CDP URL: %s", telemetry_data["cdp_url"])
+                    hud_console.info(f" 🦾 CDP URL: {telemetry_data['cdp_url']}")
                 if "status" in telemetry_data:
-                    logger.info(" 📊 Status: %s", telemetry_data["status"])
+                    hud_console.debug(f" 📊 Status: {telemetry_data['status']}")
                 if "services" in telemetry_data:
-                    logger.debug(" 📋 Services:")
+                    hud_console.debug(" 📋 Services:")
                     for service, status in telemetry_data["services"].items():
                         status_icon = "✅" if status == "running" else "❌"
-                        logger.debug(" %s %s: %s", status_icon, service, status)
+                        hud_console.debug(f" {status_icon} {service}: {status}")
 
                 if self.verbose:
-                    logger.debug("Full telemetry data:\n%s", json.dumps(telemetry_data, indent=2))
+                    hud_console.debug(
+                        f"Full telemetry data:\n{json.dumps(telemetry_data, indent=2)}"
+                    )
         except Exception as e:
             # Telemetry is optional
             if self.verbose:
-                logger.debug("No telemetry available: %s", e)
+                hud_console.debug(f"No telemetry available: {e}")
 
     async def analyze_environment(self) -> dict[str, Any]:
         """Complete analysis of the MCP environment.
@@ -363,7 +361,7 @@ class BaseHUDClient(AgentMCPClient):
                 analysis["resources"].append(resource_info)
         except Exception as e:
             if self.verbose:
-                logger.debug("Could not list resources: %s", e)
+                hud_console.debug(f"Could not list resources: {e}")
 
         return analysis
 
@@ -387,5 +385,5 @@ class BaseHUDClient(AgentMCPClient):
             return functions
         except Exception as e:
             if self.verbose:
-                logger.debug("Could not read hub functions for '%s': %s", hub_name, e)
+                hud_console.debug(f"Could not read hub functions for '{hub_name}': {e}")
         return []
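Most of the base-client changes are a mechanical swap from the module logger to a HUDConsole wrapper. A minimal sketch of the idiom, using only the constructor and methods exercised in this diff (debug, info, warning, error):

# Sketch of the logging idiom adopted in hud/clients/base.py.
import logging

from hud.utils.hud_console import HUDConsole

logger = logging.getLogger(__name__)
hud_console = HUDConsole(logger=logger)

try:
    raise RuntimeError("example failure")
except Exception as e:
    # %-style logger calls become f-string calls on the wrapper, e.g.
    # logger.warning("Failed to close auto-created trace: %s", e) becomes:
    hud_console.warning(f"Failed to close auto-created trace: {e}")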
hud/clients/mcp_use.py CHANGED
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import logging
+import traceback
 from typing import Any
 
 from mcp import Implementation, types
@@ -12,12 +13,14 @@ from mcp_use.session import MCPSession as MCPUseSession
 from pydantic import AnyUrl
 
 from hud.types import MCPToolCall, MCPToolResult
+from hud.utils.hud_console import HUDConsole
 from hud.version import __version__ as hud_version
 
 from .base import BaseHUDClient
 from .utils.mcp_use_retry import patch_all_sessions
 
 logger = logging.getLogger(__name__)
+hud_console = HUDConsole(logger=logger)
 
 
 class MCPUseHUDClient(BaseHUDClient):
@@ -62,11 +65,11 @@ class MCPUseHUDClient(BaseHUDClient):
         try:
             assert self._client is not None  # noqa: S101
             self._sessions = await self._client.create_all_sessions()
-            logger.info("Created %d MCP sessions", len(self._sessions))
+            hud_console.info(f"Created {len(self._sessions)} MCP sessions")
 
             # Patch all sessions with retry logic
             patch_all_sessions(self._sessions)
-            logger.debug("Applied retry logic to all MCP sessions")
+            hud_console.debug("Applied retry logic to all MCP sessions")
 
             # Configure validation for all sessions based on client setting
             try:
@@ -86,21 +89,21 @@ class MCPUseHUDClient(BaseHUDClient):
             # Log session details in verbose mode
             if self.verbose and self._sessions:
                 for name, session in self._sessions.items():
-                    logger.debug(" - %s: %s", name, type(session).__name__)
+                    hud_console.debug(f" - {name}: {type(session).__name__}")
 
         except McpError as e:
             # Protocol error - the server is reachable but rejecting our request
-            logger.error("MCP protocol error: %s", e)
-            logger.error("This typically means:")
-            logger.error("- Invalid or missing initialization parameters")
-            logger.error("- Incompatible protocol version")
-            logger.error("- Server-side configuration issues")
+            hud_console.warning(f"MCP protocol error: {e}")
+            hud_console.warning("This typically means:")
+            hud_console.warning("- Invalid or missing initialization parameters")
+            hud_console.warning("- Incompatible protocol version")
+            hud_console.warning("- Server-side configuration issues")
             raise
         except Exception as e:
             # Transport or other errors
-            logger.error("Failed to create sessions: %s", e)
+            hud_console.error(f"Failed to create sessions: {e}")
             if self.verbose:
-                logger.info("Check that the MCP server is running and accessible")
+                hud_console.info("Check that the MCP server is running and accessible")
             raise
 
         # Populate tool map during initialization
@@ -129,17 +132,14 @@ class MCPUseHUDClient(BaseHUDClient):
                 await session.initialize()
 
                 if session.connector.client_session is None:
-                    logger.warning("Client session not initialized for %s", server_name)
+                    hud_console.warning(f"Client session not initialized for {server_name}")
                     continue
 
                 # List tools (retry logic is handled at transport level)
                 tools_result = await session.connector.client_session.list_tools()
 
-                logger.info(
-                    "Discovered %d tools from '%s': %s",
-                    len(tools_result.tools),
-                    server_name,
-                    [tool.name for tool in tools_result.tools],
+                hud_console.info(
+                    f"Discovered {len(tools_result.tools)} tools from '{server_name}': {', '.join([tool.name for tool in tools_result.tools])}",  # noqa: E501
                 )
 
                 # Add to collections with optional prefix
@@ -167,16 +167,15 @@ class MCPUseHUDClient(BaseHUDClient):
                 if self.verbose:
                     for tool in tools_result.tools:
                         description = tool.description or ""
-                        logger.debug(
-                            " Tool '%s': %s",
-                            tool.name,
-                            description[:100] + "..." if len(description) > 100 else description,
+                        hud_console.debug(
+                            f" Tool '{tool.name}': {description[:100] + '...' if len(description) > 100 else description}",  # noqa: E501
                         )
 
             except Exception as e:
-                logger.error("Error discovering tools from '%s': %s", server_name, e)
+                hud_console.error(f"Error discovering tools from '{server_name}': {e}")
                 if self.verbose:
-                    logger.exception("Full error details:")
+                    hud_console.error("Full error details:")
+                    traceback.print_exc()
 
         return all_tools
 
@@ -196,12 +195,8 @@ class MCPUseHUDClient(BaseHUDClient):
         session = self._sessions[server_name]
 
         if self.verbose:
-            logger.debug(
-                "Calling tool '%s' (original: '%s') on server '%s' with arguments: %s",
-                tool_call.name,
-                original_tool.name,
-                server_name,
-                tool_call.arguments,
+            hud_console.debug(
+                f"Calling tool '{tool_call.name}' (original: '{original_tool.name}') on server '{server_name}' with arguments: {tool_call.arguments}"  # noqa: E501
             )
 
         if session.connector.client_session is None:
@@ -214,7 +209,7 @@ class MCPUseHUDClient(BaseHUDClient):
         )
 
         if self.verbose:
-            logger.debug("Tool '%s' result: %s", tool_call.name, result)
+            hud_console.debug(f"Tool '{tool_call.name}' result: {result}")
 
         # MCP-use already returns the correct type, but we need to ensure it's MCPToolResult
         return MCPToolResult(
@@ -246,7 +241,7 @@ class MCPUseHUDClient(BaseHUDClient):
                 return resources.resources
             except Exception as e:
                 if self.verbose:
-                    logger.debug("Could not list resources from server '%s': %s", server_name, e)
+                    hud_console.debug(f"Could not list resources from server '{server_name}': {e}")
                 continue
         return []
 
@@ -276,8 +271,8 @@ class MCPUseHUDClient(BaseHUDClient):
                     raise AttributeError("read_resource not available")
 
                 if self.verbose:
-                    logger.debug(
-                        "Successfully read resource '%s' from server '%s'", uri, server_name
+                    hud_console.debug(
+                        f"Successfully read resource '{uri}' from server '{server_name}'"
                     )
 
                 return result
@@ -285,24 +280,21 @@ class MCPUseHUDClient(BaseHUDClient):
             except McpError as e:
                 # McpError is expected for unsupported resources
                 if "telemetry://" in str(uri):
-                    logger.debug(
-                        "Telemetry resource not supported by server '%s': %s", server_name, e
+                    hud_console.debug(
+                        f"Telemetry resource not supported by server '{server_name}': {e}"
                     )
                 elif self.verbose:
-                    logger.debug(
-                        "MCP resource error for '%s' from server '%s': %s", uri, server_name, e
+                    hud_console.debug(
+                        f"MCP resource error for '{uri}' from server '{server_name}': {e}"
                     )
                 continue
             except Exception as e:
                 # Other errors might be more serious
                 if "telemetry://" in str(uri):
-                    logger.debug("Failed to fetch telemetry from server '%s': %s", server_name, e)
+                    hud_console.debug(f"Failed to fetch telemetry from server '{server_name}': {e}")
                 else:
-                    logger.warning(
-                        "Unexpected error reading resource '%s' from server '%s': %s",
-                        uri,
-                        server_name,
-                        e,
+                    hud_console.warning(
+                        f"Unexpected error reading resource '{uri}' from server '{server_name}': {e}"  # noqa: E501
                     )
                 continue
 
@@ -311,14 +303,14 @@ class MCPUseHUDClient(BaseHUDClient):
     async def _disconnect(self) -> None:
         """Close all active sessions."""
         if self._client is None:
-            logger.warning("Client is not connected, cannot close")
+            hud_console.warning("Client is not connected, cannot close")
             return
 
         await self._client.close_all_sessions()
         self._sessions = {}
         self._tool_map = {}
         self._initialized = False
-        logger.debug("MCP-use client disconnected")
+        hud_console.debug("MCP-use client disconnected")
 
     # Legacy compatibility methods (limited; tests should not rely on these)
     def get_sessions(self) -> dict[str, Any]:
hud/clients/tests/test_mcp_use_retry.py CHANGED
@@ -36,20 +36,20 @@ class TestRetrySession:
 
         # Check adapter configuration
         adapter = session.adapters["http://"]
-        assert adapter.max_retries.total == 5
-        assert 500 in adapter.max_retries.status_forcelist
-        assert 502 in adapter.max_retries.status_forcelist
-        assert adapter.max_retries.backoff_factor == 2.0
+        assert hasattr(adapter, "max_retries") and adapter.max_retries.total == 5  # type: ignore
+        assert 500 in adapter.max_retries.status_forcelist  # type: ignore
+        assert 502 in adapter.max_retries.status_forcelist  # type: ignore
+        assert adapter.max_retries.backoff_factor == 2.0  # type: ignore
 
     def test_retry_session_default_values(self):
         """Test retry session with default values."""
         session = create_retry_session()
 
         adapter = session.adapters["https://"]
-        assert adapter.max_retries.total == 3
-        assert 502 in adapter.max_retries.status_forcelist
-        assert 503 in adapter.max_retries.status_forcelist
-        assert 504 in adapter.max_retries.status_forcelist
+        assert adapter.max_retries.total == 3  # type: ignore
+        assert 502 in adapter.max_retries.status_forcelist  # type: ignore
+        assert 503 in adapter.max_retries.status_forcelist  # type: ignore
+        assert 504 in adapter.max_retries.status_forcelist  # type: ignore
 
 
 class TestAsyncRetryWrapper:
@@ -316,7 +316,7 @@ class TestMCPUseClientRetry:
         # Verify retry worked
         assert call_count == 2  # Failed once, then succeeded
         assert not result.isError
-        assert result.content[0].text == "Success"
+        assert result.content[0].text == "Success"  # type: ignore
 
     @pytest.mark.asyncio
     async def test_resource_read_with_retry(self):
@@ -371,7 +371,7 @@ class TestMCPUseClientRetry:
         # Verify retry worked
         assert call_count == 2  # Failed once, then succeeded
         assert result is not None
-        assert result.contents[0].text == '{"status": "ok"}'
+        assert result.contents[0].text == '{"status": "ok"}'  # type: ignore
 
 
 if __name__ == "__main__":
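The test updates above only add type-ignore hints, but they show the shape of the retry helper: create_retry_session() returns a requests-style session whose mounted adapters carry a urllib3 Retry policy. A hedged sketch of what the tests exercise; the import path is an assumption based on the .utils.mcp_use_retry import in hud/clients/mcp_use.py.

# Illustrative only; import path assumed, expected defaults taken from the tests.
from hud.clients.utils.mcp_use_retry import create_retry_session

session = create_retry_session()
adapter = session.adapters["https://"]

# The tests assert on the mounted adapter's urllib3 Retry configuration.
assert adapter.max_retries.total == 3
assert 503 in adapter.max_retries.status_forcelist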
hud/datasets/__init__.py CHANGED
@@ -7,13 +7,14 @@ Provides data models, utilities, and execution functions for working with HUD da
 # Execution functions
 from __future__ import annotations
 
-from .execution import (
+from hud.types import Task
+
+from .parallel import (
     calculate_optimal_workers,
-    run_dataset,
     run_dataset_parallel,
     run_dataset_parallel_manual,
 )
-from .task import Task
+from .runner import run_dataset
 
 # Utilities
 from .utils import fetch_system_prompt_from_dataset, save_tasks
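For downstream code the practical effect is that Task now lives in hud.types and the execution helpers are re-exported from the flattened modules. A quick sketch of the new import surface, based only on the re-exports visible in this hunk:

# Import locations after 0.4.30; the old paths were hud.datasets.task.Task and
# hud.datasets.execution.{parallel,runner}.
from hud.types import Task
from hud.datasets import (
    calculate_optimal_workers,
    run_dataset,                  # now re-exported from .runner
    run_dataset_parallel,
    run_dataset_parallel_manual,
    save_tasks,
)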
hud/datasets/{execution/parallel.py → parallel.py} RENAMED
@@ -65,8 +65,8 @@ def _process_worker(
 
     import hud
     from hud.agents.misc.response_agent import ResponseAgent
-    from hud.datasets.task import Task
     from hud.otel import configure_telemetry
+    from hud.types import Task
 
     # Ensure stdout is not buffered for immediate output
     try:
hud/datasets/{execution/runner.py → runner.py} RENAMED
@@ -9,7 +9,7 @@ from typing import TYPE_CHECKING, Any, cast
 from datasets import Dataset, load_dataset
 
 from hud.agents.misc import ResponseAgent
-from hud.datasets.task import Task
+from hud.types import Task
 
 if TYPE_CHECKING:
     from hud.agents import MCPAgent
hud/datasets/utils.py CHANGED
@@ -8,7 +8,7 @@ from typing import Any
 
 from datasets import Dataset
 
-from .task import Task
+from hud.types import Task
 
 logger = logging.getLogger("hud.datasets")
 
hud/native/comparator.py CHANGED
@@ -513,11 +513,11 @@ def make_alias_tool(name: str, preset_mode: ComparisonMode, description: str) ->
 
 
 # Create MCP server
-comparator_server = MCPServer(name="comparator")
+comparator = MCPServer(name="comparator")
 
 # Register main tool
-comparator_server.add_tool(SubmitTool())
-comparator_server.add_tool(CompareTool())
+comparator.add_tool(SubmitTool())
+comparator.add_tool(CompareTool())
 
 # Register aliases - these are just thin wrappers
 ALIASES = [
@@ -534,13 +534,13 @@ ALIASES = [
 
 for name, mode, desc in ALIASES:
     AliasTool = make_alias_tool(name, mode, desc)
-    comparator_server.add_tool(AliasTool())
+    comparator.add_tool(AliasTool())
 
 # Export for mounting
-__all__ = ["comparator_server"]
+__all__ = ["comparator"]
 
 
 if __name__ == "__main__":
     # Run as standalone server
     logger.info("Starting Comparator MCP Server...")
-    comparator_server.run()
+    comparator.run()
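Because the exported symbol is renamed, callers that previously imported comparator_server need the new name. A minimal sketch; only the rename and the run() call come from the diff.

# Before 0.4.30:
#   from hud.native.comparator import comparator_server
# After the rename:
from hud.native.comparator import comparator

# Run it standalone, mirroring the __main__ block above.
comparator.run()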