hud-python 0.4.52__py3-none-any.whl → 0.4.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (69)
  1. hud/agents/base.py +9 -2
  2. hud/agents/openai_chat_generic.py +15 -3
  3. hud/agents/tests/test_base.py +15 -0
  4. hud/agents/tests/test_base_runtime.py +164 -0
  5. hud/cli/__init__.py +6 -3
  6. hud/cli/build.py +35 -27
  7. hud/cli/dev.py +11 -29
  8. hud/cli/eval.py +61 -61
  9. hud/cli/tests/test_analyze_module.py +120 -0
  10. hud/cli/tests/test_build.py +24 -2
  11. hud/cli/tests/test_build_failure.py +41 -0
  12. hud/cli/tests/test_build_module.py +50 -0
  13. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  14. hud/cli/tests/test_cli_root.py +134 -0
  15. hud/cli/tests/test_mcp_server.py +8 -7
  16. hud/cli/tests/test_push_happy.py +74 -0
  17. hud/cli/tests/test_push_wrapper.py +23 -0
  18. hud/cli/utils/docker.py +120 -1
  19. hud/cli/utils/runner.py +1 -1
  20. hud/cli/utils/tests/__init__.py +0 -0
  21. hud/cli/utils/tests/test_config.py +58 -0
  22. hud/cli/utils/tests/test_docker.py +93 -0
  23. hud/cli/utils/tests/test_docker_hints.py +71 -0
  24. hud/cli/utils/tests/test_env_check.py +74 -0
  25. hud/cli/utils/tests/test_environment.py +42 -0
  26. hud/cli/utils/tests/test_interactive_module.py +60 -0
  27. hud/cli/utils/tests/test_local_runner.py +50 -0
  28. hud/cli/utils/tests/test_logging_utils.py +23 -0
  29. hud/cli/utils/tests/test_metadata.py +49 -0
  30. hud/cli/utils/tests/test_package_runner.py +35 -0
  31. hud/cli/utils/tests/test_registry_utils.py +49 -0
  32. hud/cli/utils/tests/test_remote_runner.py +25 -0
  33. hud/cli/utils/tests/test_runner_modules.py +52 -0
  34. hud/cli/utils/tests/test_source_hash.py +36 -0
  35. hud/cli/utils/tests/test_tasks.py +80 -0
  36. hud/cli/utils/version_check.py +2 -2
  37. hud/datasets/tests/__init__.py +0 -0
  38. hud/datasets/tests/test_runner.py +106 -0
  39. hud/datasets/tests/test_utils.py +228 -0
  40. hud/otel/tests/__init__.py +0 -1
  41. hud/otel/tests/test_instrumentation.py +207 -0
  42. hud/server/tests/test_server_extra.py +2 -0
  43. hud/shared/exceptions.py +35 -4
  44. hud/shared/hints.py +25 -0
  45. hud/shared/requests.py +15 -3
  46. hud/shared/tests/test_exceptions.py +31 -23
  47. hud/shared/tests/test_hints.py +167 -0
  48. hud/telemetry/tests/test_async_context.py +242 -0
  49. hud/telemetry/tests/test_instrument.py +414 -0
  50. hud/telemetry/tests/test_job.py +609 -0
  51. hud/telemetry/tests/test_trace.py +183 -5
  52. hud/tools/computer/settings.py +2 -2
  53. hud/tools/tests/test_submit.py +85 -0
  54. hud/tools/tests/test_types.py +193 -0
  55. hud/types.py +7 -1
  56. hud/utils/agent_factories.py +1 -3
  57. hud/utils/mcp.py +1 -1
  58. hud/utils/tests/test_agent_factories.py +60 -0
  59. hud/utils/tests/test_mcp.py +4 -6
  60. hud/utils/tests/test_pretty_errors.py +186 -0
  61. hud/utils/tests/test_tasks.py +187 -0
  62. hud/utils/tests/test_tool_shorthand.py +154 -0
  63. hud/utils/tests/test_version.py +1 -1
  64. hud/version.py +1 -1
  65. {hud_python-0.4.52.dist-info → hud_python-0.4.53.dist-info}/METADATA +47 -48
  66. {hud_python-0.4.52.dist-info → hud_python-0.4.53.dist-info}/RECORD +69 -31
  67. {hud_python-0.4.52.dist-info → hud_python-0.4.53.dist-info}/WHEEL +0 -0
  68. {hud_python-0.4.52.dist-info → hud_python-0.4.53.dist-info}/entry_points.txt +0 -0
  69. {hud_python-0.4.52.dist-info → hud_python-0.4.53.dist-info}/licenses/LICENSE +0 -0
hud/agents/base.py CHANGED
@@ -137,7 +137,11 @@ class MCPAgent(ABC):
137
137
  "No MCPClient. Please provide one when initializing the agent or pass a Task with mcp_config." # noqa: E501
138
138
  )
139
139
 
140
- await self._setup_config(self.mcp_client.mcp_config)
140
+ try:
141
+ client_cfg = getattr(self.mcp_client, "mcp_config", None)
142
+ except Exception:
143
+ client_cfg = None
144
+ await self._setup_config(client_cfg)
141
145
 
142
146
  # Initialize client if needed
143
147
  try:
@@ -618,8 +622,11 @@ class MCPAgent(ABC):
618
622
  except Exception as e:
619
623
  self.console.error_log(f"Response lifecycle tool failed: {e}")
620
624
 
621
- async def _setup_config(self, mcp_config: dict[str, dict[str, Any]]) -> None:
625
+ async def _setup_config(self, mcp_config: dict[str, dict[str, Any]] | None) -> None:
622
626
  """Inject metadata into the metadata of the initialize request."""
627
+ if not isinstance(mcp_config, dict):
628
+ return
629
+
623
630
  if self.metadata:
624
631
  patch_mcp_config(
625
632
  mcp_config,
hud/agents/openai_chat_generic.py CHANGED
@@ -20,6 +20,7 @@ import logging
20
20
  from typing import TYPE_CHECKING, Any, ClassVar, cast
21
21
 
22
22
  import mcp.types as types
23
+ from openai import AsyncOpenAI
23
24
 
24
25
  from hud import instrument
25
26
  from hud.types import AgentResponse, MCPToolCall, MCPToolResult
@@ -28,7 +29,6 @@ from hud.utils.hud_console import HUDConsole
28
29
  from .base import MCPAgent
29
30
 
30
31
  if TYPE_CHECKING:
31
- from openai import AsyncOpenAI
32
32
  from openai.types.chat import ChatCompletionToolParam
33
33
 
34
34
  logger = logging.getLogger(__name__)
@@ -42,14 +42,26 @@ class GenericOpenAIChatAgent(MCPAgent):
42
42
  def __init__(
43
43
  self,
44
44
  *,
45
- openai_client: AsyncOpenAI | None,
45
+ openai_client: AsyncOpenAI | None = None,
46
+ api_key: str | None = None,
47
+ base_url: str | None = None,
46
48
  model_name: str = "gpt-4o-mini",
47
49
  completion_kwargs: dict[str, Any] | None = None,
48
50
  **agent_kwargs: Any,
49
51
  ) -> None:
50
52
  # Accept base-agent settings via **agent_kwargs (e.g., mcp_client, system_prompt, etc.)
51
53
  super().__init__(**agent_kwargs)
52
- self.oai = openai_client
54
+
55
+ # Handle client creation - support both patterns
56
+ if openai_client is not None:
57
+ # Use provided client (backward compatibility)
58
+ self.oai = openai_client
59
+ elif api_key is not None or base_url is not None:
60
+ # Create client from config (new pattern, consistent with other agents)
61
+ self.oai = AsyncOpenAI(api_key=api_key, base_url=base_url)
62
+ else:
63
+ raise ValueError("Either openai_client or (api_key and base_url) must be provided")
64
+
53
65
  self.model_name = model_name
54
66
  self.completion_kwargs: dict[str, Any] = completion_kwargs or {}
55
67
  self.mcp_schemas = []
hud/agents/tests/test_base.py CHANGED
@@ -329,6 +329,21 @@ class TestBaseMCPAgent:
329
329
  # call_tools doesn't validate empty names, it will return error
330
330
  await agent.call_tools(tool_call)
331
331
 
332
+ def test_get_tool_schemas(self):
333
+ """Test getting tool schemas."""
334
+ agent = MockMCPAgent()
335
+
336
+ agent._available_tools = [
337
+ types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"}),
338
+ types.Tool(name="setup", description="Setup", inputSchema={"type": "object"}),
339
+ ]
340
+
341
+ schemas = agent.get_tool_schemas()
342
+
343
+ # Should include non-lifecycle tools
344
+ assert len(schemas) == 2
345
+ assert schemas[0]["name"] == "tool1"
346
+
332
347
  def test_get_tools_by_server(self):
333
348
  """Test getting tools grouped by server."""
334
349
  agent = MockMCPAgent()
hud/agents/tests/test_base_runtime.py ADDED
@@ -0,0 +1,164 @@
1
+ from __future__ import annotations
2
+
3
+ from unittest import mock
4
+
5
+ import mcp.types as types
6
+ import pytest
7
+
8
+ from hud.agents.base import MCPAgent, find_content, find_reward, text_to_blocks
9
+ from hud.types import AgentResponse, MCPToolCall, MCPToolResult
10
+
11
+
12
+ class DummyAgent(MCPAgent):
13
+ async def get_system_messages(self):
14
+ return [types.TextContent(text="sys", type="text")]
15
+
16
+ async def get_response(self, messages):
17
+ # Single step: no tool calls -> done
18
+ return AgentResponse(content="ok", tool_calls=[], done=True)
19
+
20
+ async def format_blocks(self, blocks):
21
+ # Return as-is
22
+ return blocks
23
+
24
+ async def format_tool_results(self, tool_calls, tool_results):
25
+ return [types.TextContent(text="tools", type="text")]
26
+
27
+
28
+ @pytest.mark.asyncio
29
+ async def test_run_with_string_prompt_auto_client(monkeypatch):
30
+ # Fake MCPClient with required methods
31
+ fake_client = mock.AsyncMock()
32
+ fake_client.initialize.return_value = None
33
+ fake_client.list_tools.return_value = []
34
+ fake_client.shutdown.return_value = None
35
+
36
+ # Patch MCPClient construction inside initialize()
37
+ with mock.patch("hud.clients.MCPClient", return_value=fake_client):
38
+ agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
39
+ result = await agent.run("hello", max_steps=1)
40
+ assert result.done is True and result.isError is False
41
+
42
+
43
+ def test_find_reward_and_content_extractors():
44
+ # Structured content
45
+ r = MCPToolResult(
46
+ content=text_to_blocks("{}"), isError=False, structuredContent={"reward": 0.7}
47
+ )
48
+ assert find_reward(r) == 0.7
49
+
50
+ # Text JSON
51
+ r2 = MCPToolResult(content=text_to_blocks('{"score": 0.5, "content": "hi"}'), isError=False)
52
+ assert find_reward(r2) == 0.5
53
+ assert find_content(r2) == "hi"
54
+
55
+
56
+ @pytest.mark.asyncio
57
+ async def test_call_tools_error_paths():
58
+ fake_client = mock.AsyncMock()
59
+ # First call succeeds
60
+ ok_result = MCPToolResult(content=text_to_blocks("ok"), isError=False)
61
+ fake_client.call_tool.side_effect = [ok_result, RuntimeError("boom")]
62
+ agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
63
+ results = await agent.call_tools(
64
+ [MCPToolCall(name="a", arguments={}), MCPToolCall(name="b", arguments={})]
65
+ )
66
+ assert results[0].isError is False
67
+ assert results[1].isError is True
68
+
69
+
70
+ @pytest.mark.asyncio
71
+ async def test_initialize_without_client_raises_valueerror():
72
+ agent = DummyAgent(mcp_client=None, auto_trace=False)
73
+ with pytest.raises(ValueError):
74
+ await agent.initialize(None)
75
+
76
+
77
+ def test_get_available_tools_before_initialize_raises():
78
+ agent = DummyAgent(mcp_client=mock.AsyncMock(), auto_trace=False)
79
+ with pytest.raises(RuntimeError):
80
+ agent.get_available_tools()
81
+
82
+
83
+ @pytest.mark.asyncio
84
+ async def test_format_message_invalid_type_raises():
85
+ agent = DummyAgent(mcp_client=mock.AsyncMock(), auto_trace=False)
86
+ with pytest.raises(ValueError):
87
+ await agent.format_message({"oops": 1}) # type: ignore
88
+
89
+
90
+ @pytest.mark.asyncio
91
+ async def test_call_tools_timeout_error_shutdown_called():
92
+ fake_client = mock.AsyncMock()
93
+ fake_client.call_tool.side_effect = TimeoutError("timeout")
94
+ fake_client.shutdown.return_value = None
95
+ agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
96
+ with pytest.raises(TimeoutError):
97
+ await agent.call_tools(MCPToolCall(name="x", arguments={}))
98
+ fake_client.shutdown.assert_awaited_once()
99
+
100
+
101
+ def test_text_to_blocks_shapes():
102
+ blocks = text_to_blocks("x")
103
+ assert isinstance(blocks, list) and blocks and isinstance(blocks[0], types.TextContent)
104
+
105
+
106
+ @pytest.mark.asyncio
107
+ async def test_run_returns_connection_error_trace(monkeypatch):
108
+ fake_client = mock.AsyncMock()
109
+ fake_client.mcp_config = {}
110
+ fake_client.initialize.side_effect = RuntimeError("Connection refused http://localhost:1234")
111
+ fake_client.list_tools.return_value = []
112
+ fake_client.shutdown.return_value = None
113
+
114
+ class DummyCM:
115
+ def __exit__(self, *args, **kwargs):
116
+ return False
117
+
118
+ monkeypatch.setattr("hud.utils.mcp.setup_hud_telemetry", lambda *args, **kwargs: DummyCM())
119
+
120
+ agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
121
+ result = await agent.run("p", max_steps=1)
122
+ assert result.isError is True
123
+ assert "Could not connect" in (result.content or "")
124
+
125
+
126
+ @pytest.mark.asyncio
127
+ async def test_run_calls_response_tool_when_configured(monkeypatch):
128
+ fake_client = mock.AsyncMock()
129
+ fake_client.mcp_config = {}
130
+ fake_client.initialize.return_value = None
131
+ fake_client.list_tools.return_value = []
132
+ fake_client.shutdown.return_value = None
133
+ ok = MCPToolResult(content=text_to_blocks("ok"), isError=False)
134
+ fake_client.call_tool.return_value = ok
135
+
136
+ class DummyCM:
137
+ def __exit__(self, *args, **kwargs):
138
+ return False
139
+
140
+ monkeypatch.setattr("hud.utils.mcp.setup_hud_telemetry", lambda *args, **kwargs: DummyCM())
141
+
142
+ agent = DummyAgent(mcp_client=fake_client, auto_trace=False, response_tool_name="submit")
143
+ result = await agent.run("hello", max_steps=1)
144
+ assert result.isError is False
145
+ fake_client.call_tool.assert_awaited()
146
+
147
+
148
+ @pytest.mark.asyncio
149
+ async def test_get_available_tools_after_initialize(monkeypatch):
150
+ fake_client = mock.AsyncMock()
151
+ fake_client.mcp_config = {}
152
+ fake_client.initialize.return_value = None
153
+ fake_client.list_tools.return_value = []
154
+ fake_client.shutdown.return_value = None
155
+
156
+ class DummyCM:
157
+ def __exit__(self, *args, **kwargs):
158
+ return False
159
+
160
+ monkeypatch.setattr("hud.utils.mcp.setup_hud_telemetry", lambda *args, **kwargs: DummyCM())
161
+
162
+ agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
163
+ await agent.initialize(None)
164
+ assert agent.get_available_tools() == []
hud/cli/__init__.py CHANGED
@@ -242,15 +242,18 @@ def debug(
242
242
  if build and not build_environment(directory, image_name):
243
243
  raise typer.Exit(1)
244
244
 
245
- # Build Docker command
246
- from .utils.docker import build_run_command
245
+ # Build Docker command with folder-mode envs
246
+ from .utils.docker import create_docker_run_command
247
247
 
248
- command = build_run_command(image_name, docker_args)
248
+ command = create_docker_run_command(
249
+ image_name, docker_args=docker_args, env_dir=directory
250
+ )
249
251
  else:
250
252
  # Assume it's an image name
251
253
  image = first_param
252
254
  from .utils.docker import build_run_command
253
255
 
256
+ # Image-only mode: do not auto-inject local .env
254
257
  command = build_run_command(image, docker_args)
255
258
  else:
256
259
  console.print(
hud/cli/build.py CHANGED
@@ -161,49 +161,42 @@ async def analyze_mcp_environment(
161
161
  hud_console = HUDConsole()
162
162
  env_vars = env_vars or {}
163
163
 
164
- # Build Docker command to run the image
165
- docker_cmd = ["docker", "run", "--rm", "-i"]
164
+ # Build Docker command to run the image, injecting any provided env vars
165
+ from hud.cli.utils.docker import build_env_flags
166
166
 
167
- # Add environment variables
168
- for key, value in env_vars.items():
169
- docker_cmd.extend(["-e", f"{key}={value}"])
167
+ docker_cmd = ["docker", "run", "--rm", "-i", *build_env_flags(env_vars), image]
170
168
 
171
- docker_cmd.append(image)
169
+ # Show full docker command being used for analysis
170
+ hud_console.dim_info("Command:", " ".join(docker_cmd))
172
171
 
173
- # Create MCP config
174
- config = {
175
- "server": {"command": docker_cmd[0], "args": docker_cmd[1:] if len(docker_cmd) > 1 else []}
176
- }
172
+ # Create MCP config consistently with analyze helpers
173
+ from hud.cli.analyze import parse_docker_command
174
+
175
+ mcp_config = parse_docker_command(docker_cmd)
177
176
 
178
177
  # Initialize client and measure timing
179
178
  start_time = time.time()
180
- client = MCPClient(mcp_config=config, verbose=verbose, auto_trace=False)
179
+ client = MCPClient(mcp_config=mcp_config, verbose=verbose, auto_trace=False)
181
180
  initialized = False
182
181
 
183
182
  try:
184
183
  if verbose:
185
- hud_console.info(f"Initializing MCP client with command: {' '.join(docker_cmd)}")
184
+ hud_console.info("Initializing MCP client...")
186
185
 
187
- # Add timeout to fail fast instead of hanging (30 seconds)
186
+ # Add timeout to fail fast instead of hanging (60 seconds)
188
187
  await asyncio.wait_for(client.initialize(), timeout=60.0)
189
188
  initialized = True
190
189
  initialize_ms = int((time.time() - start_time) * 1000)
191
190
 
192
- # Get tools
193
- tools = await client.list_tools()
194
-
195
- # Extract tool information
196
- tool_info = []
197
- for tool in tools:
198
- tool_dict = {"name": tool.name, "description": tool.description}
199
- if hasattr(tool, "inputSchema") and tool.inputSchema:
200
- tool_dict["inputSchema"] = tool.inputSchema
201
- tool_info.append(tool_dict)
191
+ # Delegate to standard analysis helper for consistency
192
+ full_analysis = await client.analyze_environment()
202
193
 
194
+ # Normalize to build's expected fields
195
+ tools_list = full_analysis.get("tools", [])
203
196
  return {
204
197
  "initializeMs": initialize_ms,
205
- "toolCount": len(tools),
206
- "tools": tool_info,
198
+ "toolCount": len(tools_list),
199
+ "tools": tools_list,
207
200
  "success": True,
208
201
  }
209
202
  except TimeoutError:
@@ -295,6 +288,10 @@ def build_environment(
295
288
  hud_console.error(f"Directory not found: {directory}")
296
289
  raise typer.Exit(1)
297
290
 
291
+ from hud.cli.utils.docker import require_docker_running
292
+
293
+ require_docker_running()
294
+
298
295
  # Step 1: Check for hud.lock.yaml (previous build)
299
296
  lock_path = env_dir / "hud.lock.yaml"
300
297
  base_name = None
@@ -355,13 +352,24 @@ def build_environment(
355
352
 
356
353
  hud_console.success(f"Built temporary image: {temp_tag}")
357
354
 
358
- # Analyze the environment
355
+ # Analyze the environment (merge folder .env if present)
359
356
  hud_console.progress_message("Analyzing MCP environment...")
360
357
 
361
358
  loop = asyncio.new_event_loop()
362
359
  asyncio.set_event_loop(loop)
363
360
  try:
364
- analysis = loop.run_until_complete(analyze_mcp_environment(temp_tag, verbose, env_vars))
361
+ # Merge .env from env_dir for analysis only
362
+ try:
363
+ from hud.cli.utils.docker import load_env_vars_for_dir
364
+
365
+ env_from_file = load_env_vars_for_dir(env_dir)
366
+ except Exception:
367
+ env_from_file = {}
368
+ merged_env_for_analysis = {**env_from_file, **(env_vars or {})}
369
+
370
+ analysis = loop.run_until_complete(
371
+ analyze_mcp_environment(temp_tag, verbose, merged_env_for_analysis)
372
+ )
365
373
  except Exception as e:
366
374
  hud_console.error(f"Failed to analyze MCP environment: {e}")
367
375
  hud_console.info("")
hud/cli/dev.py CHANGED
@@ -504,15 +504,12 @@ def run_docker_dev_server(
504
504
  base_name = image_name.replace(":", "-").replace("/", "-")
505
505
  container_name = f"{base_name}-dev-{pid}"
506
506
 
507
- # Build docker run command with volume mounts
508
- docker_cmd = [
509
- "docker",
510
- "run",
511
- "--rm",
512
- "-i",
507
+ # Build docker run command with volume mounts and folder-mode envs
508
+ from .utils.docker import create_docker_run_command
509
+
510
+ base_args = [
513
511
  "--name",
514
512
  container_name,
515
- # Mount both server and environment for hot-reload
516
513
  "-v",
517
514
  f"{env_dir.absolute()}/server:/app/server:rw",
518
515
  "-v",
@@ -524,29 +521,14 @@ def run_docker_dev_server(
524
521
  "-e",
525
522
  "HUD_DEV=1",
526
523
  ]
524
+ combined_args = [*base_args, *docker_args] if docker_args else base_args
525
+ docker_cmd = create_docker_run_command(
526
+ image_name,
527
+ docker_args=combined_args,
528
+ env_dir=env_dir,
529
+ )
527
530
 
528
- # Load .env file if present
529
- env_file = env_dir / ".env"
530
- loaded_env_vars: dict[str, str] = {}
531
- if env_file.exists():
532
- try:
533
- from hud.cli.utils.config import parse_env_file
534
-
535
- env_contents = env_file.read_text(encoding="utf-8")
536
- loaded_env_vars = parse_env_file(env_contents)
537
- for key, value in loaded_env_vars.items():
538
- docker_cmd.extend(["-e", f"{key}={value}"])
539
- if verbose and loaded_env_vars:
540
- hud_console.info(f"Loaded {len(loaded_env_vars)} env var(s) from .env")
541
- except Exception as e:
542
- hud_console.warning(f"Failed to load .env file: {e}")
543
-
544
- # Add user-provided Docker arguments
545
- if docker_args:
546
- docker_cmd.extend(docker_args)
547
-
548
- # Append the image name
549
- docker_cmd.append(image_name)
531
+ # Env flags already injected by create_docker_run_command
550
532
 
551
533
  # Print startup info
552
534
  hud_console.header("HUD Development Mode (Docker)")
hud/cli/eval.py CHANGED
@@ -68,6 +68,50 @@ def get_available_models() -> list[dict[str, str | None]]:
68
68
  return []
69
69
 
70
70
 
71
+ def _build_vllm_config(
72
+ vllm_base_url: str | None,
73
+ model: str | None,
74
+ allowed_tools: list[str] | None,
75
+ verbose: bool,
76
+ ) -> dict[str, Any]:
77
+ """Build configuration for vLLM agent.
78
+
79
+ Args:
80
+ vllm_base_url: Optional base URL for vLLM server
81
+ model: Model name to use
82
+ allowed_tools: Optional list of allowed tools
83
+ verbose: Enable verbose output
84
+
85
+ Returns:
86
+ Dictionary with agent configuration
87
+ """
88
+ # Determine base URL and API key
89
+ if vllm_base_url is not None:
90
+ base_url = vllm_base_url
91
+ api_key = settings.api_key if base_url.startswith(settings.hud_rl_url) else "token-abc123"
92
+ hud_console.info(f"Using vLLM server at {base_url}")
93
+ else:
94
+ base_url = "http://localhost:8000/v1"
95
+ api_key = "token-abc123"
96
+
97
+ config: dict[str, Any] = {
98
+ "api_key": api_key,
99
+ "base_url": base_url,
100
+ "model_name": model or "served-model",
101
+ "verbose": verbose,
102
+ "completion_kwargs": {
103
+ "temperature": 0.7,
104
+ "max_tokens": 2048,
105
+ "tool_choice": "auto",
106
+ },
107
+ }
108
+
109
+ if allowed_tools:
110
+ config["allowed_tools"] = allowed_tools
111
+
112
+ return config
113
+
114
+
71
115
  def build_agent(
72
116
  agent_type: Literal["claude", "openai", "vllm", "litellm", "integration_test"],
73
117
  *,
@@ -86,8 +130,6 @@ def build_agent(
86
130
  elif agent_type == "vllm":
87
131
  # Create a generic OpenAI agent for vLLM server
88
132
  try:
89
- from openai import AsyncOpenAI
90
-
91
133
  from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
92
134
  except ImportError as e:
93
135
  hud_console.error(
@@ -96,36 +138,14 @@ def build_agent(
96
138
  )
97
139
  raise typer.Exit(1) from e
98
140
 
99
- # Determine the base URL to use
100
- if vllm_base_url is not None:
101
- # Use the provided vLLM URL (for custom/local servers)
102
- base_url = vllm_base_url
103
- hud_console.info(f"Using vLLM server at {base_url}")
104
- api_key = (
105
- settings.api_key if base_url.startswith(settings.hud_rl_url) else "token-abc123"
106
- )
107
- else:
108
- # Default to localhost
109
- base_url = "http://localhost:8000/v1"
110
- api_key = "token-abc123"
111
-
112
- # Create OpenAI client for vLLM
113
- openai_client = AsyncOpenAI(
114
- base_url=base_url,
115
- api_key=api_key,
116
- timeout=30.0,
117
- )
118
-
119
- return GenericOpenAIChatAgent(
120
- openai_client=openai_client,
121
- model_name=model or "served-model", # Default model name
141
+ # Use the shared config builder
142
+ config = _build_vllm_config(
143
+ vllm_base_url=vllm_base_url,
144
+ model=model,
145
+ allowed_tools=allowed_tools,
122
146
  verbose=verbose,
123
- completion_kwargs={
124
- "temperature": 0.7,
125
- "max_tokens": 2048,
126
- "tool_choice": "required", # if self.actor_config.force_tool_choice else "auto",
127
- },
128
147
  )
148
+ return GenericOpenAIChatAgent(**config)
129
149
 
130
150
  elif agent_type == "openai":
131
151
  try:
@@ -257,25 +277,17 @@ async def run_single_task(
257
277
  agent_config["allowed_tools"] = allowed_tools
258
278
  elif agent_type == "vllm":
259
279
  # Special handling for vLLM
260
- sample_agent = build_agent(
261
- agent_type,
280
+ from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
281
+
282
+ agent_class = GenericOpenAIChatAgent
283
+
284
+ # Use the shared config builder
285
+ agent_config = _build_vllm_config(
286
+ vllm_base_url=vllm_base_url,
262
287
  model=model,
263
288
  allowed_tools=allowed_tools,
264
289
  verbose=verbose,
265
- vllm_base_url=vllm_base_url,
266
290
  )
267
- agent_config = {
268
- "openai_client": sample_agent.oai,
269
- "model_name": sample_agent.model_name,
270
- "verbose": verbose,
271
- "completion_kwargs": sample_agent.completion_kwargs,
272
- }
273
- if allowed_tools:
274
- agent_config["allowed_tools"] = allowed_tools
275
-
276
- from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
277
-
278
- agent_class = GenericOpenAIChatAgent
279
291
  elif agent_type == "openai":
280
292
  from hud.agents import OperatorAgent
281
293
 
@@ -382,6 +394,7 @@ async def run_full_dataset(
382
394
  dataset_name = f"Dataset: {path.name}" if path.exists() else source.split("/")[-1]
383
395
 
384
396
  # Build agent class + config for run_dataset
397
+ agent_config: dict[str, Any]
385
398
  if agent_type == "integration_test": # --integration-test mode
386
399
  from hud.agents.misc.integration_test_agent import IntegrationTestRunner
387
400
 
@@ -399,24 +412,13 @@ async def run_full_dataset(
399
412
  )
400
413
  raise typer.Exit(1) from e
401
414
 
402
- # Use build_agent to create a sample agent to get the config
403
- sample_agent = build_agent(
404
- agent_type,
415
+ # Use the shared config builder
416
+ agent_config = _build_vllm_config(
417
+ vllm_base_url=vllm_base_url,
405
418
  model=model,
406
419
  allowed_tools=allowed_tools,
407
420
  verbose=verbose,
408
- vllm_base_url=vllm_base_url,
409
421
  )
410
-
411
- # Extract the config from the sample agent
412
- agent_config: dict[str, Any] = {
413
- "openai_client": sample_agent.oai,
414
- "model_name": sample_agent.model_name,
415
- "verbose": verbose,
416
- "completion_kwargs": sample_agent.completion_kwargs,
417
- }
418
- if allowed_tools:
419
- agent_config["allowed_tools"] = allowed_tools
420
422
  elif agent_type == "openai":
421
423
  try:
422
424
  from hud.agents import OperatorAgent
@@ -630,8 +632,6 @@ def eval_command(
630
632
  # Run with verbose output for debugging
631
633
  hud eval task.json --verbose
632
634
  """
633
- from hud.settings import settings
634
-
635
635
  # Always configure basic logging so agent steps can be logged
636
636
  # Set to INFO by default for consistency with run_evaluation.py
637
637
  if very_verbose: