hud-python 0.5.1__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. hud/__init__.py +1 -1
  2. hud/agents/__init__.py +65 -6
  3. hud/agents/base.py +33 -15
  4. hud/agents/claude.py +60 -31
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +15 -26
  7. hud/agents/gemini_cua.py +6 -17
  8. hud/agents/misc/response_agent.py +7 -0
  9. hud/agents/openai.py +16 -29
  10. hud/agents/openai_chat.py +3 -19
  11. hud/agents/operator.py +5 -17
  12. hud/agents/resolver.py +70 -0
  13. hud/agents/tests/test_claude.py +2 -4
  14. hud/agents/tests/test_openai.py +2 -1
  15. hud/agents/tests/test_resolver.py +192 -0
  16. hud/agents/types.py +148 -0
  17. hud/cli/__init__.py +34 -3
  18. hud/cli/build.py +37 -5
  19. hud/cli/dev.py +11 -2
  20. hud/cli/eval.py +51 -39
  21. hud/cli/flows/init.py +1 -1
  22. hud/cli/pull.py +1 -1
  23. hud/cli/push.py +9 -2
  24. hud/cli/tests/test_build.py +2 -2
  25. hud/cli/tests/test_push.py +1 -1
  26. hud/cli/utils/metadata.py +1 -1
  27. hud/cli/utils/tests/test_metadata.py +1 -1
  28. hud/clients/mcp_use.py +6 -1
  29. hud/datasets/loader.py +17 -18
  30. hud/datasets/runner.py +16 -10
  31. hud/datasets/tests/test_loader.py +15 -15
  32. hud/environment/__init__.py +5 -3
  33. hud/environment/connection.py +58 -6
  34. hud/environment/connectors/mcp_config.py +29 -1
  35. hud/environment/environment.py +218 -77
  36. hud/environment/router.py +175 -24
  37. hud/environment/scenarios.py +313 -186
  38. hud/environment/tests/test_connectors.py +10 -23
  39. hud/environment/tests/test_environment.py +432 -0
  40. hud/environment/tests/test_local_connectors.py +81 -40
  41. hud/environment/tests/test_scenarios.py +820 -14
  42. hud/eval/context.py +63 -10
  43. hud/eval/instrument.py +4 -2
  44. hud/eval/manager.py +79 -12
  45. hud/eval/task.py +36 -4
  46. hud/eval/tests/test_eval.py +1 -1
  47. hud/eval/tests/test_task.py +147 -1
  48. hud/eval/types.py +2 -0
  49. hud/eval/utils.py +14 -3
  50. hud/patches/mcp_patches.py +178 -21
  51. hud/telemetry/instrument.py +8 -1
  52. hud/telemetry/tests/test_eval_telemetry.py +8 -8
  53. hud/tools/__init__.py +2 -0
  54. hud/tools/agent.py +223 -0
  55. hud/tools/computer/__init__.py +34 -5
  56. hud/tools/shell.py +3 -3
  57. hud/tools/tests/test_agent_tool.py +355 -0
  58. hud/types.py +62 -34
  59. hud/utils/hud_console.py +30 -17
  60. hud/utils/strict_schema.py +1 -1
  61. hud/utils/tests/test_version.py +1 -1
  62. hud/version.py +1 -1
  63. {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/METADATA +2 -2
  64. {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/RECORD +67 -61
  65. {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/WHEEL +0 -0
  66. {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  67. {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
hud/__init__.py CHANGED
@@ -18,7 +18,7 @@ from .telemetry.instrument import instrument
  def trace(*args: object, **kwargs: object) -> EvalContext:
      """Deprecated: Use hud.eval() instead.
 
-     .. deprecated:: 0.5.1
+     .. deprecated:: 0.5.2
          hud.trace() is deprecated. Use hud.eval() or env.eval() instead.
      """
      warnings.warn(
hud/agents/__init__.py CHANGED
@@ -1,19 +1,78 @@
  from __future__ import annotations
 
+ from typing import Any
+
  from .base import MCPAgent
  from .openai import OpenAIAgent
  from .openai_chat import OpenAIChatAgent
  from .operator import OperatorAgent
 
- # Note: These agents are not exported here to avoid requiring optional dependencies.
- # Import directly if needed:
- # from hud.agents.claude import ClaudeAgent  # requires anthropic
- # from hud.agents.gemini import GeminiAgent  # requires google-genai
- # from hud.agents.gemini_cua import GeminiCUAAgent  # requires google-genai
-
  __all__ = [
      "MCPAgent",
      "OpenAIAgent",
      "OpenAIChatAgent",
      "OperatorAgent",
+     "create_agent",
  ]
+
+
+ def create_agent(model: str, **kwargs: Any) -> MCPAgent:
+     """Create an agent for a gateway model.
+
+     This routes ALL requests through the HUD gateway. For direct API access
+     (using your own API keys), use the agent classes directly.
+
+     Args:
+         model: Model name (e.g., "gpt-4o", "claude-sonnet-4-5").
+         **kwargs: Additional params passed to agent.create().
+
+     Returns:
+         Configured MCPAgent instance with gateway routing.
+
+     Example:
+         ```python
+         # Gateway routing (recommended)
+         agent = create_agent("gpt-4o")
+         agent = create_agent("claude-sonnet-4-5", temperature=0.7)
+
+         # Direct API access (use agent classes)
+         from hud.agents.claude import ClaudeAgent
+
+         agent = ClaudeAgent.create(model="claude-sonnet-4-5")
+         ```
+     """
+     from hud.agents.gateway import build_gateway_client
+     from hud.agents.resolver import resolve_cls
+
+     # Resolve class and gateway info
+     agent_cls, gateway_info = resolve_cls(model)
+
+     # Get model ID from gateway info or use input
+     model_id = model
+     if gateway_info:
+         model_id = gateway_info.get("model") or gateway_info.get("id") or model
+
+     # Determine provider: from gateway info, or infer from agent class
+     if gateway_info:
+         provider = gateway_info.get("provider") or "openai"
+     else:
+         provider = "openai"
+         if agent_cls.__name__ == "ClaudeAgent":
+             provider = "anthropic"
+         elif agent_cls.__name__ in ("GeminiAgent", "GeminiCUAAgent"):
+             provider = "gemini"
+
+     client = build_gateway_client(provider)
+
+     # Set up kwargs
+     kwargs.setdefault("model", model_id)
+
+     # Use correct client key based on agent type
+     if agent_cls == OpenAIChatAgent:
+         kwargs.setdefault("openai_client", client)
+     else:
+         # Claude and other agents use model_client and validate_api_key
+         kwargs.setdefault("model_client", client)
+         kwargs.setdefault("validate_api_key", False)
+
+     return agent_cls.create(**kwargs)
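
Note: the snippet below is a hedged illustration (not library code) of what `create_agent` effectively expands to for a Claude-family model, assuming `resolve_cls` maps the name to `ClaudeAgent` and returns no gateway metadata.

```python
# Approximate expansion of create_agent("claude-sonnet-4-5", max_tokens=2048),
# assuming resolve_cls() returns (ClaudeAgent, None); exact resolver behavior may differ.
from hud.agents.claude import ClaudeAgent
from hud.agents.gateway import build_gateway_client

client = build_gateway_client("anthropic")  # routes requests through the HUD gateway
agent = ClaudeAgent.create(
    model="claude-sonnet-4-5",   # model ID is passed through unchanged
    model_client=client,         # non-chat agents receive model_client
    validate_api_key=False,      # gateway clients skip provider key validation
    max_tokens=2048,             # any extra kwargs are forwarded as-is
)
```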
hud/agents/base.py CHANGED
@@ -9,11 +9,12 @@ from abc import ABC, abstractmethod
  from typing import TYPE_CHECKING, Any, ClassVar, Literal
 
  import mcp.types as types
- from pydantic import BaseModel, ConfigDict
 
  from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult, Trace
  from hud.utils.hud_console import HUDConsole
 
+ from .types import BaseCreateParams
+
  if TYPE_CHECKING:
      from hud.environment import Environment
      from hud.eval.context import EvalContext
@@ -22,18 +23,6 @@ if TYPE_CHECKING:
  logger = logging.getLogger(__name__)
 
 
- class BaseCreateParams(BaseModel):
-     """Runtime parameters for agent creation."""
-
-     model_config = ConfigDict(arbitrary_types_allowed=True)
-
-     # Primary way to bind agent to execution context (v5)
-     ctx: Any | None = None  # EvalContext or Environment - agent uses this for tool calls
-
-     auto_respond: bool = False
-     verbose: bool = False
-
-
  class MCPAgent(ABC):
      """
      Base class for MCP-enabled agents.
@@ -208,7 +197,21 @@ class MCPAgent(ABC):
          await self._initialize_from_ctx(ctx)
 
          try:
-             result = await self._run_context(text_to_blocks(ctx.prompt), max_steps=max_steps)
+             # Build initial context - optionally append setup tool output
+             # Check ctx first (task-level override), then fall back to agent config
+             append_setup = getattr(ctx, "append_setup_output", False) or getattr(
+                 self.config, "append_setup_output", False
+             )
+             initial_prompt = ctx.prompt
+             if append_setup:
+                 setup_output = getattr(ctx, "setup_output", None)
+                 if setup_output:
+                     initial_prompt = f"{initial_prompt}\n\n{setup_output}"
+
+             # Build initial blocks (text prompt + optional screenshot)
+             initial_blocks = text_to_blocks(initial_prompt)
+
+             result = await self._run_context(initial_blocks, max_steps=max_steps)
 
              # Propagate error state to context for platform visibility
              if result.isError and hasattr(ctx, "error"):
@@ -342,8 +345,17 @@ class MCPAgent(ABC):
          is_error = False
 
          # Ensure all parameters are the correct type
+         # Use ctx.reward if already set (e.g., from scenario evaluate), otherwise 0.0
+         # Note: For v4 tasks with evaluate_tool, reward is set in __aexit__ after this returns,
+         # so callers should prefer ctx.reward over Trace.reward for the final result.
+         reward = 0.0
+         if self.ctx is not None:
+             ctx_reward = getattr(self.ctx, "reward", None)
+             if ctx_reward is not None:
+                 reward = ctx_reward
+
          trace_params = {
-             "reward": 0.0,
+             "reward": reward,
              "done": True,
              "messages": messages,
              "content": final_response.content if final_response else error,
@@ -519,8 +531,14 @@ def find_reward(result: MCPToolResult) -> float:
 
      Agent accepts "reward", "grade", "score", or weighted subscores
 
+     If isError is True, return 0.0 (error results should not contribute positive reward).
      If not found, return 0.0
      """
+     # Error results should return 0.0 - don't extract reward from error responses
+     if result.isError:
+         logger.warning("Evaluate tool returned error, using reward=0.0")
+         return 0.0
+
      accept_keys = ["reward", "grade", "score"]
 
      # Check for direct reward/grade/score keys
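
Note: a minimal sketch of the reward precedence described in the new comments above, assuming the agent's public entry point is a `run(ctx, max_steps=...)` coroutine (name assumed here, not confirmed by this diff) and that `ctx` is the bound eval context.

```python
# Hedged sketch: prefer ctx.reward over Trace.reward once the run finishes.
# For v4 tasks with an evaluate_tool, ctx.reward is only finalized in __aexit__.
trace = await agent.run(ctx, max_steps=20)  # assumed entry-point name
final_reward = ctx.reward if getattr(ctx, "reward", None) is not None else trace.reward
```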
hud/agents/claude.py CHANGED
@@ -5,16 +5,18 @@ from __future__ import annotations
  import copy
  import logging
  from inspect import cleandoc
- from typing import Any, ClassVar, Literal, cast
+ from typing import TYPE_CHECKING, Any, ClassVar, Literal, cast
 
  import mcp.types as types
  from anthropic import AsyncAnthropic, AsyncAnthropicBedrock, Omit
  from anthropic.types import CacheControlEphemeralParam
  from anthropic.types.beta import (
      BetaBase64ImageSourceParam,
+     BetaBase64PDFSourceParam,
      BetaContentBlockParam,
      BetaImageBlockParam,
      BetaMessageParam,
+     BetaRequestDocumentBlockParam,
      BetaTextBlockParam,
      BetaToolBash20250124Param,
      BetaToolComputerUse20250124Param,
@@ -23,7 +25,6 @@ from anthropic.types.beta import (
      BetaToolTextEditor20250728Param,
      BetaToolUnionParam,
  )
- from pydantic import ConfigDict
 
  from hud.settings import settings
  from hud.tools.computer.settings import computer_settings
@@ -31,24 +32,13 @@ from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
  from hud.utils.hud_console import HUDConsole
  from hud.utils.types import with_signature
 
- from .base import BaseCreateParams, MCPAgent
-
- logger = logging.getLogger(__name__)
-
-
- class ClaudeConfig(BaseAgentConfig):
-     model_config = ConfigDict(arbitrary_types_allowed=True)
-
-     model_name: str = "Claude"
-     model: str = "claude-sonnet-4-5"
-     model_client: AsyncAnthropic | AsyncAnthropicBedrock | None = None
-     max_tokens: int = 16384
-     use_computer_beta: bool = True
-     validate_api_key: bool = True
+ from .base import MCPAgent
+ from .types import ClaudeConfig, ClaudeCreateParams
 
+ if TYPE_CHECKING:
+     from collections.abc import Sequence
 
- class ClaudeCreateParams(BaseCreateParams, ClaudeConfig):
-     pass
+ logger = logging.getLogger(__name__)
 
 
  class ClaudeAgent(MCPAgent):
@@ -76,12 +66,20 @@ class ClaudeAgent(MCPAgent):
 
          model_client = self.config.model_client
          if model_client is None:
-             api_key = settings.anthropic_api_key
-             if not api_key:
-                 raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
-             model_client = AsyncAnthropic(api_key=api_key)
+             # Default to HUD gateway when HUD_API_KEY is available
+             if settings.api_key:
+                 from hud.agents.gateway import build_gateway_client
 
-         self.anthropic_client = model_client
+                 model_client = build_gateway_client("anthropic")
+             elif settings.anthropic_api_key:
+                 model_client = AsyncAnthropic(api_key=settings.anthropic_api_key)
+             else:
+                 raise ValueError(
+                     "No API key found. Set HUD_API_KEY for HUD gateway, "
+                     "or ANTHROPIC_API_KEY for direct Anthropic access."
+                 )
+
+         self.anthropic_client: AsyncAnthropic | AsyncAnthropicBedrock = model_client
          self.max_tokens = self.config.max_tokens
          self.use_computer_beta = self.config.use_computer_beta
          self.hud_console = HUDConsole(logger=logger)
@@ -212,7 +210,10 @@ class ClaudeAgent(MCPAgent):
      async def format_tool_results(
          self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
      ) -> list[BetaMessageParam]:
-         """Format tool results into Claude messages."""
+         """Format tool results into Claude messages.
+
+         Handles EmbeddedResource (PDFs), images, and text content.
+         """
          # Process each tool result
          user_content = []
 
@@ -224,7 +225,9 @@
                  continue
 
              # Convert MCP tool results to Claude format
-             claude_blocks = []
+             claude_blocks: list[
+                 BetaTextBlockParam | BetaImageBlockParam | BetaRequestDocumentBlockParam
+             ] = []
 
              if result.isError:
                  # Extract error message from content
@@ -241,6 +244,16 @@
                          claude_blocks.append(text_to_content_block(content.text))
                      elif isinstance(content, types.ImageContent):
                          claude_blocks.append(base64_to_content_block(content.data))
+                     elif isinstance(content, types.EmbeddedResource):
+                         # Handle embedded resources (PDFs)
+                         resource = content.resource
+                         if (
+                             isinstance(resource, types.BlobResourceContents)
+                             and resource.mimeType == "application/pdf"
+                         ):
+                             claude_blocks.append(
+                                 document_to_content_block(base64_data=resource.blob)
+                             )
 
              # Add tool result
              user_content.append(tool_use_content_block(tool_use_id, claude_blocks))
@@ -295,7 +308,7 @@ class ClaudeAgent(MCPAgent):
                  display_width_px=computer_settings.ANTHROPIC_COMPUTER_WIDTH,
                  display_height_px=computer_settings.ANTHROPIC_COMPUTER_HEIGHT,
              )
-         elif tool.name == "computer":
+         elif tool.name == "computer" or tool.name.endswith("_computer"):
              logger.warning(
                  "Renamed tool %s to 'computer', dropping original 'computer' tool",
                  selected_computer_tool.name,
@@ -322,11 +335,14 @@ class ClaudeAgent(MCPAgent):
          self.claude_tools = []
          for tool in available_tools:
              claude_tool = to_api_tool(tool)
-             if claude_tool is None or "name" not in claude_tool:
+             if claude_tool is None:
                  continue
-             if claude_tool["name"] == "computer":
+             tool_name = claude_tool.get("name")
+             if tool_name is None:
+                 continue
+             if tool_name == "computer":
                  self.has_computer_tool = True
-             self.tool_mapping[claude_tool["name"]] = tool.name
+             self.tool_mapping[tool_name] = tool.name
              self.claude_tools.append(claude_tool)
 
      def _add_prompt_caching(self, messages: list[BetaMessageParam]) -> list[BetaMessageParam]:
@@ -372,8 +388,21 @@ def text_to_content_block(text: str) -> BetaTextBlockParam:
      return {"type": "text", "text": text}
 
 
+ def document_to_content_block(base64_data: str) -> BetaRequestDocumentBlockParam:
+     """Convert base64 PDF to Claude document content block."""
+     return BetaRequestDocumentBlockParam(
+         type="document",
+         source=BetaBase64PDFSourceParam(
+             type="base64",
+             media_type="application/pdf",
+             data=base64_data,
+         ),
+     )
+
+
  def tool_use_content_block(
-     tool_use_id: str, content: list[BetaTextBlockParam | BetaImageBlockParam]
+     tool_use_id: str,
+     content: Sequence[BetaTextBlockParam | BetaImageBlockParam | BetaRequestDocumentBlockParam],
  ) -> BetaToolResultBlockParam:
      """Create tool result content block."""
-     return {"type": "tool_result", "tool_use_id": tool_use_id, "content": content}
+     return {"type": "tool_result", "tool_use_id": tool_use_id, "content": content}  # pyright: ignore[reportReturnType]
hud/agents/gateway.py ADDED
@@ -0,0 +1,42 @@
+ """Gateway client utilities for HUD inference gateway."""
+
+ from __future__ import annotations
+
+ from typing import Any
+
+
+ def build_gateway_client(provider: str) -> Any:
+     """Build a client configured for HUD gateway routing.
+
+     Args:
+         provider: Provider name ("anthropic", "openai", "gemini", etc.)
+
+     Returns:
+         Configured async client for the provider.
+     """
+     from hud.settings import settings
+
+     provider = provider.lower()
+
+     if provider == "anthropic":
+         from anthropic import AsyncAnthropic
+
+         return AsyncAnthropic(api_key=settings.api_key, base_url=settings.hud_gateway_url)
+
+     if provider == "gemini":
+         from google import genai
+         from google.genai.types import HttpOptions
+
+         return genai.Client(
+             api_key="PLACEHOLDER",
+             http_options=HttpOptions(
+                 api_version="v1beta",
+                 base_url=settings.hud_gateway_url,
+                 headers={"Authorization": f"Bearer {settings.api_key}"},
+             ),
+         )
+
+     # OpenAI-compatible (openai, azure, together, groq, fireworks, etc.)
+     from openai import AsyncOpenAI
+
+     return AsyncOpenAI(api_key=settings.api_key, base_url=settings.hud_gateway_url)
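
Note: a short usage sketch of the new helper, assuming `settings.api_key` (HUD_API_KEY) and `settings.hud_gateway_url` are configured.

```python
# Hedged sketch: each branch returns a provider client pointed at the HUD gateway.
from hud.agents.gateway import build_gateway_client

anthropic_client = build_gateway_client("anthropic")  # anthropic.AsyncAnthropic
gemini_client = build_gateway_client("gemini")        # google-genai Client with Bearer header
other_client = build_gateway_client("groq")           # unrecognized providers fall through to AsyncOpenAI
```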
hud/agents/gemini.py CHANGED
@@ -8,37 +8,18 @@ from typing import Any, ClassVar, cast
  import mcp.types as types
  from google import genai
  from google.genai import types as genai_types
- from pydantic import ConfigDict
 
  from hud.settings import settings
  from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
  from hud.utils.hud_console import HUDConsole
  from hud.utils.types import with_signature
 
- from .base import BaseCreateParams, MCPAgent
+ from .base import MCPAgent
+ from .types import GeminiConfig, GeminiCreateParams
 
  logger = logging.getLogger(__name__)
 
 
- class GeminiConfig(BaseAgentConfig):
-     """Configuration for `GeminiAgent`."""
-
-     model_config = ConfigDict(arbitrary_types_allowed=True)
-
-     model_name: str = "Gemini"
-     model: str = "gemini-3-pro-preview"
-     model_client: genai.Client | None = None
-     temperature: float = 1.0
-     top_p: float = 0.95
-     top_k: int = 40
-     max_output_tokens: int = 8192
-     validate_api_key: bool = True
-
-
- class GeminiCreateParams(BaseCreateParams, GeminiConfig):
-     pass
-
-
  class GeminiAgent(MCPAgent):
      """
      Gemini agent that uses MCP servers for tool execution.
@@ -61,10 +42,18 @@ class GeminiAgent(MCPAgent):
 
          model_client = self.config.model_client
          if model_client is None:
-             api_key = settings.gemini_api_key
-             if not api_key:
-                 raise ValueError("Gemini API key not found. Set GEMINI_API_KEY.")
-             model_client = genai.Client(api_key=api_key)
+             # Default to HUD gateway when HUD_API_KEY is available
+             if settings.api_key:
+                 from hud.agents.gateway import build_gateway_client
+
+                 model_client = build_gateway_client("gemini")
+             elif settings.gemini_api_key:
+                 model_client = genai.Client(api_key=settings.gemini_api_key)
+             else:
+                 raise ValueError(
+                     "No API key found. Set HUD_API_KEY for HUD gateway, "
+                     "or GEMINI_API_KEY for direct Gemini access."
+                 )
 
          if self.config.validate_api_key:
              try:
@@ -72,7 +61,7 @@
              except Exception as e:
                  raise ValueError(f"Gemini API key is invalid: {e}") from e
 
-         self.gemini_client = model_client
+         self.gemini_client: genai.Client = model_client
          self.temperature = self.config.temperature
          self.top_p = self.config.top_p
          self.top_k = self.config.top_k
hud/agents/gemini_cua.py CHANGED
@@ -7,14 +7,14 @@ from typing import Any, ClassVar
 
  import mcp.types as types
  from google.genai import types as genai_types
- from pydantic import ConfigDict, Field
 
  from hud.tools.computer.settings import computer_settings
  from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
  from hud.utils.types import with_signature
 
- from .base import BaseCreateParams, MCPAgent
- from .gemini import GeminiAgent, GeminiConfig
+ from .base import MCPAgent
+ from .gemini import GeminiAgent
+ from .types import GeminiCUAConfig, GeminiCUACreateParams
 
  logger = logging.getLogger(__name__)
 
@@ -56,20 +56,6 @@ what they asked.
  """.strip()
 
 
- class GeminiCUAConfig(GeminiConfig):
-     """Configuration for `GeminiCUAAgent`."""
-
-     model_config = ConfigDict(arbitrary_types_allowed=True)
-
-     model_name: str = "GeminiCUA"
-     model: str = "gemini-2.5-computer-use-preview-10-2025"
-     excluded_predefined_functions: list[str] = Field(default_factory=list)
-
-
- class GeminiCUACreateParams(BaseCreateParams, GeminiCUAConfig):
-     pass
-
-
  class GeminiCUAAgent(GeminiAgent):
      """
      Gemini Computer Use Agent that extends GeminiAgent with computer use capabilities.
@@ -123,6 +109,9 @@ class GeminiCUAAgent(GeminiAgent):
                  )
              )
 
+         if tool.name == "computer" or tool.name.endswith("_computer"):
+             return None
+
          # For non-computer tools, use the parent implementation
          return super()._to_gemini_tool(tool)
 
hud/agents/misc/response_agent.py CHANGED
@@ -6,6 +6,7 @@ from typing import Literal
  from openai import AsyncOpenAI
 
  from hud.settings import settings
+ from hud.telemetry import instrument
 
  logger = logging.getLogger(__name__)
 
@@ -64,6 +65,11 @@
          self.model = model
          self.system_prompt = system_prompt or DEFAULT_SYSTEM_PROMPT
 
+     @instrument(
+         category="agent",
+         name="response_agent",
+         internal_type="user-message",
+     )
      async def determine_response(self, agent_message: str) -> ResponseType:
          """
          Determine whether the agent should stop or continue based on its message.
@@ -86,6 +92,7 @@
              ],
              temperature=0.1,
              max_tokens=5,
+             extra_headers={"Trace-Id": ""},
          )
 
          response_text = response.choices[0].message.content
hud/agents/openai.py CHANGED
@@ -29,39 +29,18 @@ from openai.types.responses import (
  )
  from openai.types.responses.response_create_params import ToolChoice  # noqa: TC002
  from openai.types.responses.response_input_param import FunctionCallOutput, Message
  from openai.types.shared_params.reasoning import Reasoning  # noqa: TC002
- from pydantic import ConfigDict
 
  from hud.settings import settings
  from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult, Trace
  from hud.utils.strict_schema import ensure_strict_json_schema
  from hud.utils.types import with_signature
 
- from .base import BaseCreateParams, MCPAgent
+ from .base import MCPAgent
+ from .types import OpenAIConfig, OpenAICreateParams
 
  logger = logging.getLogger(__name__)
 
 
- class OpenAIConfig(BaseAgentConfig):
-     """Configuration model for `OpenAIAgent`."""
-
-     model_config = ConfigDict(arbitrary_types_allowed=True)
-
-     model_name: str = "OpenAI"
-     model: str = "gpt-5.1"
-     model_client: AsyncOpenAI | None = None
-     max_output_tokens: int | None = None
-     temperature: float | None = None
-     reasoning: Reasoning | None = None
-     tool_choice: ToolChoice | None = None
-     truncation: Literal["auto", "disabled"] | None = None
-     parallel_tool_calls: bool | None = None
-     validate_api_key: bool = True
-
-
- class OpenAICreateParams(BaseCreateParams, OpenAIConfig):
-     pass
-
-
  class OpenAIAgent(MCPAgent):
      """Generic OpenAI agent that can execute MCP tools through the Responses API."""
 
@@ -79,10 +58,18 @@ class OpenAIAgent(MCPAgent):
 
          model_client = self.config.model_client
          if model_client is None:
-             api_key = settings.openai_api_key
-             if not api_key:
-                 raise ValueError("OpenAI API key not found. Set OPENAI_API_KEY.")
-             model_client = AsyncOpenAI(api_key=api_key)
+             # Default to HUD gateway when HUD_API_KEY is available
+             if settings.api_key:
+                 from hud.agents.gateway import build_gateway_client
+
+                 model_client = build_gateway_client("openai")
+             elif settings.openai_api_key:
+                 model_client = AsyncOpenAI(api_key=settings.openai_api_key)
+             else:
+                 raise ValueError(
+                     "No API key found. Set HUD_API_KEY for HUD gateway, "
+                     "or OPENAI_API_KEY for direct OpenAI access."
+                 )
 
          if self.config.validate_api_key:
              try:
@@ -90,11 +77,11 @@
              except Exception as exc:  # pragma: no cover - network validation
                  raise ValueError(f"OpenAI API key is invalid: {exc}") from exc
 
-         self.openai_client = model_client
+         self.openai_client: AsyncOpenAI = model_client
          self._model = self.config.model
          self.max_output_tokens = self.config.max_output_tokens
          self.temperature = self.config.temperature
-         self.reasoning = self.config.reasoning
+         self.reasoning: Reasoning | None = self.config.reasoning
          self.tool_choice: ToolChoice | None = self.config.tool_choice
          self.parallel_tool_calls = self.config.parallel_tool_calls
          self.truncation: Literal["auto", "disabled"] | None = self.config.truncation
hud/agents/openai_chat.py CHANGED
@@ -22,14 +22,14 @@ from typing import TYPE_CHECKING, Any, ClassVar, cast
 
  import mcp.types as types
  from openai import AsyncOpenAI
- from pydantic import ConfigDict, Field
 
  from hud.settings import settings
  from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
  from hud.utils.hud_console import HUDConsole
  from hud.utils.types import with_signature
 
- from .base import BaseCreateParams, MCPAgent
+ from .base import MCPAgent
+ from .types import OpenAIChatConfig, OpenAIChatCreateParams
 
  if TYPE_CHECKING:
      from openai.types.chat import ChatCompletionToolParam
@@ -38,23 +38,6 @@ if TYPE_CHECKING:
  logger = logging.getLogger(__name__)
 
 
- class OpenAIChatConfig(BaseAgentConfig):
-     """Configuration for `OpenAIChatAgent`."""
-
-     model_config = ConfigDict(arbitrary_types_allowed=True)
-
-     model_name: str = "OpenAI Chat"
-     model: str = "gpt-5-mini"
-     openai_client: AsyncOpenAI | None = None
-     api_key: str | None = None
-     base_url: str | None = None
-     completion_kwargs: dict[str, Any] = Field(default_factory=dict)
-
-
- class OpenAIChatCreateParams(BaseCreateParams, OpenAIChatConfig):
-     pass
-
-
  class OpenAIChatAgent(MCPAgent):
      """MCP-enabled agent that speaks the OpenAI *chat.completions* protocol."""
 
@@ -82,6 +65,7 @@
                  "Use HUD_API_KEY for gateway auth and BYOK headers for provider keys."
              )
 
+         self.oai: AsyncOpenAI
          if self.config.openai_client is not None:
              self.oai = self.config.openai_client
          elif self.config.api_key is not None or self.config.base_url is not None: