hud-python 0.5.1__py3-none-any.whl → 0.5.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +1 -1
- hud/agents/__init__.py +65 -6
- hud/agents/base.py +33 -15
- hud/agents/claude.py +60 -31
- hud/agents/gateway.py +42 -0
- hud/agents/gemini.py +15 -26
- hud/agents/gemini_cua.py +6 -17
- hud/agents/misc/response_agent.py +7 -0
- hud/agents/openai.py +16 -29
- hud/agents/openai_chat.py +3 -19
- hud/agents/operator.py +5 -17
- hud/agents/resolver.py +70 -0
- hud/agents/tests/test_claude.py +2 -4
- hud/agents/tests/test_openai.py +2 -1
- hud/agents/tests/test_resolver.py +192 -0
- hud/agents/types.py +148 -0
- hud/cli/__init__.py +34 -3
- hud/cli/build.py +37 -5
- hud/cli/dev.py +11 -2
- hud/cli/eval.py +51 -39
- hud/cli/flows/init.py +1 -1
- hud/cli/pull.py +1 -1
- hud/cli/push.py +9 -2
- hud/cli/tests/test_build.py +2 -2
- hud/cli/tests/test_push.py +1 -1
- hud/cli/utils/metadata.py +1 -1
- hud/cli/utils/tests/test_metadata.py +1 -1
- hud/clients/mcp_use.py +6 -1
- hud/datasets/loader.py +17 -18
- hud/datasets/runner.py +16 -10
- hud/datasets/tests/test_loader.py +15 -15
- hud/environment/__init__.py +5 -3
- hud/environment/connection.py +58 -6
- hud/environment/connectors/mcp_config.py +29 -1
- hud/environment/environment.py +218 -77
- hud/environment/router.py +175 -24
- hud/environment/scenarios.py +313 -186
- hud/environment/tests/test_connectors.py +10 -23
- hud/environment/tests/test_environment.py +432 -0
- hud/environment/tests/test_local_connectors.py +81 -40
- hud/environment/tests/test_scenarios.py +820 -14
- hud/eval/context.py +63 -10
- hud/eval/instrument.py +4 -2
- hud/eval/manager.py +79 -12
- hud/eval/task.py +36 -4
- hud/eval/tests/test_eval.py +1 -1
- hud/eval/tests/test_task.py +147 -1
- hud/eval/types.py +2 -0
- hud/eval/utils.py +14 -3
- hud/patches/mcp_patches.py +178 -21
- hud/telemetry/instrument.py +8 -1
- hud/telemetry/tests/test_eval_telemetry.py +8 -8
- hud/tools/__init__.py +2 -0
- hud/tools/agent.py +223 -0
- hud/tools/computer/__init__.py +34 -5
- hud/tools/shell.py +3 -3
- hud/tools/tests/test_agent_tool.py +355 -0
- hud/types.py +62 -34
- hud/utils/hud_console.py +30 -17
- hud/utils/strict_schema.py +1 -1
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/METADATA +2 -2
- {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/RECORD +67 -61
- {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/WHEEL +0 -0
- {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
- {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
hud/__init__.py
CHANGED
|
@@ -18,7 +18,7 @@ from .telemetry.instrument import instrument
|
|
|
18
18
|
def trace(*args: object, **kwargs: object) -> EvalContext:
|
|
19
19
|
"""Deprecated: Use hud.eval() instead.
|
|
20
20
|
|
|
21
|
-
.. deprecated:: 0.5.
|
|
21
|
+
.. deprecated:: 0.5.2
|
|
22
22
|
hud.trace() is deprecated. Use hud.eval() or env.eval() instead.
|
|
23
23
|
"""
|
|
24
24
|
warnings.warn(
|
hud/agents/__init__.py
CHANGED
|
@@ -1,19 +1,78 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
3
5
|
from .base import MCPAgent
|
|
4
6
|
from .openai import OpenAIAgent
|
|
5
7
|
from .openai_chat import OpenAIChatAgent
|
|
6
8
|
from .operator import OperatorAgent
|
|
7
9
|
|
|
8
|
-
# Note: These agents are not exported here to avoid requiring optional dependencies.
|
|
9
|
-
# Import directly if needed:
|
|
10
|
-
# from hud.agents.claude import ClaudeAgent # requires anthropic
|
|
11
|
-
# from hud.agents.gemini import GeminiAgent # requires google-genai
|
|
12
|
-
# from hud.agents.gemini_cua import GeminiCUAAgent # requires google-genai
|
|
13
|
-
|
|
14
10
|
__all__ = [
|
|
15
11
|
"MCPAgent",
|
|
16
12
|
"OpenAIAgent",
|
|
17
13
|
"OpenAIChatAgent",
|
|
18
14
|
"OperatorAgent",
|
|
15
|
+
"create_agent",
|
|
19
16
|
]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def create_agent(model: str, **kwargs: Any) -> MCPAgent:
|
|
20
|
+
"""Create an agent for a gateway model.
|
|
21
|
+
|
|
22
|
+
This routes ALL requests through the HUD gateway. For direct API access
|
|
23
|
+
(using your own API keys), use the agent classes directly.
|
|
24
|
+
|
|
25
|
+
Args:
|
|
26
|
+
model: Model name (e.g., "gpt-4o", "claude-sonnet-4-5").
|
|
27
|
+
**kwargs: Additional params passed to agent.create().
|
|
28
|
+
|
|
29
|
+
Returns:
|
|
30
|
+
Configured MCPAgent instance with gateway routing.
|
|
31
|
+
|
|
32
|
+
Example:
|
|
33
|
+
```python
|
|
34
|
+
# Gateway routing (recommended)
|
|
35
|
+
agent = create_agent("gpt-4o")
|
|
36
|
+
agent = create_agent("claude-sonnet-4-5", temperature=0.7)
|
|
37
|
+
|
|
38
|
+
# Direct API access (use agent classes)
|
|
39
|
+
from hud.agents.claude import ClaudeAgent
|
|
40
|
+
|
|
41
|
+
agent = ClaudeAgent.create(model="claude-sonnet-4-5")
|
|
42
|
+
```
|
|
43
|
+
"""
|
|
44
|
+
from hud.agents.gateway import build_gateway_client
|
|
45
|
+
from hud.agents.resolver import resolve_cls
|
|
46
|
+
|
|
47
|
+
# Resolve class and gateway info
|
|
48
|
+
agent_cls, gateway_info = resolve_cls(model)
|
|
49
|
+
|
|
50
|
+
# Get model ID from gateway info or use input
|
|
51
|
+
model_id = model
|
|
52
|
+
if gateway_info:
|
|
53
|
+
model_id = gateway_info.get("model") or gateway_info.get("id") or model
|
|
54
|
+
|
|
55
|
+
# Determine provider: from gateway info, or infer from agent class
|
|
56
|
+
if gateway_info:
|
|
57
|
+
provider = gateway_info.get("provider") or "openai"
|
|
58
|
+
else:
|
|
59
|
+
provider = "openai"
|
|
60
|
+
if agent_cls.__name__ == "ClaudeAgent":
|
|
61
|
+
provider = "anthropic"
|
|
62
|
+
elif agent_cls.__name__ in ("GeminiAgent", "GeminiCUAAgent"):
|
|
63
|
+
provider = "gemini"
|
|
64
|
+
|
|
65
|
+
client = build_gateway_client(provider)
|
|
66
|
+
|
|
67
|
+
# Set up kwargs
|
|
68
|
+
kwargs.setdefault("model", model_id)
|
|
69
|
+
|
|
70
|
+
# Use correct client key based on agent type
|
|
71
|
+
if agent_cls == OpenAIChatAgent:
|
|
72
|
+
kwargs.setdefault("openai_client", client)
|
|
73
|
+
else:
|
|
74
|
+
# Claude and other agents use model_client and validate_api_key
|
|
75
|
+
kwargs.setdefault("model_client", client)
|
|
76
|
+
kwargs.setdefault("validate_api_key", False)
|
|
77
|
+
|
|
78
|
+
return agent_cls.create(**kwargs)
|
hud/agents/base.py
CHANGED
|
@@ -9,11 +9,12 @@ from abc import ABC, abstractmethod
|
|
|
9
9
|
from typing import TYPE_CHECKING, Any, ClassVar, Literal
|
|
10
10
|
|
|
11
11
|
import mcp.types as types
|
|
12
|
-
from pydantic import BaseModel, ConfigDict
|
|
13
12
|
|
|
14
13
|
from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult, Trace
|
|
15
14
|
from hud.utils.hud_console import HUDConsole
|
|
16
15
|
|
|
16
|
+
from .types import BaseCreateParams
|
|
17
|
+
|
|
17
18
|
if TYPE_CHECKING:
|
|
18
19
|
from hud.environment import Environment
|
|
19
20
|
from hud.eval.context import EvalContext
|
|
@@ -22,18 +23,6 @@ if TYPE_CHECKING:
|
|
|
22
23
|
logger = logging.getLogger(__name__)
|
|
23
24
|
|
|
24
25
|
|
|
25
|
-
class BaseCreateParams(BaseModel):
|
|
26
|
-
"""Runtime parameters for agent creation."""
|
|
27
|
-
|
|
28
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
29
|
-
|
|
30
|
-
# Primary way to bind agent to execution context (v5)
|
|
31
|
-
ctx: Any | None = None # EvalContext or Environment - agent uses this for tool calls
|
|
32
|
-
|
|
33
|
-
auto_respond: bool = False
|
|
34
|
-
verbose: bool = False
|
|
35
|
-
|
|
36
|
-
|
|
37
26
|
class MCPAgent(ABC):
|
|
38
27
|
"""
|
|
39
28
|
Base class for MCP-enabled agents.
|
|
@@ -208,7 +197,21 @@ class MCPAgent(ABC):
|
|
|
208
197
|
await self._initialize_from_ctx(ctx)
|
|
209
198
|
|
|
210
199
|
try:
|
|
211
|
-
|
|
200
|
+
# Build initial context - optionally append setup tool output
|
|
201
|
+
# Check ctx first (task-level override), then fall back to agent config
|
|
202
|
+
append_setup = getattr(ctx, "append_setup_output", False) or getattr(
|
|
203
|
+
self.config, "append_setup_output", False
|
|
204
|
+
)
|
|
205
|
+
initial_prompt = ctx.prompt
|
|
206
|
+
if append_setup:
|
|
207
|
+
setup_output = getattr(ctx, "setup_output", None)
|
|
208
|
+
if setup_output:
|
|
209
|
+
initial_prompt = f"{initial_prompt}\n\n{setup_output}"
|
|
210
|
+
|
|
211
|
+
# Build initial blocks (text prompt + optional screenshot)
|
|
212
|
+
initial_blocks = text_to_blocks(initial_prompt)
|
|
213
|
+
|
|
214
|
+
result = await self._run_context(initial_blocks, max_steps=max_steps)
|
|
212
215
|
|
|
213
216
|
# Propagate error state to context for platform visibility
|
|
214
217
|
if result.isError and hasattr(ctx, "error"):
|
|
@@ -342,8 +345,17 @@ class MCPAgent(ABC):
|
|
|
342
345
|
is_error = False
|
|
343
346
|
|
|
344
347
|
# Ensure all parameters are the correct type
|
|
348
|
+
# Use ctx.reward if already set (e.g., from scenario evaluate), otherwise 0.0
|
|
349
|
+
# Note: For v4 tasks with evaluate_tool, reward is set in __aexit__ after this returns,
|
|
350
|
+
# so callers should prefer ctx.reward over Trace.reward for the final result.
|
|
351
|
+
reward = 0.0
|
|
352
|
+
if self.ctx is not None:
|
|
353
|
+
ctx_reward = getattr(self.ctx, "reward", None)
|
|
354
|
+
if ctx_reward is not None:
|
|
355
|
+
reward = ctx_reward
|
|
356
|
+
|
|
345
357
|
trace_params = {
|
|
346
|
-
"reward":
|
|
358
|
+
"reward": reward,
|
|
347
359
|
"done": True,
|
|
348
360
|
"messages": messages,
|
|
349
361
|
"content": final_response.content if final_response else error,
|
|
@@ -519,8 +531,14 @@ def find_reward(result: MCPToolResult) -> float:
|
|
|
519
531
|
|
|
520
532
|
Agent accepts "reward", "grade", "score", or weighted subscores
|
|
521
533
|
|
|
534
|
+
If isError is True, return 0.0 (error results should not contribute positive reward).
|
|
522
535
|
If not found, return 0.0
|
|
523
536
|
"""
|
|
537
|
+
# Error results should return 0.0 - don't extract reward from error responses
|
|
538
|
+
if result.isError:
|
|
539
|
+
logger.warning("Evaluate tool returned error, using reward=0.0")
|
|
540
|
+
return 0.0
|
|
541
|
+
|
|
524
542
|
accept_keys = ["reward", "grade", "score"]
|
|
525
543
|
|
|
526
544
|
# Check for direct reward/grade/score keys
|
hud/agents/claude.py
CHANGED
|
@@ -5,16 +5,18 @@ from __future__ import annotations
|
|
|
5
5
|
import copy
|
|
6
6
|
import logging
|
|
7
7
|
from inspect import cleandoc
|
|
8
|
-
from typing import Any, ClassVar, Literal, cast
|
|
8
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Literal, cast
|
|
9
9
|
|
|
10
10
|
import mcp.types as types
|
|
11
11
|
from anthropic import AsyncAnthropic, AsyncAnthropicBedrock, Omit
|
|
12
12
|
from anthropic.types import CacheControlEphemeralParam
|
|
13
13
|
from anthropic.types.beta import (
|
|
14
14
|
BetaBase64ImageSourceParam,
|
|
15
|
+
BetaBase64PDFSourceParam,
|
|
15
16
|
BetaContentBlockParam,
|
|
16
17
|
BetaImageBlockParam,
|
|
17
18
|
BetaMessageParam,
|
|
19
|
+
BetaRequestDocumentBlockParam,
|
|
18
20
|
BetaTextBlockParam,
|
|
19
21
|
BetaToolBash20250124Param,
|
|
20
22
|
BetaToolComputerUse20250124Param,
|
|
@@ -23,7 +25,6 @@ from anthropic.types.beta import (
|
|
|
23
25
|
BetaToolTextEditor20250728Param,
|
|
24
26
|
BetaToolUnionParam,
|
|
25
27
|
)
|
|
26
|
-
from pydantic import ConfigDict
|
|
27
28
|
|
|
28
29
|
from hud.settings import settings
|
|
29
30
|
from hud.tools.computer.settings import computer_settings
|
|
@@ -31,24 +32,13 @@ from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
|
|
|
31
32
|
from hud.utils.hud_console import HUDConsole
|
|
32
33
|
from hud.utils.types import with_signature
|
|
33
34
|
|
|
34
|
-
from .base import
|
|
35
|
-
|
|
36
|
-
logger = logging.getLogger(__name__)
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
class ClaudeConfig(BaseAgentConfig):
|
|
40
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
41
|
-
|
|
42
|
-
model_name: str = "Claude"
|
|
43
|
-
model: str = "claude-sonnet-4-5"
|
|
44
|
-
model_client: AsyncAnthropic | AsyncAnthropicBedrock | None = None
|
|
45
|
-
max_tokens: int = 16384
|
|
46
|
-
use_computer_beta: bool = True
|
|
47
|
-
validate_api_key: bool = True
|
|
35
|
+
from .base import MCPAgent
|
|
36
|
+
from .types import ClaudeConfig, ClaudeCreateParams
|
|
48
37
|
|
|
38
|
+
if TYPE_CHECKING:
|
|
39
|
+
from collections.abc import Sequence
|
|
49
40
|
|
|
50
|
-
|
|
51
|
-
pass
|
|
41
|
+
logger = logging.getLogger(__name__)
|
|
52
42
|
|
|
53
43
|
|
|
54
44
|
class ClaudeAgent(MCPAgent):
|
|
@@ -76,12 +66,20 @@ class ClaudeAgent(MCPAgent):
|
|
|
76
66
|
|
|
77
67
|
model_client = self.config.model_client
|
|
78
68
|
if model_client is None:
|
|
79
|
-
|
|
80
|
-
if
|
|
81
|
-
|
|
82
|
-
model_client = AsyncAnthropic(api_key=api_key)
|
|
69
|
+
# Default to HUD gateway when HUD_API_KEY is available
|
|
70
|
+
if settings.api_key:
|
|
71
|
+
from hud.agents.gateway import build_gateway_client
|
|
83
72
|
|
|
84
|
-
|
|
73
|
+
model_client = build_gateway_client("anthropic")
|
|
74
|
+
elif settings.anthropic_api_key:
|
|
75
|
+
model_client = AsyncAnthropic(api_key=settings.anthropic_api_key)
|
|
76
|
+
else:
|
|
77
|
+
raise ValueError(
|
|
78
|
+
"No API key found. Set HUD_API_KEY for HUD gateway, "
|
|
79
|
+
"or ANTHROPIC_API_KEY for direct Anthropic access."
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
self.anthropic_client: AsyncAnthropic | AsyncAnthropicBedrock = model_client
|
|
85
83
|
self.max_tokens = self.config.max_tokens
|
|
86
84
|
self.use_computer_beta = self.config.use_computer_beta
|
|
87
85
|
self.hud_console = HUDConsole(logger=logger)
|
|
@@ -212,7 +210,10 @@ class ClaudeAgent(MCPAgent):
|
|
|
212
210
|
async def format_tool_results(
|
|
213
211
|
self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
|
|
214
212
|
) -> list[BetaMessageParam]:
|
|
215
|
-
"""Format tool results into Claude messages.
|
|
213
|
+
"""Format tool results into Claude messages.
|
|
214
|
+
|
|
215
|
+
Handles EmbeddedResource (PDFs), images, and text content.
|
|
216
|
+
"""
|
|
216
217
|
# Process each tool result
|
|
217
218
|
user_content = []
|
|
218
219
|
|
|
@@ -224,7 +225,9 @@ class ClaudeAgent(MCPAgent):
|
|
|
224
225
|
continue
|
|
225
226
|
|
|
226
227
|
# Convert MCP tool results to Claude format
|
|
227
|
-
claude_blocks
|
|
228
|
+
claude_blocks: list[
|
|
229
|
+
BetaTextBlockParam | BetaImageBlockParam | BetaRequestDocumentBlockParam
|
|
230
|
+
] = []
|
|
228
231
|
|
|
229
232
|
if result.isError:
|
|
230
233
|
# Extract error message from content
|
|
@@ -241,6 +244,16 @@ class ClaudeAgent(MCPAgent):
|
|
|
241
244
|
claude_blocks.append(text_to_content_block(content.text))
|
|
242
245
|
elif isinstance(content, types.ImageContent):
|
|
243
246
|
claude_blocks.append(base64_to_content_block(content.data))
|
|
247
|
+
elif isinstance(content, types.EmbeddedResource):
|
|
248
|
+
# Handle embedded resources (PDFs)
|
|
249
|
+
resource = content.resource
|
|
250
|
+
if (
|
|
251
|
+
isinstance(resource, types.BlobResourceContents)
|
|
252
|
+
and resource.mimeType == "application/pdf"
|
|
253
|
+
):
|
|
254
|
+
claude_blocks.append(
|
|
255
|
+
document_to_content_block(base64_data=resource.blob)
|
|
256
|
+
)
|
|
244
257
|
|
|
245
258
|
# Add tool result
|
|
246
259
|
user_content.append(tool_use_content_block(tool_use_id, claude_blocks))
|
|
@@ -295,7 +308,7 @@ class ClaudeAgent(MCPAgent):
|
|
|
295
308
|
display_width_px=computer_settings.ANTHROPIC_COMPUTER_WIDTH,
|
|
296
309
|
display_height_px=computer_settings.ANTHROPIC_COMPUTER_HEIGHT,
|
|
297
310
|
)
|
|
298
|
-
elif tool.name == "computer":
|
|
311
|
+
elif tool.name == "computer" or tool.name.endswith("_computer"):
|
|
299
312
|
logger.warning(
|
|
300
313
|
"Renamed tool %s to 'computer', dropping original 'computer' tool",
|
|
301
314
|
selected_computer_tool.name,
|
|
@@ -322,11 +335,14 @@ class ClaudeAgent(MCPAgent):
|
|
|
322
335
|
self.claude_tools = []
|
|
323
336
|
for tool in available_tools:
|
|
324
337
|
claude_tool = to_api_tool(tool)
|
|
325
|
-
if claude_tool is None
|
|
338
|
+
if claude_tool is None:
|
|
326
339
|
continue
|
|
327
|
-
|
|
340
|
+
tool_name = claude_tool.get("name")
|
|
341
|
+
if tool_name is None:
|
|
342
|
+
continue
|
|
343
|
+
if tool_name == "computer":
|
|
328
344
|
self.has_computer_tool = True
|
|
329
|
-
self.tool_mapping[
|
|
345
|
+
self.tool_mapping[tool_name] = tool.name
|
|
330
346
|
self.claude_tools.append(claude_tool)
|
|
331
347
|
|
|
332
348
|
def _add_prompt_caching(self, messages: list[BetaMessageParam]) -> list[BetaMessageParam]:
|
|
@@ -372,8 +388,21 @@ def text_to_content_block(text: str) -> BetaTextBlockParam:
|
|
|
372
388
|
return {"type": "text", "text": text}
|
|
373
389
|
|
|
374
390
|
|
|
391
|
+
def document_to_content_block(base64_data: str) -> BetaRequestDocumentBlockParam:
|
|
392
|
+
"""Convert base64 PDF to Claude document content block."""
|
|
393
|
+
return BetaRequestDocumentBlockParam(
|
|
394
|
+
type="document",
|
|
395
|
+
source=BetaBase64PDFSourceParam(
|
|
396
|
+
type="base64",
|
|
397
|
+
media_type="application/pdf",
|
|
398
|
+
data=base64_data,
|
|
399
|
+
),
|
|
400
|
+
)
|
|
401
|
+
|
|
402
|
+
|
|
375
403
|
def tool_use_content_block(
|
|
376
|
-
tool_use_id: str,
|
|
404
|
+
tool_use_id: str,
|
|
405
|
+
content: Sequence[BetaTextBlockParam | BetaImageBlockParam | BetaRequestDocumentBlockParam],
|
|
377
406
|
) -> BetaToolResultBlockParam:
|
|
378
407
|
"""Create tool result content block."""
|
|
379
|
-
return {"type": "tool_result", "tool_use_id": tool_use_id, "content": content}
|
|
408
|
+
return {"type": "tool_result", "tool_use_id": tool_use_id, "content": content} # pyright: ignore[reportReturnType]
|
hud/agents/gateway.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Gateway client utilities for HUD inference gateway."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def build_gateway_client(provider: str) -> Any:
|
|
9
|
+
"""Build a client configured for HUD gateway routing.
|
|
10
|
+
|
|
11
|
+
Args:
|
|
12
|
+
provider: Provider name ("anthropic", "openai", "gemini", etc.)
|
|
13
|
+
|
|
14
|
+
Returns:
|
|
15
|
+
Configured async client for the provider.
|
|
16
|
+
"""
|
|
17
|
+
from hud.settings import settings
|
|
18
|
+
|
|
19
|
+
provider = provider.lower()
|
|
20
|
+
|
|
21
|
+
if provider == "anthropic":
|
|
22
|
+
from anthropic import AsyncAnthropic
|
|
23
|
+
|
|
24
|
+
return AsyncAnthropic(api_key=settings.api_key, base_url=settings.hud_gateway_url)
|
|
25
|
+
|
|
26
|
+
if provider == "gemini":
|
|
27
|
+
from google import genai
|
|
28
|
+
from google.genai.types import HttpOptions
|
|
29
|
+
|
|
30
|
+
return genai.Client(
|
|
31
|
+
api_key="PLACEHOLDER",
|
|
32
|
+
http_options=HttpOptions(
|
|
33
|
+
api_version="v1beta",
|
|
34
|
+
base_url=settings.hud_gateway_url,
|
|
35
|
+
headers={"Authorization": f"Bearer {settings.api_key}"},
|
|
36
|
+
),
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
# OpenAI-compatible (openai, azure, together, groq, fireworks, etc.)
|
|
40
|
+
from openai import AsyncOpenAI
|
|
41
|
+
|
|
42
|
+
return AsyncOpenAI(api_key=settings.api_key, base_url=settings.hud_gateway_url)
|
hud/agents/gemini.py
CHANGED
|
@@ -8,37 +8,18 @@ from typing import Any, ClassVar, cast
|
|
|
8
8
|
import mcp.types as types
|
|
9
9
|
from google import genai
|
|
10
10
|
from google.genai import types as genai_types
|
|
11
|
-
from pydantic import ConfigDict
|
|
12
11
|
|
|
13
12
|
from hud.settings import settings
|
|
14
13
|
from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
|
|
15
14
|
from hud.utils.hud_console import HUDConsole
|
|
16
15
|
from hud.utils.types import with_signature
|
|
17
16
|
|
|
18
|
-
from .base import
|
|
17
|
+
from .base import MCPAgent
|
|
18
|
+
from .types import GeminiConfig, GeminiCreateParams
|
|
19
19
|
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
class GeminiConfig(BaseAgentConfig):
|
|
24
|
-
"""Configuration for `GeminiAgent`."""
|
|
25
|
-
|
|
26
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
27
|
-
|
|
28
|
-
model_name: str = "Gemini"
|
|
29
|
-
model: str = "gemini-3-pro-preview"
|
|
30
|
-
model_client: genai.Client | None = None
|
|
31
|
-
temperature: float = 1.0
|
|
32
|
-
top_p: float = 0.95
|
|
33
|
-
top_k: int = 40
|
|
34
|
-
max_output_tokens: int = 8192
|
|
35
|
-
validate_api_key: bool = True
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
class GeminiCreateParams(BaseCreateParams, GeminiConfig):
|
|
39
|
-
pass
|
|
40
|
-
|
|
41
|
-
|
|
42
23
|
class GeminiAgent(MCPAgent):
|
|
43
24
|
"""
|
|
44
25
|
Gemini agent that uses MCP servers for tool execution.
|
|
@@ -61,10 +42,18 @@ class GeminiAgent(MCPAgent):
|
|
|
61
42
|
|
|
62
43
|
model_client = self.config.model_client
|
|
63
44
|
if model_client is None:
|
|
64
|
-
|
|
65
|
-
if
|
|
66
|
-
|
|
67
|
-
|
|
45
|
+
# Default to HUD gateway when HUD_API_KEY is available
|
|
46
|
+
if settings.api_key:
|
|
47
|
+
from hud.agents.gateway import build_gateway_client
|
|
48
|
+
|
|
49
|
+
model_client = build_gateway_client("gemini")
|
|
50
|
+
elif settings.gemini_api_key:
|
|
51
|
+
model_client = genai.Client(api_key=settings.gemini_api_key)
|
|
52
|
+
else:
|
|
53
|
+
raise ValueError(
|
|
54
|
+
"No API key found. Set HUD_API_KEY for HUD gateway, "
|
|
55
|
+
"or GEMINI_API_KEY for direct Gemini access."
|
|
56
|
+
)
|
|
68
57
|
|
|
69
58
|
if self.config.validate_api_key:
|
|
70
59
|
try:
|
|
@@ -72,7 +61,7 @@ class GeminiAgent(MCPAgent):
|
|
|
72
61
|
except Exception as e:
|
|
73
62
|
raise ValueError(f"Gemini API key is invalid: {e}") from e
|
|
74
63
|
|
|
75
|
-
self.gemini_client = model_client
|
|
64
|
+
self.gemini_client: genai.Client = model_client
|
|
76
65
|
self.temperature = self.config.temperature
|
|
77
66
|
self.top_p = self.config.top_p
|
|
78
67
|
self.top_k = self.config.top_k
|
hud/agents/gemini_cua.py
CHANGED
|
@@ -7,14 +7,14 @@ from typing import Any, ClassVar
|
|
|
7
7
|
|
|
8
8
|
import mcp.types as types
|
|
9
9
|
from google.genai import types as genai_types
|
|
10
|
-
from pydantic import ConfigDict, Field
|
|
11
10
|
|
|
12
11
|
from hud.tools.computer.settings import computer_settings
|
|
13
12
|
from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
|
|
14
13
|
from hud.utils.types import with_signature
|
|
15
14
|
|
|
16
|
-
from .base import
|
|
17
|
-
from .gemini import GeminiAgent
|
|
15
|
+
from .base import MCPAgent
|
|
16
|
+
from .gemini import GeminiAgent
|
|
17
|
+
from .types import GeminiCUAConfig, GeminiCUACreateParams
|
|
18
18
|
|
|
19
19
|
logger = logging.getLogger(__name__)
|
|
20
20
|
|
|
@@ -56,20 +56,6 @@ what they asked.
|
|
|
56
56
|
""".strip()
|
|
57
57
|
|
|
58
58
|
|
|
59
|
-
class GeminiCUAConfig(GeminiConfig):
|
|
60
|
-
"""Configuration for `GeminiCUAAgent`."""
|
|
61
|
-
|
|
62
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
63
|
-
|
|
64
|
-
model_name: str = "GeminiCUA"
|
|
65
|
-
model: str = "gemini-2.5-computer-use-preview-10-2025"
|
|
66
|
-
excluded_predefined_functions: list[str] = Field(default_factory=list)
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
class GeminiCUACreateParams(BaseCreateParams, GeminiCUAConfig):
|
|
70
|
-
pass
|
|
71
|
-
|
|
72
|
-
|
|
73
59
|
class GeminiCUAAgent(GeminiAgent):
|
|
74
60
|
"""
|
|
75
61
|
Gemini Computer Use Agent that extends GeminiAgent with computer use capabilities.
|
|
@@ -123,6 +109,9 @@ class GeminiCUAAgent(GeminiAgent):
|
|
|
123
109
|
)
|
|
124
110
|
)
|
|
125
111
|
|
|
112
|
+
if tool.name == "computer" or tool.name.endswith("_computer"):
|
|
113
|
+
return None
|
|
114
|
+
|
|
126
115
|
# For non-computer tools, use the parent implementation
|
|
127
116
|
return super()._to_gemini_tool(tool)
|
|
128
117
|
|
|
@@ -6,6 +6,7 @@ from typing import Literal
|
|
|
6
6
|
from openai import AsyncOpenAI
|
|
7
7
|
|
|
8
8
|
from hud.settings import settings
|
|
9
|
+
from hud.telemetry import instrument
|
|
9
10
|
|
|
10
11
|
logger = logging.getLogger(__name__)
|
|
11
12
|
|
|
@@ -64,6 +65,11 @@ class ResponseAgent:
|
|
|
64
65
|
self.model = model
|
|
65
66
|
self.system_prompt = system_prompt or DEFAULT_SYSTEM_PROMPT
|
|
66
67
|
|
|
68
|
+
@instrument(
|
|
69
|
+
category="agent",
|
|
70
|
+
name="response_agent",
|
|
71
|
+
internal_type="user-message",
|
|
72
|
+
)
|
|
67
73
|
async def determine_response(self, agent_message: str) -> ResponseType:
|
|
68
74
|
"""
|
|
69
75
|
Determine whether the agent should stop or continue based on its message.
|
|
@@ -86,6 +92,7 @@ class ResponseAgent:
|
|
|
86
92
|
],
|
|
87
93
|
temperature=0.1,
|
|
88
94
|
max_tokens=5,
|
|
95
|
+
extra_headers={"Trace-Id": ""},
|
|
89
96
|
)
|
|
90
97
|
|
|
91
98
|
response_text = response.choices[0].message.content
|
hud/agents/openai.py
CHANGED
|
@@ -29,39 +29,18 @@ from openai.types.responses import (
|
|
|
29
29
|
from openai.types.responses.response_create_params import ToolChoice # noqa: TC002
|
|
30
30
|
from openai.types.responses.response_input_param import FunctionCallOutput, Message
|
|
31
31
|
from openai.types.shared_params.reasoning import Reasoning # noqa: TC002
|
|
32
|
-
from pydantic import ConfigDict
|
|
33
32
|
|
|
34
33
|
from hud.settings import settings
|
|
35
34
|
from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult, Trace
|
|
36
35
|
from hud.utils.strict_schema import ensure_strict_json_schema
|
|
37
36
|
from hud.utils.types import with_signature
|
|
38
37
|
|
|
39
|
-
from .base import
|
|
38
|
+
from .base import MCPAgent
|
|
39
|
+
from .types import OpenAIConfig, OpenAICreateParams
|
|
40
40
|
|
|
41
41
|
logger = logging.getLogger(__name__)
|
|
42
42
|
|
|
43
43
|
|
|
44
|
-
class OpenAIConfig(BaseAgentConfig):
|
|
45
|
-
"""Configuration model for `OpenAIAgent`."""
|
|
46
|
-
|
|
47
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
48
|
-
|
|
49
|
-
model_name: str = "OpenAI"
|
|
50
|
-
model: str = "gpt-5.1"
|
|
51
|
-
model_client: AsyncOpenAI | None = None
|
|
52
|
-
max_output_tokens: int | None = None
|
|
53
|
-
temperature: float | None = None
|
|
54
|
-
reasoning: Reasoning | None = None
|
|
55
|
-
tool_choice: ToolChoice | None = None
|
|
56
|
-
truncation: Literal["auto", "disabled"] | None = None
|
|
57
|
-
parallel_tool_calls: bool | None = None
|
|
58
|
-
validate_api_key: bool = True
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
class OpenAICreateParams(BaseCreateParams, OpenAIConfig):
|
|
62
|
-
pass
|
|
63
|
-
|
|
64
|
-
|
|
65
44
|
class OpenAIAgent(MCPAgent):
|
|
66
45
|
"""Generic OpenAI agent that can execute MCP tools through the Responses API."""
|
|
67
46
|
|
|
@@ -79,10 +58,18 @@ class OpenAIAgent(MCPAgent):
|
|
|
79
58
|
|
|
80
59
|
model_client = self.config.model_client
|
|
81
60
|
if model_client is None:
|
|
82
|
-
|
|
83
|
-
if
|
|
84
|
-
|
|
85
|
-
|
|
61
|
+
# Default to HUD gateway when HUD_API_KEY is available
|
|
62
|
+
if settings.api_key:
|
|
63
|
+
from hud.agents.gateway import build_gateway_client
|
|
64
|
+
|
|
65
|
+
model_client = build_gateway_client("openai")
|
|
66
|
+
elif settings.openai_api_key:
|
|
67
|
+
model_client = AsyncOpenAI(api_key=settings.openai_api_key)
|
|
68
|
+
else:
|
|
69
|
+
raise ValueError(
|
|
70
|
+
"No API key found. Set HUD_API_KEY for HUD gateway, "
|
|
71
|
+
"or OPENAI_API_KEY for direct OpenAI access."
|
|
72
|
+
)
|
|
86
73
|
|
|
87
74
|
if self.config.validate_api_key:
|
|
88
75
|
try:
|
|
@@ -90,11 +77,11 @@ class OpenAIAgent(MCPAgent):
|
|
|
90
77
|
except Exception as exc: # pragma: no cover - network validation
|
|
91
78
|
raise ValueError(f"OpenAI API key is invalid: {exc}") from exc
|
|
92
79
|
|
|
93
|
-
self.openai_client = model_client
|
|
80
|
+
self.openai_client: AsyncOpenAI = model_client
|
|
94
81
|
self._model = self.config.model
|
|
95
82
|
self.max_output_tokens = self.config.max_output_tokens
|
|
96
83
|
self.temperature = self.config.temperature
|
|
97
|
-
self.reasoning = self.config.reasoning
|
|
84
|
+
self.reasoning: Reasoning | None = self.config.reasoning
|
|
98
85
|
self.tool_choice: ToolChoice | None = self.config.tool_choice
|
|
99
86
|
self.parallel_tool_calls = self.config.parallel_tool_calls
|
|
100
87
|
self.truncation: Literal["auto", "disabled"] | None = self.config.truncation
|
hud/agents/openai_chat.py
CHANGED
|
@@ -22,14 +22,14 @@ from typing import TYPE_CHECKING, Any, ClassVar, cast
|
|
|
22
22
|
|
|
23
23
|
import mcp.types as types
|
|
24
24
|
from openai import AsyncOpenAI
|
|
25
|
-
from pydantic import ConfigDict, Field
|
|
26
25
|
|
|
27
26
|
from hud.settings import settings
|
|
28
27
|
from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
|
|
29
28
|
from hud.utils.hud_console import HUDConsole
|
|
30
29
|
from hud.utils.types import with_signature
|
|
31
30
|
|
|
32
|
-
from .base import
|
|
31
|
+
from .base import MCPAgent
|
|
32
|
+
from .types import OpenAIChatConfig, OpenAIChatCreateParams
|
|
33
33
|
|
|
34
34
|
if TYPE_CHECKING:
|
|
35
35
|
from openai.types.chat import ChatCompletionToolParam
|
|
@@ -38,23 +38,6 @@ if TYPE_CHECKING:
|
|
|
38
38
|
logger = logging.getLogger(__name__)
|
|
39
39
|
|
|
40
40
|
|
|
41
|
-
class OpenAIChatConfig(BaseAgentConfig):
|
|
42
|
-
"""Configuration for `OpenAIChatAgent`."""
|
|
43
|
-
|
|
44
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
45
|
-
|
|
46
|
-
model_name: str = "OpenAI Chat"
|
|
47
|
-
model: str = "gpt-5-mini"
|
|
48
|
-
openai_client: AsyncOpenAI | None = None
|
|
49
|
-
api_key: str | None = None
|
|
50
|
-
base_url: str | None = None
|
|
51
|
-
completion_kwargs: dict[str, Any] = Field(default_factory=dict)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
class OpenAIChatCreateParams(BaseCreateParams, OpenAIChatConfig):
|
|
55
|
-
pass
|
|
56
|
-
|
|
57
|
-
|
|
58
41
|
class OpenAIChatAgent(MCPAgent):
|
|
59
42
|
"""MCP-enabled agent that speaks the OpenAI *chat.completions* protocol."""
|
|
60
43
|
|
|
@@ -82,6 +65,7 @@ class OpenAIChatAgent(MCPAgent):
|
|
|
82
65
|
"Use HUD_API_KEY for gateway auth and BYOK headers for provider keys."
|
|
83
66
|
)
|
|
84
67
|
|
|
68
|
+
self.oai: AsyncOpenAI
|
|
85
69
|
if self.config.openai_client is not None:
|
|
86
70
|
self.oai = self.config.openai_client
|
|
87
71
|
elif self.config.api_key is not None or self.config.base_url is not None:
|