hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +27 -7
- hud/agents/__init__.py +70 -5
- hud/agents/base.py +238 -500
- hud/agents/claude.py +236 -247
- hud/agents/gateway.py +42 -0
- hud/agents/gemini.py +264 -0
- hud/agents/gemini_cua.py +324 -0
- hud/agents/grounded_openai.py +98 -100
- hud/agents/misc/integration_test_agent.py +51 -20
- hud/agents/misc/response_agent.py +48 -36
- hud/agents/openai.py +282 -296
- hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
- hud/agents/operator.py +199 -0
- hud/agents/resolver.py +70 -0
- hud/agents/tests/conftest.py +133 -0
- hud/agents/tests/test_base.py +300 -622
- hud/agents/tests/test_base_runtime.py +233 -0
- hud/agents/tests/test_claude.py +381 -214
- hud/agents/tests/test_client.py +9 -10
- hud/agents/tests/test_gemini.py +369 -0
- hud/agents/tests/test_grounded_openai_agent.py +65 -50
- hud/agents/tests/test_openai.py +377 -140
- hud/agents/tests/test_operator.py +362 -0
- hud/agents/tests/test_resolver.py +192 -0
- hud/agents/tests/test_run_eval.py +179 -0
- hud/agents/types.py +148 -0
- hud/cli/__init__.py +493 -546
- hud/cli/analyze.py +43 -5
- hud/cli/build.py +699 -113
- hud/cli/debug.py +8 -5
- hud/cli/dev.py +889 -732
- hud/cli/eval.py +793 -667
- hud/cli/flows/dev.py +167 -0
- hud/cli/flows/init.py +191 -0
- hud/cli/flows/tasks.py +153 -56
- hud/cli/flows/templates.py +151 -0
- hud/cli/flows/tests/__init__.py +1 -0
- hud/cli/flows/tests/test_dev.py +126 -0
- hud/cli/init.py +60 -58
- hud/cli/pull.py +1 -1
- hud/cli/push.py +38 -13
- hud/cli/rft.py +311 -0
- hud/cli/rft_status.py +145 -0
- hud/cli/tests/test_analyze.py +5 -5
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +110 -8
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_init.py +6 -1
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +140 -0
- hud/cli/tests/test_convert.py +361 -0
- hud/cli/tests/test_debug.py +12 -10
- hud/cli/tests/test_dev.py +197 -0
- hud/cli/tests/test_eval.py +251 -0
- hud/cli/tests/test_eval_bedrock.py +51 -0
- hud/cli/tests/test_init.py +124 -0
- hud/cli/tests/test_main_module.py +11 -5
- hud/cli/tests/test_mcp_server.py +12 -100
- hud/cli/tests/test_push.py +1 -1
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/tests/test_registry.py +1 -1
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/{rl → utils}/celebrate.py +14 -12
- hud/cli/utils/config.py +18 -1
- hud/cli/utils/docker.py +130 -4
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/git.py +136 -0
- hud/cli/utils/interactive.py +39 -5
- hud/cli/utils/metadata.py +70 -1
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/server.py +2 -2
- hud/cli/utils/source_hash.py +3 -3
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_git.py +142 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +258 -0
- hud/cli/{rl → utils}/viewer.py +2 -2
- hud/clients/README.md +12 -11
- hud/clients/__init__.py +4 -3
- hud/clients/base.py +166 -26
- hud/clients/environment.py +51 -0
- hud/clients/fastmcp.py +13 -6
- hud/clients/mcp_use.py +45 -15
- hud/clients/tests/test_analyze_scenarios.py +206 -0
- hud/clients/tests/test_protocol.py +9 -3
- hud/datasets/__init__.py +23 -20
- hud/datasets/loader.py +326 -0
- hud/datasets/runner.py +198 -105
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_loader.py +221 -0
- hud/datasets/tests/test_utils.py +315 -0
- hud/datasets/utils.py +270 -90
- hud/environment/__init__.py +52 -0
- hud/environment/connection.py +258 -0
- hud/environment/connectors/__init__.py +33 -0
- hud/environment/connectors/base.py +68 -0
- hud/environment/connectors/local.py +177 -0
- hud/environment/connectors/mcp_config.py +137 -0
- hud/environment/connectors/openai.py +101 -0
- hud/environment/connectors/remote.py +172 -0
- hud/environment/environment.py +835 -0
- hud/environment/integrations/__init__.py +45 -0
- hud/environment/integrations/adk.py +67 -0
- hud/environment/integrations/anthropic.py +196 -0
- hud/environment/integrations/gemini.py +92 -0
- hud/environment/integrations/langchain.py +82 -0
- hud/environment/integrations/llamaindex.py +68 -0
- hud/environment/integrations/openai.py +238 -0
- hud/environment/mock.py +306 -0
- hud/environment/router.py +263 -0
- hud/environment/scenarios.py +620 -0
- hud/environment/tests/__init__.py +1 -0
- hud/environment/tests/test_connection.py +317 -0
- hud/environment/tests/test_connectors.py +205 -0
- hud/environment/tests/test_environment.py +593 -0
- hud/environment/tests/test_integrations.py +257 -0
- hud/environment/tests/test_local_connectors.py +242 -0
- hud/environment/tests/test_scenarios.py +1086 -0
- hud/environment/tests/test_tools.py +208 -0
- hud/environment/types.py +23 -0
- hud/environment/utils/__init__.py +35 -0
- hud/environment/utils/formats.py +215 -0
- hud/environment/utils/schema.py +171 -0
- hud/environment/utils/tool_wrappers.py +113 -0
- hud/eval/__init__.py +67 -0
- hud/eval/context.py +727 -0
- hud/eval/display.py +299 -0
- hud/eval/instrument.py +187 -0
- hud/eval/manager.py +533 -0
- hud/eval/parallel.py +268 -0
- hud/eval/task.py +372 -0
- hud/eval/tests/__init__.py +1 -0
- hud/eval/tests/test_context.py +178 -0
- hud/eval/tests/test_eval.py +210 -0
- hud/eval/tests/test_manager.py +152 -0
- hud/eval/tests/test_parallel.py +168 -0
- hud/eval/tests/test_task.py +291 -0
- hud/eval/types.py +65 -0
- hud/eval/utils.py +194 -0
- hud/patches/__init__.py +19 -0
- hud/patches/mcp_patches.py +308 -0
- hud/patches/warnings.py +54 -0
- hud/samples/browser.py +4 -4
- hud/server/__init__.py +2 -1
- hud/server/low_level.py +2 -1
- hud/server/router.py +164 -0
- hud/server/server.py +567 -80
- hud/server/tests/test_mcp_server_integration.py +11 -11
- hud/server/tests/test_mcp_server_more.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/settings.py +45 -3
- hud/shared/exceptions.py +36 -10
- hud/shared/hints.py +26 -1
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +40 -31
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +20 -19
- hud/telemetry/exporter.py +201 -0
- hud/telemetry/instrument.py +165 -253
- hud/telemetry/tests/test_eval_telemetry.py +356 -0
- hud/telemetry/tests/test_exporter.py +258 -0
- hud/telemetry/tests/test_instrument.py +401 -0
- hud/tools/__init__.py +18 -2
- hud/tools/agent.py +223 -0
- hud/tools/apply_patch.py +639 -0
- hud/tools/base.py +54 -4
- hud/tools/bash.py +2 -2
- hud/tools/computer/__init__.py +36 -3
- hud/tools/computer/anthropic.py +2 -2
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/hud.py +23 -6
- hud/tools/computer/openai.py +20 -21
- hud/tools/computer/qwen.py +434 -0
- hud/tools/computer/settings.py +37 -0
- hud/tools/edit.py +3 -7
- hud/tools/executors/base.py +4 -2
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/grounding/grounded_tool.py +13 -18
- hud/tools/grounding/grounder.py +10 -31
- hud/tools/grounding/tests/test_grounded_tool.py +26 -44
- hud/tools/jupyter.py +330 -0
- hud/tools/playwright.py +18 -3
- hud/tools/shell.py +308 -0
- hud/tools/tests/test_agent_tool.py +355 -0
- hud/tools/tests/test_apply_patch.py +718 -0
- hud/tools/tests/test_computer.py +4 -9
- hud/tools/tests/test_computer_actions.py +24 -2
- hud/tools/tests/test_jupyter_tool.py +181 -0
- hud/tools/tests/test_shell.py +596 -0
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/tools/types.py +21 -1
- hud/types.py +194 -56
- hud/utils/__init__.py +2 -0
- hud/utils/env.py +67 -0
- hud/utils/hud_console.py +89 -18
- hud/utils/mcp.py +15 -58
- hud/utils/strict_schema.py +162 -0
- hud/utils/tests/test_init.py +1 -2
- hud/utils/tests/test_mcp.py +1 -28
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/utils/types.py +20 -0
- hud/version.py +1 -1
- hud_python-0.5.13.dist-info/METADATA +264 -0
- hud_python-0.5.13.dist-info/RECORD +305 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
- hud/agents/langchain.py +0 -261
- hud/agents/lite_llm.py +0 -72
- hud/cli/rl/__init__.py +0 -180
- hud/cli/rl/config.py +0 -101
- hud/cli/rl/display.py +0 -133
- hud/cli/rl/gpu.py +0 -63
- hud/cli/rl/gpu_utils.py +0 -321
- hud/cli/rl/local_runner.py +0 -595
- hud/cli/rl/presets.py +0 -96
- hud/cli/rl/remote_runner.py +0 -463
- hud/cli/rl/rl_api.py +0 -150
- hud/cli/rl/vllm.py +0 -177
- hud/cli/rl/wait_utils.py +0 -89
- hud/datasets/parallel.py +0 -687
- hud/misc/__init__.py +0 -1
- hud/misc/claude_plays_pokemon.py +0 -292
- hud/otel/__init__.py +0 -35
- hud/otel/collector.py +0 -142
- hud/otel/config.py +0 -181
- hud/otel/context.py +0 -570
- hud/otel/exporters.py +0 -369
- hud/otel/instrumentation.py +0 -135
- hud/otel/processors.py +0 -121
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_processors.py +0 -197
- hud/rl/README.md +0 -30
- hud/rl/__init__.py +0 -1
- hud/rl/actor.py +0 -176
- hud/rl/buffer.py +0 -405
- hud/rl/chat_template.jinja +0 -101
- hud/rl/config.py +0 -192
- hud/rl/distributed.py +0 -132
- hud/rl/learner.py +0 -637
- hud/rl/tests/__init__.py +0 -1
- hud/rl/tests/test_learner.py +0 -186
- hud/rl/train.py +0 -382
- hud/rl/types.py +0 -101
- hud/rl/utils/start_vllm_server.sh +0 -30
- hud/rl/utils.py +0 -524
- hud/rl/vllm_adapter.py +0 -143
- hud/telemetry/job.py +0 -352
- hud/telemetry/replay.py +0 -74
- hud/telemetry/tests/test_replay.py +0 -40
- hud/telemetry/tests/test_trace.py +0 -63
- hud/telemetry/trace.py +0 -158
- hud/utils/agent_factories.py +0 -86
- hud/utils/async_utils.py +0 -65
- hud/utils/group_eval.py +0 -223
- hud/utils/progress.py +0 -149
- hud/utils/tasks.py +0 -127
- hud/utils/tests/test_async_utils.py +0 -173
- hud/utils/tests/test_progress.py +0 -261
- hud_python-0.4.45.dist-info/METADATA +0 -552
- hud_python-0.4.45.dist-info/RECORD +0 -228
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
"""Tests for MCPAgent.run() with EvalContext."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, ClassVar
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
from mcp import types
|
|
9
|
+
|
|
10
|
+
from hud.agents import MCPAgent
|
|
11
|
+
from hud.agents.base import BaseCreateParams
|
|
12
|
+
from hud.environment.router import ToolRouter
|
|
13
|
+
from hud.eval.context import EvalContext
|
|
14
|
+
from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class MockConfig(BaseAgentConfig):
    """Agent configuration with fixed placeholder values for the mock agent."""

    # Both fields carry static defaults so the mock agent can be built with no arguments.
    model_name: str = "MockAgent"
    model: str = "mock-model"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class MockCreateParams(BaseCreateParams, MockConfig):
    """Creation parameters for the mock agent: BaseCreateParams plus MockConfig fields."""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class MockMCPAgent(MCPAgent):
    """MCPAgent stand-in that replies with a preconfigured response.

    The response returned by ``get_response`` defaults to a finished reply with
    the content ``"Test response"`` and can be swapped via ``set_response``.
    """

    metadata: ClassVar[dict[str, Any] | None] = {}
    config_cls: ClassVar[type[BaseAgentConfig]] = MockConfig

    def __init__(self, **kwargs: Any) -> None:
        """Build the agent from keyword arguments validated by MockCreateParams."""
        super().__init__(MockCreateParams(**kwargs))
        # Canned reply: already done, no tool calls.
        self._response = AgentResponse(content="Test response", tool_calls=[], done=True)

    def set_response(self, response: AgentResponse) -> None:
        """Replace the canned reply returned by ``get_response``."""
        self._response = response

    async def get_response(self, messages: list[dict[str, Any]]) -> AgentResponse:
        """Return the canned reply; ``messages`` is not inspected."""
        return self._response

    async def format_tool_results(
        self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
    ) -> list[dict[str, Any]]:
        """Turn each tool result into a ``tool`` message holding its ``str()`` form."""
        formatted: list[dict[str, Any]] = []
        for result in tool_results:
            formatted.append({"role": "tool", "content": str(result)})
        return formatted

    async def get_system_messages(self) -> list[Any]:
        """Return no system messages."""
        return []

    async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[Any]:
        """Convert blocks that expose a ``text`` attribute into text dicts; skip the rest."""
        text_items: list[Any] = []
        for block in blocks:
            if not hasattr(block, "text"):
                continue
            text_items.append({"type": "text", "text": getattr(block, "text")})
        return text_items
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class MockEvalContext(EvalContext):
    """EvalContext subclass whose state is assigned by hand for tests.

    ``__init__`` sets every attribute directly and does not call the parent
    initializer. Tool listing, tool calls and submission are overridden with
    in-memory stubs.
    """

    def __init__(self, prompt: str = "Test prompt", tools: list[types.Tool] | None = None) -> None:
        """Populate the context with a prompt and a tool list.

        Args:
            prompt: Prompt text exposed on the context.
            tools: Tools to expose. A falsy value (``None`` or an empty list)
                falls back to a single tool named ``test_tool``.
        """
        # --- core state ---
        self.prompt = prompt
        if tools:
            self._tools = tools
        else:
            self._tools = [types.Tool(name="test_tool", description="Test", inputSchema={})]
        self._submitted: str | None = None  # set by submit()
        self.reward: float | None = None
        self._initialized = True

        # --- environment state ---
        self._router = ToolRouter()
        self._agent_include: list[str] | None = None
        self._agent_exclude: list[str] | None = None

        # --- eval-context state ---
        self._task = None
        self.trace_id = "test-trace-id"
        self.eval_name = "test-eval"
        self.job_id: str | None = None
        self.group_id: str | None = None
        self.index = 0
        self.variants: dict[str, Any] = {}
        self.answer: str | None = None
        self.system_prompt: str | None = None
        self.error: BaseException | None = None
        self.metadata: dict[str, Any] = {}
        self.results: list[Any] = []
        self._is_summary = False

    def as_tools(self) -> list[types.Tool]:
        """Return the configured tool list."""
        return self._tools

    @property
    def has_scenario(self) -> bool:
        """Always report that a scenario is present."""
        return True

    async def list_tools(self) -> list[types.Tool]:
        """Return the configured tool list."""
        return self._tools

    async def call_tool(self, call: Any, /, **kwargs: Any) -> MCPToolResult:
        """Return a non-error text result naming the called tool.

        The tool name is the first element when ``call`` is a tuple, its
        ``name`` attribute when it has one, and ``str(call)`` otherwise.
        ``kwargs`` are ignored.
        """
        if isinstance(call, tuple):
            tool_name = call[0]
        else:
            tool_name = call.name if hasattr(call, "name") else str(call)
        reply = types.TextContent(type="text", text=f"Result from {tool_name}")
        return MCPToolResult(
            content=[reply],
            isError=False,
        )

    async def submit(self, answer: str) -> None:
        """Record the submitted answer so tests can inspect it."""
        self._submitted = answer
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class TestRun:
    """Exercise MCPAgent.run() against a mocked EvalContext."""

    @pytest.mark.asyncio
    async def test_run_basic(self) -> None:
        """A default run finishes, returns the canned content, and submits it."""
        eval_ctx = MockEvalContext(prompt="Do the task")
        mock_agent = MockMCPAgent()

        outcome = await mock_agent.run(eval_ctx)

        assert outcome.done
        assert outcome.content == "Test response"
        assert eval_ctx._submitted == "Test response"

    @pytest.mark.asyncio
    async def test_run_no_prompt_raises(self) -> None:
        """An empty prompt makes run() raise ValueError."""
        eval_ctx = MockEvalContext(prompt="")
        mock_agent = MockMCPAgent()

        with pytest.raises(ValueError, match="prompt is not set"):
            await mock_agent.run(eval_ctx)

    @pytest.mark.asyncio
    async def test_run_wrong_type_raises(self) -> None:
        """Passing something other than an EvalContext makes run() raise TypeError."""
        mock_agent = MockMCPAgent()

        with pytest.raises(TypeError, match="must be EvalContext"):
            await mock_agent.run("not an eval context")  # type: ignore[arg-type]

    @pytest.mark.asyncio
    async def test_run_clears_ctx(self) -> None:
        """The agent's ctx is None once run() has completed."""
        eval_ctx = MockEvalContext(prompt="Do the task")
        mock_agent = MockMCPAgent()

        await mock_agent.run(eval_ctx)

        assert mock_agent.ctx is None

    @pytest.mark.asyncio
    async def test_run_no_submit_on_empty_content(self) -> None:
        """Nothing is submitted when the agent's final content is empty."""
        eval_ctx = MockEvalContext(prompt="Do the task")
        mock_agent = MockMCPAgent()
        empty_reply = AgentResponse(content="", tool_calls=[], done=True)
        mock_agent.set_response(empty_reply)

        await mock_agent.run(eval_ctx)

        assert eval_ctx._submitted is None

    @pytest.mark.asyncio
    async def test_run_initializes_tools(self) -> None:
        """The agent is marked initialized after running with a two-tool context."""
        two_tools = [
            types.Tool(name="tool1", description="Tool 1", inputSchema={}),
            types.Tool(name="tool2", description="Tool 2", inputSchema={}),
        ]
        eval_ctx = MockEvalContext(prompt="Do the task", tools=two_tools)
        mock_agent = MockMCPAgent()

        await mock_agent.run(eval_ctx)

        assert mock_agent._initialized
        # After cleanup, ctx is None but tools were discovered
|
hud/agents/types.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Agent configuration types.
|
|
2
|
+
|
|
3
|
+
Config classes are defined here separately from agent implementations
|
|
4
|
+
to allow importing them without requiring SDK dependencies (anthropic, google-genai).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import Any, Literal
|
|
10
|
+
|
|
11
|
+
from pydantic import AliasChoices, BaseModel, ConfigDict, Field
|
|
12
|
+
|
|
13
|
+
from hud.types import BaseAgentConfig
|
|
14
|
+
|
|
15
|
+
# Validation alias shared by the config classes below: the ``model`` field may be
# populated from either "model" or "checkpoint_name" (kept for backwards compatibility).
_model_alias = AliasChoices("model", "checkpoint_name")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class BaseCreateParams(BaseModel):
    """Runtime parameters for agent creation."""

    # Arbitrary (non-pydantic) types are permitted as field values.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # EvalContext or Environment; typed as Any here.
    ctx: Any = None
    auto_respond: bool = False
    verbose: bool = False
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# -----------------------------------------------------------------------------
|
|
30
|
+
# Claude
|
|
31
|
+
# -----------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ClaudeConfig(BaseAgentConfig):
    """Configuration fields for the Claude agent."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    model_name: str = "Claude"
    # May also be supplied as "checkpoint_name" (see _model_alias).
    model: str = Field(
        default="claude-sonnet-4-5",
        validation_alias=_model_alias,
    )
    # AsyncAnthropic | AsyncAnthropicBedrock; typed as Any here.
    model_client: Any = None
    max_tokens: int = 16384
    use_computer_beta: bool = True
    validate_api_key: bool = True
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class ClaudeCreateParams(BaseCreateParams, ClaudeConfig):
    """Creation parameters for the Claude agent: BaseCreateParams plus ClaudeConfig fields."""
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# -----------------------------------------------------------------------------
|
|
50
|
+
# Gemini
|
|
51
|
+
# -----------------------------------------------------------------------------
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class GeminiConfig(BaseAgentConfig):
    """Configuration for GeminiAgent."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    model_name: str = "Gemini"
    # May also be supplied as "checkpoint_name" (see _model_alias).
    model: str = Field(
        default="gemini-3-pro-preview",
        validation_alias=_model_alias,
    )
    # genai.Client; typed as Any here.
    model_client: Any = None
    # Generation settings and their defaults.
    temperature: float = 1.0
    top_p: float = 0.95
    top_k: int = 40
    max_output_tokens: int = 8192
    validate_api_key: bool = True
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class GeminiCreateParams(BaseCreateParams, GeminiConfig):
    """Creation parameters for the Gemini agent: BaseCreateParams plus GeminiConfig fields."""
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class GeminiCUAConfig(GeminiConfig):
    """Configuration for GeminiCUAAgent."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Overrides the display name and default model inherited from GeminiConfig.
    model_name: str = "GeminiCUA"
    model: str = Field(
        default="gemini-2.5-computer-use-preview-10-2025",
        validation_alias=_model_alias,
    )
    # Defaults to a fresh empty list per instance.
    excluded_predefined_functions: list[str] = Field(default_factory=list)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class GeminiCUACreateParams(BaseCreateParams, GeminiCUAConfig):
    """Creation parameters for the Gemini CUA agent: BaseCreateParams plus GeminiCUAConfig."""
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# -----------------------------------------------------------------------------
|
|
90
|
+
# OpenAI
|
|
91
|
+
# -----------------------------------------------------------------------------
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class OpenAIConfig(BaseAgentConfig):
    """Configuration for OpenAIAgent."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    model_name: str = "OpenAI"
    # May also be supplied as "checkpoint_name" (see _model_alias).
    model: str = Field(
        default="gpt-5.1",
        validation_alias=_model_alias,
    )
    # AsyncOpenAI; typed as Any here.
    model_client: Any = None
    # The optional settings below all default to None.
    max_output_tokens: int | None = None
    temperature: float | None = None
    reasoning: Any = None  # openai Reasoning
    tool_choice: Any = None  # openai ToolChoice
    truncation: Literal["auto", "disabled"] | None = None
    parallel_tool_calls: bool | None = None
    validate_api_key: bool = True
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class OpenAICreateParams(BaseCreateParams, OpenAIConfig):
    """Creation parameters for the OpenAI agent: BaseCreateParams plus OpenAIConfig fields."""
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class OpenAIChatConfig(BaseAgentConfig):
    """Configuration for OpenAIChatAgent."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    model_name: str = "OpenAI Chat"
    # May also be supplied as "checkpoint_name" (see _model_alias).
    model: str = Field(
        default="gpt-5-mini",
        validation_alias=_model_alias,
    )
    # AsyncOpenAI; typed as Any here.
    openai_client: Any = None
    api_key: str | None = None
    base_url: str | None = None
    # Defaults to a fresh empty dict per instance.
    completion_kwargs: dict[str, Any] = Field(default_factory=dict)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class OpenAIChatCreateParams(BaseCreateParams, OpenAIChatConfig):
    """Creation parameters for the OpenAI chat agent: BaseCreateParams plus OpenAIChatConfig."""
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# -----------------------------------------------------------------------------
|
|
133
|
+
# Operator
|
|
134
|
+
# -----------------------------------------------------------------------------
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class OperatorConfig(OpenAIConfig):
    """Configuration for OperatorAgent."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Overrides the display name and default model inherited from OpenAIConfig.
    model_name: str = "Operator"
    model: str = Field(
        default="computer-use-preview",
        validation_alias=_model_alias,
    )
    # Restricted to the listed literals; defaults to "linux".
    environment: Literal["windows", "mac", "linux", "ubuntu", "browser"] = "linux"
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class OperatorCreateParams(BaseCreateParams, OperatorConfig):
    """Creation parameters for the Operator agent: BaseCreateParams plus OperatorConfig fields."""
|