hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (282)
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +70 -5
  3. hud/agents/base.py +238 -500
  4. hud/agents/claude.py +236 -247
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +264 -0
  7. hud/agents/gemini_cua.py +324 -0
  8. hud/agents/grounded_openai.py +98 -100
  9. hud/agents/misc/integration_test_agent.py +51 -20
  10. hud/agents/misc/response_agent.py +48 -36
  11. hud/agents/openai.py +282 -296
  12. hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
  13. hud/agents/operator.py +199 -0
  14. hud/agents/resolver.py +70 -0
  15. hud/agents/tests/conftest.py +133 -0
  16. hud/agents/tests/test_base.py +300 -622
  17. hud/agents/tests/test_base_runtime.py +233 -0
  18. hud/agents/tests/test_claude.py +381 -214
  19. hud/agents/tests/test_client.py +9 -10
  20. hud/agents/tests/test_gemini.py +369 -0
  21. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  22. hud/agents/tests/test_openai.py +377 -140
  23. hud/agents/tests/test_operator.py +362 -0
  24. hud/agents/tests/test_resolver.py +192 -0
  25. hud/agents/tests/test_run_eval.py +179 -0
  26. hud/agents/types.py +148 -0
  27. hud/cli/__init__.py +493 -546
  28. hud/cli/analyze.py +43 -5
  29. hud/cli/build.py +699 -113
  30. hud/cli/debug.py +8 -5
  31. hud/cli/dev.py +889 -732
  32. hud/cli/eval.py +793 -667
  33. hud/cli/flows/dev.py +167 -0
  34. hud/cli/flows/init.py +191 -0
  35. hud/cli/flows/tasks.py +153 -56
  36. hud/cli/flows/templates.py +151 -0
  37. hud/cli/flows/tests/__init__.py +1 -0
  38. hud/cli/flows/tests/test_dev.py +126 -0
  39. hud/cli/init.py +60 -58
  40. hud/cli/pull.py +1 -1
  41. hud/cli/push.py +38 -13
  42. hud/cli/rft.py +311 -0
  43. hud/cli/rft_status.py +145 -0
  44. hud/cli/tests/test_analyze.py +5 -5
  45. hud/cli/tests/test_analyze_metadata.py +3 -2
  46. hud/cli/tests/test_analyze_module.py +120 -0
  47. hud/cli/tests/test_build.py +110 -8
  48. hud/cli/tests/test_build_failure.py +41 -0
  49. hud/cli/tests/test_build_module.py +50 -0
  50. hud/cli/tests/test_cli_init.py +6 -1
  51. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  52. hud/cli/tests/test_cli_root.py +140 -0
  53. hud/cli/tests/test_convert.py +361 -0
  54. hud/cli/tests/test_debug.py +12 -10
  55. hud/cli/tests/test_dev.py +197 -0
  56. hud/cli/tests/test_eval.py +251 -0
  57. hud/cli/tests/test_eval_bedrock.py +51 -0
  58. hud/cli/tests/test_init.py +124 -0
  59. hud/cli/tests/test_main_module.py +11 -5
  60. hud/cli/tests/test_mcp_server.py +12 -100
  61. hud/cli/tests/test_push.py +1 -1
  62. hud/cli/tests/test_push_happy.py +74 -0
  63. hud/cli/tests/test_push_wrapper.py +23 -0
  64. hud/cli/tests/test_registry.py +1 -1
  65. hud/cli/tests/test_utils.py +1 -1
  66. hud/cli/{rl → utils}/celebrate.py +14 -12
  67. hud/cli/utils/config.py +18 -1
  68. hud/cli/utils/docker.py +130 -4
  69. hud/cli/utils/env_check.py +9 -9
  70. hud/cli/utils/git.py +136 -0
  71. hud/cli/utils/interactive.py +39 -5
  72. hud/cli/utils/metadata.py +70 -1
  73. hud/cli/utils/runner.py +1 -1
  74. hud/cli/utils/server.py +2 -2
  75. hud/cli/utils/source_hash.py +3 -3
  76. hud/cli/utils/tasks.py +4 -1
  77. hud/cli/utils/tests/__init__.py +0 -0
  78. hud/cli/utils/tests/test_config.py +58 -0
  79. hud/cli/utils/tests/test_docker.py +93 -0
  80. hud/cli/utils/tests/test_docker_hints.py +71 -0
  81. hud/cli/utils/tests/test_env_check.py +74 -0
  82. hud/cli/utils/tests/test_environment.py +42 -0
  83. hud/cli/utils/tests/test_git.py +142 -0
  84. hud/cli/utils/tests/test_interactive_module.py +60 -0
  85. hud/cli/utils/tests/test_local_runner.py +50 -0
  86. hud/cli/utils/tests/test_logging_utils.py +23 -0
  87. hud/cli/utils/tests/test_metadata.py +49 -0
  88. hud/cli/utils/tests/test_package_runner.py +35 -0
  89. hud/cli/utils/tests/test_registry_utils.py +49 -0
  90. hud/cli/utils/tests/test_remote_runner.py +25 -0
  91. hud/cli/utils/tests/test_runner_modules.py +52 -0
  92. hud/cli/utils/tests/test_source_hash.py +36 -0
  93. hud/cli/utils/tests/test_tasks.py +80 -0
  94. hud/cli/utils/version_check.py +258 -0
  95. hud/cli/{rl → utils}/viewer.py +2 -2
  96. hud/clients/README.md +12 -11
  97. hud/clients/__init__.py +4 -3
  98. hud/clients/base.py +166 -26
  99. hud/clients/environment.py +51 -0
  100. hud/clients/fastmcp.py +13 -6
  101. hud/clients/mcp_use.py +45 -15
  102. hud/clients/tests/test_analyze_scenarios.py +206 -0
  103. hud/clients/tests/test_protocol.py +9 -3
  104. hud/datasets/__init__.py +23 -20
  105. hud/datasets/loader.py +326 -0
  106. hud/datasets/runner.py +198 -105
  107. hud/datasets/tests/__init__.py +0 -0
  108. hud/datasets/tests/test_loader.py +221 -0
  109. hud/datasets/tests/test_utils.py +315 -0
  110. hud/datasets/utils.py +270 -90
  111. hud/environment/__init__.py +52 -0
  112. hud/environment/connection.py +258 -0
  113. hud/environment/connectors/__init__.py +33 -0
  114. hud/environment/connectors/base.py +68 -0
  115. hud/environment/connectors/local.py +177 -0
  116. hud/environment/connectors/mcp_config.py +137 -0
  117. hud/environment/connectors/openai.py +101 -0
  118. hud/environment/connectors/remote.py +172 -0
  119. hud/environment/environment.py +835 -0
  120. hud/environment/integrations/__init__.py +45 -0
  121. hud/environment/integrations/adk.py +67 -0
  122. hud/environment/integrations/anthropic.py +196 -0
  123. hud/environment/integrations/gemini.py +92 -0
  124. hud/environment/integrations/langchain.py +82 -0
  125. hud/environment/integrations/llamaindex.py +68 -0
  126. hud/environment/integrations/openai.py +238 -0
  127. hud/environment/mock.py +306 -0
  128. hud/environment/router.py +263 -0
  129. hud/environment/scenarios.py +620 -0
  130. hud/environment/tests/__init__.py +1 -0
  131. hud/environment/tests/test_connection.py +317 -0
  132. hud/environment/tests/test_connectors.py +205 -0
  133. hud/environment/tests/test_environment.py +593 -0
  134. hud/environment/tests/test_integrations.py +257 -0
  135. hud/environment/tests/test_local_connectors.py +242 -0
  136. hud/environment/tests/test_scenarios.py +1086 -0
  137. hud/environment/tests/test_tools.py +208 -0
  138. hud/environment/types.py +23 -0
  139. hud/environment/utils/__init__.py +35 -0
  140. hud/environment/utils/formats.py +215 -0
  141. hud/environment/utils/schema.py +171 -0
  142. hud/environment/utils/tool_wrappers.py +113 -0
  143. hud/eval/__init__.py +67 -0
  144. hud/eval/context.py +727 -0
  145. hud/eval/display.py +299 -0
  146. hud/eval/instrument.py +187 -0
  147. hud/eval/manager.py +533 -0
  148. hud/eval/parallel.py +268 -0
  149. hud/eval/task.py +372 -0
  150. hud/eval/tests/__init__.py +1 -0
  151. hud/eval/tests/test_context.py +178 -0
  152. hud/eval/tests/test_eval.py +210 -0
  153. hud/eval/tests/test_manager.py +152 -0
  154. hud/eval/tests/test_parallel.py +168 -0
  155. hud/eval/tests/test_task.py +291 -0
  156. hud/eval/types.py +65 -0
  157. hud/eval/utils.py +194 -0
  158. hud/patches/__init__.py +19 -0
  159. hud/patches/mcp_patches.py +308 -0
  160. hud/patches/warnings.py +54 -0
  161. hud/samples/browser.py +4 -4
  162. hud/server/__init__.py +2 -1
  163. hud/server/low_level.py +2 -1
  164. hud/server/router.py +164 -0
  165. hud/server/server.py +567 -80
  166. hud/server/tests/test_mcp_server_integration.py +11 -11
  167. hud/server/tests/test_mcp_server_more.py +1 -1
  168. hud/server/tests/test_server_extra.py +2 -0
  169. hud/settings.py +45 -3
  170. hud/shared/exceptions.py +36 -10
  171. hud/shared/hints.py +26 -1
  172. hud/shared/requests.py +15 -3
  173. hud/shared/tests/test_exceptions.py +40 -31
  174. hud/shared/tests/test_hints.py +167 -0
  175. hud/telemetry/__init__.py +20 -19
  176. hud/telemetry/exporter.py +201 -0
  177. hud/telemetry/instrument.py +165 -253
  178. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  179. hud/telemetry/tests/test_exporter.py +258 -0
  180. hud/telemetry/tests/test_instrument.py +401 -0
  181. hud/tools/__init__.py +18 -2
  182. hud/tools/agent.py +223 -0
  183. hud/tools/apply_patch.py +639 -0
  184. hud/tools/base.py +54 -4
  185. hud/tools/bash.py +2 -2
  186. hud/tools/computer/__init__.py +36 -3
  187. hud/tools/computer/anthropic.py +2 -2
  188. hud/tools/computer/gemini.py +385 -0
  189. hud/tools/computer/hud.py +23 -6
  190. hud/tools/computer/openai.py +20 -21
  191. hud/tools/computer/qwen.py +434 -0
  192. hud/tools/computer/settings.py +37 -0
  193. hud/tools/edit.py +3 -7
  194. hud/tools/executors/base.py +4 -2
  195. hud/tools/executors/pyautogui.py +1 -1
  196. hud/tools/grounding/grounded_tool.py +13 -18
  197. hud/tools/grounding/grounder.py +10 -31
  198. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  199. hud/tools/jupyter.py +330 -0
  200. hud/tools/playwright.py +18 -3
  201. hud/tools/shell.py +308 -0
  202. hud/tools/tests/test_agent_tool.py +355 -0
  203. hud/tools/tests/test_apply_patch.py +718 -0
  204. hud/tools/tests/test_computer.py +4 -9
  205. hud/tools/tests/test_computer_actions.py +24 -2
  206. hud/tools/tests/test_jupyter_tool.py +181 -0
  207. hud/tools/tests/test_shell.py +596 -0
  208. hud/tools/tests/test_submit.py +85 -0
  209. hud/tools/tests/test_types.py +193 -0
  210. hud/tools/types.py +21 -1
  211. hud/types.py +194 -56
  212. hud/utils/__init__.py +2 -0
  213. hud/utils/env.py +67 -0
  214. hud/utils/hud_console.py +89 -18
  215. hud/utils/mcp.py +15 -58
  216. hud/utils/strict_schema.py +162 -0
  217. hud/utils/tests/test_init.py +1 -2
  218. hud/utils/tests/test_mcp.py +1 -28
  219. hud/utils/tests/test_pretty_errors.py +186 -0
  220. hud/utils/tests/test_tool_shorthand.py +154 -0
  221. hud/utils/tests/test_version.py +1 -1
  222. hud/utils/types.py +20 -0
  223. hud/version.py +1 -1
  224. hud_python-0.5.13.dist-info/METADATA +264 -0
  225. hud_python-0.5.13.dist-info/RECORD +305 -0
  226. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
  227. hud/agents/langchain.py +0 -261
  228. hud/agents/lite_llm.py +0 -72
  229. hud/cli/rl/__init__.py +0 -180
  230. hud/cli/rl/config.py +0 -101
  231. hud/cli/rl/display.py +0 -133
  232. hud/cli/rl/gpu.py +0 -63
  233. hud/cli/rl/gpu_utils.py +0 -321
  234. hud/cli/rl/local_runner.py +0 -595
  235. hud/cli/rl/presets.py +0 -96
  236. hud/cli/rl/remote_runner.py +0 -463
  237. hud/cli/rl/rl_api.py +0 -150
  238. hud/cli/rl/vllm.py +0 -177
  239. hud/cli/rl/wait_utils.py +0 -89
  240. hud/datasets/parallel.py +0 -687
  241. hud/misc/__init__.py +0 -1
  242. hud/misc/claude_plays_pokemon.py +0 -292
  243. hud/otel/__init__.py +0 -35
  244. hud/otel/collector.py +0 -142
  245. hud/otel/config.py +0 -181
  246. hud/otel/context.py +0 -570
  247. hud/otel/exporters.py +0 -369
  248. hud/otel/instrumentation.py +0 -135
  249. hud/otel/processors.py +0 -121
  250. hud/otel/tests/__init__.py +0 -1
  251. hud/otel/tests/test_processors.py +0 -197
  252. hud/rl/README.md +0 -30
  253. hud/rl/__init__.py +0 -1
  254. hud/rl/actor.py +0 -176
  255. hud/rl/buffer.py +0 -405
  256. hud/rl/chat_template.jinja +0 -101
  257. hud/rl/config.py +0 -192
  258. hud/rl/distributed.py +0 -132
  259. hud/rl/learner.py +0 -637
  260. hud/rl/tests/__init__.py +0 -1
  261. hud/rl/tests/test_learner.py +0 -186
  262. hud/rl/train.py +0 -382
  263. hud/rl/types.py +0 -101
  264. hud/rl/utils/start_vllm_server.sh +0 -30
  265. hud/rl/utils.py +0 -524
  266. hud/rl/vllm_adapter.py +0 -143
  267. hud/telemetry/job.py +0 -352
  268. hud/telemetry/replay.py +0 -74
  269. hud/telemetry/tests/test_replay.py +0 -40
  270. hud/telemetry/tests/test_trace.py +0 -63
  271. hud/telemetry/trace.py +0 -158
  272. hud/utils/agent_factories.py +0 -86
  273. hud/utils/async_utils.py +0 -65
  274. hud/utils/group_eval.py +0 -223
  275. hud/utils/progress.py +0 -149
  276. hud/utils/tasks.py +0 -127
  277. hud/utils/tests/test_async_utils.py +0 -173
  278. hud/utils/tests/test_progress.py +0 -261
  279. hud_python-0.4.45.dist-info/METADATA +0 -552
  280. hud_python-0.4.45.dist-info/RECORD +0 -228
  281. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  282. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,179 @@
1
+ """Tests for MCPAgent.run() with EvalContext."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, ClassVar
6
+
7
+ import pytest
8
+ from mcp import types
9
+
10
+ from hud.agents import MCPAgent
11
+ from hud.agents.base import BaseCreateParams
12
+ from hud.environment.router import ToolRouter
13
+ from hud.eval.context import EvalContext
14
+ from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
15
+
16
+
17
class MockConfig(BaseAgentConfig):
    """Minimal agent config used by the mock agent in these tests."""

    model_name: str = "MockAgent"
    model: str = "mock-model"
20
+
21
+
22
class MockCreateParams(BaseCreateParams, MockConfig):
    """Creation parameters for the mock agent: BaseCreateParams combined with MockConfig."""

    pass
24
+
25
+
26
class MockMCPAgent(MCPAgent):
    """Stub MCPAgent whose model reply is canned, for exercising run()."""

    metadata: ClassVar[dict[str, Any] | None] = {}
    config_cls: ClassVar[type[BaseAgentConfig]] = MockConfig

    def __init__(self, **kwargs: Any) -> None:
        """Build the agent from keyword arguments and preload a default reply."""
        super().__init__(MockCreateParams(**kwargs))
        self._response = AgentResponse(content="Test response", tool_calls=[], done=True)

    def set_response(self, response: AgentResponse) -> None:
        """Replace the reply that get_response() hands back."""
        self._response = response

    async def get_response(self, messages: list[dict[str, Any]]) -> AgentResponse:
        """Return the canned reply; ``messages`` is ignored."""
        return self._response

    async def format_tool_results(
        self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
    ) -> list[dict[str, Any]]:
        """Render each tool result as a ``role="tool"`` message holding its ``str()`` form."""
        formatted: list[dict[str, Any]] = []
        for tool_result in tool_results:
            formatted.append({"role": "tool", "content": str(tool_result)})
        return formatted

    async def get_system_messages(self) -> list[Any]:
        """Return no system messages."""
        return []

    async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[Any]:
        """Convert blocks exposing a ``text`` attribute to text dicts; skip the rest."""
        text_blocks: list[Any] = []
        for block in blocks:
            if not hasattr(block, "text"):
                continue
            text_blocks.append({"type": "text", "text": getattr(block, "text")})
        return text_blocks
53
+
54
+
55
class MockEvalContext(EvalContext):
    """EvalContext subclass with hand-set state and stubbed tool calls for tests."""

    def __init__(self, prompt: str = "Test prompt", tools: list[types.Tool] | None = None) -> None:
        """Assign the attributes directly; the parent initializer is not called."""
        if not tools:
            # None and an empty list both fall back to a single placeholder tool.
            tools = [types.Tool(name="test_tool", description="Test", inputSchema={})]

        # Core attributes
        self.prompt = prompt
        self._tools = tools
        self._submitted: str | None = None
        self.reward: float | None = None
        self._initialized = True

        # Environment attributes
        self._router = ToolRouter()
        self._agent_include: list[str] | None = None
        self._agent_exclude: list[str] | None = None

        # EvalContext attributes
        self._task = None
        self.trace_id = "test-trace-id"
        self.eval_name = "test-eval"
        self.job_id: str | None = None
        self.group_id: str | None = None
        self.index = 0
        self.variants: dict[str, Any] = {}
        self.answer: str | None = None
        self.system_prompt: str | None = None
        self.error: BaseException | None = None
        self.metadata: dict[str, Any] = {}
        self.results: list[Any] = []
        self._is_summary = False

    def as_tools(self) -> list[types.Tool]:
        """Return the tool list supplied at construction."""
        return self._tools

    @property
    def has_scenario(self) -> bool:
        """Always report that a scenario is present."""
        return True

    async def list_tools(self) -> list[types.Tool]:
        """Return the tool list supplied at construction."""
        return self._tools

    async def call_tool(self, call: Any, /, **kwargs: Any) -> MCPToolResult:
        """Return a successful text result naming the requested tool.

        ``call`` may be a ``(name, args)`` tuple, an object with a ``name``
        attribute, or anything else, in which case its ``str()`` is the name.
        """
        if isinstance(call, tuple):
            tool_name = call[0]
        else:
            tool_name = call.name if hasattr(call, "name") else str(call)

        reply = types.TextContent(type="text", text=f"Result from {tool_name}")
        return MCPToolResult(
            content=[reply],
            isError=False,
        )

    async def submit(self, answer: str) -> None:
        """Record the submitted answer so tests can inspect it."""
        self._submitted = answer
111
+
112
+
113
class TestRun:
    """Exercise MCPAgent.run() against a mocked EvalContext."""

    @pytest.mark.asyncio
    async def test_run_basic(self) -> None:
        """A finished response is returned and its content is submitted."""
        eval_ctx = MockEvalContext(prompt="Do the task")
        mock_agent = MockMCPAgent()

        response = await mock_agent.run(eval_ctx)

        assert response.done
        assert response.content == "Test response"
        assert eval_ctx._submitted == "Test response"

    @pytest.mark.asyncio
    async def test_run_no_prompt_raises(self) -> None:
        """An empty prompt makes run() raise ValueError."""
        eval_ctx = MockEvalContext(prompt="")
        mock_agent = MockMCPAgent()

        with pytest.raises(ValueError, match="prompt is not set"):
            await mock_agent.run(eval_ctx)

    @pytest.mark.asyncio
    async def test_run_wrong_type_raises(self) -> None:
        """Passing something that is not an EvalContext raises TypeError."""
        mock_agent = MockMCPAgent()

        with pytest.raises(TypeError, match="must be EvalContext"):
            await mock_agent.run("not an eval context")  # type: ignore[arg-type]

    @pytest.mark.asyncio
    async def test_run_clears_ctx(self) -> None:
        """The agent's ctx is None once run() has finished."""
        eval_ctx = MockEvalContext(prompt="Do the task")
        mock_agent = MockMCPAgent()

        await mock_agent.run(eval_ctx)

        assert mock_agent.ctx is None

    @pytest.mark.asyncio
    async def test_run_no_submit_on_empty_content(self) -> None:
        """Nothing is submitted when the response content is empty."""
        eval_ctx = MockEvalContext(prompt="Do the task")
        mock_agent = MockMCPAgent()
        empty_reply = AgentResponse(content="", tool_calls=[], done=True)
        mock_agent.set_response(empty_reply)

        await mock_agent.run(eval_ctx)

        assert eval_ctx._submitted is None

    @pytest.mark.asyncio
    async def test_run_initializes_tools(self) -> None:
        """The agent is marked initialized after running with a context that has tools."""
        two_tools = [
            types.Tool(name="tool1", description="Tool 1", inputSchema={}),
            types.Tool(name="tool2", description="Tool 2", inputSchema={}),
        ]
        eval_ctx = MockEvalContext(prompt="Do the task", tools=two_tools)
        mock_agent = MockMCPAgent()

        await mock_agent.run(eval_ctx)

        # ctx has been cleared by this point, so only the initialization flag is checked.
        assert mock_agent._initialized
hud/agents/types.py ADDED
@@ -0,0 +1,148 @@
1
+ """Agent configuration types.
2
+
3
+ Config classes are defined here separately from agent implementations
4
+ to allow importing them without requiring SDK dependencies (anthropic, google-genai).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Any, Literal
10
+
11
+ from pydantic import AliasChoices, BaseModel, ConfigDict, Field
12
+
13
+ from hud.types import BaseAgentConfig
14
+
15
# Validation alias shared by the config classes below: lets the ``model`` field
# be populated from either 'model' or 'checkpoint_name' (backwards compat).
_model_alias = AliasChoices("model", "checkpoint_name")
17
+
18
+
19
class BaseCreateParams(BaseModel):
    """Runtime parameters for agent creation.

    Combined with an agent's config class to form that agent's ``*CreateParams``.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    ctx: Any = None  # EvalContext or Environment
    auto_respond: bool = False
    verbose: bool = False
26
+ verbose: bool = False
27
+
28
+
29
+ # -----------------------------------------------------------------------------
30
+ # Claude
31
+ # -----------------------------------------------------------------------------
32
+
33
+
34
class ClaudeConfig(BaseAgentConfig):
    """Configuration for the Claude agent."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    model_name: str = "Claude"
    # Also accepted under the 'checkpoint_name' key via _model_alias.
    model: str = Field(default="claude-sonnet-4-5", validation_alias=_model_alias)
    # Typed as Any so this module can be imported without the anthropic SDK.
    model_client: Any = None  # AsyncAnthropic | AsyncAnthropicBedrock
    max_tokens: int = 16384
    use_computer_beta: bool = True
    validate_api_key: bool = True
43
+
44
+
45
class ClaudeCreateParams(BaseCreateParams, ClaudeConfig):
    """Creation parameters for the Claude agent: BaseCreateParams plus ClaudeConfig."""

    pass
47
+
48
+
49
+ # -----------------------------------------------------------------------------
50
+ # Gemini
51
+ # -----------------------------------------------------------------------------
52
+
53
+
54
class GeminiConfig(BaseAgentConfig):
    """Configuration for GeminiAgent."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    model_name: str = "Gemini"
    # Also accepted under the 'checkpoint_name' key via _model_alias.
    model: str = Field(default="gemini-3-pro-preview", validation_alias=_model_alias)
    # Typed as Any so this module can be imported without the google-genai SDK.
    model_client: Any = None  # genai.Client
    # Generation settings and their defaults.
    temperature: float = 1.0
    top_p: float = 0.95
    top_k: int = 40
    max_output_tokens: int = 8192
    validate_api_key: bool = True
67
+
68
+
69
class GeminiCreateParams(BaseCreateParams, GeminiConfig):
    """Creation parameters for the Gemini agent: BaseCreateParams plus GeminiConfig."""

    pass
71
+
72
+
73
class GeminiCUAConfig(GeminiConfig):
    """Configuration for GeminiCUAAgent.

    Extends GeminiConfig: overrides ``model_name`` and the default ``model``,
    and adds ``excluded_predefined_functions``.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    model_name: str = "GeminiCUA"
    # Also accepted under the 'checkpoint_name' key via _model_alias.
    model: str = Field(
        default="gemini-2.5-computer-use-preview-10-2025", validation_alias=_model_alias
    )
    # default_factory gives every instance its own empty list.
    excluded_predefined_functions: list[str] = Field(default_factory=list)
83
+
84
+
85
class GeminiCUACreateParams(BaseCreateParams, GeminiCUAConfig):
    """Creation parameters for the Gemini CUA agent: BaseCreateParams plus GeminiCUAConfig."""

    pass
87
+
88
+
89
+ # -----------------------------------------------------------------------------
90
+ # OpenAI
91
+ # -----------------------------------------------------------------------------
92
+
93
+
94
class OpenAIConfig(BaseAgentConfig):
    """Configuration for OpenAIAgent."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    model_name: str = "OpenAI"
    # Also accepted under the 'checkpoint_name' key via _model_alias.
    model: str = Field(default="gpt-5.1", validation_alias=_model_alias)
    # The client and the SDK-typed options below are Any so this module can be
    # imported without an SDK dependency.
    model_client: Any = None  # AsyncOpenAI
    # NOTE(review): the None defaults below presumably mean "leave the option
    # unset in the request" - confirm against the agent implementation.
    max_output_tokens: int | None = None
    temperature: float | None = None
    reasoning: Any = None  # openai Reasoning
    tool_choice: Any = None  # openai ToolChoice
    truncation: Literal["auto", "disabled"] | None = None
    parallel_tool_calls: bool | None = None
    validate_api_key: bool = True
109
+
110
+
111
class OpenAICreateParams(BaseCreateParams, OpenAIConfig):
    """Creation parameters for the OpenAI agent: BaseCreateParams plus OpenAIConfig."""

    pass
113
+
114
+
115
class OpenAIChatConfig(BaseAgentConfig):
    """Configuration for OpenAIChatAgent."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    model_name: str = "OpenAI Chat"
    # Also accepted under the 'checkpoint_name' key via _model_alias.
    model: str = Field(default="gpt-5-mini", validation_alias=_model_alias)
    # NOTE(review): this field is named openai_client, whereas the other config
    # classes in this module call their client field model_client.
    openai_client: Any = None  # AsyncOpenAI
    api_key: str | None = None
    base_url: str | None = None
    # default_factory gives every instance its own empty dict.
    completion_kwargs: dict[str, Any] = Field(default_factory=dict)
126
+
127
+
128
class OpenAIChatCreateParams(BaseCreateParams, OpenAIChatConfig):
    """Creation parameters for the OpenAI chat agent: BaseCreateParams plus OpenAIChatConfig."""

    pass
130
+
131
+
132
+ # -----------------------------------------------------------------------------
133
+ # Operator
134
+ # -----------------------------------------------------------------------------
135
+
136
+
137
class OperatorConfig(OpenAIConfig):
    """Configuration for OperatorAgent.

    Extends OpenAIConfig: overrides ``model_name`` and the default ``model``,
    and adds ``environment``.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    model_name: str = "Operator"
    # Also accepted under the 'checkpoint_name' key via _model_alias.
    model: str = Field(default="computer-use-preview", validation_alias=_model_alias)
    # Restricted to these five values; defaults to "linux".
    environment: Literal["windows", "mac", "linux", "ubuntu", "browser"] = "linux"
145
+
146
+
147
class OperatorCreateParams(BaseCreateParams, OperatorConfig):
    """Creation parameters for the Operator agent: BaseCreateParams plus OperatorConfig."""

    pass