hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (282) hide show
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +70 -5
  3. hud/agents/base.py +238 -500
  4. hud/agents/claude.py +236 -247
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +264 -0
  7. hud/agents/gemini_cua.py +324 -0
  8. hud/agents/grounded_openai.py +98 -100
  9. hud/agents/misc/integration_test_agent.py +51 -20
  10. hud/agents/misc/response_agent.py +48 -36
  11. hud/agents/openai.py +282 -296
  12. hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
  13. hud/agents/operator.py +199 -0
  14. hud/agents/resolver.py +70 -0
  15. hud/agents/tests/conftest.py +133 -0
  16. hud/agents/tests/test_base.py +300 -622
  17. hud/agents/tests/test_base_runtime.py +233 -0
  18. hud/agents/tests/test_claude.py +381 -214
  19. hud/agents/tests/test_client.py +9 -10
  20. hud/agents/tests/test_gemini.py +369 -0
  21. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  22. hud/agents/tests/test_openai.py +377 -140
  23. hud/agents/tests/test_operator.py +362 -0
  24. hud/agents/tests/test_resolver.py +192 -0
  25. hud/agents/tests/test_run_eval.py +179 -0
  26. hud/agents/types.py +148 -0
  27. hud/cli/__init__.py +493 -546
  28. hud/cli/analyze.py +43 -5
  29. hud/cli/build.py +699 -113
  30. hud/cli/debug.py +8 -5
  31. hud/cli/dev.py +889 -732
  32. hud/cli/eval.py +793 -667
  33. hud/cli/flows/dev.py +167 -0
  34. hud/cli/flows/init.py +191 -0
  35. hud/cli/flows/tasks.py +153 -56
  36. hud/cli/flows/templates.py +151 -0
  37. hud/cli/flows/tests/__init__.py +1 -0
  38. hud/cli/flows/tests/test_dev.py +126 -0
  39. hud/cli/init.py +60 -58
  40. hud/cli/pull.py +1 -1
  41. hud/cli/push.py +38 -13
  42. hud/cli/rft.py +311 -0
  43. hud/cli/rft_status.py +145 -0
  44. hud/cli/tests/test_analyze.py +5 -5
  45. hud/cli/tests/test_analyze_metadata.py +3 -2
  46. hud/cli/tests/test_analyze_module.py +120 -0
  47. hud/cli/tests/test_build.py +110 -8
  48. hud/cli/tests/test_build_failure.py +41 -0
  49. hud/cli/tests/test_build_module.py +50 -0
  50. hud/cli/tests/test_cli_init.py +6 -1
  51. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  52. hud/cli/tests/test_cli_root.py +140 -0
  53. hud/cli/tests/test_convert.py +361 -0
  54. hud/cli/tests/test_debug.py +12 -10
  55. hud/cli/tests/test_dev.py +197 -0
  56. hud/cli/tests/test_eval.py +251 -0
  57. hud/cli/tests/test_eval_bedrock.py +51 -0
  58. hud/cli/tests/test_init.py +124 -0
  59. hud/cli/tests/test_main_module.py +11 -5
  60. hud/cli/tests/test_mcp_server.py +12 -100
  61. hud/cli/tests/test_push.py +1 -1
  62. hud/cli/tests/test_push_happy.py +74 -0
  63. hud/cli/tests/test_push_wrapper.py +23 -0
  64. hud/cli/tests/test_registry.py +1 -1
  65. hud/cli/tests/test_utils.py +1 -1
  66. hud/cli/{rl → utils}/celebrate.py +14 -12
  67. hud/cli/utils/config.py +18 -1
  68. hud/cli/utils/docker.py +130 -4
  69. hud/cli/utils/env_check.py +9 -9
  70. hud/cli/utils/git.py +136 -0
  71. hud/cli/utils/interactive.py +39 -5
  72. hud/cli/utils/metadata.py +70 -1
  73. hud/cli/utils/runner.py +1 -1
  74. hud/cli/utils/server.py +2 -2
  75. hud/cli/utils/source_hash.py +3 -3
  76. hud/cli/utils/tasks.py +4 -1
  77. hud/cli/utils/tests/__init__.py +0 -0
  78. hud/cli/utils/tests/test_config.py +58 -0
  79. hud/cli/utils/tests/test_docker.py +93 -0
  80. hud/cli/utils/tests/test_docker_hints.py +71 -0
  81. hud/cli/utils/tests/test_env_check.py +74 -0
  82. hud/cli/utils/tests/test_environment.py +42 -0
  83. hud/cli/utils/tests/test_git.py +142 -0
  84. hud/cli/utils/tests/test_interactive_module.py +60 -0
  85. hud/cli/utils/tests/test_local_runner.py +50 -0
  86. hud/cli/utils/tests/test_logging_utils.py +23 -0
  87. hud/cli/utils/tests/test_metadata.py +49 -0
  88. hud/cli/utils/tests/test_package_runner.py +35 -0
  89. hud/cli/utils/tests/test_registry_utils.py +49 -0
  90. hud/cli/utils/tests/test_remote_runner.py +25 -0
  91. hud/cli/utils/tests/test_runner_modules.py +52 -0
  92. hud/cli/utils/tests/test_source_hash.py +36 -0
  93. hud/cli/utils/tests/test_tasks.py +80 -0
  94. hud/cli/utils/version_check.py +258 -0
  95. hud/cli/{rl → utils}/viewer.py +2 -2
  96. hud/clients/README.md +12 -11
  97. hud/clients/__init__.py +4 -3
  98. hud/clients/base.py +166 -26
  99. hud/clients/environment.py +51 -0
  100. hud/clients/fastmcp.py +13 -6
  101. hud/clients/mcp_use.py +45 -15
  102. hud/clients/tests/test_analyze_scenarios.py +206 -0
  103. hud/clients/tests/test_protocol.py +9 -3
  104. hud/datasets/__init__.py +23 -20
  105. hud/datasets/loader.py +326 -0
  106. hud/datasets/runner.py +198 -105
  107. hud/datasets/tests/__init__.py +0 -0
  108. hud/datasets/tests/test_loader.py +221 -0
  109. hud/datasets/tests/test_utils.py +315 -0
  110. hud/datasets/utils.py +270 -90
  111. hud/environment/__init__.py +52 -0
  112. hud/environment/connection.py +258 -0
  113. hud/environment/connectors/__init__.py +33 -0
  114. hud/environment/connectors/base.py +68 -0
  115. hud/environment/connectors/local.py +177 -0
  116. hud/environment/connectors/mcp_config.py +137 -0
  117. hud/environment/connectors/openai.py +101 -0
  118. hud/environment/connectors/remote.py +172 -0
  119. hud/environment/environment.py +835 -0
  120. hud/environment/integrations/__init__.py +45 -0
  121. hud/environment/integrations/adk.py +67 -0
  122. hud/environment/integrations/anthropic.py +196 -0
  123. hud/environment/integrations/gemini.py +92 -0
  124. hud/environment/integrations/langchain.py +82 -0
  125. hud/environment/integrations/llamaindex.py +68 -0
  126. hud/environment/integrations/openai.py +238 -0
  127. hud/environment/mock.py +306 -0
  128. hud/environment/router.py +263 -0
  129. hud/environment/scenarios.py +620 -0
  130. hud/environment/tests/__init__.py +1 -0
  131. hud/environment/tests/test_connection.py +317 -0
  132. hud/environment/tests/test_connectors.py +205 -0
  133. hud/environment/tests/test_environment.py +593 -0
  134. hud/environment/tests/test_integrations.py +257 -0
  135. hud/environment/tests/test_local_connectors.py +242 -0
  136. hud/environment/tests/test_scenarios.py +1086 -0
  137. hud/environment/tests/test_tools.py +208 -0
  138. hud/environment/types.py +23 -0
  139. hud/environment/utils/__init__.py +35 -0
  140. hud/environment/utils/formats.py +215 -0
  141. hud/environment/utils/schema.py +171 -0
  142. hud/environment/utils/tool_wrappers.py +113 -0
  143. hud/eval/__init__.py +67 -0
  144. hud/eval/context.py +727 -0
  145. hud/eval/display.py +299 -0
  146. hud/eval/instrument.py +187 -0
  147. hud/eval/manager.py +533 -0
  148. hud/eval/parallel.py +268 -0
  149. hud/eval/task.py +372 -0
  150. hud/eval/tests/__init__.py +1 -0
  151. hud/eval/tests/test_context.py +178 -0
  152. hud/eval/tests/test_eval.py +210 -0
  153. hud/eval/tests/test_manager.py +152 -0
  154. hud/eval/tests/test_parallel.py +168 -0
  155. hud/eval/tests/test_task.py +291 -0
  156. hud/eval/types.py +65 -0
  157. hud/eval/utils.py +194 -0
  158. hud/patches/__init__.py +19 -0
  159. hud/patches/mcp_patches.py +308 -0
  160. hud/patches/warnings.py +54 -0
  161. hud/samples/browser.py +4 -4
  162. hud/server/__init__.py +2 -1
  163. hud/server/low_level.py +2 -1
  164. hud/server/router.py +164 -0
  165. hud/server/server.py +567 -80
  166. hud/server/tests/test_mcp_server_integration.py +11 -11
  167. hud/server/tests/test_mcp_server_more.py +1 -1
  168. hud/server/tests/test_server_extra.py +2 -0
  169. hud/settings.py +45 -3
  170. hud/shared/exceptions.py +36 -10
  171. hud/shared/hints.py +26 -1
  172. hud/shared/requests.py +15 -3
  173. hud/shared/tests/test_exceptions.py +40 -31
  174. hud/shared/tests/test_hints.py +167 -0
  175. hud/telemetry/__init__.py +20 -19
  176. hud/telemetry/exporter.py +201 -0
  177. hud/telemetry/instrument.py +165 -253
  178. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  179. hud/telemetry/tests/test_exporter.py +258 -0
  180. hud/telemetry/tests/test_instrument.py +401 -0
  181. hud/tools/__init__.py +18 -2
  182. hud/tools/agent.py +223 -0
  183. hud/tools/apply_patch.py +639 -0
  184. hud/tools/base.py +54 -4
  185. hud/tools/bash.py +2 -2
  186. hud/tools/computer/__init__.py +36 -3
  187. hud/tools/computer/anthropic.py +2 -2
  188. hud/tools/computer/gemini.py +385 -0
  189. hud/tools/computer/hud.py +23 -6
  190. hud/tools/computer/openai.py +20 -21
  191. hud/tools/computer/qwen.py +434 -0
  192. hud/tools/computer/settings.py +37 -0
  193. hud/tools/edit.py +3 -7
  194. hud/tools/executors/base.py +4 -2
  195. hud/tools/executors/pyautogui.py +1 -1
  196. hud/tools/grounding/grounded_tool.py +13 -18
  197. hud/tools/grounding/grounder.py +10 -31
  198. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  199. hud/tools/jupyter.py +330 -0
  200. hud/tools/playwright.py +18 -3
  201. hud/tools/shell.py +308 -0
  202. hud/tools/tests/test_agent_tool.py +355 -0
  203. hud/tools/tests/test_apply_patch.py +718 -0
  204. hud/tools/tests/test_computer.py +4 -9
  205. hud/tools/tests/test_computer_actions.py +24 -2
  206. hud/tools/tests/test_jupyter_tool.py +181 -0
  207. hud/tools/tests/test_shell.py +596 -0
  208. hud/tools/tests/test_submit.py +85 -0
  209. hud/tools/tests/test_types.py +193 -0
  210. hud/tools/types.py +21 -1
  211. hud/types.py +194 -56
  212. hud/utils/__init__.py +2 -0
  213. hud/utils/env.py +67 -0
  214. hud/utils/hud_console.py +89 -18
  215. hud/utils/mcp.py +15 -58
  216. hud/utils/strict_schema.py +162 -0
  217. hud/utils/tests/test_init.py +1 -2
  218. hud/utils/tests/test_mcp.py +1 -28
  219. hud/utils/tests/test_pretty_errors.py +186 -0
  220. hud/utils/tests/test_tool_shorthand.py +154 -0
  221. hud/utils/tests/test_version.py +1 -1
  222. hud/utils/types.py +20 -0
  223. hud/version.py +1 -1
  224. hud_python-0.5.13.dist-info/METADATA +264 -0
  225. hud_python-0.5.13.dist-info/RECORD +305 -0
  226. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
  227. hud/agents/langchain.py +0 -261
  228. hud/agents/lite_llm.py +0 -72
  229. hud/cli/rl/__init__.py +0 -180
  230. hud/cli/rl/config.py +0 -101
  231. hud/cli/rl/display.py +0 -133
  232. hud/cli/rl/gpu.py +0 -63
  233. hud/cli/rl/gpu_utils.py +0 -321
  234. hud/cli/rl/local_runner.py +0 -595
  235. hud/cli/rl/presets.py +0 -96
  236. hud/cli/rl/remote_runner.py +0 -463
  237. hud/cli/rl/rl_api.py +0 -150
  238. hud/cli/rl/vllm.py +0 -177
  239. hud/cli/rl/wait_utils.py +0 -89
  240. hud/datasets/parallel.py +0 -687
  241. hud/misc/__init__.py +0 -1
  242. hud/misc/claude_plays_pokemon.py +0 -292
  243. hud/otel/__init__.py +0 -35
  244. hud/otel/collector.py +0 -142
  245. hud/otel/config.py +0 -181
  246. hud/otel/context.py +0 -570
  247. hud/otel/exporters.py +0 -369
  248. hud/otel/instrumentation.py +0 -135
  249. hud/otel/processors.py +0 -121
  250. hud/otel/tests/__init__.py +0 -1
  251. hud/otel/tests/test_processors.py +0 -197
  252. hud/rl/README.md +0 -30
  253. hud/rl/__init__.py +0 -1
  254. hud/rl/actor.py +0 -176
  255. hud/rl/buffer.py +0 -405
  256. hud/rl/chat_template.jinja +0 -101
  257. hud/rl/config.py +0 -192
  258. hud/rl/distributed.py +0 -132
  259. hud/rl/learner.py +0 -637
  260. hud/rl/tests/__init__.py +0 -1
  261. hud/rl/tests/test_learner.py +0 -186
  262. hud/rl/train.py +0 -382
  263. hud/rl/types.py +0 -101
  264. hud/rl/utils/start_vllm_server.sh +0 -30
  265. hud/rl/utils.py +0 -524
  266. hud/rl/vllm_adapter.py +0 -143
  267. hud/telemetry/job.py +0 -352
  268. hud/telemetry/replay.py +0 -74
  269. hud/telemetry/tests/test_replay.py +0 -40
  270. hud/telemetry/tests/test_trace.py +0 -63
  271. hud/telemetry/trace.py +0 -158
  272. hud/utils/agent_factories.py +0 -86
  273. hud/utils/async_utils.py +0 -65
  274. hud/utils/group_eval.py +0 -223
  275. hud/utils/progress.py +0 -149
  276. hud/utils/tasks.py +0 -127
  277. hud/utils/tests/test_async_utils.py +0 -173
  278. hud/utils/tests/test_progress.py +0 -261
  279. hud_python-0.4.45.dist-info/METADATA +0 -552
  280. hud_python-0.4.45.dist-info/RECORD +0 -228
  281. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  282. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,238 @@
1
+ """OpenAI integrations - format conversion and Agents SDK."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ from typing import TYPE_CHECKING, Any, cast
8
+
9
+ from hud.environment.utils.schema import ensure_strict_schema, validate_openai_schema
10
+
11
+ if TYPE_CHECKING:
12
+ import mcp.types as mcp_types
13
+ from openai.types.chat import ChatCompletionToolUnionParam
14
+
15
+ __all__ = ["OpenAIMixin"]
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class OpenAIMixin:
    """Mixin providing OpenAI format conversion and Agents SDK integration.

    Format methods (no deps):
        as_openai_chat_tools() - Chat Completions format
        as_openai_responses_tools() - Responses API format

    Integration methods (requires openai-agents):
        as_openai_agent_tools() - Agents SDK FunctionTool objects

    Note: The OpenAI Agents SDK also supports:
        - HostedMCPTool - MCP tools hosted by OpenAI
        - MCPServerStdio/Sse/StreamableHttp - Direct MCP server connections

    For MCP server integration, use as_mcp_server() from the mcp integration.

    Requires: as_tools() -> list[mcp_types.Tool], call_tool(name, args)
    """

    def as_tools(self) -> list[mcp_types.Tool]:
        """Return the MCP tools to convert. Must be provided by the host class."""
        raise NotImplementedError

    async def call_tool(self, name: str, arguments: dict[str, Any]) -> Any:
        """Execute a tool. Must be provided by the host class."""
        raise NotImplementedError

    def _openai_compatible_tools(
        self, *, validate: bool
    ) -> list[tuple[mcp_types.Tool, dict[str, Any]]]:
        """Pair each tool with a copy of its input schema, dropping invalid tools.

        Shared by every ``as_openai_*`` converter so that schema defaulting and
        validation behave identically across output formats.

        Args:
            validate: When true, tools for which ``validate_openai_schema``
                reports errors are skipped and each error is logged as a warning.

        Returns:
            ``(tool, schema)`` pairs in ``as_tools()`` order. ``schema`` is a
            shallow copy of ``tool.inputSchema``, or an empty object schema
            when the tool declares none.
        """
        selected: list[tuple[mcp_types.Tool, dict[str, Any]]] = []
        for t in self.as_tools():
            schema = dict(t.inputSchema) if t.inputSchema else {"type": "object", "properties": {}}

            # Validate schema for OpenAI compatibility
            if validate:
                errors = validate_openai_schema(schema, t.name)
                if errors:
                    for error in errors:
                        logger.warning("Skipping tool: %s", error)
                    continue

            selected.append((t, schema))
        return selected

    # =========================================================================
    # Format Conversion (no external deps)
    # =========================================================================

    def as_openai_chat_tools(
        self, *, strict: bool = False, validate: bool = True
    ) -> list[ChatCompletionToolUnionParam]:
        """Convert to OpenAI Chat Completions tool format.

        Args:
            strict: Enable strict mode for structured outputs
            validate: Validate schemas and skip incompatible tools with warnings

        Returns:
            List of tool definitions for OpenAI Chat Completions API.

        Example:
            ```python
            from openai import OpenAI

            client = OpenAI()
            async with env:
                response = client.chat.completions.create(
                    model="gpt-4o",
                    messages=[{"role": "user", "content": "Navigate to google.com"}],
                    tools=env.as_openai_chat_tools(),
                )
                # Execute tool calls and get results in OpenAI format
                results = await env.call_tools(response.choices[0].message.tool_calls)
                # results are {"role": "tool", "tool_call_id": ..., "content": ...}
            ```
        """
        return [
            cast(
                "ChatCompletionToolUnionParam",
                {
                    "type": "function",
                    "function": {
                        "name": t.name,
                        "description": t.description or "",
                        # Strict mode needs the schema rewritten, not just flagged.
                        "parameters": ensure_strict_schema(schema) if strict else schema,
                        **({"strict": True} if strict else {}),
                    },
                },
            )
            for t, schema in self._openai_compatible_tools(validate=validate)
        ]

    def as_openai_responses_tools(self, *, validate: bool = True) -> list[dict[str, Any]]:
        """Convert to OpenAI Responses API tool format.

        Note: Like Chat Completions, you must execute tools yourself.
        OpenAI only auto-executes their built-in tools (code_interpreter, etc).

        Args:
            validate: Validate schemas and skip incompatible tools with warnings

        Returns:
            List of tool definitions for OpenAI Responses API.

        Example:
            ```python
            import json

            from openai import OpenAI

            client = OpenAI()
            async with env:
                response = client.responses.create(
                    model="gpt-4o",
                    input="Navigate to google.com",
                    tools=env.as_openai_responses_tools(),
                )
                # Check for function calls in the response
                for item in response.output:
                    if item.type == "function_call":
                        # item.arguments is a JSON-encoded string, not a dict
                        result = await env.call_tool(item.name, **json.loads(item.arguments))
            ```
        """
        return [
            {
                "type": "function",
                "name": t.name,
                "description": t.description or "",
                "parameters": schema,
            }
            for t, schema in self._openai_compatible_tools(validate=validate)
        ]

    # =========================================================================
    # Agents SDK Integration (requires openai-agents)
    # =========================================================================

    def as_openai_agent_tools(self, *, validate: bool = True) -> list[Any]:
        """Convert to OpenAI Agents SDK FunctionTool objects.

        This creates FunctionTool objects that automatically execute against
        this environment. The Agents SDK Runner handles the tool loop.

        Note: The Agents SDK also supports other tool types:
            - HostedMCPTool: MCP tools hosted by OpenAI
            - MCPServerStdio/Sse/StreamableHttp: Direct MCP server connections

        For direct MCP integration, consider using as_mcp_server().

        Requires: pip install openai-agents

        Args:
            validate: Validate schemas and skip incompatible tools with warnings

        Returns:
            List of FunctionTool objects for OpenAI Agents SDK.

        Raises:
            ImportError: If the ``agents`` package (openai-agents) is not installed.

        Example:
            ```python
            from agents import Agent, Runner

            async with env:
                agent = Agent(
                    name="browser-agent",
                    instructions="You browse the web.",
                    tools=env.as_openai_agent_tools(),
                )
                result = await Runner.run(agent, "Go to google.com")
                print(result.final_output)
            ```
        """
        # Imported lazily so the format-conversion methods work without the SDK.
        try:
            from agents import FunctionTool
        except ImportError as e:
            raise ImportError(
                "OpenAI Agents SDK not installed. Install with: pip install openai-agents"
            ) from e

        # The paired schema is only needed for validation here; the factory
        # reads the input schema from the tool itself.
        return [
            _create_function_tool(self, t, FunctionTool)
            for t, _schema in self._openai_compatible_tools(validate=validate)
        ]
219
+
220
+
221
def _create_function_tool(env: OpenAIMixin, tool: mcp_types.Tool, FunctionTool: type) -> Any:
    """Create a FunctionTool that calls back to the environment.

    Args:
        env: Object whose ``call_tool`` executes the tool. It is awaited as
            ``env.call_tool(tool.name, **kwargs)``.
        tool: MCP tool definition supplying the name, description and input schema.
        FunctionTool: The FunctionTool class to instantiate, passed in by the
            caller rather than imported here.

    Returns:
        A ``FunctionTool`` instance whose ``on_invoke_tool`` forwards to ``env``.
    """
    schema = tool.inputSchema or {"type": "object", "properties": {}}

    async def async_wrapper(ctx: Any, args_json: str) -> str:
        """Decode the JSON arguments, run the tool and return its result as text."""
        kwargs = json.loads(args_json) if args_json else {}
        result = await env.call_tool(tool.name, **kwargs)
        if isinstance(result, str):
            return result
        # Only a missing result maps to "". Falsy values such as 0, False or []
        # are real results and must reach the model.
        if result is None:
            return ""
        try:
            return json.dumps(result)
        except (TypeError, ValueError):
            # Not JSON-native (e.g. a model object): fall back to its string
            # form rather than failing the whole tool call.
            return str(result)

    return FunctionTool(
        name=tool.name,
        description=tool.description or "",
        params_json_schema=schema,
        on_invoke_tool=async_wrapper,
    )
@@ -0,0 +1,306 @@
1
+ """Mock functionality for Environment."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from typing import TYPE_CHECKING, Any
7
+
8
+ import mcp.types as mcp_types
9
+
10
+ from hud.types import MCPToolResult
11
+
12
+ if TYPE_CHECKING:
13
+ from hud.environment.environment import Environment
14
+
15
+ __all__ = ["MockMixin", "generate_mock_value"]
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
# Fixed mock strings for recognised JSON Schema "format" values.
_STRING_FORMAT_MOCKS = {
    "uri": "https://example.com",
    "url": "https://example.com",
    "email": "user@example.com",
    "date": "2024-01-01",
    "date-time": "2024-01-01T00:00:00Z",
    "uuid": "00000000-0000-0000-0000-000000000000",
}


def _mock_string(schema: dict[str, Any]) -> str:
    """Return a mock string, preferring the schema's format, then title hints."""
    fmt = schema.get("format", "")
    if fmt in _STRING_FORMAT_MOCKS:
        return _STRING_FORMAT_MOCKS[fmt]
    # A missing or null title simply yields no hint.
    title = str(schema.get("title") or "").lower()
    if "url" in title or "link" in title:
        return "https://example.com"
    if "name" in title:
        return "mock_name"
    if "id" in title:
        return "mock_id"
    return "mock_string"


def _mock_number(schema: dict[str, Any], *, as_int: bool) -> Any:
    """Return the midpoint of the schema's numeric bounds.

    Missing bounds default to 0 and 100. When only one bound is given and the
    default for the other would cross it, the given bound is used for both so
    the result never falls outside the declared range.
    """
    lower = schema.get("minimum")
    upper = schema.get("maximum")
    if lower is None and upper is None:
        lower, upper = 0, 100
    elif lower is None:
        lower = min(0, upper)
    elif upper is None:
        upper = max(100, lower)

    # An unbounded maximum has no midpoint; fall back to the lower bound.
    if upper == float("inf"):
        return lower if as_int else float(lower)
    midpoint = (lower + upper) / 2
    return int(midpoint) if as_int else float(midpoint)


def generate_mock_value(schema: dict[str, Any], depth: int = 0) -> Any:
    """Generate a reasonable mock value from a JSON schema.

    Resolution order: ``$ref`` placeholder, ``anyOf``/``oneOf`` (first option),
    ``allOf`` (merged dict results), ``const``, ``enum`` (first entry),
    ``default``, then a value derived from ``type``.

    Args:
        schema: JSON schema dict with 'type', 'properties', etc. Boolean or
            other non-dict schemas are tolerated and produce ``None``.
        depth: Current recursion depth (to prevent infinite loops).

    Returns:
        A mock value that matches the schema, or ``None`` when the schema gives
        nothing to work from or the recursion limit is exceeded.
    """
    if depth > 10:  # Prevent infinite recursion
        return None

    # Boolean schemas (e.g. ``"items": true``) carry no structure to mock.
    if not isinstance(schema, dict):
        return None

    # Handle $ref - we don't resolve refs, just return placeholder
    if "$ref" in schema:
        return {}

    # anyOf/oneOf: mock the first alternative; an empty list offers none.
    for keyword in ("anyOf", "oneOf"):
        if keyword in schema:
            options = schema[keyword]
            return generate_mock_value(options[0], depth + 1) if options else None

    if "allOf" in schema:
        # Merge the mock values of all sub-schemas (only dict results can merge).
        merged: dict[str, Any] = {}
        for sub_schema in schema["allOf"]:
            part = generate_mock_value(sub_schema, depth + 1)
            if isinstance(part, dict):
                merged.update(part)
        return merged

    # Check for const or enum first
    if "const" in schema:
        return schema["const"]
    if "enum" in schema:
        return schema["enum"][0] if schema["enum"] else None

    # Check for default value
    if "default" in schema:
        return schema["default"]

    # Handle by type
    schema_type = schema.get("type")

    if schema_type == "string":
        return _mock_string(schema)

    if schema_type == "number" or schema_type == "integer":
        return _mock_number(schema, as_int=schema_type == "integer")

    if schema_type == "boolean":
        return True

    if schema_type == "null":
        return None

    if schema_type == "array":
        items_schema = schema.get("items", {})
        if items_schema:
            # Generate one item
            return [generate_mock_value(items_schema, depth + 1)]
        return []

    if schema_type == "object" or "properties" in schema:
        mocked: dict[str, Any] = {}
        required = set(schema.get("required", []))

        for prop_name, prop_schema in schema.get("properties", {}).items():
            # Only include required properties or first few optional ones
            if prop_name in required or len(mocked) < 3:
                mocked[prop_name] = generate_mock_value(prop_schema, depth + 1)

        return mocked

    # Handle list of types
    if isinstance(schema_type, list):
        # Pick the first non-null type, keeping sibling constraints such as
        # format/minimum/title so they still shape the mock.
        for candidate in schema_type:
            if candidate != "null":
                return generate_mock_value({**schema, "type": candidate}, depth + 1)
        return None

    # Fallback for unknown schema
    return None
130
+
131
+
132
def generate_mock_tool_result(tool: mcp_types.Tool) -> MCPToolResult:
    """Generate a mock result for a tool based on its output schema.

    When the tool declares an output schema, a mock value is generated from it
    and rendered as text: strings are used as-is, anything else is serialized
    as JSON so that consumers parsing the text get valid JSON rather than a
    Python repr. Without an output schema, a default is chosen from keywords
    in the tool name.

    Args:
        tool: MCP Tool with inputSchema and optionally outputSchema.

    Returns:
        MCPToolResult with a single text content block and ``isError=False``.
    """
    import json

    # Check if tool has an output schema
    output_schema = getattr(tool, "outputSchema", None)

    if output_schema:
        mock_value = generate_mock_value(output_schema)
        if mock_value is None:
            content_text = "mock_result"
        elif isinstance(mock_value, str):
            content_text = mock_value
        else:
            try:
                content_text = json.dumps(mock_value)
            except (TypeError, ValueError):
                # Schema supplied a const/default that is not JSON-native.
                content_text = str(mock_value)
    else:
        # Generate a sensible default based on tool name
        lowered_name = tool.name.lower()
        if "screenshot" in lowered_name or "image" in lowered_name:
            content_text = "[mock image data]"
        elif "get" in lowered_name or "list" in lowered_name:
            content_text = "[]"
        elif "check" in lowered_name or "verify" in lowered_name:
            content_text = "true"
        elif "count" in lowered_name:
            content_text = "0"
        else:
            content_text = "mock_success"

    return MCPToolResult(
        content=[mcp_types.TextContent(type="text", text=content_text)],
        isError=False,
    )
165
+
166
+
167
class MockMixin:
    """Mixin that gives an Environment a switchable mock mode.

    While mock mode is on:
    - Tool calls are answered with mock values instead of being executed
    - A tool can be given a fixed reply through mock_tool()
    - Every other tool gets a default reply derived from its schema

    Usage:
        env = Environment("test").connect_hub("browser")
        env.mock()  # Enable mock mode

        # Set specific mock outputs
        env.mock_tool("navigate", "Navigation successful")
        env.mock_tool("screenshot", {"image": "base64data..."})

        async with env:
            result = await env.call_tool("navigate", url="https://example.com")
            # Returns: MCPToolResult with "Navigation successful"
    """

    # Whether tool calls are currently answered with mocks.
    _mock_mode: bool
    # Tool name -> fixed reply registered through mock_tool().
    _mock_outputs: dict[str, Any]
    # Tool name -> tool definition, cached for schema-based replies.
    _mock_tool_schemas: dict[str, mcp_types.Tool]

    def _init_mock(self) -> None:
        """Reset all mock state to its defaults. Called from Environment.__init__."""
        self._mock_mode = False
        self._mock_outputs = {}
        self._mock_tool_schemas = {}

    def mock(self) -> Environment:
        """Turn mock mode on so that every tool call returns a mock value.

        Returns:
            self for chaining.

        Example:
            env = Environment("test").connect_hub("browser").mock()
        """
        self._mock_mode = True
        env_name = getattr(self, "name", "unknown")
        logger.info("Mock mode enabled for environment %s", env_name)
        return self  # type: ignore[return-value]

    def unmock(self) -> Environment:
        """Turn mock mode off so that tool calls execute normally again.

        Returns:
            self for chaining.
        """
        self._mock_mode = False
        env_name = getattr(self, "name", "unknown")
        logger.info("Mock mode disabled for environment %s", env_name)
        return self  # type: ignore[return-value]

    @property
    def is_mock(self) -> bool:
        """Whether mock mode is currently enabled."""
        return self._mock_mode

    def mock_tool(self, name: str, output: Any) -> Environment:
        """Register a fixed mock reply for one tool.

        Args:
            name: Tool name (with prefix if applicable).
            output: The value to return when this tool is called.
                Can be a string, dict, or any JSON-serializable value.

        Returns:
            self for chaining.

        Example:
            env.mock_tool("navigate", "Success")
            env.mock_tool("screenshot", {"type": "image", "data": "..."})
            env.mock_tool("get_elements", [{"id": "1", "text": "Button"}])
        """
        self._mock_outputs[name] = output
        logger.debug("Mock output set for tool %s", name)
        return self  # type: ignore[return-value]

    def _get_mock_result(self, name: str, arguments: dict[str, Any]) -> MCPToolResult:
        """Build the mock reply for a single tool call.

        Sources are tried in this order:
            1. A reply registered through mock_tool()
            2. A reply generated from the tool's definition (cached, then router)
            3. The generic "mock_success" reply

        Args:
            name: Tool name.
            arguments: Tool arguments (currently unused).

        Returns:
            MCPToolResult with mock content.
        """

        def as_text_result(text: str) -> MCPToolResult:
            return MCPToolResult(
                content=[mcp_types.TextContent(type="text", text=text)],
                isError=False,
            )

        # 1. Explicitly registered reply
        if name in self._mock_outputs:
            registered = self._mock_outputs[name]
            if not isinstance(registered, str):
                import json

                # Non-string replies go out as JSON when possible.
                try:
                    registered = json.dumps(registered)
                except (TypeError, ValueError):
                    registered = str(registered)
            return as_text_result(registered)

        # 2a. Tool definition already cached
        if name in self._mock_tool_schemas:
            return generate_mock_tool_result(self._mock_tool_schemas[name])

        # 2b. Look the definition up on the router and remember it
        router = getattr(self, "_router", None)
        if router:
            for candidate in router.tools:
                if candidate.name != name:
                    continue
                self._mock_tool_schemas[name] = candidate
                return generate_mock_tool_result(candidate)

        # 3. Nothing known about this tool
        return as_text_result("mock_success")

    def _populate_mock_schemas(self) -> None:
        """Cache every router tool definition by name for mock generation.

        Called after _build_routing, once the router knows its tools.
        """
        router = getattr(self, "_router", None)
        if not router:
            return
        for known_tool in router.tools:
            self._mock_tool_schemas[known_tool.name] = known_tool