hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (282) hide show
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +70 -5
  3. hud/agents/base.py +238 -500
  4. hud/agents/claude.py +236 -247
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +264 -0
  7. hud/agents/gemini_cua.py +324 -0
  8. hud/agents/grounded_openai.py +98 -100
  9. hud/agents/misc/integration_test_agent.py +51 -20
  10. hud/agents/misc/response_agent.py +48 -36
  11. hud/agents/openai.py +282 -296
  12. hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
  13. hud/agents/operator.py +199 -0
  14. hud/agents/resolver.py +70 -0
  15. hud/agents/tests/conftest.py +133 -0
  16. hud/agents/tests/test_base.py +300 -622
  17. hud/agents/tests/test_base_runtime.py +233 -0
  18. hud/agents/tests/test_claude.py +381 -214
  19. hud/agents/tests/test_client.py +9 -10
  20. hud/agents/tests/test_gemini.py +369 -0
  21. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  22. hud/agents/tests/test_openai.py +377 -140
  23. hud/agents/tests/test_operator.py +362 -0
  24. hud/agents/tests/test_resolver.py +192 -0
  25. hud/agents/tests/test_run_eval.py +179 -0
  26. hud/agents/types.py +148 -0
  27. hud/cli/__init__.py +493 -546
  28. hud/cli/analyze.py +43 -5
  29. hud/cli/build.py +699 -113
  30. hud/cli/debug.py +8 -5
  31. hud/cli/dev.py +889 -732
  32. hud/cli/eval.py +793 -667
  33. hud/cli/flows/dev.py +167 -0
  34. hud/cli/flows/init.py +191 -0
  35. hud/cli/flows/tasks.py +153 -56
  36. hud/cli/flows/templates.py +151 -0
  37. hud/cli/flows/tests/__init__.py +1 -0
  38. hud/cli/flows/tests/test_dev.py +126 -0
  39. hud/cli/init.py +60 -58
  40. hud/cli/pull.py +1 -1
  41. hud/cli/push.py +38 -13
  42. hud/cli/rft.py +311 -0
  43. hud/cli/rft_status.py +145 -0
  44. hud/cli/tests/test_analyze.py +5 -5
  45. hud/cli/tests/test_analyze_metadata.py +3 -2
  46. hud/cli/tests/test_analyze_module.py +120 -0
  47. hud/cli/tests/test_build.py +110 -8
  48. hud/cli/tests/test_build_failure.py +41 -0
  49. hud/cli/tests/test_build_module.py +50 -0
  50. hud/cli/tests/test_cli_init.py +6 -1
  51. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  52. hud/cli/tests/test_cli_root.py +140 -0
  53. hud/cli/tests/test_convert.py +361 -0
  54. hud/cli/tests/test_debug.py +12 -10
  55. hud/cli/tests/test_dev.py +197 -0
  56. hud/cli/tests/test_eval.py +251 -0
  57. hud/cli/tests/test_eval_bedrock.py +51 -0
  58. hud/cli/tests/test_init.py +124 -0
  59. hud/cli/tests/test_main_module.py +11 -5
  60. hud/cli/tests/test_mcp_server.py +12 -100
  61. hud/cli/tests/test_push.py +1 -1
  62. hud/cli/tests/test_push_happy.py +74 -0
  63. hud/cli/tests/test_push_wrapper.py +23 -0
  64. hud/cli/tests/test_registry.py +1 -1
  65. hud/cli/tests/test_utils.py +1 -1
  66. hud/cli/{rl → utils}/celebrate.py +14 -12
  67. hud/cli/utils/config.py +18 -1
  68. hud/cli/utils/docker.py +130 -4
  69. hud/cli/utils/env_check.py +9 -9
  70. hud/cli/utils/git.py +136 -0
  71. hud/cli/utils/interactive.py +39 -5
  72. hud/cli/utils/metadata.py +70 -1
  73. hud/cli/utils/runner.py +1 -1
  74. hud/cli/utils/server.py +2 -2
  75. hud/cli/utils/source_hash.py +3 -3
  76. hud/cli/utils/tasks.py +4 -1
  77. hud/cli/utils/tests/__init__.py +0 -0
  78. hud/cli/utils/tests/test_config.py +58 -0
  79. hud/cli/utils/tests/test_docker.py +93 -0
  80. hud/cli/utils/tests/test_docker_hints.py +71 -0
  81. hud/cli/utils/tests/test_env_check.py +74 -0
  82. hud/cli/utils/tests/test_environment.py +42 -0
  83. hud/cli/utils/tests/test_git.py +142 -0
  84. hud/cli/utils/tests/test_interactive_module.py +60 -0
  85. hud/cli/utils/tests/test_local_runner.py +50 -0
  86. hud/cli/utils/tests/test_logging_utils.py +23 -0
  87. hud/cli/utils/tests/test_metadata.py +49 -0
  88. hud/cli/utils/tests/test_package_runner.py +35 -0
  89. hud/cli/utils/tests/test_registry_utils.py +49 -0
  90. hud/cli/utils/tests/test_remote_runner.py +25 -0
  91. hud/cli/utils/tests/test_runner_modules.py +52 -0
  92. hud/cli/utils/tests/test_source_hash.py +36 -0
  93. hud/cli/utils/tests/test_tasks.py +80 -0
  94. hud/cli/utils/version_check.py +258 -0
  95. hud/cli/{rl → utils}/viewer.py +2 -2
  96. hud/clients/README.md +12 -11
  97. hud/clients/__init__.py +4 -3
  98. hud/clients/base.py +166 -26
  99. hud/clients/environment.py +51 -0
  100. hud/clients/fastmcp.py +13 -6
  101. hud/clients/mcp_use.py +45 -15
  102. hud/clients/tests/test_analyze_scenarios.py +206 -0
  103. hud/clients/tests/test_protocol.py +9 -3
  104. hud/datasets/__init__.py +23 -20
  105. hud/datasets/loader.py +326 -0
  106. hud/datasets/runner.py +198 -105
  107. hud/datasets/tests/__init__.py +0 -0
  108. hud/datasets/tests/test_loader.py +221 -0
  109. hud/datasets/tests/test_utils.py +315 -0
  110. hud/datasets/utils.py +270 -90
  111. hud/environment/__init__.py +52 -0
  112. hud/environment/connection.py +258 -0
  113. hud/environment/connectors/__init__.py +33 -0
  114. hud/environment/connectors/base.py +68 -0
  115. hud/environment/connectors/local.py +177 -0
  116. hud/environment/connectors/mcp_config.py +137 -0
  117. hud/environment/connectors/openai.py +101 -0
  118. hud/environment/connectors/remote.py +172 -0
  119. hud/environment/environment.py +835 -0
  120. hud/environment/integrations/__init__.py +45 -0
  121. hud/environment/integrations/adk.py +67 -0
  122. hud/environment/integrations/anthropic.py +196 -0
  123. hud/environment/integrations/gemini.py +92 -0
  124. hud/environment/integrations/langchain.py +82 -0
  125. hud/environment/integrations/llamaindex.py +68 -0
  126. hud/environment/integrations/openai.py +238 -0
  127. hud/environment/mock.py +306 -0
  128. hud/environment/router.py +263 -0
  129. hud/environment/scenarios.py +620 -0
  130. hud/environment/tests/__init__.py +1 -0
  131. hud/environment/tests/test_connection.py +317 -0
  132. hud/environment/tests/test_connectors.py +205 -0
  133. hud/environment/tests/test_environment.py +593 -0
  134. hud/environment/tests/test_integrations.py +257 -0
  135. hud/environment/tests/test_local_connectors.py +242 -0
  136. hud/environment/tests/test_scenarios.py +1086 -0
  137. hud/environment/tests/test_tools.py +208 -0
  138. hud/environment/types.py +23 -0
  139. hud/environment/utils/__init__.py +35 -0
  140. hud/environment/utils/formats.py +215 -0
  141. hud/environment/utils/schema.py +171 -0
  142. hud/environment/utils/tool_wrappers.py +113 -0
  143. hud/eval/__init__.py +67 -0
  144. hud/eval/context.py +727 -0
  145. hud/eval/display.py +299 -0
  146. hud/eval/instrument.py +187 -0
  147. hud/eval/manager.py +533 -0
  148. hud/eval/parallel.py +268 -0
  149. hud/eval/task.py +372 -0
  150. hud/eval/tests/__init__.py +1 -0
  151. hud/eval/tests/test_context.py +178 -0
  152. hud/eval/tests/test_eval.py +210 -0
  153. hud/eval/tests/test_manager.py +152 -0
  154. hud/eval/tests/test_parallel.py +168 -0
  155. hud/eval/tests/test_task.py +291 -0
  156. hud/eval/types.py +65 -0
  157. hud/eval/utils.py +194 -0
  158. hud/patches/__init__.py +19 -0
  159. hud/patches/mcp_patches.py +308 -0
  160. hud/patches/warnings.py +54 -0
  161. hud/samples/browser.py +4 -4
  162. hud/server/__init__.py +2 -1
  163. hud/server/low_level.py +2 -1
  164. hud/server/router.py +164 -0
  165. hud/server/server.py +567 -80
  166. hud/server/tests/test_mcp_server_integration.py +11 -11
  167. hud/server/tests/test_mcp_server_more.py +1 -1
  168. hud/server/tests/test_server_extra.py +2 -0
  169. hud/settings.py +45 -3
  170. hud/shared/exceptions.py +36 -10
  171. hud/shared/hints.py +26 -1
  172. hud/shared/requests.py +15 -3
  173. hud/shared/tests/test_exceptions.py +40 -31
  174. hud/shared/tests/test_hints.py +167 -0
  175. hud/telemetry/__init__.py +20 -19
  176. hud/telemetry/exporter.py +201 -0
  177. hud/telemetry/instrument.py +165 -253
  178. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  179. hud/telemetry/tests/test_exporter.py +258 -0
  180. hud/telemetry/tests/test_instrument.py +401 -0
  181. hud/tools/__init__.py +18 -2
  182. hud/tools/agent.py +223 -0
  183. hud/tools/apply_patch.py +639 -0
  184. hud/tools/base.py +54 -4
  185. hud/tools/bash.py +2 -2
  186. hud/tools/computer/__init__.py +36 -3
  187. hud/tools/computer/anthropic.py +2 -2
  188. hud/tools/computer/gemini.py +385 -0
  189. hud/tools/computer/hud.py +23 -6
  190. hud/tools/computer/openai.py +20 -21
  191. hud/tools/computer/qwen.py +434 -0
  192. hud/tools/computer/settings.py +37 -0
  193. hud/tools/edit.py +3 -7
  194. hud/tools/executors/base.py +4 -2
  195. hud/tools/executors/pyautogui.py +1 -1
  196. hud/tools/grounding/grounded_tool.py +13 -18
  197. hud/tools/grounding/grounder.py +10 -31
  198. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  199. hud/tools/jupyter.py +330 -0
  200. hud/tools/playwright.py +18 -3
  201. hud/tools/shell.py +308 -0
  202. hud/tools/tests/test_agent_tool.py +355 -0
  203. hud/tools/tests/test_apply_patch.py +718 -0
  204. hud/tools/tests/test_computer.py +4 -9
  205. hud/tools/tests/test_computer_actions.py +24 -2
  206. hud/tools/tests/test_jupyter_tool.py +181 -0
  207. hud/tools/tests/test_shell.py +596 -0
  208. hud/tools/tests/test_submit.py +85 -0
  209. hud/tools/tests/test_types.py +193 -0
  210. hud/tools/types.py +21 -1
  211. hud/types.py +194 -56
  212. hud/utils/__init__.py +2 -0
  213. hud/utils/env.py +67 -0
  214. hud/utils/hud_console.py +89 -18
  215. hud/utils/mcp.py +15 -58
  216. hud/utils/strict_schema.py +162 -0
  217. hud/utils/tests/test_init.py +1 -2
  218. hud/utils/tests/test_mcp.py +1 -28
  219. hud/utils/tests/test_pretty_errors.py +186 -0
  220. hud/utils/tests/test_tool_shorthand.py +154 -0
  221. hud/utils/tests/test_version.py +1 -1
  222. hud/utils/types.py +20 -0
  223. hud/version.py +1 -1
  224. hud_python-0.5.13.dist-info/METADATA +264 -0
  225. hud_python-0.5.13.dist-info/RECORD +305 -0
  226. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
  227. hud/agents/langchain.py +0 -261
  228. hud/agents/lite_llm.py +0 -72
  229. hud/cli/rl/__init__.py +0 -180
  230. hud/cli/rl/config.py +0 -101
  231. hud/cli/rl/display.py +0 -133
  232. hud/cli/rl/gpu.py +0 -63
  233. hud/cli/rl/gpu_utils.py +0 -321
  234. hud/cli/rl/local_runner.py +0 -595
  235. hud/cli/rl/presets.py +0 -96
  236. hud/cli/rl/remote_runner.py +0 -463
  237. hud/cli/rl/rl_api.py +0 -150
  238. hud/cli/rl/vllm.py +0 -177
  239. hud/cli/rl/wait_utils.py +0 -89
  240. hud/datasets/parallel.py +0 -687
  241. hud/misc/__init__.py +0 -1
  242. hud/misc/claude_plays_pokemon.py +0 -292
  243. hud/otel/__init__.py +0 -35
  244. hud/otel/collector.py +0 -142
  245. hud/otel/config.py +0 -181
  246. hud/otel/context.py +0 -570
  247. hud/otel/exporters.py +0 -369
  248. hud/otel/instrumentation.py +0 -135
  249. hud/otel/processors.py +0 -121
  250. hud/otel/tests/__init__.py +0 -1
  251. hud/otel/tests/test_processors.py +0 -197
  252. hud/rl/README.md +0 -30
  253. hud/rl/__init__.py +0 -1
  254. hud/rl/actor.py +0 -176
  255. hud/rl/buffer.py +0 -405
  256. hud/rl/chat_template.jinja +0 -101
  257. hud/rl/config.py +0 -192
  258. hud/rl/distributed.py +0 -132
  259. hud/rl/learner.py +0 -637
  260. hud/rl/tests/__init__.py +0 -1
  261. hud/rl/tests/test_learner.py +0 -186
  262. hud/rl/train.py +0 -382
  263. hud/rl/types.py +0 -101
  264. hud/rl/utils/start_vllm_server.sh +0 -30
  265. hud/rl/utils.py +0 -524
  266. hud/rl/vllm_adapter.py +0 -143
  267. hud/telemetry/job.py +0 -352
  268. hud/telemetry/replay.py +0 -74
  269. hud/telemetry/tests/test_replay.py +0 -40
  270. hud/telemetry/tests/test_trace.py +0 -63
  271. hud/telemetry/trace.py +0 -158
  272. hud/utils/agent_factories.py +0 -86
  273. hud/utils/async_utils.py +0 -65
  274. hud/utils/group_eval.py +0 -223
  275. hud/utils/progress.py +0 -149
  276. hud/utils/tasks.py +0 -127
  277. hud/utils/tests/test_async_utils.py +0 -173
  278. hud/utils/tests/test_progress.py +0 -261
  279. hud_python-0.4.45.dist-info/METADATA +0 -552
  280. hud_python-0.4.45.dist-info/RECORD +0 -228
  281. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  282. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
hud/eval/utils.py ADDED
@@ -0,0 +1,194 @@
1
+ """Utility functions for the eval module."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import warnings
7
+ from typing import Any
8
+
9
+ __all__ = ["build_env_from_v4", "is_v4_format", "validate_v4_task"]
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def is_v4_format(data: dict[str, Any]) -> bool:
    """Report whether *data* has the shape of a v4 LegacyTask dict.

    This is a cheap detection helper for branching: a dict counts as v4
    when both ``prompt`` and ``mcp_config`` hold truthy values. It does
    not check that the task is complete.

    Args:
        data: Candidate mapping to inspect.

    Returns:
        True when both core v4 fields are present and truthy; False for
        anything else, including non-dict input.
    """
    if isinstance(data, dict):
        # all() short-circuits, so mcp_config is only read when prompt is truthy.
        return all(data.get(field) for field in ("prompt", "mcp_config"))
    return False
31
+
32
+
33
def validate_v4_task(data: dict[str, Any]) -> None:
    """Ensure a v4 task dict carries every required field.

    The required fields are, in reporting order:
        - prompt: the task instruction
        - mcp_config: MCP server configuration
        - evaluate_tool: how success is evaluated

    A field counts as missing when it is absent or falsy. Intended to be
    called after is_v4_format() when completeness matters.

    Args:
        data: Dict to validate.

    Raises:
        ValueError: If one or more required fields are missing; the
            message lists them comma-separated.
    """
    required_fields = ("prompt", "mcp_config", "evaluate_tool")
    absent = [field for field in required_fields if not data.get(field)]
    if not absent:
        return
    raise ValueError(f"v4 task missing required fields: {', '.join(absent)}")
59
+
60
+
61
def _as_call_list(calls: Any) -> list[Any]:
    """Return *calls* unchanged when it is already a list, else wrap it in one."""
    return calls if isinstance(calls, list) else [calls]


def build_env_from_v4(source: dict[str, Any] | Any) -> dict[str, Any]:
    """Build an Environment and Task field dict from a v4 LegacyTask.

    The legacy task's MCP config, prompt, agent tool filters and
    setup/evaluate tool calls are stored on a new Environment, which is
    returned inside a dict of fields for the Task() constructor.

    Args:
        source: dict or LegacyTask with v4 fields (prompt, mcp_config, etc.).
            A dict is converted with ``LegacyTask(**source)`` while
            DeprecationWarning is suppressed.

    Returns:
        Dict that always contains ``env``, ``id``, ``scenario`` (None) and
        ``args`` (empty dict). It additionally contains:
            - ``validation``: when integration_test_tool is set
            - ``agent_config``: when any of system_prompt,
              append_setup_output or append_setup_tool is truthy
            - ``metadata``: when the legacy task has metadata

    Raises:
        TypeError: If source is not a dict or LegacyTask.
    """
    from hud.environment import Environment
    from hud.types import LegacyTask, MCPToolCall

    # Convert dict to LegacyTask if needed
    if isinstance(source, dict):
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=DeprecationWarning)
            legacy = LegacyTask(**source)
    elif isinstance(source, LegacyTask):
        legacy = source
    else:
        raise TypeError(f"Expected dict or LegacyTask, got {type(source).__name__}")

    # Warn if using local MCP configs (command without url)
    _warn_local_mcp(legacy.mcp_config)

    # Extract tool filters from agent_config (v4 style).
    # These are agent-level filters, not connection-level.
    include_tools: list[str] | None = None
    exclude_tools: list[str] | None = None
    if legacy.agent_config:
        include_tools = legacy.agent_config.allowed_tools
        exclude_tools = legacy.agent_config.disallowed_tools

    # Convert ["*"] wildcard to None (meaning include all)
    if include_tools == ["*"]:
        include_tools = None

    # Create the Environment and register the MCP config on it.
    # NOTE(review): the original comment states no connections are made
    # here, only config stored — confirm against connect_mcp_config.
    env = Environment(legacy.id or "v4-legacy")
    env.connect_mcp_config(legacy.mcp_config)

    # Store agent-level tool filters on the Environment. Per the original
    # comment these are applied in as_tools(), so the Environment can still
    # call setup/evaluate tools while hiding them from the agent.
    env._agent_include = include_tools
    env._agent_exclude = exclude_tools

    # Set the prompt
    env.prompt = legacy.prompt

    # Register setup_tool calls (a single call or a list of calls)
    if legacy.setup_tool:
        for call in _as_call_list(legacy.setup_tool):
            env.setup_tool(call.name, **(call.arguments or {}))

    # Register evaluate_tool calls (a single call or a list of calls)
    if legacy.evaluate_tool:
        for call in _as_call_list(legacy.evaluate_tool):
            env.evaluate_tool(call.name, **(call.arguments or {}))

    # Build Task fields dict
    result: dict[str, Any] = {
        "env": env,
        "id": legacy.id,
        "scenario": None,  # v4 uses prompt, not scenarios
        "args": {},
    }

    # Map integration_test_tool -> validation (same concept: tool calls to verify).
    # Also populate _integration_test_calls for IntegrationTestRunner compatibility.
    if legacy.integration_test_tool:
        int_test = _as_call_list(legacy.integration_test_tool)
        # Convert to MCPToolCall if needed
        result["validation"] = [
            call if isinstance(call, MCPToolCall) else MCPToolCall(**call.model_dump())
            for call in int_test
        ]
        env._integration_test_calls = [(call.name, call.arguments or {}) for call in int_test]

    # Pass through only the agent_config fields that are set (truthy)
    if legacy.agent_config:
        agent_config_dict: dict[str, Any] = {}
        if legacy.agent_config.system_prompt:
            agent_config_dict["system_prompt"] = legacy.agent_config.system_prompt
        if legacy.agent_config.append_setup_output:
            agent_config_dict["append_setup_output"] = legacy.agent_config.append_setup_output
        if legacy.agent_config.append_setup_tool:
            agent_config_dict["append_setup_tool"] = legacy.agent_config.append_setup_tool
        if agent_config_dict:
            result["agent_config"] = agent_config_dict

    # Preserve metadata
    if legacy.metadata:
        result["metadata"] = legacy.metadata

    return result
171
+
172
+
173
def _warn_local_mcp(mcp_config: dict[str, Any] | None) -> None:
    """Warn if mcp_config uses local MCP servers (command without url).

    A server entry is treated as local when it is a dict that has a
    ``command`` key and no truthy ``url``. Non-dict entries are ignored.
    Per the warning text, local servers may cause port conflicts when
    tasks run concurrently.

    Args:
        mcp_config: Mapping of server name to server config, or None.
            An empty or None config emits nothing.
    """
    if not mcp_config:
        return

    has_local = any(
        "command" in server_cfg and not server_cfg.get("url")
        for server_cfg in mcp_config.values()
        if isinstance(server_cfg, dict)
    )

    if has_local:
        warnings.warn(
            "Task uses local MCP configuration (command without url). "
            "This may cause port conflicts when running tasks concurrently. "
            "Consider using remote MCP servers for parallel execution.",
            UserWarning,
            # Attribute the warning to a frame several levels above this helper.
            stacklevel=4,
        )
@@ -0,0 +1,19 @@
1
+ """
2
+ HUD runtime patches for third-party libraries.
3
+
4
+ This module applies monkey-patches to fix issues in dependencies
5
+ without requiring forked packages.
6
+ """
7
+
8
+ from hud.patches.mcp_patches import apply_all_patches, suppress_fastmcp_logging
9
+ from hud.patches.warnings import apply_default_warning_filters, suppress_mcp_use_import_warnings
10
+
11
+ # Apply patches on import
12
+ apply_all_patches()
13
+
14
+ __all__ = [
15
+ "apply_all_patches",
16
+ "apply_default_warning_filters",
17
+ "suppress_fastmcp_logging",
18
+ "suppress_mcp_use_import_warnings",
19
+ ]
@@ -0,0 +1,308 @@
1
+ """
2
+ Runtime patches for the standard mcp package.
3
+
4
+ These patches apply fixes from the HUD fork without requiring a separate package.
5
+ Import this module early (e.g., in hud/__init__.py) to apply patches.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ from typing import Any
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
def patch_streamable_http_error_handling() -> None:
    """
    Patch StreamableHTTPTransport.post_writer to handle request errors properly.

    The original implementation doesn't catch errors in handle_request_async,
    which can cause the client to hang indefinitely. This patch wraps the handler
    to send a proper JSONRPCError response when transport errors occur (e.g.,
    ReadTimeout), allowing the waiting caller to receive the error and fail
    gracefully instead of hanging.

    The patch is best-effort: if the mcp module cannot be imported it is
    skipped with a debug log, and any other failure while patching is logged
    as a warning rather than raised.
    """
    try:
        from mcp.client.streamable_http import StreamableHTTPTransport

        async def patched_post_writer(
            self: Any,
            client: Any,
            write_stream_reader: Any,
            read_stream_writer: Any,
            write_stream: Any,
            start_get_stream: Any,
            tg: Any,
        ) -> None:
            """Replacement post_writer: forward outgoing messages with retry and error reporting."""
            import asyncio
            import ssl
            import time

            import httpx
            from mcp.client.streamable_http import RequestContext
            from mcp.shared.message import ClientMessageMetadata, SessionMessage
            from mcp.types import ErrorData, JSONRPCError, JSONRPCMessage, JSONRPCRequest

            from hud.settings import settings

            async def handle_request_async(ctx: RequestContext, is_resumption: bool) -> None:
                """Send one message, retrying transport errors until the deadline passes."""
                msg = ctx.session_message.message
                # Use the configured timeout with a 15s floor to prevent instant failures.
                # NOTE(review): this comment previously said "minimum 30s" while the
                # code enforces 15.0 — confirm which value is intended.
                timeout = max(settings.client_timeout, 15.0)
                deadline = time.monotonic() + timeout
                # Exception types that trigger a retry instead of an immediate error.
                retryable = (
                    httpx.ConnectError,
                    httpx.ReadError,
                    httpx.TimeoutException,
                    ssl.SSLError,
                )

                async def send_error_response(exc: Exception) -> None:
                    """Send an error response to the client."""
                    if isinstance(msg.root, JSONRPCRequest):
                        # Requests carry an id, so reply with a JSON-RPC error for that id.
                        error_response = JSONRPCError(
                            jsonrpc="2.0",
                            id=msg.root.id,
                            error=ErrorData(
                                code=-32000,
                                message=f"Transport error: {type(exc).__name__}",
                                data={"error_type": type(exc).__name__, "detail": str(exc)},
                            ),
                        )
                        await ctx.read_stream_writer.send(
                            SessionMessage(JSONRPCMessage(error_response))
                        )
                    else:
                        # Non-request messages: forward the exception object itself.
                        await ctx.read_stream_writer.send(exc)

                while True:
                    try:
                        if is_resumption:
                            await self._handle_resumption_request(ctx)
                        else:
                            await self._handle_post_request(ctx)
                        return
                    except retryable as e:
                        # The deadline is only checked after a failure; once it has
                        # passed, report the error instead of retrying again.
                        if time.monotonic() >= deadline:
                            logger.error("MCP request failed after timeout: %s", e)
                            await send_error_response(e)
                            return
                        logger.warning("Retrying MCP request after error: %s", e)
                        await asyncio.sleep(2.0)
                    except asyncio.CancelledError:
                        # Never swallow cancellation.
                        raise
                    except Exception as e:
                        # Non-retryable failure: report once and stop.
                        logger.exception("Request handler error: %s", e)
                        await send_error_response(e)
                        return

            try:
                async with write_stream_reader:
                    async for session_message in write_stream_reader:
                        message = session_message.message
                        # Only ClientMessageMetadata is passed along; other metadata is dropped.
                        metadata = (
                            session_message.metadata
                            if isinstance(session_message.metadata, ClientMessageMetadata)
                            else None
                        )
                        is_resumption = bool(metadata and metadata.resumption_token)

                        logger.debug("Sending client message: %s", message)

                        if self._is_initialized_notification(message):
                            start_get_stream()

                        ctx = RequestContext(
                            client=client,
                            headers=self.request_headers,
                            session_id=self.session_id,
                            session_message=session_message,
                            metadata=metadata,
                            read_stream_writer=read_stream_writer,
                            sse_read_timeout=self.sse_read_timeout,
                        )

                        # Requests are scheduled on the task group; every other
                        # message type is awaited inline before reading the next one.
                        if isinstance(message.root, JSONRPCRequest):
                            tg.start_soon(handle_request_async, ctx, is_resumption)
                        else:
                            await handle_request_async(ctx, is_resumption)

            except Exception:
                logger.exception("Error in post_writer")
            finally:
                # Always close both streams, whether the loop ended normally or failed.
                await read_stream_writer.aclose()
                await write_stream.aclose()

        StreamableHTTPTransport.post_writer = patched_post_writer
        logger.debug("Patched StreamableHTTPTransport.post_writer")

    except ImportError:
        logger.debug("mcp.client.streamable_http not available, skipping patch")
    except Exception as e:
        logger.warning("Failed to patch streamable_http: %s", e)
144
+
145
+
146
def patch_client_session_validation() -> None:
    """
    Replace ClientSession._validate_tool_result with a no-op.

    The stock validation is strict and raises for responses that do not
    conform but are still usable, so it is swapped for a coroutine that
    does nothing. A missing mcp module is logged at debug level; any
    other failure is logged as a warning and not raised.
    """

    async def noop_validate(self: Any, name: str, result: Any) -> None:
        """Skip structured output validation entirely."""

    try:
        from mcp.client.session import ClientSession

        ClientSession._validate_tool_result = noop_validate
        logger.debug("Patched ClientSession._validate_tool_result to skip validation")
    except ImportError:
        logger.debug("mcp.client.session not available, skipping patch")
    except Exception as e:
        logger.warning("Failed to patch client session: %s", e)
166
+
167
+
168
def patch_server_output_validation() -> None:
    """
    Patch MCP server to skip structured output validation and auto-generate
    structuredContent for FastMCP tools with x-fastmcp-wrap-result.

    Replaces ``Server.call_tool`` with a decorator factory whose handler
    validates input arguments (optionally), calls the tool function,
    normalizes the return value into a ``CallToolResult`` and never
    validates the output. A missing mcp module is logged at debug level;
    any other failure while patching is logged as a warning.
    """
    try:
        import json

        import mcp.types as types
        from mcp.server.lowlevel.server import Server

        def patched_call_tool(
            self: Any, validate_input: bool = True, validate_output: bool = False
        ) -> Any:
            """Patched call_tool that skips output validation.

            ``validate_output`` is accepted for signature compatibility but
            is not consulted.
            """

            def decorator(func: Any) -> Any:
                async def handler(req: types.CallToolRequest) -> Any:
                    try:
                        tool_name = req.params.name
                        arguments = req.params.arguments or {}
                        tool = await self._get_cached_tool_definition(tool_name)

                        if validate_input and tool:
                            # Import before the try so a missing jsonschema surfaces
                            # as its own ImportError instead of an UnboundLocalError
                            # raised while evaluating the except clause.
                            import jsonschema

                            try:
                                jsonschema.validate(instance=arguments, schema=tool.inputSchema)
                            except jsonschema.ValidationError as e:
                                return self._make_error_result(
                                    f"Input validation error: {e.message}"
                                )

                        results = await func(tool_name, arguments)

                        # output normalization
                        unstructured_content: list[Any]
                        maybe_structured_content: dict[str, Any] | None
                        if isinstance(results, types.CallToolResult):
                            # Already a complete result: pass through untouched.
                            return types.ServerResult(results)
                        elif isinstance(results, tuple) and len(results) == 2:
                            unstructured_content, maybe_structured_content = results
                        elif isinstance(results, dict):
                            maybe_structured_content = results
                            text = json.dumps(results, indent=2)
                            unstructured_content = [types.TextContent(type="text", text=text)]
                        elif results is None:
                            # None means success with no content
                            unstructured_content = []
                            maybe_structured_content = None
                        elif isinstance(results, (str, bytes, bytearray, memoryview)):
                            # Handle string/bytes explicitly before iterable check
                            # (these are iterable but should not be split into chars/ints)
                            text = (
                                results
                                if isinstance(results, str)
                                else bytes(results).decode("utf-8", errors="replace")
                            )
                            unstructured_content = [types.TextContent(type="text", text=text)]
                            maybe_structured_content = None
                        elif isinstance(results, (int, float)):
                            # Primitives -> string representation
                            # (bool is a subclass of int, so it is covered here too)
                            unstructured_content = [
                                types.TextContent(type="text", text=str(results))
                            ]
                            maybe_structured_content = None
                        elif hasattr(results, "__iter__"):
                            unstructured_content = list(results)
                            maybe_structured_content = None
                        else:
                            return self._make_error_result(
                                f"Unexpected return type: {type(results).__name__}"
                            )

                        # Auto-generate structuredContent for FastMCP tools
                        # FastMCP generates outputSchema but doesn't populate it
                        if maybe_structured_content is None and tool:
                            output_schema = getattr(tool, "outputSchema", None)
                            if output_schema and output_schema.get("x-fastmcp-wrap-result"):
                                # Wrap the first text item: parsed JSON when it
                                # parses, the raw text otherwise.
                                for item in unstructured_content:
                                    if isinstance(item, types.TextContent):
                                        try:
                                            parsed = json.loads(item.text)
                                            maybe_structured_content = {"result": parsed}
                                        except json.JSONDecodeError:
                                            maybe_structured_content = {"result": item.text}
                                        break

                        return types.ServerResult(
                            types.CallToolResult(
                                content=list(unstructured_content),
                                structuredContent=maybe_structured_content,
                                isError=False,
                            )
                        )
                    except Exception as e:
                        # Any failure is reported to the caller as an error result.
                        return self._make_error_result(str(e))

                self.request_handlers[types.CallToolRequest] = handler
                return func

            return decorator

        Server.call_tool = patched_call_tool
        logger.debug("Patched Server.call_tool to skip output validation")

    except ImportError:
        logger.debug("mcp.server.lowlevel.server not available, skipping patch")
    except Exception as e:
        logger.warning("Failed to patch server output validation: %s", e)
279
+
280
+
281
def suppress_fastmcp_logging(level: int = logging.WARNING) -> None:
    """
    Quiet the fastmcp loggers.

    FastMCP emits many INFO-level messages that clutter output, so each
    logger named below is set to the requested level.

    Args:
        level: Logging level to apply (default: WARNING)
    """
    noisy_logger_names = (
        "fastmcp",
        "fastmcp.server.server",
        "fastmcp.server.openapi",
        "fastmcp.tools.tool_manager",
    )
    for name in noisy_logger_names:
        logging.getLogger(name).setLevel(level)
    logger.debug("Suppressed fastmcp logging to level %s", level)
300
+
301
+
302
def apply_all_patches() -> None:
    """Run every MCP patch in order, then the fastmcp log suppression."""
    patch_steps = (
        patch_streamable_http_error_handling,
        patch_client_session_validation,
        patch_server_output_validation,
        suppress_fastmcp_logging,
    )
    for apply_step in patch_steps:
        apply_step()
    logger.debug("All MCP patches applied")
@@ -0,0 +1,54 @@
1
+ """
2
+ Centralized warning filters for noisy third-party dependencies.
3
+
4
+ Keep these helpers here so the rest of the codebase can stay clean and avoid
5
+ scattering warning filters across unrelated modules.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import warnings
11
+ from contextlib import contextmanager
12
+ from typing import TYPE_CHECKING
13
+
14
+ if TYPE_CHECKING:
15
+ from collections.abc import Iterator
16
+
17
+
18
def apply_default_warning_filters(*, verbose: bool) -> None:
    """Install the default "ignore" filters used in non-verbose CLI/server modes.

    Args:
        verbose: When True nothing is filtered. When False,
            DeprecationWarning is ignored, and PydanticDeprecatedSince20
            as well if pydantic.warnings can be imported.
    """
    if not verbose:
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        # Pydantic v2 emits PydanticDeprecatedSince20 for v1-style config usage in deps.
        try:
            from pydantic.warnings import PydanticDeprecatedSince20 as pydantic_v2_warning
        except Exception:
            # pydantic (or that warning class) is unavailable: nothing more to filter.
            pass
        else:
            warnings.filterwarnings("ignore", category=pydantic_v2_warning)
32
+
33
+
34
@contextmanager
def suppress_mcp_use_import_warnings() -> Iterator[None]:
    """Context manager that silences known noisy warnings during `mcp_use` imports.

    Filters are installed inside ``warnings.catch_warnings()``, so the
    previous filter state is restored when the context exits.
    """
    try:
        from pydantic.warnings import PydanticDeprecatedSince20 as pydantic_v2_warning
    except Exception:  # pragma: no cover
        pydantic_v2_warning = None  # type: ignore[assignment]

    with warnings.catch_warnings():
        # mcp_use currently emits DeprecationWarning from its package __init__.py.
        warnings.filterwarnings("ignore", category=DeprecationWarning, module=r"mcp_use(\..*)?$")

        # mcp_use currently defines Pydantic v1-style `class Config` in oauth models.
        if pydantic_v2_warning is not None:
            warnings.filterwarnings(
                "ignore",
                category=pydantic_v2_warning,
                module=r"mcp_use\.client\.auth\.oauth$",
            )

        yield
hud/samples/browser.py CHANGED
@@ -7,17 +7,17 @@ from typing import Any
7
7
  from pydantic import Field
8
8
 
9
9
  from hud.settings import settings
10
- from hud.types import MCPToolCall, Task
10
+ from hud.types import LegacyTask, MCPToolCall
11
11
 
12
12
 
13
- class BrowserTask(Task):
14
- """Task subclass with browser defaults for BrowserTask(prompt=...)."""
13
+ class BrowserTask(LegacyTask):
14
+ """LegacyTask subclass with browser defaults for BrowserTask(prompt=...)."""
15
15
 
16
16
  prompt: str = "Open Google and be ready to search."
17
17
  mcp_config: dict[str, Any] = Field(
18
18
  default_factory=lambda: {
19
19
  "browser": {
20
- "url": "https://mcp.hud.so/v3/mcp",
20
+ "url": settings.hud_mcp_url,
21
21
  "headers": {
22
22
  "Authorization": f"Bearer {settings.api_key}",
23
23
  "Mcp-Image": "hudevals/hud-remote-browser:0.1.1",
hud/server/__init__.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from .router import MCPRouter
3
4
  from .server import MCPServer
4
5
 
5
- __all__ = ["MCPServer"]
6
+ __all__ = ["MCPRouter", "MCPServer"]
hud/server/low_level.py CHANGED
@@ -89,11 +89,12 @@ class LowLevelServerWithInit(_BaseLL):
89
89
 
90
90
  def __init__(
91
91
  self,
92
+ fastmcp: Any,
92
93
  *args: Any,
93
94
  init_fn: Callable[[RequestContext], Awaitable[None]] | None = None,
94
95
  **kwargs: Any,
95
96
  ) -> None:
96
- super().__init__(*args, **kwargs)
97
+ super().__init__(fastmcp, *args, **kwargs)
97
98
  self._init_fn = init_fn
98
99
 
99
100
  async def run(