hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (282) hide show
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +70 -5
  3. hud/agents/base.py +238 -500
  4. hud/agents/claude.py +236 -247
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +264 -0
  7. hud/agents/gemini_cua.py +324 -0
  8. hud/agents/grounded_openai.py +98 -100
  9. hud/agents/misc/integration_test_agent.py +51 -20
  10. hud/agents/misc/response_agent.py +48 -36
  11. hud/agents/openai.py +282 -296
  12. hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
  13. hud/agents/operator.py +199 -0
  14. hud/agents/resolver.py +70 -0
  15. hud/agents/tests/conftest.py +133 -0
  16. hud/agents/tests/test_base.py +300 -622
  17. hud/agents/tests/test_base_runtime.py +233 -0
  18. hud/agents/tests/test_claude.py +381 -214
  19. hud/agents/tests/test_client.py +9 -10
  20. hud/agents/tests/test_gemini.py +369 -0
  21. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  22. hud/agents/tests/test_openai.py +377 -140
  23. hud/agents/tests/test_operator.py +362 -0
  24. hud/agents/tests/test_resolver.py +192 -0
  25. hud/agents/tests/test_run_eval.py +179 -0
  26. hud/agents/types.py +148 -0
  27. hud/cli/__init__.py +493 -546
  28. hud/cli/analyze.py +43 -5
  29. hud/cli/build.py +699 -113
  30. hud/cli/debug.py +8 -5
  31. hud/cli/dev.py +889 -732
  32. hud/cli/eval.py +793 -667
  33. hud/cli/flows/dev.py +167 -0
  34. hud/cli/flows/init.py +191 -0
  35. hud/cli/flows/tasks.py +153 -56
  36. hud/cli/flows/templates.py +151 -0
  37. hud/cli/flows/tests/__init__.py +1 -0
  38. hud/cli/flows/tests/test_dev.py +126 -0
  39. hud/cli/init.py +60 -58
  40. hud/cli/pull.py +1 -1
  41. hud/cli/push.py +38 -13
  42. hud/cli/rft.py +311 -0
  43. hud/cli/rft_status.py +145 -0
  44. hud/cli/tests/test_analyze.py +5 -5
  45. hud/cli/tests/test_analyze_metadata.py +3 -2
  46. hud/cli/tests/test_analyze_module.py +120 -0
  47. hud/cli/tests/test_build.py +110 -8
  48. hud/cli/tests/test_build_failure.py +41 -0
  49. hud/cli/tests/test_build_module.py +50 -0
  50. hud/cli/tests/test_cli_init.py +6 -1
  51. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  52. hud/cli/tests/test_cli_root.py +140 -0
  53. hud/cli/tests/test_convert.py +361 -0
  54. hud/cli/tests/test_debug.py +12 -10
  55. hud/cli/tests/test_dev.py +197 -0
  56. hud/cli/tests/test_eval.py +251 -0
  57. hud/cli/tests/test_eval_bedrock.py +51 -0
  58. hud/cli/tests/test_init.py +124 -0
  59. hud/cli/tests/test_main_module.py +11 -5
  60. hud/cli/tests/test_mcp_server.py +12 -100
  61. hud/cli/tests/test_push.py +1 -1
  62. hud/cli/tests/test_push_happy.py +74 -0
  63. hud/cli/tests/test_push_wrapper.py +23 -0
  64. hud/cli/tests/test_registry.py +1 -1
  65. hud/cli/tests/test_utils.py +1 -1
  66. hud/cli/{rl → utils}/celebrate.py +14 -12
  67. hud/cli/utils/config.py +18 -1
  68. hud/cli/utils/docker.py +130 -4
  69. hud/cli/utils/env_check.py +9 -9
  70. hud/cli/utils/git.py +136 -0
  71. hud/cli/utils/interactive.py +39 -5
  72. hud/cli/utils/metadata.py +70 -1
  73. hud/cli/utils/runner.py +1 -1
  74. hud/cli/utils/server.py +2 -2
  75. hud/cli/utils/source_hash.py +3 -3
  76. hud/cli/utils/tasks.py +4 -1
  77. hud/cli/utils/tests/__init__.py +0 -0
  78. hud/cli/utils/tests/test_config.py +58 -0
  79. hud/cli/utils/tests/test_docker.py +93 -0
  80. hud/cli/utils/tests/test_docker_hints.py +71 -0
  81. hud/cli/utils/tests/test_env_check.py +74 -0
  82. hud/cli/utils/tests/test_environment.py +42 -0
  83. hud/cli/utils/tests/test_git.py +142 -0
  84. hud/cli/utils/tests/test_interactive_module.py +60 -0
  85. hud/cli/utils/tests/test_local_runner.py +50 -0
  86. hud/cli/utils/tests/test_logging_utils.py +23 -0
  87. hud/cli/utils/tests/test_metadata.py +49 -0
  88. hud/cli/utils/tests/test_package_runner.py +35 -0
  89. hud/cli/utils/tests/test_registry_utils.py +49 -0
  90. hud/cli/utils/tests/test_remote_runner.py +25 -0
  91. hud/cli/utils/tests/test_runner_modules.py +52 -0
  92. hud/cli/utils/tests/test_source_hash.py +36 -0
  93. hud/cli/utils/tests/test_tasks.py +80 -0
  94. hud/cli/utils/version_check.py +258 -0
  95. hud/cli/{rl → utils}/viewer.py +2 -2
  96. hud/clients/README.md +12 -11
  97. hud/clients/__init__.py +4 -3
  98. hud/clients/base.py +166 -26
  99. hud/clients/environment.py +51 -0
  100. hud/clients/fastmcp.py +13 -6
  101. hud/clients/mcp_use.py +45 -15
  102. hud/clients/tests/test_analyze_scenarios.py +206 -0
  103. hud/clients/tests/test_protocol.py +9 -3
  104. hud/datasets/__init__.py +23 -20
  105. hud/datasets/loader.py +326 -0
  106. hud/datasets/runner.py +198 -105
  107. hud/datasets/tests/__init__.py +0 -0
  108. hud/datasets/tests/test_loader.py +221 -0
  109. hud/datasets/tests/test_utils.py +315 -0
  110. hud/datasets/utils.py +270 -90
  111. hud/environment/__init__.py +52 -0
  112. hud/environment/connection.py +258 -0
  113. hud/environment/connectors/__init__.py +33 -0
  114. hud/environment/connectors/base.py +68 -0
  115. hud/environment/connectors/local.py +177 -0
  116. hud/environment/connectors/mcp_config.py +137 -0
  117. hud/environment/connectors/openai.py +101 -0
  118. hud/environment/connectors/remote.py +172 -0
  119. hud/environment/environment.py +835 -0
  120. hud/environment/integrations/__init__.py +45 -0
  121. hud/environment/integrations/adk.py +67 -0
  122. hud/environment/integrations/anthropic.py +196 -0
  123. hud/environment/integrations/gemini.py +92 -0
  124. hud/environment/integrations/langchain.py +82 -0
  125. hud/environment/integrations/llamaindex.py +68 -0
  126. hud/environment/integrations/openai.py +238 -0
  127. hud/environment/mock.py +306 -0
  128. hud/environment/router.py +263 -0
  129. hud/environment/scenarios.py +620 -0
  130. hud/environment/tests/__init__.py +1 -0
  131. hud/environment/tests/test_connection.py +317 -0
  132. hud/environment/tests/test_connectors.py +205 -0
  133. hud/environment/tests/test_environment.py +593 -0
  134. hud/environment/tests/test_integrations.py +257 -0
  135. hud/environment/tests/test_local_connectors.py +242 -0
  136. hud/environment/tests/test_scenarios.py +1086 -0
  137. hud/environment/tests/test_tools.py +208 -0
  138. hud/environment/types.py +23 -0
  139. hud/environment/utils/__init__.py +35 -0
  140. hud/environment/utils/formats.py +215 -0
  141. hud/environment/utils/schema.py +171 -0
  142. hud/environment/utils/tool_wrappers.py +113 -0
  143. hud/eval/__init__.py +67 -0
  144. hud/eval/context.py +727 -0
  145. hud/eval/display.py +299 -0
  146. hud/eval/instrument.py +187 -0
  147. hud/eval/manager.py +533 -0
  148. hud/eval/parallel.py +268 -0
  149. hud/eval/task.py +372 -0
  150. hud/eval/tests/__init__.py +1 -0
  151. hud/eval/tests/test_context.py +178 -0
  152. hud/eval/tests/test_eval.py +210 -0
  153. hud/eval/tests/test_manager.py +152 -0
  154. hud/eval/tests/test_parallel.py +168 -0
  155. hud/eval/tests/test_task.py +291 -0
  156. hud/eval/types.py +65 -0
  157. hud/eval/utils.py +194 -0
  158. hud/patches/__init__.py +19 -0
  159. hud/patches/mcp_patches.py +308 -0
  160. hud/patches/warnings.py +54 -0
  161. hud/samples/browser.py +4 -4
  162. hud/server/__init__.py +2 -1
  163. hud/server/low_level.py +2 -1
  164. hud/server/router.py +164 -0
  165. hud/server/server.py +567 -80
  166. hud/server/tests/test_mcp_server_integration.py +11 -11
  167. hud/server/tests/test_mcp_server_more.py +1 -1
  168. hud/server/tests/test_server_extra.py +2 -0
  169. hud/settings.py +45 -3
  170. hud/shared/exceptions.py +36 -10
  171. hud/shared/hints.py +26 -1
  172. hud/shared/requests.py +15 -3
  173. hud/shared/tests/test_exceptions.py +40 -31
  174. hud/shared/tests/test_hints.py +167 -0
  175. hud/telemetry/__init__.py +20 -19
  176. hud/telemetry/exporter.py +201 -0
  177. hud/telemetry/instrument.py +165 -253
  178. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  179. hud/telemetry/tests/test_exporter.py +258 -0
  180. hud/telemetry/tests/test_instrument.py +401 -0
  181. hud/tools/__init__.py +18 -2
  182. hud/tools/agent.py +223 -0
  183. hud/tools/apply_patch.py +639 -0
  184. hud/tools/base.py +54 -4
  185. hud/tools/bash.py +2 -2
  186. hud/tools/computer/__init__.py +36 -3
  187. hud/tools/computer/anthropic.py +2 -2
  188. hud/tools/computer/gemini.py +385 -0
  189. hud/tools/computer/hud.py +23 -6
  190. hud/tools/computer/openai.py +20 -21
  191. hud/tools/computer/qwen.py +434 -0
  192. hud/tools/computer/settings.py +37 -0
  193. hud/tools/edit.py +3 -7
  194. hud/tools/executors/base.py +4 -2
  195. hud/tools/executors/pyautogui.py +1 -1
  196. hud/tools/grounding/grounded_tool.py +13 -18
  197. hud/tools/grounding/grounder.py +10 -31
  198. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  199. hud/tools/jupyter.py +330 -0
  200. hud/tools/playwright.py +18 -3
  201. hud/tools/shell.py +308 -0
  202. hud/tools/tests/test_agent_tool.py +355 -0
  203. hud/tools/tests/test_apply_patch.py +718 -0
  204. hud/tools/tests/test_computer.py +4 -9
  205. hud/tools/tests/test_computer_actions.py +24 -2
  206. hud/tools/tests/test_jupyter_tool.py +181 -0
  207. hud/tools/tests/test_shell.py +596 -0
  208. hud/tools/tests/test_submit.py +85 -0
  209. hud/tools/tests/test_types.py +193 -0
  210. hud/tools/types.py +21 -1
  211. hud/types.py +194 -56
  212. hud/utils/__init__.py +2 -0
  213. hud/utils/env.py +67 -0
  214. hud/utils/hud_console.py +89 -18
  215. hud/utils/mcp.py +15 -58
  216. hud/utils/strict_schema.py +162 -0
  217. hud/utils/tests/test_init.py +1 -2
  218. hud/utils/tests/test_mcp.py +1 -28
  219. hud/utils/tests/test_pretty_errors.py +186 -0
  220. hud/utils/tests/test_tool_shorthand.py +154 -0
  221. hud/utils/tests/test_version.py +1 -1
  222. hud/utils/types.py +20 -0
  223. hud/version.py +1 -1
  224. hud_python-0.5.13.dist-info/METADATA +264 -0
  225. hud_python-0.5.13.dist-info/RECORD +305 -0
  226. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
  227. hud/agents/langchain.py +0 -261
  228. hud/agents/lite_llm.py +0 -72
  229. hud/cli/rl/__init__.py +0 -180
  230. hud/cli/rl/config.py +0 -101
  231. hud/cli/rl/display.py +0 -133
  232. hud/cli/rl/gpu.py +0 -63
  233. hud/cli/rl/gpu_utils.py +0 -321
  234. hud/cli/rl/local_runner.py +0 -595
  235. hud/cli/rl/presets.py +0 -96
  236. hud/cli/rl/remote_runner.py +0 -463
  237. hud/cli/rl/rl_api.py +0 -150
  238. hud/cli/rl/vllm.py +0 -177
  239. hud/cli/rl/wait_utils.py +0 -89
  240. hud/datasets/parallel.py +0 -687
  241. hud/misc/__init__.py +0 -1
  242. hud/misc/claude_plays_pokemon.py +0 -292
  243. hud/otel/__init__.py +0 -35
  244. hud/otel/collector.py +0 -142
  245. hud/otel/config.py +0 -181
  246. hud/otel/context.py +0 -570
  247. hud/otel/exporters.py +0 -369
  248. hud/otel/instrumentation.py +0 -135
  249. hud/otel/processors.py +0 -121
  250. hud/otel/tests/__init__.py +0 -1
  251. hud/otel/tests/test_processors.py +0 -197
  252. hud/rl/README.md +0 -30
  253. hud/rl/__init__.py +0 -1
  254. hud/rl/actor.py +0 -176
  255. hud/rl/buffer.py +0 -405
  256. hud/rl/chat_template.jinja +0 -101
  257. hud/rl/config.py +0 -192
  258. hud/rl/distributed.py +0 -132
  259. hud/rl/learner.py +0 -637
  260. hud/rl/tests/__init__.py +0 -1
  261. hud/rl/tests/test_learner.py +0 -186
  262. hud/rl/train.py +0 -382
  263. hud/rl/types.py +0 -101
  264. hud/rl/utils/start_vllm_server.sh +0 -30
  265. hud/rl/utils.py +0 -524
  266. hud/rl/vllm_adapter.py +0 -143
  267. hud/telemetry/job.py +0 -352
  268. hud/telemetry/replay.py +0 -74
  269. hud/telemetry/tests/test_replay.py +0 -40
  270. hud/telemetry/tests/test_trace.py +0 -63
  271. hud/telemetry/trace.py +0 -158
  272. hud/utils/agent_factories.py +0 -86
  273. hud/utils/async_utils.py +0 -65
  274. hud/utils/group_eval.py +0 -223
  275. hud/utils/progress.py +0 -149
  276. hud/utils/tasks.py +0 -127
  277. hud/utils/tests/test_async_utils.py +0 -173
  278. hud/utils/tests/test_progress.py +0 -261
  279. hud_python-0.4.45.dist-info/METADATA +0 -552
  280. hud_python-0.4.45.dist-info/RECORD +0 -228
  281. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  282. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,208 @@
1
+ """Tests for @env.tool() decorator and tool operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import pytest
6
+
7
+ from hud.environment import Environment
8
+
9
+
10
class TestToolDecorator:
    """Registration behaviour of the ``@env.tool()`` decorator."""

    def test_tool_registers_function(self) -> None:
        """A decorated function is stored in the tool manager under its own name."""
        env = Environment("test-env")

        @env.tool()
        def add(a: int, b: int) -> int:
            """Add two numbers."""
            return a + b

        registered = list(env._tool_manager._tools.keys())
        assert "add" in registered

    def test_tool_with_custom_name(self) -> None:
        """Passing ``name=`` registers the tool under that name only."""
        env = Environment("test-env")

        @env.tool(name="custom_add")
        def add(a: int, b: int) -> int:
            return a + b

        registered = list(env._tool_manager._tools.keys())
        assert "custom_add" in registered
        # The Python function name must not leak in as a second entry.
        assert "add" not in registered

    def test_tool_preserves_docstring(self) -> None:
        """The function docstring shows up in the registered tool's description."""
        env = Environment("test-env")

        @env.tool()
        def greet(name: str) -> str:
            """Greet someone by name."""
            return f"Hello, {name}!"

        registered_tool = env._tool_manager._tools.get("greet")
        assert registered_tool is not None
        description = registered_tool.description or ""
        assert "Greet someone by name" in description

    def test_tool_async_function(self) -> None:
        """Coroutine functions can be registered as tools too."""
        env = Environment("test-env")

        @env.tool()
        async def fetch_data(url: str) -> str:
            """Fetch data from URL."""
            return f"Data from {url}"

        registered = list(env._tool_manager._tools.keys())
        assert "fetch_data" in registered

    def test_tool_returns_function(self) -> None:
        """The decorated name stays directly callable after registration."""
        env = Environment("test-env")

        @env.tool()
        def add(a: int, b: int) -> int:
            return a + b

        total = add(2, 3)
        assert total == 5
73
+
74
+
75
class TestListTools:
    """Tests for ``Environment.as_tools``."""

    @pytest.mark.asyncio
    async def test_as_tools_returns_registered_tools(self) -> None:
        """Every tool registered via the decorator is listed by ``as_tools``."""
        env = Environment("test-env")

        @env.tool()
        def tool1() -> str:
            return "1"

        @env.tool()
        def tool2() -> str:
            return "2"

        async with env:
            listed = env.as_tools()
            names = [entry.name for entry in listed]
            assert "tool1" in names
            assert "tool2" in names

    @pytest.mark.asyncio
    async def test_as_tools_empty_when_no_tools(self) -> None:
        """With nothing registered, no user-visible tools are listed."""
        env = Environment("test-env")
        async with env:
            listed = env.as_tools()
            # Underscore-prefixed names (e.g. a built-in _hud_submit) are ignored.
            user_tools = [entry for entry in listed if not entry.name.startswith("_")]
            assert len(user_tools) == 0
106
+
107
+
108
class TestCallTool:
    """Tests for ``Environment.call_tool``."""

    @pytest.mark.asyncio
    async def test_call_tool_executes_function(self) -> None:
        """Calling a registered tool by name runs the underlying function."""
        env = Environment("test-env")
        seen_names = []

        @env.tool()
        def greet(name: str) -> str:
            # Record the argument so the test can prove the body really ran.
            seen_names.append(name)
            return f"Hello, {name}!"

        async with env:
            outcome = await env.call_tool("greet", name="Alice")

        assert seen_names == ["Alice"]
        assert outcome is not None

    @pytest.mark.asyncio
    async def test_call_tool_async_function(self) -> None:
        """Tools defined as coroutines can be called the same way."""
        env = Environment("test-env")

        @env.tool()
        async def async_greet(name: str) -> str:
            return f"Hello, {name}!"

        async with env:
            outcome = await env.call_tool("async_greet", name="Bob")

        assert outcome is not None

    @pytest.mark.asyncio
    async def test_call_tool_not_found(self) -> None:
        """An unknown tool name is rejected with ``ValueError``."""
        env = Environment("test-env")

        async with env:
            with pytest.raises(ValueError, match="Tool not found"):
                await env.call_tool("nonexistent")
150
+
151
+
152
class TestMockMode:
    """Tests for toggling and using mock mode on an ``Environment``."""

    def test_mock_mode_default_false(self) -> None:
        """A fresh environment starts with mock mode off."""
        env = Environment("test-env")
        assert env._mock_mode is False
        assert env.is_mock is False

    def test_mock_enables_mock_mode(self) -> None:
        """``mock()`` turns mock mode on."""
        env = Environment("test-env")
        env.mock()
        assert env._mock_mode is True
        assert env.is_mock is True

    def test_unmock_disables_mock_mode(self) -> None:
        """``unmock()`` turns mock mode off again."""
        env = Environment("test-env")
        env.mock()
        env.unmock()
        assert env._mock_mode is False

    def test_mock_returns_self_for_chaining(self) -> None:
        """``mock()`` hands back the same environment so calls can be chained."""
        env = Environment("test-env")
        returned = env.mock()
        assert returned is env

    def test_mock_tool_sets_custom_output(self) -> None:
        """``mock_tool()`` stores the given output under the tool name."""
        env = Environment("test-env")
        env.mock_tool("navigate", "Custom result")
        assert env._mock_outputs["navigate"] == "Custom result"

    @pytest.mark.asyncio
    async def test_mock_mode_returns_mock_response(self) -> None:
        """In mock mode the real tool body is never executed."""
        env = Environment("test-env")
        invocations = 0

        @env.tool()
        def real_tool() -> str:
            nonlocal invocations
            invocations += 1
            return "real result"

        env.mock()
        env.mock_tool("real_tool", "mocked result")

        async with env:
            outcome = await env.call_tool("real_tool")

        # The counter staying at zero shows the real function was bypassed.
        assert invocations == 0
        # Some response must still come back from the mocked call.
        assert outcome is not None
@@ -0,0 +1,23 @@
1
+ """Environment types for configuration and tracing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+ __all__ = ["EnvConfig"]
8
+
9
+
10
class EnvConfig(BaseModel):
    """Environment configuration for Tasks.

    Specifies which hub to connect to and optional tool filtering.

    Attributes:
        name: Hub name to connect via connect_hub() (e.g., "browser", "sheets")
        include: Optional whitelist of tool names to include
        exclude: Optional blacklist of tool names to exclude
    """

    # Required: there is no default, so a hub name must always be supplied.
    name: str = Field(description="Hub name to connect to")
    # Both filters default to None, i.e. no filter list is configured.
    include: list[str] | None = Field(None, description="Whitelist of tool names")
    exclude: list[str] | None = Field(None, description="Blacklist of tool names")
@@ -0,0 +1,35 @@
1
+ """Environment utilities."""
2
+
3
+ from hud.environment.utils.formats import (
4
+ ToolFormat,
5
+ format_result,
6
+ parse_tool_call,
7
+ parse_tool_calls,
8
+ result_to_string,
9
+ )
10
+ from hud.environment.utils.schema import (
11
+ ensure_strict_schema,
12
+ json_type_to_python,
13
+ schema_to_pydantic,
14
+ )
15
+ from hud.environment.utils.tool_wrappers import (
16
+ create_async_tool_fn,
17
+ create_sync_tool_fn,
18
+ create_tool_fns,
19
+ stringify_result,
20
+ )
21
+
22
+ __all__ = [
23
+ "ToolFormat",
24
+ "create_async_tool_fn",
25
+ "create_sync_tool_fn",
26
+ "create_tool_fns",
27
+ "ensure_strict_schema",
28
+ "format_result",
29
+ "json_type_to_python",
30
+ "parse_tool_call",
31
+ "parse_tool_calls",
32
+ "result_to_string",
33
+ "schema_to_pydantic",
34
+ "stringify_result",
35
+ ]
@@ -0,0 +1,215 @@
1
+ """Tool format parsing and conversion for OpenAI, Claude, Gemini, and MCP."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from enum import Enum, auto
7
+ from typing import Any
8
+
9
+ from hud.types import MCPToolCall, MCPToolResult
10
+
11
+ __all__ = [
12
+ "ToolFormat",
13
+ "format_result",
14
+ "parse_tool_call",
15
+ "parse_tool_calls",
16
+ "result_to_string",
17
+ ]
18
+
19
+
20
class ToolFormat(Enum):
    """Wire format in which a tool call was expressed.

    ``parse_tool_call`` returns one of these members alongside the parsed call,
    and ``format_result`` uses it to choose the shape of the response.
    """

    # OpenAI: arguments live under ``function.arguments`` as a JSON string.
    OPENAI = auto()
    # Claude: block with type="tool_use" whose ``input`` is already a dict.
    CLAUDE = auto()
    # Gemini: ``functionCall`` wrapper (or bare name) carrying ``args``.
    GEMINI = auto()
    # MCP / generic: plain ``name`` plus ``arguments``.
    MCP = auto()
27
+
28
+
29
+ # -----------------------------------------------------------------------------
30
+ # Parsing
31
+ # -----------------------------------------------------------------------------
32
+
33
+
34
+ def _to_dict(obj: Any) -> dict[str, Any]:
35
+ """Convert object to dict for uniform processing."""
36
+ if isinstance(obj, dict):
37
+ return obj
38
+ if hasattr(obj, "model_dump"):
39
+ return obj.model_dump()
40
+ if hasattr(obj, "__dict__"):
41
+ return vars(obj)
42
+ raise ValueError(f"Cannot convert {type(obj).__name__} to dict")
43
+
44
+
45
+ def _parse_json_args(args: Any) -> dict[str, Any]:
46
+ """Parse arguments, handling JSON strings."""
47
+ if not args:
48
+ return {}
49
+ if isinstance(args, str):
50
+ try:
51
+ return json.loads(args)
52
+ except json.JSONDecodeError:
53
+ return {}
54
+ return args
55
+
56
+
57
def parse_tool_call(call: Any, **kwargs: Any) -> tuple[MCPToolCall, ToolFormat]:
    """Parse any tool call format into (MCPToolCall, ToolFormat).

    Supports:
        - String (tool name only, or with kwargs)
        - Tuple: (name,), (name, args), (name, args, id)
        - MCPToolCall
        - OpenAI: {function: {name, arguments}, id}
        - Claude: {type: "tool_use", name, input, id}
        - Gemini: {functionCall: {name, args}} or {name, args}
        - Generic: {name, arguments}

    Args:
        call: Tool call in any supported format.
        **kwargs: Additional arguments (used as the arguments when call is a string).

    Returns:
        Tuple of (MCPToolCall, ToolFormat) for the parsed call.

    Raises:
        ValueError: If format is unrecognized, including an empty tuple.
    """
    # Primitives
    if isinstance(call, str):
        return MCPToolCall(name=call, arguments=kwargs or {}), ToolFormat.MCP

    if isinstance(call, tuple):
        if not call:
            # Without this guard call[0] raises IndexError, which is not the
            # documented error type and is not caught by parse_tool_calls.
            raise ValueError("Unrecognized tool call format: empty tuple")
        tc = MCPToolCall(name=call[0], arguments=call[1] if len(call) > 1 else {})
        if len(call) > 2:
            tc.id = call[2]
        return tc, ToolFormat.MCP

    if isinstance(call, MCPToolCall):
        return call, ToolFormat.MCP

    # Everything else is inspected as a dict.
    d = _to_dict(call)

    # OpenAI: {function: {name, arguments}, id}
    if "function" in d:
        f = d["function"] if isinstance(d["function"], dict) else _to_dict(d["function"])
        tc = MCPToolCall(name=f["name"], arguments=_parse_json_args(f.get("arguments")))
        if d.get("id"):
            tc.id = d["id"]
        return tc, ToolFormat.OPENAI

    # Claude: {type: "tool_use", name, input, id}
    if d.get("type") == "tool_use":
        tc = MCPToolCall(name=d["name"], arguments=d.get("input") or {})
        if d.get("id"):
            tc.id = d["id"]
        return tc, ToolFormat.CLAUDE

    # Gemini: {functionCall: {name, args}} or {name, args}
    if "functionCall" in d:
        fc = d["functionCall"]
        if not isinstance(fc, dict):
            # Same normalisation the OpenAI branch applies: the nested value
            # may be an object rather than a dict.
            fc = _to_dict(fc)
        return MCPToolCall(name=fc["name"], arguments=fc.get("args") or {}), ToolFormat.GEMINI

    # Bare Gemini shape; "arguments" is excluded so generic calls fall through.
    if "args" in d and "name" in d and "arguments" not in d:
        return MCPToolCall(name=d["name"], arguments=d.get("args") or {}), ToolFormat.GEMINI

    # Generic: {name, arguments/input}
    if "name" in d:
        tc = MCPToolCall(name=d["name"], arguments=d.get("arguments") or d.get("input") or {})
        if d.get("id"):
            tc.id = d["id"]
        return tc, ToolFormat.MCP

    raise ValueError(f"Unrecognized tool call format: {list(d.keys())}")
126
+
127
+
128
+ def _is_tool_block(item: Any) -> bool:
129
+ """Check if item is a tool call (not text/other content)."""
130
+ t = item.get("type") if isinstance(item, dict) else getattr(item, "type", None)
131
+ return t is None or t in ("tool_use", "function")
132
+
133
+
134
def parse_tool_calls(calls: Any) -> list[tuple[MCPToolCall, ToolFormat]]:
    """Parse one or many tool calls, dropping anything that is not a tool call.

    Args:
        calls: ``None``, a single call, or a list of calls in any format
            accepted by ``parse_tool_call``.

    Returns:
        List of (MCPToolCall, ToolFormat) tuples. Entries for which
        ``parse_tool_call`` raises ``ValueError`` are skipped; list entries
        rejected by ``_is_tool_block`` (e.g. text blocks) are skipped as well.
    """
    if calls is None:
        return []

    if isinstance(calls, list):
        # Lazy filter so each entry is checked right before it is parsed.
        candidates = (entry for entry in calls if _is_tool_block(entry))
    else:
        # A lone call is parsed directly, without the block-type filter.
        candidates = iter((calls,))

    parsed = []
    for entry in candidates:
        try:
            parsed.append(parse_tool_call(entry))
        except ValueError:
            continue
    return parsed
160
+
161
+
162
+ # -----------------------------------------------------------------------------
163
+ # Result Formatting
164
+ # -----------------------------------------------------------------------------
165
+
166
+
167
def result_to_string(result: MCPToolResult) -> str:
    """Flatten the content blocks of a tool result into one string.

    Args:
        result: Tool result whose ``content`` is an iterable of blocks
            (or empty/``None``).

    Returns:
        One line per block, joined with newlines: the stringified ``text`` for
        blocks that have it, otherwise a ``[binary: N bytes]`` placeholder for
        blocks that have ``data`` (N is ``len(data)``). Blocks with neither are
        omitted. Empty string when there is no content.
    """
    blocks = result.content
    if not blocks:
        return ""

    rendered = []
    for block in blocks:
        text = getattr(block, "text", None)
        if text is not None:
            rendered.append(str(text))
            continue
        data = getattr(block, "data", None)
        if data is not None:
            rendered.append(f"[binary: {len(data)} bytes]")
    return "\n".join(rendered)
185
+
186
+
187
def format_result(result: MCPToolResult, tc: MCPToolCall, fmt: ToolFormat) -> Any:
    """Shape a tool result to match the format the call arrived in.

    Args:
        result: MCP tool result.
        tc: The originating tool call; supplies ``id`` and ``name``.
        fmt: Format to produce.

    Returns:
        OpenAI: {"role": "tool", "tool_call_id": ..., "content": ...}
        Claude: {"type": "tool_result", "tool_use_id": ..., "content": ...},
            plus "is_error": True when ``result.isError`` is truthy
        Gemini: {"functionResponse": {"name": ..., "response": {"result": ...}}}
        Anything else (MCP): the ``result`` object itself, unchanged
    """
    text = result_to_string(result)

    if fmt == ToolFormat.OPENAI:
        return {"role": "tool", "tool_call_id": tc.id, "content": text}

    if fmt == ToolFormat.CLAUDE:
        payload: dict[str, Any] = {
            "type": "tool_result",
            "tool_use_id": tc.id,
            "content": text,
        }
        # The error flag is only attached when set, never as False.
        return {**payload, "is_error": True} if result.isError else payload

    if fmt == ToolFormat.GEMINI:
        response_body = {"result": text}
        return {"functionResponse": {"name": tc.name, "response": response_body}}

    # MCP callers get the original result object back.
    return result
@@ -0,0 +1,171 @@
1
+ """Schema utilities for tool definitions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from typing import Any
7
+
8
+ __all__ = [
9
+ "ensure_strict_schema",
10
+ "json_type_to_python",
11
+ "schema_to_pydantic",
12
+ "validate_openai_schema",
13
+ ]
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
def ensure_strict_schema(schema: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of a JSON schema adjusted for OpenAI's strict mode.

    OpenAI strict mode requires:
    - additionalProperties: false on all objects
    - All properties must be in required

    The input is never modified: the adjustments are applied to a deep copy,
    so nested property schemas owned by the caller stay untouched.

    Args:
        schema: Original JSON schema.

    Returns:
        New schema dict with the strict-mode adjustments applied to the root
        and to every object reachable through ``properties`` and array ``items``.
    """
    import copy

    # A shallow copy is not enough: the recursion below edits nested dicts in
    # place, which would otherwise leak into the caller's schema.
    strict = copy.deepcopy(schema)
    _ensure_strict_recursive(strict)
    return strict


def _ensure_strict_recursive(schema: dict[str, Any]) -> None:
    """Apply strict-mode rules to ``schema`` in place, descending into children.

    Objects get ``additionalProperties = False`` and, when they declare
    ``properties``, a ``required`` list naming every property. Recursion follows
    dict-valued properties and dict-valued array ``items``.
    """
    schema_type = schema.get("type")

    if schema_type == "object":
        schema["additionalProperties"] = False
        properties = schema.get("properties")
        if "properties" in schema:
            schema["required"] = list(properties.keys())
            for prop_schema in properties.values():
                if isinstance(prop_schema, dict):
                    _ensure_strict_recursive(prop_schema)

    elif schema_type == "array" and isinstance(schema.get("items"), dict):
        _ensure_strict_recursive(schema["items"])
61
+
62
+
63
def schema_to_pydantic(name: str, schema: dict[str, Any]) -> type:
    """Convert JSON schema to a Pydantic model.

    Each entry in the schema's ``properties`` becomes one model field whose
    type comes from ``json_type_to_python`` (a missing ``type`` is treated as
    ``"string"``). Properties listed in ``required`` have no default; all
    others default to ``None`` and are annotated as optional so that an
    explicit ``None`` validates as well as an omitted value.

    Args:
        name: Model name; the created class is called ``f"{name}Input"``.
        schema: JSON schema with properties.

    Returns:
        Dynamically created Pydantic model class.
    """
    from typing import Optional

    from pydantic import Field, create_model

    properties = schema.get("properties", {})
    required = set(schema.get("required", []))

    fields: dict[str, Any] = {}
    for prop_name, prop_schema in properties.items():
        prop_type = json_type_to_python(prop_schema.get("type", "string"))
        description = prop_schema.get("description", "")
        if prop_name in required:
            # Ellipsis marks the field as required for pydantic.
            fields[prop_name] = (prop_type, Field(default=..., description=description))
        else:
            # A None default needs an Optional annotation; otherwise passing
            # None explicitly is rejected even though it is the default.
            fields[prop_name] = (
                Optional[prop_type],
                Field(default=None, description=description),
            )

    return create_model(f"{name}Input", **fields)
86
+
87
+
88
def json_type_to_python(json_type: str) -> type:
    """Map JSON schema type to Python type.

    Args:
        json_type: JSON schema type string. A list/tuple of type names (the
            JSON Schema multi-type form, e.g. ``["string", "null"]``) is also
            accepted; its first entry other than ``"null"`` is used.

    Returns:
        Corresponding Python type. Unknown or unusable values map to ``str``.
    """
    mapping = {
        "string": str,
        "integer": int,
        "number": float,
        "boolean": bool,
        "array": list,
        "object": dict,
    }

    if isinstance(json_type, (list, tuple)):
        # A list cannot be used as a dict key, so it is reduced to one name
        # first; "null" only signals nullability and carries no Python type.
        json_type = next((entry for entry in json_type if entry != "null"), "string")

    if not isinstance(json_type, str):
        return str
    return mapping.get(json_type, str)
106
+
107
+
108
def validate_openai_schema(
    schema: dict[str, Any],
    tool_name: str = "unknown",
    path: str = "",
) -> list[str]:
    """Collect OpenAI-compatibility problems found in a JSON schema.

    Two conditions are reported:
    - a ``prefixItems`` key is present (what tuple types generate)
    - an array schema has neither ``items`` nor ``prefixItems``

    The check descends into ``properties``, dict-valued ``items``,
    ``anyOf``/``oneOf``/``allOf`` members and ``$defs`` entries.

    Args:
        schema: JSON schema to validate; non-dict values yield no errors.
        tool_name: Name of the tool (for error messages).
        path: Location of ``schema`` inside the overall schema, used in the
            messages; an empty path is reported as ``root``.

    Returns:
        List of validation error messages. Empty if valid.
    """
    if not isinstance(schema, dict):
        return []

    problems: list[str] = []
    has_prefix_items = "prefixItems" in schema

    # Tuple-style schemas
    if has_prefix_items:
        problems.append(
            f"Tool '{tool_name}' has 'prefixItems' at {path or 'root'} "
            "(likely from tuple type). Use list[Model] instead of tuple."
        )

    # Arrays that say nothing about their elements
    is_array = schema.get("type") == "array"
    if is_array and "items" not in schema and not has_prefix_items:
        problems.append(
            f"Tool '{tool_name}' has array at {path or 'root'} without 'items'. "
            "OpenAI requires 'items' for array schemas."
        )

    # Object properties
    if "properties" in schema:
        for prop_name, prop_schema in schema["properties"].items():
            child_path = f"{path}.{prop_name}" if path else prop_name
            problems += validate_openai_schema(prop_schema, tool_name, child_path)

    # Array element schema
    element_schema = schema.get("items")
    if isinstance(element_schema, dict):
        # With an empty path this is just "[items]".
        problems += validate_openai_schema(element_schema, tool_name, f"{path}[items]")

    # Schema combinators
    for combinator in ("anyOf", "oneOf", "allOf"):
        if combinator not in schema:
            continue
        for index, member in enumerate(schema[combinator]):
            label = f"{combinator}[{index}]"
            member_path = f"{path}.{label}" if path else label
            problems += validate_openai_schema(member, tool_name, member_path)

    # Shared definitions; their path does not include the parent path.
    if "$defs" in schema:
        for def_name, def_schema in schema["$defs"].items():
            problems += validate_openai_schema(def_schema, tool_name, f"$defs.{def_name}")

    return problems