hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274) hide show
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +11 -5
  3. hud/agents/base.py +220 -500
  4. hud/agents/claude.py +200 -240
  5. hud/agents/gemini.py +275 -0
  6. hud/agents/gemini_cua.py +335 -0
  7. hud/agents/grounded_openai.py +98 -100
  8. hud/agents/misc/integration_test_agent.py +51 -20
  9. hud/agents/misc/response_agent.py +41 -36
  10. hud/agents/openai.py +291 -292
  11. hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
  12. hud/agents/operator.py +211 -0
  13. hud/agents/tests/conftest.py +133 -0
  14. hud/agents/tests/test_base.py +300 -622
  15. hud/agents/tests/test_base_runtime.py +233 -0
  16. hud/agents/tests/test_claude.py +379 -210
  17. hud/agents/tests/test_client.py +9 -10
  18. hud/agents/tests/test_gemini.py +369 -0
  19. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  20. hud/agents/tests/test_openai.py +376 -140
  21. hud/agents/tests/test_operator.py +362 -0
  22. hud/agents/tests/test_run_eval.py +179 -0
  23. hud/cli/__init__.py +461 -545
  24. hud/cli/analyze.py +43 -5
  25. hud/cli/build.py +664 -110
  26. hud/cli/debug.py +8 -5
  27. hud/cli/dev.py +882 -734
  28. hud/cli/eval.py +782 -668
  29. hud/cli/flows/dev.py +167 -0
  30. hud/cli/flows/init.py +191 -0
  31. hud/cli/flows/tasks.py +153 -56
  32. hud/cli/flows/templates.py +151 -0
  33. hud/cli/flows/tests/__init__.py +1 -0
  34. hud/cli/flows/tests/test_dev.py +126 -0
  35. hud/cli/init.py +60 -58
  36. hud/cli/push.py +29 -11
  37. hud/cli/rft.py +311 -0
  38. hud/cli/rft_status.py +145 -0
  39. hud/cli/tests/test_analyze.py +5 -5
  40. hud/cli/tests/test_analyze_metadata.py +3 -2
  41. hud/cli/tests/test_analyze_module.py +120 -0
  42. hud/cli/tests/test_build.py +108 -6
  43. hud/cli/tests/test_build_failure.py +41 -0
  44. hud/cli/tests/test_build_module.py +50 -0
  45. hud/cli/tests/test_cli_init.py +6 -1
  46. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  47. hud/cli/tests/test_cli_root.py +140 -0
  48. hud/cli/tests/test_convert.py +361 -0
  49. hud/cli/tests/test_debug.py +12 -10
  50. hud/cli/tests/test_dev.py +197 -0
  51. hud/cli/tests/test_eval.py +251 -0
  52. hud/cli/tests/test_eval_bedrock.py +51 -0
  53. hud/cli/tests/test_init.py +124 -0
  54. hud/cli/tests/test_main_module.py +11 -5
  55. hud/cli/tests/test_mcp_server.py +12 -100
  56. hud/cli/tests/test_push_happy.py +74 -0
  57. hud/cli/tests/test_push_wrapper.py +23 -0
  58. hud/cli/tests/test_registry.py +1 -1
  59. hud/cli/tests/test_utils.py +1 -1
  60. hud/cli/{rl → utils}/celebrate.py +14 -12
  61. hud/cli/utils/config.py +18 -1
  62. hud/cli/utils/docker.py +130 -4
  63. hud/cli/utils/env_check.py +9 -9
  64. hud/cli/utils/git.py +136 -0
  65. hud/cli/utils/interactive.py +39 -5
  66. hud/cli/utils/metadata.py +69 -0
  67. hud/cli/utils/runner.py +1 -1
  68. hud/cli/utils/server.py +2 -2
  69. hud/cli/utils/source_hash.py +3 -3
  70. hud/cli/utils/tasks.py +4 -1
  71. hud/cli/utils/tests/__init__.py +0 -0
  72. hud/cli/utils/tests/test_config.py +58 -0
  73. hud/cli/utils/tests/test_docker.py +93 -0
  74. hud/cli/utils/tests/test_docker_hints.py +71 -0
  75. hud/cli/utils/tests/test_env_check.py +74 -0
  76. hud/cli/utils/tests/test_environment.py +42 -0
  77. hud/cli/utils/tests/test_git.py +142 -0
  78. hud/cli/utils/tests/test_interactive_module.py +60 -0
  79. hud/cli/utils/tests/test_local_runner.py +50 -0
  80. hud/cli/utils/tests/test_logging_utils.py +23 -0
  81. hud/cli/utils/tests/test_metadata.py +49 -0
  82. hud/cli/utils/tests/test_package_runner.py +35 -0
  83. hud/cli/utils/tests/test_registry_utils.py +49 -0
  84. hud/cli/utils/tests/test_remote_runner.py +25 -0
  85. hud/cli/utils/tests/test_runner_modules.py +52 -0
  86. hud/cli/utils/tests/test_source_hash.py +36 -0
  87. hud/cli/utils/tests/test_tasks.py +80 -0
  88. hud/cli/utils/version_check.py +258 -0
  89. hud/cli/{rl → utils}/viewer.py +2 -2
  90. hud/clients/README.md +12 -11
  91. hud/clients/__init__.py +4 -3
  92. hud/clients/base.py +166 -26
  93. hud/clients/environment.py +51 -0
  94. hud/clients/fastmcp.py +13 -6
  95. hud/clients/mcp_use.py +40 -15
  96. hud/clients/tests/test_analyze_scenarios.py +206 -0
  97. hud/clients/tests/test_protocol.py +9 -3
  98. hud/datasets/__init__.py +23 -20
  99. hud/datasets/loader.py +327 -0
  100. hud/datasets/runner.py +192 -105
  101. hud/datasets/tests/__init__.py +0 -0
  102. hud/datasets/tests/test_loader.py +221 -0
  103. hud/datasets/tests/test_utils.py +315 -0
  104. hud/datasets/utils.py +270 -90
  105. hud/environment/__init__.py +50 -0
  106. hud/environment/connection.py +206 -0
  107. hud/environment/connectors/__init__.py +33 -0
  108. hud/environment/connectors/base.py +68 -0
  109. hud/environment/connectors/local.py +177 -0
  110. hud/environment/connectors/mcp_config.py +109 -0
  111. hud/environment/connectors/openai.py +101 -0
  112. hud/environment/connectors/remote.py +172 -0
  113. hud/environment/environment.py +694 -0
  114. hud/environment/integrations/__init__.py +45 -0
  115. hud/environment/integrations/adk.py +67 -0
  116. hud/environment/integrations/anthropic.py +196 -0
  117. hud/environment/integrations/gemini.py +92 -0
  118. hud/environment/integrations/langchain.py +82 -0
  119. hud/environment/integrations/llamaindex.py +68 -0
  120. hud/environment/integrations/openai.py +238 -0
  121. hud/environment/mock.py +306 -0
  122. hud/environment/router.py +112 -0
  123. hud/environment/scenarios.py +493 -0
  124. hud/environment/tests/__init__.py +1 -0
  125. hud/environment/tests/test_connection.py +317 -0
  126. hud/environment/tests/test_connectors.py +218 -0
  127. hud/environment/tests/test_environment.py +161 -0
  128. hud/environment/tests/test_integrations.py +257 -0
  129. hud/environment/tests/test_local_connectors.py +201 -0
  130. hud/environment/tests/test_scenarios.py +280 -0
  131. hud/environment/tests/test_tools.py +208 -0
  132. hud/environment/types.py +23 -0
  133. hud/environment/utils/__init__.py +35 -0
  134. hud/environment/utils/formats.py +215 -0
  135. hud/environment/utils/schema.py +171 -0
  136. hud/environment/utils/tool_wrappers.py +113 -0
  137. hud/eval/__init__.py +67 -0
  138. hud/eval/context.py +674 -0
  139. hud/eval/display.py +299 -0
  140. hud/eval/instrument.py +185 -0
  141. hud/eval/manager.py +466 -0
  142. hud/eval/parallel.py +268 -0
  143. hud/eval/task.py +340 -0
  144. hud/eval/tests/__init__.py +1 -0
  145. hud/eval/tests/test_context.py +178 -0
  146. hud/eval/tests/test_eval.py +210 -0
  147. hud/eval/tests/test_manager.py +152 -0
  148. hud/eval/tests/test_parallel.py +168 -0
  149. hud/eval/tests/test_task.py +145 -0
  150. hud/eval/types.py +63 -0
  151. hud/eval/utils.py +183 -0
  152. hud/patches/__init__.py +19 -0
  153. hud/patches/mcp_patches.py +151 -0
  154. hud/patches/warnings.py +54 -0
  155. hud/samples/browser.py +4 -4
  156. hud/server/__init__.py +2 -1
  157. hud/server/low_level.py +2 -1
  158. hud/server/router.py +164 -0
  159. hud/server/server.py +567 -80
  160. hud/server/tests/test_mcp_server_integration.py +11 -11
  161. hud/server/tests/test_mcp_server_more.py +1 -1
  162. hud/server/tests/test_server_extra.py +2 -0
  163. hud/settings.py +45 -3
  164. hud/shared/exceptions.py +36 -10
  165. hud/shared/hints.py +26 -1
  166. hud/shared/requests.py +15 -3
  167. hud/shared/tests/test_exceptions.py +40 -31
  168. hud/shared/tests/test_hints.py +167 -0
  169. hud/telemetry/__init__.py +20 -19
  170. hud/telemetry/exporter.py +201 -0
  171. hud/telemetry/instrument.py +158 -253
  172. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  173. hud/telemetry/tests/test_exporter.py +258 -0
  174. hud/telemetry/tests/test_instrument.py +401 -0
  175. hud/tools/__init__.py +16 -2
  176. hud/tools/apply_patch.py +639 -0
  177. hud/tools/base.py +54 -4
  178. hud/tools/bash.py +2 -2
  179. hud/tools/computer/__init__.py +4 -0
  180. hud/tools/computer/anthropic.py +2 -2
  181. hud/tools/computer/gemini.py +385 -0
  182. hud/tools/computer/hud.py +23 -6
  183. hud/tools/computer/openai.py +20 -21
  184. hud/tools/computer/qwen.py +434 -0
  185. hud/tools/computer/settings.py +37 -0
  186. hud/tools/edit.py +3 -7
  187. hud/tools/executors/base.py +4 -2
  188. hud/tools/executors/pyautogui.py +1 -1
  189. hud/tools/grounding/grounded_tool.py +13 -18
  190. hud/tools/grounding/grounder.py +10 -31
  191. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  192. hud/tools/jupyter.py +330 -0
  193. hud/tools/playwright.py +18 -3
  194. hud/tools/shell.py +308 -0
  195. hud/tools/tests/test_apply_patch.py +718 -0
  196. hud/tools/tests/test_computer.py +4 -9
  197. hud/tools/tests/test_computer_actions.py +24 -2
  198. hud/tools/tests/test_jupyter_tool.py +181 -0
  199. hud/tools/tests/test_shell.py +596 -0
  200. hud/tools/tests/test_submit.py +85 -0
  201. hud/tools/tests/test_types.py +193 -0
  202. hud/tools/types.py +21 -1
  203. hud/types.py +167 -57
  204. hud/utils/__init__.py +2 -0
  205. hud/utils/env.py +67 -0
  206. hud/utils/hud_console.py +61 -3
  207. hud/utils/mcp.py +15 -58
  208. hud/utils/strict_schema.py +162 -0
  209. hud/utils/tests/test_init.py +1 -2
  210. hud/utils/tests/test_mcp.py +1 -28
  211. hud/utils/tests/test_pretty_errors.py +186 -0
  212. hud/utils/tests/test_tool_shorthand.py +154 -0
  213. hud/utils/tests/test_version.py +1 -1
  214. hud/utils/types.py +20 -0
  215. hud/version.py +1 -1
  216. hud_python-0.5.1.dist-info/METADATA +264 -0
  217. hud_python-0.5.1.dist-info/RECORD +299 -0
  218. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
  219. hud/agents/langchain.py +0 -261
  220. hud/agents/lite_llm.py +0 -72
  221. hud/cli/rl/__init__.py +0 -180
  222. hud/cli/rl/config.py +0 -101
  223. hud/cli/rl/display.py +0 -133
  224. hud/cli/rl/gpu.py +0 -63
  225. hud/cli/rl/gpu_utils.py +0 -321
  226. hud/cli/rl/local_runner.py +0 -595
  227. hud/cli/rl/presets.py +0 -96
  228. hud/cli/rl/remote_runner.py +0 -463
  229. hud/cli/rl/rl_api.py +0 -150
  230. hud/cli/rl/vllm.py +0 -177
  231. hud/cli/rl/wait_utils.py +0 -89
  232. hud/datasets/parallel.py +0 -687
  233. hud/misc/__init__.py +0 -1
  234. hud/misc/claude_plays_pokemon.py +0 -292
  235. hud/otel/__init__.py +0 -35
  236. hud/otel/collector.py +0 -142
  237. hud/otel/config.py +0 -181
  238. hud/otel/context.py +0 -570
  239. hud/otel/exporters.py +0 -369
  240. hud/otel/instrumentation.py +0 -135
  241. hud/otel/processors.py +0 -121
  242. hud/otel/tests/__init__.py +0 -1
  243. hud/otel/tests/test_processors.py +0 -197
  244. hud/rl/README.md +0 -30
  245. hud/rl/__init__.py +0 -1
  246. hud/rl/actor.py +0 -176
  247. hud/rl/buffer.py +0 -405
  248. hud/rl/chat_template.jinja +0 -101
  249. hud/rl/config.py +0 -192
  250. hud/rl/distributed.py +0 -132
  251. hud/rl/learner.py +0 -637
  252. hud/rl/tests/__init__.py +0 -1
  253. hud/rl/tests/test_learner.py +0 -186
  254. hud/rl/train.py +0 -382
  255. hud/rl/types.py +0 -101
  256. hud/rl/utils/start_vllm_server.sh +0 -30
  257. hud/rl/utils.py +0 -524
  258. hud/rl/vllm_adapter.py +0 -143
  259. hud/telemetry/job.py +0 -352
  260. hud/telemetry/replay.py +0 -74
  261. hud/telemetry/tests/test_replay.py +0 -40
  262. hud/telemetry/tests/test_trace.py +0 -63
  263. hud/telemetry/trace.py +0 -158
  264. hud/utils/agent_factories.py +0 -86
  265. hud/utils/async_utils.py +0 -65
  266. hud/utils/group_eval.py +0 -223
  267. hud/utils/progress.py +0 -149
  268. hud/utils/tasks.py +0 -127
  269. hud/utils/tests/test_async_utils.py +0 -173
  270. hud/utils/tests/test_progress.py +0 -261
  271. hud_python-0.4.45.dist-info/METADATA +0 -552
  272. hud_python-0.4.45.dist-info/RECORD +0 -228
  273. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
  274. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0
hud/agents/claude.py CHANGED
@@ -4,37 +4,53 @@ from __future__ import annotations
4
4
 
5
5
  import copy
6
6
  import logging
7
- from typing import TYPE_CHECKING, Any, ClassVar, cast
8
-
9
- from anthropic import Anthropic, AsyncAnthropic, BadRequestError
10
- from anthropic.types.beta import BetaContentBlockParam, BetaImageBlockParam, BetaTextBlockParam
11
-
12
- import hud
13
-
14
- if TYPE_CHECKING:
15
- from anthropic.types.beta import (
16
- BetaCacheControlEphemeralParam,
17
- BetaContentBlockParam,
18
- BetaImageBlockParam,
19
- BetaMessageParam,
20
- BetaTextBlockParam,
21
- BetaToolResultBlockParam,
22
- )
23
-
24
- from hud.datasets import Task
7
+ from inspect import cleandoc
8
+ from typing import Any, ClassVar, Literal, cast
25
9
 
26
10
  import mcp.types as types
11
+ from anthropic import AsyncAnthropic, AsyncAnthropicBedrock, Omit
12
+ from anthropic.types import CacheControlEphemeralParam
13
+ from anthropic.types.beta import (
14
+ BetaBase64ImageSourceParam,
15
+ BetaContentBlockParam,
16
+ BetaImageBlockParam,
17
+ BetaMessageParam,
18
+ BetaTextBlockParam,
19
+ BetaToolBash20250124Param,
20
+ BetaToolComputerUse20250124Param,
21
+ BetaToolParam,
22
+ BetaToolResultBlockParam,
23
+ BetaToolTextEditor20250728Param,
24
+ BetaToolUnionParam,
25
+ )
26
+ from pydantic import ConfigDict
27
27
 
28
28
  from hud.settings import settings
29
29
  from hud.tools.computer.settings import computer_settings
30
- from hud.types import AgentResponse, MCPToolCall, MCPToolResult
30
+ from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
31
31
  from hud.utils.hud_console import HUDConsole
32
+ from hud.utils.types import with_signature
32
33
 
33
- from .base import MCPAgent
34
+ from .base import BaseCreateParams, MCPAgent
34
35
 
35
36
  logger = logging.getLogger(__name__)
36
37
 
37
38
 
39
+ class ClaudeConfig(BaseAgentConfig):
40
+ model_config = ConfigDict(arbitrary_types_allowed=True)
41
+
42
+ model_name: str = "Claude"
43
+ model: str = "claude-sonnet-4-5"
44
+ model_client: AsyncAnthropic | AsyncAnthropicBedrock | None = None
45
+ max_tokens: int = 16384
46
+ use_computer_beta: bool = True
47
+ validate_api_key: bool = True
48
+
49
+
50
+ class ClaudeCreateParams(BaseCreateParams, ClaudeConfig):
51
+ pass
52
+
53
+
38
54
  class ClaudeAgent(MCPAgent):
39
55
  """
40
56
  Claude agent that uses MCP servers for tool execution.
@@ -43,89 +59,47 @@ class ClaudeAgent(MCPAgent):
43
59
  tools through MCP servers instead of direct implementation.
44
60
  """
45
61
 
46
- metadata: ClassVar[dict[str, Any]] = {
62
+ metadata: ClassVar[dict[str, Any] | None] = {
47
63
  "display_width": computer_settings.ANTHROPIC_COMPUTER_WIDTH,
48
64
  "display_height": computer_settings.ANTHROPIC_COMPUTER_HEIGHT,
49
65
  }
66
+ config_cls: ClassVar[type[BaseAgentConfig]] = ClaudeConfig
50
67
 
51
- def __init__(
52
- self,
53
- model_client: AsyncAnthropic | None = None,
54
- model: str = "claude-sonnet-4-20250514",
55
- max_tokens: int = 4096,
56
- use_computer_beta: bool = True,
57
- validate_api_key: bool = True,
58
- **kwargs: Any,
59
- ) -> None:
60
- """
61
- Initialize Claude MCP agent.
62
-
63
- Args:
64
- model_client: AsyncAnthropic client (created if not provided)
65
- model: Claude model to use
66
- max_tokens: Maximum tokens for response
67
- use_computer_beta: Whether to use computer-use beta features
68
- **kwargs: Additional arguments passed to BaseMCPAgent (including mcp_client)
69
- """
70
- super().__init__(**kwargs)
71
-
72
- # Initialize client if not provided
68
+ @with_signature(ClaudeCreateParams)
69
+ @classmethod
70
+ def create(cls, **kwargs: Any) -> ClaudeAgent: # pyright: ignore[reportIncompatibleMethodOverride]
71
+ return MCPAgent.create.__func__(cls, **kwargs) # type: ignore[return-value]
72
+
73
+ def __init__(self, params: ClaudeCreateParams | None = None, **kwargs: Any) -> None:
74
+ super().__init__(params, **kwargs)
75
+ self.config: ClaudeConfig
76
+
77
+ model_client = self.config.model_client
73
78
  if model_client is None:
74
79
  api_key = settings.anthropic_api_key
75
80
  if not api_key:
76
81
  raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
77
82
  model_client = AsyncAnthropic(api_key=api_key)
78
83
 
79
- # validate api key if requested
80
- if validate_api_key:
81
- try:
82
- Anthropic(api_key=model_client.api_key).models.list()
83
- except Exception as e:
84
- raise ValueError(f"Anthropic API key is invalid: {e}") from e
85
-
86
84
  self.anthropic_client = model_client
87
- self.model = model
88
- self.max_tokens = max_tokens
89
- self.use_computer_beta = use_computer_beta
85
+ self.max_tokens = self.config.max_tokens
86
+ self.use_computer_beta = self.config.use_computer_beta
90
87
  self.hud_console = HUDConsole(logger=logger)
91
88
 
92
- self.model_name = self.model
93
-
94
- # Track mapping from Claude tool names to MCP tool names
95
- self._claude_to_mcp_tool_map: dict[str, str] = {}
96
- self.claude_tools: list[dict] = []
97
-
98
- # Append Claude-specific instructions to the base system prompt
99
- claude_instructions = """
100
- You are Claude, an AI assistant created by Anthropic. You are helpful, harmless, and honest.
101
-
102
- When working on tasks:
103
- 1. Be thorough and systematic in your approach
104
- 2. Complete tasks autonomously without asking for confirmation
105
- 3. Use available tools efficiently to accomplish your goals
106
- 4. Verify your actions and ensure task completion
107
- 5. Be precise and accurate in all operations
108
-
109
- Remember: You are expected to complete tasks autonomously. The user trusts you to accomplish what they asked.
110
- """.strip() # noqa: E501
111
-
112
- # Append Claude instructions to any base system prompt
113
- if self.system_prompt:
114
- self.system_prompt = f"{self.system_prompt}\n\n{claude_instructions}"
115
- else:
116
- self.system_prompt = claude_instructions
89
+ # these will be initialized in _convert_tools_for_claude
90
+ self.has_computer_tool = False
91
+ self.tool_mapping: dict[str, str] = {}
92
+ self.claude_tools: list[BetaToolUnionParam] = []
117
93
 
118
- async def initialize(self, task: str | Task | None = None) -> None:
119
- """Initialize the agent and build tool mappings."""
120
- await super().initialize(task)
121
- # Build tool mappings after tools are discovered
94
+ def _on_tools_ready(self) -> None:
95
+ """Build Claude-specific tool mappings after tools are discovered."""
122
96
  self._convert_tools_for_claude()
123
97
 
124
- async def get_system_messages(self) -> list[Any]:
98
+ async def get_system_messages(self) -> list[BetaMessageParam]:
125
99
  """No system messages for Claude because applied in get_response"""
126
100
  return []
127
101
 
128
- async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[Any]:
102
+ async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[BetaMessageParam]:
129
103
  """Format messages for Claude."""
130
104
  # Convert MCP content types to Anthropic content types
131
105
  anthropic_blocks: list[BetaContentBlockParam] = []
@@ -134,101 +108,74 @@ class ClaudeAgent(MCPAgent):
134
108
  if isinstance(block, types.TextContent):
135
109
  # Only include fields that Anthropic expects
136
110
  anthropic_blocks.append(
137
- cast(
138
- "BetaTextBlockParam",
139
- {
140
- "type": "text",
141
- "text": block.text,
142
- },
111
+ BetaTextBlockParam(
112
+ type="text",
113
+ text=block.text,
143
114
  )
144
115
  )
145
116
  elif isinstance(block, types.ImageContent):
146
117
  # Convert MCP ImageContent to Anthropic format
147
118
  anthropic_blocks.append(
148
- cast(
149
- "BetaImageBlockParam",
150
- {
151
- "type": "image",
152
- "source": {
153
- "type": "base64",
154
- "media_type": block.mimeType,
155
- "data": block.data,
156
- },
157
- },
119
+ BetaImageBlockParam(
120
+ type="image",
121
+ source=BetaBase64ImageSourceParam(
122
+ type="base64",
123
+ media_type=cast(
124
+ "Literal['image/jpeg', 'image/png', 'image/gif', 'image/webp']",
125
+ block.mimeType,
126
+ ),
127
+ data=block.data,
128
+ ),
158
129
  )
159
130
  )
160
131
  else:
161
- # For other types, try to cast but log a warning
162
- self.hud_console.log(f"Unknown content block type: {type(block)}", level="warning")
163
- anthropic_blocks.append(cast("BetaContentBlockParam", block))
132
+ raise ValueError(f"Unknown content block type: {type(block)}")
164
133
 
165
- return [
166
- cast(
167
- "BetaMessageParam",
168
- {
169
- "role": "user",
170
- "content": anthropic_blocks,
171
- },
172
- )
173
- ]
134
+ return [BetaMessageParam(role="user", content=anthropic_blocks)]
174
135
 
175
- @hud.instrument(
176
- span_type="agent",
177
- record_args=False, # Messages can be large
178
- record_result=True,
179
- )
180
136
  async def get_response(self, messages: list[BetaMessageParam]) -> AgentResponse:
181
137
  """Get response from Claude including any tool calls."""
138
+ messages_cached = self._add_prompt_caching(messages)
182
139
 
183
- # Make API call with retry for prompt length
184
- current_messages = messages.copy()
185
-
186
- while True:
187
- messages_cached = self._add_prompt_caching(current_messages)
188
-
189
- # Build create kwargs
190
- create_kwargs = {
191
- "model": self.model,
192
- "max_tokens": self.max_tokens,
193
- "system": self.system_prompt,
194
- "messages": messages_cached,
195
- "tools": self.claude_tools,
196
- "tool_choice": {"type": "auto", "disable_parallel_tool_use": True},
197
- }
198
-
199
- # Add beta features if using computer tools
200
- if self.use_computer_beta and any(
201
- tool.get("type") == "computer_20250124" for tool in self.claude_tools
202
- ):
203
- create_kwargs["betas"] = ["computer-use-2025-01-24"]
140
+ # betas to use
141
+ betas = ["fine-grained-tool-streaming-2025-05-14"]
142
+ if self.has_computer_tool:
143
+ betas.append("computer-use-2025-01-24")
204
144
 
145
+ # Bedrock doesn't support .stream() - use create(stream=True) instead
146
+ if isinstance(self.anthropic_client, AsyncAnthropicBedrock):
205
147
  try:
206
- response = await self.anthropic_client.beta.messages.create(**create_kwargs)
207
- break
208
- except BadRequestError as e:
209
- if (
210
- "prompt is too long" in str(e)
211
- or "request_too_large" in str(e)
212
- or e.status_code == 413
213
- ):
214
- self.hud_console.warning("Prompt too long, truncating message history")
215
- # Keep first message and last 20 messages
216
- if len(current_messages) > 21:
217
- current_messages = [current_messages[0], *current_messages[-20:]]
218
- else:
219
- raise
220
- else:
221
- raise
222
-
223
- messages.append(
224
- cast(
225
- "BetaMessageParam",
226
- {
227
- "role": "assistant",
228
- "content": response.content,
229
- },
230
- )
231
- )
148
+ response = await self.anthropic_client.beta.messages.create(
149
+ model=self.config.model,
150
+ system=self.system_prompt if self.system_prompt is not None else Omit(),
151
+ max_tokens=self.max_tokens,
152
+ messages=messages_cached,
153
+ tools=self.claude_tools,
154
+ tool_choice={"type": "auto", "disable_parallel_tool_use": True},
155
+ betas=betas,
156
+ )
157
+ messages.append(BetaMessageParam(role="assistant", content=response.content))
158
+ except ModuleNotFoundError:
159
+ raise ValueError(
160
+ "boto3 is required for AWS Bedrock. Use `pip install hud[bedrock]`"
161
+ ) from None
162
+ else:
163
+ # Regular Anthropic client supports .stream()
164
+ async with self.anthropic_client.beta.messages.stream(
165
+ model=self.config.model,
166
+ system=self.system_prompt if self.system_prompt is not None else Omit(),
167
+ max_tokens=self.max_tokens,
168
+ messages=messages_cached,
169
+ tools=self.claude_tools,
170
+ tool_choice={"type": "auto", "disable_parallel_tool_use": True},
171
+ betas=betas,
172
+ ) as stream:
173
+ # allow backend to accumulate message content
174
+ async for _ in stream:
175
+ pass
176
+ # get final message
177
+ response = await stream.get_final_message()
178
+ messages.append(BetaMessageParam(role="assistant", content=response.content))
232
179
 
233
180
  # Process response
234
181
  result = AgentResponse(content="", tool_calls=[], done=True)
@@ -239,29 +186,26 @@ class ClaudeAgent(MCPAgent):
239
186
 
240
187
  for block in response.content:
241
188
  if block.type == "tool_use":
242
- # Map Claude tool name back to MCP tool name
243
- mcp_tool_name = self._claude_to_mcp_tool_map.get(block.name, block.name)
244
-
245
- # Create MCPToolCall object with Claude metadata as extra fields
246
- # Pyright will complain but the tool class accepts extra fields
247
189
  tool_call = MCPToolCall(
248
- id=block.id, # canonical identifier for telemetry
249
- name=mcp_tool_name,
250
- arguments=block.input,
251
- claude_name=block.name, # type: ignore
190
+ id=block.id,
191
+ # look up name in tool_mapping if available, otherwise use block name
192
+ name=self.tool_mapping.get(block.name, block.name),
193
+ arguments=block.input
194
+ if isinstance(block.input, dict)
195
+ else block.input.__dict__,
252
196
  )
253
197
  result.tool_calls.append(tool_call)
254
198
  result.done = False
255
199
  elif block.type == "text":
256
200
  text_content += block.text
257
201
  elif hasattr(block, "type") and block.type == "thinking":
258
- thinking_content += f"Thinking: {block.thinking}\n"
202
+ if thinking_content:
203
+ thinking_content += "\n"
204
+ thinking_content += block.thinking
259
205
 
260
- # Combine text and thinking for final content
206
+ result.content = text_content
261
207
  if thinking_content:
262
- result.content = thinking_content + text_content
263
- else:
264
- result.content = text_content
208
+ result.reasoning = thinking_content
265
209
 
266
210
  return result
267
211
 
@@ -303,81 +247,92 @@ class ClaudeAgent(MCPAgent):
303
247
 
304
248
  # Return as a user message containing all tool results
305
249
  return [
306
- cast(
307
- "BetaMessageParam",
308
- {
309
- "role": "user",
310
- "content": user_content,
311
- },
250
+ BetaMessageParam(
251
+ role="user",
252
+ content=user_content,
312
253
  )
313
254
  ]
314
255
 
315
256
  async def create_user_message(self, text: str) -> BetaMessageParam:
316
257
  """Create a user message in Claude's format."""
317
- return cast("BetaMessageParam", {"role": "user", "content": text})
258
+ return BetaMessageParam(role="user", content=text)
318
259
 
319
- def _convert_tools_for_claude(self) -> list[dict]:
320
- """Convert MCP tools to Claude tool format."""
321
- claude_tools = []
322
- self._claude_to_mcp_tool_map = {} # Reset mapping
260
+ def _convert_tools_for_claude(self) -> None:
261
+ """Convert MCP tools to Claude API tools."""
323
262
 
324
- # Find computer tool by priority
325
- computer_tool_priority = ["anthropic_computer", "computer_anthropic", "computer"]
326
- selected_computer_tool = None
263
+ # First pass: identify all computer tools and find the longest match
264
+ available_tools = self.get_available_tools()
327
265
 
328
- for priority_name in computer_tool_priority:
329
- for tool in self._available_tools:
266
+ # find potential computer tools by priority
267
+ selected_computer_tool = None
268
+ computer_tool_names_by_priority = ["anthropic_computer", "computer_anthropic", "computer"]
269
+ for computer_tool_name in computer_tool_names_by_priority:
270
+ for tool in available_tools:
330
271
  # Check both exact match and suffix match (for prefixed tools)
331
- if tool.name == priority_name or tool.name.endswith(f"_{priority_name}"):
272
+ if tool.name == computer_tool_name or tool.name.endswith(f"_{computer_tool_name}"):
332
273
  selected_computer_tool = tool
333
274
  break
334
275
  if selected_computer_tool:
335
276
  break
336
277
 
337
- # Add the selected computer tool if found
338
- if selected_computer_tool:
339
- claude_tool = {
340
- "type": "computer_20250124",
341
- "name": "computer",
342
- "display_width_px": self.metadata["display_width"],
343
- "display_height_px": self.metadata["display_height"],
344
- }
345
- # Map Claude's "computer" back to the actual MCP tool name
346
- self._claude_to_mcp_tool_map["computer"] = selected_computer_tool.name
347
- claude_tools.append(claude_tool)
348
- self.hud_console.debug(
349
- f"Using {selected_computer_tool.name} as computer tool for Claude"
350
- )
278
+ def to_api_tool(tool: types.Tool) -> BetaToolUnionParam | None:
279
+ if tool.name == "str_replace_based_edit_tool":
280
+ return BetaToolTextEditor20250728Param(
281
+ type="text_editor_20250728",
282
+ name="str_replace_based_edit_tool",
283
+ )
284
+ if tool.name == "bash":
285
+ return BetaToolBash20250124Param(
286
+ type="bash_20250124",
287
+ name="bash",
288
+ )
289
+ if selected_computer_tool is not None:
290
+ if tool.name == selected_computer_tool.name:
291
+ return BetaToolComputerUse20250124Param(
292
+ type="computer_20250124",
293
+ name="computer",
294
+ display_number=1,
295
+ display_width_px=computer_settings.ANTHROPIC_COMPUTER_WIDTH,
296
+ display_height_px=computer_settings.ANTHROPIC_COMPUTER_HEIGHT,
297
+ )
298
+ elif tool.name == "computer":
299
+ logger.warning(
300
+ "Renamed tool %s to 'computer', dropping original 'computer' tool",
301
+ selected_computer_tool.name,
302
+ )
303
+ return None
304
+
305
+ if tool.description is None or tool.inputSchema is None:
306
+ raise ValueError(
307
+ cleandoc(f"""MCP tool {tool.name} requires both a description and inputSchema.
308
+ Add these by:
309
+ 1. Adding a docstring to your @mcp.tool decorated function for the description
310
+ 2. Using pydantic Field() annotations on function parameters for the schema
311
+ """)
312
+ )
351
313
 
352
- # Add other non-computer tools
353
- for tool in self._available_tools:
354
- # Skip computer tools (already handled) and lifecycle tools
355
- is_computer_tool = any(
356
- tool.name == priority_name or tool.name.endswith(f"_{priority_name}")
357
- for priority_name in computer_tool_priority
314
+ return BetaToolParam(
315
+ name=tool.name,
316
+ description=tool.description,
317
+ input_schema=tool.inputSchema,
358
318
  )
359
- if is_computer_tool or tool.name in self.lifecycle_tools:
360
- continue
361
319
 
362
- claude_tool = {
363
- "name": tool.name,
364
- "description": tool.description or f"Execute {tool.name}",
365
- "input_schema": tool.inputSchema
366
- or {
367
- "type": "object",
368
- "properties": {},
369
- },
370
- }
371
- # Direct mapping for non-computer tools
372
- self._claude_to_mcp_tool_map[tool.name] = tool.name
373
- claude_tools.append(claude_tool)
374
-
375
- self.claude_tools = claude_tools
376
- return claude_tools
320
+ self.has_computer_tool = False
321
+ self.tool_mapping = {}
322
+ self.claude_tools = []
323
+ for tool in available_tools:
324
+ claude_tool = to_api_tool(tool)
325
+ if claude_tool is None or "name" not in claude_tool:
326
+ continue
327
+ if claude_tool["name"] == "computer":
328
+ self.has_computer_tool = True
329
+ self.tool_mapping[claude_tool["name"]] = tool.name
330
+ self.claude_tools.append(claude_tool)
377
331
 
378
332
  def _add_prompt_caching(self, messages: list[BetaMessageParam]) -> list[BetaMessageParam]:
379
333
  """Add prompt caching to messages."""
380
334
  messages_cached = copy.deepcopy(messages)
335
+ cache_control = CacheControlEphemeralParam(type="ephemeral")
381
336
 
382
337
  # Mark last user message with cache control
383
338
  if (
@@ -391,20 +346,25 @@ class ClaudeAgent(MCPAgent):
391
346
  for block in last_content:
392
347
  # Only add cache control to dict-like block types that support it
393
348
  if isinstance(block, dict):
394
- block_type = block.get("type")
395
- if block_type in ["text", "image", "tool_use", "tool_result"]:
396
- cache_control: BetaCacheControlEphemeralParam = {"type": "ephemeral"}
397
- block["cache_control"] = cache_control # type: ignore[reportGeneralTypeIssues]
349
+ match block["type"]:
350
+ case "redacted_thinking" | "thinking":
351
+ pass
352
+ case _:
353
+ block["cache_control"] = cache_control
398
354
 
399
355
  return messages_cached
400
356
 
401
357
 
402
358
  def base64_to_content_block(base64: str) -> BetaImageBlockParam:
403
359
  """Convert base64 image to Claude content block."""
404
- return {
405
- "type": "image",
406
- "source": {"type": "base64", "media_type": "image/png", "data": base64},
407
- }
360
+ return BetaImageBlockParam(
361
+ type="image",
362
+ source=BetaBase64ImageSourceParam(
363
+ type="base64",
364
+ media_type="image/png",
365
+ data=base64,
366
+ ),
367
+ )
408
368
 
409
369
 
410
370
  def text_to_content_block(text: str) -> BetaTextBlockParam: