hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (282) hide show
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +70 -5
  3. hud/agents/base.py +238 -500
  4. hud/agents/claude.py +236 -247
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +264 -0
  7. hud/agents/gemini_cua.py +324 -0
  8. hud/agents/grounded_openai.py +98 -100
  9. hud/agents/misc/integration_test_agent.py +51 -20
  10. hud/agents/misc/response_agent.py +48 -36
  11. hud/agents/openai.py +282 -296
  12. hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
  13. hud/agents/operator.py +199 -0
  14. hud/agents/resolver.py +70 -0
  15. hud/agents/tests/conftest.py +133 -0
  16. hud/agents/tests/test_base.py +300 -622
  17. hud/agents/tests/test_base_runtime.py +233 -0
  18. hud/agents/tests/test_claude.py +381 -214
  19. hud/agents/tests/test_client.py +9 -10
  20. hud/agents/tests/test_gemini.py +369 -0
  21. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  22. hud/agents/tests/test_openai.py +377 -140
  23. hud/agents/tests/test_operator.py +362 -0
  24. hud/agents/tests/test_resolver.py +192 -0
  25. hud/agents/tests/test_run_eval.py +179 -0
  26. hud/agents/types.py +148 -0
  27. hud/cli/__init__.py +493 -546
  28. hud/cli/analyze.py +43 -5
  29. hud/cli/build.py +699 -113
  30. hud/cli/debug.py +8 -5
  31. hud/cli/dev.py +889 -732
  32. hud/cli/eval.py +793 -667
  33. hud/cli/flows/dev.py +167 -0
  34. hud/cli/flows/init.py +191 -0
  35. hud/cli/flows/tasks.py +153 -56
  36. hud/cli/flows/templates.py +151 -0
  37. hud/cli/flows/tests/__init__.py +1 -0
  38. hud/cli/flows/tests/test_dev.py +126 -0
  39. hud/cli/init.py +60 -58
  40. hud/cli/pull.py +1 -1
  41. hud/cli/push.py +38 -13
  42. hud/cli/rft.py +311 -0
  43. hud/cli/rft_status.py +145 -0
  44. hud/cli/tests/test_analyze.py +5 -5
  45. hud/cli/tests/test_analyze_metadata.py +3 -2
  46. hud/cli/tests/test_analyze_module.py +120 -0
  47. hud/cli/tests/test_build.py +110 -8
  48. hud/cli/tests/test_build_failure.py +41 -0
  49. hud/cli/tests/test_build_module.py +50 -0
  50. hud/cli/tests/test_cli_init.py +6 -1
  51. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  52. hud/cli/tests/test_cli_root.py +140 -0
  53. hud/cli/tests/test_convert.py +361 -0
  54. hud/cli/tests/test_debug.py +12 -10
  55. hud/cli/tests/test_dev.py +197 -0
  56. hud/cli/tests/test_eval.py +251 -0
  57. hud/cli/tests/test_eval_bedrock.py +51 -0
  58. hud/cli/tests/test_init.py +124 -0
  59. hud/cli/tests/test_main_module.py +11 -5
  60. hud/cli/tests/test_mcp_server.py +12 -100
  61. hud/cli/tests/test_push.py +1 -1
  62. hud/cli/tests/test_push_happy.py +74 -0
  63. hud/cli/tests/test_push_wrapper.py +23 -0
  64. hud/cli/tests/test_registry.py +1 -1
  65. hud/cli/tests/test_utils.py +1 -1
  66. hud/cli/{rl → utils}/celebrate.py +14 -12
  67. hud/cli/utils/config.py +18 -1
  68. hud/cli/utils/docker.py +130 -4
  69. hud/cli/utils/env_check.py +9 -9
  70. hud/cli/utils/git.py +136 -0
  71. hud/cli/utils/interactive.py +39 -5
  72. hud/cli/utils/metadata.py +70 -1
  73. hud/cli/utils/runner.py +1 -1
  74. hud/cli/utils/server.py +2 -2
  75. hud/cli/utils/source_hash.py +3 -3
  76. hud/cli/utils/tasks.py +4 -1
  77. hud/cli/utils/tests/__init__.py +0 -0
  78. hud/cli/utils/tests/test_config.py +58 -0
  79. hud/cli/utils/tests/test_docker.py +93 -0
  80. hud/cli/utils/tests/test_docker_hints.py +71 -0
  81. hud/cli/utils/tests/test_env_check.py +74 -0
  82. hud/cli/utils/tests/test_environment.py +42 -0
  83. hud/cli/utils/tests/test_git.py +142 -0
  84. hud/cli/utils/tests/test_interactive_module.py +60 -0
  85. hud/cli/utils/tests/test_local_runner.py +50 -0
  86. hud/cli/utils/tests/test_logging_utils.py +23 -0
  87. hud/cli/utils/tests/test_metadata.py +49 -0
  88. hud/cli/utils/tests/test_package_runner.py +35 -0
  89. hud/cli/utils/tests/test_registry_utils.py +49 -0
  90. hud/cli/utils/tests/test_remote_runner.py +25 -0
  91. hud/cli/utils/tests/test_runner_modules.py +52 -0
  92. hud/cli/utils/tests/test_source_hash.py +36 -0
  93. hud/cli/utils/tests/test_tasks.py +80 -0
  94. hud/cli/utils/version_check.py +258 -0
  95. hud/cli/{rl → utils}/viewer.py +2 -2
  96. hud/clients/README.md +12 -11
  97. hud/clients/__init__.py +4 -3
  98. hud/clients/base.py +166 -26
  99. hud/clients/environment.py +51 -0
  100. hud/clients/fastmcp.py +13 -6
  101. hud/clients/mcp_use.py +45 -15
  102. hud/clients/tests/test_analyze_scenarios.py +206 -0
  103. hud/clients/tests/test_protocol.py +9 -3
  104. hud/datasets/__init__.py +23 -20
  105. hud/datasets/loader.py +326 -0
  106. hud/datasets/runner.py +198 -105
  107. hud/datasets/tests/__init__.py +0 -0
  108. hud/datasets/tests/test_loader.py +221 -0
  109. hud/datasets/tests/test_utils.py +315 -0
  110. hud/datasets/utils.py +270 -90
  111. hud/environment/__init__.py +52 -0
  112. hud/environment/connection.py +258 -0
  113. hud/environment/connectors/__init__.py +33 -0
  114. hud/environment/connectors/base.py +68 -0
  115. hud/environment/connectors/local.py +177 -0
  116. hud/environment/connectors/mcp_config.py +137 -0
  117. hud/environment/connectors/openai.py +101 -0
  118. hud/environment/connectors/remote.py +172 -0
  119. hud/environment/environment.py +835 -0
  120. hud/environment/integrations/__init__.py +45 -0
  121. hud/environment/integrations/adk.py +67 -0
  122. hud/environment/integrations/anthropic.py +196 -0
  123. hud/environment/integrations/gemini.py +92 -0
  124. hud/environment/integrations/langchain.py +82 -0
  125. hud/environment/integrations/llamaindex.py +68 -0
  126. hud/environment/integrations/openai.py +238 -0
  127. hud/environment/mock.py +306 -0
  128. hud/environment/router.py +263 -0
  129. hud/environment/scenarios.py +620 -0
  130. hud/environment/tests/__init__.py +1 -0
  131. hud/environment/tests/test_connection.py +317 -0
  132. hud/environment/tests/test_connectors.py +205 -0
  133. hud/environment/tests/test_environment.py +593 -0
  134. hud/environment/tests/test_integrations.py +257 -0
  135. hud/environment/tests/test_local_connectors.py +242 -0
  136. hud/environment/tests/test_scenarios.py +1086 -0
  137. hud/environment/tests/test_tools.py +208 -0
  138. hud/environment/types.py +23 -0
  139. hud/environment/utils/__init__.py +35 -0
  140. hud/environment/utils/formats.py +215 -0
  141. hud/environment/utils/schema.py +171 -0
  142. hud/environment/utils/tool_wrappers.py +113 -0
  143. hud/eval/__init__.py +67 -0
  144. hud/eval/context.py +727 -0
  145. hud/eval/display.py +299 -0
  146. hud/eval/instrument.py +187 -0
  147. hud/eval/manager.py +533 -0
  148. hud/eval/parallel.py +268 -0
  149. hud/eval/task.py +372 -0
  150. hud/eval/tests/__init__.py +1 -0
  151. hud/eval/tests/test_context.py +178 -0
  152. hud/eval/tests/test_eval.py +210 -0
  153. hud/eval/tests/test_manager.py +152 -0
  154. hud/eval/tests/test_parallel.py +168 -0
  155. hud/eval/tests/test_task.py +291 -0
  156. hud/eval/types.py +65 -0
  157. hud/eval/utils.py +194 -0
  158. hud/patches/__init__.py +19 -0
  159. hud/patches/mcp_patches.py +308 -0
  160. hud/patches/warnings.py +54 -0
  161. hud/samples/browser.py +4 -4
  162. hud/server/__init__.py +2 -1
  163. hud/server/low_level.py +2 -1
  164. hud/server/router.py +164 -0
  165. hud/server/server.py +567 -80
  166. hud/server/tests/test_mcp_server_integration.py +11 -11
  167. hud/server/tests/test_mcp_server_more.py +1 -1
  168. hud/server/tests/test_server_extra.py +2 -0
  169. hud/settings.py +45 -3
  170. hud/shared/exceptions.py +36 -10
  171. hud/shared/hints.py +26 -1
  172. hud/shared/requests.py +15 -3
  173. hud/shared/tests/test_exceptions.py +40 -31
  174. hud/shared/tests/test_hints.py +167 -0
  175. hud/telemetry/__init__.py +20 -19
  176. hud/telemetry/exporter.py +201 -0
  177. hud/telemetry/instrument.py +165 -253
  178. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  179. hud/telemetry/tests/test_exporter.py +258 -0
  180. hud/telemetry/tests/test_instrument.py +401 -0
  181. hud/tools/__init__.py +18 -2
  182. hud/tools/agent.py +223 -0
  183. hud/tools/apply_patch.py +639 -0
  184. hud/tools/base.py +54 -4
  185. hud/tools/bash.py +2 -2
  186. hud/tools/computer/__init__.py +36 -3
  187. hud/tools/computer/anthropic.py +2 -2
  188. hud/tools/computer/gemini.py +385 -0
  189. hud/tools/computer/hud.py +23 -6
  190. hud/tools/computer/openai.py +20 -21
  191. hud/tools/computer/qwen.py +434 -0
  192. hud/tools/computer/settings.py +37 -0
  193. hud/tools/edit.py +3 -7
  194. hud/tools/executors/base.py +4 -2
  195. hud/tools/executors/pyautogui.py +1 -1
  196. hud/tools/grounding/grounded_tool.py +13 -18
  197. hud/tools/grounding/grounder.py +10 -31
  198. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  199. hud/tools/jupyter.py +330 -0
  200. hud/tools/playwright.py +18 -3
  201. hud/tools/shell.py +308 -0
  202. hud/tools/tests/test_agent_tool.py +355 -0
  203. hud/tools/tests/test_apply_patch.py +718 -0
  204. hud/tools/tests/test_computer.py +4 -9
  205. hud/tools/tests/test_computer_actions.py +24 -2
  206. hud/tools/tests/test_jupyter_tool.py +181 -0
  207. hud/tools/tests/test_shell.py +596 -0
  208. hud/tools/tests/test_submit.py +85 -0
  209. hud/tools/tests/test_types.py +193 -0
  210. hud/tools/types.py +21 -1
  211. hud/types.py +194 -56
  212. hud/utils/__init__.py +2 -0
  213. hud/utils/env.py +67 -0
  214. hud/utils/hud_console.py +89 -18
  215. hud/utils/mcp.py +15 -58
  216. hud/utils/strict_schema.py +162 -0
  217. hud/utils/tests/test_init.py +1 -2
  218. hud/utils/tests/test_mcp.py +1 -28
  219. hud/utils/tests/test_pretty_errors.py +186 -0
  220. hud/utils/tests/test_tool_shorthand.py +154 -0
  221. hud/utils/tests/test_version.py +1 -1
  222. hud/utils/types.py +20 -0
  223. hud/version.py +1 -1
  224. hud_python-0.5.13.dist-info/METADATA +264 -0
  225. hud_python-0.5.13.dist-info/RECORD +305 -0
  226. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
  227. hud/agents/langchain.py +0 -261
  228. hud/agents/lite_llm.py +0 -72
  229. hud/cli/rl/__init__.py +0 -180
  230. hud/cli/rl/config.py +0 -101
  231. hud/cli/rl/display.py +0 -133
  232. hud/cli/rl/gpu.py +0 -63
  233. hud/cli/rl/gpu_utils.py +0 -321
  234. hud/cli/rl/local_runner.py +0 -595
  235. hud/cli/rl/presets.py +0 -96
  236. hud/cli/rl/remote_runner.py +0 -463
  237. hud/cli/rl/rl_api.py +0 -150
  238. hud/cli/rl/vllm.py +0 -177
  239. hud/cli/rl/wait_utils.py +0 -89
  240. hud/datasets/parallel.py +0 -687
  241. hud/misc/__init__.py +0 -1
  242. hud/misc/claude_plays_pokemon.py +0 -292
  243. hud/otel/__init__.py +0 -35
  244. hud/otel/collector.py +0 -142
  245. hud/otel/config.py +0 -181
  246. hud/otel/context.py +0 -570
  247. hud/otel/exporters.py +0 -369
  248. hud/otel/instrumentation.py +0 -135
  249. hud/otel/processors.py +0 -121
  250. hud/otel/tests/__init__.py +0 -1
  251. hud/otel/tests/test_processors.py +0 -197
  252. hud/rl/README.md +0 -30
  253. hud/rl/__init__.py +0 -1
  254. hud/rl/actor.py +0 -176
  255. hud/rl/buffer.py +0 -405
  256. hud/rl/chat_template.jinja +0 -101
  257. hud/rl/config.py +0 -192
  258. hud/rl/distributed.py +0 -132
  259. hud/rl/learner.py +0 -637
  260. hud/rl/tests/__init__.py +0 -1
  261. hud/rl/tests/test_learner.py +0 -186
  262. hud/rl/train.py +0 -382
  263. hud/rl/types.py +0 -101
  264. hud/rl/utils/start_vllm_server.sh +0 -30
  265. hud/rl/utils.py +0 -524
  266. hud/rl/vllm_adapter.py +0 -143
  267. hud/telemetry/job.py +0 -352
  268. hud/telemetry/replay.py +0 -74
  269. hud/telemetry/tests/test_replay.py +0 -40
  270. hud/telemetry/tests/test_trace.py +0 -63
  271. hud/telemetry/trace.py +0 -158
  272. hud/utils/agent_factories.py +0 -86
  273. hud/utils/async_utils.py +0 -65
  274. hud/utils/group_eval.py +0 -223
  275. hud/utils/progress.py +0 -149
  276. hud/utils/tasks.py +0 -127
  277. hud/utils/tests/test_async_utils.py +0 -173
  278. hud/utils/tests/test_progress.py +0 -261
  279. hud_python-0.4.45.dist-info/METADATA +0 -552
  280. hud_python-0.4.45.dist-info/RECORD +0 -228
  281. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  282. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
hud/agents/claude.py CHANGED
@@ -4,33 +4,39 @@ from __future__ import annotations
4
4
 
5
5
  import copy
6
6
  import logging
7
- from typing import TYPE_CHECKING, Any, ClassVar, cast
8
-
9
- from anthropic import Anthropic, AsyncAnthropic, BadRequestError
10
- from anthropic.types.beta import BetaContentBlockParam, BetaImageBlockParam, BetaTextBlockParam
11
-
12
- import hud
13
-
14
- if TYPE_CHECKING:
15
- from anthropic.types.beta import (
16
- BetaCacheControlEphemeralParam,
17
- BetaContentBlockParam,
18
- BetaImageBlockParam,
19
- BetaMessageParam,
20
- BetaTextBlockParam,
21
- BetaToolResultBlockParam,
22
- )
23
-
24
- from hud.datasets import Task
7
+ from inspect import cleandoc
8
+ from typing import TYPE_CHECKING, Any, ClassVar, Literal, cast
25
9
 
26
10
  import mcp.types as types
11
+ from anthropic import AsyncAnthropic, AsyncAnthropicBedrock, Omit
12
+ from anthropic.types import CacheControlEphemeralParam
13
+ from anthropic.types.beta import (
14
+ BetaBase64ImageSourceParam,
15
+ BetaBase64PDFSourceParam,
16
+ BetaContentBlockParam,
17
+ BetaImageBlockParam,
18
+ BetaMessageParam,
19
+ BetaRequestDocumentBlockParam,
20
+ BetaTextBlockParam,
21
+ BetaToolBash20250124Param,
22
+ BetaToolComputerUse20250124Param,
23
+ BetaToolParam,
24
+ BetaToolResultBlockParam,
25
+ BetaToolTextEditor20250728Param,
26
+ BetaToolUnionParam,
27
+ )
27
28
 
28
29
  from hud.settings import settings
29
30
  from hud.tools.computer.settings import computer_settings
30
- from hud.types import AgentResponse, MCPToolCall, MCPToolResult
31
+ from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
31
32
  from hud.utils.hud_console import HUDConsole
33
+ from hud.utils.types import with_signature
32
34
 
33
35
  from .base import MCPAgent
36
+ from .types import ClaudeConfig, ClaudeCreateParams
37
+
38
+ if TYPE_CHECKING:
39
+ from collections.abc import Sequence
34
40
 
35
41
  logger = logging.getLogger(__name__)
36
42
 
@@ -43,89 +49,55 @@ class ClaudeAgent(MCPAgent):
43
49
  tools through MCP servers instead of direct implementation.
44
50
  """
45
51
 
46
- metadata: ClassVar[dict[str, Any]] = {
52
+ metadata: ClassVar[dict[str, Any] | None] = {
47
53
  "display_width": computer_settings.ANTHROPIC_COMPUTER_WIDTH,
48
54
  "display_height": computer_settings.ANTHROPIC_COMPUTER_HEIGHT,
49
55
  }
56
+ config_cls: ClassVar[type[BaseAgentConfig]] = ClaudeConfig
50
57
 
51
- def __init__(
52
- self,
53
- model_client: AsyncAnthropic | None = None,
54
- model: str = "claude-sonnet-4-20250514",
55
- max_tokens: int = 4096,
56
- use_computer_beta: bool = True,
57
- validate_api_key: bool = True,
58
- **kwargs: Any,
59
- ) -> None:
60
- """
61
- Initialize Claude MCP agent.
62
-
63
- Args:
64
- model_client: AsyncAnthropic client (created if not provided)
65
- model: Claude model to use
66
- max_tokens: Maximum tokens for response
67
- use_computer_beta: Whether to use computer-use beta features
68
- **kwargs: Additional arguments passed to BaseMCPAgent (including mcp_client)
69
- """
70
- super().__init__(**kwargs)
58
+ @with_signature(ClaudeCreateParams)
59
+ @classmethod
60
+ def create(cls, **kwargs: Any) -> ClaudeAgent: # pyright: ignore[reportIncompatibleMethodOverride]
61
+ return MCPAgent.create.__func__(cls, **kwargs) # type: ignore[return-value]
62
+
63
+ def __init__(self, params: ClaudeCreateParams | None = None, **kwargs: Any) -> None:
64
+ super().__init__(params, **kwargs)
65
+ self.config: ClaudeConfig
71
66
 
72
- # Initialize client if not provided
67
+ model_client = self.config.model_client
73
68
  if model_client is None:
74
- api_key = settings.anthropic_api_key
75
- if not api_key:
76
- raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
77
- model_client = AsyncAnthropic(api_key=api_key)
69
+ # Default to HUD gateway when HUD_API_KEY is available
70
+ if settings.api_key:
71
+ from hud.agents.gateway import build_gateway_client
78
72
 
79
- # validate api key if requested
80
- if validate_api_key:
81
- try:
82
- Anthropic(api_key=model_client.api_key).models.list()
83
- except Exception as e:
84
- raise ValueError(f"Anthropic API key is invalid: {e}") from e
85
-
86
- self.anthropic_client = model_client
87
- self.model = model
88
- self.max_tokens = max_tokens
89
- self.use_computer_beta = use_computer_beta
73
+ model_client = build_gateway_client("anthropic")
74
+ elif settings.anthropic_api_key:
75
+ model_client = AsyncAnthropic(api_key=settings.anthropic_api_key)
76
+ else:
77
+ raise ValueError(
78
+ "No API key found. Set HUD_API_KEY for HUD gateway, "
79
+ "or ANTHROPIC_API_KEY for direct Anthropic access."
80
+ )
81
+
82
+ self.anthropic_client: AsyncAnthropic | AsyncAnthropicBedrock = model_client
83
+ self.max_tokens = self.config.max_tokens
84
+ self.use_computer_beta = self.config.use_computer_beta
90
85
  self.hud_console = HUDConsole(logger=logger)
91
86
 
92
- self.model_name = self.model
93
-
94
- # Track mapping from Claude tool names to MCP tool names
95
- self._claude_to_mcp_tool_map: dict[str, str] = {}
96
- self.claude_tools: list[dict] = []
97
-
98
- # Append Claude-specific instructions to the base system prompt
99
- claude_instructions = """
100
- You are Claude, an AI assistant created by Anthropic. You are helpful, harmless, and honest.
101
-
102
- When working on tasks:
103
- 1. Be thorough and systematic in your approach
104
- 2. Complete tasks autonomously without asking for confirmation
105
- 3. Use available tools efficiently to accomplish your goals
106
- 4. Verify your actions and ensure task completion
107
- 5. Be precise and accurate in all operations
108
-
109
- Remember: You are expected to complete tasks autonomously. The user trusts you to accomplish what they asked.
110
- """.strip() # noqa: E501
111
-
112
- # Append Claude instructions to any base system prompt
113
- if self.system_prompt:
114
- self.system_prompt = f"{self.system_prompt}\n\n{claude_instructions}"
115
- else:
116
- self.system_prompt = claude_instructions
87
+ # these will be initialized in _convert_tools_for_claude
88
+ self.has_computer_tool = False
89
+ self.tool_mapping: dict[str, str] = {}
90
+ self.claude_tools: list[BetaToolUnionParam] = []
117
91
 
118
- async def initialize(self, task: str | Task | None = None) -> None:
119
- """Initialize the agent and build tool mappings."""
120
- await super().initialize(task)
121
- # Build tool mappings after tools are discovered
92
+ def _on_tools_ready(self) -> None:
93
+ """Build Claude-specific tool mappings after tools are discovered."""
122
94
  self._convert_tools_for_claude()
123
95
 
124
- async def get_system_messages(self) -> list[Any]:
96
+ async def get_system_messages(self) -> list[BetaMessageParam]:
125
97
  """No system messages for Claude because applied in get_response"""
126
98
  return []
127
99
 
128
- async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[Any]:
100
+ async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[BetaMessageParam]:
129
101
  """Format messages for Claude."""
130
102
  # Convert MCP content types to Anthropic content types
131
103
  anthropic_blocks: list[BetaContentBlockParam] = []
@@ -134,101 +106,74 @@ class ClaudeAgent(MCPAgent):
134
106
  if isinstance(block, types.TextContent):
135
107
  # Only include fields that Anthropic expects
136
108
  anthropic_blocks.append(
137
- cast(
138
- "BetaTextBlockParam",
139
- {
140
- "type": "text",
141
- "text": block.text,
142
- },
109
+ BetaTextBlockParam(
110
+ type="text",
111
+ text=block.text,
143
112
  )
144
113
  )
145
114
  elif isinstance(block, types.ImageContent):
146
115
  # Convert MCP ImageContent to Anthropic format
147
116
  anthropic_blocks.append(
148
- cast(
149
- "BetaImageBlockParam",
150
- {
151
- "type": "image",
152
- "source": {
153
- "type": "base64",
154
- "media_type": block.mimeType,
155
- "data": block.data,
156
- },
157
- },
117
+ BetaImageBlockParam(
118
+ type="image",
119
+ source=BetaBase64ImageSourceParam(
120
+ type="base64",
121
+ media_type=cast(
122
+ "Literal['image/jpeg', 'image/png', 'image/gif', 'image/webp']",
123
+ block.mimeType,
124
+ ),
125
+ data=block.data,
126
+ ),
158
127
  )
159
128
  )
160
129
  else:
161
- # For other types, try to cast but log a warning
162
- self.hud_console.log(f"Unknown content block type: {type(block)}", level="warning")
163
- anthropic_blocks.append(cast("BetaContentBlockParam", block))
130
+ raise ValueError(f"Unknown content block type: {type(block)}")
164
131
 
165
- return [
166
- cast(
167
- "BetaMessageParam",
168
- {
169
- "role": "user",
170
- "content": anthropic_blocks,
171
- },
172
- )
173
- ]
132
+ return [BetaMessageParam(role="user", content=anthropic_blocks)]
174
133
 
175
- @hud.instrument(
176
- span_type="agent",
177
- record_args=False, # Messages can be large
178
- record_result=True,
179
- )
180
134
  async def get_response(self, messages: list[BetaMessageParam]) -> AgentResponse:
181
135
  """Get response from Claude including any tool calls."""
136
+ messages_cached = self._add_prompt_caching(messages)
182
137
 
183
- # Make API call with retry for prompt length
184
- current_messages = messages.copy()
185
-
186
- while True:
187
- messages_cached = self._add_prompt_caching(current_messages)
188
-
189
- # Build create kwargs
190
- create_kwargs = {
191
- "model": self.model,
192
- "max_tokens": self.max_tokens,
193
- "system": self.system_prompt,
194
- "messages": messages_cached,
195
- "tools": self.claude_tools,
196
- "tool_choice": {"type": "auto", "disable_parallel_tool_use": True},
197
- }
198
-
199
- # Add beta features if using computer tools
200
- if self.use_computer_beta and any(
201
- tool.get("type") == "computer_20250124" for tool in self.claude_tools
202
- ):
203
- create_kwargs["betas"] = ["computer-use-2025-01-24"]
138
+ # betas to use
139
+ betas = ["fine-grained-tool-streaming-2025-05-14"]
140
+ if self.has_computer_tool:
141
+ betas.append("computer-use-2025-01-24")
204
142
 
143
+ # Bedrock doesn't support .stream() - use create(stream=True) instead
144
+ if isinstance(self.anthropic_client, AsyncAnthropicBedrock):
205
145
  try:
206
- response = await self.anthropic_client.beta.messages.create(**create_kwargs)
207
- break
208
- except BadRequestError as e:
209
- if (
210
- "prompt is too long" in str(e)
211
- or "request_too_large" in str(e)
212
- or e.status_code == 413
213
- ):
214
- self.hud_console.warning("Prompt too long, truncating message history")
215
- # Keep first message and last 20 messages
216
- if len(current_messages) > 21:
217
- current_messages = [current_messages[0], *current_messages[-20:]]
218
- else:
219
- raise
220
- else:
221
- raise
222
-
223
- messages.append(
224
- cast(
225
- "BetaMessageParam",
226
- {
227
- "role": "assistant",
228
- "content": response.content,
229
- },
230
- )
231
- )
146
+ response = await self.anthropic_client.beta.messages.create(
147
+ model=self.config.model,
148
+ system=self.system_prompt if self.system_prompt is not None else Omit(),
149
+ max_tokens=self.max_tokens,
150
+ messages=messages_cached,
151
+ tools=self.claude_tools,
152
+ tool_choice={"type": "auto", "disable_parallel_tool_use": True},
153
+ betas=betas,
154
+ )
155
+ messages.append(BetaMessageParam(role="assistant", content=response.content))
156
+ except ModuleNotFoundError:
157
+ raise ValueError(
158
+ "boto3 is required for AWS Bedrock. Use `pip install hud[bedrock]`"
159
+ ) from None
160
+ else:
161
+ # Regular Anthropic client supports .stream()
162
+ async with self.anthropic_client.beta.messages.stream(
163
+ model=self.config.model,
164
+ system=self.system_prompt if self.system_prompt is not None else Omit(),
165
+ max_tokens=self.max_tokens,
166
+ messages=messages_cached,
167
+ tools=self.claude_tools,
168
+ tool_choice={"type": "auto", "disable_parallel_tool_use": True},
169
+ betas=betas,
170
+ ) as stream:
171
+ # allow backend to accumulate message content
172
+ async for _ in stream:
173
+ pass
174
+ # get final message
175
+ response = await stream.get_final_message()
176
+ messages.append(BetaMessageParam(role="assistant", content=response.content))
232
177
 
233
178
  # Process response
234
179
  result = AgentResponse(content="", tool_calls=[], done=True)
@@ -239,36 +184,36 @@ class ClaudeAgent(MCPAgent):
239
184
 
240
185
  for block in response.content:
241
186
  if block.type == "tool_use":
242
- # Map Claude tool name back to MCP tool name
243
- mcp_tool_name = self._claude_to_mcp_tool_map.get(block.name, block.name)
244
-
245
- # Create MCPToolCall object with Claude metadata as extra fields
246
- # Pyright will complain but the tool class accepts extra fields
247
187
  tool_call = MCPToolCall(
248
- id=block.id, # canonical identifier for telemetry
249
- name=mcp_tool_name,
250
- arguments=block.input,
251
- claude_name=block.name, # type: ignore
188
+ id=block.id,
189
+ # look up name in tool_mapping if available, otherwise use block name
190
+ name=self.tool_mapping.get(block.name, block.name),
191
+ arguments=block.input
192
+ if isinstance(block.input, dict)
193
+ else block.input.__dict__,
252
194
  )
253
195
  result.tool_calls.append(tool_call)
254
196
  result.done = False
255
197
  elif block.type == "text":
256
198
  text_content += block.text
257
199
  elif hasattr(block, "type") and block.type == "thinking":
258
- thinking_content += f"Thinking: {block.thinking}\n"
200
+ if thinking_content:
201
+ thinking_content += "\n"
202
+ thinking_content += block.thinking
259
203
 
260
- # Combine text and thinking for final content
204
+ result.content = text_content
261
205
  if thinking_content:
262
- result.content = thinking_content + text_content
263
- else:
264
- result.content = text_content
206
+ result.reasoning = thinking_content
265
207
 
266
208
  return result
267
209
 
268
210
  async def format_tool_results(
269
211
  self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
270
212
  ) -> list[BetaMessageParam]:
271
- """Format tool results into Claude messages."""
213
+ """Format tool results into Claude messages.
214
+
215
+ Handles EmbeddedResource (PDFs), images, and text content.
216
+ """
272
217
  # Process each tool result
273
218
  user_content = []
274
219
 
@@ -280,7 +225,9 @@ class ClaudeAgent(MCPAgent):
280
225
  continue
281
226
 
282
227
  # Convert MCP tool results to Claude format
283
- claude_blocks = []
228
+ claude_blocks: list[
229
+ BetaTextBlockParam | BetaImageBlockParam | BetaRequestDocumentBlockParam
230
+ ] = []
284
231
 
285
232
  if result.isError:
286
233
  # Extract error message from content
@@ -297,87 +244,111 @@ class ClaudeAgent(MCPAgent):
297
244
  claude_blocks.append(text_to_content_block(content.text))
298
245
  elif isinstance(content, types.ImageContent):
299
246
  claude_blocks.append(base64_to_content_block(content.data))
247
+ elif isinstance(content, types.EmbeddedResource):
248
+ # Handle embedded resources (PDFs)
249
+ resource = content.resource
250
+ if (
251
+ isinstance(resource, types.BlobResourceContents)
252
+ and resource.mimeType == "application/pdf"
253
+ ):
254
+ claude_blocks.append(
255
+ document_to_content_block(base64_data=resource.blob)
256
+ )
300
257
 
301
258
  # Add tool result
302
259
  user_content.append(tool_use_content_block(tool_use_id, claude_blocks))
303
260
 
304
261
  # Return as a user message containing all tool results
305
262
  return [
306
- cast(
307
- "BetaMessageParam",
308
- {
309
- "role": "user",
310
- "content": user_content,
311
- },
263
+ BetaMessageParam(
264
+ role="user",
265
+ content=user_content,
312
266
  )
313
267
  ]
314
268
 
315
269
  async def create_user_message(self, text: str) -> BetaMessageParam:
316
270
  """Create a user message in Claude's format."""
317
- return cast("BetaMessageParam", {"role": "user", "content": text})
271
+ return BetaMessageParam(role="user", content=text)
318
272
 
319
- def _convert_tools_for_claude(self) -> list[dict]:
320
- """Convert MCP tools to Claude tool format."""
321
- claude_tools = []
322
- self._claude_to_mcp_tool_map = {} # Reset mapping
273
+ def _convert_tools_for_claude(self) -> None:
274
+ """Convert MCP tools to Claude API tools."""
323
275
 
324
- # Find computer tool by priority
325
- computer_tool_priority = ["anthropic_computer", "computer_anthropic", "computer"]
326
- selected_computer_tool = None
276
+ # First pass: identify all computer tools and find the longest match
277
+ available_tools = self.get_available_tools()
327
278
 
328
- for priority_name in computer_tool_priority:
329
- for tool in self._available_tools:
279
+ # find potential computer tools by priority
280
+ selected_computer_tool = None
281
+ computer_tool_names_by_priority = ["anthropic_computer", "computer_anthropic", "computer"]
282
+ for computer_tool_name in computer_tool_names_by_priority:
283
+ for tool in available_tools:
330
284
  # Check both exact match and suffix match (for prefixed tools)
331
- if tool.name == priority_name or tool.name.endswith(f"_{priority_name}"):
285
+ if tool.name == computer_tool_name or tool.name.endswith(f"_{computer_tool_name}"):
332
286
  selected_computer_tool = tool
333
287
  break
334
288
  if selected_computer_tool:
335
289
  break
336
290
 
337
- # Add the selected computer tool if found
338
- if selected_computer_tool:
339
- claude_tool = {
340
- "type": "computer_20250124",
341
- "name": "computer",
342
- "display_width_px": self.metadata["display_width"],
343
- "display_height_px": self.metadata["display_height"],
344
- }
345
- # Map Claude's "computer" back to the actual MCP tool name
346
- self._claude_to_mcp_tool_map["computer"] = selected_computer_tool.name
347
- claude_tools.append(claude_tool)
348
- self.hud_console.debug(
349
- f"Using {selected_computer_tool.name} as computer tool for Claude"
350
- )
291
+ def to_api_tool(tool: types.Tool) -> BetaToolUnionParam | None:
292
+ if tool.name == "str_replace_based_edit_tool":
293
+ return BetaToolTextEditor20250728Param(
294
+ type="text_editor_20250728",
295
+ name="str_replace_based_edit_tool",
296
+ )
297
+ if tool.name == "bash":
298
+ return BetaToolBash20250124Param(
299
+ type="bash_20250124",
300
+ name="bash",
301
+ )
302
+ if selected_computer_tool is not None:
303
+ if tool.name == selected_computer_tool.name:
304
+ return BetaToolComputerUse20250124Param(
305
+ type="computer_20250124",
306
+ name="computer",
307
+ display_number=1,
308
+ display_width_px=computer_settings.ANTHROPIC_COMPUTER_WIDTH,
309
+ display_height_px=computer_settings.ANTHROPIC_COMPUTER_HEIGHT,
310
+ )
311
+ elif tool.name == "computer" or tool.name.endswith("_computer"):
312
+ logger.warning(
313
+ "Renamed tool %s to 'computer', dropping original 'computer' tool",
314
+ selected_computer_tool.name,
315
+ )
316
+ return None
317
+
318
+ if tool.description is None or tool.inputSchema is None:
319
+ raise ValueError(
320
+ cleandoc(f"""MCP tool {tool.name} requires both a description and inputSchema.
321
+ Add these by:
322
+ 1. Adding a docstring to your @mcp.tool decorated function for the description
323
+ 2. Using pydantic Field() annotations on function parameters for the schema
324
+ """)
325
+ )
351
326
 
352
- # Add other non-computer tools
353
- for tool in self._available_tools:
354
- # Skip computer tools (already handled) and lifecycle tools
355
- is_computer_tool = any(
356
- tool.name == priority_name or tool.name.endswith(f"_{priority_name}")
357
- for priority_name in computer_tool_priority
327
+ return BetaToolParam(
328
+ name=tool.name,
329
+ description=tool.description,
330
+ input_schema=tool.inputSchema,
358
331
  )
359
- if is_computer_tool or tool.name in self.lifecycle_tools:
360
- continue
361
332
 
362
- claude_tool = {
363
- "name": tool.name,
364
- "description": tool.description or f"Execute {tool.name}",
365
- "input_schema": tool.inputSchema
366
- or {
367
- "type": "object",
368
- "properties": {},
369
- },
370
- }
371
- # Direct mapping for non-computer tools
372
- self._claude_to_mcp_tool_map[tool.name] = tool.name
373
- claude_tools.append(claude_tool)
374
-
375
- self.claude_tools = claude_tools
376
- return claude_tools
333
+ self.has_computer_tool = False
334
+ self.tool_mapping = {}
335
+ self.claude_tools = []
336
+ for tool in available_tools:
337
+ claude_tool = to_api_tool(tool)
338
+ if claude_tool is None:
339
+ continue
340
+ tool_name = claude_tool.get("name")
341
+ if tool_name is None:
342
+ continue
343
+ if tool_name == "computer":
344
+ self.has_computer_tool = True
345
+ self.tool_mapping[tool_name] = tool.name
346
+ self.claude_tools.append(claude_tool)
377
347
 
378
348
  def _add_prompt_caching(self, messages: list[BetaMessageParam]) -> list[BetaMessageParam]:
379
349
  """Add prompt caching to messages."""
380
350
  messages_cached = copy.deepcopy(messages)
351
+ cache_control = CacheControlEphemeralParam(type="ephemeral")
381
352
 
382
353
  # Mark last user message with cache control
383
354
  if (
@@ -391,20 +362,25 @@ class ClaudeAgent(MCPAgent):
391
362
  for block in last_content:
392
363
  # Only add cache control to dict-like block types that support it
393
364
  if isinstance(block, dict):
394
- block_type = block.get("type")
395
- if block_type in ["text", "image", "tool_use", "tool_result"]:
396
- cache_control: BetaCacheControlEphemeralParam = {"type": "ephemeral"}
397
- block["cache_control"] = cache_control # type: ignore[reportGeneralTypeIssues]
365
+ match block["type"]:
366
+ case "redacted_thinking" | "thinking":
367
+ pass
368
+ case _:
369
+ block["cache_control"] = cache_control
398
370
 
399
371
  return messages_cached
400
372
 
401
373
 
402
374
  def base64_to_content_block(base64: str) -> BetaImageBlockParam:
403
375
  """Convert base64 image to Claude content block."""
404
- return {
405
- "type": "image",
406
- "source": {"type": "base64", "media_type": "image/png", "data": base64},
407
- }
376
+ return BetaImageBlockParam(
377
+ type="image",
378
+ source=BetaBase64ImageSourceParam(
379
+ type="base64",
380
+ media_type="image/png",
381
+ data=base64,
382
+ ),
383
+ )
408
384
 
409
385
 
410
386
  def text_to_content_block(text: str) -> BetaTextBlockParam:
@@ -412,8 +388,21 @@ def text_to_content_block(text: str) -> BetaTextBlockParam:
412
388
  return {"type": "text", "text": text}
413
389
 
414
390
 
391
+ def document_to_content_block(base64_data: str) -> BetaRequestDocumentBlockParam:
392
+ """Convert base64 PDF to Claude document content block."""
393
+ return BetaRequestDocumentBlockParam(
394
+ type="document",
395
+ source=BetaBase64PDFSourceParam(
396
+ type="base64",
397
+ media_type="application/pdf",
398
+ data=base64_data,
399
+ ),
400
+ )
401
+
402
+
415
403
  def tool_use_content_block(
416
- tool_use_id: str, content: list[BetaTextBlockParam | BetaImageBlockParam]
404
+ tool_use_id: str,
405
+ content: Sequence[BetaTextBlockParam | BetaImageBlockParam | BetaRequestDocumentBlockParam],
417
406
  ) -> BetaToolResultBlockParam:
418
407
  """Create tool result content block."""
419
- return {"type": "tool_result", "tool_use_id": tool_use_id, "content": content}
408
+ return {"type": "tool_result", "tool_use_id": tool_use_id, "content": content} # pyright: ignore[reportReturnType]