hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +27 -7
- hud/agents/__init__.py +11 -5
- hud/agents/base.py +220 -500
- hud/agents/claude.py +200 -240
- hud/agents/gemini.py +275 -0
- hud/agents/gemini_cua.py +335 -0
- hud/agents/grounded_openai.py +98 -100
- hud/agents/misc/integration_test_agent.py +51 -20
- hud/agents/misc/response_agent.py +41 -36
- hud/agents/openai.py +291 -292
- hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
- hud/agents/operator.py +211 -0
- hud/agents/tests/conftest.py +133 -0
- hud/agents/tests/test_base.py +300 -622
- hud/agents/tests/test_base_runtime.py +233 -0
- hud/agents/tests/test_claude.py +379 -210
- hud/agents/tests/test_client.py +9 -10
- hud/agents/tests/test_gemini.py +369 -0
- hud/agents/tests/test_grounded_openai_agent.py +65 -50
- hud/agents/tests/test_openai.py +376 -140
- hud/agents/tests/test_operator.py +362 -0
- hud/agents/tests/test_run_eval.py +179 -0
- hud/cli/__init__.py +461 -545
- hud/cli/analyze.py +43 -5
- hud/cli/build.py +664 -110
- hud/cli/debug.py +8 -5
- hud/cli/dev.py +882 -734
- hud/cli/eval.py +782 -668
- hud/cli/flows/dev.py +167 -0
- hud/cli/flows/init.py +191 -0
- hud/cli/flows/tasks.py +153 -56
- hud/cli/flows/templates.py +151 -0
- hud/cli/flows/tests/__init__.py +1 -0
- hud/cli/flows/tests/test_dev.py +126 -0
- hud/cli/init.py +60 -58
- hud/cli/push.py +29 -11
- hud/cli/rft.py +311 -0
- hud/cli/rft_status.py +145 -0
- hud/cli/tests/test_analyze.py +5 -5
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +108 -6
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_init.py +6 -1
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +140 -0
- hud/cli/tests/test_convert.py +361 -0
- hud/cli/tests/test_debug.py +12 -10
- hud/cli/tests/test_dev.py +197 -0
- hud/cli/tests/test_eval.py +251 -0
- hud/cli/tests/test_eval_bedrock.py +51 -0
- hud/cli/tests/test_init.py +124 -0
- hud/cli/tests/test_main_module.py +11 -5
- hud/cli/tests/test_mcp_server.py +12 -100
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/tests/test_registry.py +1 -1
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/{rl → utils}/celebrate.py +14 -12
- hud/cli/utils/config.py +18 -1
- hud/cli/utils/docker.py +130 -4
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/git.py +136 -0
- hud/cli/utils/interactive.py +39 -5
- hud/cli/utils/metadata.py +69 -0
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/server.py +2 -2
- hud/cli/utils/source_hash.py +3 -3
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_git.py +142 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +258 -0
- hud/cli/{rl → utils}/viewer.py +2 -2
- hud/clients/README.md +12 -11
- hud/clients/__init__.py +4 -3
- hud/clients/base.py +166 -26
- hud/clients/environment.py +51 -0
- hud/clients/fastmcp.py +13 -6
- hud/clients/mcp_use.py +40 -15
- hud/clients/tests/test_analyze_scenarios.py +206 -0
- hud/clients/tests/test_protocol.py +9 -3
- hud/datasets/__init__.py +23 -20
- hud/datasets/loader.py +327 -0
- hud/datasets/runner.py +192 -105
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_loader.py +221 -0
- hud/datasets/tests/test_utils.py +315 -0
- hud/datasets/utils.py +270 -90
- hud/environment/__init__.py +50 -0
- hud/environment/connection.py +206 -0
- hud/environment/connectors/__init__.py +33 -0
- hud/environment/connectors/base.py +68 -0
- hud/environment/connectors/local.py +177 -0
- hud/environment/connectors/mcp_config.py +109 -0
- hud/environment/connectors/openai.py +101 -0
- hud/environment/connectors/remote.py +172 -0
- hud/environment/environment.py +694 -0
- hud/environment/integrations/__init__.py +45 -0
- hud/environment/integrations/adk.py +67 -0
- hud/environment/integrations/anthropic.py +196 -0
- hud/environment/integrations/gemini.py +92 -0
- hud/environment/integrations/langchain.py +82 -0
- hud/environment/integrations/llamaindex.py +68 -0
- hud/environment/integrations/openai.py +238 -0
- hud/environment/mock.py +306 -0
- hud/environment/router.py +112 -0
- hud/environment/scenarios.py +493 -0
- hud/environment/tests/__init__.py +1 -0
- hud/environment/tests/test_connection.py +317 -0
- hud/environment/tests/test_connectors.py +218 -0
- hud/environment/tests/test_environment.py +161 -0
- hud/environment/tests/test_integrations.py +257 -0
- hud/environment/tests/test_local_connectors.py +201 -0
- hud/environment/tests/test_scenarios.py +280 -0
- hud/environment/tests/test_tools.py +208 -0
- hud/environment/types.py +23 -0
- hud/environment/utils/__init__.py +35 -0
- hud/environment/utils/formats.py +215 -0
- hud/environment/utils/schema.py +171 -0
- hud/environment/utils/tool_wrappers.py +113 -0
- hud/eval/__init__.py +67 -0
- hud/eval/context.py +674 -0
- hud/eval/display.py +299 -0
- hud/eval/instrument.py +185 -0
- hud/eval/manager.py +466 -0
- hud/eval/parallel.py +268 -0
- hud/eval/task.py +340 -0
- hud/eval/tests/__init__.py +1 -0
- hud/eval/tests/test_context.py +178 -0
- hud/eval/tests/test_eval.py +210 -0
- hud/eval/tests/test_manager.py +152 -0
- hud/eval/tests/test_parallel.py +168 -0
- hud/eval/tests/test_task.py +145 -0
- hud/eval/types.py +63 -0
- hud/eval/utils.py +183 -0
- hud/patches/__init__.py +19 -0
- hud/patches/mcp_patches.py +151 -0
- hud/patches/warnings.py +54 -0
- hud/samples/browser.py +4 -4
- hud/server/__init__.py +2 -1
- hud/server/low_level.py +2 -1
- hud/server/router.py +164 -0
- hud/server/server.py +567 -80
- hud/server/tests/test_mcp_server_integration.py +11 -11
- hud/server/tests/test_mcp_server_more.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/settings.py +45 -3
- hud/shared/exceptions.py +36 -10
- hud/shared/hints.py +26 -1
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +40 -31
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +20 -19
- hud/telemetry/exporter.py +201 -0
- hud/telemetry/instrument.py +158 -253
- hud/telemetry/tests/test_eval_telemetry.py +356 -0
- hud/telemetry/tests/test_exporter.py +258 -0
- hud/telemetry/tests/test_instrument.py +401 -0
- hud/tools/__init__.py +16 -2
- hud/tools/apply_patch.py +639 -0
- hud/tools/base.py +54 -4
- hud/tools/bash.py +2 -2
- hud/tools/computer/__init__.py +4 -0
- hud/tools/computer/anthropic.py +2 -2
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/hud.py +23 -6
- hud/tools/computer/openai.py +20 -21
- hud/tools/computer/qwen.py +434 -0
- hud/tools/computer/settings.py +37 -0
- hud/tools/edit.py +3 -7
- hud/tools/executors/base.py +4 -2
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/grounding/grounded_tool.py +13 -18
- hud/tools/grounding/grounder.py +10 -31
- hud/tools/grounding/tests/test_grounded_tool.py +26 -44
- hud/tools/jupyter.py +330 -0
- hud/tools/playwright.py +18 -3
- hud/tools/shell.py +308 -0
- hud/tools/tests/test_apply_patch.py +718 -0
- hud/tools/tests/test_computer.py +4 -9
- hud/tools/tests/test_computer_actions.py +24 -2
- hud/tools/tests/test_jupyter_tool.py +181 -0
- hud/tools/tests/test_shell.py +596 -0
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/tools/types.py +21 -1
- hud/types.py +167 -57
- hud/utils/__init__.py +2 -0
- hud/utils/env.py +67 -0
- hud/utils/hud_console.py +61 -3
- hud/utils/mcp.py +15 -58
- hud/utils/strict_schema.py +162 -0
- hud/utils/tests/test_init.py +1 -2
- hud/utils/tests/test_mcp.py +1 -28
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/utils/types.py +20 -0
- hud/version.py +1 -1
- hud_python-0.5.1.dist-info/METADATA +264 -0
- hud_python-0.5.1.dist-info/RECORD +299 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
- hud/agents/langchain.py +0 -261
- hud/agents/lite_llm.py +0 -72
- hud/cli/rl/__init__.py +0 -180
- hud/cli/rl/config.py +0 -101
- hud/cli/rl/display.py +0 -133
- hud/cli/rl/gpu.py +0 -63
- hud/cli/rl/gpu_utils.py +0 -321
- hud/cli/rl/local_runner.py +0 -595
- hud/cli/rl/presets.py +0 -96
- hud/cli/rl/remote_runner.py +0 -463
- hud/cli/rl/rl_api.py +0 -150
- hud/cli/rl/vllm.py +0 -177
- hud/cli/rl/wait_utils.py +0 -89
- hud/datasets/parallel.py +0 -687
- hud/misc/__init__.py +0 -1
- hud/misc/claude_plays_pokemon.py +0 -292
- hud/otel/__init__.py +0 -35
- hud/otel/collector.py +0 -142
- hud/otel/config.py +0 -181
- hud/otel/context.py +0 -570
- hud/otel/exporters.py +0 -369
- hud/otel/instrumentation.py +0 -135
- hud/otel/processors.py +0 -121
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_processors.py +0 -197
- hud/rl/README.md +0 -30
- hud/rl/__init__.py +0 -1
- hud/rl/actor.py +0 -176
- hud/rl/buffer.py +0 -405
- hud/rl/chat_template.jinja +0 -101
- hud/rl/config.py +0 -192
- hud/rl/distributed.py +0 -132
- hud/rl/learner.py +0 -637
- hud/rl/tests/__init__.py +0 -1
- hud/rl/tests/test_learner.py +0 -186
- hud/rl/train.py +0 -382
- hud/rl/types.py +0 -101
- hud/rl/utils/start_vllm_server.sh +0 -30
- hud/rl/utils.py +0 -524
- hud/rl/vllm_adapter.py +0 -143
- hud/telemetry/job.py +0 -352
- hud/telemetry/replay.py +0 -74
- hud/telemetry/tests/test_replay.py +0 -40
- hud/telemetry/tests/test_trace.py +0 -63
- hud/telemetry/trace.py +0 -158
- hud/utils/agent_factories.py +0 -86
- hud/utils/async_utils.py +0 -65
- hud/utils/group_eval.py +0 -223
- hud/utils/progress.py +0 -149
- hud/utils/tasks.py +0 -127
- hud/utils/tests/test_async_utils.py +0 -173
- hud/utils/tests/test_progress.py +0 -261
- hud_python-0.4.45.dist-info/METADATA +0 -552
- hud_python-0.4.45.dist-info/RECORD +0 -228
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0
hud/agents/claude.py
CHANGED
|
@@ -4,37 +4,53 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import copy
|
|
6
6
|
import logging
|
|
7
|
-
from
|
|
8
|
-
|
|
9
|
-
from anthropic import Anthropic, AsyncAnthropic, BadRequestError
|
|
10
|
-
from anthropic.types.beta import BetaContentBlockParam, BetaImageBlockParam, BetaTextBlockParam
|
|
11
|
-
|
|
12
|
-
import hud
|
|
13
|
-
|
|
14
|
-
if TYPE_CHECKING:
|
|
15
|
-
from anthropic.types.beta import (
|
|
16
|
-
BetaCacheControlEphemeralParam,
|
|
17
|
-
BetaContentBlockParam,
|
|
18
|
-
BetaImageBlockParam,
|
|
19
|
-
BetaMessageParam,
|
|
20
|
-
BetaTextBlockParam,
|
|
21
|
-
BetaToolResultBlockParam,
|
|
22
|
-
)
|
|
23
|
-
|
|
24
|
-
from hud.datasets import Task
|
|
7
|
+
from inspect import cleandoc
|
|
8
|
+
from typing import Any, ClassVar, Literal, cast
|
|
25
9
|
|
|
26
10
|
import mcp.types as types
|
|
11
|
+
from anthropic import AsyncAnthropic, AsyncAnthropicBedrock, Omit
|
|
12
|
+
from anthropic.types import CacheControlEphemeralParam
|
|
13
|
+
from anthropic.types.beta import (
|
|
14
|
+
BetaBase64ImageSourceParam,
|
|
15
|
+
BetaContentBlockParam,
|
|
16
|
+
BetaImageBlockParam,
|
|
17
|
+
BetaMessageParam,
|
|
18
|
+
BetaTextBlockParam,
|
|
19
|
+
BetaToolBash20250124Param,
|
|
20
|
+
BetaToolComputerUse20250124Param,
|
|
21
|
+
BetaToolParam,
|
|
22
|
+
BetaToolResultBlockParam,
|
|
23
|
+
BetaToolTextEditor20250728Param,
|
|
24
|
+
BetaToolUnionParam,
|
|
25
|
+
)
|
|
26
|
+
from pydantic import ConfigDict
|
|
27
27
|
|
|
28
28
|
from hud.settings import settings
|
|
29
29
|
from hud.tools.computer.settings import computer_settings
|
|
30
|
-
from hud.types import AgentResponse, MCPToolCall, MCPToolResult
|
|
30
|
+
from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
|
|
31
31
|
from hud.utils.hud_console import HUDConsole
|
|
32
|
+
from hud.utils.types import with_signature
|
|
32
33
|
|
|
33
|
-
from .base import MCPAgent
|
|
34
|
+
from .base import BaseCreateParams, MCPAgent
|
|
34
35
|
|
|
35
36
|
logger = logging.getLogger(__name__)
|
|
36
37
|
|
|
37
38
|
|
|
39
|
+
class ClaudeConfig(BaseAgentConfig):
|
|
40
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
41
|
+
|
|
42
|
+
model_name: str = "Claude"
|
|
43
|
+
model: str = "claude-sonnet-4-5"
|
|
44
|
+
model_client: AsyncAnthropic | AsyncAnthropicBedrock | None = None
|
|
45
|
+
max_tokens: int = 16384
|
|
46
|
+
use_computer_beta: bool = True
|
|
47
|
+
validate_api_key: bool = True
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class ClaudeCreateParams(BaseCreateParams, ClaudeConfig):
|
|
51
|
+
pass
|
|
52
|
+
|
|
53
|
+
|
|
38
54
|
class ClaudeAgent(MCPAgent):
|
|
39
55
|
"""
|
|
40
56
|
Claude agent that uses MCP servers for tool execution.
|
|
@@ -43,89 +59,47 @@ class ClaudeAgent(MCPAgent):
|
|
|
43
59
|
tools through MCP servers instead of direct implementation.
|
|
44
60
|
"""
|
|
45
61
|
|
|
46
|
-
metadata: ClassVar[dict[str, Any]] = {
|
|
62
|
+
metadata: ClassVar[dict[str, Any] | None] = {
|
|
47
63
|
"display_width": computer_settings.ANTHROPIC_COMPUTER_WIDTH,
|
|
48
64
|
"display_height": computer_settings.ANTHROPIC_COMPUTER_HEIGHT,
|
|
49
65
|
}
|
|
66
|
+
config_cls: ClassVar[type[BaseAgentConfig]] = ClaudeConfig
|
|
50
67
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
Initialize Claude MCP agent.
|
|
62
|
-
|
|
63
|
-
Args:
|
|
64
|
-
model_client: AsyncAnthropic client (created if not provided)
|
|
65
|
-
model: Claude model to use
|
|
66
|
-
max_tokens: Maximum tokens for response
|
|
67
|
-
use_computer_beta: Whether to use computer-use beta features
|
|
68
|
-
**kwargs: Additional arguments passed to BaseMCPAgent (including mcp_client)
|
|
69
|
-
"""
|
|
70
|
-
super().__init__(**kwargs)
|
|
71
|
-
|
|
72
|
-
# Initialize client if not provided
|
|
68
|
+
@with_signature(ClaudeCreateParams)
|
|
69
|
+
@classmethod
|
|
70
|
+
def create(cls, **kwargs: Any) -> ClaudeAgent: # pyright: ignore[reportIncompatibleMethodOverride]
|
|
71
|
+
return MCPAgent.create.__func__(cls, **kwargs) # type: ignore[return-value]
|
|
72
|
+
|
|
73
|
+
def __init__(self, params: ClaudeCreateParams | None = None, **kwargs: Any) -> None:
|
|
74
|
+
super().__init__(params, **kwargs)
|
|
75
|
+
self.config: ClaudeConfig
|
|
76
|
+
|
|
77
|
+
model_client = self.config.model_client
|
|
73
78
|
if model_client is None:
|
|
74
79
|
api_key = settings.anthropic_api_key
|
|
75
80
|
if not api_key:
|
|
76
81
|
raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
|
|
77
82
|
model_client = AsyncAnthropic(api_key=api_key)
|
|
78
83
|
|
|
79
|
-
# validate api key if requested
|
|
80
|
-
if validate_api_key:
|
|
81
|
-
try:
|
|
82
|
-
Anthropic(api_key=model_client.api_key).models.list()
|
|
83
|
-
except Exception as e:
|
|
84
|
-
raise ValueError(f"Anthropic API key is invalid: {e}") from e
|
|
85
|
-
|
|
86
84
|
self.anthropic_client = model_client
|
|
87
|
-
self.
|
|
88
|
-
self.
|
|
89
|
-
self.use_computer_beta = use_computer_beta
|
|
85
|
+
self.max_tokens = self.config.max_tokens
|
|
86
|
+
self.use_computer_beta = self.config.use_computer_beta
|
|
90
87
|
self.hud_console = HUDConsole(logger=logger)
|
|
91
88
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
self.
|
|
96
|
-
self.claude_tools: list[dict] = []
|
|
97
|
-
|
|
98
|
-
# Append Claude-specific instructions to the base system prompt
|
|
99
|
-
claude_instructions = """
|
|
100
|
-
You are Claude, an AI assistant created by Anthropic. You are helpful, harmless, and honest.
|
|
101
|
-
|
|
102
|
-
When working on tasks:
|
|
103
|
-
1. Be thorough and systematic in your approach
|
|
104
|
-
2. Complete tasks autonomously without asking for confirmation
|
|
105
|
-
3. Use available tools efficiently to accomplish your goals
|
|
106
|
-
4. Verify your actions and ensure task completion
|
|
107
|
-
5. Be precise and accurate in all operations
|
|
108
|
-
|
|
109
|
-
Remember: You are expected to complete tasks autonomously. The user trusts you to accomplish what they asked.
|
|
110
|
-
""".strip() # noqa: E501
|
|
111
|
-
|
|
112
|
-
# Append Claude instructions to any base system prompt
|
|
113
|
-
if self.system_prompt:
|
|
114
|
-
self.system_prompt = f"{self.system_prompt}\n\n{claude_instructions}"
|
|
115
|
-
else:
|
|
116
|
-
self.system_prompt = claude_instructions
|
|
89
|
+
# these will be initialized in _convert_tools_for_claude
|
|
90
|
+
self.has_computer_tool = False
|
|
91
|
+
self.tool_mapping: dict[str, str] = {}
|
|
92
|
+
self.claude_tools: list[BetaToolUnionParam] = []
|
|
117
93
|
|
|
118
|
-
|
|
119
|
-
"""
|
|
120
|
-
await super().initialize(task)
|
|
121
|
-
# Build tool mappings after tools are discovered
|
|
94
|
+
def _on_tools_ready(self) -> None:
|
|
95
|
+
"""Build Claude-specific tool mappings after tools are discovered."""
|
|
122
96
|
self._convert_tools_for_claude()
|
|
123
97
|
|
|
124
|
-
async def get_system_messages(self) -> list[
|
|
98
|
+
async def get_system_messages(self) -> list[BetaMessageParam]:
|
|
125
99
|
"""No system messages for Claude because applied in get_response"""
|
|
126
100
|
return []
|
|
127
101
|
|
|
128
|
-
async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[
|
|
102
|
+
async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[BetaMessageParam]:
|
|
129
103
|
"""Format messages for Claude."""
|
|
130
104
|
# Convert MCP content types to Anthropic content types
|
|
131
105
|
anthropic_blocks: list[BetaContentBlockParam] = []
|
|
@@ -134,101 +108,74 @@ class ClaudeAgent(MCPAgent):
|
|
|
134
108
|
if isinstance(block, types.TextContent):
|
|
135
109
|
# Only include fields that Anthropic expects
|
|
136
110
|
anthropic_blocks.append(
|
|
137
|
-
|
|
138
|
-
"
|
|
139
|
-
|
|
140
|
-
"type": "text",
|
|
141
|
-
"text": block.text,
|
|
142
|
-
},
|
|
111
|
+
BetaTextBlockParam(
|
|
112
|
+
type="text",
|
|
113
|
+
text=block.text,
|
|
143
114
|
)
|
|
144
115
|
)
|
|
145
116
|
elif isinstance(block, types.ImageContent):
|
|
146
117
|
# Convert MCP ImageContent to Anthropic format
|
|
147
118
|
anthropic_blocks.append(
|
|
148
|
-
|
|
149
|
-
"
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
"
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
119
|
+
BetaImageBlockParam(
|
|
120
|
+
type="image",
|
|
121
|
+
source=BetaBase64ImageSourceParam(
|
|
122
|
+
type="base64",
|
|
123
|
+
media_type=cast(
|
|
124
|
+
"Literal['image/jpeg', 'image/png', 'image/gif', 'image/webp']",
|
|
125
|
+
block.mimeType,
|
|
126
|
+
),
|
|
127
|
+
data=block.data,
|
|
128
|
+
),
|
|
158
129
|
)
|
|
159
130
|
)
|
|
160
131
|
else:
|
|
161
|
-
|
|
162
|
-
self.hud_console.log(f"Unknown content block type: {type(block)}", level="warning")
|
|
163
|
-
anthropic_blocks.append(cast("BetaContentBlockParam", block))
|
|
132
|
+
raise ValueError(f"Unknown content block type: {type(block)}")
|
|
164
133
|
|
|
165
|
-
return [
|
|
166
|
-
cast(
|
|
167
|
-
"BetaMessageParam",
|
|
168
|
-
{
|
|
169
|
-
"role": "user",
|
|
170
|
-
"content": anthropic_blocks,
|
|
171
|
-
},
|
|
172
|
-
)
|
|
173
|
-
]
|
|
134
|
+
return [BetaMessageParam(role="user", content=anthropic_blocks)]
|
|
174
135
|
|
|
175
|
-
@hud.instrument(
|
|
176
|
-
span_type="agent",
|
|
177
|
-
record_args=False, # Messages can be large
|
|
178
|
-
record_result=True,
|
|
179
|
-
)
|
|
180
136
|
async def get_response(self, messages: list[BetaMessageParam]) -> AgentResponse:
|
|
181
137
|
"""Get response from Claude including any tool calls."""
|
|
138
|
+
messages_cached = self._add_prompt_caching(messages)
|
|
182
139
|
|
|
183
|
-
#
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
messages_cached = self._add_prompt_caching(current_messages)
|
|
188
|
-
|
|
189
|
-
# Build create kwargs
|
|
190
|
-
create_kwargs = {
|
|
191
|
-
"model": self.model,
|
|
192
|
-
"max_tokens": self.max_tokens,
|
|
193
|
-
"system": self.system_prompt,
|
|
194
|
-
"messages": messages_cached,
|
|
195
|
-
"tools": self.claude_tools,
|
|
196
|
-
"tool_choice": {"type": "auto", "disable_parallel_tool_use": True},
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
# Add beta features if using computer tools
|
|
200
|
-
if self.use_computer_beta and any(
|
|
201
|
-
tool.get("type") == "computer_20250124" for tool in self.claude_tools
|
|
202
|
-
):
|
|
203
|
-
create_kwargs["betas"] = ["computer-use-2025-01-24"]
|
|
140
|
+
# betas to use
|
|
141
|
+
betas = ["fine-grained-tool-streaming-2025-05-14"]
|
|
142
|
+
if self.has_computer_tool:
|
|
143
|
+
betas.append("computer-use-2025-01-24")
|
|
204
144
|
|
|
145
|
+
# Bedrock doesn't support .stream() - use create(stream=True) instead
|
|
146
|
+
if isinstance(self.anthropic_client, AsyncAnthropicBedrock):
|
|
205
147
|
try:
|
|
206
|
-
response = await self.anthropic_client.beta.messages.create(
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
)
|
|
231
|
-
|
|
148
|
+
response = await self.anthropic_client.beta.messages.create(
|
|
149
|
+
model=self.config.model,
|
|
150
|
+
system=self.system_prompt if self.system_prompt is not None else Omit(),
|
|
151
|
+
max_tokens=self.max_tokens,
|
|
152
|
+
messages=messages_cached,
|
|
153
|
+
tools=self.claude_tools,
|
|
154
|
+
tool_choice={"type": "auto", "disable_parallel_tool_use": True},
|
|
155
|
+
betas=betas,
|
|
156
|
+
)
|
|
157
|
+
messages.append(BetaMessageParam(role="assistant", content=response.content))
|
|
158
|
+
except ModuleNotFoundError:
|
|
159
|
+
raise ValueError(
|
|
160
|
+
"boto3 is required for AWS Bedrock. Use `pip install hud[bedrock]`"
|
|
161
|
+
) from None
|
|
162
|
+
else:
|
|
163
|
+
# Regular Anthropic client supports .stream()
|
|
164
|
+
async with self.anthropic_client.beta.messages.stream(
|
|
165
|
+
model=self.config.model,
|
|
166
|
+
system=self.system_prompt if self.system_prompt is not None else Omit(),
|
|
167
|
+
max_tokens=self.max_tokens,
|
|
168
|
+
messages=messages_cached,
|
|
169
|
+
tools=self.claude_tools,
|
|
170
|
+
tool_choice={"type": "auto", "disable_parallel_tool_use": True},
|
|
171
|
+
betas=betas,
|
|
172
|
+
) as stream:
|
|
173
|
+
# allow backend to accumulate message content
|
|
174
|
+
async for _ in stream:
|
|
175
|
+
pass
|
|
176
|
+
# get final message
|
|
177
|
+
response = await stream.get_final_message()
|
|
178
|
+
messages.append(BetaMessageParam(role="assistant", content=response.content))
|
|
232
179
|
|
|
233
180
|
# Process response
|
|
234
181
|
result = AgentResponse(content="", tool_calls=[], done=True)
|
|
@@ -239,29 +186,26 @@ class ClaudeAgent(MCPAgent):
|
|
|
239
186
|
|
|
240
187
|
for block in response.content:
|
|
241
188
|
if block.type == "tool_use":
|
|
242
|
-
# Map Claude tool name back to MCP tool name
|
|
243
|
-
mcp_tool_name = self._claude_to_mcp_tool_map.get(block.name, block.name)
|
|
244
|
-
|
|
245
|
-
# Create MCPToolCall object with Claude metadata as extra fields
|
|
246
|
-
# Pyright will complain but the tool class accepts extra fields
|
|
247
189
|
tool_call = MCPToolCall(
|
|
248
|
-
id=block.id,
|
|
249
|
-
name
|
|
250
|
-
|
|
251
|
-
|
|
190
|
+
id=block.id,
|
|
191
|
+
# look up name in tool_mapping if available, otherwise use block name
|
|
192
|
+
name=self.tool_mapping.get(block.name, block.name),
|
|
193
|
+
arguments=block.input
|
|
194
|
+
if isinstance(block.input, dict)
|
|
195
|
+
else block.input.__dict__,
|
|
252
196
|
)
|
|
253
197
|
result.tool_calls.append(tool_call)
|
|
254
198
|
result.done = False
|
|
255
199
|
elif block.type == "text":
|
|
256
200
|
text_content += block.text
|
|
257
201
|
elif hasattr(block, "type") and block.type == "thinking":
|
|
258
|
-
thinking_content
|
|
202
|
+
if thinking_content:
|
|
203
|
+
thinking_content += "\n"
|
|
204
|
+
thinking_content += block.thinking
|
|
259
205
|
|
|
260
|
-
|
|
206
|
+
result.content = text_content
|
|
261
207
|
if thinking_content:
|
|
262
|
-
result.
|
|
263
|
-
else:
|
|
264
|
-
result.content = text_content
|
|
208
|
+
result.reasoning = thinking_content
|
|
265
209
|
|
|
266
210
|
return result
|
|
267
211
|
|
|
@@ -303,81 +247,92 @@ class ClaudeAgent(MCPAgent):
|
|
|
303
247
|
|
|
304
248
|
# Return as a user message containing all tool results
|
|
305
249
|
return [
|
|
306
|
-
|
|
307
|
-
"
|
|
308
|
-
|
|
309
|
-
"role": "user",
|
|
310
|
-
"content": user_content,
|
|
311
|
-
},
|
|
250
|
+
BetaMessageParam(
|
|
251
|
+
role="user",
|
|
252
|
+
content=user_content,
|
|
312
253
|
)
|
|
313
254
|
]
|
|
314
255
|
|
|
315
256
|
async def create_user_message(self, text: str) -> BetaMessageParam:
|
|
316
257
|
"""Create a user message in Claude's format."""
|
|
317
|
-
return
|
|
258
|
+
return BetaMessageParam(role="user", content=text)
|
|
318
259
|
|
|
319
|
-
def _convert_tools_for_claude(self) ->
|
|
320
|
-
"""Convert MCP tools to Claude
|
|
321
|
-
claude_tools = []
|
|
322
|
-
self._claude_to_mcp_tool_map = {} # Reset mapping
|
|
260
|
+
def _convert_tools_for_claude(self) -> None:
|
|
261
|
+
"""Convert MCP tools to Claude API tools."""
|
|
323
262
|
|
|
324
|
-
#
|
|
325
|
-
|
|
326
|
-
selected_computer_tool = None
|
|
263
|
+
# First pass: identify all computer tools and find the longest match
|
|
264
|
+
available_tools = self.get_available_tools()
|
|
327
265
|
|
|
328
|
-
|
|
329
|
-
|
|
266
|
+
# find potential computer tools by priority
|
|
267
|
+
selected_computer_tool = None
|
|
268
|
+
computer_tool_names_by_priority = ["anthropic_computer", "computer_anthropic", "computer"]
|
|
269
|
+
for computer_tool_name in computer_tool_names_by_priority:
|
|
270
|
+
for tool in available_tools:
|
|
330
271
|
# Check both exact match and suffix match (for prefixed tools)
|
|
331
|
-
if tool.name ==
|
|
272
|
+
if tool.name == computer_tool_name or tool.name.endswith(f"_{computer_tool_name}"):
|
|
332
273
|
selected_computer_tool = tool
|
|
333
274
|
break
|
|
334
275
|
if selected_computer_tool:
|
|
335
276
|
break
|
|
336
277
|
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
278
|
+
def to_api_tool(tool: types.Tool) -> BetaToolUnionParam | None:
|
|
279
|
+
if tool.name == "str_replace_based_edit_tool":
|
|
280
|
+
return BetaToolTextEditor20250728Param(
|
|
281
|
+
type="text_editor_20250728",
|
|
282
|
+
name="str_replace_based_edit_tool",
|
|
283
|
+
)
|
|
284
|
+
if tool.name == "bash":
|
|
285
|
+
return BetaToolBash20250124Param(
|
|
286
|
+
type="bash_20250124",
|
|
287
|
+
name="bash",
|
|
288
|
+
)
|
|
289
|
+
if selected_computer_tool is not None:
|
|
290
|
+
if tool.name == selected_computer_tool.name:
|
|
291
|
+
return BetaToolComputerUse20250124Param(
|
|
292
|
+
type="computer_20250124",
|
|
293
|
+
name="computer",
|
|
294
|
+
display_number=1,
|
|
295
|
+
display_width_px=computer_settings.ANTHROPIC_COMPUTER_WIDTH,
|
|
296
|
+
display_height_px=computer_settings.ANTHROPIC_COMPUTER_HEIGHT,
|
|
297
|
+
)
|
|
298
|
+
elif tool.name == "computer":
|
|
299
|
+
logger.warning(
|
|
300
|
+
"Renamed tool %s to 'computer', dropping original 'computer' tool",
|
|
301
|
+
selected_computer_tool.name,
|
|
302
|
+
)
|
|
303
|
+
return None
|
|
304
|
+
|
|
305
|
+
if tool.description is None or tool.inputSchema is None:
|
|
306
|
+
raise ValueError(
|
|
307
|
+
cleandoc(f"""MCP tool {tool.name} requires both a description and inputSchema.
|
|
308
|
+
Add these by:
|
|
309
|
+
1. Adding a docstring to your @mcp.tool decorated function for the description
|
|
310
|
+
2. Using pydantic Field() annotations on function parameters for the schema
|
|
311
|
+
""")
|
|
312
|
+
)
|
|
351
313
|
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
tool.name == priority_name or tool.name.endswith(f"_{priority_name}")
|
|
357
|
-
for priority_name in computer_tool_priority
|
|
314
|
+
return BetaToolParam(
|
|
315
|
+
name=tool.name,
|
|
316
|
+
description=tool.description,
|
|
317
|
+
input_schema=tool.inputSchema,
|
|
358
318
|
)
|
|
359
|
-
if is_computer_tool or tool.name in self.lifecycle_tools:
|
|
360
|
-
continue
|
|
361
319
|
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
self.
|
|
373
|
-
claude_tools.append(claude_tool)
|
|
374
|
-
|
|
375
|
-
self.claude_tools = claude_tools
|
|
376
|
-
return claude_tools
|
|
320
|
+
self.has_computer_tool = False
|
|
321
|
+
self.tool_mapping = {}
|
|
322
|
+
self.claude_tools = []
|
|
323
|
+
for tool in available_tools:
|
|
324
|
+
claude_tool = to_api_tool(tool)
|
|
325
|
+
if claude_tool is None or "name" not in claude_tool:
|
|
326
|
+
continue
|
|
327
|
+
if claude_tool["name"] == "computer":
|
|
328
|
+
self.has_computer_tool = True
|
|
329
|
+
self.tool_mapping[claude_tool["name"]] = tool.name
|
|
330
|
+
self.claude_tools.append(claude_tool)
|
|
377
331
|
|
|
378
332
|
def _add_prompt_caching(self, messages: list[BetaMessageParam]) -> list[BetaMessageParam]:
|
|
379
333
|
"""Add prompt caching to messages."""
|
|
380
334
|
messages_cached = copy.deepcopy(messages)
|
|
335
|
+
cache_control = CacheControlEphemeralParam(type="ephemeral")
|
|
381
336
|
|
|
382
337
|
# Mark last user message with cache control
|
|
383
338
|
if (
|
|
@@ -391,20 +346,25 @@ class ClaudeAgent(MCPAgent):
|
|
|
391
346
|
for block in last_content:
|
|
392
347
|
# Only add cache control to dict-like block types that support it
|
|
393
348
|
if isinstance(block, dict):
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
349
|
+
match block["type"]:
|
|
350
|
+
case "redacted_thinking" | "thinking":
|
|
351
|
+
pass
|
|
352
|
+
case _:
|
|
353
|
+
block["cache_control"] = cache_control
|
|
398
354
|
|
|
399
355
|
return messages_cached
|
|
400
356
|
|
|
401
357
|
|
|
402
358
|
def base64_to_content_block(base64: str) -> BetaImageBlockParam:
|
|
403
359
|
"""Convert base64 image to Claude content block."""
|
|
404
|
-
return
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
360
|
+
return BetaImageBlockParam(
|
|
361
|
+
type="image",
|
|
362
|
+
source=BetaBase64ImageSourceParam(
|
|
363
|
+
type="base64",
|
|
364
|
+
media_type="image/png",
|
|
365
|
+
data=base64,
|
|
366
|
+
),
|
|
367
|
+
)
|
|
408
368
|
|
|
409
369
|
|
|
410
370
|
def text_to_content_block(text: str) -> BetaTextBlockParam:
|