hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +27 -7
- hud/agents/__init__.py +70 -5
- hud/agents/base.py +238 -500
- hud/agents/claude.py +236 -247
- hud/agents/gateway.py +42 -0
- hud/agents/gemini.py +264 -0
- hud/agents/gemini_cua.py +324 -0
- hud/agents/grounded_openai.py +98 -100
- hud/agents/misc/integration_test_agent.py +51 -20
- hud/agents/misc/response_agent.py +48 -36
- hud/agents/openai.py +282 -296
- hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
- hud/agents/operator.py +199 -0
- hud/agents/resolver.py +70 -0
- hud/agents/tests/conftest.py +133 -0
- hud/agents/tests/test_base.py +300 -622
- hud/agents/tests/test_base_runtime.py +233 -0
- hud/agents/tests/test_claude.py +381 -214
- hud/agents/tests/test_client.py +9 -10
- hud/agents/tests/test_gemini.py +369 -0
- hud/agents/tests/test_grounded_openai_agent.py +65 -50
- hud/agents/tests/test_openai.py +377 -140
- hud/agents/tests/test_operator.py +362 -0
- hud/agents/tests/test_resolver.py +192 -0
- hud/agents/tests/test_run_eval.py +179 -0
- hud/agents/types.py +148 -0
- hud/cli/__init__.py +493 -546
- hud/cli/analyze.py +43 -5
- hud/cli/build.py +699 -113
- hud/cli/debug.py +8 -5
- hud/cli/dev.py +889 -732
- hud/cli/eval.py +793 -667
- hud/cli/flows/dev.py +167 -0
- hud/cli/flows/init.py +191 -0
- hud/cli/flows/tasks.py +153 -56
- hud/cli/flows/templates.py +151 -0
- hud/cli/flows/tests/__init__.py +1 -0
- hud/cli/flows/tests/test_dev.py +126 -0
- hud/cli/init.py +60 -58
- hud/cli/pull.py +1 -1
- hud/cli/push.py +38 -13
- hud/cli/rft.py +311 -0
- hud/cli/rft_status.py +145 -0
- hud/cli/tests/test_analyze.py +5 -5
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +110 -8
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_init.py +6 -1
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +140 -0
- hud/cli/tests/test_convert.py +361 -0
- hud/cli/tests/test_debug.py +12 -10
- hud/cli/tests/test_dev.py +197 -0
- hud/cli/tests/test_eval.py +251 -0
- hud/cli/tests/test_eval_bedrock.py +51 -0
- hud/cli/tests/test_init.py +124 -0
- hud/cli/tests/test_main_module.py +11 -5
- hud/cli/tests/test_mcp_server.py +12 -100
- hud/cli/tests/test_push.py +1 -1
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/tests/test_registry.py +1 -1
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/{rl → utils}/celebrate.py +14 -12
- hud/cli/utils/config.py +18 -1
- hud/cli/utils/docker.py +130 -4
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/git.py +136 -0
- hud/cli/utils/interactive.py +39 -5
- hud/cli/utils/metadata.py +70 -1
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/server.py +2 -2
- hud/cli/utils/source_hash.py +3 -3
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_git.py +142 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +258 -0
- hud/cli/{rl → utils}/viewer.py +2 -2
- hud/clients/README.md +12 -11
- hud/clients/__init__.py +4 -3
- hud/clients/base.py +166 -26
- hud/clients/environment.py +51 -0
- hud/clients/fastmcp.py +13 -6
- hud/clients/mcp_use.py +45 -15
- hud/clients/tests/test_analyze_scenarios.py +206 -0
- hud/clients/tests/test_protocol.py +9 -3
- hud/datasets/__init__.py +23 -20
- hud/datasets/loader.py +326 -0
- hud/datasets/runner.py +198 -105
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_loader.py +221 -0
- hud/datasets/tests/test_utils.py +315 -0
- hud/datasets/utils.py +270 -90
- hud/environment/__init__.py +52 -0
- hud/environment/connection.py +258 -0
- hud/environment/connectors/__init__.py +33 -0
- hud/environment/connectors/base.py +68 -0
- hud/environment/connectors/local.py +177 -0
- hud/environment/connectors/mcp_config.py +137 -0
- hud/environment/connectors/openai.py +101 -0
- hud/environment/connectors/remote.py +172 -0
- hud/environment/environment.py +835 -0
- hud/environment/integrations/__init__.py +45 -0
- hud/environment/integrations/adk.py +67 -0
- hud/environment/integrations/anthropic.py +196 -0
- hud/environment/integrations/gemini.py +92 -0
- hud/environment/integrations/langchain.py +82 -0
- hud/environment/integrations/llamaindex.py +68 -0
- hud/environment/integrations/openai.py +238 -0
- hud/environment/mock.py +306 -0
- hud/environment/router.py +263 -0
- hud/environment/scenarios.py +620 -0
- hud/environment/tests/__init__.py +1 -0
- hud/environment/tests/test_connection.py +317 -0
- hud/environment/tests/test_connectors.py +205 -0
- hud/environment/tests/test_environment.py +593 -0
- hud/environment/tests/test_integrations.py +257 -0
- hud/environment/tests/test_local_connectors.py +242 -0
- hud/environment/tests/test_scenarios.py +1086 -0
- hud/environment/tests/test_tools.py +208 -0
- hud/environment/types.py +23 -0
- hud/environment/utils/__init__.py +35 -0
- hud/environment/utils/formats.py +215 -0
- hud/environment/utils/schema.py +171 -0
- hud/environment/utils/tool_wrappers.py +113 -0
- hud/eval/__init__.py +67 -0
- hud/eval/context.py +727 -0
- hud/eval/display.py +299 -0
- hud/eval/instrument.py +187 -0
- hud/eval/manager.py +533 -0
- hud/eval/parallel.py +268 -0
- hud/eval/task.py +372 -0
- hud/eval/tests/__init__.py +1 -0
- hud/eval/tests/test_context.py +178 -0
- hud/eval/tests/test_eval.py +210 -0
- hud/eval/tests/test_manager.py +152 -0
- hud/eval/tests/test_parallel.py +168 -0
- hud/eval/tests/test_task.py +291 -0
- hud/eval/types.py +65 -0
- hud/eval/utils.py +194 -0
- hud/patches/__init__.py +19 -0
- hud/patches/mcp_patches.py +308 -0
- hud/patches/warnings.py +54 -0
- hud/samples/browser.py +4 -4
- hud/server/__init__.py +2 -1
- hud/server/low_level.py +2 -1
- hud/server/router.py +164 -0
- hud/server/server.py +567 -80
- hud/server/tests/test_mcp_server_integration.py +11 -11
- hud/server/tests/test_mcp_server_more.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/settings.py +45 -3
- hud/shared/exceptions.py +36 -10
- hud/shared/hints.py +26 -1
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +40 -31
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +20 -19
- hud/telemetry/exporter.py +201 -0
- hud/telemetry/instrument.py +165 -253
- hud/telemetry/tests/test_eval_telemetry.py +356 -0
- hud/telemetry/tests/test_exporter.py +258 -0
- hud/telemetry/tests/test_instrument.py +401 -0
- hud/tools/__init__.py +18 -2
- hud/tools/agent.py +223 -0
- hud/tools/apply_patch.py +639 -0
- hud/tools/base.py +54 -4
- hud/tools/bash.py +2 -2
- hud/tools/computer/__init__.py +36 -3
- hud/tools/computer/anthropic.py +2 -2
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/hud.py +23 -6
- hud/tools/computer/openai.py +20 -21
- hud/tools/computer/qwen.py +434 -0
- hud/tools/computer/settings.py +37 -0
- hud/tools/edit.py +3 -7
- hud/tools/executors/base.py +4 -2
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/grounding/grounded_tool.py +13 -18
- hud/tools/grounding/grounder.py +10 -31
- hud/tools/grounding/tests/test_grounded_tool.py +26 -44
- hud/tools/jupyter.py +330 -0
- hud/tools/playwright.py +18 -3
- hud/tools/shell.py +308 -0
- hud/tools/tests/test_agent_tool.py +355 -0
- hud/tools/tests/test_apply_patch.py +718 -0
- hud/tools/tests/test_computer.py +4 -9
- hud/tools/tests/test_computer_actions.py +24 -2
- hud/tools/tests/test_jupyter_tool.py +181 -0
- hud/tools/tests/test_shell.py +596 -0
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/tools/types.py +21 -1
- hud/types.py +194 -56
- hud/utils/__init__.py +2 -0
- hud/utils/env.py +67 -0
- hud/utils/hud_console.py +89 -18
- hud/utils/mcp.py +15 -58
- hud/utils/strict_schema.py +162 -0
- hud/utils/tests/test_init.py +1 -2
- hud/utils/tests/test_mcp.py +1 -28
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/utils/types.py +20 -0
- hud/version.py +1 -1
- hud_python-0.5.13.dist-info/METADATA +264 -0
- hud_python-0.5.13.dist-info/RECORD +305 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
- hud/agents/langchain.py +0 -261
- hud/agents/lite_llm.py +0 -72
- hud/cli/rl/__init__.py +0 -180
- hud/cli/rl/config.py +0 -101
- hud/cli/rl/display.py +0 -133
- hud/cli/rl/gpu.py +0 -63
- hud/cli/rl/gpu_utils.py +0 -321
- hud/cli/rl/local_runner.py +0 -595
- hud/cli/rl/presets.py +0 -96
- hud/cli/rl/remote_runner.py +0 -463
- hud/cli/rl/rl_api.py +0 -150
- hud/cli/rl/vllm.py +0 -177
- hud/cli/rl/wait_utils.py +0 -89
- hud/datasets/parallel.py +0 -687
- hud/misc/__init__.py +0 -1
- hud/misc/claude_plays_pokemon.py +0 -292
- hud/otel/__init__.py +0 -35
- hud/otel/collector.py +0 -142
- hud/otel/config.py +0 -181
- hud/otel/context.py +0 -570
- hud/otel/exporters.py +0 -369
- hud/otel/instrumentation.py +0 -135
- hud/otel/processors.py +0 -121
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_processors.py +0 -197
- hud/rl/README.md +0 -30
- hud/rl/__init__.py +0 -1
- hud/rl/actor.py +0 -176
- hud/rl/buffer.py +0 -405
- hud/rl/chat_template.jinja +0 -101
- hud/rl/config.py +0 -192
- hud/rl/distributed.py +0 -132
- hud/rl/learner.py +0 -637
- hud/rl/tests/__init__.py +0 -1
- hud/rl/tests/test_learner.py +0 -186
- hud/rl/train.py +0 -382
- hud/rl/types.py +0 -101
- hud/rl/utils/start_vllm_server.sh +0 -30
- hud/rl/utils.py +0 -524
- hud/rl/vllm_adapter.py +0 -143
- hud/telemetry/job.py +0 -352
- hud/telemetry/replay.py +0 -74
- hud/telemetry/tests/test_replay.py +0 -40
- hud/telemetry/tests/test_trace.py +0 -63
- hud/telemetry/trace.py +0 -158
- hud/utils/agent_factories.py +0 -86
- hud/utils/async_utils.py +0 -65
- hud/utils/group_eval.py +0 -223
- hud/utils/progress.py +0 -149
- hud/utils/tasks.py +0 -127
- hud/utils/tests/test_async_utils.py +0 -173
- hud/utils/tests/test_progress.py +0 -261
- hud_python-0.4.45.dist-info/METADATA +0 -552
- hud_python-0.4.45.dist-info/RECORD +0 -228
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
hud/agents/tests/test_claude.py
CHANGED
|
@@ -2,11 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from typing import TYPE_CHECKING, cast
|
|
5
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
6
6
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
7
7
|
|
|
8
8
|
import pytest
|
|
9
|
-
from anthropic import
|
|
9
|
+
from anthropic import AsyncAnthropic, AsyncAnthropicBedrock
|
|
10
10
|
from mcp import types
|
|
11
11
|
|
|
12
12
|
from hud.agents.claude import (
|
|
@@ -15,18 +15,96 @@ from hud.agents.claude import (
|
|
|
15
15
|
text_to_content_block,
|
|
16
16
|
tool_use_content_block,
|
|
17
17
|
)
|
|
18
|
+
from hud.environment.router import ToolRouter
|
|
19
|
+
from hud.eval.context import EvalContext
|
|
18
20
|
from hud.types import MCPToolCall, MCPToolResult
|
|
19
21
|
|
|
20
22
|
if TYPE_CHECKING:
|
|
21
|
-
from
|
|
23
|
+
from collections.abc import Generator
|
|
24
|
+
|
|
25
|
+
from anthropic.types.beta import BetaMessageParam
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class MockEvalContext(EvalContext):
|
|
29
|
+
"""Mock EvalContext for testing."""
|
|
30
|
+
|
|
31
|
+
def __init__(self, tools: list[types.Tool] | None = None) -> None:
|
|
32
|
+
# Core attributes
|
|
33
|
+
self.prompt = "Test prompt"
|
|
34
|
+
self._tools = tools or []
|
|
35
|
+
self._submitted: str | None = None
|
|
36
|
+
self.reward: float | None = None
|
|
37
|
+
|
|
38
|
+
# Environment attributes
|
|
39
|
+
self._router = ToolRouter()
|
|
40
|
+
self._agent_include: list[str] | None = None
|
|
41
|
+
self._agent_exclude: list[str] | None = None
|
|
42
|
+
|
|
43
|
+
# EvalContext attributes
|
|
44
|
+
self._task = None
|
|
45
|
+
self.trace_id = "test-trace-id"
|
|
46
|
+
self.eval_name = "test-eval"
|
|
47
|
+
self.job_id: str | None = None
|
|
48
|
+
self.group_id: str | None = None
|
|
49
|
+
self.index = 0
|
|
50
|
+
self.variants: dict[str, Any] = {}
|
|
51
|
+
self.answer: str | None = None
|
|
52
|
+
self.system_prompt: str | None = None
|
|
53
|
+
self.error: BaseException | None = None
|
|
54
|
+
self.metadata: dict[str, Any] = {}
|
|
55
|
+
self.results: list[Any] = []
|
|
56
|
+
self._is_summary = False
|
|
57
|
+
|
|
58
|
+
def as_tools(self) -> list[types.Tool]:
|
|
59
|
+
return self._tools
|
|
60
|
+
|
|
61
|
+
@property
|
|
62
|
+
def has_scenario(self) -> bool:
|
|
63
|
+
return False
|
|
64
|
+
|
|
65
|
+
async def list_tools(self) -> list[types.Tool]:
|
|
66
|
+
return self._tools
|
|
67
|
+
|
|
68
|
+
async def call_tool(self, call: Any, /, **kwargs: Any) -> MCPToolResult:
|
|
69
|
+
return MCPToolResult(
|
|
70
|
+
content=[types.TextContent(type="text", text="ok")],
|
|
71
|
+
isError=False,
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
async def submit(self, answer: str) -> None:
|
|
75
|
+
self._submitted = answer
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class MockStreamContextManager:
|
|
79
|
+
"""Mock for Claude's streaming context manager."""
|
|
80
|
+
|
|
81
|
+
def __init__(self, response: MagicMock) -> None:
|
|
82
|
+
self.response = response
|
|
83
|
+
|
|
84
|
+
async def __aenter__(self) -> MockStreamContextManager:
|
|
85
|
+
return self
|
|
86
|
+
|
|
87
|
+
async def __aexit__(
|
|
88
|
+
self, exc_type: type | None, exc_val: Exception | None, exc_tb: Any
|
|
89
|
+
) -> bool:
|
|
90
|
+
return False
|
|
91
|
+
|
|
92
|
+
def __aiter__(self) -> MockStreamContextManager:
|
|
93
|
+
return self
|
|
94
|
+
|
|
95
|
+
async def __anext__(self) -> None:
|
|
96
|
+
raise StopAsyncIteration
|
|
97
|
+
|
|
98
|
+
async def get_final_message(self) -> MagicMock:
|
|
99
|
+
return self.response
|
|
22
100
|
|
|
23
101
|
|
|
24
102
|
class TestClaudeHelperFunctions:
|
|
25
103
|
"""Test helper functions for Claude message formatting."""
|
|
26
104
|
|
|
27
|
-
def test_base64_to_content_block(self):
|
|
105
|
+
def test_base64_to_content_block(self) -> None:
|
|
28
106
|
"""Test base64 image conversion."""
|
|
29
|
-
base64_data = "
|
|
107
|
+
base64_data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk"
|
|
30
108
|
result = base64_to_content_block(base64_data)
|
|
31
109
|
|
|
32
110
|
assert result["type"] == "image"
|
|
@@ -34,7 +112,7 @@ class TestClaudeHelperFunctions:
|
|
|
34
112
|
assert result["source"]["media_type"] == "image/png"
|
|
35
113
|
assert result["source"]["data"] == base64_data
|
|
36
114
|
|
|
37
|
-
def test_text_to_content_block(self):
|
|
115
|
+
def test_text_to_content_block(self) -> None:
|
|
38
116
|
"""Test text conversion."""
|
|
39
117
|
text = "Hello, world!"
|
|
40
118
|
result = text_to_content_block(text)
|
|
@@ -42,12 +120,10 @@ class TestClaudeHelperFunctions:
|
|
|
42
120
|
assert result["type"] == "text"
|
|
43
121
|
assert result["text"] == text
|
|
44
122
|
|
|
45
|
-
def test_tool_use_content_block(self):
|
|
123
|
+
def test_tool_use_content_block(self) -> None:
|
|
46
124
|
"""Test tool result content block creation."""
|
|
47
125
|
tool_use_id = "tool_123"
|
|
48
|
-
content
|
|
49
|
-
text_to_content_block("Result text")
|
|
50
|
-
]
|
|
126
|
+
content = [text_to_content_block("Result text")]
|
|
51
127
|
|
|
52
128
|
result = tool_use_content_block(tool_use_id, content)
|
|
53
129
|
|
|
@@ -60,192 +136,331 @@ class TestClaudeAgent:
|
|
|
60
136
|
"""Test ClaudeAgent class."""
|
|
61
137
|
|
|
62
138
|
@pytest.fixture
|
|
63
|
-
def
|
|
64
|
-
"""Create a
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
"""Create a mock Anthropic client."""
|
|
71
|
-
with patch("hud.agents.claude.AsyncAnthropic") as mock:
|
|
72
|
-
client = AsyncMock()
|
|
73
|
-
# Add beta attribute with messages
|
|
74
|
-
client.beta = AsyncMock()
|
|
75
|
-
client.beta.messages = AsyncMock()
|
|
76
|
-
mock.return_value = client
|
|
77
|
-
yield client
|
|
139
|
+
def mock_anthropic(self) -> Generator[AsyncAnthropic, None, None]: # type: ignore[misc]
|
|
140
|
+
"""Create a stub Anthropic client."""
|
|
141
|
+
with patch("hud.agents.claude.AsyncAnthropic") as mock_class:
|
|
142
|
+
client = MagicMock(spec=AsyncAnthropic)
|
|
143
|
+
client.api_key = "test-key"
|
|
144
|
+
mock_class.return_value = client
|
|
145
|
+
yield client # type: ignore[misc]
|
|
78
146
|
|
|
79
147
|
@pytest.mark.asyncio
|
|
80
|
-
async def
|
|
81
|
-
"""Test agent initialization."""
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
model_client=mock_model_client,
|
|
87
|
-
model="claude-3-opus-20240229",
|
|
88
|
-
max_tokens=1000,
|
|
89
|
-
validate_api_key=False, # Skip validation in tests
|
|
148
|
+
async def test_init_with_client(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
149
|
+
"""Test agent initialization with provided client."""
|
|
150
|
+
agent = ClaudeAgent.create(
|
|
151
|
+
model_client=mock_anthropic,
|
|
152
|
+
model="claude-sonnet-4-20250514",
|
|
153
|
+
validate_api_key=False,
|
|
90
154
|
)
|
|
91
155
|
|
|
92
|
-
assert agent.model_name == "
|
|
93
|
-
assert agent.
|
|
94
|
-
assert agent.anthropic_client ==
|
|
156
|
+
assert agent.model_name == "Claude"
|
|
157
|
+
assert agent.config.model == "claude-sonnet-4-20250514"
|
|
158
|
+
assert agent.anthropic_client == mock_anthropic
|
|
95
159
|
|
|
96
160
|
@pytest.mark.asyncio
|
|
97
|
-
async def
|
|
98
|
-
"""Test agent initialization
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
161
|
+
async def test_init_with_parameters(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
162
|
+
"""Test agent initialization with various parameters."""
|
|
163
|
+
agent = ClaudeAgent.create(
|
|
164
|
+
model_client=mock_anthropic,
|
|
165
|
+
model="claude-sonnet-4-20250514",
|
|
166
|
+
max_tokens=4096,
|
|
167
|
+
validate_api_key=False,
|
|
168
|
+
)
|
|
105
169
|
|
|
106
|
-
|
|
107
|
-
assert agent.anthropic_client is not None
|
|
170
|
+
assert agent.max_tokens == 4096
|
|
108
171
|
|
|
109
172
|
@pytest.mark.asyncio
|
|
110
|
-
async def
|
|
111
|
-
"""Test formatting content blocks
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
model_client=mock_model_client,
|
|
116
|
-
validate_api_key=False, # Skip validation in tests
|
|
173
|
+
async def test_format_blocks_text_only(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
174
|
+
"""Test formatting text content blocks."""
|
|
175
|
+
agent = ClaudeAgent.create(
|
|
176
|
+
model_client=mock_anthropic,
|
|
177
|
+
validate_api_key=False,
|
|
117
178
|
)
|
|
118
179
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
types.TextContent(type="text", text="
|
|
180
|
+
blocks: list[types.ContentBlock] = [
|
|
181
|
+
types.TextContent(type="text", text="Hello, world!"),
|
|
182
|
+
types.TextContent(type="text", text="How are you?"),
|
|
122
183
|
]
|
|
123
|
-
|
|
184
|
+
|
|
185
|
+
messages = await agent.format_blocks(blocks)
|
|
124
186
|
assert len(messages) == 1
|
|
125
187
|
assert messages[0]["role"] == "user"
|
|
126
188
|
content = messages[0]["content"]
|
|
127
189
|
assert isinstance(content, list)
|
|
128
|
-
assert len(content) ==
|
|
129
|
-
assert content[0]["type"] == "text"
|
|
130
|
-
assert content[0]["text"] == "Hello,
|
|
190
|
+
assert len(content) == 2
|
|
191
|
+
assert content[0]["type"] == "text" # type: ignore[index]
|
|
192
|
+
assert content[0]["text"] == "Hello, world!" # type: ignore[index]
|
|
193
|
+
|
|
194
|
+
@pytest.mark.asyncio
|
|
195
|
+
async def test_format_blocks_with_image(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
196
|
+
"""Test formatting image content blocks."""
|
|
197
|
+
agent = ClaudeAgent.create(
|
|
198
|
+
model_client=mock_anthropic,
|
|
199
|
+
validate_api_key=False,
|
|
200
|
+
)
|
|
131
201
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
types.TextContent(type="text", text="Look at this"),
|
|
202
|
+
blocks: list[types.ContentBlock] = [
|
|
203
|
+
types.TextContent(type="text", text="Look at this:"),
|
|
135
204
|
types.ImageContent(type="image", data="base64data", mimeType="image/png"),
|
|
136
205
|
]
|
|
137
|
-
|
|
206
|
+
|
|
207
|
+
messages = await agent.format_blocks(blocks)
|
|
138
208
|
assert len(messages) == 1
|
|
139
|
-
assert messages[0]["role"] == "user"
|
|
140
209
|
content = messages[0]["content"]
|
|
141
210
|
assert isinstance(content, list)
|
|
142
211
|
assert len(content) == 2
|
|
143
|
-
|
|
144
|
-
assert content[0]["type"] == "text"
|
|
145
|
-
assert content[0]["text"] == "Look at this"
|
|
146
|
-
assert content[1]["type"] == "image"
|
|
147
|
-
assert content[1]["source"]["data"] == "base64data"
|
|
212
|
+
assert content[1]["type"] == "image" # type: ignore[index]
|
|
148
213
|
|
|
149
214
|
@pytest.mark.asyncio
|
|
150
|
-
async def
|
|
151
|
-
"""Test
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
model_client=mock_model_client,
|
|
156
|
-
validate_api_key=False, # Skip validation in tests
|
|
215
|
+
async def test_format_tool_results_text(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
216
|
+
"""Test formatting tool results with text content."""
|
|
217
|
+
agent = ClaudeAgent.create(
|
|
218
|
+
model_client=mock_anthropic,
|
|
219
|
+
validate_api_key=False,
|
|
157
220
|
)
|
|
158
221
|
|
|
159
|
-
tool_calls = [
|
|
160
|
-
MCPToolCall(name="test_tool", arguments={}, id="id1"),
|
|
161
|
-
]
|
|
162
|
-
|
|
222
|
+
tool_calls = [MCPToolCall(id="call_123", name="test_tool", arguments={})]
|
|
163
223
|
tool_results = [
|
|
164
|
-
MCPToolResult(
|
|
224
|
+
MCPToolResult(
|
|
225
|
+
content=[types.TextContent(type="text", text="Tool output")],
|
|
226
|
+
isError=False,
|
|
227
|
+
)
|
|
165
228
|
]
|
|
166
229
|
|
|
167
230
|
messages = await agent.format_tool_results(tool_calls, tool_results)
|
|
168
|
-
|
|
169
|
-
# format_tool_results returns a single user message with tool result content
|
|
170
231
|
assert len(messages) == 1
|
|
171
232
|
assert messages[0]["role"] == "user"
|
|
172
|
-
|
|
173
|
-
content
|
|
233
|
+
content = messages[0]["content"]
|
|
234
|
+
assert isinstance(content, list)
|
|
174
235
|
assert len(content) == 1
|
|
175
|
-
assert content[0]["type"] == "tool_result" # type: ignore
|
|
176
|
-
assert content[0]["tool_use_id"] == "
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
236
|
+
assert content[0]["type"] == "tool_result" # type: ignore[index]
|
|
237
|
+
assert content[0]["tool_use_id"] == "call_123" # type: ignore[index]
|
|
238
|
+
|
|
239
|
+
@pytest.mark.asyncio
|
|
240
|
+
async def test_format_tool_results_with_error(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
241
|
+
"""Test formatting tool results with error."""
|
|
242
|
+
agent = ClaudeAgent.create(
|
|
243
|
+
model_client=mock_anthropic,
|
|
244
|
+
validate_api_key=False,
|
|
245
|
+
)
|
|
246
|
+
|
|
247
|
+
tool_calls = [MCPToolCall(id="call_123", name="test_tool", arguments={})]
|
|
248
|
+
tool_results = [
|
|
249
|
+
MCPToolResult(
|
|
250
|
+
content=[types.TextContent(type="text", text="Error message")],
|
|
251
|
+
isError=True,
|
|
252
|
+
)
|
|
253
|
+
]
|
|
254
|
+
|
|
255
|
+
messages = await agent.format_tool_results(tool_calls, tool_results)
|
|
256
|
+
assert len(messages) == 1
|
|
257
|
+
content = messages[0]["content"]
|
|
258
|
+
# Error content should include "Error:" prefix
|
|
259
|
+
assert any("Error" in str(block) for block in content[0]["content"]) # type: ignore[index]
|
|
260
|
+
|
|
261
|
+
@pytest.mark.asyncio
|
|
262
|
+
async def test_get_system_messages(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
263
|
+
"""Test that system messages return empty (Claude uses system param)."""
|
|
264
|
+
agent = ClaudeAgent.create(
|
|
265
|
+
model_client=mock_anthropic,
|
|
266
|
+
system_prompt="You are a helpful assistant.",
|
|
267
|
+
validate_api_key=False,
|
|
268
|
+
)
|
|
269
|
+
|
|
270
|
+
messages = await agent.get_system_messages()
|
|
271
|
+
# Claude doesn't use system messages in the message list
|
|
272
|
+
assert messages == []
|
|
181
273
|
|
|
182
274
|
@pytest.mark.asyncio
|
|
183
|
-
async def
|
|
184
|
-
"""Test getting model response
|
|
185
|
-
# Disable telemetry for this test to avoid backend configuration issues
|
|
275
|
+
async def test_get_response_with_thinking(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
276
|
+
"""Test getting model response with thinking content."""
|
|
186
277
|
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
187
|
-
agent = ClaudeAgent(
|
|
188
|
-
mcp_client=mock_mcp_client,
|
|
278
|
+
agent = ClaudeAgent.create(
|
|
189
279
|
model_client=mock_anthropic,
|
|
190
|
-
validate_api_key=False,
|
|
280
|
+
validate_api_key=False,
|
|
191
281
|
)
|
|
282
|
+
# Set up agent as initialized
|
|
283
|
+
agent.claude_tools = []
|
|
284
|
+
agent.tool_mapping = {}
|
|
285
|
+
agent.has_computer_tool = False
|
|
286
|
+
agent._initialized = True
|
|
192
287
|
|
|
193
|
-
# Mock the API response
|
|
194
288
|
mock_response = MagicMock()
|
|
195
289
|
|
|
196
|
-
|
|
290
|
+
thinking_block = MagicMock()
|
|
291
|
+
thinking_block.type = "thinking"
|
|
292
|
+
thinking_block.thinking = "Let me analyze this problem..."
|
|
293
|
+
|
|
197
294
|
text_block = MagicMock()
|
|
198
295
|
text_block.type = "text"
|
|
199
|
-
text_block.text = "
|
|
296
|
+
text_block.text = "Here is the answer"
|
|
200
297
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
tool_block.type = "tool_use"
|
|
204
|
-
tool_block.id = "tool_123"
|
|
205
|
-
tool_block.name = "test_tool"
|
|
206
|
-
tool_block.input = {"param": "value"}
|
|
298
|
+
mock_response.content = [thinking_block, text_block]
|
|
299
|
+
mock_response.usage = MagicMock(input_tokens=10, output_tokens=30)
|
|
207
300
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
mock_anthropic.beta.messages.create = AsyncMock(return_value=mock_response)
|
|
301
|
+
mock_stream = MockStreamContextManager(mock_response)
|
|
302
|
+
mock_anthropic.beta.messages.stream = MagicMock(return_value=mock_stream)
|
|
211
303
|
|
|
212
304
|
messages = [
|
|
213
305
|
cast(
|
|
214
306
|
"BetaMessageParam",
|
|
215
|
-
{"role": "user", "content": [{"type": "text", "text": "
|
|
307
|
+
{"role": "user", "content": [{"type": "text", "text": "Hard question"}]},
|
|
216
308
|
)
|
|
217
309
|
]
|
|
218
310
|
response = await agent.get_response(messages)
|
|
219
311
|
|
|
220
|
-
assert response.content == "
|
|
221
|
-
assert
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
312
|
+
assert response.content == "Here is the answer"
|
|
313
|
+
assert response.reasoning == "Let me analyze this problem..."
|
|
314
|
+
|
|
315
|
+
@pytest.mark.asyncio
|
|
316
|
+
async def test_convert_tools_for_claude(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
317
|
+
"""Test converting MCP tools to Claude format."""
|
|
318
|
+
tools = [
|
|
319
|
+
types.Tool(
|
|
320
|
+
name="my_tool",
|
|
321
|
+
description="A test tool",
|
|
322
|
+
inputSchema={"type": "object", "properties": {"x": {"type": "string"}}},
|
|
323
|
+
)
|
|
324
|
+
]
|
|
325
|
+
ctx = MockEvalContext(tools=tools)
|
|
326
|
+
agent = ClaudeAgent.create(
|
|
327
|
+
model_client=mock_anthropic,
|
|
328
|
+
validate_api_key=False,
|
|
329
|
+
)
|
|
330
|
+
|
|
331
|
+
agent.ctx = ctx
|
|
332
|
+
await agent._initialize_from_ctx(ctx)
|
|
333
|
+
|
|
334
|
+
# Check that tools were converted
|
|
335
|
+
assert len(agent.claude_tools) == 1
|
|
336
|
+
assert agent.claude_tools[0]["name"] == "my_tool" # type: ignore[typeddict-item]
|
|
337
|
+
|
|
338
|
+
@pytest.mark.asyncio
|
|
339
|
+
async def test_computer_tool_detection(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
340
|
+
"""Test that computer tools are detected for beta API."""
|
|
341
|
+
tools = [
|
|
342
|
+
types.Tool(
|
|
343
|
+
name="computer",
|
|
344
|
+
description="Control computer",
|
|
345
|
+
inputSchema={"type": "object"},
|
|
346
|
+
)
|
|
347
|
+
]
|
|
348
|
+
ctx = MockEvalContext(tools=tools)
|
|
349
|
+
agent = ClaudeAgent.create(
|
|
350
|
+
model_client=mock_anthropic,
|
|
351
|
+
validate_api_key=False,
|
|
352
|
+
)
|
|
353
|
+
|
|
354
|
+
agent.ctx = ctx
|
|
355
|
+
await agent._initialize_from_ctx(ctx)
|
|
356
|
+
|
|
357
|
+
assert agent.has_computer_tool is True
|
|
358
|
+
|
|
359
|
+
@pytest.mark.asyncio
|
|
360
|
+
async def test_get_response_with_text(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
361
|
+
"""Test getting response with text output."""
|
|
362
|
+
# Create mock response
|
|
363
|
+
mock_response = MagicMock()
|
|
364
|
+
mock_response.content = [MagicMock(type="text", text="Hello!")]
|
|
365
|
+
|
|
366
|
+
mock_stream = MockStreamContextManager(mock_response)
|
|
367
|
+
mock_anthropic.beta.messages.stream = MagicMock(return_value=mock_stream)
|
|
368
|
+
|
|
369
|
+
agent = ClaudeAgent.create(
|
|
370
|
+
model_client=mock_anthropic,
|
|
371
|
+
validate_api_key=False,
|
|
372
|
+
)
|
|
373
|
+
agent.claude_tools = []
|
|
374
|
+
agent.tool_mapping = {}
|
|
375
|
+
agent.has_computer_tool = False
|
|
376
|
+
agent._initialized = True
|
|
377
|
+
|
|
378
|
+
response = await agent.get_response([])
|
|
379
|
+
assert response.content == "Hello!"
|
|
380
|
+
assert response.done is True
|
|
381
|
+
assert len(response.tool_calls) == 0
|
|
382
|
+
|
|
383
|
+
@pytest.mark.asyncio
|
|
384
|
+
async def test_get_response_with_tool_call(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
385
|
+
"""Test getting response with tool call."""
|
|
386
|
+
mock_tool_use = MagicMock()
|
|
387
|
+
mock_tool_use.type = "tool_use"
|
|
388
|
+
mock_tool_use.id = "call_123"
|
|
389
|
+
mock_tool_use.name = "my_tool"
|
|
390
|
+
mock_tool_use.input = {"x": "value"}
|
|
391
|
+
|
|
392
|
+
mock_response = MagicMock()
|
|
393
|
+
mock_response.content = [mock_tool_use]
|
|
394
|
+
|
|
395
|
+
mock_stream = MockStreamContextManager(mock_response)
|
|
396
|
+
mock_anthropic.beta.messages.stream = MagicMock(return_value=mock_stream)
|
|
397
|
+
|
|
398
|
+
agent = ClaudeAgent.create(
|
|
399
|
+
model_client=mock_anthropic,
|
|
400
|
+
validate_api_key=False,
|
|
401
|
+
)
|
|
402
|
+
agent.claude_tools = []
|
|
403
|
+
agent.tool_mapping = {"my_tool": "my_tool"}
|
|
404
|
+
agent.has_computer_tool = False
|
|
405
|
+
agent._initialized = True
|
|
406
|
+
|
|
407
|
+
response = await agent.get_response([])
|
|
408
|
+
assert response.done is False
|
|
409
|
+
assert len(response.tool_calls) == 1
|
|
410
|
+
assert response.tool_calls[0].name == "my_tool"
|
|
411
|
+
assert response.tool_calls[0].arguments == {"x": "value"}
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
class TestClaudeAgentBedrock:
|
|
415
|
+
"""Test ClaudeAgent class with Bedrock."""
|
|
416
|
+
|
|
417
|
+
@pytest.fixture
|
|
418
|
+
def bedrock_client(self) -> AsyncAnthropicBedrock:
|
|
419
|
+
"""Create a real AsyncAnthropicBedrock client and stub networked methods."""
|
|
420
|
+
client = AsyncAnthropicBedrock(
|
|
421
|
+
aws_access_key="AKIATEST",
|
|
422
|
+
aws_secret_key="secret",
|
|
423
|
+
aws_region="us-east-1",
|
|
424
|
+
)
|
|
425
|
+
# Stub the actual Bedrock call so tests are hermetic.
|
|
426
|
+
client.beta.messages.create = AsyncMock()
|
|
427
|
+
return client
|
|
428
|
+
|
|
429
|
+
@pytest.mark.asyncio
|
|
430
|
+
async def test_init(self, bedrock_client: AsyncAnthropicBedrock) -> None:
|
|
431
|
+
"""Test agent initialization."""
|
|
432
|
+
agent = ClaudeAgent.create(
|
|
433
|
+
model_client=bedrock_client,
|
|
434
|
+
model="test-model-arn",
|
|
435
|
+
validate_api_key=False,
|
|
436
|
+
)
|
|
226
437
|
|
|
227
|
-
|
|
228
|
-
|
|
438
|
+
assert agent.model_name == "Claude"
|
|
439
|
+
assert agent.config.model == "test-model-arn"
|
|
440
|
+
assert agent.anthropic_client == bedrock_client
|
|
229
441
|
|
|
230
442
|
@pytest.mark.asyncio
|
|
231
|
-
async def
|
|
232
|
-
|
|
233
|
-
|
|
443
|
+
async def test_get_response_bedrock_uses_create_not_stream(
|
|
444
|
+
self, bedrock_client: AsyncAnthropicBedrock
|
|
445
|
+
) -> None:
|
|
446
|
+
"""Bedrock path must call messages.create() (Bedrock doesn't support stream())."""
|
|
234
447
|
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
235
|
-
agent = ClaudeAgent(
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
validate_api_key=False,
|
|
448
|
+
agent = ClaudeAgent.create(
|
|
449
|
+
model_client=bedrock_client,
|
|
450
|
+
model="test-model-arn",
|
|
451
|
+
validate_api_key=False,
|
|
239
452
|
)
|
|
240
453
|
|
|
454
|
+
# Enable computer tool to verify betas list includes computer-use in Bedrock mode.
|
|
455
|
+
agent.has_computer_tool = True
|
|
456
|
+
|
|
241
457
|
mock_response = MagicMock()
|
|
242
|
-
# Create text block
|
|
243
458
|
text_block = MagicMock()
|
|
244
459
|
text_block.type = "text"
|
|
245
|
-
text_block.text = "
|
|
460
|
+
text_block.text = "Hello from Bedrock"
|
|
246
461
|
mock_response.content = [text_block]
|
|
247
|
-
|
|
248
|
-
|
|
462
|
+
|
|
463
|
+
bedrock_client.beta.messages.create.return_value = mock_response # type: ignore[union-attr]
|
|
249
464
|
|
|
250
465
|
messages = [
|
|
251
466
|
cast(
|
|
@@ -255,95 +470,47 @@ class TestClaudeAgent:
|
|
|
255
470
|
]
|
|
256
471
|
response = await agent.get_response(messages)
|
|
257
472
|
|
|
258
|
-
assert response.content == "
|
|
473
|
+
assert response.content == "Hello from Bedrock"
|
|
259
474
|
assert response.tool_calls == []
|
|
260
475
|
|
|
476
|
+
# Bedrock-specific behavior: uses create() and appends assistant message directly.
|
|
477
|
+
assert not hasattr(bedrock_client.beta.messages, "stream")
|
|
478
|
+
bedrock_client.beta.messages.create.assert_awaited_once() # type: ignore[union-attr]
|
|
479
|
+
assert len(messages) == 2
|
|
480
|
+
assert messages[-1]["role"] == "assistant"
|
|
481
|
+
|
|
482
|
+
# Ensure the Bedrock call shape is stable.
|
|
483
|
+
_, kwargs = bedrock_client.beta.messages.create.call_args # type: ignore[union-attr]
|
|
484
|
+
assert kwargs["model"] == "test-model-arn"
|
|
485
|
+
assert kwargs["tool_choice"] == {"type": "auto", "disable_parallel_tool_use": True}
|
|
486
|
+
assert "fine-grained-tool-streaming-2025-05-14" in kwargs["betas"]
|
|
487
|
+
assert "computer-use-2025-01-24" in kwargs["betas"]
|
|
488
|
+
|
|
261
489
|
@pytest.mark.asyncio
|
|
262
|
-
async def
|
|
263
|
-
|
|
264
|
-
|
|
490
|
+
async def test_get_response_bedrock_missing_boto3_raises_value_error(
|
|
491
|
+
self, bedrock_client: AsyncAnthropicBedrock
|
|
492
|
+
) -> None:
|
|
493
|
+
"""If boto3 isn't installed, Bedrock client import path should raise a clear ValueError."""
|
|
265
494
|
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
266
|
-
agent = ClaudeAgent(
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
validate_api_key=False,
|
|
270
|
-
)
|
|
271
|
-
|
|
272
|
-
# Mock API error
|
|
273
|
-
mock_anthropic.beta.messages.create = AsyncMock(
|
|
274
|
-
side_effect=BadRequestError(
|
|
275
|
-
message="Invalid request",
|
|
276
|
-
response=MagicMock(status_code=400),
|
|
277
|
-
body={"error": {"message": "Invalid request"}},
|
|
278
|
-
)
|
|
495
|
+
agent = ClaudeAgent.create(
|
|
496
|
+
model_client=bedrock_client,
|
|
497
|
+
model="test-model-arn",
|
|
498
|
+
validate_api_key=False,
|
|
279
499
|
)
|
|
280
500
|
|
|
501
|
+
bedrock_client.beta.messages.create.side_effect = ModuleNotFoundError("boto3") # type: ignore[union-attr]
|
|
281
502
|
messages = [{"role": "user", "content": [{"type": "text", "text": "Hi"}]}]
|
|
282
503
|
|
|
283
|
-
with pytest.raises(
|
|
504
|
+
with pytest.raises(ValueError, match=r"boto3 is required for AWS Bedrock"):
|
|
284
505
|
await agent.get_response(messages) # type: ignore
|
|
285
506
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
# agent._available_tools = [
|
|
297
|
-
# types.Tool(
|
|
298
|
-
# name="calculator", description="Calculator", inputSchema={"type": "object"}
|
|
299
|
-
# )
|
|
300
|
-
# ]
|
|
301
|
-
# agent._tool_map = {
|
|
302
|
-
# "calculator": types.Tool(
|
|
303
|
-
# name="calculator", description="Calculator", inputSchema={"type": "object"}
|
|
304
|
-
# )
|
|
305
|
-
# }
|
|
306
|
-
|
|
307
|
-
# # Mock initial response with tool use
|
|
308
|
-
# initial_response = MagicMock()
|
|
309
|
-
# # Create tool use block
|
|
310
|
-
# tool_block = MagicMock()
|
|
311
|
-
# tool_block.type = "tool_use"
|
|
312
|
-
# tool_block.id = "calc_123"
|
|
313
|
-
# tool_block.name = "calculator"
|
|
314
|
-
# tool_block.input = {"operation": "add", "a": 2, "b": 3}
|
|
315
|
-
# initial_response.content = [tool_block]
|
|
316
|
-
# initial_response.usage = MagicMock(input_tokens=10, output_tokens=15)
|
|
317
|
-
|
|
318
|
-
# # Mock follow-up response
|
|
319
|
-
# final_response = MagicMock()
|
|
320
|
-
# text_block = MagicMock()
|
|
321
|
-
# text_block.type = "text"
|
|
322
|
-
# text_block.text = "2 + 3 = 5"
|
|
323
|
-
# final_response.content = [text_block]
|
|
324
|
-
# final_response.usage = MagicMock(input_tokens=20, output_tokens=10)
|
|
325
|
-
|
|
326
|
-
# mock_anthropic.beta.messages.create = AsyncMock(
|
|
327
|
-
# side_effect=[initial_response, final_response]
|
|
328
|
-
# )
|
|
329
|
-
|
|
330
|
-
# # Mock tool execution
|
|
331
|
-
# mock_mcp_client.call_tool = AsyncMock(
|
|
332
|
-
# return_value=MCPToolResult(
|
|
333
|
-
# content=[types.TextContent(type="text", text="5")], isError=False
|
|
334
|
-
# )
|
|
335
|
-
# )
|
|
336
|
-
|
|
337
|
-
# # Mock the mcp_client properties
|
|
338
|
-
# mock_mcp_client.mcp_config = {"test_server": {"url": "http://localhost"}}
|
|
339
|
-
# mock_mcp_client.list_tools = AsyncMock(return_value=agent._available_tools)
|
|
340
|
-
# mock_mcp_client.initialize = AsyncMock()
|
|
341
|
-
|
|
342
|
-
# # Initialize the agent
|
|
343
|
-
# await agent.initialize()
|
|
344
|
-
|
|
345
|
-
# # Use a string prompt instead of a task
|
|
346
|
-
# result = await agent.run("What is 2 + 3?")
|
|
347
|
-
|
|
348
|
-
# assert result.content == "2 + 3 = 5"
|
|
349
|
-
# assert result.done is True
|
|
507
|
+
def test_init_with_bedrock_client_does_not_require_anthropic_api_key(
|
|
508
|
+
self, bedrock_client: AsyncAnthropicBedrock
|
|
509
|
+
) -> None:
|
|
510
|
+
"""Providing model_client should bypass ANTHROPIC_API_KEY validation."""
|
|
511
|
+
with patch("hud.settings.settings.anthropic_api_key", None):
|
|
512
|
+
agent = ClaudeAgent.create(
|
|
513
|
+
model_client=bedrock_client,
|
|
514
|
+
validate_api_key=False,
|
|
515
|
+
)
|
|
516
|
+
assert agent.anthropic_client == bedrock_client
|