hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +27 -7
- hud/agents/__init__.py +11 -5
- hud/agents/base.py +220 -500
- hud/agents/claude.py +200 -240
- hud/agents/gemini.py +275 -0
- hud/agents/gemini_cua.py +335 -0
- hud/agents/grounded_openai.py +98 -100
- hud/agents/misc/integration_test_agent.py +51 -20
- hud/agents/misc/response_agent.py +41 -36
- hud/agents/openai.py +291 -292
- hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
- hud/agents/operator.py +211 -0
- hud/agents/tests/conftest.py +133 -0
- hud/agents/tests/test_base.py +300 -622
- hud/agents/tests/test_base_runtime.py +233 -0
- hud/agents/tests/test_claude.py +379 -210
- hud/agents/tests/test_client.py +9 -10
- hud/agents/tests/test_gemini.py +369 -0
- hud/agents/tests/test_grounded_openai_agent.py +65 -50
- hud/agents/tests/test_openai.py +376 -140
- hud/agents/tests/test_operator.py +362 -0
- hud/agents/tests/test_run_eval.py +179 -0
- hud/cli/__init__.py +461 -545
- hud/cli/analyze.py +43 -5
- hud/cli/build.py +664 -110
- hud/cli/debug.py +8 -5
- hud/cli/dev.py +882 -734
- hud/cli/eval.py +782 -668
- hud/cli/flows/dev.py +167 -0
- hud/cli/flows/init.py +191 -0
- hud/cli/flows/tasks.py +153 -56
- hud/cli/flows/templates.py +151 -0
- hud/cli/flows/tests/__init__.py +1 -0
- hud/cli/flows/tests/test_dev.py +126 -0
- hud/cli/init.py +60 -58
- hud/cli/push.py +29 -11
- hud/cli/rft.py +311 -0
- hud/cli/rft_status.py +145 -0
- hud/cli/tests/test_analyze.py +5 -5
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +108 -6
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_init.py +6 -1
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +140 -0
- hud/cli/tests/test_convert.py +361 -0
- hud/cli/tests/test_debug.py +12 -10
- hud/cli/tests/test_dev.py +197 -0
- hud/cli/tests/test_eval.py +251 -0
- hud/cli/tests/test_eval_bedrock.py +51 -0
- hud/cli/tests/test_init.py +124 -0
- hud/cli/tests/test_main_module.py +11 -5
- hud/cli/tests/test_mcp_server.py +12 -100
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/tests/test_registry.py +1 -1
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/{rl → utils}/celebrate.py +14 -12
- hud/cli/utils/config.py +18 -1
- hud/cli/utils/docker.py +130 -4
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/git.py +136 -0
- hud/cli/utils/interactive.py +39 -5
- hud/cli/utils/metadata.py +69 -0
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/server.py +2 -2
- hud/cli/utils/source_hash.py +3 -3
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_git.py +142 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +258 -0
- hud/cli/{rl → utils}/viewer.py +2 -2
- hud/clients/README.md +12 -11
- hud/clients/__init__.py +4 -3
- hud/clients/base.py +166 -26
- hud/clients/environment.py +51 -0
- hud/clients/fastmcp.py +13 -6
- hud/clients/mcp_use.py +40 -15
- hud/clients/tests/test_analyze_scenarios.py +206 -0
- hud/clients/tests/test_protocol.py +9 -3
- hud/datasets/__init__.py +23 -20
- hud/datasets/loader.py +327 -0
- hud/datasets/runner.py +192 -105
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_loader.py +221 -0
- hud/datasets/tests/test_utils.py +315 -0
- hud/datasets/utils.py +270 -90
- hud/environment/__init__.py +50 -0
- hud/environment/connection.py +206 -0
- hud/environment/connectors/__init__.py +33 -0
- hud/environment/connectors/base.py +68 -0
- hud/environment/connectors/local.py +177 -0
- hud/environment/connectors/mcp_config.py +109 -0
- hud/environment/connectors/openai.py +101 -0
- hud/environment/connectors/remote.py +172 -0
- hud/environment/environment.py +694 -0
- hud/environment/integrations/__init__.py +45 -0
- hud/environment/integrations/adk.py +67 -0
- hud/environment/integrations/anthropic.py +196 -0
- hud/environment/integrations/gemini.py +92 -0
- hud/environment/integrations/langchain.py +82 -0
- hud/environment/integrations/llamaindex.py +68 -0
- hud/environment/integrations/openai.py +238 -0
- hud/environment/mock.py +306 -0
- hud/environment/router.py +112 -0
- hud/environment/scenarios.py +493 -0
- hud/environment/tests/__init__.py +1 -0
- hud/environment/tests/test_connection.py +317 -0
- hud/environment/tests/test_connectors.py +218 -0
- hud/environment/tests/test_environment.py +161 -0
- hud/environment/tests/test_integrations.py +257 -0
- hud/environment/tests/test_local_connectors.py +201 -0
- hud/environment/tests/test_scenarios.py +280 -0
- hud/environment/tests/test_tools.py +208 -0
- hud/environment/types.py +23 -0
- hud/environment/utils/__init__.py +35 -0
- hud/environment/utils/formats.py +215 -0
- hud/environment/utils/schema.py +171 -0
- hud/environment/utils/tool_wrappers.py +113 -0
- hud/eval/__init__.py +67 -0
- hud/eval/context.py +674 -0
- hud/eval/display.py +299 -0
- hud/eval/instrument.py +185 -0
- hud/eval/manager.py +466 -0
- hud/eval/parallel.py +268 -0
- hud/eval/task.py +340 -0
- hud/eval/tests/__init__.py +1 -0
- hud/eval/tests/test_context.py +178 -0
- hud/eval/tests/test_eval.py +210 -0
- hud/eval/tests/test_manager.py +152 -0
- hud/eval/tests/test_parallel.py +168 -0
- hud/eval/tests/test_task.py +145 -0
- hud/eval/types.py +63 -0
- hud/eval/utils.py +183 -0
- hud/patches/__init__.py +19 -0
- hud/patches/mcp_patches.py +151 -0
- hud/patches/warnings.py +54 -0
- hud/samples/browser.py +4 -4
- hud/server/__init__.py +2 -1
- hud/server/low_level.py +2 -1
- hud/server/router.py +164 -0
- hud/server/server.py +567 -80
- hud/server/tests/test_mcp_server_integration.py +11 -11
- hud/server/tests/test_mcp_server_more.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/settings.py +45 -3
- hud/shared/exceptions.py +36 -10
- hud/shared/hints.py +26 -1
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +40 -31
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +20 -19
- hud/telemetry/exporter.py +201 -0
- hud/telemetry/instrument.py +158 -253
- hud/telemetry/tests/test_eval_telemetry.py +356 -0
- hud/telemetry/tests/test_exporter.py +258 -0
- hud/telemetry/tests/test_instrument.py +401 -0
- hud/tools/__init__.py +16 -2
- hud/tools/apply_patch.py +639 -0
- hud/tools/base.py +54 -4
- hud/tools/bash.py +2 -2
- hud/tools/computer/__init__.py +4 -0
- hud/tools/computer/anthropic.py +2 -2
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/hud.py +23 -6
- hud/tools/computer/openai.py +20 -21
- hud/tools/computer/qwen.py +434 -0
- hud/tools/computer/settings.py +37 -0
- hud/tools/edit.py +3 -7
- hud/tools/executors/base.py +4 -2
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/grounding/grounded_tool.py +13 -18
- hud/tools/grounding/grounder.py +10 -31
- hud/tools/grounding/tests/test_grounded_tool.py +26 -44
- hud/tools/jupyter.py +330 -0
- hud/tools/playwright.py +18 -3
- hud/tools/shell.py +308 -0
- hud/tools/tests/test_apply_patch.py +718 -0
- hud/tools/tests/test_computer.py +4 -9
- hud/tools/tests/test_computer_actions.py +24 -2
- hud/tools/tests/test_jupyter_tool.py +181 -0
- hud/tools/tests/test_shell.py +596 -0
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/tools/types.py +21 -1
- hud/types.py +167 -57
- hud/utils/__init__.py +2 -0
- hud/utils/env.py +67 -0
- hud/utils/hud_console.py +61 -3
- hud/utils/mcp.py +15 -58
- hud/utils/strict_schema.py +162 -0
- hud/utils/tests/test_init.py +1 -2
- hud/utils/tests/test_mcp.py +1 -28
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/utils/types.py +20 -0
- hud/version.py +1 -1
- hud_python-0.5.1.dist-info/METADATA +264 -0
- hud_python-0.5.1.dist-info/RECORD +299 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
- hud/agents/langchain.py +0 -261
- hud/agents/lite_llm.py +0 -72
- hud/cli/rl/__init__.py +0 -180
- hud/cli/rl/config.py +0 -101
- hud/cli/rl/display.py +0 -133
- hud/cli/rl/gpu.py +0 -63
- hud/cli/rl/gpu_utils.py +0 -321
- hud/cli/rl/local_runner.py +0 -595
- hud/cli/rl/presets.py +0 -96
- hud/cli/rl/remote_runner.py +0 -463
- hud/cli/rl/rl_api.py +0 -150
- hud/cli/rl/vllm.py +0 -177
- hud/cli/rl/wait_utils.py +0 -89
- hud/datasets/parallel.py +0 -687
- hud/misc/__init__.py +0 -1
- hud/misc/claude_plays_pokemon.py +0 -292
- hud/otel/__init__.py +0 -35
- hud/otel/collector.py +0 -142
- hud/otel/config.py +0 -181
- hud/otel/context.py +0 -570
- hud/otel/exporters.py +0 -369
- hud/otel/instrumentation.py +0 -135
- hud/otel/processors.py +0 -121
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_processors.py +0 -197
- hud/rl/README.md +0 -30
- hud/rl/__init__.py +0 -1
- hud/rl/actor.py +0 -176
- hud/rl/buffer.py +0 -405
- hud/rl/chat_template.jinja +0 -101
- hud/rl/config.py +0 -192
- hud/rl/distributed.py +0 -132
- hud/rl/learner.py +0 -637
- hud/rl/tests/__init__.py +0 -1
- hud/rl/tests/test_learner.py +0 -186
- hud/rl/train.py +0 -382
- hud/rl/types.py +0 -101
- hud/rl/utils/start_vllm_server.sh +0 -30
- hud/rl/utils.py +0 -524
- hud/rl/vllm_adapter.py +0 -143
- hud/telemetry/job.py +0 -352
- hud/telemetry/replay.py +0 -74
- hud/telemetry/tests/test_replay.py +0 -40
- hud/telemetry/tests/test_trace.py +0 -63
- hud/telemetry/trace.py +0 -158
- hud/utils/agent_factories.py +0 -86
- hud/utils/async_utils.py +0 -65
- hud/utils/group_eval.py +0 -223
- hud/utils/progress.py +0 -149
- hud/utils/tasks.py +0 -127
- hud/utils/tests/test_async_utils.py +0 -173
- hud/utils/tests/test_progress.py +0 -261
- hud_python-0.4.45.dist-info/METADATA +0 -552
- hud_python-0.4.45.dist-info/RECORD +0 -228
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0
hud/agents/tests/test_claude.py
CHANGED
|
@@ -2,11 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from typing import TYPE_CHECKING, cast
|
|
5
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
6
6
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
7
7
|
|
|
8
8
|
import pytest
|
|
9
|
-
from anthropic import
|
|
9
|
+
from anthropic import AsyncAnthropic, AsyncAnthropicBedrock
|
|
10
10
|
from mcp import types
|
|
11
11
|
|
|
12
12
|
from hud.agents.claude import (
|
|
@@ -15,18 +15,96 @@ from hud.agents.claude import (
|
|
|
15
15
|
text_to_content_block,
|
|
16
16
|
tool_use_content_block,
|
|
17
17
|
)
|
|
18
|
+
from hud.environment.router import ToolRouter
|
|
19
|
+
from hud.eval.context import EvalContext
|
|
18
20
|
from hud.types import MCPToolCall, MCPToolResult
|
|
19
21
|
|
|
20
22
|
if TYPE_CHECKING:
|
|
23
|
+
from collections.abc import Generator
|
|
24
|
+
|
|
21
25
|
from anthropic.types.beta import BetaImageBlockParam, BetaMessageParam, BetaTextBlockParam
|
|
22
26
|
|
|
23
27
|
|
|
28
|
+
class MockEvalContext(EvalContext):
|
|
29
|
+
"""Mock EvalContext for testing."""
|
|
30
|
+
|
|
31
|
+
def __init__(self, tools: list[types.Tool] | None = None) -> None:
|
|
32
|
+
# Core attributes
|
|
33
|
+
self.prompt = "Test prompt"
|
|
34
|
+
self._tools = tools or []
|
|
35
|
+
self._submitted: str | None = None
|
|
36
|
+
self.reward: float | None = None
|
|
37
|
+
|
|
38
|
+
# Environment attributes
|
|
39
|
+
self._router = ToolRouter()
|
|
40
|
+
self._agent_include: list[str] | None = None
|
|
41
|
+
self._agent_exclude: list[str] | None = None
|
|
42
|
+
|
|
43
|
+
# EvalContext attributes
|
|
44
|
+
self._task = None
|
|
45
|
+
self.trace_id = "test-trace-id"
|
|
46
|
+
self.eval_name = "test-eval"
|
|
47
|
+
self.job_id: str | None = None
|
|
48
|
+
self.group_id: str | None = None
|
|
49
|
+
self.index = 0
|
|
50
|
+
self.variants: dict[str, Any] = {}
|
|
51
|
+
self.answer: str | None = None
|
|
52
|
+
self.system_prompt: str | None = None
|
|
53
|
+
self.error: BaseException | None = None
|
|
54
|
+
self.metadata: dict[str, Any] = {}
|
|
55
|
+
self.results: list[Any] = []
|
|
56
|
+
self._is_summary = False
|
|
57
|
+
|
|
58
|
+
def as_tools(self) -> list[types.Tool]:
|
|
59
|
+
return self._tools
|
|
60
|
+
|
|
61
|
+
@property
|
|
62
|
+
def has_scenario(self) -> bool:
|
|
63
|
+
return False
|
|
64
|
+
|
|
65
|
+
async def list_tools(self) -> list[types.Tool]:
|
|
66
|
+
return self._tools
|
|
67
|
+
|
|
68
|
+
async def call_tool(self, call: Any, /, **kwargs: Any) -> MCPToolResult:
|
|
69
|
+
return MCPToolResult(
|
|
70
|
+
content=[types.TextContent(type="text", text="ok")],
|
|
71
|
+
isError=False,
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
async def submit(self, answer: str) -> None:
|
|
75
|
+
self._submitted = answer
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class MockStreamContextManager:
|
|
79
|
+
"""Mock for Claude's streaming context manager."""
|
|
80
|
+
|
|
81
|
+
def __init__(self, response: MagicMock) -> None:
|
|
82
|
+
self.response = response
|
|
83
|
+
|
|
84
|
+
async def __aenter__(self) -> MockStreamContextManager:
|
|
85
|
+
return self
|
|
86
|
+
|
|
87
|
+
async def __aexit__(
|
|
88
|
+
self, exc_type: type | None, exc_val: Exception | None, exc_tb: Any
|
|
89
|
+
) -> bool:
|
|
90
|
+
return False
|
|
91
|
+
|
|
92
|
+
def __aiter__(self) -> MockStreamContextManager:
|
|
93
|
+
return self
|
|
94
|
+
|
|
95
|
+
async def __anext__(self) -> None:
|
|
96
|
+
raise StopAsyncIteration
|
|
97
|
+
|
|
98
|
+
async def get_final_message(self) -> MagicMock:
|
|
99
|
+
return self.response
|
|
100
|
+
|
|
101
|
+
|
|
24
102
|
class TestClaudeHelperFunctions:
|
|
25
103
|
"""Test helper functions for Claude message formatting."""
|
|
26
104
|
|
|
27
|
-
def test_base64_to_content_block(self):
|
|
105
|
+
def test_base64_to_content_block(self) -> None:
|
|
28
106
|
"""Test base64 image conversion."""
|
|
29
|
-
base64_data = "
|
|
107
|
+
base64_data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk"
|
|
30
108
|
result = base64_to_content_block(base64_data)
|
|
31
109
|
|
|
32
110
|
assert result["type"] == "image"
|
|
@@ -34,7 +112,7 @@ class TestClaudeHelperFunctions:
|
|
|
34
112
|
assert result["source"]["media_type"] == "image/png"
|
|
35
113
|
assert result["source"]["data"] == base64_data
|
|
36
114
|
|
|
37
|
-
def test_text_to_content_block(self):
|
|
115
|
+
def test_text_to_content_block(self) -> None:
|
|
38
116
|
"""Test text conversion."""
|
|
39
117
|
text = "Hello, world!"
|
|
40
118
|
result = text_to_content_block(text)
|
|
@@ -42,7 +120,7 @@ class TestClaudeHelperFunctions:
|
|
|
42
120
|
assert result["type"] == "text"
|
|
43
121
|
assert result["text"] == text
|
|
44
122
|
|
|
45
|
-
def test_tool_use_content_block(self):
|
|
123
|
+
def test_tool_use_content_block(self) -> None:
|
|
46
124
|
"""Test tool result content block creation."""
|
|
47
125
|
tool_use_id = "tool_123"
|
|
48
126
|
content: list[BetaTextBlockParam | BetaImageBlockParam] = [
|
|
@@ -60,192 +138,331 @@ class TestClaudeAgent:
|
|
|
60
138
|
"""Test ClaudeAgent class."""
|
|
61
139
|
|
|
62
140
|
@pytest.fixture
|
|
63
|
-
def
|
|
64
|
-
"""Create a
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
"""Create a mock Anthropic client."""
|
|
71
|
-
with patch("hud.agents.claude.AsyncAnthropic") as mock:
|
|
72
|
-
client = AsyncMock()
|
|
73
|
-
# Add beta attribute with messages
|
|
74
|
-
client.beta = AsyncMock()
|
|
75
|
-
client.beta.messages = AsyncMock()
|
|
76
|
-
mock.return_value = client
|
|
77
|
-
yield client
|
|
141
|
+
def mock_anthropic(self) -> Generator[AsyncAnthropic, None, None]: # type: ignore[misc]
|
|
142
|
+
"""Create a stub Anthropic client."""
|
|
143
|
+
with patch("hud.agents.claude.AsyncAnthropic") as mock_class:
|
|
144
|
+
client = MagicMock(spec=AsyncAnthropic)
|
|
145
|
+
client.api_key = "test-key"
|
|
146
|
+
mock_class.return_value = client
|
|
147
|
+
yield client # type: ignore[misc]
|
|
78
148
|
|
|
79
149
|
@pytest.mark.asyncio
|
|
80
|
-
async def
|
|
81
|
-
"""Test agent initialization."""
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
model_client=mock_model_client,
|
|
87
|
-
model="claude-3-opus-20240229",
|
|
88
|
-
max_tokens=1000,
|
|
89
|
-
validate_api_key=False, # Skip validation in tests
|
|
150
|
+
async def test_init_with_client(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
151
|
+
"""Test agent initialization with provided client."""
|
|
152
|
+
agent = ClaudeAgent.create(
|
|
153
|
+
model_client=mock_anthropic,
|
|
154
|
+
model="claude-sonnet-4-20250514",
|
|
155
|
+
validate_api_key=False,
|
|
90
156
|
)
|
|
91
157
|
|
|
92
|
-
assert agent.model_name == "
|
|
93
|
-
assert agent.
|
|
94
|
-
assert agent.anthropic_client ==
|
|
158
|
+
assert agent.model_name == "Claude"
|
|
159
|
+
assert agent.config.model == "claude-sonnet-4-20250514"
|
|
160
|
+
assert agent.anthropic_client == mock_anthropic
|
|
95
161
|
|
|
96
162
|
@pytest.mark.asyncio
|
|
97
|
-
async def
|
|
98
|
-
"""Test agent initialization
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
163
|
+
async def test_init_with_parameters(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
164
|
+
"""Test agent initialization with various parameters."""
|
|
165
|
+
agent = ClaudeAgent.create(
|
|
166
|
+
model_client=mock_anthropic,
|
|
167
|
+
model="claude-sonnet-4-20250514",
|
|
168
|
+
max_tokens=4096,
|
|
169
|
+
validate_api_key=False,
|
|
170
|
+
)
|
|
105
171
|
|
|
106
|
-
|
|
107
|
-
assert agent.anthropic_client is not None
|
|
172
|
+
assert agent.max_tokens == 4096
|
|
108
173
|
|
|
109
174
|
@pytest.mark.asyncio
|
|
110
|
-
async def
|
|
111
|
-
"""Test formatting content blocks
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
model_client=mock_model_client,
|
|
116
|
-
validate_api_key=False, # Skip validation in tests
|
|
175
|
+
async def test_format_blocks_text_only(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
176
|
+
"""Test formatting text content blocks."""
|
|
177
|
+
agent = ClaudeAgent.create(
|
|
178
|
+
model_client=mock_anthropic,
|
|
179
|
+
validate_api_key=False,
|
|
117
180
|
)
|
|
118
181
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
types.TextContent(type="text", text="
|
|
182
|
+
blocks: list[types.ContentBlock] = [
|
|
183
|
+
types.TextContent(type="text", text="Hello, world!"),
|
|
184
|
+
types.TextContent(type="text", text="How are you?"),
|
|
122
185
|
]
|
|
123
|
-
|
|
186
|
+
|
|
187
|
+
messages = await agent.format_blocks(blocks)
|
|
124
188
|
assert len(messages) == 1
|
|
125
189
|
assert messages[0]["role"] == "user"
|
|
126
190
|
content = messages[0]["content"]
|
|
127
191
|
assert isinstance(content, list)
|
|
128
|
-
assert len(content) ==
|
|
129
|
-
assert content[0]["type"] == "text"
|
|
130
|
-
assert content[0]["text"] == "Hello,
|
|
192
|
+
assert len(content) == 2
|
|
193
|
+
assert content[0]["type"] == "text" # type: ignore[index]
|
|
194
|
+
assert content[0]["text"] == "Hello, world!" # type: ignore[index]
|
|
131
195
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
196
|
+
@pytest.mark.asyncio
|
|
197
|
+
async def test_format_blocks_with_image(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
198
|
+
"""Test formatting image content blocks."""
|
|
199
|
+
agent = ClaudeAgent.create(
|
|
200
|
+
model_client=mock_anthropic,
|
|
201
|
+
validate_api_key=False,
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
blocks: list[types.ContentBlock] = [
|
|
205
|
+
types.TextContent(type="text", text="Look at this:"),
|
|
135
206
|
types.ImageContent(type="image", data="base64data", mimeType="image/png"),
|
|
136
207
|
]
|
|
137
|
-
|
|
208
|
+
|
|
209
|
+
messages = await agent.format_blocks(blocks)
|
|
138
210
|
assert len(messages) == 1
|
|
139
|
-
assert messages[0]["role"] == "user"
|
|
140
211
|
content = messages[0]["content"]
|
|
141
212
|
assert isinstance(content, list)
|
|
142
213
|
assert len(content) == 2
|
|
143
|
-
|
|
144
|
-
assert content[0]["type"] == "text"
|
|
145
|
-
assert content[0]["text"] == "Look at this"
|
|
146
|
-
assert content[1]["type"] == "image"
|
|
147
|
-
assert content[1]["source"]["data"] == "base64data"
|
|
214
|
+
assert content[1]["type"] == "image" # type: ignore[index]
|
|
148
215
|
|
|
149
216
|
@pytest.mark.asyncio
|
|
150
|
-
async def
|
|
151
|
-
"""Test
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
model_client=mock_model_client,
|
|
156
|
-
validate_api_key=False, # Skip validation in tests
|
|
217
|
+
async def test_format_tool_results_text(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
218
|
+
"""Test formatting tool results with text content."""
|
|
219
|
+
agent = ClaudeAgent.create(
|
|
220
|
+
model_client=mock_anthropic,
|
|
221
|
+
validate_api_key=False,
|
|
157
222
|
)
|
|
158
223
|
|
|
159
|
-
tool_calls = [
|
|
160
|
-
MCPToolCall(name="test_tool", arguments={}, id="id1"),
|
|
161
|
-
]
|
|
162
|
-
|
|
224
|
+
tool_calls = [MCPToolCall(id="call_123", name="test_tool", arguments={})]
|
|
163
225
|
tool_results = [
|
|
164
|
-
MCPToolResult(
|
|
226
|
+
MCPToolResult(
|
|
227
|
+
content=[types.TextContent(type="text", text="Tool output")],
|
|
228
|
+
isError=False,
|
|
229
|
+
)
|
|
165
230
|
]
|
|
166
231
|
|
|
167
232
|
messages = await agent.format_tool_results(tool_calls, tool_results)
|
|
168
|
-
|
|
169
|
-
# format_tool_results returns a single user message with tool result content
|
|
170
233
|
assert len(messages) == 1
|
|
171
234
|
assert messages[0]["role"] == "user"
|
|
172
|
-
|
|
173
|
-
content
|
|
235
|
+
content = messages[0]["content"]
|
|
236
|
+
assert isinstance(content, list)
|
|
174
237
|
assert len(content) == 1
|
|
175
|
-
assert content[0]["type"] == "tool_result" # type: ignore
|
|
176
|
-
assert content[0]["tool_use_id"] == "
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
238
|
+
assert content[0]["type"] == "tool_result" # type: ignore[index]
|
|
239
|
+
assert content[0]["tool_use_id"] == "call_123" # type: ignore[index]
|
|
240
|
+
|
|
241
|
+
@pytest.mark.asyncio
|
|
242
|
+
async def test_format_tool_results_with_error(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
243
|
+
"""Test formatting tool results with error."""
|
|
244
|
+
agent = ClaudeAgent.create(
|
|
245
|
+
model_client=mock_anthropic,
|
|
246
|
+
validate_api_key=False,
|
|
247
|
+
)
|
|
248
|
+
|
|
249
|
+
tool_calls = [MCPToolCall(id="call_123", name="test_tool", arguments={})]
|
|
250
|
+
tool_results = [
|
|
251
|
+
MCPToolResult(
|
|
252
|
+
content=[types.TextContent(type="text", text="Error message")],
|
|
253
|
+
isError=True,
|
|
254
|
+
)
|
|
255
|
+
]
|
|
256
|
+
|
|
257
|
+
messages = await agent.format_tool_results(tool_calls, tool_results)
|
|
258
|
+
assert len(messages) == 1
|
|
259
|
+
content = messages[0]["content"]
|
|
260
|
+
# Error content should include "Error:" prefix
|
|
261
|
+
assert any("Error" in str(block) for block in content[0]["content"]) # type: ignore[index]
|
|
262
|
+
|
|
263
|
+
@pytest.mark.asyncio
|
|
264
|
+
async def test_get_system_messages(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
265
|
+
"""Test that system messages return empty (Claude uses system param)."""
|
|
266
|
+
agent = ClaudeAgent.create(
|
|
267
|
+
model_client=mock_anthropic,
|
|
268
|
+
system_prompt="You are a helpful assistant.",
|
|
269
|
+
validate_api_key=False,
|
|
270
|
+
)
|
|
271
|
+
|
|
272
|
+
messages = await agent.get_system_messages()
|
|
273
|
+
# Claude doesn't use system messages in the message list
|
|
274
|
+
assert messages == []
|
|
181
275
|
|
|
182
276
|
@pytest.mark.asyncio
|
|
183
|
-
async def
|
|
184
|
-
"""Test getting model response
|
|
185
|
-
# Disable telemetry for this test to avoid backend configuration issues
|
|
277
|
+
async def test_get_response_with_thinking(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
278
|
+
"""Test getting model response with thinking content."""
|
|
186
279
|
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
187
|
-
agent = ClaudeAgent(
|
|
188
|
-
mcp_client=mock_mcp_client,
|
|
280
|
+
agent = ClaudeAgent.create(
|
|
189
281
|
model_client=mock_anthropic,
|
|
190
|
-
validate_api_key=False,
|
|
282
|
+
validate_api_key=False,
|
|
191
283
|
)
|
|
284
|
+
# Set up agent as initialized
|
|
285
|
+
agent.claude_tools = []
|
|
286
|
+
agent.tool_mapping = {}
|
|
287
|
+
agent.has_computer_tool = False
|
|
288
|
+
agent._initialized = True
|
|
192
289
|
|
|
193
|
-
# Mock the API response
|
|
194
290
|
mock_response = MagicMock()
|
|
195
291
|
|
|
196
|
-
|
|
292
|
+
thinking_block = MagicMock()
|
|
293
|
+
thinking_block.type = "thinking"
|
|
294
|
+
thinking_block.thinking = "Let me analyze this problem..."
|
|
295
|
+
|
|
197
296
|
text_block = MagicMock()
|
|
198
297
|
text_block.type = "text"
|
|
199
|
-
text_block.text = "
|
|
298
|
+
text_block.text = "Here is the answer"
|
|
200
299
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
tool_block.type = "tool_use"
|
|
204
|
-
tool_block.id = "tool_123"
|
|
205
|
-
tool_block.name = "test_tool"
|
|
206
|
-
tool_block.input = {"param": "value"}
|
|
300
|
+
mock_response.content = [thinking_block, text_block]
|
|
301
|
+
mock_response.usage = MagicMock(input_tokens=10, output_tokens=30)
|
|
207
302
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
mock_anthropic.beta.messages.create = AsyncMock(return_value=mock_response)
|
|
303
|
+
mock_stream = MockStreamContextManager(mock_response)
|
|
304
|
+
mock_anthropic.beta.messages.stream = MagicMock(return_value=mock_stream)
|
|
211
305
|
|
|
212
306
|
messages = [
|
|
213
307
|
cast(
|
|
214
308
|
"BetaMessageParam",
|
|
215
|
-
{"role": "user", "content": [{"type": "text", "text": "
|
|
309
|
+
{"role": "user", "content": [{"type": "text", "text": "Hard question"}]},
|
|
216
310
|
)
|
|
217
311
|
]
|
|
218
312
|
response = await agent.get_response(messages)
|
|
219
313
|
|
|
220
|
-
assert response.content == "
|
|
221
|
-
assert
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
314
|
+
assert response.content == "Here is the answer"
|
|
315
|
+
assert response.reasoning == "Let me analyze this problem..."
|
|
316
|
+
|
|
317
|
+
@pytest.mark.asyncio
|
|
318
|
+
async def test_convert_tools_for_claude(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
319
|
+
"""Test converting MCP tools to Claude format."""
|
|
320
|
+
tools = [
|
|
321
|
+
types.Tool(
|
|
322
|
+
name="my_tool",
|
|
323
|
+
description="A test tool",
|
|
324
|
+
inputSchema={"type": "object", "properties": {"x": {"type": "string"}}},
|
|
325
|
+
)
|
|
326
|
+
]
|
|
327
|
+
ctx = MockEvalContext(tools=tools)
|
|
328
|
+
agent = ClaudeAgent.create(
|
|
329
|
+
model_client=mock_anthropic,
|
|
330
|
+
validate_api_key=False,
|
|
331
|
+
)
|
|
332
|
+
|
|
333
|
+
agent.ctx = ctx
|
|
334
|
+
await agent._initialize_from_ctx(ctx)
|
|
226
335
|
|
|
227
|
-
|
|
228
|
-
|
|
336
|
+
# Check that tools were converted
|
|
337
|
+
assert len(agent.claude_tools) == 1
|
|
338
|
+
assert agent.claude_tools[0]["name"] == "my_tool" # type: ignore[typeddict-item]
|
|
229
339
|
|
|
230
340
|
@pytest.mark.asyncio
|
|
231
|
-
async def
|
|
232
|
-
"""Test
|
|
233
|
-
|
|
341
|
+
async def test_computer_tool_detection(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
342
|
+
"""Test that computer tools are detected for beta API."""
|
|
343
|
+
tools = [
|
|
344
|
+
types.Tool(
|
|
345
|
+
name="computer",
|
|
346
|
+
description="Control computer",
|
|
347
|
+
inputSchema={"type": "object"},
|
|
348
|
+
)
|
|
349
|
+
]
|
|
350
|
+
ctx = MockEvalContext(tools=tools)
|
|
351
|
+
agent = ClaudeAgent.create(
|
|
352
|
+
model_client=mock_anthropic,
|
|
353
|
+
validate_api_key=False,
|
|
354
|
+
)
|
|
355
|
+
|
|
356
|
+
agent.ctx = ctx
|
|
357
|
+
await agent._initialize_from_ctx(ctx)
|
|
358
|
+
|
|
359
|
+
assert agent.has_computer_tool is True
|
|
360
|
+
|
|
361
|
+
@pytest.mark.asyncio
|
|
362
|
+
async def test_get_response_with_text(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
363
|
+
"""Test getting response with text output."""
|
|
364
|
+
# Create mock response
|
|
365
|
+
mock_response = MagicMock()
|
|
366
|
+
mock_response.content = [MagicMock(type="text", text="Hello!")]
|
|
367
|
+
|
|
368
|
+
mock_stream = MockStreamContextManager(mock_response)
|
|
369
|
+
mock_anthropic.beta.messages.stream = MagicMock(return_value=mock_stream)
|
|
370
|
+
|
|
371
|
+
agent = ClaudeAgent.create(
|
|
372
|
+
model_client=mock_anthropic,
|
|
373
|
+
validate_api_key=False,
|
|
374
|
+
)
|
|
375
|
+
agent.claude_tools = []
|
|
376
|
+
agent.tool_mapping = {}
|
|
377
|
+
agent.has_computer_tool = False
|
|
378
|
+
agent._initialized = True
|
|
379
|
+
|
|
380
|
+
response = await agent.get_response([])
|
|
381
|
+
assert response.content == "Hello!"
|
|
382
|
+
assert response.done is True
|
|
383
|
+
assert len(response.tool_calls) == 0
|
|
384
|
+
|
|
385
|
+
@pytest.mark.asyncio
|
|
386
|
+
async def test_get_response_with_tool_call(self, mock_anthropic: AsyncAnthropic) -> None:
|
|
387
|
+
"""Test getting response with tool call."""
|
|
388
|
+
mock_tool_use = MagicMock()
|
|
389
|
+
mock_tool_use.type = "tool_use"
|
|
390
|
+
mock_tool_use.id = "call_123"
|
|
391
|
+
mock_tool_use.name = "my_tool"
|
|
392
|
+
mock_tool_use.input = {"x": "value"}
|
|
393
|
+
|
|
394
|
+
mock_response = MagicMock()
|
|
395
|
+
mock_response.content = [mock_tool_use]
|
|
396
|
+
|
|
397
|
+
mock_stream = MockStreamContextManager(mock_response)
|
|
398
|
+
mock_anthropic.beta.messages.stream = MagicMock(return_value=mock_stream)
|
|
399
|
+
|
|
400
|
+
agent = ClaudeAgent.create(
|
|
401
|
+
model_client=mock_anthropic,
|
|
402
|
+
validate_api_key=False,
|
|
403
|
+
)
|
|
404
|
+
agent.claude_tools = []
|
|
405
|
+
agent.tool_mapping = {"my_tool": "my_tool"}
|
|
406
|
+
agent.has_computer_tool = False
|
|
407
|
+
agent._initialized = True
|
|
408
|
+
|
|
409
|
+
response = await agent.get_response([])
|
|
410
|
+
assert response.done is False
|
|
411
|
+
assert len(response.tool_calls) == 1
|
|
412
|
+
assert response.tool_calls[0].name == "my_tool"
|
|
413
|
+
assert response.tool_calls[0].arguments == {"x": "value"}
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
class TestClaudeAgentBedrock:
|
|
417
|
+
"""Test ClaudeAgent class with Bedrock."""
|
|
418
|
+
|
|
419
|
+
@pytest.fixture
|
|
420
|
+
def bedrock_client(self) -> AsyncAnthropicBedrock:
|
|
421
|
+
"""Create a real AsyncAnthropicBedrock client and stub networked methods."""
|
|
422
|
+
client = AsyncAnthropicBedrock(
|
|
423
|
+
aws_access_key="AKIATEST",
|
|
424
|
+
aws_secret_key="secret",
|
|
425
|
+
aws_region="us-east-1",
|
|
426
|
+
)
|
|
427
|
+
# Stub the actual Bedrock call so tests are hermetic.
|
|
428
|
+
client.beta.messages.create = AsyncMock()
|
|
429
|
+
return client
|
|
430
|
+
|
|
431
|
+
@pytest.mark.asyncio
|
|
432
|
+
async def test_init(self, bedrock_client: AsyncAnthropicBedrock) -> None:
|
|
433
|
+
"""Test agent initialization."""
|
|
434
|
+
agent = ClaudeAgent.create(
|
|
435
|
+
model_client=bedrock_client,
|
|
436
|
+
model="test-model-arn",
|
|
437
|
+
validate_api_key=False,
|
|
438
|
+
)
|
|
439
|
+
|
|
440
|
+
assert agent.model_name == "Claude"
|
|
441
|
+
assert agent.config.model == "test-model-arn"
|
|
442
|
+
assert agent.anthropic_client == bedrock_client
|
|
443
|
+
|
|
444
|
+
@pytest.mark.asyncio
|
|
445
|
+
async def test_get_response_bedrock_uses_create_not_stream(
|
|
446
|
+
self, bedrock_client: AsyncAnthropicBedrock
|
|
447
|
+
) -> None:
|
|
448
|
+
"""Bedrock path must call messages.create() (Bedrock doesn't support stream())."""
|
|
234
449
|
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
235
|
-
agent = ClaudeAgent(
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
validate_api_key=False,
|
|
450
|
+
agent = ClaudeAgent.create(
|
|
451
|
+
model_client=bedrock_client,
|
|
452
|
+
model="test-model-arn",
|
|
453
|
+
validate_api_key=False,
|
|
239
454
|
)
|
|
240
455
|
|
|
456
|
+
# Enable computer tool to verify betas list includes computer-use in Bedrock mode.
|
|
457
|
+
agent.has_computer_tool = True
|
|
458
|
+
|
|
241
459
|
mock_response = MagicMock()
|
|
242
|
-
# Create text block
|
|
243
460
|
text_block = MagicMock()
|
|
244
461
|
text_block.type = "text"
|
|
245
|
-
text_block.text = "
|
|
462
|
+
text_block.text = "Hello from Bedrock"
|
|
246
463
|
mock_response.content = [text_block]
|
|
247
|
-
|
|
248
|
-
|
|
464
|
+
|
|
465
|
+
bedrock_client.beta.messages.create.return_value = mock_response # type: ignore[union-attr]
|
|
249
466
|
|
|
250
467
|
messages = [
|
|
251
468
|
cast(
|
|
@@ -255,95 +472,47 @@ class TestClaudeAgent:
|
|
|
255
472
|
]
|
|
256
473
|
response = await agent.get_response(messages)
|
|
257
474
|
|
|
258
|
-
assert response.content == "
|
|
475
|
+
assert response.content == "Hello from Bedrock"
|
|
259
476
|
assert response.tool_calls == []
|
|
260
477
|
|
|
478
|
+
# Bedrock-specific behavior: uses create() and appends assistant message directly.
|
|
479
|
+
assert not hasattr(bedrock_client.beta.messages, "stream")
|
|
480
|
+
bedrock_client.beta.messages.create.assert_awaited_once() # type: ignore[union-attr]
|
|
481
|
+
assert len(messages) == 2
|
|
482
|
+
assert messages[-1]["role"] == "assistant"
|
|
483
|
+
|
|
484
|
+
# Ensure the Bedrock call shape is stable.
|
|
485
|
+
_, kwargs = bedrock_client.beta.messages.create.call_args # type: ignore[union-attr]
|
|
486
|
+
assert kwargs["model"] == "test-model-arn"
|
|
487
|
+
assert kwargs["tool_choice"] == {"type": "auto", "disable_parallel_tool_use": True}
|
|
488
|
+
assert "fine-grained-tool-streaming-2025-05-14" in kwargs["betas"]
|
|
489
|
+
assert "computer-use-2025-01-24" in kwargs["betas"]
|
|
490
|
+
|
|
261
491
|
@pytest.mark.asyncio
|
|
262
|
-
async def
|
|
263
|
-
|
|
264
|
-
|
|
492
|
+
async def test_get_response_bedrock_missing_boto3_raises_value_error(
|
|
493
|
+
self, bedrock_client: AsyncAnthropicBedrock
|
|
494
|
+
) -> None:
|
|
495
|
+
"""If boto3 isn't installed, Bedrock client import path should raise a clear ValueError."""
|
|
265
496
|
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
266
|
-
agent = ClaudeAgent(
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
validate_api_key=False,
|
|
270
|
-
)
|
|
271
|
-
|
|
272
|
-
# Mock API error
|
|
273
|
-
mock_anthropic.beta.messages.create = AsyncMock(
|
|
274
|
-
side_effect=BadRequestError(
|
|
275
|
-
message="Invalid request",
|
|
276
|
-
response=MagicMock(status_code=400),
|
|
277
|
-
body={"error": {"message": "Invalid request"}},
|
|
278
|
-
)
|
|
497
|
+
agent = ClaudeAgent.create(
|
|
498
|
+
model_client=bedrock_client,
|
|
499
|
+
model="test-model-arn",
|
|
500
|
+
validate_api_key=False,
|
|
279
501
|
)
|
|
280
502
|
|
|
503
|
+
bedrock_client.beta.messages.create.side_effect = ModuleNotFoundError("boto3") # type: ignore[union-attr]
|
|
281
504
|
messages = [{"role": "user", "content": [{"type": "text", "text": "Hi"}]}]
|
|
282
505
|
|
|
283
|
-
with pytest.raises(
|
|
506
|
+
with pytest.raises(ValueError, match=r"boto3 is required for AWS Bedrock"):
|
|
284
507
|
await agent.get_response(messages) # type: ignore
|
|
285
508
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
# agent._available_tools = [
|
|
297
|
-
# types.Tool(
|
|
298
|
-
# name="calculator", description="Calculator", inputSchema={"type": "object"}
|
|
299
|
-
# )
|
|
300
|
-
# ]
|
|
301
|
-
# agent._tool_map = {
|
|
302
|
-
# "calculator": types.Tool(
|
|
303
|
-
# name="calculator", description="Calculator", inputSchema={"type": "object"}
|
|
304
|
-
# )
|
|
305
|
-
# }
|
|
306
|
-
|
|
307
|
-
# # Mock initial response with tool use
|
|
308
|
-
# initial_response = MagicMock()
|
|
309
|
-
# # Create tool use block
|
|
310
|
-
# tool_block = MagicMock()
|
|
311
|
-
# tool_block.type = "tool_use"
|
|
312
|
-
# tool_block.id = "calc_123"
|
|
313
|
-
# tool_block.name = "calculator"
|
|
314
|
-
# tool_block.input = {"operation": "add", "a": 2, "b": 3}
|
|
315
|
-
# initial_response.content = [tool_block]
|
|
316
|
-
# initial_response.usage = MagicMock(input_tokens=10, output_tokens=15)
|
|
317
|
-
|
|
318
|
-
# # Mock follow-up response
|
|
319
|
-
# final_response = MagicMock()
|
|
320
|
-
# text_block = MagicMock()
|
|
321
|
-
# text_block.type = "text"
|
|
322
|
-
# text_block.text = "2 + 3 = 5"
|
|
323
|
-
# final_response.content = [text_block]
|
|
324
|
-
# final_response.usage = MagicMock(input_tokens=20, output_tokens=10)
|
|
325
|
-
|
|
326
|
-
# mock_anthropic.beta.messages.create = AsyncMock(
|
|
327
|
-
# side_effect=[initial_response, final_response]
|
|
328
|
-
# )
|
|
329
|
-
|
|
330
|
-
# # Mock tool execution
|
|
331
|
-
# mock_mcp_client.call_tool = AsyncMock(
|
|
332
|
-
# return_value=MCPToolResult(
|
|
333
|
-
# content=[types.TextContent(type="text", text="5")], isError=False
|
|
334
|
-
# )
|
|
335
|
-
# )
|
|
336
|
-
|
|
337
|
-
# # Mock the mcp_client properties
|
|
338
|
-
# mock_mcp_client.mcp_config = {"test_server": {"url": "http://localhost"}}
|
|
339
|
-
# mock_mcp_client.list_tools = AsyncMock(return_value=agent._available_tools)
|
|
340
|
-
# mock_mcp_client.initialize = AsyncMock()
|
|
341
|
-
|
|
342
|
-
# # Initialize the agent
|
|
343
|
-
# await agent.initialize()
|
|
344
|
-
|
|
345
|
-
# # Use a string prompt instead of a task
|
|
346
|
-
# result = await agent.run("What is 2 + 3?")
|
|
347
|
-
|
|
348
|
-
# assert result.content == "2 + 3 = 5"
|
|
349
|
-
# assert result.done is True
|
|
509
|
+
def test_init_with_bedrock_client_does_not_require_anthropic_api_key(
|
|
510
|
+
self, bedrock_client: AsyncAnthropicBedrock
|
|
511
|
+
) -> None:
|
|
512
|
+
"""Providing model_client should bypass ANTHROPIC_API_KEY validation."""
|
|
513
|
+
with patch("hud.settings.settings.anthropic_api_key", None):
|
|
514
|
+
agent = ClaudeAgent.create(
|
|
515
|
+
model_client=bedrock_client,
|
|
516
|
+
validate_api_key=False,
|
|
517
|
+
)
|
|
518
|
+
assert agent.anthropic_client == bedrock_client
|