PyPI - hud-python - Versions diffs - 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (274)

hud/__init__.py +27 -7
hud/agents/__init__.py +11 -5
hud/agents/base.py +220 -500
hud/agents/claude.py +200 -240
hud/agents/gemini.py +275 -0
hud/agents/gemini_cua.py +335 -0
hud/agents/grounded_openai.py +98 -100
hud/agents/misc/integration_test_agent.py +51 -20
hud/agents/misc/response_agent.py +41 -36
hud/agents/openai.py +291 -292
hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
hud/agents/operator.py +211 -0
hud/agents/tests/conftest.py +133 -0
hud/agents/tests/test_base.py +300 -622
hud/agents/tests/test_base_runtime.py +233 -0
hud/agents/tests/test_claude.py +379 -210
hud/agents/tests/test_client.py +9 -10
hud/agents/tests/test_gemini.py +369 -0
hud/agents/tests/test_grounded_openai_agent.py +65 -50
hud/agents/tests/test_openai.py +376 -140
hud/agents/tests/test_operator.py +362 -0
hud/agents/tests/test_run_eval.py +179 -0
hud/cli/__init__.py +461 -545
hud/cli/analyze.py +43 -5
hud/cli/build.py +664 -110
hud/cli/debug.py +8 -5
hud/cli/dev.py +882 -734
hud/cli/eval.py +782 -668
hud/cli/flows/dev.py +167 -0
hud/cli/flows/init.py +191 -0
hud/cli/flows/tasks.py +153 -56
hud/cli/flows/templates.py +151 -0
hud/cli/flows/tests/__init__.py +1 -0
hud/cli/flows/tests/test_dev.py +126 -0
hud/cli/init.py +60 -58
hud/cli/push.py +29 -11
hud/cli/rft.py +311 -0
hud/cli/rft_status.py +145 -0
hud/cli/tests/test_analyze.py +5 -5
hud/cli/tests/test_analyze_metadata.py +3 -2
hud/cli/tests/test_analyze_module.py +120 -0
hud/cli/tests/test_build.py +108 -6
hud/cli/tests/test_build_failure.py +41 -0
hud/cli/tests/test_build_module.py +50 -0
hud/cli/tests/test_cli_init.py +6 -1
hud/cli/tests/test_cli_more_wrappers.py +30 -0
hud/cli/tests/test_cli_root.py +140 -0
hud/cli/tests/test_convert.py +361 -0
hud/cli/tests/test_debug.py +12 -10
hud/cli/tests/test_dev.py +197 -0
hud/cli/tests/test_eval.py +251 -0
hud/cli/tests/test_eval_bedrock.py +51 -0
hud/cli/tests/test_init.py +124 -0
hud/cli/tests/test_main_module.py +11 -5
hud/cli/tests/test_mcp_server.py +12 -100
hud/cli/tests/test_push_happy.py +74 -0
hud/cli/tests/test_push_wrapper.py +23 -0
hud/cli/tests/test_registry.py +1 -1
hud/cli/tests/test_utils.py +1 -1
hud/cli/{rl → utils}/celebrate.py +14 -12
hud/cli/utils/config.py +18 -1
hud/cli/utils/docker.py +130 -4
hud/cli/utils/env_check.py +9 -9
hud/cli/utils/git.py +136 -0
hud/cli/utils/interactive.py +39 -5
hud/cli/utils/metadata.py +69 -0
hud/cli/utils/runner.py +1 -1
hud/cli/utils/server.py +2 -2
hud/cli/utils/source_hash.py +3 -3
hud/cli/utils/tasks.py +4 -1
hud/cli/utils/tests/__init__.py +0 -0
hud/cli/utils/tests/test_config.py +58 -0
hud/cli/utils/tests/test_docker.py +93 -0
hud/cli/utils/tests/test_docker_hints.py +71 -0
hud/cli/utils/tests/test_env_check.py +74 -0
hud/cli/utils/tests/test_environment.py +42 -0
hud/cli/utils/tests/test_git.py +142 -0
hud/cli/utils/tests/test_interactive_module.py +60 -0
hud/cli/utils/tests/test_local_runner.py +50 -0
hud/cli/utils/tests/test_logging_utils.py +23 -0
hud/cli/utils/tests/test_metadata.py +49 -0
hud/cli/utils/tests/test_package_runner.py +35 -0
hud/cli/utils/tests/test_registry_utils.py +49 -0
hud/cli/utils/tests/test_remote_runner.py +25 -0
hud/cli/utils/tests/test_runner_modules.py +52 -0
hud/cli/utils/tests/test_source_hash.py +36 -0
hud/cli/utils/tests/test_tasks.py +80 -0
hud/cli/utils/version_check.py +258 -0
hud/cli/{rl → utils}/viewer.py +2 -2
hud/clients/README.md +12 -11
hud/clients/__init__.py +4 -3
hud/clients/base.py +166 -26
hud/clients/environment.py +51 -0
hud/clients/fastmcp.py +13 -6
hud/clients/mcp_use.py +40 -15
hud/clients/tests/test_analyze_scenarios.py +206 -0
hud/clients/tests/test_protocol.py +9 -3
hud/datasets/__init__.py +23 -20
hud/datasets/loader.py +327 -0
hud/datasets/runner.py +192 -105
hud/datasets/tests/__init__.py +0 -0
hud/datasets/tests/test_loader.py +221 -0
hud/datasets/tests/test_utils.py +315 -0
hud/datasets/utils.py +270 -90
hud/environment/__init__.py +50 -0
hud/environment/connection.py +206 -0
hud/environment/connectors/__init__.py +33 -0
hud/environment/connectors/base.py +68 -0
hud/environment/connectors/local.py +177 -0
hud/environment/connectors/mcp_config.py +109 -0
hud/environment/connectors/openai.py +101 -0
hud/environment/connectors/remote.py +172 -0
hud/environment/environment.py +694 -0
hud/environment/integrations/__init__.py +45 -0
hud/environment/integrations/adk.py +67 -0
hud/environment/integrations/anthropic.py +196 -0
hud/environment/integrations/gemini.py +92 -0
hud/environment/integrations/langchain.py +82 -0
hud/environment/integrations/llamaindex.py +68 -0
hud/environment/integrations/openai.py +238 -0
hud/environment/mock.py +306 -0
hud/environment/router.py +112 -0
hud/environment/scenarios.py +493 -0
hud/environment/tests/__init__.py +1 -0
hud/environment/tests/test_connection.py +317 -0
hud/environment/tests/test_connectors.py +218 -0
hud/environment/tests/test_environment.py +161 -0
hud/environment/tests/test_integrations.py +257 -0
hud/environment/tests/test_local_connectors.py +201 -0
hud/environment/tests/test_scenarios.py +280 -0
hud/environment/tests/test_tools.py +208 -0
hud/environment/types.py +23 -0
hud/environment/utils/__init__.py +35 -0
hud/environment/utils/formats.py +215 -0
hud/environment/utils/schema.py +171 -0
hud/environment/utils/tool_wrappers.py +113 -0
hud/eval/__init__.py +67 -0
hud/eval/context.py +674 -0
hud/eval/display.py +299 -0
hud/eval/instrument.py +185 -0
hud/eval/manager.py +466 -0
hud/eval/parallel.py +268 -0
hud/eval/task.py +340 -0
hud/eval/tests/__init__.py +1 -0
hud/eval/tests/test_context.py +178 -0
hud/eval/tests/test_eval.py +210 -0
hud/eval/tests/test_manager.py +152 -0
hud/eval/tests/test_parallel.py +168 -0
hud/eval/tests/test_task.py +145 -0
hud/eval/types.py +63 -0
hud/eval/utils.py +183 -0
hud/patches/__init__.py +19 -0
hud/patches/mcp_patches.py +151 -0
hud/patches/warnings.py +54 -0
hud/samples/browser.py +4 -4
hud/server/__init__.py +2 -1
hud/server/low_level.py +2 -1
hud/server/router.py +164 -0
hud/server/server.py +567 -80
hud/server/tests/test_mcp_server_integration.py +11 -11
hud/server/tests/test_mcp_server_more.py +1 -1
hud/server/tests/test_server_extra.py +2 -0
hud/settings.py +45 -3
hud/shared/exceptions.py +36 -10
hud/shared/hints.py +26 -1
hud/shared/requests.py +15 -3
hud/shared/tests/test_exceptions.py +40 -31
hud/shared/tests/test_hints.py +167 -0
hud/telemetry/__init__.py +20 -19
hud/telemetry/exporter.py +201 -0
hud/telemetry/instrument.py +158 -253
hud/telemetry/tests/test_eval_telemetry.py +356 -0
hud/telemetry/tests/test_exporter.py +258 -0
hud/telemetry/tests/test_instrument.py +401 -0
hud/tools/__init__.py +16 -2
hud/tools/apply_patch.py +639 -0
hud/tools/base.py +54 -4
hud/tools/bash.py +2 -2
hud/tools/computer/__init__.py +4 -0
hud/tools/computer/anthropic.py +2 -2
hud/tools/computer/gemini.py +385 -0
hud/tools/computer/hud.py +23 -6
hud/tools/computer/openai.py +20 -21
hud/tools/computer/qwen.py +434 -0
hud/tools/computer/settings.py +37 -0
hud/tools/edit.py +3 -7
hud/tools/executors/base.py +4 -2
hud/tools/executors/pyautogui.py +1 -1
hud/tools/grounding/grounded_tool.py +13 -18
hud/tools/grounding/grounder.py +10 -31
hud/tools/grounding/tests/test_grounded_tool.py +26 -44
hud/tools/jupyter.py +330 -0
hud/tools/playwright.py +18 -3
hud/tools/shell.py +308 -0
hud/tools/tests/test_apply_patch.py +718 -0
hud/tools/tests/test_computer.py +4 -9
hud/tools/tests/test_computer_actions.py +24 -2
hud/tools/tests/test_jupyter_tool.py +181 -0
hud/tools/tests/test_shell.py +596 -0
hud/tools/tests/test_submit.py +85 -0
hud/tools/tests/test_types.py +193 -0
hud/tools/types.py +21 -1
hud/types.py +167 -57
hud/utils/__init__.py +2 -0
hud/utils/env.py +67 -0
hud/utils/hud_console.py +61 -3
hud/utils/mcp.py +15 -58
hud/utils/strict_schema.py +162 -0
hud/utils/tests/test_init.py +1 -2
hud/utils/tests/test_mcp.py +1 -28
hud/utils/tests/test_pretty_errors.py +186 -0
hud/utils/tests/test_tool_shorthand.py +154 -0
hud/utils/tests/test_version.py +1 -1
hud/utils/types.py +20 -0
hud/version.py +1 -1
hud_python-0.5.1.dist-info/METADATA +264 -0
hud_python-0.5.1.dist-info/RECORD +299 -0
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
hud/agents/langchain.py +0 -261
hud/agents/lite_llm.py +0 -72
hud/cli/rl/__init__.py +0 -180
hud/cli/rl/config.py +0 -101
hud/cli/rl/display.py +0 -133
hud/cli/rl/gpu.py +0 -63
hud/cli/rl/gpu_utils.py +0 -321
hud/cli/rl/local_runner.py +0 -595
hud/cli/rl/presets.py +0 -96
hud/cli/rl/remote_runner.py +0 -463
hud/cli/rl/rl_api.py +0 -150
hud/cli/rl/vllm.py +0 -177
hud/cli/rl/wait_utils.py +0 -89
hud/datasets/parallel.py +0 -687
hud/misc/__init__.py +0 -1
hud/misc/claude_plays_pokemon.py +0 -292
hud/otel/__init__.py +0 -35
hud/otel/collector.py +0 -142
hud/otel/config.py +0 -181
hud/otel/context.py +0 -570
hud/otel/exporters.py +0 -369
hud/otel/instrumentation.py +0 -135
hud/otel/processors.py +0 -121
hud/otel/tests/__init__.py +0 -1
hud/otel/tests/test_processors.py +0 -197
hud/rl/README.md +0 -30
hud/rl/__init__.py +0 -1
hud/rl/actor.py +0 -176
hud/rl/buffer.py +0 -405
hud/rl/chat_template.jinja +0 -101
hud/rl/config.py +0 -192
hud/rl/distributed.py +0 -132
hud/rl/learner.py +0 -637
hud/rl/tests/__init__.py +0 -1
hud/rl/tests/test_learner.py +0 -186
hud/rl/train.py +0 -382
hud/rl/types.py +0 -101
hud/rl/utils/start_vllm_server.sh +0 -30
hud/rl/utils.py +0 -524
hud/rl/vllm_adapter.py +0 -143
hud/telemetry/job.py +0 -352
hud/telemetry/replay.py +0 -74
hud/telemetry/tests/test_replay.py +0 -40
hud/telemetry/tests/test_trace.py +0 -63
hud/telemetry/trace.py +0 -158
hud/utils/agent_factories.py +0 -86
hud/utils/async_utils.py +0 -65
hud/utils/group_eval.py +0 -223
hud/utils/progress.py +0 -149
hud/utils/tasks.py +0 -127
hud/utils/tests/test_async_utils.py +0 -173
hud/utils/tests/test_progress.py +0 -261
hud_python-0.4.45.dist-info/METADATA +0 -552
hud_python-0.4.45.dist-info/RECORD +0 -228
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0

hud/agents/claude.py CHANGED Viewed

@@ -4,37 +4,53 @@ from __future__ import annotations
 import copy
 import logging
-from typing import TYPE_CHECKING, Any, ClassVar, cast
-from anthropic import Anthropic, AsyncAnthropic, BadRequestError
-from anthropic.types.beta import BetaContentBlockParam, BetaImageBlockParam, BetaTextBlockParam
-import hud
-if TYPE_CHECKING:
-    from anthropic.types.beta import (
-        BetaCacheControlEphemeralParam,
-        BetaContentBlockParam,
-        BetaImageBlockParam,
-        BetaMessageParam,
-        BetaTextBlockParam,
-        BetaToolResultBlockParam,
-    )
-    from hud.datasets import Task
+from inspect import cleandoc
+from typing import Any, ClassVar, Literal, cast
 import mcp.types as types
+from anthropic import AsyncAnthropic, AsyncAnthropicBedrock, Omit
+from anthropic.types import CacheControlEphemeralParam
+from anthropic.types.beta import (
+    BetaBase64ImageSourceParam,
+    BetaContentBlockParam,
+    BetaImageBlockParam,
+    BetaMessageParam,
+    BetaTextBlockParam,
+    BetaToolBash20250124Param,
+    BetaToolComputerUse20250124Param,
+    BetaToolParam,
+    BetaToolResultBlockParam,
+    BetaToolTextEditor20250728Param,
+    BetaToolUnionParam,
+)
+from pydantic import ConfigDict
 from hud.settings import settings
 from hud.tools.computer.settings import computer_settings
-from hud.types import AgentResponse, MCPToolCall, MCPToolResult
+from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
 from hud.utils.hud_console import HUDConsole
+from hud.utils.types import with_signature
-from .base import MCPAgent
+from .base import BaseCreateParams, MCPAgent
 logger = logging.getLogger(__name__)
+class ClaudeConfig(BaseAgentConfig):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    model_name: str = "Claude"
+    model: str = "claude-sonnet-4-5"
+    model_client: AsyncAnthropic | AsyncAnthropicBedrock | None = None
+    max_tokens: int = 16384
+    use_computer_beta: bool = True
+    validate_api_key: bool = True
+class ClaudeCreateParams(BaseCreateParams, ClaudeConfig):
+    pass
 class ClaudeAgent(MCPAgent):
     """
     Claude agent that uses MCP servers for tool execution.
@@ -43,89 +59,47 @@ class ClaudeAgent(MCPAgent):
     tools through MCP servers instead of direct implementation.
     """
-    metadata: ClassVar[dict[str, Any]] = {
+    metadata: ClassVar[dict[str, Any] | None] = {
         "display_width": computer_settings.ANTHROPIC_COMPUTER_WIDTH,
         "display_height": computer_settings.ANTHROPIC_COMPUTER_HEIGHT,
     }
+    config_cls: ClassVar[type[BaseAgentConfig]] = ClaudeConfig
-    def __init__(
-        self,
-        model_client: AsyncAnthropic | None = None,
-        model: str = "claude-sonnet-4-20250514",
-        max_tokens: int = 4096,
-        use_computer_beta: bool = True,
-        validate_api_key: bool = True,
-        **kwargs: Any,
-    ) -> None:
-        """
-        Initialize Claude MCP agent.
-        Args:
-            model_client: AsyncAnthropic client (created if not provided)
-            model: Claude model to use
-            max_tokens: Maximum tokens for response
-            use_computer_beta: Whether to use computer-use beta features
-            **kwargs: Additional arguments passed to BaseMCPAgent (including mcp_client)
-        """
-        super().__init__(**kwargs)
-        # Initialize client if not provided
+    @with_signature(ClaudeCreateParams)
+    @classmethod
+    def create(cls, **kwargs: Any) -> ClaudeAgent:  # pyright: ignore[reportIncompatibleMethodOverride]
+        return MCPAgent.create.__func__(cls, **kwargs)  # type: ignore[return-value]
+    def __init__(self, params: ClaudeCreateParams | None = None, **kwargs: Any) -> None:
+        super().__init__(params, **kwargs)
+        self.config: ClaudeConfig
+        model_client = self.config.model_client
         if model_client is None:
             api_key = settings.anthropic_api_key
             if not api_key:
                 raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
             model_client = AsyncAnthropic(api_key=api_key)
-        # validate api key if requested
-        if validate_api_key:
-            try:
-                Anthropic(api_key=model_client.api_key).models.list()
-            except Exception as e:
-                raise ValueError(f"Anthropic API key is invalid: {e}") from e
         self.anthropic_client = model_client
-        self.model = model
-        self.max_tokens = max_tokens
-        self.use_computer_beta = use_computer_beta
+        self.max_tokens = self.config.max_tokens
+        self.use_computer_beta = self.config.use_computer_beta
         self.hud_console = HUDConsole(logger=logger)
-        self.model_name = self.model
-        # Track mapping from Claude tool names to MCP tool names
-        self._claude_to_mcp_tool_map: dict[str, str] = {}
-        self.claude_tools: list[dict] = []
-        # Append Claude-specific instructions to the base system prompt
-        claude_instructions = """
-        You are Claude, an AI assistant created by Anthropic. You are helpful, harmless, and honest.
-        When working on tasks:
-        1. Be thorough and systematic in your approach
-        2. Complete tasks autonomously without asking for confirmation
-        3. Use available tools efficiently to accomplish your goals
-        4. Verify your actions and ensure task completion
-        5. Be precise and accurate in all operations
-        Remember: You are expected to complete tasks autonomously. The user trusts you to accomplish what they asked.
-        """.strip()  # noqa: E501
-        # Append Claude instructions to any base system prompt
-        if self.system_prompt:
-            self.system_prompt = f"{self.system_prompt}\n\n{claude_instructions}"
-        else:
-            self.system_prompt = claude_instructions
+        # these will be initialized in _convert_tools_for_claude
+        self.has_computer_tool = False
+        self.tool_mapping: dict[str, str] = {}
+        self.claude_tools: list[BetaToolUnionParam] = []
-    async def initialize(self, task: str | Task | None = None) -> None:
-        """Initialize the agent and build tool mappings."""
-        await super().initialize(task)
-        # Build tool mappings after tools are discovered
+    def _on_tools_ready(self) -> None:
+        """Build Claude-specific tool mappings after tools are discovered."""
         self._convert_tools_for_claude()
-    async def get_system_messages(self) -> list[Any]:
+    async def get_system_messages(self) -> list[BetaMessageParam]:
         """No system messages for Claude because applied in get_response"""
         return []
-    async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[Any]:
+    async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[BetaMessageParam]:
         """Format messages for Claude."""
         # Convert MCP content types to Anthropic content types
         anthropic_blocks: list[BetaContentBlockParam] = []
@@ -134,101 +108,74 @@ class ClaudeAgent(MCPAgent):
             if isinstance(block, types.TextContent):
                 # Only include fields that Anthropic expects
                 anthropic_blocks.append(
-                    cast(
-                        "BetaTextBlockParam",
-                        {
-                            "type": "text",
-                            "text": block.text,
-                        },
+                    BetaTextBlockParam(
+                        type="text",
+                        text=block.text,
                     )
                 )
             elif isinstance(block, types.ImageContent):
                 # Convert MCP ImageContent to Anthropic format
                 anthropic_blocks.append(
-                    cast(
-                        "BetaImageBlockParam",
-                        {
-                            "type": "image",
-                            "source": {
-                                "type": "base64",
-                                "media_type": block.mimeType,
-                                "data": block.data,
-                            },
-                        },
+                    BetaImageBlockParam(
+                        type="image",
+                        source=BetaBase64ImageSourceParam(
+                            type="base64",
+                            media_type=cast(
+                                "Literal['image/jpeg', 'image/png', 'image/gif', 'image/webp']",
+                                block.mimeType,
+                            ),
+                            data=block.data,
+                        ),
                     )
                 )
             else:
-                # For other types, try to cast but log a warning
-                self.hud_console.log(f"Unknown content block type: {type(block)}", level="warning")
-                anthropic_blocks.append(cast("BetaContentBlockParam", block))
+                raise ValueError(f"Unknown content block type: {type(block)}")
-        return [
-            cast(
-                "BetaMessageParam",
-                {
-                    "role": "user",
-                    "content": anthropic_blocks,
-                },
-            )
-        ]
+        return [BetaMessageParam(role="user", content=anthropic_blocks)]
-    @hud.instrument(
-        span_type="agent",
-        record_args=False,  # Messages can be large
-        record_result=True,
-    )
     async def get_response(self, messages: list[BetaMessageParam]) -> AgentResponse:
         """Get response from Claude including any tool calls."""
+        messages_cached = self._add_prompt_caching(messages)
-        # Make API call with retry for prompt length
-        current_messages = messages.copy()
-        while True:
-            messages_cached = self._add_prompt_caching(current_messages)
-            # Build create kwargs
-            create_kwargs = {
-                "model": self.model,
-                "max_tokens": self.max_tokens,
-                "system": self.system_prompt,
-                "messages": messages_cached,
-                "tools": self.claude_tools,
-                "tool_choice": {"type": "auto", "disable_parallel_tool_use": True},
-            }
-            # Add beta features if using computer tools
-            if self.use_computer_beta and any(
-                tool.get("type") == "computer_20250124" for tool in self.claude_tools
-            ):
-                create_kwargs["betas"] = ["computer-use-2025-01-24"]
+        # betas to use
+        betas = ["fine-grained-tool-streaming-2025-05-14"]
+        if self.has_computer_tool:
+            betas.append("computer-use-2025-01-24")
+        # Bedrock doesn't support .stream() - use create(stream=True) instead
+        if isinstance(self.anthropic_client, AsyncAnthropicBedrock):
             try:
-                response = await self.anthropic_client.beta.messages.create(**create_kwargs)
-                break
-            except BadRequestError as e:
-                if (
-                    "prompt is too long" in str(e)
-                    or "request_too_large" in str(e)
-                    or e.status_code == 413
-                ):
-                    self.hud_console.warning("Prompt too long, truncating message history")
-                    # Keep first message and last 20 messages
-                    if len(current_messages) > 21:
-                        current_messages = [current_messages[0], *current_messages[-20:]]
-                    else:
-                        raise
-                else:
-                    raise
-        messages.append(
-            cast(
-                "BetaMessageParam",
-                {
-                    "role": "assistant",
-                    "content": response.content,
-                },
-            )
-        )
+                response = await self.anthropic_client.beta.messages.create(
+                    model=self.config.model,
+                    system=self.system_prompt if self.system_prompt is not None else Omit(),
+                    max_tokens=self.max_tokens,
+                    messages=messages_cached,
+                    tools=self.claude_tools,
+                    tool_choice={"type": "auto", "disable_parallel_tool_use": True},
+                    betas=betas,
+                )
+                messages.append(BetaMessageParam(role="assistant", content=response.content))
+            except ModuleNotFoundError:
+                raise ValueError(
+                    "boto3 is required for AWS Bedrock. Use `pip install hud[bedrock]`"
+                ) from None
+        else:
+            # Regular Anthropic client supports .stream()
+            async with self.anthropic_client.beta.messages.stream(
+                model=self.config.model,
+                system=self.system_prompt if self.system_prompt is not None else Omit(),
+                max_tokens=self.max_tokens,
+                messages=messages_cached,
+                tools=self.claude_tools,
+                tool_choice={"type": "auto", "disable_parallel_tool_use": True},
+                betas=betas,
+            ) as stream:
+                # allow backend to accumulate message content
+                async for _ in stream:
+                    pass
+                # get final message
+                response = await stream.get_final_message()
+                messages.append(BetaMessageParam(role="assistant", content=response.content))
         # Process response
         result = AgentResponse(content="", tool_calls=[], done=True)
@@ -239,29 +186,26 @@ class ClaudeAgent(MCPAgent):
         for block in response.content:
             if block.type == "tool_use":
-                # Map Claude tool name back to MCP tool name
-                mcp_tool_name = self._claude_to_mcp_tool_map.get(block.name, block.name)
-                # Create MCPToolCall object with Claude metadata as extra fields
-                # Pyright will complain but the tool class accepts extra fields
                 tool_call = MCPToolCall(
-                    id=block.id,  # canonical identifier for telemetry
-                    name=mcp_tool_name,
-                    arguments=block.input,
-                    claude_name=block.name,  # type: ignore
+                    id=block.id,
+                    # look up name in tool_mapping if available, otherwise use block name
+                    name=self.tool_mapping.get(block.name, block.name),
+                    arguments=block.input
+                    if isinstance(block.input, dict)
+                    else block.input.__dict__,
                 )
                 result.tool_calls.append(tool_call)
                 result.done = False
             elif block.type == "text":
                 text_content += block.text
             elif hasattr(block, "type") and block.type == "thinking":
-                thinking_content += f"Thinking: {block.thinking}\n"
+                if thinking_content:
+                    thinking_content += "\n"
+                thinking_content += block.thinking
-        # Combine text and thinking for final content
+        result.content = text_content
         if thinking_content:
-            result.content = thinking_content + text_content
-        else:
-            result.content = text_content
+            result.reasoning = thinking_content
         return result
@@ -303,81 +247,92 @@ class ClaudeAgent(MCPAgent):
         # Return as a user message containing all tool results
         return [
-            cast(
-                "BetaMessageParam",
-                {
-                    "role": "user",
-                    "content": user_content,
-                },
+            BetaMessageParam(
+                role="user",
+                content=user_content,
             )
         ]
     async def create_user_message(self, text: str) -> BetaMessageParam:
         """Create a user message in Claude's format."""
-        return cast("BetaMessageParam", {"role": "user", "content": text})
+        return BetaMessageParam(role="user", content=text)
-    def _convert_tools_for_claude(self) -> list[dict]:
-        """Convert MCP tools to Claude tool format."""
-        claude_tools = []
-        self._claude_to_mcp_tool_map = {}  # Reset mapping
+    def _convert_tools_for_claude(self) -> None:
+        """Convert MCP tools to Claude API tools."""
-        # Find computer tool by priority
-        computer_tool_priority = ["anthropic_computer", "computer_anthropic", "computer"]
-        selected_computer_tool = None
+        # First pass: identify all computer tools and find the longest match
+        available_tools = self.get_available_tools()
-        for priority_name in computer_tool_priority:
-            for tool in self._available_tools:
+        # find potential computer tools by priority
+        selected_computer_tool = None
+        computer_tool_names_by_priority = ["anthropic_computer", "computer_anthropic", "computer"]
+        for computer_tool_name in computer_tool_names_by_priority:
+            for tool in available_tools:
                 # Check both exact match and suffix match (for prefixed tools)
-                if tool.name == priority_name or tool.name.endswith(f"_{priority_name}"):
+                if tool.name == computer_tool_name or tool.name.endswith(f"_{computer_tool_name}"):
                     selected_computer_tool = tool
                     break
             if selected_computer_tool:
                 break
-        # Add the selected computer tool if found
-        if selected_computer_tool:
-            claude_tool = {
-                "type": "computer_20250124",
-                "name": "computer",
-                "display_width_px": self.metadata["display_width"],
-                "display_height_px": self.metadata["display_height"],
-            }
-            # Map Claude's "computer" back to the actual MCP tool name
-            self._claude_to_mcp_tool_map["computer"] = selected_computer_tool.name
-            claude_tools.append(claude_tool)
-            self.hud_console.debug(
-                f"Using {selected_computer_tool.name} as computer tool for Claude"
-            )
+        def to_api_tool(tool: types.Tool) -> BetaToolUnionParam | None:
+            if tool.name == "str_replace_based_edit_tool":
+                return BetaToolTextEditor20250728Param(
+                    type="text_editor_20250728",
+                    name="str_replace_based_edit_tool",
+                )
+            if tool.name == "bash":
+                return BetaToolBash20250124Param(
+                    type="bash_20250124",
+                    name="bash",
+                )
+            if selected_computer_tool is not None:
+                if tool.name == selected_computer_tool.name:
+                    return BetaToolComputerUse20250124Param(
+                        type="computer_20250124",
+                        name="computer",
+                        display_number=1,
+                        display_width_px=computer_settings.ANTHROPIC_COMPUTER_WIDTH,
+                        display_height_px=computer_settings.ANTHROPIC_COMPUTER_HEIGHT,
+                    )
+                elif tool.name == "computer":
+                    logger.warning(
+                        "Renamed tool %s to 'computer', dropping original 'computer' tool",
+                        selected_computer_tool.name,
+                    )
+                    return None
+            if tool.description is None or tool.inputSchema is None:
+                raise ValueError(
+                    cleandoc(f"""MCP tool {tool.name} requires both a description and inputSchema.
+                    Add these by:
+                    1. Adding a docstring to your @mcp.tool decorated function for the description
+                    2. Using pydantic Field() annotations on function parameters for the schema
+                    """)
+                )
-        # Add other non-computer tools
-        for tool in self._available_tools:
-            # Skip computer tools (already handled) and lifecycle tools
-            is_computer_tool = any(
-                tool.name == priority_name or tool.name.endswith(f"_{priority_name}")
-                for priority_name in computer_tool_priority
+            return BetaToolParam(
+                name=tool.name,
+                description=tool.description,
+                input_schema=tool.inputSchema,
             )
-            if is_computer_tool or tool.name in self.lifecycle_tools:
-                continue
-            claude_tool = {
-                "name": tool.name,
-                "description": tool.description or f"Execute {tool.name}",
-                "input_schema": tool.inputSchema
-                or {
-                    "type": "object",
-                    "properties": {},
-                },
-            }
-            # Direct mapping for non-computer tools
-            self._claude_to_mcp_tool_map[tool.name] = tool.name
-            claude_tools.append(claude_tool)
-        self.claude_tools = claude_tools
-        return claude_tools
+        self.has_computer_tool = False
+        self.tool_mapping = {}
+        self.claude_tools = []
+        for tool in available_tools:
+            claude_tool = to_api_tool(tool)
+            if claude_tool is None or "name" not in claude_tool:
+                continue
+            if claude_tool["name"] == "computer":
+                self.has_computer_tool = True
+            self.tool_mapping[claude_tool["name"]] = tool.name
+            self.claude_tools.append(claude_tool)
     def _add_prompt_caching(self, messages: list[BetaMessageParam]) -> list[BetaMessageParam]:
         """Add prompt caching to messages."""
         messages_cached = copy.deepcopy(messages)
+        cache_control = CacheControlEphemeralParam(type="ephemeral")
         # Mark last user message with cache control
         if (
@@ -391,20 +346,25 @@ class ClaudeAgent(MCPAgent):
                 for block in last_content:
                     # Only add cache control to dict-like block types that support it
                     if isinstance(block, dict):
-                        block_type = block.get("type")
-                        if block_type in ["text", "image", "tool_use", "tool_result"]:
-                            cache_control: BetaCacheControlEphemeralParam = {"type": "ephemeral"}
-                            block["cache_control"] = cache_control  # type: ignore[reportGeneralTypeIssues]
+                        match block["type"]:
+                            case "redacted_thinking" | "thinking":
+                                pass
+                            case _:
+                                block["cache_control"] = cache_control
         return messages_cached
 def base64_to_content_block(base64: str) -> BetaImageBlockParam:
     """Convert base64 image to Claude content block."""
-    return {
-        "type": "image",
-        "source": {"type": "base64", "media_type": "image/png", "data": base64},
-    }
+    return BetaImageBlockParam(
+        type="image",
+        source=BetaBase64ImageSourceParam(
+            type="base64",
+            media_type="image/png",
+            data=base64,
+        ),
+    )
 def text_to_content_block(text: str) -> BetaTextBlockParam:

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl