PyPI - hud-python - Versions diffs - 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl - Mend

hud-python 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (34) hide show

hud/__init__.py +13 -10
hud/adapters/claude/adapter.py +30 -18
hud/adapters/common/adapter.py +0 -1
hud/adapters/common/types.py +129 -4
hud/adapters/operator/adapter.py +23 -13
hud/agent/base.py +5 -4
hud/agent/claude.py +65 -13
hud/agent/claude_plays_pokemon.py +2 -2
hud/agent/langchain.py +8 -2
hud/agent/operator.py +36 -11
hud/agent/tests/test_base.py +2 -2
hud/env/docker_client.py +26 -3
hud/env/environment.py +86 -40
hud/env/local_docker_client.py +50 -4
hud/env/remote_client.py +22 -4
hud/env/remote_docker_client.py +6 -2
hud/gym.py +15 -4
hud/job.py +91 -26
hud/settings.py +6 -0
hud/task.py +84 -6
hud/taskset.py +63 -8
hud/telemetry/exporter.py +4 -6
hud/trajectory.py +3 -0
hud/types.py +28 -2
hud/utils/agent.py +37 -0
hud/utils/common.py +142 -26
hud/utils/config.py +11 -0
hud/utils/tests/test_common.py +225 -0
hud/utils/tests/test_version.py +1 -1
hud/version.py +1 -1
{hud_python-0.2.6.dist-info → hud_python-0.2.8.dist-info}/METADATA +9 -6
{hud_python-0.2.6.dist-info → hud_python-0.2.8.dist-info}/RECORD +34 -33
{hud_python-0.2.6.dist-info → hud_python-0.2.8.dist-info}/WHEEL +0 -0
{hud_python-0.2.6.dist-info → hud_python-0.2.8.dist-info}/licenses/LICENSE +0 -0

hud/__init__.py CHANGED Viewed

@@ -4,8 +4,6 @@ HUD SDK for interacting with the HUD evaluation platform.
 from __future__ import annotations
-import logging
 from . import agent, env, gym, settings, task, taskset, types, utils
 from .adapters import ResponseAction as Response
 from .job import create_job, load_job, run_job
@@ -15,16 +13,21 @@ from .taskset import load_taskset
 from .telemetry import flush, init_telemetry, trace
 from .version import __version__
-init_telemetry()
+if settings.settings.telemetry_enabled:
+    init_telemetry()
+if settings.settings.fancy_logging:
+    import logging
-hud_logger = logging.getLogger("hud")
-hud_logger.setLevel(logging.INFO)
+    hud_logger = logging.getLogger("hud")
+    # TODO: Make this configurable
+    hud_logger.setLevel(logging.INFO)
-if not hud_logger.handlers:
-    handler = logging.StreamHandler()
-    formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
-    handler.setFormatter(formatter)
-    hud_logger.addHandler(handler)
+    if not hud_logger.handlers:
+        handler = logging.StreamHandler()
+        formatter = logging.Formatter("[%(levelname)s] %(asctime)s | %(name)s | %(message)s")
+        handler.setFormatter(formatter)
+        hud_logger.addHandler(handler)
 __all__ = [
     "Response",

hud/adapters/claude/adapter.py CHANGED Viewed

@@ -29,12 +29,16 @@ class ClaudeAdapter(Adapter):
         "super_r": "win",
         "right shift": "shift",
         "left shift": "shift",
+        "down shift": "shift",
+        "windows": "win",
+        "page_down": "pagedown",
+        "page_up": "pageup",
     }
-    def __init__(self) -> None:
+    def __init__(self, width: int = 1024, height: int = 768) -> None:
         super().__init__()
-        self.agent_width = 1024  # Claude's preferred width
-        self.agent_height = 768  # Claude's preferred height
+        self.agent_width = width  # Claude's preferred width
+        self.agent_height = height  # Claude's preferred height
     def _map_key(self, key: str) -> CLAKey:
         """Map a key to its standardized form."""
@@ -53,12 +57,13 @@ class ClaudeAdapter(Adapter):
                 if "+" in data["text"]:
                     keys: list[CLAKey] = [self._map_key(k) for k in (data["text"].split("+"))]
                     assert len(keys) > 0
-                    return PressAction(keys=keys)
-                return PressAction(keys=[self._map_key(data["text"])])
+                    converted_action = PressAction(keys=keys)
+                else:
+                    converted_action = PressAction(keys=[self._map_key(data["text"])])
             elif action_type == "type":
                 assert "text" in data
-                return TypeAction(
+                converted_action = TypeAction(
                     text=data["text"],
                     enter_after=False,
                 )
@@ -69,14 +74,14 @@ class ClaudeAdapter(Adapter):
                 coord = data["coordinate"]
                 assert isinstance(coord, list)
                 assert len(coord) == 2
-                return MoveAction(point=Point(x=coord[0], y=coord[1]))
+                converted_action = MoveAction(point=Point(x=coord[0], y=coord[1]))
             elif action_type == "left_click":
                 assert "coordinate" in data
                 coord = data["coordinate"]
                 assert isinstance(coord, list)
                 assert len(coord) == 2
-                return ClickAction(point=Point(x=coord[0], y=coord[1]), button="left")
+                converted_action = ClickAction(point=Point(x=coord[0], y=coord[1]), button="left")
             elif action_type == "left_click_drag":
                 assert "coordinate" in data
@@ -93,28 +98,30 @@ class ClaudeAdapter(Adapter):
                 ):
                     raise ValueError("Left click drag must be preceded by a move or click action")
                 else:
-                    return DragAction(path=[self.memory[-1].point, Point(x=coord[0], y=coord[1])])
+                    converted_action = DragAction(
+                        path=[self.memory[-1].point, Point(x=coord[0], y=coord[1])]
+                    )
             elif action_type == "right_click":
                 assert "coordinate" in data
                 coord = data["coordinate"]
                 assert isinstance(coord, list)
                 assert len(coord) == 2
-                return ClickAction(point=Point(x=coord[0], y=coord[1]), button="right")
+                converted_action = ClickAction(point=Point(x=coord[0], y=coord[1]), button="right")
             elif action_type == "middle_click":
                 assert "coordinate" in data
                 coord = data["coordinate"]
                 assert isinstance(coord, list)
                 assert len(coord) == 2
-                return ClickAction(point=Point(x=coord[0], y=coord[1]), button="middle")
+                converted_action = ClickAction(point=Point(x=coord[0], y=coord[1]), button="middle")
             elif action_type == "double_click":
                 assert "coordinate" in data
                 coord = data["coordinate"]
                 assert isinstance(coord, list)
                 assert len(coord) == 2
-                return ClickAction(
+                converted_action = ClickAction(
                     point=Point(x=coord[0], y=coord[1]), button="left", pattern=[100]
                 )
@@ -123,7 +130,7 @@ class ClaudeAdapter(Adapter):
                 coord = data["coordinate"]
                 assert isinstance(coord, list)
                 assert len(coord) == 2
-                return ClickAction(
+                converted_action = ClickAction(
                     point=Point(x=coord[0], y=coord[1]),
                     button="left",
                     pattern=[100, 100],
@@ -144,25 +151,30 @@ class ClaudeAdapter(Adapter):
                 else:
                     raise ValueError(f"Unsupported scroll direction: {direction}")
-                return ScrollAction(
+                converted_action = ScrollAction(
                     point=Point(x=data["coordinate"][0], y=data["coordinate"][1]),
                     scroll=scroll,
                 )
             elif action_type == "screenshot":
-                return ScreenshotFetch()
+                converted_action = ScreenshotFetch()
             elif action_type == "cursor_position":
-                return PositionFetch()
+                converted_action = PositionFetch()
             elif action_type == "wait":
                 assert "duration" in data
-                return WaitAction(time=data["duration"])
+                converted_action = WaitAction(time=data["duration"])
             elif action_type == "response":
-                return ResponseAction(text=data.get("text", ""))
+                converted_action = ResponseAction(text=data.get("text", ""))
             else:
                 raise ValueError(f"Unsupported action type: {action_type}")
+            converted_action.reasoning = data.get("reasoning", None)
+            converted_action.logs = data.get("logs", None)
+            return converted_action
         except AssertionError:
             raise ValueError(f"Invalid action: {data}") from None

hud/adapters/common/adapter.py CHANGED Viewed

@@ -164,5 +164,4 @@ class Adapter:
     def adapt_list(self, actions: list[Any]) -> list[CLA]:
         if not isinstance(actions, list):
             raise ValueError("Please provide a list of actions")
         return [self.adapt(action) for action in actions]

hud/adapters/common/types.py CHANGED Viewed

@@ -1,13 +1,40 @@
 from __future__ import annotations
-from typing import Annotated, Literal, TypeAlias
+from typing import Annotated, Any, Literal, TypeAlias
 from pydantic import BaseModel, Field
+LogType = str | dict[str, Any] | list[str | dict[str, Any]] | None
+# Helper function to format logs for display
+def _format_logs_for_display(
+    logs: LogType | None = None,
+    reasoning: str | None = None,
+    max_log_len: int = 277,
+) -> str:
+    log_repr = repr(logs)
+    truncated_log = log_repr[:max_log_len] + "..." if len(log_repr) > max_log_len else log_repr
+    return f" │ Reasoning: {reasoning} │ Logs: {truncated_log}"
 # Base class for all actions
 class CLAAction(BaseModel):
     type: str
+    reasoning: str | None = None
+    logs: LogType | None = None
+    def __str__(self) -> str:
+        # Basic representation for actions that don't have a specific override
+        # This base __str__ will NOT include logs by default, subclasses should handle it.
+        attributes = ", ".join(
+            f"{k}='{v}'" if isinstance(v, str) else f"{k}={v}"
+            for k, v in self.model_dump().items()
+            if k != "type" and v is not None and k != "logs" and k != "reasoning"
+        )
+        action_str = f"{self.type.capitalize()}Action ({attributes})"
+        action_str += _format_logs_for_display(self.logs, self.reasoning)
+        return action_str
 # Basic Point model for coordinates
@@ -16,32 +43,59 @@ class Point(BaseModel):
     y: int
-# CLICK ACTION (supports extra options)
+# CLICK ACTION
 class ClickAction(CLAAction):
     type: Literal["click"] = "click"
     point: Point | None = None
     button: CLAButton = "left"
-    pattern: list[int] | None = None  # [delay_1, delay_2, ...]
+    pattern: list[int] | None = None
     hold_keys: list[CLAKey] | None = None
+    def __str__(self) -> str:
+        parts = ["💥 Click"]
+        if self.point:
+            parts.append(f"at ({self.point.x}, {self.point.y})")
+        if self.button != "left":
+            parts.append(f"with {self.button} button")
+        if self.hold_keys:
+            parts.append(f"holding {self.hold_keys}")
+        action_str = " ".join(parts)
+        action_str += _format_logs_for_display(self.logs, self.reasoning)
+        return action_str
 # PRESS ACTION for key presses/hotkeys
 class PressAction(CLAAction):
     type: Literal["press"] = "press"
     keys: list[CLAKey]
+    def __str__(self) -> str:
+        action_str = f"🎹 Press keys: {'+'.join(self.keys)}"
+        action_str += _format_logs_for_display(self.logs, self.reasoning)
+        return action_str
 # KEYDOWN ACTION for key presses/hotkeys
 class KeyDownAction(CLAAction):
     type: Literal["keydown"] = "keydown"
     keys: list[CLAKey]
+    def __str__(self) -> str:
+        action_str = f"👇 KeyDown: {'+'.join(self.keys)}"
+        action_str += _format_logs_for_display(self.logs, self.reasoning)
+        return action_str
 # KEYUP ACTION for key presses/hotkeys
 class KeyUpAction(CLAAction):
     type: Literal["keyup"] = "keyup"
     keys: list[CLAKey]
+    def __str__(self) -> str:
+        action_str = f"👆 KeyUp: {'+'.join(self.keys)}"
+        action_str += _format_logs_for_display(self.logs, self.reasoning)
+        return action_str
 # TYPE ACTION for text typing
 class TypeAction(CLAAction):
@@ -49,6 +103,13 @@ class TypeAction(CLAAction):
     text: str
     enter_after: bool | None = False
+    def __str__(self) -> str:
+        action_str = f'✍️ Type: "{self.text}"'
+        if self.enter_after:
+            action_str += " (and press Enter)"
+        action_str += _format_logs_for_display(self.logs, self.reasoning)
+        return action_str
 # SCROLL ACTION
 class ScrollAction(CLAAction):
@@ -57,6 +118,18 @@ class ScrollAction(CLAAction):
     scroll: Point | None = None
     hold_keys: list[CLAKey] | None = None
+    def __str__(self) -> str:
+        parts = ["📄 Scroll"]
+        if self.point:
+            parts.append(f"at ({self.point.x}, {self.point.y})")
+        if self.scroll:
+            parts.append(f"by ({self.scroll.x}, {self.scroll.y})")
+        if self.hold_keys:  # Added hold_keys for scroll
+            parts.append(f"holding {self.hold_keys}")
+        action_str = " ".join(parts)
+        action_str += _format_logs_for_display(self.logs, self.reasoning)
+        return action_str
 # MOVE ACTION for mouse movement
 class MoveAction(CLAAction):
@@ -64,11 +137,26 @@ class MoveAction(CLAAction):
     point: Point | None = None
     offset: Point | None = None
+    def __str__(self) -> str:
+        parts = ["✨ Move"]
+        if self.point:
+            parts.append(f"to ({self.point.x},{self.point.y})")
+        if self.offset:
+            parts.append(f"by ({self.offset.x},{self.offset.y})")
+        action_str = " ".join(parts)
+        action_str += _format_logs_for_display(self.logs, self.reasoning)
+        return action_str
 # WAIT ACTION
 class WaitAction(CLAAction):
     type: Literal["wait"] = "wait"
-    time: int  # in milliseconds
+    time: int
+    def __str__(self) -> str:
+        action_str = f"💤 Wait for {self.time}ms"
+        action_str += _format_logs_for_display(self.logs, self.reasoning)
+        return action_str
 # DRAG ACTION
@@ -78,26 +166,63 @@ class DragAction(CLAAction):
     pattern: list[int] | None = None  # [delay_1, delay_2, ...]
     hold_keys: list[CLAKey] | None = None
+    def __str__(self) -> str:
+        parts = ["🤏 Drag"]
+        if self.path and len(self.path) > 0:
+            if len(self.path) == 1:
+                parts.append(f"at ({self.path[0].x},{self.path[0].y})")
+            else:
+                parts.append(
+                    f"from ({self.path[0].x}, {self.path[0].y}) to "
+                    f"({self.path[-1].x}, {self.path[-1].y})"
+                )
+        if self.hold_keys:  # Added hold_keys for drag
+            parts.append(f"holding {self.hold_keys}")
+        action_str = " ".join(parts)
+        action_str += _format_logs_for_display(self.logs, self.reasoning)
+        return action_str
 # RESPONSE ACTION from agent
 class ResponseAction(CLAAction):
     type: Literal["response"] = "response"
     text: str  # The final textual response from the agent
+    def __str__(self) -> str:
+        displayed_text = self.text if len(self.text) < 50 else self.text[:47] + "..."
+        action_str = f'💬 Response: "{displayed_text}"'
+        action_str += _format_logs_for_display(self.logs, self.reasoning)
+        return action_str
 # SCREENSHOT ACTION
 class ScreenshotFetch(CLAAction):
     type: Literal["screenshot"] = "screenshot"
+    def __str__(self) -> str:
+        action_str = "📸 Screenshot"
+        action_str += _format_logs_for_display(self.logs, self.reasoning)
+        return action_str
 class PositionFetch(CLAAction):
     type: Literal["position"] = "position"
+    def __str__(self) -> str:
+        action_str = "📍 Position"
+        action_str += _format_logs_for_display(self.logs, self.reasoning)
+        return action_str
 class CustomAction(CLAAction):
     type: Literal["custom"] = "custom"
     action: str
+    def __str__(self) -> str:
+        action_str = f"⚙️ Custom: {self.action}"
+        action_str += _format_logs_for_display(self.logs, self.reasoning)
+        return action_str
 # Union of all possible actions
 CLA = Annotated[

hud/adapters/operator/adapter.py CHANGED Viewed

@@ -26,6 +26,7 @@ class OperatorAdapter(Adapter):
         "arrowdown": "down",
         "arrowleft": "left",
         "arrowright": "right",
+        "cmd": "ctrl",
     }
     BUTTON_MAP: ClassVar[dict[str, CLAButton]] = {"wheel": "middle"}
@@ -49,46 +50,55 @@ class OperatorAdapter(Adapter):
                 x, y = data.get("x", 0), data.get("y", 0)
                 button = data.get("button", "left")
                 button = self.BUTTON_MAP.get(button, button)
-                return ClickAction(point=Point(x=x, y=y), button=button)
+                if button is None:
+                    button = "left"
+                converted_action = ClickAction(point=Point(x=x, y=y), button=button)
             elif action_type == "double_click":
                 x, y = data.get("x", 0), data.get("y", 0)
-                return ClickAction(point=Point(x=x, y=y), button="left", pattern=[100])
+                converted_action = ClickAction(point=Point(x=x, y=y), button="left", pattern=[100])
             elif action_type == "scroll":
-                x, y = data.get("x", 0), data.get("y", 0)
-                scroll_x = data.get("scroll_x", 0)
-                scroll_y = data.get("scroll_y", 0)
-                return ScrollAction(point=Point(x=x, y=y), scroll=Point(x=scroll_x, y=scroll_y))
+                x, y = int(data.get("x", 0)), int(data.get("y", 0))
+                scroll_x = int(data.get("scroll_x", 0))
+                scroll_y = int(data.get("scroll_y", 0))
+                converted_action = ScrollAction(
+                    point=Point(x=x, y=y), scroll=Point(x=scroll_x, y=scroll_y)
+                )
             elif action_type == "type":
                 text = data.get("text", "")
-                return TypeAction(text=text, enter_after=False)
+                converted_action = TypeAction(text=text, enter_after=False)
             elif action_type == "wait":
                 ms = data.get("ms", 1000)
-                return WaitAction(time=ms)
+                converted_action = WaitAction(time=ms)
             elif action_type == "move":
                 x, y = data.get("x", 0), data.get("y", 0)
-                return MoveAction(point=Point(x=x, y=y))
+                converted_action = MoveAction(point=Point(x=x, y=y))
             elif action_type == "keypress":
                 keys = data.get("keys", [])
-                return PressAction(keys=[self._map_key(k) for k in keys])
+                converted_action = PressAction(keys=[self._map_key(k) for k in keys])
             elif action_type == "drag":
                 path = data.get("path", [])
                 points = [Point(x=p.get("x", 0), y=p.get("y", 0)) for p in path]
-                return DragAction(path=points)
+                converted_action = DragAction(path=points)
             elif action_type == "screenshot":
-                return ScreenshotFetch()
+                converted_action = ScreenshotFetch()
             elif action_type == "response":
-                return ResponseAction(text=data.get("text", ""))
+                converted_action = ResponseAction(text=data.get("text", ""))
             else:
                 raise ValueError(f"Unsupported action type: {action_type}")
+            converted_action.reasoning = data.get("reasoning", "")
+            converted_action.logs = data.get("logs", "")
+            return converted_action
         except Exception as e:
             raise ValueError(f"Invalid action: {data}. Error: {e!s}") from e

hud/agent/base.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Sequence, TypeVar, Generic
+from typing import Any, Sequence, TypeVar, Generic
 from hud.adapters import Adapter, CLA
 from hud.types import Gym
@@ -31,6 +31,7 @@ class Agent(Generic[ClientT, ActionT], ABC):
         self,
         client: ClientT | None = None,
         adapter: Adapter | None = None,
+        name: str | None = None,
     ):
         """
         Initialize the agent.
@@ -41,6 +42,7 @@ class Agent(Generic[ClientT, ActionT], ABC):
         """
         self.client = client
         self.adapter = adapter
+        self.name = name
     def preprocess(self, observation: Observation) -> Observation:
         """
@@ -70,9 +72,9 @@ class Agent(Generic[ClientT, ActionT], ABC):
             observation: The preprocessed observation
         Returns:
-            tuple[list[ActionT], bool]: A tuple containing the list of raw actions and a
+            tuple[list[ActionT], bool]: A tuple containing the list of raw actions,
                                        boolean indicating if the agent believes it has
-                                       completed the task
+                                       completed the task.
         """
         pass
@@ -82,7 +84,6 @@ class Agent(Generic[ClientT, ActionT], ABC):
         Args:
             actions: The raw actions from the model
         Returns:
             Sequence[CLA]: The actions converted to HUD format
         """

hud/agent/claude.py CHANGED Viewed

@@ -1,13 +1,15 @@
+import copy
 import logging
 from typing import Any, cast
-from anthropic import AsyncAnthropic
+from anthropic import AsyncAnthropic, BadRequestError
 from anthropic.types.beta import (
     BetaMessageParam,
     BetaToolResultBlockParam,
     BetaToolComputerUse20250124Param,
     BetaTextBlockParam,
     BetaImageBlockParam,
+    BetaCacheControlEphemeralParam,
 )
 from hud.adapters import Adapter
@@ -16,6 +18,7 @@ from hud.adapters.claude import ClaudeAdapter
 from hud.types import Gym
 from hud.utils.common import Observation
 from hud.settings import settings
+from hud.adapters.common.types import LogType
 logger = logging.getLogger(__name__)
@@ -63,6 +66,7 @@ class ClaudeAgent(Agent[AsyncAnthropic, Any]):
         model: str = "claude-3-7-sonnet-20250219",
         max_tokens: int = 4096,
         max_iterations: int = 10,
+        name: str | None = None,
     ):
         """
         Initialize the ClaudeAgent.
@@ -73,6 +77,7 @@ class ClaudeAgent(Agent[AsyncAnthropic, Any]):
             model: The Claude model to use
             max_tokens: Maximum tokens for Claude's response
             max_iterations: Maximum number of iterations for the agent
+            name: The name of the agent
         """
         # Initialize client if not provided
         if client is None:
@@ -88,7 +93,10 @@ class ClaudeAgent(Agent[AsyncAnthropic, Any]):
         adapter = adapter or ClaudeAdapter()
-        super().__init__(client=client, adapter=adapter)
+        if name is None:
+            name = model
+        super().__init__(client=client, adapter=adapter, name=name)
         self.model = model
         self.max_tokens = max_tokens
@@ -115,12 +123,15 @@ class ClaudeAgent(Agent[AsyncAnthropic, Any]):
             observation: The preprocessed observation
         Returns:
-            tuple[list[Any], bool]: A tuple containing the list of raw actions and a
-                                   boolean indicating if the agent believes the task is complete
+            tuple[list[Any], bool, list[str | dict[str, Any]] | None]: A tuple containing the list of raw actions,
+                                   boolean indicating if the agent believes the task is complete, and a list of strings or dictionaries of logs.
         """
         if not self.client:
             raise ValueError("Client is required")
+        if not observation.text and not observation.screenshot:
+            raise ValueError("Observation must contain either text or screenshot")
         # Prepare the user content for Claude
         user_content: list[BetaImageBlockParam | BetaTextBlockParam | BetaToolResultBlockParam] = []
@@ -159,15 +170,44 @@ class ClaudeAgent(Agent[AsyncAnthropic, Any]):
             )
         )
-        # Call Claude API using async client
-        response = await self.client.beta.messages.create(
-            model=self.model,
-            max_tokens=self.max_tokens,
-            messages=self.messages,
-            tools=[COMPUTER_TOOL],
-            betas=["computer-use-2025-01-24"],
-            tool_choice={"type": "auto", "disable_parallel_tool_use": True},
-        )
+        # Call Claude API using async client, truncating 50 messages at a time if needed
+        while True:
+            # first, make a copy and add prompt caching to the last message
+            messages_cached = copy.deepcopy(self.messages)
+            # Mark last user message with cache control for prompt caching
+            last_msg = messages_cached[-1]
+            if last_msg.get("role") == "user":
+                last_content = last_msg["content"]
+                if isinstance(last_content, list):
+                    for block in last_content:
+                        if (
+                            not block["type"] == "thinking"
+                            and not block["type"] == "redacted_thinking"
+                        ):
+                            cache_control: BetaCacheControlEphemeralParam = {"type": "ephemeral"}
+                            block["cache_control"] = cache_control
+            try:
+                response = await self.client.beta.messages.create(
+                    model=self.model,
+                    max_tokens=self.max_tokens,
+                    messages=messages_cached,
+                    tools=[COMPUTER_TOOL],
+                    betas=["computer-use-2025-01-24"],
+                    tool_choice={"type": "auto", "disable_parallel_tool_use": True},
+                )
+            except BadRequestError as e:
+                if e.message.startswith("prompt is too long"):
+                    logger.warning(
+                        f"Prompt is too long, removing the first 50 messages except for the first user message: {e.message}"
+                    )
+                    self.messages = [self.messages[0]] + self.messages[50:]
+                    continue
+                else:
+                    raise e
+            # break out of the while loop if we get a response
+            break
         # Add Claude's response to the conversation history
         response_content = response.content
@@ -216,4 +256,16 @@ class ClaudeAgent(Agent[AsyncAnthropic, Any]):
             # logger.info("No tool use and no final text block found.")
             # Keep done = True, actions remains empty
+        reasoning = ""
+        for block in response_content:
+            if block.type == "thinking":
+                reasoning += f"Thinking: {block.thinking}\n"
+            elif block.type == "text":
+                reasoning += block.text
+        # add reasoning to the actions
+        for action in actions:
+            action["reasoning"] = reasoning
+            action["logs"] = response.model_dump()
         return actions, done

hud-python 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

Potentially problematic release.

hud-python 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl