hud-python 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (192)
  1. hud/__init__.py +22 -89
  2. hud/agents/__init__.py +17 -0
  3. hud/agents/art.py +101 -0
  4. hud/agents/base.py +599 -0
  5. hud/{mcp → agents}/claude.py +373 -321
  6. hud/{mcp → agents}/langchain.py +250 -250
  7. hud/agents/misc/__init__.py +7 -0
  8. hud/{agent → agents}/misc/response_agent.py +80 -80
  9. hud/{mcp → agents}/openai.py +352 -334
  10. hud/agents/openai_chat_generic.py +154 -0
  11. hud/{mcp → agents}/tests/__init__.py +1 -1
  12. hud/agents/tests/test_base.py +742 -0
  13. hud/agents/tests/test_claude.py +324 -0
  14. hud/{mcp → agents}/tests/test_client.py +363 -324
  15. hud/{mcp → agents}/tests/test_openai.py +237 -238
  16. hud/cli/__init__.py +617 -0
  17. hud/cli/__main__.py +8 -0
  18. hud/cli/analyze.py +371 -0
  19. hud/cli/analyze_metadata.py +230 -0
  20. hud/cli/build.py +427 -0
  21. hud/cli/clone.py +185 -0
  22. hud/cli/cursor.py +92 -0
  23. hud/cli/debug.py +392 -0
  24. hud/cli/docker_utils.py +83 -0
  25. hud/cli/init.py +281 -0
  26. hud/cli/interactive.py +353 -0
  27. hud/cli/mcp_server.py +756 -0
  28. hud/cli/pull.py +336 -0
  29. hud/cli/push.py +379 -0
  30. hud/cli/remote_runner.py +311 -0
  31. hud/cli/runner.py +160 -0
  32. hud/cli/tests/__init__.py +3 -0
  33. hud/cli/tests/test_analyze.py +284 -0
  34. hud/cli/tests/test_cli_init.py +265 -0
  35. hud/cli/tests/test_cli_main.py +27 -0
  36. hud/cli/tests/test_clone.py +142 -0
  37. hud/cli/tests/test_cursor.py +253 -0
  38. hud/cli/tests/test_debug.py +453 -0
  39. hud/cli/tests/test_mcp_server.py +139 -0
  40. hud/cli/tests/test_utils.py +388 -0
  41. hud/cli/utils.py +263 -0
  42. hud/clients/README.md +143 -0
  43. hud/clients/__init__.py +16 -0
  44. hud/clients/base.py +354 -0
  45. hud/clients/fastmcp.py +202 -0
  46. hud/clients/mcp_use.py +278 -0
  47. hud/clients/tests/__init__.py +1 -0
  48. hud/clients/tests/test_client_integration.py +111 -0
  49. hud/clients/tests/test_fastmcp.py +342 -0
  50. hud/clients/tests/test_protocol.py +188 -0
  51. hud/clients/utils/__init__.py +1 -0
  52. hud/clients/utils/retry_transport.py +160 -0
  53. hud/datasets.py +322 -192
  54. hud/misc/__init__.py +1 -0
  55. hud/{agent → misc}/claude_plays_pokemon.py +292 -283
  56. hud/otel/__init__.py +35 -0
  57. hud/otel/collector.py +142 -0
  58. hud/otel/config.py +164 -0
  59. hud/otel/context.py +536 -0
  60. hud/otel/exporters.py +366 -0
  61. hud/otel/instrumentation.py +97 -0
  62. hud/otel/processors.py +118 -0
  63. hud/otel/tests/__init__.py +1 -0
  64. hud/otel/tests/test_processors.py +197 -0
  65. hud/server/__init__.py +5 -5
  66. hud/server/context.py +114 -0
  67. hud/server/helper/__init__.py +5 -0
  68. hud/server/low_level.py +132 -0
  69. hud/server/server.py +166 -0
  70. hud/server/tests/__init__.py +3 -0
  71. hud/settings.py +73 -79
  72. hud/shared/__init__.py +5 -0
  73. hud/{exceptions.py → shared/exceptions.py} +180 -180
  74. hud/{server → shared}/requests.py +264 -264
  75. hud/shared/tests/test_exceptions.py +157 -0
  76. hud/{server → shared}/tests/test_requests.py +275 -275
  77. hud/telemetry/__init__.py +25 -30
  78. hud/telemetry/instrument.py +379 -0
  79. hud/telemetry/job.py +309 -141
  80. hud/telemetry/replay.py +74 -0
  81. hud/telemetry/trace.py +83 -0
  82. hud/tools/__init__.py +33 -34
  83. hud/tools/base.py +365 -65
  84. hud/tools/bash.py +161 -137
  85. hud/tools/computer/__init__.py +15 -13
  86. hud/tools/computer/anthropic.py +437 -414
  87. hud/tools/computer/hud.py +376 -328
  88. hud/tools/computer/openai.py +295 -286
  89. hud/tools/computer/settings.py +82 -0
  90. hud/tools/edit.py +314 -290
  91. hud/tools/executors/__init__.py +30 -30
  92. hud/tools/executors/base.py +539 -532
  93. hud/tools/executors/pyautogui.py +621 -619
  94. hud/tools/executors/tests/__init__.py +1 -1
  95. hud/tools/executors/tests/test_base_executor.py +338 -338
  96. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  97. hud/tools/executors/xdo.py +511 -503
  98. hud/tools/{playwright_tool.py → playwright.py} +412 -379
  99. hud/tools/tests/__init__.py +3 -3
  100. hud/tools/tests/test_base.py +282 -0
  101. hud/tools/tests/test_bash.py +158 -152
  102. hud/tools/tests/test_bash_extended.py +197 -0
  103. hud/tools/tests/test_computer.py +425 -52
  104. hud/tools/tests/test_computer_actions.py +34 -34
  105. hud/tools/tests/test_edit.py +259 -240
  106. hud/tools/tests/test_init.py +27 -27
  107. hud/tools/tests/test_playwright_tool.py +183 -183
  108. hud/tools/tests/test_tools.py +145 -157
  109. hud/tools/tests/test_utils.py +156 -156
  110. hud/tools/types.py +72 -0
  111. hud/tools/utils.py +50 -50
  112. hud/types.py +136 -89
  113. hud/utils/__init__.py +10 -16
  114. hud/utils/async_utils.py +65 -0
  115. hud/utils/design.py +168 -0
  116. hud/utils/mcp.py +55 -0
  117. hud/utils/progress.py +149 -149
  118. hud/utils/telemetry.py +66 -66
  119. hud/utils/tests/test_async_utils.py +173 -0
  120. hud/utils/tests/test_init.py +17 -21
  121. hud/utils/tests/test_progress.py +261 -225
  122. hud/utils/tests/test_telemetry.py +82 -37
  123. hud/utils/tests/test_version.py +8 -8
  124. hud/version.py +7 -7
  125. hud_python-0.4.0.dist-info/METADATA +474 -0
  126. hud_python-0.4.0.dist-info/RECORD +132 -0
  127. hud_python-0.4.0.dist-info/entry_points.txt +3 -0
  128. {hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
  129. hud/adapters/__init__.py +0 -8
  130. hud/adapters/claude/__init__.py +0 -5
  131. hud/adapters/claude/adapter.py +0 -180
  132. hud/adapters/claude/tests/__init__.py +0 -1
  133. hud/adapters/claude/tests/test_adapter.py +0 -519
  134. hud/adapters/common/__init__.py +0 -6
  135. hud/adapters/common/adapter.py +0 -178
  136. hud/adapters/common/tests/test_adapter.py +0 -289
  137. hud/adapters/common/types.py +0 -446
  138. hud/adapters/operator/__init__.py +0 -5
  139. hud/adapters/operator/adapter.py +0 -108
  140. hud/adapters/operator/tests/__init__.py +0 -1
  141. hud/adapters/operator/tests/test_adapter.py +0 -370
  142. hud/agent/__init__.py +0 -19
  143. hud/agent/base.py +0 -126
  144. hud/agent/claude.py +0 -271
  145. hud/agent/langchain.py +0 -215
  146. hud/agent/misc/__init__.py +0 -3
  147. hud/agent/operator.py +0 -268
  148. hud/agent/tests/__init__.py +0 -1
  149. hud/agent/tests/test_base.py +0 -202
  150. hud/env/__init__.py +0 -11
  151. hud/env/client.py +0 -35
  152. hud/env/docker_client.py +0 -349
  153. hud/env/environment.py +0 -446
  154. hud/env/local_docker_client.py +0 -358
  155. hud/env/remote_client.py +0 -212
  156. hud/env/remote_docker_client.py +0 -292
  157. hud/gym.py +0 -130
  158. hud/job.py +0 -773
  159. hud/mcp/__init__.py +0 -17
  160. hud/mcp/base.py +0 -631
  161. hud/mcp/client.py +0 -312
  162. hud/mcp/tests/test_base.py +0 -512
  163. hud/mcp/tests/test_claude.py +0 -294
  164. hud/task.py +0 -149
  165. hud/taskset.py +0 -237
  166. hud/telemetry/_trace.py +0 -347
  167. hud/telemetry/context.py +0 -230
  168. hud/telemetry/exporter.py +0 -575
  169. hud/telemetry/instrumentation/__init__.py +0 -3
  170. hud/telemetry/instrumentation/mcp.py +0 -259
  171. hud/telemetry/instrumentation/registry.py +0 -59
  172. hud/telemetry/mcp_models.py +0 -270
  173. hud/telemetry/tests/__init__.py +0 -1
  174. hud/telemetry/tests/test_context.py +0 -210
  175. hud/telemetry/tests/test_trace.py +0 -312
  176. hud/tools/helper/README.md +0 -56
  177. hud/tools/helper/__init__.py +0 -9
  178. hud/tools/helper/mcp_server.py +0 -78
  179. hud/tools/helper/server_initialization.py +0 -115
  180. hud/tools/helper/utils.py +0 -58
  181. hud/trajectory.py +0 -94
  182. hud/utils/agent.py +0 -37
  183. hud/utils/common.py +0 -256
  184. hud/utils/config.py +0 -120
  185. hud/utils/deprecation.py +0 -115
  186. hud/utils/misc.py +0 -53
  187. hud/utils/tests/test_common.py +0 -277
  188. hud/utils/tests/test_config.py +0 -129
  189. hud_python-0.3.4.dist-info/METADATA +0 -284
  190. hud_python-0.3.4.dist-info/RECORD +0 -120
  191. hud/{adapters/common → shared}/tests/__init__.py +0 -0
  192. {hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0
hud/agent/operator.py DELETED
@@ -1,268 +0,0 @@
1
- import json
2
- import logging
3
- import os
4
- from typing import Any, Literal, cast
5
-
6
- from openai import AsyncOpenAI
7
- from openai.types.responses import (
8
- ToolParam,
9
- ResponseInputParam,
10
- ResponseInputItemParam,
11
- ResponseOutputMessage,
12
- ResponseComputerToolCall,
13
- ResponseOutputText,
14
- )
15
-
16
- from hud.adapters import Adapter
17
- from hud.agent.base import Agent
18
- from hud.adapters.operator import OperatorAdapter
19
- from hud.types import Gym
20
- from hud.utils.common import Observation
21
- from hud.settings import settings
22
- from hud.adapters.common.types import LogType
23
-
24
- logger = logging.getLogger(__name__)
25
-
26
-
27
- class OperatorAgent(Agent[AsyncOpenAI, dict[str, Any]]):
28
- """
29
- An agent implementation using OpenAI's Computer Use API.
30
-
31
- This agent interacts with HUD environments using OpenAI's Computer Use API
32
- through the OperatorAdapter which converts actions to the format expected by HUD.
33
- """
34
-
35
- transfer_gyms: dict[Gym, Gym] = {"qa": "hud-browser"}
36
-
37
- def __init__(
38
- self,
39
- client: AsyncOpenAI | None = None,
40
- model: str = "computer-use-preview",
41
- environment: Literal["windows", "mac", "linux", "browser"] = "browser",
42
- adapter: Adapter | None = None,
43
- max_iterations: int = 8,
44
- name: str | None = None,
45
- ):
46
- """
47
- Initialize the OperatorAgent.
48
-
49
- Args:
50
- client: The AsyncOpenAI client for API calls (optional, created automatically if not provided)
51
- model: The model to use for computer use
52
- environment: The environment type (windows, mac, linux, browser)
53
- adapter: The adapter to use for preprocessing and postprocessing
54
- max_iterations: Maximum number of iterations for the agent
55
- name: The name of the agent
56
- """
57
- # Initialize client if not provided
58
- if client is None:
59
- # Get API key from settings
60
- api_key = settings.openai_api_key
61
- if not api_key:
62
- raise ValueError(
63
- "OpenAI API key not found in settings or environment variables. Set OPENAI_API_KEY."
64
- )
65
-
66
- # Create asynchronous client
67
- client = AsyncOpenAI(api_key=api_key)
68
-
69
- adapter = adapter or OperatorAdapter()
70
-
71
- if name is None:
72
- name = f"openai-{model}"
73
-
74
- super().__init__(client=client, adapter=adapter, name=name)
75
-
76
- self.model = model
77
- self.environment = environment
78
- self.max_iterations = max_iterations
79
-
80
- # Default dimensions
81
- self.width = 1024
82
- self.height = 768
83
-
84
- # Update dimensions if adapter is provided
85
- if self.adapter:
86
- self.width = self.adapter.agent_width
87
- self.height = self.adapter.agent_height
88
-
89
- # Message history and state tracking
90
- self.last_response_id = None
91
- self.pending_call_id = None
92
- self.initial_prompt = None
93
- self.pending_safety_checks = []
94
-
95
- self.base_system_prompt = """
96
- You are an autonomous computer-using agent. Follow these guidelines:
97
-
98
- 1. Be decisive and complete tasks without asking for confirmation unless absolutely necessary.
99
- 2. If you need user confirmation for safety-critical actions, use the formal safety check mechanism.
100
- 3. Do NOT ask questions like "Should I proceed?" or "Would you like me to continue?" - just proceed with the task.
101
- 4. When you find what you're looking for (e.g., a file to upload), proceed with the action directly.
102
- 5. Only stop when the task is fully complete or if you encounter an error that prevents completion.
103
- 6. Trust that the user wants you to complete the entire task they've requested.
104
-
105
- Remember: You wave been given permission to complete the requested task autonomously.
106
- """
107
-
108
- self.task_run_id = None
109
-
110
- async def fetch_response(self, observation: Observation) -> tuple[list[dict[str, Any]], bool]:
111
- """
112
- Fetch a response from the model based on the observation.
113
-
114
- Args:
115
- observation: The preprocessed observation
116
-
117
- Returns:
118
- tuple[list[dict[str, Any]], bool, list[LogType] | None]: A tuple containing the list of raw actions,
119
- boolean indicating if the agent believes the task is complete.
120
- """
121
- if not self.client:
122
- raise ValueError("Client is required")
123
-
124
- # Define the computer use tool with correct type using cast
125
- computer_tool = cast(
126
- ToolParam,
127
- {
128
- "type": "computer_use_preview",
129
- "display_width": self.width,
130
- "display_height": self.height,
131
- "environment": self.environment,
132
- },
133
- )
134
-
135
- # Process the observation based on whether it's the first one or a response to an action
136
- if self.pending_call_id is None: # and self.last_response_id is None:
137
- # This is the first observation, store and send the prompt
138
- self.initial_prompt = observation.text
139
-
140
- # Create the initial request following the required structure
141
- input_content: list[dict[str, Any]] = [
142
- {"type": "input_text", "text": observation.text or ""}
143
- ]
144
-
145
- # Add screenshot if present
146
- if observation.screenshot:
147
- input_content.append(
148
- {
149
- "type": "input_image",
150
- "image_url": f"data:image/png;base64,{observation.screenshot}",
151
- }
152
- )
153
-
154
- # Structure the input correctly for the API using cast
155
- input_param = cast(ResponseInputParam, [{"role": "user", "content": input_content}])
156
-
157
- response = await self.client.responses.create(
158
- model=self.model,
159
- tools=[computer_tool],
160
- input=input_param,
161
- instructions=self.base_system_prompt,
162
- truncation="auto",
163
- reasoning={"summary": "auto"},
164
- )
165
-
166
- else:
167
- if not observation.screenshot:
168
- logger.warning("No screenshot provided for response to action")
169
- return [], True
170
-
171
- # Create a response to the previous action with the new screenshot
172
- input_param_followup = cast(
173
- ResponseInputParam,
174
- [
175
- cast(
176
- ResponseInputItemParam,
177
- {
178
- "call_id": self.pending_call_id,
179
- "type": "computer_call_output",
180
- "output": {
181
- "type": "input_image",
182
- "image_url": f"data:image/png;base64,{observation.screenshot}",
183
- },
184
- "acknowledged_safety_checks": self.pending_safety_checks,
185
- },
186
- )
187
- ],
188
- )
189
- self.pending_safety_checks = []
190
-
191
- response = await self.client.responses.create(
192
- model=self.model,
193
- previous_response_id=self.last_response_id,
194
- tools=[computer_tool],
195
- input=input_param_followup,
196
- truncation="auto",
197
- )
198
-
199
- # Store the response ID for the next call
200
- self.last_response_id = response.id
201
-
202
- # Process the response to extract actions or final text
203
- actions = []
204
- done = True # Assume done unless a computer call is found
205
- final_text_response = ""
206
-
207
- self.pending_call_id = None
208
-
209
- # Check for computer calls first
210
- computer_calls = [
211
- item
212
- for item in response.output
213
- if isinstance(item, ResponseComputerToolCall) and item.type == "computer_call"
214
- ]
215
-
216
- if computer_calls:
217
- # If computer calls exist, process them and set done=False
218
- done = False
219
- for computer_call in computer_calls:
220
- self.pending_call_id = computer_call.call_id
221
- action = computer_call.action
222
- self.pending_safety_checks = computer_call.pending_safety_checks
223
- actions.append(action.model_dump()) # Convert Pydantic model to dict
224
- # logger.info(f"Computer call action: {action}")
225
- else:
226
- # No computer calls, check for a final text message
227
- # logger.info("No computer call found. Checking for final message.")
228
- # logger.info(response.output)
229
- for item in response.output:
230
- if isinstance(item, ResponseOutputMessage) and item.type == "message":
231
- # Extract text from content blocks within the message
232
- full_text = "".join(
233
- [c.text for c in item.content if isinstance(c, ResponseOutputText)]
234
- )
235
- if full_text:
236
- final_text_response = full_text
237
- # logger.info(f"Final text message: {final_text_response}")
238
- break # Stop after finding the first text message
239
-
240
- # If we found final text, package it as a 'response' action
241
- if final_text_response:
242
- if (
243
- "the task is infeasible" in final_text_response.lower()
244
- ): # Custom action for OSWorld
245
- done = True
246
- actions = [{"type": "custom", "action": "FAIL"}]
247
- else:
248
- actions = [{"type": "response", "text": final_text_response}]
249
- done = True
250
- else:
251
- logger.info("No computer calls and no final text message found.")
252
- # Keep done = True, actions remains empty
253
-
254
- reasoning = ""
255
- for item in response.output:
256
- if item.type == "reasoning" and item.summary:
257
- reasoning += f"Thinking: {item.summary[0].text}\n"
258
- elif item.type == "message":
259
- for content in item.content:
260
- if isinstance(content, ResponseOutputText):
261
- reasoning += f"{content.text}\n"
262
-
263
- # add reasoning to the actions
264
- for action in actions:
265
- action["reasoning"] = reasoning
266
- action["logs"] = response.model_dump() # type: ignore[assignment]
267
-
268
- return actions, done
hud/agent/tests/__init__.py DELETED
@@ -1 +0,0 @@
1
- # Tests for hud.agent module
hud/agent/tests/test_base.py DELETED
@@ -1,202 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from unittest.mock import MagicMock, patch
4
- from typing import Any
5
-
6
- import pytest
7
-
8
- from hud.agent.base import Agent
9
- from hud.adapters import Adapter
10
- from hud.adapters.common.types import ClickAction, Point
11
- from hud.utils.common import Observation
12
-
13
-
14
- class ConcreteAgent(Agent[Any, dict[str, Any]]):
15
- """Concrete implementation of Agent for testing."""
16
-
17
- def __init__(self, client: Any = None, adapter: Adapter | None = None):
18
- super().__init__(client, adapter)
19
- self.mock_responses = []
20
- self.call_count = 0
21
-
22
- async def fetch_response(self, observation: Observation) -> tuple[list[dict[str, Any]], bool]:
23
- """Mock implementation that returns predefined responses."""
24
- if self.call_count < len(self.mock_responses):
25
- actions, done = self.mock_responses[self.call_count]
26
- self.call_count += 1
27
- return actions, done
28
- return [], True
29
-
30
-
31
- class TestAgentBase:
32
- """Test the base Agent class."""
33
-
34
- @pytest.fixture
35
- def mock_client(self):
36
- """Mock client for testing."""
37
- return MagicMock()
38
-
39
- @pytest.fixture
40
- def mock_adapter(self):
41
- """Mock adapter for testing."""
42
- adapter = MagicMock(spec=Adapter)
43
- adapter.rescale.return_value = "rescaled_screenshot"
44
- adapter.adapt_list.return_value = [ClickAction(point=Point(x=100, y=200))]
45
- return adapter
46
-
47
- @pytest.fixture
48
- def agent_with_adapter(self, mock_client, mock_adapter):
49
- """Agent with both client and adapter."""
50
- return ConcreteAgent(client=mock_client, adapter=mock_adapter)
51
-
52
- @pytest.fixture
53
- def agent_without_adapter(self, mock_client):
54
- """Agent with client but no adapter."""
55
- return ConcreteAgent(client=mock_client, adapter=None)
56
-
57
- def test_init_with_client_and_adapter(self, mock_client, mock_adapter):
58
- """Test agent initialization with client and adapter."""
59
- agent = ConcreteAgent(client=mock_client, adapter=mock_adapter)
60
- assert agent.client == mock_client
61
- assert agent.adapter == mock_adapter
62
-
63
- def test_init_with_none_values(self):
64
- """Test agent initialization with None values."""
65
- agent = ConcreteAgent(client=None, adapter=None)
66
- assert agent.client is None
67
- assert agent.adapter is None
68
-
69
- def test_preprocess_without_adapter(self, agent_without_adapter):
70
- """Test preprocess when no adapter is available."""
71
- observation = Observation(text="test", screenshot="screenshot_data")
72
- result = agent_without_adapter.preprocess(observation)
73
-
74
- # Should return original observation unchanged
75
- assert result == observation
76
- assert result.text == "test"
77
- assert result.screenshot == "screenshot_data"
78
-
79
- def test_preprocess_without_screenshot(self, agent_with_adapter):
80
- """Test preprocess when no screenshot is available."""
81
- observation = Observation(text="test", screenshot=None)
82
- result = agent_with_adapter.preprocess(observation)
83
-
84
- # Should return original observation unchanged
85
- assert result == observation
86
- assert result.text == "test"
87
- assert result.screenshot is None
88
-
89
- def test_preprocess_with_adapter_and_screenshot(self, agent_with_adapter, mock_adapter):
90
- """Test preprocess with adapter and screenshot (covers missing lines 48-55)."""
91
- observation = Observation(text="test", screenshot="original_screenshot")
92
- result = agent_with_adapter.preprocess(observation)
93
-
94
- # Should create new observation with rescaled screenshot
95
- mock_adapter.rescale.assert_called_once_with("original_screenshot")
96
- assert result.text == "test"
97
- assert result.screenshot == "rescaled_screenshot"
98
- # Should be a new object, not the original
99
- assert result is not observation
100
-
101
- def test_postprocess_without_adapter(self, agent_without_adapter):
102
- """Test postprocess when no adapter is available (covers missing lines 82-85)."""
103
- actions = [{"type": "click", "x": 100, "y": 200}]
104
-
105
- with pytest.raises(ValueError, match="Cannot postprocess actions without an adapter"):
106
- agent_without_adapter.postprocess(actions)
107
-
108
- def test_postprocess_with_adapter(self, agent_with_adapter, mock_adapter):
109
- """Test postprocess with adapter."""
110
- actions = [{"type": "click", "x": 100, "y": 200}]
111
- result = agent_with_adapter.postprocess(actions)
112
-
113
- mock_adapter.adapt_list.assert_called_once_with(actions)
114
- assert len(result) == 1
115
- assert isinstance(result[0], ClickAction)
116
-
117
- @pytest.mark.asyncio
118
- async def test_predict_without_verbose(self, agent_with_adapter):
119
- """Test predict method without verbose logging."""
120
- observation = Observation(text="test", screenshot="screenshot")
121
- agent_with_adapter.mock_responses = [([{"type": "click", "x": 100, "y": 200}], False)]
122
-
123
- actions, done = await agent_with_adapter.predict(observation, verbose=False)
124
-
125
- assert len(actions) == 1
126
- assert isinstance(actions[0], ClickAction)
127
- assert done is False
128
-
129
- @pytest.mark.asyncio
130
- @patch("hud.agent.base.logger")
131
- async def test_predict_with_verbose_logging(self, mock_logger, agent_with_adapter):
132
- """Test predict method with verbose logging (covers missing lines 100-116)."""
133
- observation = Observation(text="test", screenshot="screenshot")
134
- agent_with_adapter.mock_responses = [([{"type": "click", "x": 100, "y": 200}], True)]
135
-
136
- actions, done = await agent_with_adapter.predict(observation, verbose=True)
137
-
138
- # Verify verbose logging was called
139
- mock_logger.info.assert_any_call("Predicting action...")
140
- mock_logger.info.assert_any_call("Raw action: %s", [{"type": "click", "x": 100, "y": 200}])
141
-
142
- assert len(actions) == 1
143
- assert isinstance(actions[0], ClickAction)
144
- assert done is True
145
-
146
- @pytest.mark.asyncio
147
- async def test_predict_without_adapter_returns_raw_actions(self, agent_without_adapter):
148
- """Test predict without adapter returns raw actions."""
149
- observation = Observation(text="test", screenshot=None)
150
- raw_actions = [{"type": "click", "x": 100, "y": 200}]
151
- agent_without_adapter.mock_responses = [(raw_actions, True)]
152
-
153
- actions, done = await agent_without_adapter.predict(observation, verbose=False)
154
-
155
- # Should return raw actions, not processed ones
156
- assert actions == raw_actions
157
- assert done is True
158
-
159
- @pytest.mark.asyncio
160
- async def test_predict_with_empty_actions(self, agent_with_adapter):
161
- """Test predict when fetch_response returns empty actions."""
162
- observation = Observation(text="test", screenshot="screenshot")
163
- agent_with_adapter.mock_responses = [([], True)]
164
-
165
- actions, done = await agent_with_adapter.predict(observation, verbose=False)
166
-
167
- # Should return empty actions without calling adapter
168
- assert actions == []
169
- assert done is True
170
-
171
- @pytest.mark.asyncio
172
- async def test_predict_full_pipeline(self, agent_with_adapter, mock_adapter):
173
- """Test the complete predict pipeline with all stages."""
174
- # Set up observation with screenshot that will be rescaled
175
- observation = Observation(text="test input", screenshot="original_screenshot")
176
- raw_actions = [{"type": "click", "x": 150, "y": 250}]
177
- agent_with_adapter.mock_responses = [(raw_actions, False)]
178
-
179
- actions, done = await agent_with_adapter.predict(observation, verbose=True)
180
-
181
- # Verify all stages were called
182
- # Stage 1: Preprocessing
183
- mock_adapter.rescale.assert_called_once_with("original_screenshot")
184
-
185
- # Stage 3: Postprocessing
186
- mock_adapter.adapt_list.assert_called_once_with(raw_actions)
187
-
188
- assert len(actions) == 1
189
- assert isinstance(actions[0], ClickAction)
190
- assert done is False
191
-
192
- @pytest.mark.asyncio
193
- async def test_predict_integration_without_screenshot(self, agent_with_adapter):
194
- """Test predict integration when observation has no screenshot."""
195
- observation = Observation(text="test input", screenshot=None)
196
- raw_actions = [{"type": "response", "text": "Task completed"}]
197
- agent_with_adapter.mock_responses = [(raw_actions, True)]
198
-
199
- actions, done = await agent_with_adapter.predict(observation, verbose=False)
200
-
201
- assert len(actions) == 1
202
- assert done is True
hud/env/__init__.py DELETED
@@ -1,11 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from . import docker_client, environment, local_docker_client, remote_client, remote_docker_client
4
-
5
- __all__ = [
6
- "docker_client",
7
- "environment",
8
- "local_docker_client",
9
- "remote_client",
10
- "remote_docker_client",
11
- ]
hud/env/client.py DELETED
@@ -1,35 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from abc import ABC, abstractmethod
4
- from typing import TYPE_CHECKING, Any
5
-
6
- from pydantic import BaseModel
7
-
8
- if TYPE_CHECKING:
9
- from hud.types import EnvironmentStatus
10
- from hud.utils.config import FunctionConfig
11
-
12
-
13
- class Client(BaseModel, ABC):
14
- """
15
- Base class for all environment clients.
16
- """
17
-
18
- @abstractmethod
19
- async def invoke(self, config: FunctionConfig) -> Any:
20
- """
21
- Invoke the environment with the given config.
22
- """
23
-
24
- @abstractmethod
25
- async def get_status(self) -> EnvironmentStatus:
26
- """
27
- Get the current status of the environment.
28
- """
29
-
30
- @abstractmethod
31
- async def close(self) -> None:
32
- """
33
- Close the environment and clean up any resources.
34
- This method should be called when the environment is no longer needed.
35
- """