hud-python 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +22 -89
- hud/agents/__init__.py +15 -0
- hud/agents/art.py +101 -0
- hud/agents/base.py +599 -0
- hud/{mcp → agents}/claude.py +373 -321
- hud/{mcp → agents}/langchain.py +250 -250
- hud/agents/misc/__init__.py +7 -0
- hud/{agent → agents}/misc/response_agent.py +80 -80
- hud/{mcp → agents}/openai.py +352 -334
- hud/agents/openai_chat_generic.py +154 -0
- hud/{mcp → agents}/tests/__init__.py +1 -1
- hud/agents/tests/test_base.py +742 -0
- hud/agents/tests/test_claude.py +324 -0
- hud/{mcp → agents}/tests/test_client.py +363 -324
- hud/{mcp → agents}/tests/test_openai.py +237 -238
- hud/cli/__init__.py +617 -0
- hud/cli/__main__.py +8 -0
- hud/cli/analyze.py +371 -0
- hud/cli/analyze_metadata.py +230 -0
- hud/cli/build.py +427 -0
- hud/cli/clone.py +185 -0
- hud/cli/cursor.py +92 -0
- hud/cli/debug.py +392 -0
- hud/cli/docker_utils.py +83 -0
- hud/cli/init.py +281 -0
- hud/cli/interactive.py +353 -0
- hud/cli/mcp_server.py +756 -0
- hud/cli/pull.py +336 -0
- hud/cli/push.py +370 -0
- hud/cli/remote_runner.py +311 -0
- hud/cli/runner.py +160 -0
- hud/cli/tests/__init__.py +3 -0
- hud/cli/tests/test_analyze.py +284 -0
- hud/cli/tests/test_cli_init.py +265 -0
- hud/cli/tests/test_cli_main.py +27 -0
- hud/cli/tests/test_clone.py +142 -0
- hud/cli/tests/test_cursor.py +253 -0
- hud/cli/tests/test_debug.py +453 -0
- hud/cli/tests/test_mcp_server.py +139 -0
- hud/cli/tests/test_utils.py +388 -0
- hud/cli/utils.py +263 -0
- hud/clients/README.md +143 -0
- hud/clients/__init__.py +16 -0
- hud/clients/base.py +379 -0
- hud/clients/fastmcp.py +222 -0
- hud/clients/mcp_use.py +278 -0
- hud/clients/tests/__init__.py +1 -0
- hud/clients/tests/test_client_integration.py +111 -0
- hud/clients/tests/test_fastmcp.py +342 -0
- hud/clients/tests/test_protocol.py +188 -0
- hud/clients/utils/__init__.py +1 -0
- hud/clients/utils/retry_transport.py +160 -0
- hud/datasets.py +322 -192
- hud/misc/__init__.py +1 -0
- hud/{agent → misc}/claude_plays_pokemon.py +292 -283
- hud/otel/__init__.py +35 -0
- hud/otel/collector.py +142 -0
- hud/otel/config.py +164 -0
- hud/otel/context.py +536 -0
- hud/otel/exporters.py +366 -0
- hud/otel/instrumentation.py +97 -0
- hud/otel/processors.py +118 -0
- hud/otel/tests/__init__.py +1 -0
- hud/otel/tests/test_processors.py +197 -0
- hud/server/__init__.py +5 -5
- hud/server/context.py +114 -0
- hud/server/helper/__init__.py +5 -0
- hud/server/low_level.py +132 -0
- hud/server/server.py +166 -0
- hud/server/tests/__init__.py +3 -0
- hud/settings.py +73 -79
- hud/shared/__init__.py +5 -0
- hud/{exceptions.py → shared/exceptions.py} +180 -180
- hud/{server → shared}/requests.py +264 -264
- hud/shared/tests/test_exceptions.py +157 -0
- hud/{server → shared}/tests/test_requests.py +275 -275
- hud/telemetry/__init__.py +25 -30
- hud/telemetry/instrument.py +379 -0
- hud/telemetry/job.py +309 -141
- hud/telemetry/replay.py +74 -0
- hud/telemetry/trace.py +83 -0
- hud/tools/__init__.py +33 -34
- hud/tools/base.py +365 -65
- hud/tools/bash.py +161 -137
- hud/tools/computer/__init__.py +15 -13
- hud/tools/computer/anthropic.py +437 -420
- hud/tools/computer/hud.py +376 -334
- hud/tools/computer/openai.py +295 -292
- hud/tools/computer/settings.py +82 -0
- hud/tools/edit.py +314 -290
- hud/tools/executors/__init__.py +30 -30
- hud/tools/executors/base.py +539 -532
- hud/tools/executors/pyautogui.py +621 -619
- hud/tools/executors/tests/__init__.py +1 -1
- hud/tools/executors/tests/test_base_executor.py +338 -338
- hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
- hud/tools/executors/xdo.py +511 -503
- hud/tools/{playwright_tool.py → playwright.py} +412 -379
- hud/tools/tests/__init__.py +3 -3
- hud/tools/tests/test_base.py +282 -0
- hud/tools/tests/test_bash.py +158 -152
- hud/tools/tests/test_bash_extended.py +197 -0
- hud/tools/tests/test_computer.py +425 -52
- hud/tools/tests/test_computer_actions.py +34 -34
- hud/tools/tests/test_edit.py +259 -240
- hud/tools/tests/test_init.py +27 -27
- hud/tools/tests/test_playwright_tool.py +183 -183
- hud/tools/tests/test_tools.py +145 -157
- hud/tools/tests/test_utils.py +156 -156
- hud/tools/types.py +72 -0
- hud/tools/utils.py +50 -50
- hud/types.py +136 -89
- hud/utils/__init__.py +10 -16
- hud/utils/async_utils.py +65 -0
- hud/utils/design.py +168 -0
- hud/utils/mcp.py +55 -0
- hud/utils/progress.py +149 -149
- hud/utils/telemetry.py +66 -66
- hud/utils/tests/test_async_utils.py +173 -0
- hud/utils/tests/test_init.py +17 -21
- hud/utils/tests/test_progress.py +261 -225
- hud/utils/tests/test_telemetry.py +82 -37
- hud/utils/tests/test_version.py +8 -8
- hud/version.py +7 -7
- hud_python-0.4.1.dist-info/METADATA +476 -0
- hud_python-0.4.1.dist-info/RECORD +132 -0
- hud_python-0.4.1.dist-info/entry_points.txt +3 -0
- {hud_python-0.3.5.dist-info → hud_python-0.4.1.dist-info}/licenses/LICENSE +21 -21
- hud/adapters/__init__.py +0 -8
- hud/adapters/claude/__init__.py +0 -5
- hud/adapters/claude/adapter.py +0 -180
- hud/adapters/claude/tests/__init__.py +0 -1
- hud/adapters/claude/tests/test_adapter.py +0 -519
- hud/adapters/common/__init__.py +0 -6
- hud/adapters/common/adapter.py +0 -178
- hud/adapters/common/tests/test_adapter.py +0 -289
- hud/adapters/common/types.py +0 -446
- hud/adapters/operator/__init__.py +0 -5
- hud/adapters/operator/adapter.py +0 -108
- hud/adapters/operator/tests/__init__.py +0 -1
- hud/adapters/operator/tests/test_adapter.py +0 -370
- hud/agent/__init__.py +0 -19
- hud/agent/base.py +0 -126
- hud/agent/claude.py +0 -271
- hud/agent/langchain.py +0 -215
- hud/agent/misc/__init__.py +0 -3
- hud/agent/operator.py +0 -268
- hud/agent/tests/__init__.py +0 -1
- hud/agent/tests/test_base.py +0 -202
- hud/env/__init__.py +0 -11
- hud/env/client.py +0 -35
- hud/env/docker_client.py +0 -349
- hud/env/environment.py +0 -446
- hud/env/local_docker_client.py +0 -358
- hud/env/remote_client.py +0 -212
- hud/env/remote_docker_client.py +0 -292
- hud/gym.py +0 -130
- hud/job.py +0 -773
- hud/mcp/__init__.py +0 -17
- hud/mcp/base.py +0 -631
- hud/mcp/client.py +0 -312
- hud/mcp/tests/test_base.py +0 -512
- hud/mcp/tests/test_claude.py +0 -294
- hud/task.py +0 -149
- hud/taskset.py +0 -237
- hud/telemetry/_trace.py +0 -347
- hud/telemetry/context.py +0 -230
- hud/telemetry/exporter.py +0 -575
- hud/telemetry/instrumentation/__init__.py +0 -3
- hud/telemetry/instrumentation/mcp.py +0 -259
- hud/telemetry/instrumentation/registry.py +0 -59
- hud/telemetry/mcp_models.py +0 -270
- hud/telemetry/tests/__init__.py +0 -1
- hud/telemetry/tests/test_context.py +0 -210
- hud/telemetry/tests/test_trace.py +0 -312
- hud/tools/helper/README.md +0 -56
- hud/tools/helper/__init__.py +0 -9
- hud/tools/helper/mcp_server.py +0 -78
- hud/tools/helper/server_initialization.py +0 -115
- hud/tools/helper/utils.py +0 -58
- hud/trajectory.py +0 -94
- hud/utils/agent.py +0 -37
- hud/utils/common.py +0 -256
- hud/utils/config.py +0 -120
- hud/utils/deprecation.py +0 -115
- hud/utils/misc.py +0 -53
- hud/utils/tests/test_common.py +0 -277
- hud/utils/tests/test_config.py +0 -129
- hud_python-0.3.5.dist-info/METADATA +0 -284
- hud_python-0.3.5.dist-info/RECORD +0 -120
- /hud/{adapters/common → shared}/tests/__init__.py +0 -0
- {hud_python-0.3.5.dist-info → hud_python-0.4.1.dist-info}/WHEEL +0 -0
hud/agent/operator.py
DELETED
|
@@ -1,268 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import logging
|
|
3
|
-
import os
|
|
4
|
-
from typing import Any, Literal, cast
|
|
5
|
-
|
|
6
|
-
from openai import AsyncOpenAI
|
|
7
|
-
from openai.types.responses import (
|
|
8
|
-
ToolParam,
|
|
9
|
-
ResponseInputParam,
|
|
10
|
-
ResponseInputItemParam,
|
|
11
|
-
ResponseOutputMessage,
|
|
12
|
-
ResponseComputerToolCall,
|
|
13
|
-
ResponseOutputText,
|
|
14
|
-
)
|
|
15
|
-
|
|
16
|
-
from hud.adapters import Adapter
|
|
17
|
-
from hud.agent.base import Agent
|
|
18
|
-
from hud.adapters.operator import OperatorAdapter
|
|
19
|
-
from hud.types import Gym
|
|
20
|
-
from hud.utils.common import Observation
|
|
21
|
-
from hud.settings import settings
|
|
22
|
-
from hud.adapters.common.types import LogType
|
|
23
|
-
|
|
24
|
-
logger = logging.getLogger(__name__)
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class OperatorAgent(Agent[AsyncOpenAI, dict[str, Any]]):
|
|
28
|
-
"""
|
|
29
|
-
An agent implementation using OpenAI's Computer Use API.
|
|
30
|
-
|
|
31
|
-
This agent interacts with HUD environments using OpenAI's Computer Use API
|
|
32
|
-
through the OperatorAdapter which converts actions to the format expected by HUD.
|
|
33
|
-
"""
|
|
34
|
-
|
|
35
|
-
transfer_gyms: dict[Gym, Gym] = {"qa": "hud-browser"}
|
|
36
|
-
|
|
37
|
-
def __init__(
|
|
38
|
-
self,
|
|
39
|
-
client: AsyncOpenAI | None = None,
|
|
40
|
-
model: str = "computer-use-preview",
|
|
41
|
-
environment: Literal["windows", "mac", "linux", "browser"] = "browser",
|
|
42
|
-
adapter: Adapter | None = None,
|
|
43
|
-
max_iterations: int = 8,
|
|
44
|
-
name: str | None = None,
|
|
45
|
-
):
|
|
46
|
-
"""
|
|
47
|
-
Initialize the OperatorAgent.
|
|
48
|
-
|
|
49
|
-
Args:
|
|
50
|
-
client: The AsyncOpenAI client for API calls (optional, created automatically if not provided)
|
|
51
|
-
model: The model to use for computer use
|
|
52
|
-
environment: The environment type (windows, mac, linux, browser)
|
|
53
|
-
adapter: The adapter to use for preprocessing and postprocessing
|
|
54
|
-
max_iterations: Maximum number of iterations for the agent
|
|
55
|
-
name: The name of the agent
|
|
56
|
-
"""
|
|
57
|
-
# Initialize client if not provided
|
|
58
|
-
if client is None:
|
|
59
|
-
# Get API key from settings
|
|
60
|
-
api_key = settings.openai_api_key
|
|
61
|
-
if not api_key:
|
|
62
|
-
raise ValueError(
|
|
63
|
-
"OpenAI API key not found in settings or environment variables. Set OPENAI_API_KEY."
|
|
64
|
-
)
|
|
65
|
-
|
|
66
|
-
# Create asynchronous client
|
|
67
|
-
client = AsyncOpenAI(api_key=api_key)
|
|
68
|
-
|
|
69
|
-
adapter = adapter or OperatorAdapter()
|
|
70
|
-
|
|
71
|
-
if name is None:
|
|
72
|
-
name = f"openai-{model}"
|
|
73
|
-
|
|
74
|
-
super().__init__(client=client, adapter=adapter, name=name)
|
|
75
|
-
|
|
76
|
-
self.model = model
|
|
77
|
-
self.environment = environment
|
|
78
|
-
self.max_iterations = max_iterations
|
|
79
|
-
|
|
80
|
-
# Default dimensions
|
|
81
|
-
self.width = 1024
|
|
82
|
-
self.height = 768
|
|
83
|
-
|
|
84
|
-
# Update dimensions if adapter is provided
|
|
85
|
-
if self.adapter:
|
|
86
|
-
self.width = self.adapter.agent_width
|
|
87
|
-
self.height = self.adapter.agent_height
|
|
88
|
-
|
|
89
|
-
# Message history and state tracking
|
|
90
|
-
self.last_response_id = None
|
|
91
|
-
self.pending_call_id = None
|
|
92
|
-
self.initial_prompt = None
|
|
93
|
-
self.pending_safety_checks = []
|
|
94
|
-
|
|
95
|
-
self.base_system_prompt = """
|
|
96
|
-
You are an autonomous computer-using agent. Follow these guidelines:
|
|
97
|
-
|
|
98
|
-
1. Be decisive and complete tasks without asking for confirmation unless absolutely necessary.
|
|
99
|
-
2. If you need user confirmation for safety-critical actions, use the formal safety check mechanism.
|
|
100
|
-
3. Do NOT ask questions like "Should I proceed?" or "Would you like me to continue?" - just proceed with the task.
|
|
101
|
-
4. When you find what you're looking for (e.g., a file to upload), proceed with the action directly.
|
|
102
|
-
5. Only stop when the task is fully complete or if you encounter an error that prevents completion.
|
|
103
|
-
6. Trust that the user wants you to complete the entire task they've requested.
|
|
104
|
-
|
|
105
|
-
Remember: You have been given permission to complete the requested task autonomously.
|
|
106
|
-
"""
|
|
107
|
-
|
|
108
|
-
self.task_run_id = None
|
|
109
|
-
|
|
110
|
-
async def fetch_response(self, observation: Observation) -> tuple[list[dict[str, Any]], bool]:
|
|
111
|
-
"""
|
|
112
|
-
Fetch a response from the model based on the observation.
|
|
113
|
-
|
|
114
|
-
Args:
|
|
115
|
-
observation: The preprocessed observation
|
|
116
|
-
|
|
117
|
-
Returns:
|
|
118
|
-
tuple[list[dict[str, Any]], bool]: A tuple containing the list of raw actions and a
|
|
119
|
-
boolean indicating if the agent believes the task is complete.
|
|
120
|
-
"""
|
|
121
|
-
if not self.client:
|
|
122
|
-
raise ValueError("Client is required")
|
|
123
|
-
|
|
124
|
-
# Define the computer use tool with correct type using cast
|
|
125
|
-
computer_tool = cast(
|
|
126
|
-
ToolParam,
|
|
127
|
-
{
|
|
128
|
-
"type": "computer_use_preview",
|
|
129
|
-
"display_width": self.width,
|
|
130
|
-
"display_height": self.height,
|
|
131
|
-
"environment": self.environment,
|
|
132
|
-
},
|
|
133
|
-
)
|
|
134
|
-
|
|
135
|
-
# Process the observation based on whether it's the first one or a response to an action
|
|
136
|
-
if self.pending_call_id is None: # and self.last_response_id is None:
|
|
137
|
-
# This is the first observation, store and send the prompt
|
|
138
|
-
self.initial_prompt = observation.text
|
|
139
|
-
|
|
140
|
-
# Create the initial request following the required structure
|
|
141
|
-
input_content: list[dict[str, Any]] = [
|
|
142
|
-
{"type": "input_text", "text": observation.text or ""}
|
|
143
|
-
]
|
|
144
|
-
|
|
145
|
-
# Add screenshot if present
|
|
146
|
-
if observation.screenshot:
|
|
147
|
-
input_content.append(
|
|
148
|
-
{
|
|
149
|
-
"type": "input_image",
|
|
150
|
-
"image_url": f"data:image/png;base64,{observation.screenshot}",
|
|
151
|
-
}
|
|
152
|
-
)
|
|
153
|
-
|
|
154
|
-
# Structure the input correctly for the API using cast
|
|
155
|
-
input_param = cast(ResponseInputParam, [{"role": "user", "content": input_content}])
|
|
156
|
-
|
|
157
|
-
response = await self.client.responses.create(
|
|
158
|
-
model=self.model,
|
|
159
|
-
tools=[computer_tool],
|
|
160
|
-
input=input_param,
|
|
161
|
-
instructions=self.base_system_prompt,
|
|
162
|
-
truncation="auto",
|
|
163
|
-
reasoning={"summary": "auto"},
|
|
164
|
-
)
|
|
165
|
-
|
|
166
|
-
else:
|
|
167
|
-
if not observation.screenshot:
|
|
168
|
-
logger.warning("No screenshot provided for response to action")
|
|
169
|
-
return [], True
|
|
170
|
-
|
|
171
|
-
# Create a response to the previous action with the new screenshot
|
|
172
|
-
input_param_followup = cast(
|
|
173
|
-
ResponseInputParam,
|
|
174
|
-
[
|
|
175
|
-
cast(
|
|
176
|
-
ResponseInputItemParam,
|
|
177
|
-
{
|
|
178
|
-
"call_id": self.pending_call_id,
|
|
179
|
-
"type": "computer_call_output",
|
|
180
|
-
"output": {
|
|
181
|
-
"type": "input_image",
|
|
182
|
-
"image_url": f"data:image/png;base64,{observation.screenshot}",
|
|
183
|
-
},
|
|
184
|
-
"acknowledged_safety_checks": self.pending_safety_checks,
|
|
185
|
-
},
|
|
186
|
-
)
|
|
187
|
-
],
|
|
188
|
-
)
|
|
189
|
-
self.pending_safety_checks = []
|
|
190
|
-
|
|
191
|
-
response = await self.client.responses.create(
|
|
192
|
-
model=self.model,
|
|
193
|
-
previous_response_id=self.last_response_id,
|
|
194
|
-
tools=[computer_tool],
|
|
195
|
-
input=input_param_followup,
|
|
196
|
-
truncation="auto",
|
|
197
|
-
)
|
|
198
|
-
|
|
199
|
-
# Store the response ID for the next call
|
|
200
|
-
self.last_response_id = response.id
|
|
201
|
-
|
|
202
|
-
# Process the response to extract actions or final text
|
|
203
|
-
actions = []
|
|
204
|
-
done = True # Assume done unless a computer call is found
|
|
205
|
-
final_text_response = ""
|
|
206
|
-
|
|
207
|
-
self.pending_call_id = None
|
|
208
|
-
|
|
209
|
-
# Check for computer calls first
|
|
210
|
-
computer_calls = [
|
|
211
|
-
item
|
|
212
|
-
for item in response.output
|
|
213
|
-
if isinstance(item, ResponseComputerToolCall) and item.type == "computer_call"
|
|
214
|
-
]
|
|
215
|
-
|
|
216
|
-
if computer_calls:
|
|
217
|
-
# If computer calls exist, process them and set done=False
|
|
218
|
-
done = False
|
|
219
|
-
for computer_call in computer_calls:
|
|
220
|
-
self.pending_call_id = computer_call.call_id
|
|
221
|
-
action = computer_call.action
|
|
222
|
-
self.pending_safety_checks = computer_call.pending_safety_checks
|
|
223
|
-
actions.append(action.model_dump()) # Convert Pydantic model to dict
|
|
224
|
-
# logger.info(f"Computer call action: {action}")
|
|
225
|
-
else:
|
|
226
|
-
# No computer calls, check for a final text message
|
|
227
|
-
# logger.info("No computer call found. Checking for final message.")
|
|
228
|
-
# logger.info(response.output)
|
|
229
|
-
for item in response.output:
|
|
230
|
-
if isinstance(item, ResponseOutputMessage) and item.type == "message":
|
|
231
|
-
# Extract text from content blocks within the message
|
|
232
|
-
full_text = "".join(
|
|
233
|
-
[c.text for c in item.content if isinstance(c, ResponseOutputText)]
|
|
234
|
-
)
|
|
235
|
-
if full_text:
|
|
236
|
-
final_text_response = full_text
|
|
237
|
-
# logger.info(f"Final text message: {final_text_response}")
|
|
238
|
-
break # Stop after finding the first text message
|
|
239
|
-
|
|
240
|
-
# If we found final text, package it as a 'response' action
|
|
241
|
-
if final_text_response:
|
|
242
|
-
if (
|
|
243
|
-
"the task is infeasible" in final_text_response.lower()
|
|
244
|
-
): # Custom action for OSWorld
|
|
245
|
-
done = True
|
|
246
|
-
actions = [{"type": "custom", "action": "FAIL"}]
|
|
247
|
-
else:
|
|
248
|
-
actions = [{"type": "response", "text": final_text_response}]
|
|
249
|
-
done = True
|
|
250
|
-
else:
|
|
251
|
-
logger.info("No computer calls and no final text message found.")
|
|
252
|
-
# Keep done = True, actions remains empty
|
|
253
|
-
|
|
254
|
-
reasoning = ""
|
|
255
|
-
for item in response.output:
|
|
256
|
-
if item.type == "reasoning" and item.summary:
|
|
257
|
-
reasoning += f"Thinking: {item.summary[0].text}\n"
|
|
258
|
-
elif item.type == "message":
|
|
259
|
-
for content in item.content:
|
|
260
|
-
if isinstance(content, ResponseOutputText):
|
|
261
|
-
reasoning += f"{content.text}\n"
|
|
262
|
-
|
|
263
|
-
# add reasoning to the actions
|
|
264
|
-
for action in actions:
|
|
265
|
-
action["reasoning"] = reasoning
|
|
266
|
-
action["logs"] = response.model_dump() # type: ignore[assignment]
|
|
267
|
-
|
|
268
|
-
return actions, done
|
hud/agent/tests/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
# Tests for hud.agent module
|
hud/agent/tests/test_base.py
DELETED
|
@@ -1,202 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from unittest.mock import MagicMock, patch
|
|
4
|
-
from typing import Any
|
|
5
|
-
|
|
6
|
-
import pytest
|
|
7
|
-
|
|
8
|
-
from hud.agent.base import Agent
|
|
9
|
-
from hud.adapters import Adapter
|
|
10
|
-
from hud.adapters.common.types import ClickAction, Point
|
|
11
|
-
from hud.utils.common import Observation
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class ConcreteAgent(Agent[Any, dict[str, Any]]):
|
|
15
|
-
"""Concrete implementation of Agent for testing."""
|
|
16
|
-
|
|
17
|
-
def __init__(self, client: Any = None, adapter: Adapter | None = None):
|
|
18
|
-
super().__init__(client, adapter)
|
|
19
|
-
self.mock_responses = []
|
|
20
|
-
self.call_count = 0
|
|
21
|
-
|
|
22
|
-
async def fetch_response(self, observation: Observation) -> tuple[list[dict[str, Any]], bool]:
|
|
23
|
-
"""Mock implementation that returns predefined responses."""
|
|
24
|
-
if self.call_count < len(self.mock_responses):
|
|
25
|
-
actions, done = self.mock_responses[self.call_count]
|
|
26
|
-
self.call_count += 1
|
|
27
|
-
return actions, done
|
|
28
|
-
return [], True
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class TestAgentBase:
|
|
32
|
-
"""Test the base Agent class."""
|
|
33
|
-
|
|
34
|
-
@pytest.fixture
|
|
35
|
-
def mock_client(self):
|
|
36
|
-
"""Mock client for testing."""
|
|
37
|
-
return MagicMock()
|
|
38
|
-
|
|
39
|
-
@pytest.fixture
|
|
40
|
-
def mock_adapter(self):
|
|
41
|
-
"""Mock adapter for testing."""
|
|
42
|
-
adapter = MagicMock(spec=Adapter)
|
|
43
|
-
adapter.rescale.return_value = "rescaled_screenshot"
|
|
44
|
-
adapter.adapt_list.return_value = [ClickAction(point=Point(x=100, y=200))]
|
|
45
|
-
return adapter
|
|
46
|
-
|
|
47
|
-
@pytest.fixture
|
|
48
|
-
def agent_with_adapter(self, mock_client, mock_adapter):
|
|
49
|
-
"""Agent with both client and adapter."""
|
|
50
|
-
return ConcreteAgent(client=mock_client, adapter=mock_adapter)
|
|
51
|
-
|
|
52
|
-
@pytest.fixture
|
|
53
|
-
def agent_without_adapter(self, mock_client):
|
|
54
|
-
"""Agent with client but no adapter."""
|
|
55
|
-
return ConcreteAgent(client=mock_client, adapter=None)
|
|
56
|
-
|
|
57
|
-
def test_init_with_client_and_adapter(self, mock_client, mock_adapter):
|
|
58
|
-
"""Test agent initialization with client and adapter."""
|
|
59
|
-
agent = ConcreteAgent(client=mock_client, adapter=mock_adapter)
|
|
60
|
-
assert agent.client == mock_client
|
|
61
|
-
assert agent.adapter == mock_adapter
|
|
62
|
-
|
|
63
|
-
def test_init_with_none_values(self):
|
|
64
|
-
"""Test agent initialization with None values."""
|
|
65
|
-
agent = ConcreteAgent(client=None, adapter=None)
|
|
66
|
-
assert agent.client is None
|
|
67
|
-
assert agent.adapter is None
|
|
68
|
-
|
|
69
|
-
def test_preprocess_without_adapter(self, agent_without_adapter):
|
|
70
|
-
"""Test preprocess when no adapter is available."""
|
|
71
|
-
observation = Observation(text="test", screenshot="screenshot_data")
|
|
72
|
-
result = agent_without_adapter.preprocess(observation)
|
|
73
|
-
|
|
74
|
-
# Should return original observation unchanged
|
|
75
|
-
assert result == observation
|
|
76
|
-
assert result.text == "test"
|
|
77
|
-
assert result.screenshot == "screenshot_data"
|
|
78
|
-
|
|
79
|
-
def test_preprocess_without_screenshot(self, agent_with_adapter):
|
|
80
|
-
"""Test preprocess when no screenshot is available."""
|
|
81
|
-
observation = Observation(text="test", screenshot=None)
|
|
82
|
-
result = agent_with_adapter.preprocess(observation)
|
|
83
|
-
|
|
84
|
-
# Should return original observation unchanged
|
|
85
|
-
assert result == observation
|
|
86
|
-
assert result.text == "test"
|
|
87
|
-
assert result.screenshot is None
|
|
88
|
-
|
|
89
|
-
def test_preprocess_with_adapter_and_screenshot(self, agent_with_adapter, mock_adapter):
|
|
90
|
-
"""Test preprocess with adapter and screenshot (covers missing lines 48-55)."""
|
|
91
|
-
observation = Observation(text="test", screenshot="original_screenshot")
|
|
92
|
-
result = agent_with_adapter.preprocess(observation)
|
|
93
|
-
|
|
94
|
-
# Should create new observation with rescaled screenshot
|
|
95
|
-
mock_adapter.rescale.assert_called_once_with("original_screenshot")
|
|
96
|
-
assert result.text == "test"
|
|
97
|
-
assert result.screenshot == "rescaled_screenshot"
|
|
98
|
-
# Should be a new object, not the original
|
|
99
|
-
assert result is not observation
|
|
100
|
-
|
|
101
|
-
def test_postprocess_without_adapter(self, agent_without_adapter):
|
|
102
|
-
"""Test postprocess when no adapter is available (covers missing lines 82-85)."""
|
|
103
|
-
actions = [{"type": "click", "x": 100, "y": 200}]
|
|
104
|
-
|
|
105
|
-
with pytest.raises(ValueError, match="Cannot postprocess actions without an adapter"):
|
|
106
|
-
agent_without_adapter.postprocess(actions)
|
|
107
|
-
|
|
108
|
-
def test_postprocess_with_adapter(self, agent_with_adapter, mock_adapter):
|
|
109
|
-
"""Test postprocess with adapter."""
|
|
110
|
-
actions = [{"type": "click", "x": 100, "y": 200}]
|
|
111
|
-
result = agent_with_adapter.postprocess(actions)
|
|
112
|
-
|
|
113
|
-
mock_adapter.adapt_list.assert_called_once_with(actions)
|
|
114
|
-
assert len(result) == 1
|
|
115
|
-
assert isinstance(result[0], ClickAction)
|
|
116
|
-
|
|
117
|
-
@pytest.mark.asyncio
|
|
118
|
-
async def test_predict_without_verbose(self, agent_with_adapter):
|
|
119
|
-
"""Test predict method without verbose logging."""
|
|
120
|
-
observation = Observation(text="test", screenshot="screenshot")
|
|
121
|
-
agent_with_adapter.mock_responses = [([{"type": "click", "x": 100, "y": 200}], False)]
|
|
122
|
-
|
|
123
|
-
actions, done = await agent_with_adapter.predict(observation, verbose=False)
|
|
124
|
-
|
|
125
|
-
assert len(actions) == 1
|
|
126
|
-
assert isinstance(actions[0], ClickAction)
|
|
127
|
-
assert done is False
|
|
128
|
-
|
|
129
|
-
@pytest.mark.asyncio
|
|
130
|
-
@patch("hud.agent.base.logger")
|
|
131
|
-
async def test_predict_with_verbose_logging(self, mock_logger, agent_with_adapter):
|
|
132
|
-
"""Test predict method with verbose logging (covers missing lines 100-116)."""
|
|
133
|
-
observation = Observation(text="test", screenshot="screenshot")
|
|
134
|
-
agent_with_adapter.mock_responses = [([{"type": "click", "x": 100, "y": 200}], True)]
|
|
135
|
-
|
|
136
|
-
actions, done = await agent_with_adapter.predict(observation, verbose=True)
|
|
137
|
-
|
|
138
|
-
# Verify verbose logging was called
|
|
139
|
-
mock_logger.info.assert_any_call("Predicting action...")
|
|
140
|
-
mock_logger.info.assert_any_call("Raw action: %s", [{"type": "click", "x": 100, "y": 200}])
|
|
141
|
-
|
|
142
|
-
assert len(actions) == 1
|
|
143
|
-
assert isinstance(actions[0], ClickAction)
|
|
144
|
-
assert done is True
|
|
145
|
-
|
|
146
|
-
@pytest.mark.asyncio
|
|
147
|
-
async def test_predict_without_adapter_returns_raw_actions(self, agent_without_adapter):
|
|
148
|
-
"""Test predict without adapter returns raw actions."""
|
|
149
|
-
observation = Observation(text="test", screenshot=None)
|
|
150
|
-
raw_actions = [{"type": "click", "x": 100, "y": 200}]
|
|
151
|
-
agent_without_adapter.mock_responses = [(raw_actions, True)]
|
|
152
|
-
|
|
153
|
-
actions, done = await agent_without_adapter.predict(observation, verbose=False)
|
|
154
|
-
|
|
155
|
-
# Should return raw actions, not processed ones
|
|
156
|
-
assert actions == raw_actions
|
|
157
|
-
assert done is True
|
|
158
|
-
|
|
159
|
-
@pytest.mark.asyncio
|
|
160
|
-
async def test_predict_with_empty_actions(self, agent_with_adapter):
|
|
161
|
-
"""Test predict when fetch_response returns empty actions."""
|
|
162
|
-
observation = Observation(text="test", screenshot="screenshot")
|
|
163
|
-
agent_with_adapter.mock_responses = [([], True)]
|
|
164
|
-
|
|
165
|
-
actions, done = await agent_with_adapter.predict(observation, verbose=False)
|
|
166
|
-
|
|
167
|
-
# Should return empty actions without calling adapter
|
|
168
|
-
assert actions == []
|
|
169
|
-
assert done is True
|
|
170
|
-
|
|
171
|
-
@pytest.mark.asyncio
|
|
172
|
-
async def test_predict_full_pipeline(self, agent_with_adapter, mock_adapter):
|
|
173
|
-
"""Test the complete predict pipeline with all stages."""
|
|
174
|
-
# Set up observation with screenshot that will be rescaled
|
|
175
|
-
observation = Observation(text="test input", screenshot="original_screenshot")
|
|
176
|
-
raw_actions = [{"type": "click", "x": 150, "y": 250}]
|
|
177
|
-
agent_with_adapter.mock_responses = [(raw_actions, False)]
|
|
178
|
-
|
|
179
|
-
actions, done = await agent_with_adapter.predict(observation, verbose=True)
|
|
180
|
-
|
|
181
|
-
# Verify all stages were called
|
|
182
|
-
# Stage 1: Preprocessing
|
|
183
|
-
mock_adapter.rescale.assert_called_once_with("original_screenshot")
|
|
184
|
-
|
|
185
|
-
# Stage 3: Postprocessing
|
|
186
|
-
mock_adapter.adapt_list.assert_called_once_with(raw_actions)
|
|
187
|
-
|
|
188
|
-
assert len(actions) == 1
|
|
189
|
-
assert isinstance(actions[0], ClickAction)
|
|
190
|
-
assert done is False
|
|
191
|
-
|
|
192
|
-
@pytest.mark.asyncio
|
|
193
|
-
async def test_predict_integration_without_screenshot(self, agent_with_adapter):
|
|
194
|
-
"""Test predict integration when observation has no screenshot."""
|
|
195
|
-
observation = Observation(text="test input", screenshot=None)
|
|
196
|
-
raw_actions = [{"type": "response", "text": "Task completed"}]
|
|
197
|
-
agent_with_adapter.mock_responses = [(raw_actions, True)]
|
|
198
|
-
|
|
199
|
-
actions, done = await agent_with_adapter.predict(observation, verbose=False)
|
|
200
|
-
|
|
201
|
-
assert len(actions) == 1
|
|
202
|
-
assert done is True
|
hud/env/__init__.py
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from . import docker_client, environment, local_docker_client, remote_client, remote_docker_client
|
|
4
|
-
|
|
5
|
-
__all__ = [
|
|
6
|
-
"docker_client",
|
|
7
|
-
"environment",
|
|
8
|
-
"local_docker_client",
|
|
9
|
-
"remote_client",
|
|
10
|
-
"remote_docker_client",
|
|
11
|
-
]
|
hud/env/client.py
DELETED
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from abc import ABC, abstractmethod
|
|
4
|
-
from typing import TYPE_CHECKING, Any
|
|
5
|
-
|
|
6
|
-
from pydantic import BaseModel
|
|
7
|
-
|
|
8
|
-
if TYPE_CHECKING:
|
|
9
|
-
from hud.types import EnvironmentStatus
|
|
10
|
-
from hud.utils.config import FunctionConfig
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class Client(BaseModel, ABC):
|
|
14
|
-
"""
|
|
15
|
-
Base class for all environment clients.
|
|
16
|
-
"""
|
|
17
|
-
|
|
18
|
-
@abstractmethod
|
|
19
|
-
async def invoke(self, config: FunctionConfig) -> Any:
|
|
20
|
-
"""
|
|
21
|
-
Invoke the environment with the given config.
|
|
22
|
-
"""
|
|
23
|
-
|
|
24
|
-
@abstractmethod
|
|
25
|
-
async def get_status(self) -> EnvironmentStatus:
|
|
26
|
-
"""
|
|
27
|
-
Get the current status of the environment.
|
|
28
|
-
"""
|
|
29
|
-
|
|
30
|
-
@abstractmethod
|
|
31
|
-
async def close(self) -> None:
|
|
32
|
-
"""
|
|
33
|
-
Close the environment and clean up any resources.
|
|
34
|
-
This method should be called when the environment is no longer needed.
|
|
35
|
-
"""
|