hud-python 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +22 -2
- hud/adapters/claude/adapter.py +9 -2
- hud/adapters/claude/tests/__init__.py +1 -0
- hud/adapters/claude/tests/test_adapter.py +519 -0
- hud/adapters/common/types.py +5 -1
- hud/adapters/operator/adapter.py +4 -0
- hud/adapters/operator/tests/__init__.py +1 -0
- hud/adapters/operator/tests/test_adapter.py +370 -0
- hud/agent/__init__.py +4 -0
- hud/agent/base.py +18 -2
- hud/agent/claude.py +20 -17
- hud/agent/claude_plays_pokemon.py +282 -0
- hud/agent/langchain.py +12 -7
- hud/agent/misc/__init__.py +3 -0
- hud/agent/misc/response_agent.py +80 -0
- hud/agent/operator.py +27 -19
- hud/agent/tests/__init__.py +1 -0
- hud/agent/tests/test_base.py +202 -0
- hud/env/docker_client.py +28 -18
- hud/env/environment.py +32 -16
- hud/env/local_docker_client.py +83 -42
- hud/env/remote_client.py +1 -3
- hud/env/remote_docker_client.py +72 -15
- hud/exceptions.py +12 -0
- hud/gym.py +71 -53
- hud/job.py +52 -7
- hud/settings.py +6 -0
- hud/task.py +45 -33
- hud/taskset.py +44 -4
- hud/telemetry/__init__.py +21 -0
- hud/telemetry/_trace.py +173 -0
- hud/telemetry/context.py +193 -0
- hud/telemetry/exporter.py +417 -0
- hud/telemetry/instrumentation/__init__.py +3 -0
- hud/telemetry/instrumentation/mcp.py +498 -0
- hud/telemetry/instrumentation/registry.py +59 -0
- hud/telemetry/mcp_models.py +331 -0
- hud/telemetry/tests/__init__.py +1 -0
- hud/telemetry/tests/test_context.py +203 -0
- hud/telemetry/tests/test_trace.py +270 -0
- hud/types.py +10 -26
- hud/utils/common.py +22 -2
- hud/utils/misc.py +53 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +7 -0
- {hud_python-0.2.4.dist-info → hud_python-0.2.5.dist-info}/METADATA +90 -22
- hud_python-0.2.5.dist-info/RECORD +84 -0
- hud_python-0.2.4.dist-info/RECORD +0 -62
- {hud_python-0.2.4.dist-info → hud_python-0.2.5.dist-info}/WHEEL +0 -0
- {hud_python-0.2.4.dist-info → hud_python-0.2.5.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from hud.adapters.common.types import (
|
|
6
|
+
ClickAction,
|
|
7
|
+
DragAction,
|
|
8
|
+
MoveAction,
|
|
9
|
+
PressAction,
|
|
10
|
+
ResponseAction,
|
|
11
|
+
ScreenshotFetch,
|
|
12
|
+
ScrollAction,
|
|
13
|
+
TypeAction,
|
|
14
|
+
WaitAction,
|
|
15
|
+
)
|
|
16
|
+
from hud.adapters.operator import OperatorAdapter
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TestOperatorAdapter:
|
|
20
|
+
"""Test the OperatorAdapter class."""
|
|
21
|
+
|
|
22
|
+
@pytest.fixture
|
|
23
|
+
def adapter(self):
|
|
24
|
+
"""Fixture providing a clean adapter instance."""
|
|
25
|
+
return OperatorAdapter()
|
|
26
|
+
|
|
27
|
+
def test_init(self, adapter):
|
|
28
|
+
"""Test adapter initialization."""
|
|
29
|
+
assert adapter.agent_width == 1024
|
|
30
|
+
assert adapter.agent_height == 768
|
|
31
|
+
assert adapter.env_width == 1920 # Inherited from parent
|
|
32
|
+
assert adapter.env_height == 1080 # Inherited from parent
|
|
33
|
+
|
|
34
|
+
def test_key_map_constants(self, adapter):
|
|
35
|
+
"""Test KEY_MAP constants."""
|
|
36
|
+
assert adapter.KEY_MAP["return"] == "enter"
|
|
37
|
+
assert adapter.KEY_MAP["arrowup"] == "up"
|
|
38
|
+
assert adapter.KEY_MAP["arrowdown"] == "down"
|
|
39
|
+
assert adapter.KEY_MAP["arrowleft"] == "left"
|
|
40
|
+
assert adapter.KEY_MAP["arrowright"] == "right"
|
|
41
|
+
|
|
42
|
+
def test_button_map_constants(self, adapter):
|
|
43
|
+
"""Test BUTTON_MAP constants."""
|
|
44
|
+
assert adapter.BUTTON_MAP["wheel"] == "middle"
|
|
45
|
+
|
|
46
|
+
def test_map_key_mapped(self, adapter):
|
|
47
|
+
"""Test _map_key with mapped keys."""
|
|
48
|
+
assert adapter._map_key("return") == "enter"
|
|
49
|
+
assert adapter._map_key("RETURN") == "enter" # Test case insensitive
|
|
50
|
+
assert adapter._map_key("arrowup") == "up"
|
|
51
|
+
assert adapter._map_key("ArrowDown") == "down"
|
|
52
|
+
|
|
53
|
+
def test_map_key_unmapped(self, adapter):
|
|
54
|
+
"""Test _map_key with unmapped keys."""
|
|
55
|
+
assert adapter._map_key("space") == "space"
|
|
56
|
+
assert adapter._map_key("CTRL") == "ctrl"
|
|
57
|
+
assert adapter._map_key("Unknown") == "unknown"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class TestOperatorAdapterConvert:
|
|
61
|
+
"""Test the convert method of OperatorAdapter."""
|
|
62
|
+
|
|
63
|
+
@pytest.fixture
|
|
64
|
+
def adapter(self):
|
|
65
|
+
"""Fixture providing a clean adapter instance."""
|
|
66
|
+
return OperatorAdapter()
|
|
67
|
+
|
|
68
|
+
def test_convert_click_action(self, adapter):
|
|
69
|
+
"""Test converting click action."""
|
|
70
|
+
data = {"type": "click", "x": 100, "y": 200, "button": "left"}
|
|
71
|
+
result = adapter.convert(data)
|
|
72
|
+
|
|
73
|
+
assert isinstance(result, ClickAction)
|
|
74
|
+
assert result.point is not None
|
|
75
|
+
assert result.point.x == 100
|
|
76
|
+
assert result.point.y == 200
|
|
77
|
+
assert result.button == "left"
|
|
78
|
+
|
|
79
|
+
def test_convert_click_action_default_values(self, adapter):
|
|
80
|
+
"""Test converting click action with default values."""
|
|
81
|
+
data = {"type": "click"}
|
|
82
|
+
result = adapter.convert(data)
|
|
83
|
+
|
|
84
|
+
assert isinstance(result, ClickAction)
|
|
85
|
+
assert result.point is not None
|
|
86
|
+
assert result.point.x == 0
|
|
87
|
+
assert result.point.y == 0
|
|
88
|
+
assert result.button == "left"
|
|
89
|
+
|
|
90
|
+
def test_convert_click_action_mapped_button(self, adapter):
|
|
91
|
+
"""Test converting click action with mapped button."""
|
|
92
|
+
data = {"type": "click", "x": 100, "y": 200, "button": "wheel"}
|
|
93
|
+
result = adapter.convert(data)
|
|
94
|
+
|
|
95
|
+
assert isinstance(result, ClickAction)
|
|
96
|
+
assert result.button == "middle"
|
|
97
|
+
|
|
98
|
+
def test_convert_double_click_action(self, adapter):
|
|
99
|
+
"""Test converting double click action."""
|
|
100
|
+
data = {"type": "double_click", "x": 150, "y": 250}
|
|
101
|
+
result = adapter.convert(data)
|
|
102
|
+
|
|
103
|
+
assert isinstance(result, ClickAction)
|
|
104
|
+
assert result.point is not None
|
|
105
|
+
assert result.point.x == 150
|
|
106
|
+
assert result.point.y == 250
|
|
107
|
+
assert result.button == "left"
|
|
108
|
+
assert result.pattern == [100] # Double click pattern
|
|
109
|
+
|
|
110
|
+
def test_convert_scroll_action(self, adapter):
|
|
111
|
+
"""Test converting scroll action."""
|
|
112
|
+
data = {"type": "scroll", "x": 300, "y": 400, "scroll_x": 10, "scroll_y": -20}
|
|
113
|
+
result = adapter.convert(data)
|
|
114
|
+
|
|
115
|
+
assert isinstance(result, ScrollAction)
|
|
116
|
+
assert result.point is not None
|
|
117
|
+
assert result.scroll is not None
|
|
118
|
+
assert result.point.x == 300
|
|
119
|
+
assert result.point.y == 400
|
|
120
|
+
assert result.scroll.x == 10
|
|
121
|
+
assert result.scroll.y == -20
|
|
122
|
+
|
|
123
|
+
def test_convert_scroll_action_default_values(self, adapter):
|
|
124
|
+
"""Test converting scroll action with default values."""
|
|
125
|
+
data = {"type": "scroll"}
|
|
126
|
+
result = adapter.convert(data)
|
|
127
|
+
|
|
128
|
+
assert isinstance(result, ScrollAction)
|
|
129
|
+
assert result.point is not None
|
|
130
|
+
assert result.scroll is not None
|
|
131
|
+
assert result.point.x == 0
|
|
132
|
+
assert result.point.y == 0
|
|
133
|
+
assert result.scroll.x == 0
|
|
134
|
+
assert result.scroll.y == 0
|
|
135
|
+
|
|
136
|
+
def test_convert_type_action(self, adapter):
|
|
137
|
+
"""Test converting type action."""
|
|
138
|
+
data = {"type": "type", "text": "Hello, World!"}
|
|
139
|
+
result = adapter.convert(data)
|
|
140
|
+
|
|
141
|
+
assert isinstance(result, TypeAction)
|
|
142
|
+
assert result.text == "Hello, World!"
|
|
143
|
+
assert result.enter_after is False
|
|
144
|
+
|
|
145
|
+
def test_convert_type_action_default_text(self, adapter):
|
|
146
|
+
"""Test converting type action with default text."""
|
|
147
|
+
data = {"type": "type"}
|
|
148
|
+
result = adapter.convert(data)
|
|
149
|
+
|
|
150
|
+
assert isinstance(result, TypeAction)
|
|
151
|
+
assert result.text == ""
|
|
152
|
+
assert result.enter_after is False
|
|
153
|
+
|
|
154
|
+
def test_convert_wait_action(self, adapter):
|
|
155
|
+
"""Test converting wait action."""
|
|
156
|
+
data = {"type": "wait", "ms": 2000}
|
|
157
|
+
result = adapter.convert(data)
|
|
158
|
+
|
|
159
|
+
assert isinstance(result, WaitAction)
|
|
160
|
+
assert result.time == 2000
|
|
161
|
+
|
|
162
|
+
def test_convert_wait_action_default_time(self, adapter):
|
|
163
|
+
"""Test converting wait action with default time."""
|
|
164
|
+
data = {"type": "wait"}
|
|
165
|
+
result = adapter.convert(data)
|
|
166
|
+
|
|
167
|
+
assert isinstance(result, WaitAction)
|
|
168
|
+
assert result.time == 1000
|
|
169
|
+
|
|
170
|
+
def test_convert_move_action(self, adapter):
|
|
171
|
+
"""Test converting move action."""
|
|
172
|
+
data = {"type": "move", "x": 500, "y": 600}
|
|
173
|
+
result = adapter.convert(data)
|
|
174
|
+
|
|
175
|
+
assert isinstance(result, MoveAction)
|
|
176
|
+
assert result.point is not None
|
|
177
|
+
assert result.point.x == 500
|
|
178
|
+
assert result.point.y == 600
|
|
179
|
+
|
|
180
|
+
def test_convert_move_action_default_values(self, adapter):
|
|
181
|
+
"""Test converting move action with default values."""
|
|
182
|
+
data = {"type": "move"}
|
|
183
|
+
result = adapter.convert(data)
|
|
184
|
+
|
|
185
|
+
assert isinstance(result, MoveAction)
|
|
186
|
+
assert result.point is not None
|
|
187
|
+
assert result.point.x == 0
|
|
188
|
+
assert result.point.y == 0
|
|
189
|
+
|
|
190
|
+
def test_convert_keypress_action(self, adapter):
|
|
191
|
+
"""Test converting keypress action."""
|
|
192
|
+
data = {"type": "keypress", "keys": ["ctrl", "c"]}
|
|
193
|
+
result = adapter.convert(data)
|
|
194
|
+
|
|
195
|
+
assert isinstance(result, PressAction)
|
|
196
|
+
assert result.keys == ["ctrl", "c"]
|
|
197
|
+
|
|
198
|
+
def test_convert_keypress_action_mapped_keys(self, adapter):
|
|
199
|
+
"""Test converting keypress action with mapped keys."""
|
|
200
|
+
data = {"type": "keypress", "keys": ["return", "arrowup"]}
|
|
201
|
+
result = adapter.convert(data)
|
|
202
|
+
|
|
203
|
+
assert isinstance(result, PressAction)
|
|
204
|
+
assert result.keys == ["enter", "up"]
|
|
205
|
+
|
|
206
|
+
def test_convert_keypress_action_default_keys(self, adapter):
|
|
207
|
+
"""Test converting keypress action with default keys."""
|
|
208
|
+
data = {"type": "keypress"}
|
|
209
|
+
result = adapter.convert(data)
|
|
210
|
+
|
|
211
|
+
assert isinstance(result, PressAction)
|
|
212
|
+
assert result.keys == []
|
|
213
|
+
|
|
214
|
+
def test_convert_drag_action(self, adapter):
|
|
215
|
+
"""Test converting drag action."""
|
|
216
|
+
data = {
|
|
217
|
+
"type": "drag",
|
|
218
|
+
"path": [{"x": 100, "y": 200}, {"x": 150, "y": 250}, {"x": 200, "y": 300}],
|
|
219
|
+
}
|
|
220
|
+
result = adapter.convert(data)
|
|
221
|
+
|
|
222
|
+
assert isinstance(result, DragAction)
|
|
223
|
+
assert len(result.path) == 3
|
|
224
|
+
assert result.path[0].x == 100
|
|
225
|
+
assert result.path[0].y == 200
|
|
226
|
+
assert result.path[1].x == 150
|
|
227
|
+
assert result.path[1].y == 250
|
|
228
|
+
assert result.path[2].x == 200
|
|
229
|
+
assert result.path[2].y == 300
|
|
230
|
+
|
|
231
|
+
def test_convert_drag_action_default_path(self, adapter):
|
|
232
|
+
"""Test converting drag action with default path."""
|
|
233
|
+
data = {"type": "drag"}
|
|
234
|
+
result = adapter.convert(data)
|
|
235
|
+
|
|
236
|
+
assert isinstance(result, DragAction)
|
|
237
|
+
assert result.path == []
|
|
238
|
+
|
|
239
|
+
def test_convert_drag_action_path_with_missing_coords(self, adapter):
|
|
240
|
+
"""Test converting drag action with missing coordinates."""
|
|
241
|
+
data = {
|
|
242
|
+
"type": "drag",
|
|
243
|
+
"path": [
|
|
244
|
+
{"x": 100}, # Missing y
|
|
245
|
+
{"y": 200}, # Missing x
|
|
246
|
+
{}, # Missing both
|
|
247
|
+
],
|
|
248
|
+
}
|
|
249
|
+
result = adapter.convert(data)
|
|
250
|
+
|
|
251
|
+
assert isinstance(result, DragAction)
|
|
252
|
+
assert len(result.path) == 3
|
|
253
|
+
assert result.path[0].x == 100
|
|
254
|
+
assert result.path[0].y == 0 # Default value
|
|
255
|
+
assert result.path[1].x == 0 # Default value
|
|
256
|
+
assert result.path[1].y == 200
|
|
257
|
+
assert result.path[2].x == 0 # Default value
|
|
258
|
+
assert result.path[2].y == 0 # Default value
|
|
259
|
+
|
|
260
|
+
def test_convert_screenshot_action(self, adapter):
|
|
261
|
+
"""Test converting screenshot action."""
|
|
262
|
+
data = {"type": "screenshot"}
|
|
263
|
+
result = adapter.convert(data)
|
|
264
|
+
|
|
265
|
+
assert isinstance(result, ScreenshotFetch)
|
|
266
|
+
|
|
267
|
+
def test_convert_response_action(self, adapter):
|
|
268
|
+
"""Test converting response action."""
|
|
269
|
+
data = {"type": "response", "text": "Task completed successfully"}
|
|
270
|
+
result = adapter.convert(data)
|
|
271
|
+
|
|
272
|
+
assert isinstance(result, ResponseAction)
|
|
273
|
+
assert result.text == "Task completed successfully"
|
|
274
|
+
|
|
275
|
+
def test_convert_response_action_default_text(self, adapter):
|
|
276
|
+
"""Test converting response action with default text."""
|
|
277
|
+
data = {"type": "response"}
|
|
278
|
+
result = adapter.convert(data)
|
|
279
|
+
|
|
280
|
+
assert isinstance(result, ResponseAction)
|
|
281
|
+
assert result.text == ""
|
|
282
|
+
|
|
283
|
+
def test_convert_unsupported_action_type(self, adapter):
|
|
284
|
+
"""Test converting unsupported action type."""
|
|
285
|
+
data = {"type": "unsupported_action"}
|
|
286
|
+
|
|
287
|
+
with pytest.raises(ValueError) as exc_info:
|
|
288
|
+
adapter.convert(data)
|
|
289
|
+
|
|
290
|
+
assert "Unsupported action type: unsupported_action" in str(exc_info.value)
|
|
291
|
+
|
|
292
|
+
def test_convert_invalid_data_structure(self, adapter):
|
|
293
|
+
"""Test converting invalid data structure."""
|
|
294
|
+
# Test with non-dict data
|
|
295
|
+
with pytest.raises(ValueError) as exc_info:
|
|
296
|
+
adapter.convert("invalid_data")
|
|
297
|
+
|
|
298
|
+
assert "Invalid action" in str(exc_info.value)
|
|
299
|
+
|
|
300
|
+
def test_convert_missing_type_field(self, adapter):
|
|
301
|
+
"""Test converting data without type field."""
|
|
302
|
+
data = {"x": 100, "y": 200} # Missing type
|
|
303
|
+
|
|
304
|
+
with pytest.raises(ValueError) as exc_info:
|
|
305
|
+
adapter.convert(data)
|
|
306
|
+
|
|
307
|
+
assert "Unsupported action type: None" in str(exc_info.value)
|
|
308
|
+
|
|
309
|
+
def test_convert_none_data(self, adapter):
|
|
310
|
+
"""Test converting None data."""
|
|
311
|
+
with pytest.raises(ValueError) as exc_info:
|
|
312
|
+
adapter.convert(None)
|
|
313
|
+
|
|
314
|
+
assert "Invalid action" in str(exc_info.value)
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
class TestOperatorAdapterIntegration:
|
|
318
|
+
"""Integration tests for OperatorAdapter."""
|
|
319
|
+
|
|
320
|
+
@pytest.fixture
|
|
321
|
+
def adapter(self):
|
|
322
|
+
"""Fixture providing a clean adapter instance."""
|
|
323
|
+
return OperatorAdapter()
|
|
324
|
+
|
|
325
|
+
def test_full_click_pipeline(self, adapter):
|
|
326
|
+
"""Test full click action processing pipeline."""
|
|
327
|
+
# Set adapter dimensions to avoid scaling
|
|
328
|
+
adapter.agent_width = 1920
|
|
329
|
+
adapter.agent_height = 1080
|
|
330
|
+
adapter.env_width = 1920
|
|
331
|
+
adapter.env_height = 1080
|
|
332
|
+
|
|
333
|
+
# Test the full adapt method
|
|
334
|
+
raw_action = {"type": "click", "x": 100, "y": 200, "button": "right"}
|
|
335
|
+
|
|
336
|
+
result = adapter.adapt(raw_action)
|
|
337
|
+
|
|
338
|
+
assert isinstance(result, ClickAction)
|
|
339
|
+
assert result.point is not None
|
|
340
|
+
assert result.point.x == 100
|
|
341
|
+
assert result.point.y == 200
|
|
342
|
+
assert result.button == "right"
|
|
343
|
+
|
|
344
|
+
# Check that it was added to memory
|
|
345
|
+
assert len(adapter.memory) == 1
|
|
346
|
+
assert adapter.memory[0] == result
|
|
347
|
+
|
|
348
|
+
def test_multiple_actions_processing(self, adapter):
|
|
349
|
+
"""Test processing multiple actions."""
|
|
350
|
+
# Set adapter dimensions to avoid scaling
|
|
351
|
+
adapter.agent_width = 1920
|
|
352
|
+
adapter.agent_height = 1080
|
|
353
|
+
adapter.env_width = 1920
|
|
354
|
+
adapter.env_height = 1080
|
|
355
|
+
|
|
356
|
+
actions = [
|
|
357
|
+
{"type": "click", "x": 100, "y": 200},
|
|
358
|
+
{"type": "type", "text": "hello"},
|
|
359
|
+
{"type": "keypress", "keys": ["return"]},
|
|
360
|
+
]
|
|
361
|
+
|
|
362
|
+
results = adapter.adapt_list(actions)
|
|
363
|
+
|
|
364
|
+
assert len(results) == 3
|
|
365
|
+
assert isinstance(results[0], ClickAction)
|
|
366
|
+
assert isinstance(results[1], TypeAction)
|
|
367
|
+
assert isinstance(results[2], PressAction)
|
|
368
|
+
|
|
369
|
+
# Check memory
|
|
370
|
+
assert len(adapter.memory) == 3
|
hud/agent/__init__.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
from .base import Agent
|
|
2
2
|
from .claude import ClaudeAgent
|
|
3
|
+
from .claude_plays_pokemon import ClaudePlaysPokemon
|
|
3
4
|
from .operator import OperatorAgent
|
|
4
5
|
from .langchain import LangchainAgent
|
|
6
|
+
from .misc import ResponseAgent
|
|
5
7
|
|
|
6
8
|
from hud.adapters import OperatorAdapter, ClaudeAdapter
|
|
7
9
|
|
|
@@ -12,4 +14,6 @@ __all__ = [
|
|
|
12
14
|
"OperatorAdapter",
|
|
13
15
|
"ClaudeAdapter",
|
|
14
16
|
"LangchainAgent",
|
|
17
|
+
"ClaudePlaysPokemon",
|
|
18
|
+
"ResponseAgent",
|
|
15
19
|
]
|
hud/agent/base.py
CHANGED
|
@@ -2,7 +2,11 @@ from abc import ABC, abstractmethod
|
|
|
2
2
|
from typing import Sequence, TypeVar, Generic
|
|
3
3
|
|
|
4
4
|
from hud.adapters import Adapter, CLA
|
|
5
|
+
from hud.types import Gym
|
|
5
6
|
from hud.utils.common import Observation
|
|
7
|
+
import logging
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
6
10
|
|
|
7
11
|
# Generic type for different client types (Anthropic, OpenAI, etc.)
|
|
8
12
|
ClientT = TypeVar("ClientT")
|
|
@@ -21,7 +25,13 @@ class Agent(Generic[ClientT, ActionT], ABC):
|
|
|
21
25
|
Subclasses only need to implement the fetch_response method.
|
|
22
26
|
"""
|
|
23
27
|
|
|
24
|
-
|
|
28
|
+
transfer_gyms: dict[Gym, Gym] = {}
|
|
29
|
+
|
|
30
|
+
def __init__(
|
|
31
|
+
self,
|
|
32
|
+
client: ClientT | None = None,
|
|
33
|
+
adapter: Adapter | None = None,
|
|
34
|
+
):
|
|
25
35
|
"""
|
|
26
36
|
Initialize the agent.
|
|
27
37
|
|
|
@@ -81,7 +91,9 @@ class Agent(Generic[ClientT, ActionT], ABC):
|
|
|
81
91
|
|
|
82
92
|
return self.adapter.adapt_list(actions)
|
|
83
93
|
|
|
84
|
-
async def predict(
|
|
94
|
+
async def predict(
|
|
95
|
+
self, observation: Observation, verbose: bool = False
|
|
96
|
+
) -> tuple[list[CLA] | list[ActionT], bool]:
|
|
85
97
|
"""
|
|
86
98
|
Predict the next action based on the observation.
|
|
87
99
|
|
|
@@ -94,11 +106,15 @@ class Agent(Generic[ClientT, ActionT], ABC):
|
|
|
94
106
|
tuple[list[CLA] | list[ActionT], bool]: A tuple containing the list of actions and a boolean
|
|
95
107
|
indicating if the agent believes it has completed the task
|
|
96
108
|
"""
|
|
109
|
+
if verbose:
|
|
110
|
+
logger.info("Predicting action...")
|
|
97
111
|
# Stage 1: Preprocess the observation
|
|
98
112
|
processed_obs = self.preprocess(observation)
|
|
99
113
|
|
|
100
114
|
# Stage 2: Fetch response from the model
|
|
101
115
|
actions, done = await self.fetch_response(processed_obs)
|
|
116
|
+
if verbose:
|
|
117
|
+
logger.info("Raw action: %s", actions)
|
|
102
118
|
|
|
103
119
|
# Stage 3: Postprocess the actions if we have an adapter
|
|
104
120
|
if self.adapter and actions:
|
hud/agent/claude.py
CHANGED
|
@@ -13,6 +13,7 @@ from anthropic.types.beta import (
|
|
|
13
13
|
from hud.adapters import Adapter
|
|
14
14
|
from hud.agent.base import Agent
|
|
15
15
|
from hud.adapters.claude import ClaudeAdapter
|
|
16
|
+
from hud.types import Gym
|
|
16
17
|
from hud.utils.common import Observation
|
|
17
18
|
from hud.settings import settings
|
|
18
19
|
|
|
@@ -53,6 +54,8 @@ class ClaudeAgent(Agent[AsyncAnthropic, Any]):
|
|
|
53
54
|
through the ClaudeAdapter which converts actions to the format expected by HUD.
|
|
54
55
|
"""
|
|
55
56
|
|
|
57
|
+
transfer_gyms: dict[Gym, Gym] = {"qa": "hud-browser"}
|
|
58
|
+
|
|
56
59
|
def __init__(
|
|
57
60
|
self,
|
|
58
61
|
client: AsyncAnthropic | None = None,
|
|
@@ -123,20 +126,20 @@ class ClaudeAgent(Agent[AsyncAnthropic, Any]):
|
|
|
123
126
|
|
|
124
127
|
# Add text instruction if present
|
|
125
128
|
if observation.text:
|
|
126
|
-
logger.info("Adding text to user content: %s", observation.text)
|
|
129
|
+
# logger.info("Adding text to user content: %s", observation.text)
|
|
127
130
|
user_content.append(text_to_content_block(str(observation.text)))
|
|
128
131
|
|
|
129
132
|
# Add screenshot if present
|
|
130
133
|
if observation.screenshot:
|
|
131
|
-
logger.info("Adding screenshot to user content")
|
|
134
|
+
# logger.info("Adding screenshot to user content")
|
|
132
135
|
if not self.pending_computer_use_tool_id:
|
|
133
|
-
logger.info("Adding screenshot to user content, no tool id")
|
|
136
|
+
# logger.info("Adding screenshot to user content, no tool id")
|
|
134
137
|
user_content.append(base64_to_content_block(observation.screenshot))
|
|
135
138
|
else:
|
|
136
|
-
logger.info(
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
)
|
|
139
|
+
# logger.info(
|
|
140
|
+
# "Adding screenshot to user content, tool id: %s",
|
|
141
|
+
# self.pending_computer_use_tool_id,
|
|
142
|
+
# )
|
|
140
143
|
user_content.append(
|
|
141
144
|
tool_use_content_block(
|
|
142
145
|
self.pending_computer_use_tool_id,
|
|
@@ -183,9 +186,9 @@ class ClaudeAgent(Agent[AsyncAnthropic, Any]):
|
|
|
183
186
|
done = True # Assume we're done unless we find a tool use
|
|
184
187
|
|
|
185
188
|
for block in response_content:
|
|
186
|
-
logger.info("Processing block: %s", block)
|
|
189
|
+
# logger.info("Processing block: %s", block)
|
|
187
190
|
if block.type == "tool_use":
|
|
188
|
-
logger.info("Processing tool use: %s", block)
|
|
191
|
+
# logger.info("Processing tool use: %s", block)
|
|
189
192
|
assert block.name == "computer"
|
|
190
193
|
|
|
191
194
|
# Store the raw action
|
|
@@ -197,20 +200,20 @@ class ClaudeAgent(Agent[AsyncAnthropic, Any]):
|
|
|
197
200
|
break
|
|
198
201
|
|
|
199
202
|
# If no tool use action was found, check for a final text response
|
|
200
|
-
if
|
|
203
|
+
if len(actions) == 0 and done:
|
|
201
204
|
final_text_response = ""
|
|
202
205
|
for block in response_content:
|
|
203
206
|
if block.type == "text":
|
|
204
207
|
final_text_response += block.text
|
|
205
208
|
|
|
206
209
|
if final_text_response.strip():
|
|
207
|
-
logger.info(
|
|
208
|
-
|
|
209
|
-
)
|
|
210
|
+
# logger.info(
|
|
211
|
+
# f"No tool use found. Using final text as response: {final_text_response}"
|
|
212
|
+
# )
|
|
210
213
|
actions = [{"action": "response", "text": final_text_response.strip()}]
|
|
211
|
-
|
|
212
|
-
else:
|
|
213
|
-
|
|
214
|
-
|
|
214
|
+
done = True
|
|
215
|
+
# else:
|
|
216
|
+
# logger.info("No tool use and no final text block found.")
|
|
217
|
+
# Keep done = True, actions remains empty
|
|
215
218
|
|
|
216
219
|
return actions, done
|