hud-python 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (50)
  1. hud/__init__.py +22 -2
  2. hud/adapters/claude/adapter.py +9 -2
  3. hud/adapters/claude/tests/__init__.py +1 -0
  4. hud/adapters/claude/tests/test_adapter.py +519 -0
  5. hud/adapters/common/types.py +5 -1
  6. hud/adapters/operator/adapter.py +4 -0
  7. hud/adapters/operator/tests/__init__.py +1 -0
  8. hud/adapters/operator/tests/test_adapter.py +370 -0
  9. hud/agent/__init__.py +4 -0
  10. hud/agent/base.py +18 -2
  11. hud/agent/claude.py +20 -17
  12. hud/agent/claude_plays_pokemon.py +282 -0
  13. hud/agent/langchain.py +12 -7
  14. hud/agent/misc/__init__.py +3 -0
  15. hud/agent/misc/response_agent.py +80 -0
  16. hud/agent/operator.py +27 -19
  17. hud/agent/tests/__init__.py +1 -0
  18. hud/agent/tests/test_base.py +202 -0
  19. hud/env/docker_client.py +28 -18
  20. hud/env/environment.py +32 -16
  21. hud/env/local_docker_client.py +83 -42
  22. hud/env/remote_client.py +1 -3
  23. hud/env/remote_docker_client.py +72 -15
  24. hud/exceptions.py +12 -0
  25. hud/gym.py +71 -53
  26. hud/job.py +52 -7
  27. hud/settings.py +6 -0
  28. hud/task.py +45 -33
  29. hud/taskset.py +44 -4
  30. hud/telemetry/__init__.py +21 -0
  31. hud/telemetry/_trace.py +173 -0
  32. hud/telemetry/context.py +193 -0
  33. hud/telemetry/exporter.py +417 -0
  34. hud/telemetry/instrumentation/__init__.py +3 -0
  35. hud/telemetry/instrumentation/mcp.py +498 -0
  36. hud/telemetry/instrumentation/registry.py +59 -0
  37. hud/telemetry/mcp_models.py +331 -0
  38. hud/telemetry/tests/__init__.py +1 -0
  39. hud/telemetry/tests/test_context.py +203 -0
  40. hud/telemetry/tests/test_trace.py +270 -0
  41. hud/types.py +10 -26
  42. hud/utils/common.py +22 -2
  43. hud/utils/misc.py +53 -0
  44. hud/utils/tests/test_version.py +1 -1
  45. hud/version.py +7 -0
  46. {hud_python-0.2.4.dist-info → hud_python-0.2.5.dist-info}/METADATA +90 -22
  47. hud_python-0.2.5.dist-info/RECORD +84 -0
  48. hud_python-0.2.4.dist-info/RECORD +0 -62
  49. {hud_python-0.2.4.dist-info → hud_python-0.2.5.dist-info}/WHEEL +0 -0
  50. {hud_python-0.2.4.dist-info → hud_python-0.2.5.dist-info}/licenses/LICENSE +0 -0
hud/adapters/operator/tests/test_adapter.py ADDED
@@ -0,0 +1,370 @@
1
+ from __future__ import annotations
2
+
3
+ import pytest
4
+
5
+ from hud.adapters.common.types import (
6
+ ClickAction,
7
+ DragAction,
8
+ MoveAction,
9
+ PressAction,
10
+ ResponseAction,
11
+ ScreenshotFetch,
12
+ ScrollAction,
13
+ TypeAction,
14
+ WaitAction,
15
+ )
16
+ from hud.adapters.operator import OperatorAdapter
17
+
18
+
19
+ class TestOperatorAdapter:
20
+ """Test the OperatorAdapter class."""
21
+
22
+ @pytest.fixture
23
+ def adapter(self):
24
+ """Fixture providing a clean adapter instance."""
25
+ return OperatorAdapter()
26
+
27
+ def test_init(self, adapter):
28
+ """Test adapter initialization."""
29
+ assert adapter.agent_width == 1024
30
+ assert adapter.agent_height == 768
31
+ assert adapter.env_width == 1920 # Inherited from parent
32
+ assert adapter.env_height == 1080 # Inherited from parent
33
+
34
+ def test_key_map_constants(self, adapter):
35
+ """Test KEY_MAP constants."""
36
+ assert adapter.KEY_MAP["return"] == "enter"
37
+ assert adapter.KEY_MAP["arrowup"] == "up"
38
+ assert adapter.KEY_MAP["arrowdown"] == "down"
39
+ assert adapter.KEY_MAP["arrowleft"] == "left"
40
+ assert adapter.KEY_MAP["arrowright"] == "right"
41
+
42
+ def test_button_map_constants(self, adapter):
43
+ """Test BUTTON_MAP constants."""
44
+ assert adapter.BUTTON_MAP["wheel"] == "middle"
45
+
46
+ def test_map_key_mapped(self, adapter):
47
+ """Test _map_key with mapped keys."""
48
+ assert adapter._map_key("return") == "enter"
49
+ assert adapter._map_key("RETURN") == "enter" # Test case insensitive
50
+ assert adapter._map_key("arrowup") == "up"
51
+ assert adapter._map_key("ArrowDown") == "down"
52
+
53
+ def test_map_key_unmapped(self, adapter):
54
+ """Test _map_key with unmapped keys."""
55
+ assert adapter._map_key("space") == "space"
56
+ assert adapter._map_key("CTRL") == "ctrl"
57
+ assert adapter._map_key("Unknown") == "unknown"
58
+
59
+
60
+ class TestOperatorAdapterConvert:
61
+ """Test the convert method of OperatorAdapter."""
62
+
63
+ @pytest.fixture
64
+ def adapter(self):
65
+ """Fixture providing a clean adapter instance."""
66
+ return OperatorAdapter()
67
+
68
+ def test_convert_click_action(self, adapter):
69
+ """Test converting click action."""
70
+ data = {"type": "click", "x": 100, "y": 200, "button": "left"}
71
+ result = adapter.convert(data)
72
+
73
+ assert isinstance(result, ClickAction)
74
+ assert result.point is not None
75
+ assert result.point.x == 100
76
+ assert result.point.y == 200
77
+ assert result.button == "left"
78
+
79
+ def test_convert_click_action_default_values(self, adapter):
80
+ """Test converting click action with default values."""
81
+ data = {"type": "click"}
82
+ result = adapter.convert(data)
83
+
84
+ assert isinstance(result, ClickAction)
85
+ assert result.point is not None
86
+ assert result.point.x == 0
87
+ assert result.point.y == 0
88
+ assert result.button == "left"
89
+
90
+ def test_convert_click_action_mapped_button(self, adapter):
91
+ """Test converting click action with mapped button."""
92
+ data = {"type": "click", "x": 100, "y": 200, "button": "wheel"}
93
+ result = adapter.convert(data)
94
+
95
+ assert isinstance(result, ClickAction)
96
+ assert result.button == "middle"
97
+
98
+ def test_convert_double_click_action(self, adapter):
99
+ """Test converting double click action."""
100
+ data = {"type": "double_click", "x": 150, "y": 250}
101
+ result = adapter.convert(data)
102
+
103
+ assert isinstance(result, ClickAction)
104
+ assert result.point is not None
105
+ assert result.point.x == 150
106
+ assert result.point.y == 250
107
+ assert result.button == "left"
108
+ assert result.pattern == [100] # Double click pattern
109
+
110
+ def test_convert_scroll_action(self, adapter):
111
+ """Test converting scroll action."""
112
+ data = {"type": "scroll", "x": 300, "y": 400, "scroll_x": 10, "scroll_y": -20}
113
+ result = adapter.convert(data)
114
+
115
+ assert isinstance(result, ScrollAction)
116
+ assert result.point is not None
117
+ assert result.scroll is not None
118
+ assert result.point.x == 300
119
+ assert result.point.y == 400
120
+ assert result.scroll.x == 10
121
+ assert result.scroll.y == -20
122
+
123
+ def test_convert_scroll_action_default_values(self, adapter):
124
+ """Test converting scroll action with default values."""
125
+ data = {"type": "scroll"}
126
+ result = adapter.convert(data)
127
+
128
+ assert isinstance(result, ScrollAction)
129
+ assert result.point is not None
130
+ assert result.scroll is not None
131
+ assert result.point.x == 0
132
+ assert result.point.y == 0
133
+ assert result.scroll.x == 0
134
+ assert result.scroll.y == 0
135
+
136
+ def test_convert_type_action(self, adapter):
137
+ """Test converting type action."""
138
+ data = {"type": "type", "text": "Hello, World!"}
139
+ result = adapter.convert(data)
140
+
141
+ assert isinstance(result, TypeAction)
142
+ assert result.text == "Hello, World!"
143
+ assert result.enter_after is False
144
+
145
+ def test_convert_type_action_default_text(self, adapter):
146
+ """Test converting type action with default text."""
147
+ data = {"type": "type"}
148
+ result = adapter.convert(data)
149
+
150
+ assert isinstance(result, TypeAction)
151
+ assert result.text == ""
152
+ assert result.enter_after is False
153
+
154
+ def test_convert_wait_action(self, adapter):
155
+ """Test converting wait action."""
156
+ data = {"type": "wait", "ms": 2000}
157
+ result = adapter.convert(data)
158
+
159
+ assert isinstance(result, WaitAction)
160
+ assert result.time == 2000
161
+
162
+ def test_convert_wait_action_default_time(self, adapter):
163
+ """Test converting wait action with default time."""
164
+ data = {"type": "wait"}
165
+ result = adapter.convert(data)
166
+
167
+ assert isinstance(result, WaitAction)
168
+ assert result.time == 1000
169
+
170
+ def test_convert_move_action(self, adapter):
171
+ """Test converting move action."""
172
+ data = {"type": "move", "x": 500, "y": 600}
173
+ result = adapter.convert(data)
174
+
175
+ assert isinstance(result, MoveAction)
176
+ assert result.point is not None
177
+ assert result.point.x == 500
178
+ assert result.point.y == 600
179
+
180
+ def test_convert_move_action_default_values(self, adapter):
181
+ """Test converting move action with default values."""
182
+ data = {"type": "move"}
183
+ result = adapter.convert(data)
184
+
185
+ assert isinstance(result, MoveAction)
186
+ assert result.point is not None
187
+ assert result.point.x == 0
188
+ assert result.point.y == 0
189
+
190
+ def test_convert_keypress_action(self, adapter):
191
+ """Test converting keypress action."""
192
+ data = {"type": "keypress", "keys": ["ctrl", "c"]}
193
+ result = adapter.convert(data)
194
+
195
+ assert isinstance(result, PressAction)
196
+ assert result.keys == ["ctrl", "c"]
197
+
198
+ def test_convert_keypress_action_mapped_keys(self, adapter):
199
+ """Test converting keypress action with mapped keys."""
200
+ data = {"type": "keypress", "keys": ["return", "arrowup"]}
201
+ result = adapter.convert(data)
202
+
203
+ assert isinstance(result, PressAction)
204
+ assert result.keys == ["enter", "up"]
205
+
206
+ def test_convert_keypress_action_default_keys(self, adapter):
207
+ """Test converting keypress action with default keys."""
208
+ data = {"type": "keypress"}
209
+ result = adapter.convert(data)
210
+
211
+ assert isinstance(result, PressAction)
212
+ assert result.keys == []
213
+
214
+ def test_convert_drag_action(self, adapter):
215
+ """Test converting drag action."""
216
+ data = {
217
+ "type": "drag",
218
+ "path": [{"x": 100, "y": 200}, {"x": 150, "y": 250}, {"x": 200, "y": 300}],
219
+ }
220
+ result = adapter.convert(data)
221
+
222
+ assert isinstance(result, DragAction)
223
+ assert len(result.path) == 3
224
+ assert result.path[0].x == 100
225
+ assert result.path[0].y == 200
226
+ assert result.path[1].x == 150
227
+ assert result.path[1].y == 250
228
+ assert result.path[2].x == 200
229
+ assert result.path[2].y == 300
230
+
231
+ def test_convert_drag_action_default_path(self, adapter):
232
+ """Test converting drag action with default path."""
233
+ data = {"type": "drag"}
234
+ result = adapter.convert(data)
235
+
236
+ assert isinstance(result, DragAction)
237
+ assert result.path == []
238
+
239
+ def test_convert_drag_action_path_with_missing_coords(self, adapter):
240
+ """Test converting drag action with missing coordinates."""
241
+ data = {
242
+ "type": "drag",
243
+ "path": [
244
+ {"x": 100}, # Missing y
245
+ {"y": 200}, # Missing x
246
+ {}, # Missing both
247
+ ],
248
+ }
249
+ result = adapter.convert(data)
250
+
251
+ assert isinstance(result, DragAction)
252
+ assert len(result.path) == 3
253
+ assert result.path[0].x == 100
254
+ assert result.path[0].y == 0 # Default value
255
+ assert result.path[1].x == 0 # Default value
256
+ assert result.path[1].y == 200
257
+ assert result.path[2].x == 0 # Default value
258
+ assert result.path[2].y == 0 # Default value
259
+
260
+ def test_convert_screenshot_action(self, adapter):
261
+ """Test converting screenshot action."""
262
+ data = {"type": "screenshot"}
263
+ result = adapter.convert(data)
264
+
265
+ assert isinstance(result, ScreenshotFetch)
266
+
267
+ def test_convert_response_action(self, adapter):
268
+ """Test converting response action."""
269
+ data = {"type": "response", "text": "Task completed successfully"}
270
+ result = adapter.convert(data)
271
+
272
+ assert isinstance(result, ResponseAction)
273
+ assert result.text == "Task completed successfully"
274
+
275
+ def test_convert_response_action_default_text(self, adapter):
276
+ """Test converting response action with default text."""
277
+ data = {"type": "response"}
278
+ result = adapter.convert(data)
279
+
280
+ assert isinstance(result, ResponseAction)
281
+ assert result.text == ""
282
+
283
+ def test_convert_unsupported_action_type(self, adapter):
284
+ """Test converting unsupported action type."""
285
+ data = {"type": "unsupported_action"}
286
+
287
+ with pytest.raises(ValueError) as exc_info:
288
+ adapter.convert(data)
289
+
290
+ assert "Unsupported action type: unsupported_action" in str(exc_info.value)
291
+
292
+ def test_convert_invalid_data_structure(self, adapter):
293
+ """Test converting invalid data structure."""
294
+ # Test with non-dict data
295
+ with pytest.raises(ValueError) as exc_info:
296
+ adapter.convert("invalid_data")
297
+
298
+ assert "Invalid action" in str(exc_info.value)
299
+
300
+ def test_convert_missing_type_field(self, adapter):
301
+ """Test converting data without type field."""
302
+ data = {"x": 100, "y": 200} # Missing type
303
+
304
+ with pytest.raises(ValueError) as exc_info:
305
+ adapter.convert(data)
306
+
307
+ assert "Unsupported action type: None" in str(exc_info.value)
308
+
309
+ def test_convert_none_data(self, adapter):
310
+ """Test converting None data."""
311
+ with pytest.raises(ValueError) as exc_info:
312
+ adapter.convert(None)
313
+
314
+ assert "Invalid action" in str(exc_info.value)
315
+
316
+
317
+ class TestOperatorAdapterIntegration:
318
+ """Integration tests for OperatorAdapter."""
319
+
320
+ @pytest.fixture
321
+ def adapter(self):
322
+ """Fixture providing a clean adapter instance."""
323
+ return OperatorAdapter()
324
+
325
+ def test_full_click_pipeline(self, adapter):
326
+ """Test full click action processing pipeline."""
327
+ # Set adapter dimensions to avoid scaling
328
+ adapter.agent_width = 1920
329
+ adapter.agent_height = 1080
330
+ adapter.env_width = 1920
331
+ adapter.env_height = 1080
332
+
333
+ # Test the full adapt method
334
+ raw_action = {"type": "click", "x": 100, "y": 200, "button": "right"}
335
+
336
+ result = adapter.adapt(raw_action)
337
+
338
+ assert isinstance(result, ClickAction)
339
+ assert result.point is not None
340
+ assert result.point.x == 100
341
+ assert result.point.y == 200
342
+ assert result.button == "right"
343
+
344
+ # Check that it was added to memory
345
+ assert len(adapter.memory) == 1
346
+ assert adapter.memory[0] == result
347
+
348
+ def test_multiple_actions_processing(self, adapter):
349
+ """Test processing multiple actions."""
350
+ # Set adapter dimensions to avoid scaling
351
+ adapter.agent_width = 1920
352
+ adapter.agent_height = 1080
353
+ adapter.env_width = 1920
354
+ adapter.env_height = 1080
355
+
356
+ actions = [
357
+ {"type": "click", "x": 100, "y": 200},
358
+ {"type": "type", "text": "hello"},
359
+ {"type": "keypress", "keys": ["return"]},
360
+ ]
361
+
362
+ results = adapter.adapt_list(actions)
363
+
364
+ assert len(results) == 3
365
+ assert isinstance(results[0], ClickAction)
366
+ assert isinstance(results[1], TypeAction)
367
+ assert isinstance(results[2], PressAction)
368
+
369
+ # Check memory
370
+ assert len(adapter.memory) == 3
hud/agent/__init__.py CHANGED
@@ -1,7 +1,9 @@
1
1
  from .base import Agent
2
2
  from .claude import ClaudeAgent
3
+ from .claude_plays_pokemon import ClaudePlaysPokemon
3
4
  from .operator import OperatorAgent
4
5
  from .langchain import LangchainAgent
6
+ from .misc import ResponseAgent
5
7
 
6
8
  from hud.adapters import OperatorAdapter, ClaudeAdapter
7
9
 
@@ -12,4 +14,6 @@ __all__ = [
12
14
  "OperatorAdapter",
13
15
  "ClaudeAdapter",
14
16
  "LangchainAgent",
17
+ "ClaudePlaysPokemon",
18
+ "ResponseAgent",
15
19
  ]
hud/agent/base.py CHANGED
@@ -2,7 +2,11 @@ from abc import ABC, abstractmethod
2
2
  from typing import Sequence, TypeVar, Generic
3
3
 
4
4
  from hud.adapters import Adapter, CLA
5
+ from hud.types import Gym
5
6
  from hud.utils.common import Observation
7
+ import logging
8
+
9
+ logger = logging.getLogger(__name__)
6
10
 
7
11
  # Generic type for different client types (Anthropic, OpenAI, etc.)
8
12
  ClientT = TypeVar("ClientT")
@@ -21,7 +25,13 @@ class Agent(Generic[ClientT, ActionT], ABC):
21
25
  Subclasses only need to implement the fetch_response method.
22
26
  """
23
27
 
24
- def __init__(self, client: ClientT | None = None, adapter: Adapter | None = None):
28
+ transfer_gyms: dict[Gym, Gym] = {}
29
+
30
+ def __init__(
31
+ self,
32
+ client: ClientT | None = None,
33
+ adapter: Adapter | None = None,
34
+ ):
25
35
  """
26
36
  Initialize the agent.
27
37
 
@@ -81,7 +91,9 @@ class Agent(Generic[ClientT, ActionT], ABC):
81
91
 
82
92
  return self.adapter.adapt_list(actions)
83
93
 
84
- async def predict(self, observation: Observation) -> tuple[list[CLA] | list[ActionT], bool]:
94
+ async def predict(
95
+ self, observation: Observation, verbose: bool = False
96
+ ) -> tuple[list[CLA] | list[ActionT], bool]:
85
97
  """
86
98
  Predict the next action based on the observation.
87
99
 
@@ -94,11 +106,15 @@ class Agent(Generic[ClientT, ActionT], ABC):
94
106
  tuple[list[CLA] | list[ActionT], bool]: A tuple containing the list of actions and a boolean
95
107
  indicating if the agent believes it has completed the task
96
108
  """
109
+ if verbose:
110
+ logger.info("Predicting action...")
97
111
  # Stage 1: Preprocess the observation
98
112
  processed_obs = self.preprocess(observation)
99
113
 
100
114
  # Stage 2: Fetch response from the model
101
115
  actions, done = await self.fetch_response(processed_obs)
116
+ if verbose:
117
+ logger.info("Raw action: %s", actions)
102
118
 
103
119
  # Stage 3: Postprocess the actions if we have an adapter
104
120
  if self.adapter and actions:
hud/agent/claude.py CHANGED
@@ -13,6 +13,7 @@ from anthropic.types.beta import (
13
13
  from hud.adapters import Adapter
14
14
  from hud.agent.base import Agent
15
15
  from hud.adapters.claude import ClaudeAdapter
16
+ from hud.types import Gym
16
17
  from hud.utils.common import Observation
17
18
  from hud.settings import settings
18
19
 
@@ -53,6 +54,8 @@ class ClaudeAgent(Agent[AsyncAnthropic, Any]):
53
54
  through the ClaudeAdapter which converts actions to the format expected by HUD.
54
55
  """
55
56
 
57
+ transfer_gyms: dict[Gym, Gym] = {"qa": "hud-browser"}
58
+
56
59
  def __init__(
57
60
  self,
58
61
  client: AsyncAnthropic | None = None,
@@ -123,20 +126,20 @@ class ClaudeAgent(Agent[AsyncAnthropic, Any]):
123
126
 
124
127
  # Add text instruction if present
125
128
  if observation.text:
126
- logger.info("Adding text to user content: %s", observation.text)
129
+ # logger.info("Adding text to user content: %s", observation.text)
127
130
  user_content.append(text_to_content_block(str(observation.text)))
128
131
 
129
132
  # Add screenshot if present
130
133
  if observation.screenshot:
131
- logger.info("Adding screenshot to user content")
134
+ # logger.info("Adding screenshot to user content")
132
135
  if not self.pending_computer_use_tool_id:
133
- logger.info("Adding screenshot to user content, no tool id")
136
+ # logger.info("Adding screenshot to user content, no tool id")
134
137
  user_content.append(base64_to_content_block(observation.screenshot))
135
138
  else:
136
- logger.info(
137
- "Adding screenshot to user content, tool id: %s",
138
- self.pending_computer_use_tool_id,
139
- )
139
+ # logger.info(
140
+ # "Adding screenshot to user content, tool id: %s",
141
+ # self.pending_computer_use_tool_id,
142
+ # )
140
143
  user_content.append(
141
144
  tool_use_content_block(
142
145
  self.pending_computer_use_tool_id,
@@ -183,9 +186,9 @@ class ClaudeAgent(Agent[AsyncAnthropic, Any]):
183
186
  done = True # Assume we're done unless we find a tool use
184
187
 
185
188
  for block in response_content:
186
- logger.info("Processing block: %s", block)
189
+ # logger.info("Processing block: %s", block)
187
190
  if block.type == "tool_use":
188
- logger.info("Processing tool use: %s", block)
191
+ # logger.info("Processing tool use: %s", block)
189
192
  assert block.name == "computer"
190
193
 
191
194
  # Store the raw action
@@ -197,20 +200,20 @@ class ClaudeAgent(Agent[AsyncAnthropic, Any]):
197
200
  break
198
201
 
199
202
  # If no tool use action was found, check for a final text response
200
- if not actions and done:
203
+ if len(actions) == 0 and done:
201
204
  final_text_response = ""
202
205
  for block in response_content:
203
206
  if block.type == "text":
204
207
  final_text_response += block.text
205
208
 
206
209
  if final_text_response.strip():
207
- logger.info(
208
- f"No tool use found. Using final text as response: {final_text_response}"
209
- )
210
+ # logger.info(
211
+ # f"No tool use found. Using final text as response: {final_text_response}"
212
+ # )
210
213
  actions = [{"action": "response", "text": final_text_response.strip()}]
211
- # Keep done = True
212
- else:
213
- logger.info("No tool use and no final text block found.")
214
- # Keep done = True, actions remains empty
214
+ done = True
215
+ # else:
216
+ # logger.info("No tool use and no final text block found.")
217
+ # Keep done = True, actions remains empty
215
218
 
216
219
  return actions, done