hud-python 0.2.8__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

@@ -217,9 +217,10 @@ class PositionFetch(CLAAction):
217
217
  class CustomAction(CLAAction):
218
218
  type: Literal["custom"] = "custom"
219
219
  action: str
220
+ args: dict[str, Any] | None = None
220
221
 
221
222
  def __str__(self) -> str:
222
- action_str = f"⚙️ Custom: {self.action}"
223
+ action_str = f"⚙️ Custom: {self.action} {self.args}"
223
224
  action_str += _format_logs_for_display(self.logs, self.reasoning)
224
225
  return action_str
225
226
 
@@ -7,6 +7,7 @@ from hud.adapters.common.types import (
7
7
  CLAButton,
8
8
  CLAKey,
9
9
  ClickAction,
10
+ CustomAction,
10
11
  DragAction,
11
12
  MoveAction,
12
13
  Point,
@@ -27,6 +28,7 @@ class OperatorAdapter(Adapter):
27
28
  "arrowleft": "left",
28
29
  "arrowright": "right",
29
30
  "cmd": "ctrl",
31
+ "super": "win",
30
32
  }
31
33
 
32
34
  BUTTON_MAP: ClassVar[dict[str, CLAButton]] = {"wheel": "middle"}
@@ -92,6 +94,8 @@ class OperatorAdapter(Adapter):
92
94
 
93
95
  elif action_type == "response":
94
96
  converted_action = ResponseAction(text=data.get("text", ""))
97
+ elif action_type == "custom":
98
+ converted_action = CustomAction(action=data.get("action", ""))
95
99
  else:
96
100
  raise ValueError(f"Unsupported action type: {action_type}")
97
101
 
hud/agent/operator.py CHANGED
@@ -92,6 +92,19 @@ class OperatorAgent(Agent[AsyncOpenAI, dict[str, Any]]):
92
92
  self.initial_prompt = None
93
93
  self.pending_safety_checks = []
94
94
 
95
+ self.base_system_prompt = """
96
+ You are an autonomous computer-using agent. Follow these guidelines:
97
+
98
+ 1. Be decisive and complete tasks without asking for confirmation unless absolutely necessary.
99
+ 2. If you need user confirmation for safety-critical actions, use the formal safety check mechanism.
100
+ 3. Do NOT ask questions like "Should I proceed?" or "Would you like me to continue?" - just proceed with the task.
101
+ 4. When you find what you're looking for (e.g., a file to upload), proceed with the action directly.
102
+ 5. Only stop when the task is fully complete or if you encounter an error that prevents completion.
103
+ 6. Trust that the user wants you to complete the entire task they've requested.
104
+
105
+ Remember: You have been given permission to complete the requested task autonomously.
106
+ """
107
+
95
108
  self.task_run_id = None
96
109
 
97
110
  async def fetch_response(self, observation: Observation) -> tuple[list[dict[str, Any]], bool]:
@@ -145,6 +158,7 @@ class OperatorAgent(Agent[AsyncOpenAI, dict[str, Any]]):
145
158
  model=self.model,
146
159
  tools=[computer_tool],
147
160
  input=input_param,
161
+ instructions=self.base_system_prompt,
148
162
  truncation="auto",
149
163
  reasoning={"summary": "auto"},
150
164
  )
@@ -225,9 +239,14 @@ class OperatorAgent(Agent[AsyncOpenAI, dict[str, Any]]):
225
239
 
226
240
  # If we found final text, package it as a 'response' action
227
241
  if final_text_response:
228
- # No ResponseAgent logic here anymore - just return the response
229
- actions = [{"type": "response", "text": final_text_response}]
230
- done = True
242
+ if (
243
+ "the task is infeasible" in final_text_response.lower()
244
+ ): # Custom action for OSWorld
245
+ done = True
246
+ actions = [{"type": "custom", "action": "FAIL"}]
247
+ else:
248
+ actions = [{"type": "response", "text": final_text_response}]
249
+ done = True
231
250
  else:
232
251
  logger.info("No computer calls and no final text message found.")
233
252
  # Keep done = True, actions remains empty
hud/job.py CHANGED
@@ -274,11 +274,11 @@ async def _maybe_resample_action(
274
274
  try:
275
275
  decision = await response_agent.determine_response(response_text)
276
276
  if decision == "CONTINUE":
277
- logger.info("ResponseAgent indicated CONTINUE. Retrying...")
278
- obs.text = "Please continue."
277
+ logger.info("ResponseAgent indicated CONTINUE. for message: %s", response_text)
278
+ obs.text = "Yes, please continue."
279
279
  return obs, False
280
- elif decision == "CONTINUE":
281
- logger.warning("Max continue retries reached. Stopping despite CONTINUE.")
280
+ else:
281
+ logger.warning("ResponseAgent indicated STOP for message: %s", response_text)
282
282
  except Exception as e:
283
283
  logger.warning("Error using ResponseAgent: %s", e)
284
284
  return obs, True
@@ -5,4 +5,4 @@ def test_import():
5
5
  """Test that the package can be imported."""
6
6
  import hud
7
7
 
8
- assert hud.__version__ == "0.2.8"
8
+ assert hud.__version__ == "0.2.9"
hud/version.py CHANGED
@@ -4,4 +4,4 @@ Version information for the HUD SDK.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.2.8"
7
+ __version__ = "0.2.9"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.2.8
3
+ Version: 0.2.9
4
4
  Summary: SDK for the HUD evaluation platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-sdk
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-sdk/issues
@@ -1,13 +1,13 @@
1
1
  hud/__init__.py,sha256=kjjq-l2msg9HcfYQ4sL8c0-StQIlsl2qLwh8Tx0nKro,1210
2
2
  hud/exceptions.py,sha256=pifKvSqxj9_g4NfARVyH5a-lTThhi9XW06tIXaBakQw,5526
3
3
  hud/gym.py,sha256=JNWlO2GXev0xIjoTI9HMEbcQgGpzc6fku7-RYoYAxHI,4996
4
- hud/job.py,sha256=_OKcdeWdoT7f3wiR7fZFjTVJs0OZCfMrxEG_cXYR6v4,26965
4
+ hud/job.py,sha256=0vWbr3E5bYstVRzXS_6l-57JGUFcrZpmFrNkOSQ8Aa0,26969
5
5
  hud/settings.py,sha256=rx2zc3abJmf9ztwMHRYf9rGqgGprdRPCRhvJstsgyzc,1674
6
6
  hud/task.py,sha256=vDcjKUo8la0AUTP7mwMc2nYwe0tkbnrWwM9-Kvf3Ugg,8773
7
7
  hud/taskset.py,sha256=9IRwHeAdsk_IEibayM-hElE3gTp0mgmi-huN67h9-tc,7019
8
8
  hud/trajectory.py,sha256=ctAwrGIkdULr4xI6G-1Dp2fhDol4o_PmnPcqTzAEIUc,3797
9
9
  hud/types.py,sha256=xqrBb4rPKVkoLVwnyGk4PUrVKayCjKcUD_--n4OrxIM,2954
10
- hud/version.py,sha256=orhJ7MoupCr28yXngRez5EQfHbk4n6aHC0VjxM1WoGw,104
10
+ hud/version.py,sha256=b76M28Br1tS7hHak1DIKBa5REyYNMWNZAmwUbRwGyuw,104
11
11
  hud/adapters/__init__.py,sha256=zz24KdC_e9TJPgWo6y57_8SzevEE5ak4Cm6tXzMxwRk,266
12
12
  hud/adapters/claude/__init__.py,sha256=i7QEF-29FLb9qxp1eYtXs-adIk_tG54tL-9g6d3xodk,100
13
13
  hud/adapters/claude/adapter.py,sha256=vCpotJ5gzQs4PP2iCXVavIcyG8c_4m1P6fuXStwUxSo,6675
@@ -15,11 +15,11 @@ hud/adapters/claude/tests/__init__.py,sha256=9GZj0rz4tTkiPnLfxTmyBPr-s8UZc3gph6W
15
15
  hud/adapters/claude/tests/test_adapter.py,sha256=cAdHEoqLngLiV7QwlWJ0KuNgb1vNv9WZTPQMnxhMDKI,18319
16
16
  hud/adapters/common/__init__.py,sha256=BjdZWJVs_AKtpFrt-tNsdQRjnz7D97DFEQirJ-r0mp8,118
17
17
  hud/adapters/common/adapter.py,sha256=GETzlsEl-uYkL-U4cQHBnfLAvm1dbXec4fKC2ypR1L0,5821
18
- hud/adapters/common/types.py,sha256=DpBu30o32tFEcTdMF8j-IKLN9cNRA9Luko8FKIB8K20,9904
18
+ hud/adapters/common/types.py,sha256=6frue7_gZlSYtOHhF2tFHqzjltzzHsTVs6-H-jQwZ4Y,9955
19
19
  hud/adapters/common/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
20
  hud/adapters/common/tests/test_adapter.py,sha256=rTD36LjvytHqMIyOLDyrn0RLIkd20s6f6dwoBEarJaw,8744
21
21
  hud/adapters/operator/__init__.py,sha256=31vTRs268_TOLd-TeQRKau5bDYy78wxCNpJFhD5_l8U,104
22
- hud/adapters/operator/adapter.py,sha256=heATqYKrTZy4PIM22CfkhgIOPxugDpuF66wOQjZaaxE,3569
22
+ hud/adapters/operator/adapter.py,sha256=Uz4Sr73T57B7v4RRP0uaibHI17N2hBx6Z9YYjgJCUXA,3732
23
23
  hud/adapters/operator/tests/__init__.py,sha256=yTsDVusVXZBQL6DnXpLgKQCBRuOYUAVQ8Blk_k5GETk,41
24
24
  hud/adapters/operator/tests/test_adapter.py,sha256=4RAXwyxAtkh-1Mlt1zJayRkcv3LWaPNEhDVTpwOZd4A,12942
25
25
  hud/agent/__init__.py,sha256=_OxMG3UW1vXSuixdpo09b1jexfWcUbfK44zto8t6_LE,453
@@ -27,7 +27,7 @@ hud/agent/base.py,sha256=hC3mVUMAWo5HHF2b576ScA9UQzsAzcCfPU9S8mDWthA,4080
27
27
  hud/agent/claude.py,sha256=FBSKCxICO6XXYCuIrerVL89bVJ-5JxrZJBDeZgzAdJI,9886
28
28
  hud/agent/claude_plays_pokemon.py,sha256=4TPibnTFhTb24ISRKAU3pA4waIcISTfZLOdfBMIMqxE,10085
29
29
  hud/agent/langchain.py,sha256=H55JNHcGkdl-LVzZEqOFRkuuFEO0D8MI1jCNz9deoko,9012
30
- hud/agent/operator.py,sha256=DDU2YOmNu00apt3W-k45Ybcl4lQ4vAw-v8NXv055Ut8,9387
30
+ hud/agent/operator.py,sha256=kntMOsdL5tzaGVSnzbGvFD2PMLzW2DEB2wEqN_LArQw,10500
31
31
  hud/agent/misc/__init__.py,sha256=-ftYH1T5r7fXKKra6d8jXYmUz9KOTmYwBrPJU-V3S7g,71
32
32
  hud/agent/misc/response_agent.py,sha256=3PPsZqNAyUo2ouSV0ylGQj9fJqojfSB2roq2DadUdG0,3048
33
33
  hud/agent/tests/__init__.py,sha256=HbAW7FvSvzzKPU5LpveZceU8XTcDkRe1Bmte3OGi2f0,29
@@ -77,9 +77,9 @@ hud/utils/tests/test_common.py,sha256=KqDSMf7gWf1oYCiQ_BXsnvW1wUmyzbOzAT-HNoF7tx
77
77
  hud/utils/tests/test_config.py,sha256=dPlXYWuMrxX-NOYbf0vdJ27TJpfacKG8eiKOSGOcfDU,4079
78
78
  hud/utils/tests/test_progress.py,sha256=QunwDgi_heQXhDgmC25zgjr-sFUu5FdJ_1aYigMKeIc,6351
79
79
  hud/utils/tests/test_telemetry.py,sha256=t0An1RTBaE0dZVEpF4uwuq5k1R-PXFR5k4u71h60tx8,1224
80
- hud/utils/tests/test_version.py,sha256=koRqYkFbU5-8GDSXbxX3bghwIBRNoDmI4Mp1e0SFJmI,159
80
+ hud/utils/tests/test_version.py,sha256=DJbzDv9gYYRyWl_73yv8kF7dIXz2xXkCn1QJ6eMO3Yk,159
81
81
  hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
82
- hud_python-0.2.8.dist-info/METADATA,sha256=w8uLSDQlGIKEb_ILt9V2p0a2JcyBJQGdLCnqCAuKBB0,9785
83
- hud_python-0.2.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
84
- hud_python-0.2.8.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
85
- hud_python-0.2.8.dist-info/RECORD,,
82
+ hud_python-0.2.9.dist-info/METADATA,sha256=Sy3WVlfOObkJPzwqmao2RHwfErWgPKn1j1zn92zsclQ,9785
83
+ hud_python-0.2.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
84
+ hud_python-0.2.9.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
85
+ hud_python-0.2.9.dist-info/RECORD,,