hud-python: hud_python-0.4.34-py3-none-any.whl → hud_python-0.4.35-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/agents/claude.py +9 -1
- hud/agents/openai.py +9 -1
- hud/cli/build.py +1 -1
- hud/cli/eval.py +1 -1
- hud/datasets/runner.py +1 -1
- hud/utils/tests/test_version.py +1 -1
- hud/utils/tool_shorthand.py +7 -4
- hud/version.py +1 -1
- {hud_python-0.4.34.dist-info → hud_python-0.4.35.dist-info}/METADATA +1 -1
- {hud_python-0.4.34.dist-info → hud_python-0.4.35.dist-info}/RECORD +13 -13
- {hud_python-0.4.34.dist-info → hud_python-0.4.35.dist-info}/WHEEL +0 -0
- {hud_python-0.4.34.dist-info → hud_python-0.4.35.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.34.dist-info → hud_python-0.4.35.dist-info}/licenses/LICENSE +0 -0
hud/agents/claude.py
CHANGED
|
@@ -6,7 +6,7 @@ import copy
|
|
|
6
6
|
import logging
|
|
7
7
|
from typing import TYPE_CHECKING, Any, ClassVar, cast
|
|
8
8
|
|
|
9
|
-
from anthropic import AsyncAnthropic, BadRequestError
|
|
9
|
+
from anthropic import Anthropic, AsyncAnthropic, BadRequestError
|
|
10
10
|
from anthropic.types.beta import BetaContentBlockParam, BetaImageBlockParam, BetaTextBlockParam
|
|
11
11
|
|
|
12
12
|
import hud
|
|
@@ -54,6 +54,7 @@ class ClaudeAgent(MCPAgent):
|
|
|
54
54
|
model: str = "claude-sonnet-4-20250514",
|
|
55
55
|
max_tokens: int = 4096,
|
|
56
56
|
use_computer_beta: bool = True,
|
|
57
|
+
validate_api_key: bool = True,
|
|
57
58
|
**kwargs: Any,
|
|
58
59
|
) -> None:
|
|
59
60
|
"""
|
|
@@ -75,6 +76,13 @@ class ClaudeAgent(MCPAgent):
|
|
|
75
76
|
raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
|
|
76
77
|
model_client = AsyncAnthropic(api_key=api_key)
|
|
77
78
|
|
|
79
|
+
# validate api key if requested
|
|
80
|
+
if validate_api_key:
|
|
81
|
+
try:
|
|
82
|
+
Anthropic(api_key=model_client.api_key).models.list()
|
|
83
|
+
except Exception as e:
|
|
84
|
+
raise ValueError(f"Anthropic API key is invalid: {e}") from e
|
|
85
|
+
|
|
78
86
|
self.anthropic_client = model_client
|
|
79
87
|
self.model = model
|
|
80
88
|
self.max_tokens = max_tokens
|
hud/agents/openai.py
CHANGED
|
@@ -6,7 +6,7 @@ import logging
|
|
|
6
6
|
from typing import Any, ClassVar, Literal
|
|
7
7
|
|
|
8
8
|
import mcp.types as types
|
|
9
|
-
from openai import AsyncOpenAI
|
|
9
|
+
from openai import AsyncOpenAI, OpenAI
|
|
10
10
|
from openai.types.responses import (
|
|
11
11
|
ResponseComputerToolCall,
|
|
12
12
|
ResponseInputMessageContentListParam,
|
|
@@ -45,6 +45,7 @@ class OperatorAgent(MCPAgent):
|
|
|
45
45
|
model_client: AsyncOpenAI | None = None,
|
|
46
46
|
model: str = "computer-use-preview",
|
|
47
47
|
environment: Literal["windows", "mac", "linux", "browser"] = "linux",
|
|
48
|
+
validate_api_key: bool = True,
|
|
48
49
|
**kwargs: Any,
|
|
49
50
|
) -> None:
|
|
50
51
|
"""
|
|
@@ -76,6 +77,13 @@ class OperatorAgent(MCPAgent):
|
|
|
76
77
|
self.pending_call_id: str | None = None
|
|
77
78
|
self.pending_safety_checks: list[Any] = []
|
|
78
79
|
|
|
80
|
+
# validate api key if requested
|
|
81
|
+
if validate_api_key:
|
|
82
|
+
try:
|
|
83
|
+
OpenAI(api_key=self.openai_client.api_key).models.list()
|
|
84
|
+
except Exception as e:
|
|
85
|
+
raise ValueError(f"OpenAI API key is invalid: {e}") from e
|
|
86
|
+
|
|
79
87
|
self.model_name = "openai-" + self.model
|
|
80
88
|
|
|
81
89
|
# Append OpenAI-specific instructions to the base system prompt
|
hud/cli/build.py
CHANGED
|
@@ -489,7 +489,7 @@ def build_environment(
|
|
|
489
489
|
hud_console.warning("Could not retrieve image ID for lock file")
|
|
490
490
|
|
|
491
491
|
# Remove temp image after we're done
|
|
492
|
-
subprocess.run(["docker", "rmi", temp_tag], capture_output=True) # noqa: S603, S607
|
|
492
|
+
subprocess.run(["docker", "rmi", "-f", temp_tag], capture_output=True) # noqa: S603, S607
|
|
493
493
|
|
|
494
494
|
# Add to local registry
|
|
495
495
|
if image_id:
|
hud/cli/eval.py
CHANGED
|
@@ -295,7 +295,7 @@ async def run_full_dataset(
|
|
|
295
295
|
agent_type: Literal["claude", "openai", "vllm"] = "claude",
|
|
296
296
|
model: str | None = None,
|
|
297
297
|
allowed_tools: list[str] | None = None,
|
|
298
|
-
max_concurrent: int =
|
|
298
|
+
max_concurrent: int = 30,
|
|
299
299
|
max_steps: int = 10,
|
|
300
300
|
parallel: bool = False,
|
|
301
301
|
max_workers: int | None = None,
|
hud/datasets/runner.py
CHANGED
|
@@ -22,7 +22,7 @@ async def run_dataset(
|
|
|
22
22
|
dataset: str | Dataset | list[dict[str, Any]],
|
|
23
23
|
agent_class: type[MCPAgent],
|
|
24
24
|
agent_config: dict[str, Any] | None = None,
|
|
25
|
-
max_concurrent: int =
|
|
25
|
+
max_concurrent: int = 30,
|
|
26
26
|
metadata: dict[str, Any] | None = None,
|
|
27
27
|
max_steps: int = 10,
|
|
28
28
|
split: str = "train",
|
hud/utils/tests/test_version.py
CHANGED
hud/utils/tool_shorthand.py
CHANGED
|
@@ -10,7 +10,8 @@ def _is_call_like(obj: Any) -> bool:
|
|
|
10
10
|
return True
|
|
11
11
|
if len(obj) == 1:
|
|
12
12
|
_, v = next(iter(obj.items()))
|
|
13
|
-
|
|
13
|
+
if isinstance(v, dict):
|
|
14
|
+
return "name" in v or (len(v) == 1 and isinstance(next(iter(v.values())), dict))
|
|
14
15
|
return False
|
|
15
16
|
|
|
16
17
|
|
|
@@ -19,9 +20,9 @@ def _to_call_dict(obj: Any) -> Any:
|
|
|
19
20
|
|
|
20
21
|
Rules:
|
|
21
22
|
- If obj is a dict with {name, arguments}: return {name, arguments: recurse(arguments)}
|
|
22
|
-
- Else if obj is a single-key dict {k: v}: return {name: k, arguments: recurse(v)}
|
|
23
|
+
- Else if obj is a single-key dict {k: v} where v looks call-like: return {name: k, arguments: recurse(v)}
|
|
23
24
|
- Else: return obj unchanged (leaf arguments/value)
|
|
24
|
-
"""
|
|
25
|
+
""" # noqa: E501
|
|
25
26
|
if isinstance(obj, dict):
|
|
26
27
|
if "name" in obj and "arguments" in obj:
|
|
27
28
|
args = obj.get("arguments")
|
|
@@ -31,8 +32,10 @@ def _to_call_dict(obj: Any) -> Any:
|
|
|
31
32
|
return {"name": obj.get("name"), "arguments": args}
|
|
32
33
|
if len(obj) == 1:
|
|
33
34
|
k, v = next(iter(obj.items()))
|
|
34
|
-
if
|
|
35
|
+
# Only convert single-key dicts if the value looks like it could be a call
|
|
36
|
+
if isinstance(v, dict) and _is_call_like(v):
|
|
35
37
|
return {"name": k, "arguments": _to_call_dict(v)}
|
|
38
|
+
# Otherwise, leave it as-is (this is the innermost arguments dict)
|
|
36
39
|
return obj
|
|
37
40
|
return obj
|
|
38
41
|
|
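hud/version.py
CHANGED
{hud_python-0.4.34.dist-info → hud_python-0.4.35.dist-info}/METADATA
CHANGED
{hud_python-0.4.34.dist-info → hud_python-0.4.35.dist-info}/RECORD
CHANGED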
|
@@ -2,13 +2,13 @@ hud/__init__.py,sha256=JMDFUE1pP0J1Xl_miBdt7ERvoffZmTzSFe8yxz512A8,552
|
|
|
2
2
|
hud/__main__.py,sha256=YR8Dq8OhINOsVfQ55PmRXXg4fEK84Rt_-rMtJ5rvhWo,145
|
|
3
3
|
hud/settings.py,sha256=sMS31iW1m-5VpWk-Blhi5-obLcUA0fwxWE1GgJz-vqU,2708
|
|
4
4
|
hud/types.py,sha256=RtNM2fPU1NAujTmZLOydQIU-ybk3gVRCoJ2TM2hJOlw,10752
|
|
5
|
-
hud/version.py,sha256=
|
|
5
|
+
hud/version.py,sha256=FINeU2_U4IFvIW-XEPRMxtXONropSKKTWBc10NjEGws,105
|
|
6
6
|
hud/agents/__init__.py,sha256=UoIkljWdbq4bM0LD-mSaw6w826EqdEjOk7r6glNYwYQ,286
|
|
7
7
|
hud/agents/base.py,sha256=_u1zR3gXzZ1RlTCUYdMcvgHqdJBC4-AB1lZt0yBx8lg,35406
|
|
8
|
-
hud/agents/claude.py,sha256=
|
|
8
|
+
hud/agents/claude.py,sha256=TGhm5gE2ltINDAdEsDxKuT9iGMQ5G87R6kmabU3KPt8,16101
|
|
9
9
|
hud/agents/grounded_openai.py,sha256=U-FHjB2Nh1_o0gmlxY5F17lWJ3oHsNRIB2a7z-IKB64,11231
|
|
10
10
|
hud/agents/langchain.py,sha256=1EgCy8jfjunsWxlPC5XfvfLS6_XZVrIF1ZjtHcrvhYw,9584
|
|
11
|
-
hud/agents/openai.py,sha256=
|
|
11
|
+
hud/agents/openai.py,sha256=O1xV1h1l-W8lmnmXqTYr5CwnmnaniMqOxAZbl2CTTng,14576
|
|
12
12
|
hud/agents/openai_chat_generic.py,sha256=7n7timn3fvNRnL2xzWyOTeNTchej2r9cAL1mU6YnFdY,11605
|
|
13
13
|
hud/agents/misc/__init__.py,sha256=BYi4Ytp9b_vycpZFXnr5Oyw6ncKLNNGml8Jrb7bWUb4,136
|
|
14
14
|
hud/agents/misc/response_agent.py,sha256=uMuRDkz5QgaMQliNzBRepond5sb7KyqIiKm3LstjVnw,3753
|
|
@@ -21,11 +21,11 @@ hud/agents/tests/test_openai.py,sha256=1S5IZuc3O3moSp70gqVGjc6m-_b49dCfz2fgX5IGv
|
|
|
21
21
|
hud/cli/__init__.py,sha256=xL1l5MfdWubd9AWe-cpW64WFS1SVsTgI8fdNdTZhIvs,40259
|
|
22
22
|
hud/cli/__main__.py,sha256=fDH7XITyuDITwSDIVwRso06aouADO0CzTHKqp5TOwJE,143
|
|
23
23
|
hud/cli/analyze.py,sha256=4u5oYfJMquOjT9PzzRTYVcTZDxDi0ilNP_g532_hpOU,14716
|
|
24
|
-
hud/cli/build.py,sha256=
|
|
24
|
+
hud/cli/build.py,sha256=cCsCgUD-vX7ZL5h14dGadig_PWRdcQKBdj1MV0C9CTk,18485
|
|
25
25
|
hud/cli/clone.py,sha256=AwVDIuhr8mHb1oT2Af2HrD25SiTdwATpE6zd93vzLgA,6099
|
|
26
26
|
hud/cli/debug.py,sha256=jtFW8J5F_3rhq1Hf1_SkJ7aLS3wjnyIs_LsC8k5cnzc,14200
|
|
27
27
|
hud/cli/dev.py,sha256=56vQdH9oe_XGnOcRcFbNIsLEoBnpCl1eANlRFUeddHQ,31734
|
|
28
|
-
hud/cli/eval.py,sha256=
|
|
28
|
+
hud/cli/eval.py,sha256=53Xx2Yv6yJrNqvU242qBb8hs2Twh1RIoizNvYy6dGKY,22694
|
|
29
29
|
hud/cli/get.py,sha256=sksKrdzBGZa7ZuSoQkc0haj-CvOGVSSikoVXeaUd3N4,6274
|
|
30
30
|
hud/cli/init.py,sha256=McZwpxZMXD-It_PXINCUy-SwUaPiQ7jdpSU5-F-caO8,19671
|
|
31
31
|
hud/cli/list_func.py,sha256=EVi2Vc3Lb3glBNJxFx4MPnZknZ4xmuJz1OFg_dc8a_E,7177
|
|
@@ -88,7 +88,7 @@ hud/clients/utils/retry.py,sha256=mMs2T_mAlb8AYhSqMR4AmCw7838gqCC4mdG3zjMAYM4,57
|
|
|
88
88
|
hud/clients/utils/retry_transport.py,sha256=Rsq25eiKKt_pM1bas78QEZvO0illK97X_3opmaS3A3w,6809
|
|
89
89
|
hud/datasets/__init__.py,sha256=-g05iDy76CU4JiRHjKBBhgh3STtiIjmWhUfPqgf5hJE,697
|
|
90
90
|
hud/datasets/parallel.py,sha256=m7_z2QwjaRuM9gJFYyiPIJUwrlTxZSvFMAd9L2IDZEo,25772
|
|
91
|
-
hud/datasets/runner.py,sha256=
|
|
91
|
+
hud/datasets/runner.py,sha256=43Ua1PUQgnb6cdO9YXJM7kxdlmxPeSV4478Azy5HVGU,4687
|
|
92
92
|
hud/datasets/utils.py,sha256=hdZfjWH5l3FVJaWBSHEEpjujAG7DqEam_vHgslL8MLs,4279
|
|
93
93
|
hud/misc/__init__.py,sha256=m_pprQQ-G-Y0Sd0NEiR8MtAMbElnuFZ2OWT8TXrw7c4,43
|
|
94
94
|
hud/misc/claude_plays_pokemon.py,sha256=IthAkjDVr2Q-GNvX-QLJyMzN7-0pHqqJbagGNv2m7yo,10453
|
|
@@ -194,17 +194,17 @@ hud/utils/pretty_errors.py,sha256=WGeL4CTHtlA6KgPuV_JSX5l6H4-xbuTp6Y6tw1bkiFg,24
|
|
|
194
194
|
hud/utils/progress.py,sha256=suikwFM8sdSfkV10nAOEaInDhG4XKgOSvFePg4jSj1A,5927
|
|
195
195
|
hud/utils/tasks.py,sha256=JwFIq0cpPMpMYnICUmx_G4CF6uy9MtiCmmmN7eA6FsA,4682
|
|
196
196
|
hud/utils/telemetry.py,sha256=hrVIx2rUjSGyy9IVxTZ_3Jii83PiHjyFRd5ls2whimM,1863
|
|
197
|
-
hud/utils/tool_shorthand.py,sha256=
|
|
197
|
+
hud/utils/tool_shorthand.py,sha256=_haLgK3yazLR2Y0jlEHUUQjw9uZCxi9yTipAwdOAJ70,2148
|
|
198
198
|
hud/utils/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
199
199
|
hud/utils/tests/test_async_utils.py,sha256=RkdSnYErRV3Jn7dfg6CPlcE1RSUL__2B627oIqAyy1s,5945
|
|
200
200
|
hud/utils/tests/test_init.py,sha256=2QLQSGgyP9wJhOvPCusm_zjJad0qApOZi1BXpxcdHXQ,383
|
|
201
201
|
hud/utils/tests/test_mcp.py,sha256=0pUa16mL-bqbZDXp5NHBnt1gO5o10BOg7zTMHZ1DNPM,4023
|
|
202
202
|
hud/utils/tests/test_progress.py,sha256=QSF7Kpi03Ff_l3mAeqW9qs1nhK50j9vBiSobZq7T4f4,7394
|
|
203
203
|
hud/utils/tests/test_telemetry.py,sha256=5jl7bEx8C8b-FfFUko5pf4UY-mPOR-9HaeL98dGtVHM,2781
|
|
204
|
-
hud/utils/tests/test_version.py,sha256=
|
|
204
|
+
hud/utils/tests/test_version.py,sha256=gVwJvjGLJ5VNZYJPFRHyfTPWah6I0M4JS0sYTWLoGM4,160
|
|
205
205
|
hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
206
|
-
hud_python-0.4.
|
|
207
|
-
hud_python-0.4.
|
|
208
|
-
hud_python-0.4.
|
|
209
|
-
hud_python-0.4.
|
|
210
|
-
hud_python-0.4.
|
|
206
|
+
hud_python-0.4.35.dist-info/METADATA,sha256=bSffhIrX5P4LCM-rTGuZz71gwsIPkoqzFYu-wPcH2SE,20861
|
|
207
|
+
hud_python-0.4.35.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
208
|
+
hud_python-0.4.35.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
|
|
209
|
+
hud_python-0.4.35.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
|
|
210
|
+
hud_python-0.4.35.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|