hud-python 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +22 -89
- hud/agents/__init__.py +17 -0
- hud/agents/art.py +101 -0
- hud/agents/base.py +599 -0
- hud/{mcp → agents}/claude.py +373 -321
- hud/{mcp → agents}/langchain.py +250 -250
- hud/agents/misc/__init__.py +7 -0
- hud/{agent → agents}/misc/response_agent.py +80 -80
- hud/{mcp → agents}/openai.py +352 -334
- hud/agents/openai_chat_generic.py +154 -0
- hud/{mcp → agents}/tests/__init__.py +1 -1
- hud/agents/tests/test_base.py +742 -0
- hud/agents/tests/test_claude.py +324 -0
- hud/{mcp → agents}/tests/test_client.py +363 -324
- hud/{mcp → agents}/tests/test_openai.py +237 -238
- hud/cli/__init__.py +617 -0
- hud/cli/__main__.py +8 -0
- hud/cli/analyze.py +371 -0
- hud/cli/analyze_metadata.py +230 -0
- hud/cli/build.py +427 -0
- hud/cli/clone.py +185 -0
- hud/cli/cursor.py +92 -0
- hud/cli/debug.py +392 -0
- hud/cli/docker_utils.py +83 -0
- hud/cli/init.py +281 -0
- hud/cli/interactive.py +353 -0
- hud/cli/mcp_server.py +756 -0
- hud/cli/pull.py +336 -0
- hud/cli/push.py +379 -0
- hud/cli/remote_runner.py +311 -0
- hud/cli/runner.py +160 -0
- hud/cli/tests/__init__.py +3 -0
- hud/cli/tests/test_analyze.py +284 -0
- hud/cli/tests/test_cli_init.py +265 -0
- hud/cli/tests/test_cli_main.py +27 -0
- hud/cli/tests/test_clone.py +142 -0
- hud/cli/tests/test_cursor.py +253 -0
- hud/cli/tests/test_debug.py +453 -0
- hud/cli/tests/test_mcp_server.py +139 -0
- hud/cli/tests/test_utils.py +388 -0
- hud/cli/utils.py +263 -0
- hud/clients/README.md +143 -0
- hud/clients/__init__.py +16 -0
- hud/clients/base.py +354 -0
- hud/clients/fastmcp.py +202 -0
- hud/clients/mcp_use.py +278 -0
- hud/clients/tests/__init__.py +1 -0
- hud/clients/tests/test_client_integration.py +111 -0
- hud/clients/tests/test_fastmcp.py +342 -0
- hud/clients/tests/test_protocol.py +188 -0
- hud/clients/utils/__init__.py +1 -0
- hud/clients/utils/retry_transport.py +160 -0
- hud/datasets.py +322 -192
- hud/misc/__init__.py +1 -0
- hud/{agent → misc}/claude_plays_pokemon.py +292 -283
- hud/otel/__init__.py +35 -0
- hud/otel/collector.py +142 -0
- hud/otel/config.py +164 -0
- hud/otel/context.py +536 -0
- hud/otel/exporters.py +366 -0
- hud/otel/instrumentation.py +97 -0
- hud/otel/processors.py +118 -0
- hud/otel/tests/__init__.py +1 -0
- hud/otel/tests/test_processors.py +197 -0
- hud/server/__init__.py +5 -5
- hud/server/context.py +114 -0
- hud/server/helper/__init__.py +5 -0
- hud/server/low_level.py +132 -0
- hud/server/server.py +166 -0
- hud/server/tests/__init__.py +3 -0
- hud/settings.py +73 -79
- hud/shared/__init__.py +5 -0
- hud/{exceptions.py → shared/exceptions.py} +180 -180
- hud/{server → shared}/requests.py +264 -264
- hud/shared/tests/test_exceptions.py +157 -0
- hud/{server → shared}/tests/test_requests.py +275 -275
- hud/telemetry/__init__.py +25 -30
- hud/telemetry/instrument.py +379 -0
- hud/telemetry/job.py +309 -141
- hud/telemetry/replay.py +74 -0
- hud/telemetry/trace.py +83 -0
- hud/tools/__init__.py +33 -34
- hud/tools/base.py +365 -65
- hud/tools/bash.py +161 -137
- hud/tools/computer/__init__.py +15 -13
- hud/tools/computer/anthropic.py +437 -420
- hud/tools/computer/hud.py +376 -334
- hud/tools/computer/openai.py +295 -292
- hud/tools/computer/settings.py +82 -0
- hud/tools/edit.py +314 -290
- hud/tools/executors/__init__.py +30 -30
- hud/tools/executors/base.py +539 -532
- hud/tools/executors/pyautogui.py +621 -619
- hud/tools/executors/tests/__init__.py +1 -1
- hud/tools/executors/tests/test_base_executor.py +338 -338
- hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
- hud/tools/executors/xdo.py +511 -503
- hud/tools/{playwright_tool.py → playwright.py} +412 -379
- hud/tools/tests/__init__.py +3 -3
- hud/tools/tests/test_base.py +282 -0
- hud/tools/tests/test_bash.py +158 -152
- hud/tools/tests/test_bash_extended.py +197 -0
- hud/tools/tests/test_computer.py +425 -52
- hud/tools/tests/test_computer_actions.py +34 -34
- hud/tools/tests/test_edit.py +259 -240
- hud/tools/tests/test_init.py +27 -27
- hud/tools/tests/test_playwright_tool.py +183 -183
- hud/tools/tests/test_tools.py +145 -157
- hud/tools/tests/test_utils.py +156 -156
- hud/tools/types.py +72 -0
- hud/tools/utils.py +50 -50
- hud/types.py +136 -89
- hud/utils/__init__.py +10 -16
- hud/utils/async_utils.py +65 -0
- hud/utils/design.py +168 -0
- hud/utils/mcp.py +55 -0
- hud/utils/progress.py +149 -149
- hud/utils/telemetry.py +66 -66
- hud/utils/tests/test_async_utils.py +173 -0
- hud/utils/tests/test_init.py +17 -21
- hud/utils/tests/test_progress.py +261 -225
- hud/utils/tests/test_telemetry.py +82 -37
- hud/utils/tests/test_version.py +8 -8
- hud/version.py +7 -7
- hud_python-0.4.0.dist-info/METADATA +474 -0
- hud_python-0.4.0.dist-info/RECORD +132 -0
- hud_python-0.4.0.dist-info/entry_points.txt +3 -0
- {hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
- hud/adapters/__init__.py +0 -8
- hud/adapters/claude/__init__.py +0 -5
- hud/adapters/claude/adapter.py +0 -180
- hud/adapters/claude/tests/__init__.py +0 -1
- hud/adapters/claude/tests/test_adapter.py +0 -519
- hud/adapters/common/__init__.py +0 -6
- hud/adapters/common/adapter.py +0 -178
- hud/adapters/common/tests/test_adapter.py +0 -289
- hud/adapters/common/types.py +0 -446
- hud/adapters/operator/__init__.py +0 -5
- hud/adapters/operator/adapter.py +0 -108
- hud/adapters/operator/tests/__init__.py +0 -1
- hud/adapters/operator/tests/test_adapter.py +0 -370
- hud/agent/__init__.py +0 -19
- hud/agent/base.py +0 -126
- hud/agent/claude.py +0 -271
- hud/agent/langchain.py +0 -215
- hud/agent/misc/__init__.py +0 -3
- hud/agent/operator.py +0 -268
- hud/agent/tests/__init__.py +0 -1
- hud/agent/tests/test_base.py +0 -202
- hud/env/__init__.py +0 -11
- hud/env/client.py +0 -35
- hud/env/docker_client.py +0 -349
- hud/env/environment.py +0 -446
- hud/env/local_docker_client.py +0 -358
- hud/env/remote_client.py +0 -212
- hud/env/remote_docker_client.py +0 -292
- hud/gym.py +0 -130
- hud/job.py +0 -773
- hud/mcp/__init__.py +0 -17
- hud/mcp/base.py +0 -631
- hud/mcp/client.py +0 -312
- hud/mcp/tests/test_base.py +0 -512
- hud/mcp/tests/test_claude.py +0 -294
- hud/task.py +0 -149
- hud/taskset.py +0 -237
- hud/telemetry/_trace.py +0 -347
- hud/telemetry/context.py +0 -230
- hud/telemetry/exporter.py +0 -575
- hud/telemetry/instrumentation/__init__.py +0 -3
- hud/telemetry/instrumentation/mcp.py +0 -259
- hud/telemetry/instrumentation/registry.py +0 -59
- hud/telemetry/mcp_models.py +0 -270
- hud/telemetry/tests/__init__.py +0 -1
- hud/telemetry/tests/test_context.py +0 -210
- hud/telemetry/tests/test_trace.py +0 -312
- hud/tools/helper/README.md +0 -56
- hud/tools/helper/__init__.py +0 -9
- hud/tools/helper/mcp_server.py +0 -78
- hud/tools/helper/server_initialization.py +0 -115
- hud/tools/helper/utils.py +0 -58
- hud/trajectory.py +0 -94
- hud/utils/agent.py +0 -37
- hud/utils/common.py +0 -256
- hud/utils/config.py +0 -120
- hud/utils/deprecation.py +0 -115
- hud/utils/misc.py +0 -53
- hud/utils/tests/test_common.py +0 -277
- hud/utils/tests/test_config.py +0 -129
- hud_python-0.3.5.dist-info/METADATA +0 -284
- hud_python-0.3.5.dist-info/RECORD +0 -120
- /hud/{adapters/common → shared}/tests/__init__.py +0 -0
- {hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0
hud/__init__.py
CHANGED
|
@@ -1,89 +1,22 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
from
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
"""Emit deprecation warnings for deprecated imports."""
|
|
24
|
-
if name == "Task":
|
|
25
|
-
warnings.warn(
|
|
26
|
-
"Importing Task from hud is deprecated. "
|
|
27
|
-
"Use hud.datasets.TaskConfig instead. "
|
|
28
|
-
"Task will be removed in v0.4.0.",
|
|
29
|
-
DeprecationWarning,
|
|
30
|
-
stacklevel=2,
|
|
31
|
-
)
|
|
32
|
-
return _Task
|
|
33
|
-
elif name == "load_taskset":
|
|
34
|
-
warnings.warn(
|
|
35
|
-
"Importing load_taskset from hud is deprecated. "
|
|
36
|
-
"Use hud-evals HuggingFace datasets instead. "
|
|
37
|
-
"load_taskset will be removed in v0.4.0.",
|
|
38
|
-
DeprecationWarning,
|
|
39
|
-
stacklevel=2,
|
|
40
|
-
)
|
|
41
|
-
return _load_taskset
|
|
42
|
-
raise AttributeError(f"module 'hud' has no attribute '{name}'")
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
def init_telemetry() -> None:
|
|
46
|
-
from .telemetry import init_telemetry as _init_telemetry
|
|
47
|
-
|
|
48
|
-
_init_telemetry()
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
if settings.settings.fancy_logging:
|
|
52
|
-
import logging
|
|
53
|
-
import sys
|
|
54
|
-
|
|
55
|
-
hud_logger = logging.getLogger("hud")
|
|
56
|
-
hud_logger.setLevel(logging.INFO)
|
|
57
|
-
|
|
58
|
-
if not hud_logger.handlers:
|
|
59
|
-
# Use the configured stream (defaults to stderr)
|
|
60
|
-
stream = sys.stderr if settings.settings.log_stream.lower() == "stderr" else sys.stdout
|
|
61
|
-
handler = logging.StreamHandler(stream)
|
|
62
|
-
formatter = logging.Formatter("[%(levelname)s] %(asctime)s | %(name)s | %(message)s")
|
|
63
|
-
handler.setFormatter(formatter)
|
|
64
|
-
hud_logger.addHandler(handler)
|
|
65
|
-
hud_logger.propagate = False
|
|
66
|
-
|
|
67
|
-
__all__ = [
|
|
68
|
-
"Response",
|
|
69
|
-
"__version__",
|
|
70
|
-
"agent",
|
|
71
|
-
"create_job",
|
|
72
|
-
"datasets",
|
|
73
|
-
"env",
|
|
74
|
-
"flush",
|
|
75
|
-
"gym",
|
|
76
|
-
"init_telemetry",
|
|
77
|
-
"job",
|
|
78
|
-
"load_job",
|
|
79
|
-
"run_dataset",
|
|
80
|
-
"run_job",
|
|
81
|
-
"settings",
|
|
82
|
-
"task",
|
|
83
|
-
"taskset",
|
|
84
|
-
"to_taskconfigs",
|
|
85
|
-
"trace",
|
|
86
|
-
"trace_open",
|
|
87
|
-
"types",
|
|
88
|
-
"utils",
|
|
89
|
-
]
|
|
1
|
+
"""hud-python.
|
|
2
|
+
|
|
3
|
+
tools for building, evaluating, and training AI agents.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from .telemetry import clear_trace, create_job, get_trace, instrument, job, trace
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"clear_trace",
|
|
12
|
+
"create_job",
|
|
13
|
+
"get_trace",
|
|
14
|
+
"instrument",
|
|
15
|
+
"job",
|
|
16
|
+
"trace",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
try:
|
|
20
|
+
from .version import __version__
|
|
21
|
+
except ImportError:
|
|
22
|
+
__version__ = "unknown"
|
hud/agents/__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from .art import ArtHUDAgent
|
|
4
|
+
from .base import MCPAgent
|
|
5
|
+
from .claude import ClaudeAgent
|
|
6
|
+
from .langchain import LangChainAgent
|
|
7
|
+
from .openai import OperatorAgent
|
|
8
|
+
from .openai_chat_generic import GenericOpenAIChatAgent
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"ArtHUDAgent",
|
|
12
|
+
"ClaudeAgent",
|
|
13
|
+
"GenericOpenAIChatAgent",
|
|
14
|
+
"LangChainAgent",
|
|
15
|
+
"MCPAgent",
|
|
16
|
+
"OperatorAgent",
|
|
17
|
+
]
|
hud/agents/art.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""Adapter that plugs a *Trainable* ART model into the HUD MCPAgent stack.
|
|
2
|
+
|
|
3
|
+
This extends GenericOpenAIChatAgent to collect messages_and_choices during
|
|
4
|
+
execution for ART training.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from typing import TYPE_CHECKING, Any
|
|
11
|
+
|
|
12
|
+
import hud
|
|
13
|
+
|
|
14
|
+
from .openai_chat_generic import GenericOpenAIChatAgent
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
import mcp.types as types
|
|
18
|
+
|
|
19
|
+
from hud.clients import AgentMCPClient
|
|
20
|
+
from hud.types import AgentResponse
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Default system prompt that ArtHUDAgent assigns to ``self.system_prompt``.
# NOTE(review): "{MAX_STEPS}" stays a literal placeholder in this module; nothing
# here substitutes it — presumably a caller formats it later. Confirm.
system_prompt = "".join(
    (
        "You are an MCP (Model Context Protocol) agent.\n\n",
        "Use MCP tools through the server to complete your task.\n\n",
        "You have a total of {MAX_STEPS} steps.",
    )
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ArtHUDAgent(GenericOpenAIChatAgent):
    """Run an ART *TrainableModel* as the LLM behind a HUD `MCPAgent`.

    While the agent runs, system messages, formatted content blocks, the first
    ``Choice`` of each model response and formatted tool results are appended,
    in call order, to ``messages_and_choices`` so they can be used for ART
    training.
    """

    def __init__(self, art_model: Any, mcp_client: AgentMCPClient, **agent_kwargs: Any) -> None:
        """Wire the ART model's OpenAI client into the generic chat agent.

        Args:
            art_model: ART model object. This class calls ``openai_client()``
                and ``get_inference_name()`` on it and reads ``name``;
                ``project`` is read if present.
            mcp_client: MCP client forwarded to the parent initialiser.
            **agent_kwargs: Extra keyword arguments forwarded unchanged to the
                parent initialiser.
        """
        super().__init__(
            mcp_client=mcp_client,
            # ART's own openai_client() is used to get proper timeouts and patching.
            openai_client=art_model.openai_client(),
            model_name=art_model.get_inference_name(),
            logprobs=True,
            **agent_kwargs,
        )

        # Assigned after the parent initialiser has run, so this module-level
        # prompt takes precedence over anything the parent may have stored
        # (presumably from agent_kwargs — verify against the base class).
        self.system_prompt = system_prompt
        self.art_model = art_model
        # Ordered mix of messages and response choices, collected for ART training.
        self.messages_and_choices: list[Any] = []

        model_label = art_model.name
        project_label = getattr(art_model, "project", "unknown")
        logger.info(
            "ArtHUDAgent initialised with model '%s' (project=%s)",
            model_label,
            project_label,
        )

    async def get_system_messages(self) -> list[Any]:
        """Return the parent's system messages after recording them for ART."""
        system_messages = await super().get_system_messages()
        self.messages_and_choices.extend(system_messages)
        return system_messages

    async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[Any]:
        """Format content blocks via the parent and record the result for ART."""
        formatted = await super().format_blocks(blocks)
        self.messages_and_choices.extend(formatted)
        return formatted

    @hud.instrument(
        span_type="agent",
        record_args=False,  # Messages can be large
        record_result=True,
    )
    async def get_response(self, messages: list[Any]) -> AgentResponse:
        """Get the model response and record its first ``Choice`` for ART.

        The parent's ``get_response`` does the actual model call. When the
        result carries a raw response with a non-empty ``choices`` sequence,
        the first choice is appended to ``messages_and_choices``.
        """
        response = await super().get_response(messages)

        raw = response.raw
        if not (raw and hasattr(raw, "choices") and raw.choices):
            # Nothing to record for ART; hand the response back untouched.
            return response

        first_choice = raw.choices[0]
        # ART tokenization requires message content, so a missing content
        # (None) is replaced in place with an empty string.
        if first_choice.message and first_choice.message.content is None:
            first_choice.message.content = ""
        self.messages_and_choices.append(first_choice)

        return response

    async def format_tool_results(
        self, tool_calls: list[Any], tool_results: list[Any]
    ) -> list[Any]:
        """Format tool results via the parent and record the messages for ART."""
        formatted_results = await super().format_tool_results(tool_calls, tool_results)
        self.messages_and_choices.extend(formatted_results)
        return formatted_results
|