hud-python 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +22 -89
- hud/agents/__init__.py +17 -0
- hud/agents/art.py +101 -0
- hud/agents/base.py +599 -0
- hud/{mcp → agents}/claude.py +373 -321
- hud/{mcp → agents}/langchain.py +250 -250
- hud/agents/misc/__init__.py +7 -0
- hud/{agent → agents}/misc/response_agent.py +80 -80
- hud/{mcp → agents}/openai.py +352 -334
- hud/agents/openai_chat_generic.py +154 -0
- hud/{mcp → agents}/tests/__init__.py +1 -1
- hud/agents/tests/test_base.py +742 -0
- hud/agents/tests/test_claude.py +324 -0
- hud/{mcp → agents}/tests/test_client.py +363 -324
- hud/{mcp → agents}/tests/test_openai.py +237 -238
- hud/cli/__init__.py +617 -0
- hud/cli/__main__.py +8 -0
- hud/cli/analyze.py +371 -0
- hud/cli/analyze_metadata.py +230 -0
- hud/cli/build.py +427 -0
- hud/cli/clone.py +185 -0
- hud/cli/cursor.py +92 -0
- hud/cli/debug.py +392 -0
- hud/cli/docker_utils.py +83 -0
- hud/cli/init.py +281 -0
- hud/cli/interactive.py +353 -0
- hud/cli/mcp_server.py +756 -0
- hud/cli/pull.py +336 -0
- hud/cli/push.py +379 -0
- hud/cli/remote_runner.py +311 -0
- hud/cli/runner.py +160 -0
- hud/cli/tests/__init__.py +3 -0
- hud/cli/tests/test_analyze.py +284 -0
- hud/cli/tests/test_cli_init.py +265 -0
- hud/cli/tests/test_cli_main.py +27 -0
- hud/cli/tests/test_clone.py +142 -0
- hud/cli/tests/test_cursor.py +253 -0
- hud/cli/tests/test_debug.py +453 -0
- hud/cli/tests/test_mcp_server.py +139 -0
- hud/cli/tests/test_utils.py +388 -0
- hud/cli/utils.py +263 -0
- hud/clients/README.md +143 -0
- hud/clients/__init__.py +16 -0
- hud/clients/base.py +354 -0
- hud/clients/fastmcp.py +202 -0
- hud/clients/mcp_use.py +278 -0
- hud/clients/tests/__init__.py +1 -0
- hud/clients/tests/test_client_integration.py +111 -0
- hud/clients/tests/test_fastmcp.py +342 -0
- hud/clients/tests/test_protocol.py +188 -0
- hud/clients/utils/__init__.py +1 -0
- hud/clients/utils/retry_transport.py +160 -0
- hud/datasets.py +322 -192
- hud/misc/__init__.py +1 -0
- hud/{agent → misc}/claude_plays_pokemon.py +292 -283
- hud/otel/__init__.py +35 -0
- hud/otel/collector.py +142 -0
- hud/otel/config.py +164 -0
- hud/otel/context.py +536 -0
- hud/otel/exporters.py +366 -0
- hud/otel/instrumentation.py +97 -0
- hud/otel/processors.py +118 -0
- hud/otel/tests/__init__.py +1 -0
- hud/otel/tests/test_processors.py +197 -0
- hud/server/__init__.py +5 -5
- hud/server/context.py +114 -0
- hud/server/helper/__init__.py +5 -0
- hud/server/low_level.py +132 -0
- hud/server/server.py +166 -0
- hud/server/tests/__init__.py +3 -0
- hud/settings.py +73 -79
- hud/shared/__init__.py +5 -0
- hud/{exceptions.py → shared/exceptions.py} +180 -180
- hud/{server → shared}/requests.py +264 -264
- hud/shared/tests/test_exceptions.py +157 -0
- hud/{server → shared}/tests/test_requests.py +275 -275
- hud/telemetry/__init__.py +25 -30
- hud/telemetry/instrument.py +379 -0
- hud/telemetry/job.py +309 -141
- hud/telemetry/replay.py +74 -0
- hud/telemetry/trace.py +83 -0
- hud/tools/__init__.py +33 -34
- hud/tools/base.py +365 -65
- hud/tools/bash.py +161 -137
- hud/tools/computer/__init__.py +15 -13
- hud/tools/computer/anthropic.py +437 -420
- hud/tools/computer/hud.py +376 -334
- hud/tools/computer/openai.py +295 -292
- hud/tools/computer/settings.py +82 -0
- hud/tools/edit.py +314 -290
- hud/tools/executors/__init__.py +30 -30
- hud/tools/executors/base.py +539 -532
- hud/tools/executors/pyautogui.py +621 -619
- hud/tools/executors/tests/__init__.py +1 -1
- hud/tools/executors/tests/test_base_executor.py +338 -338
- hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
- hud/tools/executors/xdo.py +511 -503
- hud/tools/{playwright_tool.py → playwright.py} +412 -379
- hud/tools/tests/__init__.py +3 -3
- hud/tools/tests/test_base.py +282 -0
- hud/tools/tests/test_bash.py +158 -152
- hud/tools/tests/test_bash_extended.py +197 -0
- hud/tools/tests/test_computer.py +425 -52
- hud/tools/tests/test_computer_actions.py +34 -34
- hud/tools/tests/test_edit.py +259 -240
- hud/tools/tests/test_init.py +27 -27
- hud/tools/tests/test_playwright_tool.py +183 -183
- hud/tools/tests/test_tools.py +145 -157
- hud/tools/tests/test_utils.py +156 -156
- hud/tools/types.py +72 -0
- hud/tools/utils.py +50 -50
- hud/types.py +136 -89
- hud/utils/__init__.py +10 -16
- hud/utils/async_utils.py +65 -0
- hud/utils/design.py +168 -0
- hud/utils/mcp.py +55 -0
- hud/utils/progress.py +149 -149
- hud/utils/telemetry.py +66 -66
- hud/utils/tests/test_async_utils.py +173 -0
- hud/utils/tests/test_init.py +17 -21
- hud/utils/tests/test_progress.py +261 -225
- hud/utils/tests/test_telemetry.py +82 -37
- hud/utils/tests/test_version.py +8 -8
- hud/version.py +7 -7
- hud_python-0.4.0.dist-info/METADATA +474 -0
- hud_python-0.4.0.dist-info/RECORD +132 -0
- hud_python-0.4.0.dist-info/entry_points.txt +3 -0
- {hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
- hud/adapters/__init__.py +0 -8
- hud/adapters/claude/__init__.py +0 -5
- hud/adapters/claude/adapter.py +0 -180
- hud/adapters/claude/tests/__init__.py +0 -1
- hud/adapters/claude/tests/test_adapter.py +0 -519
- hud/adapters/common/__init__.py +0 -6
- hud/adapters/common/adapter.py +0 -178
- hud/adapters/common/tests/test_adapter.py +0 -289
- hud/adapters/common/types.py +0 -446
- hud/adapters/operator/__init__.py +0 -5
- hud/adapters/operator/adapter.py +0 -108
- hud/adapters/operator/tests/__init__.py +0 -1
- hud/adapters/operator/tests/test_adapter.py +0 -370
- hud/agent/__init__.py +0 -19
- hud/agent/base.py +0 -126
- hud/agent/claude.py +0 -271
- hud/agent/langchain.py +0 -215
- hud/agent/misc/__init__.py +0 -3
- hud/agent/operator.py +0 -268
- hud/agent/tests/__init__.py +0 -1
- hud/agent/tests/test_base.py +0 -202
- hud/env/__init__.py +0 -11
- hud/env/client.py +0 -35
- hud/env/docker_client.py +0 -349
- hud/env/environment.py +0 -446
- hud/env/local_docker_client.py +0 -358
- hud/env/remote_client.py +0 -212
- hud/env/remote_docker_client.py +0 -292
- hud/gym.py +0 -130
- hud/job.py +0 -773
- hud/mcp/__init__.py +0 -17
- hud/mcp/base.py +0 -631
- hud/mcp/client.py +0 -312
- hud/mcp/tests/test_base.py +0 -512
- hud/mcp/tests/test_claude.py +0 -294
- hud/task.py +0 -149
- hud/taskset.py +0 -237
- hud/telemetry/_trace.py +0 -347
- hud/telemetry/context.py +0 -230
- hud/telemetry/exporter.py +0 -575
- hud/telemetry/instrumentation/__init__.py +0 -3
- hud/telemetry/instrumentation/mcp.py +0 -259
- hud/telemetry/instrumentation/registry.py +0 -59
- hud/telemetry/mcp_models.py +0 -270
- hud/telemetry/tests/__init__.py +0 -1
- hud/telemetry/tests/test_context.py +0 -210
- hud/telemetry/tests/test_trace.py +0 -312
- hud/tools/helper/README.md +0 -56
- hud/tools/helper/__init__.py +0 -9
- hud/tools/helper/mcp_server.py +0 -78
- hud/tools/helper/server_initialization.py +0 -115
- hud/tools/helper/utils.py +0 -58
- hud/trajectory.py +0 -94
- hud/utils/agent.py +0 -37
- hud/utils/common.py +0 -256
- hud/utils/config.py +0 -120
- hud/utils/deprecation.py +0 -115
- hud/utils/misc.py +0 -53
- hud/utils/tests/test_common.py +0 -277
- hud/utils/tests/test_config.py +0 -129
- hud_python-0.3.5.dist-info/METADATA +0 -284
- hud_python-0.3.5.dist-info/RECORD +0 -120
- /hud/{adapters/common → shared}/tests/__init__.py +0 -0
- {hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0
|
@@ -1,115 +0,0 @@
|
|
|
1
|
-
"""Helper for MCP server initialization with progress notifications.
|
|
2
|
-
|
|
3
|
-
Example:
|
|
4
|
-
```python
|
|
5
|
-
from hud.tools.helper import mcp_intialize_wrapper
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
@mcp_intialize_wrapper
|
|
9
|
-
async def initialize_environment(session=None, progress_token=None):
|
|
10
|
-
# Send progress if available
|
|
11
|
-
if session and progress_token:
|
|
12
|
-
await session.send_progress_notification(
|
|
13
|
-
progress_token=progress_token, progress=0, total=100, message="Starting services..."
|
|
14
|
-
)
|
|
15
|
-
|
|
16
|
-
# Your initialization code works with or without session
|
|
17
|
-
start_services()
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
# Create and run server - initialization happens automatically
|
|
21
|
-
mcp = FastMCP("My Server")
|
|
22
|
-
mcp.run()
|
|
23
|
-
```
|
|
24
|
-
"""
|
|
25
|
-
|
|
26
|
-
from __future__ import annotations
|
|
27
|
-
|
|
28
|
-
from typing import TYPE_CHECKING
|
|
29
|
-
|
|
30
|
-
import mcp.types as types
|
|
31
|
-
from mcp.server.session import ServerSession
|
|
32
|
-
|
|
33
|
-
if TYPE_CHECKING:
|
|
34
|
-
from collections.abc import Awaitable, Callable
|
|
35
|
-
|
|
36
|
-
from mcp.shared.session import RequestResponder
|
|
37
|
-
|
|
38
|
-
# Store the original _received_request method
|
|
39
|
-
_original_received_request = ServerSession._received_request
|
|
40
|
-
_init_function: Callable | None = None
|
|
41
|
-
_initialized = False
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
async def _patched_received_request(
    self: ServerSession, responder: RequestResponder[types.ClientRequest, types.ServerResult]
) -> types.ServerResult | None:
    """Replacement for ``ServerSession._received_request`` that runs the init hook.

    For an ``InitializeRequest`` the registered ``_init_function`` (if any, and
    if initialization has not been recorded yet) is awaited before the request
    is forwarded to the original handler. The progress token from the request
    metadata, when present, is passed to the hook.

    Args:
        self: The session the request arrived on.
        responder: Wrapper around the incoming client request.

    Returns:
        Whatever the original ``_received_request`` returns.

    Raises:
        Exception: Anything raised by the init hook is re-raised after a
            failure progress notification has been sent (when a token exists).
    """
    # NOTE(review): block nesting was reconstructed from a flattened diff view;
    # confirm against the released file.
    global _initialized, _init_function

    request = responder.request.root
    if isinstance(request, types.InitializeRequest):
        # The progress token is optional; fall back to None at every level.
        meta = getattr(request.params, "meta", None)
        token = getattr(meta, "progressToken", None) if meta else None

        if _init_function and not _initialized:
            try:
                await _init_function(session=self, progress_token=token)
                # Hook succeeded: restore the unpatched handler on the class.
                ServerSession._received_request = _original_received_request
            except Exception as exc:
                if token:
                    await self.send_progress_notification(
                        progress_token=token,
                        progress=0,
                        total=100,
                        message=f"Initialization failed: {exc!s}",
                    )
                raise

    # Delegate to the original handler so the normal response is produced.
    outcome = await _original_received_request(self, responder)
    _initialized = True
    return outcome
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
def mcp_intialize_wrapper(
    init_function: Callable[[ServerSession | None, str | None], Awaitable[None]] | None = None,
) -> Callable:
    """Register an async init hook and patch ``ServerSession`` to invoke it.

    Works both as ``@mcp_intialize_wrapper`` and as ``@mcp_intialize_wrapper()``.
    The hook is stored in the module-level ``_init_function`` and
    ``ServerSession._received_request`` is swapped for
    ``_patched_received_request`` unless it already is that function.

    The hook is called with keyword arguments ``session`` and
    ``progress_token``:

        @mcp_intialize_wrapper
        async def initialize(session=None, progress_token=None):
            if session and progress_token:
                await session.send_progress_notification(...)
            # Your init code here

    Must be applied before creating FastMCP instance or calling mcp.run().

    Args:
        init_function: The hook when used as a bare decorator, else ``None``.

    Returns:
        The undecorated hook (bare usage) or a decorator that registers one.
    """

    def _register(func: Callable[[ServerSession | None, str | None], Awaitable[None]]) -> Callable:
        global _init_function
        _init_function = func

        # Install the monkey patch only once.
        if ServerSession._received_request != _patched_received_request:
            ServerSession._received_request = _patched_received_request  # type: ignore[assignment]

        return func

    # Bare usage passes the function directly; parenthesised usage does not.
    return _register if init_function is None else _register(init_function)
|
hud/tools/helper/utils.py
DELETED
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
import inspect
|
|
5
|
-
from functools import wraps
|
|
6
|
-
from typing import TYPE_CHECKING, Any
|
|
7
|
-
|
|
8
|
-
if TYPE_CHECKING:
|
|
9
|
-
from collections.abc import Callable
|
|
10
|
-
|
|
11
|
-
from mcp.server.fastmcp import FastMCP
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def register_instance_tool(mcp: FastMCP, name: str, instance: Any) -> Callable[..., Any]:
    """Register ``instance.__call__`` as a FastMCP tool.

    The tool is exposed through an async wrapper whose published signature
    drops ``*args``/``**kwargs`` and annotates every remaining parameter (and
    the return value) as ``Any``.

    Parameters
    ----------
    mcp:
        A :class:`mcp.server.fastmcp.FastMCP` instance.
    name:
        Public tool name.
    instance:
        Object with an ``async def __call__`` (or sync) implementing the tool.

    Returns
    -------
    Whatever ``mcp.tool(name=name)`` returns for the wrapper.

    Raises
    ------
    TypeError
        If a class is passed instead of an instance.
    """
    if inspect.isclass(instance):
        class_name = instance.__name__
        raise TypeError(
            f"register_instance_tool() expects an instance, but got class '{class_name}'. "
            f"Use: register_instance_tool(mcp, '{name}', {class_name}()) "
            f"Not: register_instance_tool(mcp, '{name}', {class_name})"
        )

    call_fn = instance.__call__

    # Remove *args/**kwargs so Pydantic doesn't treat them as required fields
    public_params = [
        param.replace(kind=param.POSITIONAL_OR_KEYWORD, annotation=Any)
        for param in inspect.signature(call_fn).parameters.values()
        if param.kind not in (param.VAR_POSITIONAL, param.VAR_KEYWORD)
    ]
    public_sig = inspect.Signature(parameters=public_params, return_annotation=Any)

    @wraps(call_fn)
    async def _wrapper(*args: Any, **kwargs: Any) -> Any:  # type: ignore[override]
        result = call_fn(*args, **kwargs)
        # isawaitable also covers Tasks/Futures and custom __await__ objects,
        # which a coroutine-only check would hand back un-awaited.
        if inspect.isawaitable(result):
            result = await result
        return result

    _wrapper.__signature__ = public_sig  # type: ignore[attr-defined]

    return mcp.tool(name=name)(_wrapper)
|
hud/trajectory.py
DELETED
|
@@ -1,94 +0,0 @@
|
|
|
1
|
-
# ruff: noqa: T201
|
|
2
|
-
from __future__ import annotations
|
|
3
|
-
|
|
4
|
-
import datetime
|
|
5
|
-
|
|
6
|
-
from pydantic import BaseModel, Field
|
|
7
|
-
|
|
8
|
-
from .adapters.common.types import LogType
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class TrajectoryStep(BaseModel):
    """One step of a trajectory: an observation, the actions taken, and timing."""

    # URL of the observation image; Trajectory.display embeds it as an <img> source.
    observation_url: str | None = None
    # Text observation; printed by Trajectory.display when set.
    observation_text: str | None = None
    # Actions recorded for this step (required field).
    actions: list[dict]
    # Step logs; LogType is imported from .adapters.common.types.
    logs: LogType | None = None
    # Timestamps kept as strings; Trajectory.display parses them with
    # datetime.fromisoformat after replacing a "Z" suffix with "+00:00".
    start_timestamp: str | None = None
    end_timestamp: str | None = None
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class Trajectory(BaseModel):
    """Model representing a single task run's trajectory information."""

    id: str
    reward: float | None = None
    logs: str | None = None
    error: str | None = None
    trajectory: list[TrajectoryStep] = Field(default_factory=list)

    def display(self) -> None:
        """Render every step of the trajectory in an IPython/Jupyter front end.

        For each step this shows the observation image and/or text, the
        actions, the step duration and the elapsed time since the first
        step started. Durations are reported as "N/A" when the required
        timestamps are missing. An empty trajectory renders nothing.

        Raises:
            ImportError: If IPython is not installed.
            ValueError: If the first step's start timestamp is set but is not
                in ISO format.
        """
        try:
            from IPython.display import HTML, Markdown, display
        except ImportError:
            raise ImportError("IPython is required for trajectory display") from None

        # Guard the lookup: an empty trajectory has no first step.
        trajectory_start_timestamp_str = (
            self.trajectory[0].start_timestamp if self.trajectory else None
        )
        t_start_dt = (
            datetime.datetime.fromisoformat(trajectory_start_timestamp_str.replace("Z", "+00:00"))
            if trajectory_start_timestamp_str
            else None
        )
        for i, step in enumerate(self.trajectory):
            # Use Markdown for better step separation in Jupyter
            display(Markdown(f"### Step {i + 1}"))

            # Observation Image
            if step.observation_url:
                try:
                    # Display in Jupyter/IPython environment using HTML
                    display(Markdown("**Observation Image:**"))
                    display(HTML(f'<img src="{step.observation_url}" style="max-width:100%;"/>'))
                    display(Markdown(f"[Image Link]({step.observation_url})"))
                except Exception as e:
                    print(f" [Error processing image: {e}]")
            elif not step.observation_text:  # Only print if no image AND no text
                print(" No visual or text observation provided.")

            # Observation Text
            if step.observation_text:
                print(f" Observation Text: {step.observation_text}")

            # Actions
            print(f"\n Actions: {step.actions}")  # Added newline for spacing

            # Duration. Both strings are reset on every step so a step without
            # usable timestamps never raises or shows the previous step's total.
            duration_str = "N/A"
            total_duration_str = "N/A"
            step_start_timestamp = step.start_timestamp
            step_end_timestamp = step.end_timestamp
            if step_start_timestamp and step_end_timestamp and t_start_dt:
                try:
                    # Attempt to parse timestamps (assuming ISO format)
                    start_dt = datetime.datetime.fromisoformat(
                        step_start_timestamp.replace("Z", "+00:00")
                    )
                    end_dt = datetime.datetime.fromisoformat(
                        step_end_timestamp.replace("Z", "+00:00")
                    )
                    step_seconds = (end_dt - start_dt).total_seconds()
                    duration_str = f"{int(step_seconds // 60)}m {step_seconds % 60:.2f}s"

                    # Calculate the total duration up to this step
                    elapsed_seconds = (end_dt - t_start_dt).total_seconds()
                    total_duration_str = (
                        f"{int(elapsed_seconds // 60)}m {elapsed_seconds % 60:.2f}s"
                    )
                except ValueError:
                    duration_str = "Error parsing timestamps"  # Handle potential format issues
            print(f" Step Duration: {duration_str}")
            print(f" Total Duration: {total_duration_str}")
            display(Markdown("---"))  # Use Markdown horizontal rule
|
hud/utils/agent.py
DELETED
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from typing import TYPE_CHECKING
|
|
4
|
-
|
|
5
|
-
if TYPE_CHECKING:
|
|
6
|
-
from hud.task import Task
|
|
7
|
-
|
|
8
|
-
AGENT_PROMPT = (
    "You are an AI agent whose goal is to accomplish the ultimate task following the instructions."
)


def format_agent_prompt(environment_prompt: str | None, task: Task | None) -> str:
    """Build the full agent prompt from the base prompt, environment and task.

    The result always starts with ``AGENT_PROMPT``. It is followed by the
    task's system prompt if set, otherwise by the environment prompt if set.
    When a task is given, a list of sensitive-data placeholder names per
    domain and the task prompt are appended when present.

    Args:
        environment_prompt: Prompt supplied by the environment, if any.
        task: Task whose ``system_prompt``, ``sensitive_data`` and ``prompt``
            attributes are read, or ``None``.

    Returns:
        The assembled prompt string.
    """
    sections = [AGENT_PROMPT]

    # User-provided system prompt takes precedence over environment prompt
    if task and task.system_prompt:
        sections.append(f"\n\n{task.system_prompt}")
    elif environment_prompt:
        sections.append(f"\n\n{environment_prompt}")

    if not task:
        return "".join(sections)

    if task.sensitive_data:
        sections.append("\n\nHere are placeholders for sensitive data for each domain:")
        for domain, credentials in task.sensitive_data.items():
            # Only the placeholder names (keys) are listed.
            names = ", ".join(f"{key}" for key in credentials)
            sections.append(f"\n{domain}: {names}")
        sections.append(
            "\n\nYou can type these placeholders to enter the sensitive data when needed."
        )

    if task.prompt:
        sections.append(f"\n\n{task.prompt}")

    return "".join(sections)
|
hud/utils/common.py
DELETED
|
@@ -1,256 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import io
|
|
4
|
-
import logging
|
|
5
|
-
import tarfile
|
|
6
|
-
import zipfile
|
|
7
|
-
from typing import TYPE_CHECKING, Any, TypedDict
|
|
8
|
-
|
|
9
|
-
from pathspec import PathSpec
|
|
10
|
-
from pydantic import BaseModel
|
|
11
|
-
|
|
12
|
-
from hud.server.requests import make_request
|
|
13
|
-
from hud.settings import settings
|
|
14
|
-
|
|
15
|
-
if TYPE_CHECKING:
|
|
16
|
-
from collections.abc import Iterator
|
|
17
|
-
from pathlib import Path
|
|
18
|
-
|
|
19
|
-
logger = logging.getLogger("hud.utils.common")
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class FunctionConfig(BaseModel):
    """A function reference plus its arguments.

    ``len()``, indexing and iteration all delegate to ``args``.
    """

    function: str  # Format: "x.y.z"
    args: list[Any]  # Must be json serializable

    id: str | None = None  # Optional id for remote execution
    metadata: dict[str, Any] | None = None  # Optional metadata for telemetry

    def __len__(self) -> int:
        """Return the number of arguments."""
        return len(self.args)

    def __getitem__(self, index: int) -> Any:
        """Return the argument at *index*."""
        return self.args[index]

    def __iter__(self) -> Iterator[Any]:
        """Iterate over the arguments (not over the model fields)."""
        return iter(self.args)

    def __str__(self) -> str:
        """Return a compact one-line description for logs."""
        rendered_args = ", ".join(str(arg) for arg in self.args)
        return f"FC: {self.function}: {rendered_args} ({self.metadata})"
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
# Type alias for the shorthand config, which just converts to function name and args
|
|
43
|
-
BasicType = str | int | float | bool | None
|
|
44
|
-
ShorthandConfig = tuple[BasicType | dict[str, Any] | list[BasicType] | list[dict[str, Any]], ...]
|
|
45
|
-
|
|
46
|
-
# Type alias for multiple config formats
|
|
47
|
-
FunctionConfigs = (
|
|
48
|
-
ShorthandConfig
|
|
49
|
-
| FunctionConfig
|
|
50
|
-
| list[FunctionConfig]
|
|
51
|
-
| list[ShorthandConfig]
|
|
52
|
-
| dict[str, Any]
|
|
53
|
-
| str
|
|
54
|
-
)
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
class Observation(BaseModel):
    """
    Observation from the environment.

    Attributes:
        screenshot: Base64 encoded PNG string of the screen
        text: Text observation, if available
    """

    screenshot: str | None = None  # base64 string png
    text: str | None = None

    def __str__(self) -> str:
        """Return a short form with each field cut to its first 100 characters."""
        # Each present field is truncated and suffixed with "..."; an absent
        # (or empty) field is rendered as the literal text "None".
        screenshot_preview = f"{self.screenshot[:100]}..." if self.screenshot else "None"
        text_preview = f"{self.text[:100]}..." if self.text else "None"
        return f"Observation(screenshot={screenshot_preview}, text={text_preview})"
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
class ExecuteResult(TypedDict):
    """
    Result of an execute command.

    Attributes:
        stdout: Standard output from the command
        stderr: Standard error from the command
        exit_code: Exit code of the command
    """

    # Both streams are typed as bytes, not str.
    stdout: bytes
    stderr: bytes
    exit_code: int
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
# ---------------------------------------------------------------------------
|
|
91
|
-
# Helper functions for handling ignore patterns
|
|
92
|
-
# ---------------------------------------------------------------------------
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
def _read_ignore_file(file_path: Path) -> list[str]:
|
|
96
|
-
"""Return patterns from *file_path* (ignoring blanks / comments)."""
|
|
97
|
-
if not file_path.exists():
|
|
98
|
-
return []
|
|
99
|
-
|
|
100
|
-
patterns: list[str] = []
|
|
101
|
-
for line in file_path.read_text().splitlines():
|
|
102
|
-
stripped = line.strip()
|
|
103
|
-
if not stripped or stripped.startswith("#"):
|
|
104
|
-
continue
|
|
105
|
-
patterns.append(stripped)
|
|
106
|
-
return patterns
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
def _gather_ignore_patterns(root_dir: Path, filename: str) -> list[str]:
    """Collect *filename* patterns throughout *root_dir* respecting hierarchy.

    Patterns from nested ignore files are rewritten so they can be evaluated
    relative to *root_dir* when passed to ``PathSpec``:

    * a pattern containing a separator at its beginning or middle is relative
      to the ignore file's directory, so ``/foo`` or ``a/b`` in
      ``sub/dir/.gitignore`` becomes ``sub/dir/foo`` / ``sub/dir/a/b``;
    * any other pattern may match at any depth below the ignore file, so
      ``foo/`` or ``*.pyc`` becomes ``sub/dir/**/foo/`` / ``sub/dir/**/*.pyc``.

    Patterns from an ignore file located directly in *root_dir* are passed
    through unchanged, which keeps a leading-slash anchor intact. Files are
    processed from shallowest to deepest so that deeper files can override
    (for example re-include with ``!``) what a parent file ignored.

    Args:
        root_dir: Directory to search recursively.
        filename: Name of the ignore file, e.g. ``".gitignore"``.

    Returns:
        Patterns expressed relative to *root_dir*, in processing order.
    """
    gathered: list[str] = []

    root_dir = root_dir.resolve()

    # Deterministic order, parents before children.
    ignore_files = sorted(
        root_dir.rglob(filename),
        key=lambda found: (len(found.parts), found.as_posix()),
    )

    for ignore_file in ignore_files:
        prefix = ignore_file.parent.relative_to(root_dir).as_posix()
        base_prefix = "" if prefix == "." else prefix

        for pat in _read_ignore_file(ignore_file):
            negate = pat.startswith("!")
            pat_body = pat[1:] if negate else pat

            # A bare "!" or "/" carries no pattern; prefixing it would
            # accidentally target the whole directory.
            if not pat_body.strip("/"):
                continue

            if base_prefix:
                # A separator anywhere but the very end anchors the pattern
                # to the directory holding the ignore file.
                anchored = "/" in pat_body.rstrip("/")
                relative = pat_body.lstrip("/")
                joiner = "/" if anchored else "/**/"
                full_pattern = f"{base_prefix}{joiner}{relative}"
            else:
                full_pattern = pat_body

            if negate:
                full_pattern = f"!{full_pattern}"

            gathered.append(full_pattern)

    return gathered
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
def _compile_pathspec(
    directory: Path,
    *,
    respect_gitignore: bool,
    respect_dockerignore: bool,
    respect_hudignore: bool,
) -> PathSpec | None:
    """Build one ``PathSpec`` from the enabled ignore files under *directory*.

    Patterns are gathered, in this order, from ``.gitignore``,
    ``.dockerignore`` and ``.hudignore``; each source is included only when
    its ``respect_*`` flag is true.

    Returns:
        A ``gitwildmatch`` ``PathSpec``, or ``None`` when no patterns were found.
    """
    toggles = (
        (".gitignore", respect_gitignore),
        (".dockerignore", respect_dockerignore),
        (".hudignore", respect_hudignore),
    )

    patterns: list[str] = []
    for ignore_name, enabled in toggles:
        if enabled:
            patterns.extend(_gather_ignore_patterns(directory, ignore_name))

    return PathSpec.from_lines("gitwildmatch", patterns) if patterns else None
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
def _iter_files(
    directory: Path,
    *,
    respect_gitignore: bool,
    respect_dockerignore: bool,
    respect_hudignore: bool,
) -> Iterator[tuple[Path, Path]]:
    """Yield ``(file_path, relative_path)`` for each non-ignored file.

    *directory* is walked recursively. Only regular files are yielded, and a
    file is skipped when its POSIX-style path relative to *directory* matches
    the compiled ignore spec.
    """
    spec = _compile_pathspec(
        directory,
        respect_gitignore=respect_gitignore,
        respect_dockerignore=respect_dockerignore,
        respect_hudignore=respect_hudignore,
    )

    for candidate in directory.rglob("*"):
        if candidate.is_file():
            relative = candidate.relative_to(directory)
            # No spec means nothing is ignored.
            if not (spec and spec.match_file(relative.as_posix())):
                yield candidate, relative
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
def directory_to_tar_bytes(
    directory_path: Path,
    *,
    respect_gitignore: bool = False,
    respect_dockerignore: bool = False,
    respect_hudignore: bool = True,
) -> bytes:
    """Pack *directory_path* into an uncompressed tar archive held in memory.

    With the default arguments only ``.hudignore`` rules are honoured;
    ``.gitignore`` and ``.dockerignore`` are opt-in via their keyword flags.

    Returns:
        The bytes of the tar archive; member names are paths relative to
        *directory_path*.
    """
    buffer = io.BytesIO()
    selected = _iter_files(
        directory_path,
        respect_gitignore=respect_gitignore,
        respect_dockerignore=respect_dockerignore,
        respect_hudignore=respect_hudignore,
    )

    with tarfile.open(fileobj=buffer, mode="w") as archive:
        for file_path, rel_path in selected:
            logger.debug("Adding %s to tar archive", rel_path)
            archive.add(file_path, arcname=str(rel_path))

    # getvalue() returns the whole buffer regardless of the stream position.
    return buffer.getvalue()
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
def directory_to_zip_bytes(
    context_dir: Path,
    *,
    respect_gitignore: bool = False,
    respect_dockerignore: bool = False,
    respect_hudignore: bool = True,
) -> bytes:
    """Pack *context_dir* into a deflate-compressed zip archive held in memory.

    With the default arguments only ``.hudignore`` rules are honoured;
    ``.gitignore`` and ``.dockerignore`` are opt-in via their keyword flags.

    Returns:
        The bytes of the zip archive; member names are paths relative to
        *context_dir*.
    """
    buffer = io.BytesIO()
    selected = _iter_files(
        context_dir,
        respect_gitignore=respect_gitignore,
        respect_dockerignore=respect_dockerignore,
        respect_hudignore=respect_hudignore,
    )

    with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as archive:
        for file_path, rel_path in selected:
            logger.debug("Adding %s to zip archive", rel_path)
            archive.write(str(file_path), arcname=str(rel_path))

    return buffer.getvalue()
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
async def get_gym_id(gym_name_or_id: str) -> str:
    """Look up a gym and return the ``id`` field of the response.

    Issues a GET to ``{settings.base_url}/v1/gyms/{gym_name_or_id}`` using
    ``settings.api_key``.

    Args:
        gym_name_or_id: Value placed in the URL path as given.

    Returns:
        The ``"id"`` entry of the response data.
    """
    gym_url = f"{settings.base_url}/v1/gyms/{gym_name_or_id}"
    payload = await make_request(method="GET", url=gym_url, api_key=settings.api_key)
    return payload["id"]
|
hud/utils/config.py
DELETED
|
@@ -1,120 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import logging
|
|
4
|
-
import re
|
|
5
|
-
from typing import TYPE_CHECKING
|
|
6
|
-
|
|
7
|
-
from hud.utils.common import FunctionConfig, FunctionConfigs
|
|
8
|
-
|
|
9
|
-
if TYPE_CHECKING:
|
|
10
|
-
from typing import TypeGuard
|
|
11
|
-
|
|
12
|
-
logger = logging.getLogger("hud.utils.config")
|
|
13
|
-
|
|
14
|
-
REMOTE_FUNCTION_PREFIX = "private_"
|
|
15
|
-
REMOTE_SETUP = "setup"
|
|
16
|
-
REMOTE_EVALUATE = "evaluate"
|
|
17
|
-
|
|
18
|
-
LOCAL_EVALUATORS = ["response_is", "response_includes", "response_match"]
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def _is_valid_python_name(name: str) -> bool:
|
|
22
|
-
"""Check if a string is a valid Python identifier."""
|
|
23
|
-
return bool(re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", name))
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def _validate_hud_config(config: dict) -> FunctionConfig:
    """Validate a dictionary and convert it to a FunctionConfig.

    Args:
        config: Mapping with a ``"function"`` key (dotted path string) and
            optional ``"args"`` and ``"id"`` keys. A non-list ``"args"`` value
            is wrapped in a single-element list; a missing ``"args"`` key
            means no arguments.

    Returns:
        The equivalent ``FunctionConfig``.

    Raises:
        ValueError: If ``"function"`` is not a string or is not a valid
            dotted path.
    """
    function = config.get("function")
    if not isinstance(function, str):
        raise ValueError("function must be a string")

    # Validate function path components
    _split_and_validate_path(function)

    # Look the key up once; indexing a missing key would raise KeyError.
    if "args" in config:
        raw_args = config["args"]
        args = raw_args if isinstance(raw_args, list) else [raw_args]
    else:
        args = []

    # Create a proper FunctionConfig object instead of using cast
    return FunctionConfig(function=function, args=args, id=config.get("id"))
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def _split_and_validate_path(path: str) -> None:
    """Validate every dot-separated component of a function path.

    Args:
        path: Dotted path such as ``"x.y.z"``.

    Raises:
        ValueError: If *path* is empty or any component is not a valid
            Python identifier.
    """
    # str.split never returns an empty list, so emptiness has to be checked
    # on the string itself for this error to be reachable.
    if not path:
        raise ValueError("Empty function path")

    # Validate each part
    for part in path.split("."):
        if not _is_valid_python_name(part):
            raise ValueError(f"Invalid Python identifier in path: {part}")
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def _is_list_of_configs(config: FunctionConfigs) -> TypeGuard[list[FunctionConfig]]:
    """Return True when *config* is a list containing only FunctionConfig objects.

    An empty list also returns True, since there is no offending element.
    """
    if not isinstance(config, list):
        return False
    return all(isinstance(entry, FunctionConfig) for entry in config)
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
def expand_config(config: FunctionConfigs) -> list[FunctionConfig]:
    """
    Process a config into a standardized list of FunctionConfig objects.

    Args:
        config: Can be:
            - A FunctionConfig object
            - A list of FunctionConfig objects (returned as is)
            - A dictionary with "function" and "args" keys
            - A string naming a function that takes no args
            - A tuple where first element is function name and rest are args
            - A list of such tuples

    Returns:
        list[FunctionConfig]: List of standardized configurations

    Raises:
        ValueError: If the configuration format is invalid
    """
    logger.debug("Processing config: %s", config)

    # If it's already a FunctionConfig, just wrap it in a list
    if isinstance(config, FunctionConfig):
        return [config]

    # If it's a list of FunctionConfigs, return as is
    if _is_list_of_configs(config):
        return config

    # Handle dictionary configuration
    if isinstance(config, dict):
        return [_validate_hud_config(config)]

    if isinstance(config, str):
        return [FunctionConfig(function=config, args=[])]

    # Handle tuple format
    if isinstance(config, tuple):
        if len(config) < 1 or not isinstance(config[0], str):
            # One complete message; the offending value is shown because the
            # type is always ``tuple`` on this branch.
            error_msg = (
                f"Invalid tuple configuration. Expected tuple[str, ...], got: {config!r}"
            )
            logger.error(error_msg)
            raise ValueError(error_msg)

        # First element is the function name, rest are args
        return [FunctionConfig(function=config[0], args=list(config[1:]))]

    if isinstance(config, list):
        result = []
        for item in config:
            if isinstance(item, tuple) and len(item) >= 1 and isinstance(item[0], str):
                result.append(FunctionConfig(function=item[0], args=list(item[1:])))
            else:
                raise ValueError(f"Invalid list item configuration: {item}")
        return result

    # Unknown configuration type
    error_msg = f"Unknown configuration type: {type(config)}"
    logger.error(error_msg)
    raise ValueError(error_msg)
|