hud-python 0.4.47__py3-none-any.whl → 0.4.49__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of hud-python might be problematic.
- hud/agents/base.py +55 -142
- hud/agents/claude.py +5 -6
- hud/agents/grounded_openai.py +1 -1
- hud/agents/misc/integration_test_agent.py +2 -0
- hud/agents/tests/test_base.py +2 -5
- hud/cli/__init__.py +80 -215
- hud/cli/build.py +105 -45
- hud/cli/dev.py +614 -743
- hud/cli/eval.py +14 -9
- hud/cli/flows/tasks.py +100 -21
- hud/cli/init.py +18 -14
- hud/cli/push.py +27 -9
- hud/cli/rl/local_runner.py +28 -16
- hud/cli/rl/vllm.py +2 -0
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_eval.py +574 -0
- hud/cli/tests/test_mcp_server.py +6 -95
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/source_hash.py +1 -1
- hud/datasets/parallel.py +0 -12
- hud/datasets/runner.py +1 -4
- hud/rl/actor.py +4 -2
- hud/rl/distributed.py +1 -1
- hud/rl/learner.py +2 -1
- hud/rl/train.py +1 -1
- hud/server/__init__.py +2 -1
- hud/server/router.py +160 -0
- hud/server/server.py +246 -79
- hud/telemetry/trace.py +1 -1
- hud/tools/base.py +20 -10
- hud/tools/computer/__init__.py +2 -0
- hud/tools/computer/qwen.py +431 -0
- hud/tools/computer/settings.py +16 -0
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/playwright.py +1 -1
- hud/types.py +2 -3
- hud/utils/hud_console.py +43 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/METADATA +1 -1
- {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/RECORD +45 -42
- {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/WHEEL +0 -0
- {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/licenses/LICENSE +0 -0
hud/cli/tests/test_utils.py
CHANGED
@@ -22,7 +22,7 @@ class TestColors:
         assert Colors.YELLOW == "\033[93m"
         assert Colors.GOLD == "\033[33m"
         assert Colors.RED == "\033[91m"
-        assert Colors.GRAY == "\033[
+        assert Colors.GRAY == "\033[37m"
         assert Colors.ENDC == "\033[0m"
         assert Colors.BOLD == "\033[1m"
hud/cli/utils/env_check.py
CHANGED
@@ -175,16 +175,16 @@ def ensure_built(env_dir: Path, *, interactive: bool = True) -> dict[str, Any]:
         _print_section("Added files", diffs.get("added", []))
         _print_section("Removed files", diffs.get("removed", []))

-        if interactive:
-
-
-
-
-
-            else:
-                hud_console.hint("Continuing without rebuild; this may use an outdated image.")
+        # if interactive:
+        if hud_console.confirm("Rebuild now (runs 'hud build')?", default=True):
+            require_docker_running()
+            build_environment(str(env_dir), platform="linux/amd64")
+            with open(lock_path) as f:
+                lock_data = yaml.safe_load(f) or {}
         else:
-            hud_console.hint("
+            hud_console.hint("Continuing without rebuild; this may use an outdated image.")
+            # else:
+            #     hud_console.hint("Run 'hud build' to update the image before proceeding.")
     elif not stored_hash:
         hud_console.dim_info(
             "Info",
hud/cli/utils/source_hash.py
CHANGED
hud/datasets/parallel.py
CHANGED
@@ -261,7 +261,6 @@ async def run_dataset_parallel_manual(
     max_steps: int = 10,
     split: str = "train",
     auto_respond: bool = False,
-    custom_system_prompt: str | None = None,
 ) -> list[Any]:
     """
     Run all tasks in a dataset using process-based parallelism with manual configuration.
@@ -282,7 +281,6 @@ async def run_dataset_parallel_manual(
         max_steps: Maximum steps per task
         split: Dataset split when loading from string
         auto_respond: Whether to use ResponseAgent
-        custom_system_prompt: Override system prompt for all tasks

     Returns:
         List of results in the same order as the input dataset
@@ -349,14 +347,6 @@ async def run_dataset_parallel_manual(
     else:
         raise ValueError(f"Dataset must be string, Dataset, or list, got {type(dataset)}")

-    # Apply custom system prompt if provided
-    if custom_system_prompt:
-        for task_dict in task_dicts:
-            if "system_prompt" not in task_dict:
-                task_dict["system_prompt"] = custom_system_prompt
-            else:
-                task_dict["system_prompt"] += "\n" + custom_system_prompt
-
     # Prepare job metadata
     job_metadata = metadata or {}
     job_metadata.update(
@@ -380,8 +370,6 @@ async def run_dataset_parallel_manual(
     except Exception:
         logger.warning("Failed to extract dataset verification info")

-    # task_dicts = task_dicts[:10]
-
     # Create job context
     with hud.job(name, metadata=job_metadata, dataset_link=dataset_link) as job_obj:
         # Prepare agent class info for pickling
hud/datasets/runner.py
CHANGED
@@ -27,7 +27,6 @@ async def run_dataset(
     max_steps: int = 10,
     split: str = "train",
     auto_respond: bool = False,
-    custom_system_prompt: str | None = None,
 ) -> list[Any]:
     """
     Run all tasks in a dataset with automatic job tracking.
@@ -43,7 +42,6 @@ async def run_dataset(
         max_steps: Maximum steps per task
         split: Dataset split to use when loading from string (default: "train")
         auto_respond: Whether to use auto-response agent
-        custom_system_prompt: Override system prompt for all tasks

     Returns:
         List of results from agent.run() in dataset order
@@ -102,8 +100,7 @@ async def run_dataset(
         async with sem:
             # Create trace for this task
             task_name = task_dict.get("prompt") or f"Task {index}"
-
-            task_dict["system_prompt"] = custom_system_prompt
+
             # Ensure task_id is a string for baggage propagation
             raw_task_id = task_dict.get("id")
             safe_task_id = str(raw_task_id) if raw_task_id is not None else None
hud/rl/actor.py
CHANGED
@@ -37,7 +37,7 @@ class Actor:
         # Match connection limits to parallel_episodes to avoid bottlenecks
         # Use shorter per-request timeout and keep retries modest to avoid long blocking
         http_client = create_retry_httpx_client(
-            timeout=httpx.Timeout(
+            timeout=httpx.Timeout(60.0),
         )
         return AsyncOpenAI(
             base_url=base_url,
@@ -151,7 +151,9 @@ if __name__ == "__main__":
            "name": "evaluate",
            "arguments": {"name": "game_2048_max_number", "arguments": {"target": 128}},
        },
-        "
+        "agent_config": {
+            "system_prompt": "You are an expert 2048 game player. Use arrow keys to reach the target tile. First take a screenshot, then make strategic moves.", # noqa: E501
+        },
     }

     task = Task(**task_data)
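Together with the removal of `custom_system_prompt` from `run_dataset` and `run_dataset_parallel_manual` earlier in this diff, the hunk above suggests that per-task system prompts now travel on the task itself under `agent_config`. A minimal sketch of that pattern, assuming `Task` accepts the `prompt` and `agent_config` keys shown above (the import path and any field names not visible in this diff are assumptions):

```python
from hud.types import Task  # assumed import path; the diff only shows Task(**task_data)

# Hypothetical task definition; only the keys visible in the hunks above are grounded.
task_data = {
    "prompt": "Play 2048 and reach the 128 tile.",
    "agent_config": {
        # Replaces the removed run_dataset(custom_system_prompt=...) plumbing
        # with a prompt attached to the individual task.
        "system_prompt": "You are an expert 2048 game player. Use arrow keys to reach the target tile.",
    },
}

task = Task(**task_data)
```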
hud/rl/distributed.py
CHANGED
@@ -81,7 +81,7 @@ def broadcast_object(obj: Any, src: int = 0) -> Any:
         return obj

     obj_list = [obj] if dist.get_rank() == src else [None]
-    dist.broadcast_object_list(obj_list, src=src)
+    dist.broadcast_object_list(obj_list, src=src)
     return obj_list[0]

hud/rl/learner.py
CHANGED
@@ -148,11 +148,12 @@ class GRPOLearner:

         # Add LoRA adapters or load existing adapter
         policy.config.use_cache = False
-
+
         if model_cfg.adapter_path:
             # Load existing adapter as baseline
             self.log(f"Loading existing LoRA adapter from: {model_cfg.adapter_path}")
             from peft import PeftModel
+
             policy = PeftModel.from_pretrained(policy, model_cfg.adapter_path)
             # Enable adapter training
             policy.train()
hud/rl/train.py
CHANGED
@@ -95,7 +95,7 @@ async def train(config: Config, tasks: list[Task]) -> None:
         if is_main_process()
         else None
     )
-
+
     # Load initial adapter if provided
     if is_main_process() and config.model.adapter_path and vllm:
         hud_console.info(f"Loading baseline adapter from: {config.model.adapter_path}")
hud/server/__init__.py
CHANGED
hud/server/router.py
ADDED
@@ -0,0 +1,160 @@
+"""MCP Router utilities for FastAPI-like composition patterns."""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, Any
+
+from hud.server import MCPServer
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+    from fastmcp import FastMCP
+    from fastmcp.tools import Tool
+
+logger = logging.getLogger(__name__)
+
+# MCPRouter is just an alias to FastMCP for FastAPI-like patterns
+MCPRouter = MCPServer
+
+# Prefix for internal tool names
+_INTERNAL_PREFIX = "int_"
+
+
+class HiddenRouter(MCPRouter):
+    """Wraps a FastMCP router to provide a single dispatcher tool for its sub-tools.
+
+    Instead of exposing all tools at the top level, this creates a single tool
+    (named after the router) that dispatches to the router's tools internally.
+
+    Useful for setup/evaluate patterns where you want:
+    - A single 'setup' tool that can call setup_basic(), setup_advanced(), etc.
+    - A single 'evaluate' tool that can call evaluate_score(), evaluate_complete(), etc.
+
+    Example:
+        # Create a router with multiple setup functions
+        setup_router = MCPRouter(name="setup")
+
+        @setup_router.tool
+        async def reset():
+            return "Environment reset"
+
+        @setup_router.tool
+        async def seed_data():
+            return "Data seeded"
+
+        # Wrap in HiddenRouter
+        hidden_setup = HiddenRouter(setup_router)
+
+        # Now you have one 'setup' tool that dispatches to reset/seed_data
+        mcp.include_router(hidden_setup)
+    """
+
+    def __init__(
+        self,
+        router: FastMCP,
+        *,
+        title: str | None = None,
+        description: str | None = None,
+        meta: dict[str, Any] | None = None,
+    ) -> None:
+        """Wrap an existing router with a dispatcher pattern.
+
+        Args:
+            router: The FastMCP router to wrap
+            title: Optional title for the dispatcher tool (defaults to "{name} Dispatcher")
+            description: Optional description for the dispatcher tool
+            meta: Optional metadata for the dispatcher tool
+        """
+        name = router.name or "router"
+
+        # Naming scheme for hidden/internal tools
+        self._prefix_fn: Callable[[str], str] = lambda n: f"{_INTERNAL_PREFIX}{n}"
+
+        super().__init__(name=name)
+
+        # Set up dispatcher tool
+        dispatcher_title = title or f"{name.title()} Dispatcher"
+        dispatcher_desc = description or f"Call internal '{name}' functions"
+
+        # Register dispatcher that routes to hidden tools
+        async def _dispatch(
+            name: str,
+            arguments: dict | str | None = None,
+            ctx: Any | None = None,
+        ) -> Any:
+            """Gateway to hidden tools.
+
+            Args:
+                name: Internal function name (without prefix)
+                arguments: Arguments to forward to the internal tool (dict or JSON string)
+                ctx: Request context injected by FastMCP
+            """
+            # Handle JSON string inputs
+            if isinstance(arguments, str):
+                import json
+
+                try:
+                    arguments = json.loads(arguments)
+                except json.JSONDecodeError:
+                    arguments = {}
+
+            # Call the internal tool
+            return await self._tool_manager.call_tool(self._prefix_fn(name), arguments or {}) # type: ignore
+
+        from fastmcp.tools.tool import FunctionTool
+
+        dispatcher_tool = FunctionTool.from_function(
+            _dispatch,
+            name=name,
+            title=dispatcher_title,
+            description=dispatcher_desc,
+            tags=set(),
+            meta=meta,
+        )
+        self._tool_manager.add_tool(dispatcher_tool)
+
+        # Copy all tools from source router as hidden tools
+        for tool in router._tool_manager._tools.values():
+            tool._key = self._prefix_fn(tool.name)
+            self._tool_manager.add_tool(tool)
+
+        # Expose list of available functions via resource
+        async def _functions_catalogue() -> list[str]:
+            """List all internal function names without prefix."""
+            return [
+                key.removeprefix(_INTERNAL_PREFIX)
+                for key in self._tool_manager._tools
+                if key.startswith(_INTERNAL_PREFIX)
+            ]
+
+        from fastmcp.resources import Resource
+
+        catalogue_resource = Resource.from_function(
+            _functions_catalogue,
+            uri=f"{name}://functions",
+            name=f"{name.title()} Functions",
+            description=f"List of available {name} functions",
+        )
+        self._resource_manager.add_resource(catalogue_resource)
+
+    # Override _list_tools to hide internal tools when mounted
+    async def _list_tools(self) -> list[Tool]:
+        """Override _list_tools to hide internal tools when mounted."""
+        return [
+            tool
+            for key, tool in self._tool_manager._tools.items()
+            if not key.startswith(_INTERNAL_PREFIX)
+        ]
+
+    def _sync_list_tools(self) -> dict[str, Tool]:
+        """Override _list_tools to hide internal tools when mounted."""
+        return {
+            key: tool
+            for key, tool in self._tool_manager._tools.items()
+            if not key.startswith(_INTERNAL_PREFIX)
+        }
+
+
+__all__ = ["HiddenRouter", "MCPRouter"]
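The docstring above sketches the intended composition style; here is a usage sketch for the router/server pair, assuming `MCPServer.include_router` and `MCPServer.run` work the way the docstring example implies (the tool names and server name below are illustrative only):

```python
from hud.server import MCPServer
from hud.server.router import HiddenRouter, MCPRouter

# Router holding several evaluate functions; names are illustrative.
evaluate_router = MCPRouter(name="evaluate")

@evaluate_router.tool
async def max_number(target: int) -> dict:
    # Hypothetical evaluator body.
    return {"reward": 1.0}

@evaluate_router.tool
async def game_over() -> dict:
    return {"done": True}

server = MCPServer(name="example-env")

# Expose a single 'evaluate' tool that dispatches to max_number/game_over internally,
# mirroring the setup example in the HiddenRouter docstring.
server.include_router(HiddenRouter(evaluate_router))

if __name__ == "__main__":
    server.run()
```

Clients then call the single `evaluate` tool with `{"name": "max_number", "arguments": {...}}`, and the `evaluate://functions` resource lists the internal function names without the `int_` prefix.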