hud-python 0.4.47__py3-none-any.whl → 0.4.49__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (45)
  1. hud/agents/base.py +55 -142
  2. hud/agents/claude.py +5 -6
  3. hud/agents/grounded_openai.py +1 -1
  4. hud/agents/misc/integration_test_agent.py +2 -0
  5. hud/agents/tests/test_base.py +2 -5
  6. hud/cli/__init__.py +80 -215
  7. hud/cli/build.py +105 -45
  8. hud/cli/dev.py +614 -743
  9. hud/cli/eval.py +14 -9
  10. hud/cli/flows/tasks.py +100 -21
  11. hud/cli/init.py +18 -14
  12. hud/cli/push.py +27 -9
  13. hud/cli/rl/local_runner.py +28 -16
  14. hud/cli/rl/vllm.py +2 -0
  15. hud/cli/tests/test_analyze_metadata.py +3 -2
  16. hud/cli/tests/test_eval.py +574 -0
  17. hud/cli/tests/test_mcp_server.py +6 -95
  18. hud/cli/tests/test_utils.py +1 -1
  19. hud/cli/utils/env_check.py +9 -9
  20. hud/cli/utils/source_hash.py +1 -1
  21. hud/datasets/parallel.py +0 -12
  22. hud/datasets/runner.py +1 -4
  23. hud/rl/actor.py +4 -2
  24. hud/rl/distributed.py +1 -1
  25. hud/rl/learner.py +2 -1
  26. hud/rl/train.py +1 -1
  27. hud/server/__init__.py +2 -1
  28. hud/server/router.py +160 -0
  29. hud/server/server.py +246 -79
  30. hud/telemetry/trace.py +1 -1
  31. hud/tools/base.py +20 -10
  32. hud/tools/computer/__init__.py +2 -0
  33. hud/tools/computer/qwen.py +431 -0
  34. hud/tools/computer/settings.py +16 -0
  35. hud/tools/executors/pyautogui.py +1 -1
  36. hud/tools/playwright.py +1 -1
  37. hud/types.py +2 -3
  38. hud/utils/hud_console.py +43 -0
  39. hud/utils/tests/test_version.py +1 -1
  40. hud/version.py +1 -1
  41. {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/METADATA +1 -1
  42. {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/RECORD +45 -42
  43. {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/WHEEL +0 -0
  44. {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/entry_points.txt +0 -0
  45. {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/licenses/LICENSE +0 -0
@@ -22,7 +22,7 @@ class TestColors:
22
22
  assert Colors.YELLOW == "\033[93m"
23
23
  assert Colors.GOLD == "\033[33m"
24
24
  assert Colors.RED == "\033[91m"
25
- assert Colors.GRAY == "\033[90m"
25
+ assert Colors.GRAY == "\033[37m"
26
26
  assert Colors.ENDC == "\033[0m"
27
27
  assert Colors.BOLD == "\033[1m"
28
28
 
@@ -175,16 +175,16 @@ def ensure_built(env_dir: Path, *, interactive: bool = True) -> dict[str, Any]:
175
175
  _print_section("Added files", diffs.get("added", []))
176
176
  _print_section("Removed files", diffs.get("removed", []))
177
177
 
178
- if interactive:
179
- if hud_console.confirm("Rebuild now (runs 'hud build')?", default=True):
180
- require_docker_running()
181
- build_environment(str(env_dir), platform="linux/amd64")
182
- with open(lock_path) as f:
183
- lock_data = yaml.safe_load(f) or {}
184
- else:
185
- hud_console.hint("Continuing without rebuild; this may use an outdated image.")
178
+ # if interactive:
179
+ if hud_console.confirm("Rebuild now (runs 'hud build')?", default=True):
180
+ require_docker_running()
181
+ build_environment(str(env_dir), platform="linux/amd64")
182
+ with open(lock_path) as f:
183
+ lock_data = yaml.safe_load(f) or {}
186
184
  else:
187
- hud_console.hint("Run 'hud build' to update the image before proceeding.")
185
+ hud_console.hint("Continuing without rebuild; this may use an outdated image.")
186
+ # else:
187
+ # hud_console.hint("Run 'hud build' to update the image before proceeding.")
188
188
  elif not stored_hash:
189
189
  hud_console.dim_info(
190
190
  "Info",
@@ -41,7 +41,7 @@ EXCLUDE_FILES = {
41
41
  }
42
42
 
43
43
  INCLUDE_FILES = {"Dockerfile", "pyproject.toml"}
44
- INCLUDE_DIRS = {"controller", "environment"}
44
+ INCLUDE_DIRS = {"server", "mcp", "controller", "environment"}
45
45
 
46
46
 
47
47
  def iter_source_files(root: Path) -> Iterable[Path]:
hud/datasets/parallel.py CHANGED
@@ -261,7 +261,6 @@ async def run_dataset_parallel_manual(
261
261
  max_steps: int = 10,
262
262
  split: str = "train",
263
263
  auto_respond: bool = False,
264
- custom_system_prompt: str | None = None,
265
264
  ) -> list[Any]:
266
265
  """
267
266
  Run all tasks in a dataset using process-based parallelism with manual configuration.
@@ -282,7 +281,6 @@ async def run_dataset_parallel_manual(
282
281
  max_steps: Maximum steps per task
283
282
  split: Dataset split when loading from string
284
283
  auto_respond: Whether to use ResponseAgent
285
- custom_system_prompt: Override system prompt for all tasks
286
284
 
287
285
  Returns:
288
286
  List of results in the same order as the input dataset
@@ -349,14 +347,6 @@ async def run_dataset_parallel_manual(
349
347
  else:
350
348
  raise ValueError(f"Dataset must be string, Dataset, or list, got {type(dataset)}")
351
349
 
352
- # Apply custom system prompt if provided
353
- if custom_system_prompt:
354
- for task_dict in task_dicts:
355
- if "system_prompt" not in task_dict:
356
- task_dict["system_prompt"] = custom_system_prompt
357
- else:
358
- task_dict["system_prompt"] += "\n" + custom_system_prompt
359
-
360
350
  # Prepare job metadata
361
351
  job_metadata = metadata or {}
362
352
  job_metadata.update(
@@ -380,8 +370,6 @@ async def run_dataset_parallel_manual(
380
370
  except Exception:
381
371
  logger.warning("Failed to extract dataset verification info")
382
372
 
383
- # task_dicts = task_dicts[:10]
384
-
385
373
  # Create job context
386
374
  with hud.job(name, metadata=job_metadata, dataset_link=dataset_link) as job_obj:
387
375
  # Prepare agent class info for pickling
hud/datasets/runner.py CHANGED
@@ -27,7 +27,6 @@ async def run_dataset(
27
27
  max_steps: int = 10,
28
28
  split: str = "train",
29
29
  auto_respond: bool = False,
30
- custom_system_prompt: str | None = None,
31
30
  ) -> list[Any]:
32
31
  """
33
32
  Run all tasks in a dataset with automatic job tracking.
@@ -43,7 +42,6 @@ async def run_dataset(
43
42
  max_steps: Maximum steps per task
44
43
  split: Dataset split to use when loading from string (default: "train")
45
44
  auto_respond: Whether to use auto-response agent
46
- custom_system_prompt: Override system prompt for all tasks
47
45
 
48
46
  Returns:
49
47
  List of results from agent.run() in dataset order
@@ -102,8 +100,7 @@ async def run_dataset(
102
100
  async with sem:
103
101
  # Create trace for this task
104
102
  task_name = task_dict.get("prompt") or f"Task {index}"
105
- if custom_system_prompt and "system_prompt" not in task_dict:
106
- task_dict["system_prompt"] = custom_system_prompt
103
+
107
104
  # Ensure task_id is a string for baggage propagation
108
105
  raw_task_id = task_dict.get("id")
109
106
  safe_task_id = str(raw_task_id) if raw_task_id is not None else None
hud/rl/actor.py CHANGED
@@ -37,7 +37,7 @@ class Actor:
37
37
  # Match connection limits to parallel_episodes to avoid bottlenecks
38
38
  # Use shorter per-request timeout and keep retries modest to avoid long blocking
39
39
  http_client = create_retry_httpx_client(
40
- timeout=httpx.Timeout(30.0),
40
+ timeout=httpx.Timeout(60.0),
41
41
  )
42
42
  return AsyncOpenAI(
43
43
  base_url=base_url,
@@ -151,7 +151,9 @@ if __name__ == "__main__":
151
151
  "name": "evaluate",
152
152
  "arguments": {"name": "game_2048_max_number", "arguments": {"target": 128}},
153
153
  },
154
- "system_prompt": "You are an expert 2048 game player. Use arrow keys to reach the target tile. First take a screenshot, then make strategic moves.", # noqa: E501
154
+ "agent_config": {
155
+ "system_prompt": "You are an expert 2048 game player. Use arrow keys to reach the target tile. First take a screenshot, then make strategic moves.", # noqa: E501
156
+ },
155
157
  }
156
158
 
157
159
  task = Task(**task_data)
hud/rl/distributed.py CHANGED
@@ -81,7 +81,7 @@ def broadcast_object(obj: Any, src: int = 0) -> Any:
81
81
  return obj
82
82
 
83
83
  obj_list = [obj] if dist.get_rank() == src else [None]
84
- dist.broadcast_object_list(obj_list, src=src) #, device=torch.device("cpu"))
84
+ dist.broadcast_object_list(obj_list, src=src)
85
85
  return obj_list[0]
86
86
 
87
87
 
hud/rl/learner.py CHANGED
@@ -148,11 +148,12 @@ class GRPOLearner:
148
148
 
149
149
  # Add LoRA adapters or load existing adapter
150
150
  policy.config.use_cache = False
151
-
151
+
152
152
  if model_cfg.adapter_path:
153
153
  # Load existing adapter as baseline
154
154
  self.log(f"Loading existing LoRA adapter from: {model_cfg.adapter_path}")
155
155
  from peft import PeftModel
156
+
156
157
  policy = PeftModel.from_pretrained(policy, model_cfg.adapter_path)
157
158
  # Enable adapter training
158
159
  policy.train()
hud/rl/train.py CHANGED
@@ -95,7 +95,7 @@ async def train(config: Config, tasks: list[Task]) -> None:
95
95
  if is_main_process()
96
96
  else None
97
97
  )
98
-
98
+
99
99
  # Load initial adapter if provided
100
100
  if is_main_process() and config.model.adapter_path and vllm:
101
101
  hud_console.info(f"Loading baseline adapter from: {config.model.adapter_path}")
hud/server/__init__.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from .router import MCPRouter
3
4
  from .server import MCPServer
4
5
 
5
- __all__ = ["MCPServer"]
6
+ __all__ = ["MCPRouter", "MCPServer"]
hud/server/router.py ADDED
@@ -0,0 +1,160 @@
1
+ """MCP Router utilities for FastAPI-like composition patterns."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from typing import TYPE_CHECKING, Any
7
+
8
+ from hud.server import MCPServer
9
+
10
+ if TYPE_CHECKING:
11
+ from collections.abc import Callable
12
+
13
+ from fastmcp import FastMCP
14
+ from fastmcp.tools import Tool
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ # MCPRouter is just an alias to FastMCP for FastAPI-like patterns
19
+ MCPRouter = MCPServer
20
+
21
+ # Prefix for internal tool names
22
+ _INTERNAL_PREFIX = "int_"
23
+
24
+
25
+ class HiddenRouter(MCPRouter):
26
+ """Wraps a FastMCP router to provide a single dispatcher tool for its sub-tools.
27
+
28
+ Instead of exposing all tools at the top level, this creates a single tool
29
+ (named after the router) that dispatches to the router's tools internally.
30
+
31
+ Useful for setup/evaluate patterns where you want:
32
+ - A single 'setup' tool that can call setup_basic(), setup_advanced(), etc.
33
+ - A single 'evaluate' tool that can call evaluate_score(), evaluate_complete(), etc.
34
+
35
+ Example:
36
+ # Create a router with multiple setup functions
37
+ setup_router = MCPRouter(name="setup")
38
+
39
+ @setup_router.tool
40
+ async def reset():
41
+ return "Environment reset"
42
+
43
+ @setup_router.tool
44
+ async def seed_data():
45
+ return "Data seeded"
46
+
47
+ # Wrap in HiddenRouter
48
+ hidden_setup = HiddenRouter(setup_router)
49
+
50
+ # Now you have one 'setup' tool that dispatches to reset/seed_data
51
+ mcp.include_router(hidden_setup)
52
+ """
53
+
54
+ def __init__(
55
+ self,
56
+ router: FastMCP,
57
+ *,
58
+ title: str | None = None,
59
+ description: str | None = None,
60
+ meta: dict[str, Any] | None = None,
61
+ ) -> None:
62
+ """Wrap an existing router with a dispatcher pattern.
63
+
64
+ Args:
65
+ router: The FastMCP router to wrap
66
+ title: Optional title for the dispatcher tool (defaults to "{name} Dispatcher")
67
+ description: Optional description for the dispatcher tool
68
+ meta: Optional metadata for the dispatcher tool
69
+ """
70
+ name = router.name or "router"
71
+
72
+ # Naming scheme for hidden/internal tools
73
+ self._prefix_fn: Callable[[str], str] = lambda n: f"{_INTERNAL_PREFIX}{n}"
74
+
75
+ super().__init__(name=name)
76
+
77
+ # Set up dispatcher tool
78
+ dispatcher_title = title or f"{name.title()} Dispatcher"
79
+ dispatcher_desc = description or f"Call internal '{name}' functions"
80
+
81
+ # Register dispatcher that routes to hidden tools
82
+ async def _dispatch(
83
+ name: str,
84
+ arguments: dict | str | None = None,
85
+ ctx: Any | None = None,
86
+ ) -> Any:
87
+ """Gateway to hidden tools.
88
+
89
+ Args:
90
+ name: Internal function name (without prefix)
91
+ arguments: Arguments to forward to the internal tool (dict or JSON string)
92
+ ctx: Request context injected by FastMCP
93
+ """
94
+ # Handle JSON string inputs
95
+ if isinstance(arguments, str):
96
+ import json
97
+
98
+ try:
99
+ arguments = json.loads(arguments)
100
+ except json.JSONDecodeError:
101
+ arguments = {}
102
+
103
+ # Call the internal tool
104
+ return await self._tool_manager.call_tool(self._prefix_fn(name), arguments or {}) # type: ignore
105
+
106
+ from fastmcp.tools.tool import FunctionTool
107
+
108
+ dispatcher_tool = FunctionTool.from_function(
109
+ _dispatch,
110
+ name=name,
111
+ title=dispatcher_title,
112
+ description=dispatcher_desc,
113
+ tags=set(),
114
+ meta=meta,
115
+ )
116
+ self._tool_manager.add_tool(dispatcher_tool)
117
+
118
+ # Copy all tools from source router as hidden tools
119
+ for tool in router._tool_manager._tools.values():
120
+ tool._key = self._prefix_fn(tool.name)
121
+ self._tool_manager.add_tool(tool)
122
+
123
+ # Expose list of available functions via resource
124
+ async def _functions_catalogue() -> list[str]:
125
+ """List all internal function names without prefix."""
126
+ return [
127
+ key.removeprefix(_INTERNAL_PREFIX)
128
+ for key in self._tool_manager._tools
129
+ if key.startswith(_INTERNAL_PREFIX)
130
+ ]
131
+
132
+ from fastmcp.resources import Resource
133
+
134
+ catalogue_resource = Resource.from_function(
135
+ _functions_catalogue,
136
+ uri=f"{name}://functions",
137
+ name=f"{name.title()} Functions",
138
+ description=f"List of available {name} functions",
139
+ )
140
+ self._resource_manager.add_resource(catalogue_resource)
141
+
142
+ # Override _list_tools to hide internal tools when mounted
143
+ async def _list_tools(self) -> list[Tool]:
144
+ """Override _list_tools to hide internal tools when mounted."""
145
+ return [
146
+ tool
147
+ for key, tool in self._tool_manager._tools.items()
148
+ if not key.startswith(_INTERNAL_PREFIX)
149
+ ]
150
+
151
+ def _sync_list_tools(self) -> dict[str, Tool]:
152
+ """Override _list_tools to hide internal tools when mounted."""
153
+ return {
154
+ key: tool
155
+ for key, tool in self._tool_manager._tools.items()
156
+ if not key.startswith(_INTERNAL_PREFIX)
157
+ }
158
+
159
+
160
+ __all__ = ["HiddenRouter", "MCPRouter"]