hud-python 0.4.47__py3-none-any.whl → 0.4.49__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/agents/base.py +55 -142
- hud/agents/claude.py +5 -6
- hud/agents/grounded_openai.py +1 -1
- hud/agents/misc/integration_test_agent.py +2 -0
- hud/agents/tests/test_base.py +2 -5
- hud/cli/__init__.py +80 -215
- hud/cli/build.py +105 -45
- hud/cli/dev.py +614 -743
- hud/cli/eval.py +14 -9
- hud/cli/flows/tasks.py +100 -21
- hud/cli/init.py +18 -14
- hud/cli/push.py +27 -9
- hud/cli/rl/local_runner.py +28 -16
- hud/cli/rl/vllm.py +2 -0
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_eval.py +574 -0
- hud/cli/tests/test_mcp_server.py +6 -95
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/source_hash.py +1 -1
- hud/datasets/parallel.py +0 -12
- hud/datasets/runner.py +1 -4
- hud/rl/actor.py +4 -2
- hud/rl/distributed.py +1 -1
- hud/rl/learner.py +2 -1
- hud/rl/train.py +1 -1
- hud/server/__init__.py +2 -1
- hud/server/router.py +160 -0
- hud/server/server.py +246 -79
- hud/telemetry/trace.py +1 -1
- hud/tools/base.py +20 -10
- hud/tools/computer/__init__.py +2 -0
- hud/tools/computer/qwen.py +431 -0
- hud/tools/computer/settings.py +16 -0
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/playwright.py +1 -1
- hud/types.py +2 -3
- hud/utils/hud_console.py +43 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/METADATA +1 -1
- {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/RECORD +45 -42
- {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/WHEEL +0 -0
- {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/licenses/LICENSE +0 -0
hud/agents/base.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import asyncio
|
|
6
|
+
import fnmatch
|
|
6
7
|
import json
|
|
7
8
|
import logging
|
|
8
9
|
from abc import ABC, abstractmethod
|
|
@@ -96,12 +97,9 @@ class MCPAgent(ABC):
|
|
|
96
97
|
self.console.set_verbose(True)
|
|
97
98
|
|
|
98
99
|
# User filtering
|
|
99
|
-
self.allowed_tools = allowed_tools
|
|
100
|
-
self.disallowed_tools = disallowed_tools
|
|
101
|
-
|
|
102
|
-
# Task filtering
|
|
103
|
-
self.agent_tools = None
|
|
104
|
-
self.lifecycle_tools = []
|
|
100
|
+
self.allowed_tools: list[str] | None = allowed_tools
|
|
101
|
+
self.disallowed_tools: list[str] | None = disallowed_tools
|
|
102
|
+
self._available_tools: list[types.Tool] | None = None
|
|
105
103
|
|
|
106
104
|
# Messages
|
|
107
105
|
self.system_prompt = system_prompt
|
|
@@ -109,7 +107,6 @@ class MCPAgent(ABC):
|
|
|
109
107
|
self.initial_screenshot = initial_screenshot
|
|
110
108
|
|
|
111
109
|
# Initialize these here so methods can be called before initialize()
|
|
112
|
-
self._available_tools: list[types.Tool] = []
|
|
113
110
|
self._tool_map: dict[str, types.Tool] = {} # Simplified: just name to tool
|
|
114
111
|
self.response_tool_name = None
|
|
115
112
|
|
|
@@ -146,37 +143,52 @@ class MCPAgent(ABC):
|
|
|
146
143
|
except Exception as e:
|
|
147
144
|
self._handle_connection_error(e)
|
|
148
145
|
|
|
149
|
-
# If task is provided, add lifecycle tools
|
|
150
|
-
if isinstance(task, Task):
|
|
151
|
-
if task.
|
|
152
|
-
self.
|
|
153
|
-
if task.
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
self.
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
#
|
|
179
|
-
|
|
146
|
+
# If task is provided, apply agent_config and add lifecycle tools
|
|
147
|
+
if isinstance(task, Task) and task.agent_config:
|
|
148
|
+
if task.agent_config.get("system_prompt"):
|
|
149
|
+
self.system_prompt += "\n\n" + task.agent_config["system_prompt"]
|
|
150
|
+
if "append_setup_output" in task.agent_config:
|
|
151
|
+
self.append_setup_output = task.agent_config["append_setup_output"]
|
|
152
|
+
if "initial_screenshot" in task.agent_config:
|
|
153
|
+
self.initial_screenshot = task.agent_config["initial_screenshot"]
|
|
154
|
+
if "allowed_tools" in task.agent_config:
|
|
155
|
+
# If allowed_tools has already been set, we take the intersection of the two
|
|
156
|
+
# If the list had been empty, we were allowing all tools, so we overwrite this
|
|
157
|
+
if isinstance(self.allowed_tools, list) and len(self.allowed_tools) > 0:
|
|
158
|
+
self.allowed_tools = [
|
|
159
|
+
tool
|
|
160
|
+
for tool in self.allowed_tools
|
|
161
|
+
if tool in task.agent_config["allowed_tools"]
|
|
162
|
+
]
|
|
163
|
+
else: # If allowed_tools is None, we overwrite it
|
|
164
|
+
self.allowed_tools = task.agent_config["allowed_tools"]
|
|
165
|
+
if "disallowed_tools" in task.agent_config:
|
|
166
|
+
# If disallowed_tools has already been set, we take the union of the two
|
|
167
|
+
if isinstance(self.disallowed_tools, list):
|
|
168
|
+
self.disallowed_tools.extend(task.agent_config["disallowed_tools"])
|
|
169
|
+
else: # If disallowed_tools is None, we overwrite it
|
|
170
|
+
self.disallowed_tools = task.agent_config["disallowed_tools"]
|
|
171
|
+
|
|
172
|
+
all_tools = await self.mcp_client.list_tools()
|
|
173
|
+
self._available_tools = []
|
|
174
|
+
|
|
175
|
+
# Filter tools based on allowed and disallowed patterns
|
|
176
|
+
# No allowed tools and no disallowed tools -> we accept all tools
|
|
177
|
+
# No allowed tools and disallowed tools -> we accept all tools except the disallowed ones
|
|
178
|
+
for tool in all_tools:
|
|
179
|
+
if self.allowed_tools is not None and not any(
|
|
180
|
+
fnmatch.fnmatch(tool.name, pattern) for pattern in self.allowed_tools
|
|
181
|
+
):
|
|
182
|
+
continue
|
|
183
|
+
if self.disallowed_tools is not None and any(
|
|
184
|
+
fnmatch.fnmatch(tool.name, pattern) for pattern in self.disallowed_tools
|
|
185
|
+
):
|
|
186
|
+
continue
|
|
187
|
+
self._available_tools.append(tool)
|
|
188
|
+
|
|
189
|
+
self.console.info(
|
|
190
|
+
f"Agent initialized with {len(self.get_available_tools())} tools: {', '.join([t.name for t in self.get_available_tools()])}" # noqa: E501
|
|
191
|
+
)
|
|
180
192
|
|
|
181
193
|
async def run(self, prompt_or_task: str | Task | dict[str, Any], max_steps: int = 10) -> Trace:
|
|
182
194
|
"""
|
|
@@ -575,108 +587,6 @@ class MCPAgent(ABC):
|
|
|
575
587
|
|
|
576
588
|
return await self.format_blocks(blocks)
|
|
577
589
|
|
|
578
|
-
async def _filter_tools(self) -> None:
|
|
579
|
-
"""Apply tool filtering based on allowed/disallowed lists."""
|
|
580
|
-
# Get all tools from client
|
|
581
|
-
if self.mcp_client is None:
|
|
582
|
-
raise ValueError("MCP client is not initialized")
|
|
583
|
-
|
|
584
|
-
all_tools = await self.mcp_client.list_tools()
|
|
585
|
-
|
|
586
|
-
response_tools_by_server: dict[str, str] = {} # server_name -> tool_name
|
|
587
|
-
for tool in all_tools:
|
|
588
|
-
if "response" in tool.name or tool.name == "response":
|
|
589
|
-
self.console.debug(f"Found response tool: '{tool.name}'")
|
|
590
|
-
# Extract server name from tool name (e.g., "grader_response" -> "grader")
|
|
591
|
-
if "_" in tool.name:
|
|
592
|
-
server_name = tool.name.split("_", 1)[0]
|
|
593
|
-
response_tools_by_server[server_name] = tool.name
|
|
594
|
-
else:
|
|
595
|
-
response_tools_by_server["_default"] = tool.name
|
|
596
|
-
|
|
597
|
-
# Add response tool to lifecycle tools BEFORE filtering
|
|
598
|
-
if response_tools_by_server and hasattr(self.mcp_client, "mcp_config"):
|
|
599
|
-
# Get server names in order from mcp_config
|
|
600
|
-
server_names = list(self.mcp_client.mcp_config.keys())
|
|
601
|
-
self.console.debug(f"Server names: {server_names}")
|
|
602
|
-
|
|
603
|
-
# Try to find response tool from last server first
|
|
604
|
-
response_tool_name = None
|
|
605
|
-
for server_name in reversed(server_names):
|
|
606
|
-
if server_name in response_tools_by_server:
|
|
607
|
-
response_tool_name = response_tools_by_server[server_name]
|
|
608
|
-
self.console.debug(
|
|
609
|
-
f"Found response tool '{response_tool_name}' from server '{server_name}'"
|
|
610
|
-
)
|
|
611
|
-
break
|
|
612
|
-
|
|
613
|
-
# Fallback to any response tool
|
|
614
|
-
if not response_tool_name and response_tools_by_server:
|
|
615
|
-
response_tool_name = next(iter(response_tools_by_server.values()))
|
|
616
|
-
self.console.debug(f"Using fallback response tool '{response_tool_name}'")
|
|
617
|
-
|
|
618
|
-
# Add to lifecycle tools if found
|
|
619
|
-
if response_tool_name and response_tool_name not in self.lifecycle_tools:
|
|
620
|
-
self.console.debug(f"Auto-detected '{response_tool_name}' tool as a lifecycle tool")
|
|
621
|
-
self.response_tool_name = response_tool_name
|
|
622
|
-
self.lifecycle_tools.append(response_tool_name)
|
|
623
|
-
elif response_tool_name:
|
|
624
|
-
self.console.debug(
|
|
625
|
-
f"Response tool '{response_tool_name}' already in lifecycle_tools"
|
|
626
|
-
)
|
|
627
|
-
self.response_tool_name = response_tool_name
|
|
628
|
-
else:
|
|
629
|
-
self.console.debug("No response tools found or no mcp_config")
|
|
630
|
-
|
|
631
|
-
# Filter tools
|
|
632
|
-
self._available_tools = []
|
|
633
|
-
self._tool_map = {}
|
|
634
|
-
|
|
635
|
-
self.console.debug(f"All tools: {[t.name for t in all_tools]}")
|
|
636
|
-
self.console.debug(f"Allowed tools: {self.allowed_tools}")
|
|
637
|
-
self.console.debug(f"Agent tools: {self.agent_tools}")
|
|
638
|
-
self.console.debug(f"Disallowed tools: {self.disallowed_tools}")
|
|
639
|
-
self.console.debug(f"Lifecycle tools: {self.lifecycle_tools}")
|
|
640
|
-
|
|
641
|
-
for tool in all_tools:
|
|
642
|
-
# Lifecycle tools (setup, evaluate, response) should always be included
|
|
643
|
-
is_lifecycle = tool.name in self.lifecycle_tools
|
|
644
|
-
|
|
645
|
-
# Check if tool should be included
|
|
646
|
-
if not is_lifecycle:
|
|
647
|
-
if self.allowed_tools and tool.name not in self.allowed_tools:
|
|
648
|
-
self.console.debug(f"Skipping tool '{tool.name}' - not in allowed_tools")
|
|
649
|
-
continue
|
|
650
|
-
if self.agent_tools and tool.name not in self.agent_tools:
|
|
651
|
-
self.console.debug(f"Skipping tool '{tool.name}' - not in agent_tools")
|
|
652
|
-
continue
|
|
653
|
-
if tool.name in self.disallowed_tools:
|
|
654
|
-
self.console.debug(f"Skipping tool '{tool.name}' - in disallowed_tools")
|
|
655
|
-
continue
|
|
656
|
-
|
|
657
|
-
self.console.debug(
|
|
658
|
-
f"Adding tool '{tool.name}' to available tools (lifecycle={is_lifecycle})"
|
|
659
|
-
)
|
|
660
|
-
self._available_tools.append(tool)
|
|
661
|
-
self._tool_map[tool.name] = tool
|
|
662
|
-
|
|
663
|
-
# Check if all required tools are available
|
|
664
|
-
if self.required_tools:
|
|
665
|
-
available_tool_names = {tool.name for tool in self._available_tools}
|
|
666
|
-
missing_tools = [
|
|
667
|
-
tool for tool in self.required_tools if tool not in available_tool_names
|
|
668
|
-
]
|
|
669
|
-
if missing_tools:
|
|
670
|
-
raise ValueError(
|
|
671
|
-
f"Required tools not available: {missing_tools}. "
|
|
672
|
-
f"Available tools: {list(available_tool_names)}"
|
|
673
|
-
)
|
|
674
|
-
|
|
675
|
-
available_tools = self.get_available_tools()
|
|
676
|
-
self.console.info(
|
|
677
|
-
f"Agent initialized with {len(available_tools)} tools: {', '.join([t.name for t in available_tools])}" # noqa: E501
|
|
678
|
-
)
|
|
679
|
-
|
|
680
590
|
async def _maybe_submit_response(self, response: AgentResponse, messages: list[Any]) -> None:
|
|
681
591
|
"""Submit response through lifecycle tool if available.
|
|
682
592
|
|
|
@@ -715,8 +625,11 @@ class MCPAgent(ABC):
|
|
|
715
625
|
|
|
716
626
|
def get_available_tools(self) -> list[types.Tool]:
|
|
717
627
|
"""Get list of available MCP tools for LLM use (excludes lifecycle tools)."""
|
|
718
|
-
|
|
719
|
-
|
|
628
|
+
if self._available_tools is None:
|
|
629
|
+
raise RuntimeError(
|
|
630
|
+
"Tools have not been initialized. Call initialize() before accessing available tools." # noqa: E501
|
|
631
|
+
)
|
|
632
|
+
return self._available_tools
|
|
720
633
|
|
|
721
634
|
def get_tool_schemas(self) -> list[dict]:
|
|
722
635
|
"""Get tool schemas in a format suitable for the model."""
|
hud/agents/claude.py
CHANGED
|
@@ -326,7 +326,7 @@ class ClaudeAgent(MCPAgent):
|
|
|
326
326
|
selected_computer_tool = None
|
|
327
327
|
|
|
328
328
|
for priority_name in computer_tool_priority:
|
|
329
|
-
for tool in self._available_tools:
|
|
329
|
+
for tool in self.get_available_tools():
|
|
330
330
|
# Check both exact match and suffix match (for prefixed tools)
|
|
331
331
|
if tool.name == priority_name or tool.name.endswith(f"_{priority_name}"):
|
|
332
332
|
selected_computer_tool = tool
|
|
@@ -350,13 +350,12 @@ class ClaudeAgent(MCPAgent):
|
|
|
350
350
|
)
|
|
351
351
|
|
|
352
352
|
# Add other non-computer tools
|
|
353
|
-
for tool in self._available_tools:
|
|
354
|
-
# Skip computer tools (already handled)
|
|
355
|
-
is_computer_tool = any(
|
353
|
+
for tool in self.get_available_tools():
|
|
354
|
+
# Skip computer tools (already handled)
|
|
355
|
+
if any(
|
|
356
356
|
tool.name == priority_name or tool.name.endswith(f"_{priority_name}")
|
|
357
357
|
for priority_name in computer_tool_priority
|
|
358
|
-
)
|
|
359
|
-
if is_computer_tool or tool.name in self.lifecycle_tools:
|
|
358
|
+
):
|
|
360
359
|
continue
|
|
361
360
|
|
|
362
361
|
claude_tool = {
|
hud/agents/grounded_openai.py
CHANGED
|
@@ -169,7 +169,7 @@ class GroundedOpenAIChatAgent(GenericOpenAIChatAgent):
|
|
|
169
169
|
protected_keys = {"model", "messages", "tools", "parallel_tool_calls"}
|
|
170
170
|
extra = {k: v for k, v in (self.completion_kwargs or {}).items() if k not in protected_keys}
|
|
171
171
|
|
|
172
|
-
response = await self.oai.chat.completions.create(
|
|
172
|
+
response = await self.oai.chat.completions.create( # type: ignore
|
|
173
173
|
model=self.model_name,
|
|
174
174
|
messages=messages,
|
|
175
175
|
tools=tool_schemas,
|
hud/agents/misc/integration_test_agent.py
CHANGED
|
@@ -17,6 +17,8 @@ class IntegrationTestRunner(MCPAgent):
|
|
|
17
17
|
# Initialize using base to set up client and telemetry correctly
|
|
18
18
|
await self.initialize(task)
|
|
19
19
|
|
|
20
|
+
self.console.info(f"Full system prompt: {self.system_prompt}")
|
|
21
|
+
|
|
20
22
|
# Validate task shape
|
|
21
23
|
if not getattr(task, "integration_test_tool", None):
|
|
22
24
|
raise ValueError(
|
hud/agents/tests/test_base.py
CHANGED
|
@@ -326,9 +326,6 @@ class TestBaseMCPAgent:
|
|
|
326
326
|
"""Test getting tool schemas."""
|
|
327
327
|
agent = MockMCPAgent()
|
|
328
328
|
|
|
329
|
-
# Add setup to lifecycle tools to test filtering
|
|
330
|
-
agent.lifecycle_tools = ["setup"]
|
|
331
|
-
|
|
332
329
|
agent._available_tools = [
|
|
333
330
|
types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"}),
|
|
334
331
|
types.Tool(name="setup", description="Setup", inputSchema={"type": "object"}),
|
|
@@ -598,7 +595,7 @@ class TestMCPAgentExtended:
|
|
|
598
595
|
agent = MockAgentExtended(mcp_client=mock_client, allowed_tools=["tool1", "tool3"])
|
|
599
596
|
await agent.initialize("test")
|
|
600
597
|
|
|
601
|
-
available_names = [tool.name for tool in agent._available_tools]
|
|
598
|
+
available_names = [tool.name for tool in agent.get_available_tools()]
|
|
602
599
|
assert "tool1" in available_names
|
|
603
600
|
assert "tool3" in available_names
|
|
604
601
|
assert "tool2" not in available_names
|
|
@@ -617,7 +614,7 @@ class TestMCPAgentExtended:
|
|
|
617
614
|
agent = MockAgentExtended(mcp_client=mock_client, disallowed_tools=["tool2"])
|
|
618
615
|
await agent.initialize("test")
|
|
619
616
|
|
|
620
|
-
available_names = [tool.name for tool in agent._available_tools]
|
|
617
|
+
available_names = [tool.name for tool in agent.get_available_tools()]
|
|
621
618
|
assert "tool1" in available_names
|
|
622
619
|
assert "tool3" in available_names
|
|
623
620
|
assert "tool2" not in available_names
|