hud-python 0.4.47__py3-none-any.whl → 0.4.49__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (45)
  1. hud/agents/base.py +55 -142
  2. hud/agents/claude.py +5 -6
  3. hud/agents/grounded_openai.py +1 -1
  4. hud/agents/misc/integration_test_agent.py +2 -0
  5. hud/agents/tests/test_base.py +2 -5
  6. hud/cli/__init__.py +80 -215
  7. hud/cli/build.py +105 -45
  8. hud/cli/dev.py +614 -743
  9. hud/cli/eval.py +14 -9
  10. hud/cli/flows/tasks.py +100 -21
  11. hud/cli/init.py +18 -14
  12. hud/cli/push.py +27 -9
  13. hud/cli/rl/local_runner.py +28 -16
  14. hud/cli/rl/vllm.py +2 -0
  15. hud/cli/tests/test_analyze_metadata.py +3 -2
  16. hud/cli/tests/test_eval.py +574 -0
  17. hud/cli/tests/test_mcp_server.py +6 -95
  18. hud/cli/tests/test_utils.py +1 -1
  19. hud/cli/utils/env_check.py +9 -9
  20. hud/cli/utils/source_hash.py +1 -1
  21. hud/datasets/parallel.py +0 -12
  22. hud/datasets/runner.py +1 -4
  23. hud/rl/actor.py +4 -2
  24. hud/rl/distributed.py +1 -1
  25. hud/rl/learner.py +2 -1
  26. hud/rl/train.py +1 -1
  27. hud/server/__init__.py +2 -1
  28. hud/server/router.py +160 -0
  29. hud/server/server.py +246 -79
  30. hud/telemetry/trace.py +1 -1
  31. hud/tools/base.py +20 -10
  32. hud/tools/computer/__init__.py +2 -0
  33. hud/tools/computer/qwen.py +431 -0
  34. hud/tools/computer/settings.py +16 -0
  35. hud/tools/executors/pyautogui.py +1 -1
  36. hud/tools/playwright.py +1 -1
  37. hud/types.py +2 -3
  38. hud/utils/hud_console.py +43 -0
  39. hud/utils/tests/test_version.py +1 -1
  40. hud/version.py +1 -1
  41. {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/METADATA +1 -1
  42. {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/RECORD +45 -42
  43. {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/WHEEL +0 -0
  44. {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/entry_points.txt +0 -0
  45. {hud_python-0.4.47.dist-info → hud_python-0.4.49.dist-info}/licenses/LICENSE +0 -0
hud/agents/base.py CHANGED
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import asyncio
6
+ import fnmatch
6
7
  import json
7
8
  import logging
8
9
  from abc import ABC, abstractmethod
@@ -96,12 +97,9 @@ class MCPAgent(ABC):
96
97
  self.console.set_verbose(True)
97
98
 
98
99
  # User filtering
99
- self.allowed_tools = allowed_tools
100
- self.disallowed_tools = disallowed_tools or []
101
-
102
- # Task filtering
103
- self.agent_tools = None
104
- self.lifecycle_tools = []
100
+ self.allowed_tools: list[str] | None = allowed_tools
101
+ self.disallowed_tools: list[str] | None = disallowed_tools
102
+ self._available_tools: list[types.Tool] | None = None
105
103
 
106
104
  # Messages
107
105
  self.system_prompt = system_prompt
@@ -109,7 +107,6 @@ class MCPAgent(ABC):
109
107
  self.initial_screenshot = initial_screenshot
110
108
 
111
109
  # Initialize these here so methods can be called before initialize()
112
- self._available_tools: list[types.Tool] = []
113
110
  self._tool_map: dict[str, types.Tool] = {} # Simplified: just name to tool
114
111
  self.response_tool_name = None
115
112
 
@@ -146,37 +143,52 @@ class MCPAgent(ABC):
146
143
  except Exception as e:
147
144
  self._handle_connection_error(e)
148
145
 
149
- # If task is provided, add lifecycle tools
150
- if isinstance(task, Task):
151
- if task.agent_tools:
152
- self.agent_tools = task.agent_tools
153
- if task.setup_tool:
154
- if isinstance(task.setup_tool, list):
155
- for tool in task.setup_tool:
156
- if not self.agent_tools or (
157
- self.agent_tools and tool.name not in self.agent_tools
158
- ):
159
- self.lifecycle_tools.append(tool.name)
160
- elif not self.agent_tools or (
161
- self.agent_tools and task.setup_tool.name not in self.agent_tools
162
- ):
163
- self.lifecycle_tools.append(task.setup_tool.name)
164
- if task.evaluate_tool:
165
- if isinstance(task.evaluate_tool, list):
166
- for tool in task.evaluate_tool:
167
- if not self.agent_tools or (
168
- self.agent_tools and tool.name not in self.agent_tools
169
- ):
170
- self.lifecycle_tools.append(tool.name)
171
- elif not self.agent_tools or (
172
- self.agent_tools and task.evaluate_tool.name not in self.agent_tools
173
- ):
174
- self.lifecycle_tools.append(task.evaluate_tool.name)
175
- if task.system_prompt:
176
- self.system_prompt += "\n\n" + task.system_prompt
177
-
178
- # Re-apply filtering with updated lifecycle tools
179
- await self._filter_tools()
146
+ # If task is provided, apply agent_config and add lifecycle tools
147
+ if isinstance(task, Task) and task.agent_config:
148
+ if task.agent_config.get("system_prompt"):
149
+ self.system_prompt += "\n\n" + task.agent_config["system_prompt"]
150
+ if "append_setup_output" in task.agent_config:
151
+ self.append_setup_output = task.agent_config["append_setup_output"]
152
+ if "initial_screenshot" in task.agent_config:
153
+ self.initial_screenshot = task.agent_config["initial_screenshot"]
154
+ if "allowed_tools" in task.agent_config:
155
+ # If allowed_tools has already been set, we take the intersection of the two
156
+ # If the list had been empty, we were allowing all tools, so we overwrite this
157
+ if isinstance(self.allowed_tools, list) and len(self.allowed_tools) > 0:
158
+ self.allowed_tools = [
159
+ tool
160
+ for tool in self.allowed_tools
161
+ if tool in task.agent_config["allowed_tools"]
162
+ ]
163
+ else: # If allowed_tools is None, we overwrite it
164
+ self.allowed_tools = task.agent_config["allowed_tools"]
165
+ if "disallowed_tools" in task.agent_config:
166
+ # If disallowed_tools has already been set, we take the union of the two
167
+ if isinstance(self.disallowed_tools, list):
168
+ self.disallowed_tools.extend(task.agent_config["disallowed_tools"])
169
+ else: # If disallowed_tools is None, we overwrite it
170
+ self.disallowed_tools = task.agent_config["disallowed_tools"]
171
+
172
+ all_tools = await self.mcp_client.list_tools()
173
+ self._available_tools = []
174
+
175
+ # Filter tools based on allowed and disallowed patterns
176
+ # No allowed tools and no disallowed tools -> we accept all tools
177
+ # No allowed tools and disallowed tools -> we accept all tools except the disallowed ones
178
+ for tool in all_tools:
179
+ if self.allowed_tools is not None and not any(
180
+ fnmatch.fnmatch(tool.name, pattern) for pattern in self.allowed_tools
181
+ ):
182
+ continue
183
+ if self.disallowed_tools is not None and any(
184
+ fnmatch.fnmatch(tool.name, pattern) for pattern in self.disallowed_tools
185
+ ):
186
+ continue
187
+ self._available_tools.append(tool)
188
+
189
+ self.console.info(
190
+ f"Agent initialized with {len(self.get_available_tools())} tools: {', '.join([t.name for t in self.get_available_tools()])}" # noqa: E501
191
+ )
180
192
 
181
193
  async def run(self, prompt_or_task: str | Task | dict[str, Any], max_steps: int = 10) -> Trace:
182
194
  """
@@ -575,108 +587,6 @@ class MCPAgent(ABC):
575
587
 
576
588
  return await self.format_blocks(blocks)
577
589
 
578
- async def _filter_tools(self) -> None:
579
- """Apply tool filtering based on allowed/disallowed lists."""
580
- # Get all tools from client
581
- if self.mcp_client is None:
582
- raise ValueError("MCP client is not initialized")
583
-
584
- all_tools = await self.mcp_client.list_tools()
585
-
586
- response_tools_by_server: dict[str, str] = {} # server_name -> tool_name
587
- for tool in all_tools:
588
- if "response" in tool.name or tool.name == "response":
589
- self.console.debug(f"Found response tool: '{tool.name}'")
590
- # Extract server name from tool name (e.g., "grader_response" -> "grader")
591
- if "_" in tool.name:
592
- server_name = tool.name.split("_", 1)[0]
593
- response_tools_by_server[server_name] = tool.name
594
- else:
595
- response_tools_by_server["_default"] = tool.name
596
-
597
- # Add response tool to lifecycle tools BEFORE filtering
598
- if response_tools_by_server and hasattr(self.mcp_client, "mcp_config"):
599
- # Get server names in order from mcp_config
600
- server_names = list(self.mcp_client.mcp_config.keys())
601
- self.console.debug(f"Server names: {server_names}")
602
-
603
- # Try to find response tool from last server first
604
- response_tool_name = None
605
- for server_name in reversed(server_names):
606
- if server_name in response_tools_by_server:
607
- response_tool_name = response_tools_by_server[server_name]
608
- self.console.debug(
609
- f"Found response tool '{response_tool_name}' from server '{server_name}'"
610
- )
611
- break
612
-
613
- # Fallback to any response tool
614
- if not response_tool_name and response_tools_by_server:
615
- response_tool_name = next(iter(response_tools_by_server.values()))
616
- self.console.debug(f"Using fallback response tool '{response_tool_name}'")
617
-
618
- # Add to lifecycle tools if found
619
- if response_tool_name and response_tool_name not in self.lifecycle_tools:
620
- self.console.debug(f"Auto-detected '{response_tool_name}' tool as a lifecycle tool")
621
- self.response_tool_name = response_tool_name
622
- self.lifecycle_tools.append(response_tool_name)
623
- elif response_tool_name:
624
- self.console.debug(
625
- f"Response tool '{response_tool_name}' already in lifecycle_tools"
626
- )
627
- self.response_tool_name = response_tool_name
628
- else:
629
- self.console.debug("No response tools found or no mcp_config")
630
-
631
- # Filter tools
632
- self._available_tools = []
633
- self._tool_map = {}
634
-
635
- self.console.debug(f"All tools: {[t.name for t in all_tools]}")
636
- self.console.debug(f"Allowed tools: {self.allowed_tools}")
637
- self.console.debug(f"Agent tools: {self.agent_tools}")
638
- self.console.debug(f"Disallowed tools: {self.disallowed_tools}")
639
- self.console.debug(f"Lifecycle tools: {self.lifecycle_tools}")
640
-
641
- for tool in all_tools:
642
- # Lifecycle tools (setup, evaluate, response) should always be included
643
- is_lifecycle = tool.name in self.lifecycle_tools
644
-
645
- # Check if tool should be included
646
- if not is_lifecycle:
647
- if self.allowed_tools and tool.name not in self.allowed_tools:
648
- self.console.debug(f"Skipping tool '{tool.name}' - not in allowed_tools")
649
- continue
650
- if self.agent_tools and tool.name not in self.agent_tools:
651
- self.console.debug(f"Skipping tool '{tool.name}' - not in agent_tools")
652
- continue
653
- if tool.name in self.disallowed_tools:
654
- self.console.debug(f"Skipping tool '{tool.name}' - in disallowed_tools")
655
- continue
656
-
657
- self.console.debug(
658
- f"Adding tool '{tool.name}' to available tools (lifecycle={is_lifecycle})"
659
- )
660
- self._available_tools.append(tool)
661
- self._tool_map[tool.name] = tool
662
-
663
- # Check if all required tools are available
664
- if self.required_tools:
665
- available_tool_names = {tool.name for tool in self._available_tools}
666
- missing_tools = [
667
- tool for tool in self.required_tools if tool not in available_tool_names
668
- ]
669
- if missing_tools:
670
- raise ValueError(
671
- f"Required tools not available: {missing_tools}. "
672
- f"Available tools: {list(available_tool_names)}"
673
- )
674
-
675
- available_tools = self.get_available_tools()
676
- self.console.info(
677
- f"Agent initialized with {len(available_tools)} tools: {', '.join([t.name for t in available_tools])}" # noqa: E501
678
- )
679
-
680
590
  async def _maybe_submit_response(self, response: AgentResponse, messages: list[Any]) -> None:
681
591
  """Submit response through lifecycle tool if available.
682
592
 
@@ -715,8 +625,11 @@ class MCPAgent(ABC):
715
625
 
716
626
  def get_available_tools(self) -> list[types.Tool]:
717
627
  """Get list of available MCP tools for LLM use (excludes lifecycle tools)."""
718
- lifecycle_tool_names = self.lifecycle_tools
719
- return [tool for tool in self._available_tools if tool.name not in lifecycle_tool_names]
628
+ if self._available_tools is None:
629
+ raise RuntimeError(
630
+ "Tools have not been initialized. Call initialize() before accessing available tools." # noqa: E501
631
+ )
632
+ return self._available_tools
720
633
 
721
634
  def get_tool_schemas(self) -> list[dict]:
722
635
  """Get tool schemas in a format suitable for the model."""
hud/agents/claude.py CHANGED
@@ -326,7 +326,7 @@ class ClaudeAgent(MCPAgent):
326
326
  selected_computer_tool = None
327
327
 
328
328
  for priority_name in computer_tool_priority:
329
- for tool in self._available_tools:
329
+ for tool in self.get_available_tools():
330
330
  # Check both exact match and suffix match (for prefixed tools)
331
331
  if tool.name == priority_name or tool.name.endswith(f"_{priority_name}"):
332
332
  selected_computer_tool = tool
@@ -350,13 +350,12 @@ class ClaudeAgent(MCPAgent):
350
350
  )
351
351
 
352
352
  # Add other non-computer tools
353
- for tool in self._available_tools:
354
- # Skip computer tools (already handled) and lifecycle tools
355
- is_computer_tool = any(
353
+ for tool in self.get_available_tools():
354
+ # Skip computer tools (already handled)
355
+ if any(
356
356
  tool.name == priority_name or tool.name.endswith(f"_{priority_name}")
357
357
  for priority_name in computer_tool_priority
358
- )
359
- if is_computer_tool or tool.name in self.lifecycle_tools:
358
+ ):
360
359
  continue
361
360
 
362
361
  claude_tool = {
hud/agents/grounded_openai.py CHANGED
@@ -169,7 +169,7 @@ class GroundedOpenAIChatAgent(GenericOpenAIChatAgent):
169
169
  protected_keys = {"model", "messages", "tools", "parallel_tool_calls"}
170
170
  extra = {k: v for k, v in (self.completion_kwargs or {}).items() if k not in protected_keys}
171
171
 
172
- response = await self.oai.chat.completions.create(
172
+ response = await self.oai.chat.completions.create( # type: ignore
173
173
  model=self.model_name,
174
174
  messages=messages,
175
175
  tools=tool_schemas,
hud/agents/misc/integration_test_agent.py CHANGED
@@ -17,6 +17,8 @@ class IntegrationTestRunner(MCPAgent):
17
17
  # Initialize using base to set up client and telemetry correctly
18
18
  await self.initialize(task)
19
19
 
20
+ self.console.info(f"Full system prompt: {self.system_prompt}")
21
+
20
22
  # Validate task shape
21
23
  if not getattr(task, "integration_test_tool", None):
22
24
  raise ValueError(
hud/agents/tests/test_base.py CHANGED
@@ -326,9 +326,6 @@ class TestBaseMCPAgent:
326
326
  """Test getting tool schemas."""
327
327
  agent = MockMCPAgent()
328
328
 
329
- # Add setup to lifecycle tools to test filtering
330
- agent.lifecycle_tools = ["setup"]
331
-
332
329
  agent._available_tools = [
333
330
  types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"}),
334
331
  types.Tool(name="setup", description="Setup", inputSchema={"type": "object"}),
@@ -598,7 +595,7 @@ class TestMCPAgentExtended:
598
595
  agent = MockAgentExtended(mcp_client=mock_client, allowed_tools=["tool1", "tool3"])
599
596
  await agent.initialize("test")
600
597
 
601
- available_names = [tool.name for tool in agent._available_tools]
598
+ available_names = [tool.name for tool in agent.get_available_tools()]
602
599
  assert "tool1" in available_names
603
600
  assert "tool3" in available_names
604
601
  assert "tool2" not in available_names
@@ -617,7 +614,7 @@ class TestMCPAgentExtended:
617
614
  agent = MockAgentExtended(mcp_client=mock_client, disallowed_tools=["tool2"])
618
615
  await agent.initialize("test")
619
616
 
620
- available_names = [tool.name for tool in agent._available_tools]
617
+ available_names = [tool.name for tool in agent.get_available_tools()]
621
618
  assert "tool1" in available_names
622
619
  assert "tool3" in available_names
623
620
  assert "tool2" not in available_names