hud-python 0.4.21__py3-none-any.whl → 0.4.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (63)
  1. hud/agents/base.py +37 -37
  2. hud/agents/claude.py +11 -6
  3. hud/agents/grounded_openai.py +282 -0
  4. hud/agents/misc/response_agent.py +3 -2
  5. hud/agents/openai.py +2 -2
  6. hud/agents/openai_chat_generic.py +3 -1
  7. hud/agents/tests/test_client.py +6 -1
  8. hud/agents/tests/test_grounded_openai_agent.py +155 -0
  9. hud/cli/__init__.py +34 -24
  10. hud/cli/analyze.py +27 -26
  11. hud/cli/build.py +50 -46
  12. hud/cli/debug.py +7 -7
  13. hud/cli/dev.py +107 -99
  14. hud/cli/eval.py +33 -31
  15. hud/cli/hf.py +53 -53
  16. hud/cli/init.py +28 -28
  17. hud/cli/list_func.py +22 -22
  18. hud/cli/pull.py +36 -36
  19. hud/cli/push.py +76 -74
  20. hud/cli/remove.py +42 -40
  21. hud/cli/rl/__init__.py +2 -2
  22. hud/cli/rl/init.py +41 -41
  23. hud/cli/rl/pod.py +97 -91
  24. hud/cli/rl/ssh.py +42 -40
  25. hud/cli/rl/train.py +75 -73
  26. hud/cli/rl/utils.py +10 -10
  27. hud/cli/tests/test_analyze.py +1 -1
  28. hud/cli/tests/test_analyze_metadata.py +2 -2
  29. hud/cli/tests/test_pull.py +45 -45
  30. hud/cli/tests/test_push.py +31 -29
  31. hud/cli/tests/test_registry.py +15 -15
  32. hud/cli/utils/environment.py +11 -11
  33. hud/cli/utils/interactive.py +18 -18
  34. hud/cli/utils/logging.py +12 -12
  35. hud/cli/utils/metadata.py +12 -12
  36. hud/cli/utils/registry.py +5 -5
  37. hud/cli/utils/runner.py +23 -23
  38. hud/cli/utils/server.py +16 -16
  39. hud/settings.py +6 -0
  40. hud/shared/hints.py +7 -7
  41. hud/tools/executors/tests/test_base_executor.py +1 -1
  42. hud/tools/executors/xdo.py +1 -1
  43. hud/tools/grounding/__init__.py +13 -0
  44. hud/tools/grounding/config.py +54 -0
  45. hud/tools/grounding/grounded_tool.py +314 -0
  46. hud/tools/grounding/grounder.py +302 -0
  47. hud/tools/grounding/tests/__init__.py +1 -0
  48. hud/tools/grounding/tests/test_grounded_tool.py +196 -0
  49. hud/tools/tests/test_playwright_tool.py +1 -1
  50. hud/tools/tests/test_tools_init.py +1 -1
  51. hud/tools/tests/test_utils.py +2 -2
  52. hud/types.py +4 -4
  53. hud/utils/__init__.py +3 -3
  54. hud/utils/agent_factories.py +86 -0
  55. hud/utils/{design.py → hud_console.py} +39 -33
  56. hud/utils/pretty_errors.py +6 -6
  57. hud/utils/tests/test_version.py +1 -1
  58. hud/version.py +1 -1
  59. {hud_python-0.4.21.dist-info → hud_python-0.4.23.dist-info}/METADATA +3 -1
  60. {hud_python-0.4.21.dist-info → hud_python-0.4.23.dist-info}/RECORD +63 -54
  61. {hud_python-0.4.21.dist-info → hud_python-0.4.23.dist-info}/WHEEL +0 -0
  62. {hud_python-0.4.21.dist-info → hud_python-0.4.23.dist-info}/entry_points.txt +0 -0
  63. {hud_python-0.4.21.dist-info → hud_python-0.4.23.dist-info}/licenses/LICENSE +0 -0
hud/agents/tests/test_grounded_openai_agent.py ADDED
@@ -0,0 +1,155 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from typing import Any
5
+
6
+ import mcp.types as types
7
+ import pytest
8
+
9
+ from hud.agents.grounded_openai import GroundedOpenAIChatAgent
10
+ from hud.tools.grounding import GrounderConfig
11
+ from hud.types import MCPToolCall, MCPToolResult
12
+
13
+
14
+ class DummyOpenAI:
15
+ class chat: # type: ignore[no-redef]
16
+ class completions:
17
+ @staticmethod
18
+ async def create(**kwargs: Any) -> Any:
19
+ # Return a minimal object mimicking OpenAI response
20
+ class Msg:
21
+ def __init__(self) -> None:
22
+ self.content = "Thinking..."
23
+ self.tool_calls = [
24
+ type(
25
+ "ToolCall",
26
+ (),
27
+ {
28
+ "id": "call_1",
29
+ "function": type(
30
+ "Fn",
31
+ (),
32
+ {
33
+ "name": "computer",
34
+ "arguments": json.dumps(
35
+ {
36
+ "action": "click",
37
+ "element_description": "blue button",
38
+ }
39
+ ),
40
+ },
41
+ ),
42
+ },
43
+ )()
44
+ ]
45
+
46
+ class Choice:
47
+ def __init__(self) -> None:
48
+ self.message = Msg()
49
+ self.finish_reason = "tool_calls"
50
+
51
+ class Resp:
52
+ def __init__(self) -> None:
53
+ self.choices = [Choice()]
54
+
55
+ return Resp()
56
+
57
+
58
+ class FakeMCPClient:
59
+ def __init__(self) -> None:
60
+ self.tools: list[types.Tool] = [
61
+ types.Tool(name="computer", description="", inputSchema={}),
62
+ types.Tool(name="setup", description="internal functions", inputSchema={}),
63
+ ]
64
+ self.called: list[MCPToolCall] = []
65
+
66
+ async def initialize(self, mcp_config: dict[str, dict[str, Any]] | None = None) -> None:
67
+ return None
68
+
69
+ async def list_tools(self) -> list[types.Tool]:
70
+ return self.tools
71
+
72
+ async def call_tool(self, tool_call: MCPToolCall) -> MCPToolResult:
73
+ self.called.append(tool_call)
74
+ return MCPToolResult(content=[types.TextContent(text="ok", type="text")], isError=False)
75
+
76
+ @property
77
+ def mcp_config(self) -> dict[str, dict[str, Any]]:
78
+ return {"local": {"command": "echo", "args": ["ok"]}}
79
+
80
+ async def shutdown(self) -> None:
81
+ return None
82
+
83
+ async def list_resources(self) -> list[types.Resource]: # not used here
84
+ return []
85
+
86
+ async def read_resource(self, uri: str) -> types.ReadResourceResult | None:
87
+ return None
88
+
89
+
90
+ class DummyGrounder:
91
+ async def predict_click(self, *, image_b64: str, instruction: str, max_retries: int = 3):
92
+ return (7, 9)
93
+
94
+
95
+ class DummyGroundedTool:
96
+ def __init__(self) -> None:
97
+ self.last_args: dict[str, Any] | None = None
98
+
99
+ async def __call__(self, **kwargs: Any):
100
+ self.last_args = kwargs
101
+ return [types.TextContent(text="ok", type="text")]
102
+
103
+ def get_openai_tool_schema(self) -> dict:
104
+ return {
105
+ "type": "function",
106
+ "function": {"name": "computer", "parameters": {"type": "object"}},
107
+ }
108
+
109
+
110
+ @pytest.mark.asyncio
111
+ async def test_call_tools_injects_screenshot_and_delegates(monkeypatch: pytest.MonkeyPatch) -> None:
112
+ # Agent with fake OpenAI client and fake MCP client
113
+ grounder_cfg = GrounderConfig(api_base="http://example", model="qwen")
114
+ agent = GroundedOpenAIChatAgent(
115
+ grounder_config=grounder_cfg,
116
+ openai_client=DummyOpenAI(),
117
+ model_name="gpt-4o-mini",
118
+ mcp_client=FakeMCPClient(),
119
+ initial_screenshot=False,
120
+ )
121
+
122
+ # Inject a dummy grounded tool to observe args without full initialization
123
+ dummy_tool = DummyGroundedTool()
124
+ agent.grounded_tool = dummy_tool # type: ignore
125
+
126
+ # Seed conversation history with a user image
127
+ png_b64 = (
128
+ "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGMAAQAABQAB"
129
+ "J2n0mQAAAABJRU5ErkJggg=="
130
+ )
131
+ agent.conversation_history = [
132
+ {
133
+ "role": "user",
134
+ "content": [
135
+ {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{png_b64}"}},
136
+ ],
137
+ }
138
+ ]
139
+
140
+ # Build a tool call as GroundedOpenAIChatAgent.get_response would produce
141
+ tool_call = MCPToolCall(
142
+ name="computer", arguments={"action": "click", "element_description": "blue button"}
143
+ )
144
+
145
+ results = await agent.call_tools(tool_call)
146
+
147
+ # One result returned
148
+ assert len(results) == 1 and not results[0].isError
149
+
150
+ # Grounded tool received screenshot_b64 injected
151
+ assert dummy_tool.last_args is not None
152
+ assert dummy_tool.last_args["action"] == "click"
153
+ assert dummy_tool.last_args["element_description"] == "blue button"
154
+ assert "screenshot_b64" in dummy_tool.last_args
155
+ assert isinstance(dummy_tool.last_args["screenshot_b64"], str)
hud/cli/__init__.py CHANGED
@@ -184,7 +184,7 @@ def debug(
184
184
  hud debug . --max-phase 3 # Stop after phase 3
185
185
  """
186
186
  # Import here to avoid circular imports
187
- from hud.utils.design import HUDDesign
187
+ from hud.utils.hud_console import HUDConsole
188
188
 
189
189
  from .utils.environment import (
190
190
  build_environment,
@@ -193,7 +193,7 @@ def debug(
193
193
  is_environment_directory,
194
194
  )
195
195
 
196
- design = HUDDesign()
196
+ hud_console = HUDConsole()
197
197
 
198
198
  # Determine the command to run
199
199
  command = None
@@ -227,7 +227,7 @@ def debug(
227
227
  image_name, source = get_image_name(directory)
228
228
 
229
229
  if source == "auto":
230
- design.info(f"Auto-generated image name: {image_name}")
230
+ hud_console.info(f"Auto-generated image name: {image_name}")
231
231
 
232
232
  # Build if requested or if image doesn't exist
233
233
  if build or not image_exists(image_name):
@@ -263,20 +263,20 @@ def debug(
263
263
  phases_completed = asyncio.run(debug_mcp_stdio(command, logger, max_phase=max_phase))
264
264
 
265
265
  # Show summary using design system
266
- from hud.utils.design import HUDDesign
266
+ from hud.utils.hud_console import HUDConsole
267
267
 
268
- design = HUDDesign()
268
+ hud_console = HUDConsole()
269
269
 
270
- design.info("") # Empty line
271
- design.section_title("Debug Summary")
270
+ hud_console.info("") # Empty line
271
+ hud_console.section_title("Debug Summary")
272
272
 
273
273
  if phases_completed == max_phase:
274
- design.success(f"All {max_phase} phases completed successfully!")
274
+ hud_console.success(f"All {max_phase} phases completed successfully!")
275
275
  if max_phase == 5:
276
- design.info("Your MCP server is fully functional and ready for production use.")
276
+ hud_console.info("Your MCP server is fully functional and ready for production use.")
277
277
  else:
278
- design.warning(f"Completed {phases_completed} out of {max_phase} phases")
279
- design.info("Check the errors above for troubleshooting.")
278
+ hud_console.warning(f"Completed {phases_completed} out of {max_phase} phases")
279
+ hud_console.info("Check the errors above for troubleshooting.")
280
280
 
281
281
  # Exit with appropriate code
282
282
  if phases_completed < max_phase:
@@ -831,9 +831,9 @@ def eval(
831
831
  ),
832
832
  ) -> None:
833
833
  """🚀 Run evaluation on datasets or individual tasks with agents."""
834
- from hud.utils.design import HUDDesign
834
+ from hud.utils.hud_console import HUDConsole
835
835
 
836
- design = HUDDesign()
836
+ hud_console = HUDConsole()
837
837
 
838
838
  # If no source provided, look for task/eval JSON files in current directory
839
839
  if source is None:
@@ -863,30 +863,30 @@ def eval(
863
863
  json_files = sorted(set(json_files))
864
864
 
865
865
  if not json_files:
866
- design.error(
866
+ hud_console.error(
867
867
  "No source provided and no task/eval JSON files found in current directory"
868
868
  )
869
- design.info(
869
+ hud_console.info(
870
870
  "Usage: hud eval <source> or create a task JSON file "
871
871
  "(e.g., task.json, eval_config.json)"
872
872
  )
873
873
  raise typer.Exit(1)
874
874
  elif len(json_files) == 1:
875
875
  source = str(json_files[0])
876
- design.info(f"Found task file: {source}")
876
+ hud_console.info(f"Found task file: {source}")
877
877
  else:
878
878
  # Multiple files found, let user choose
879
- design.info("Multiple task files found:")
880
- file_choice = design.select(
879
+ hud_console.info("Multiple task files found:")
880
+ file_choice = hud_console.select(
881
881
  "Select a task file to run:",
882
882
  choices=[str(f) for f in json_files],
883
883
  )
884
884
  source = file_choice
885
- design.success(f"Selected: {source}")
885
+ hud_console.success(f"Selected: {source}")
886
886
 
887
887
  # If no agent specified, prompt for selection
888
888
  if agent is None:
889
- agent = design.select(
889
+ agent = hud_console.select(
890
890
  "Select an agent to use:",
891
891
  choices=[
892
892
  {"name": "Claude 4 Sonnet", "value": "claude"},
@@ -898,14 +898,14 @@ def eval(
898
898
  # Validate agent choice
899
899
  valid_agents = ["claude", "openai"]
900
900
  if agent not in valid_agents:
901
- design.error(f"Invalid agent: {agent}. Must be one of: {', '.join(valid_agents)}")
901
+ hud_console.error(f"Invalid agent: {agent}. Must be one of: {', '.join(valid_agents)}")
902
902
  raise typer.Exit(1)
903
903
 
904
904
  # Import eval_command lazily to avoid importing agent dependencies
905
905
  try:
906
906
  from .eval import eval_command
907
907
  except ImportError as e:
908
- design.error(
908
+ hud_console.error(
909
909
  "Evaluation dependencies are not installed. "
910
910
  "Please install with: pip install 'hud-python[agent]'"
911
911
  )
@@ -962,6 +962,16 @@ def hf(
962
962
 
963
963
  def main() -> None:
964
964
  """Main entry point for the CLI."""
965
+ # Handle --version flag before Typer parses args
966
+ if "--version" in sys.argv:
967
+ try:
968
+ from hud import __version__
969
+
970
+ console.print(f"HUD CLI version: [cyan]{__version__}[/cyan]")
971
+ except ImportError:
972
+ console.print("HUD CLI version: [cyan]unknown[/cyan]")
973
+ return
974
+
965
975
  try:
966
976
  # Show header for main help
967
977
  if len(sys.argv) == 1 or (len(sys.argv) == 2 and sys.argv[1] in ["--help", "-h"]):
@@ -995,9 +1005,9 @@ def main() -> None:
995
1005
  except Exception:
996
1006
  exit_code = 1
997
1007
  if exit_code != 0:
998
- from hud.utils.design import design
1008
+ from hud.utils.hud_console import hud_console
999
1009
 
1000
- design.info(SUPPORT_HINT)
1010
+ hud_console.info(SUPPORT_HINT)
1001
1011
  raise
1002
1012
  except Exception:
1003
1013
  raise
hud/cli/analyze.py CHANGED
@@ -13,10 +13,10 @@ from rich.table import Table
13
13
  from rich.tree import Tree
14
14
 
15
15
  from hud.clients import MCPClient
16
- from hud.utils.design import HUDDesign
16
+ from hud.utils.hud_console import HUDConsole
17
17
 
18
18
  console = Console()
19
- design = HUDDesign()
19
+ hud_console = HUDConsole()
20
20
 
21
21
 
22
22
  def parse_docker_command(docker_cmd: list[str]) -> dict:
@@ -28,14 +28,14 @@ def parse_docker_command(docker_cmd: list[str]) -> dict:
28
28
 
29
29
  async def analyze_environment(docker_cmd: list[str], output_format: str, verbose: bool) -> None:
30
30
  """Analyze MCP environment and display results."""
31
- design.header("MCP Environment Analysis", icon="🔍")
31
+ hud_console.header("MCP Environment Analysis", icon="🔍")
32
32
 
33
33
  # Convert Docker command to MCP config
34
34
  mcp_config = parse_docker_command(docker_cmd)
35
35
 
36
36
  # Display command being analyzed
37
- design.dim_info("Command:", " ".join(docker_cmd))
38
- design.info("") # Empty line
37
+ hud_console.dim_info("Command:", " ".join(docker_cmd))
38
+ hud_console.info("") # Empty line
39
39
 
40
40
  # Create client
41
41
  with Progress(
@@ -85,9 +85,9 @@ async def analyze_environment(docker_cmd: list[str], output_format: str, verbose
85
85
  def display_interactive(analysis: dict) -> None:
86
86
  """Display analysis results in interactive format."""
87
87
  # Server metadata
88
- design.section_title("📊 Environment Overview")
88
+ hud_console.section_title("📊 Environment Overview")
89
89
  meta_table = Table(show_header=False, box=None)
90
- meta_table.add_column("Property", style="dim")
90
+ meta_table.add_column("Property", style="bright_black")
91
91
  meta_table.add_column("Value")
92
92
 
93
93
  # Check if this is a live analysis (has metadata) or metadata-only analysis
@@ -126,19 +126,19 @@ def display_interactive(analysis: dict) -> None:
126
126
  console.print(meta_table)
127
127
 
128
128
  # Tools
129
- design.section_title("🔧 Available Tools")
130
- tools_tree = Tree("Tools")
129
+ hud_console.section_title("🔧 Available Tools")
130
+ tools_tree = Tree("[bold bright_white]Tools[/bold bright_white]")
131
131
 
132
132
  # Check if we have hub_tools info (live analysis) or not (metadata-only)
133
133
  if "hub_tools" in analysis:
134
134
  # Live analysis format - separate regular and hub tools
135
135
  # Regular tools
136
- regular_tools = tools_tree.add("Regular Tools")
136
+ regular_tools = tools_tree.add("[bright_white]Regular Tools[/bright_white]")
137
137
  for tool in analysis["tools"]:
138
138
  if tool["name"] not in analysis["hub_tools"]:
139
- tool_node = regular_tools.add(f"[default]{tool['name']}[/default]")
139
+ tool_node = regular_tools.add(f"[bright_white]{tool['name']}[/bright_white]")
140
140
  if tool["description"]:
141
- tool_node.add(f"[dim]{tool['description']}[/dim]")
141
+ tool_node.add(f"[bright_black]{tool['description']}[/bright_black]")
142
142
 
143
143
  # Show input schema if verbose
144
144
  if analysis.get("verbose") and tool.get("input_schema"):
@@ -148,17 +148,17 @@ def display_interactive(analysis: dict) -> None:
148
148
 
149
149
  # Hub tools
150
150
  if analysis["hub_tools"]:
151
- hub_tools = tools_tree.add("Hub Tools")
151
+ hub_tools = tools_tree.add("[bright_white]Hub Tools[/bright_white]")
152
152
  for hub_name, functions in analysis["hub_tools"].items():
153
- hub_node = hub_tools.add(f"[yellow]{hub_name}[/yellow]")
153
+ hub_node = hub_tools.add(f"[rgb(181,137,0)]{hub_name}[/rgb(181,137,0)]")
154
154
  for func in functions:
155
- hub_node.add(f"[default]{func}[/default]")
155
+ hub_node.add(f"[bright_white]{func}[/bright_white]")
156
156
  else:
157
157
  # Metadata-only format - just list all tools
158
158
  for tool in analysis["tools"]:
159
- tool_node = tools_tree.add(f"[default]{tool['name']}[/default]")
159
+ tool_node = tools_tree.add(f"[bright_white]{tool['name']}[/bright_white]")
160
160
  if tool.get("description"):
161
- tool_node.add(f"[dim]{tool['description']}[/dim]")
161
+ tool_node.add(f"[bright_black]{tool['description']}[/bright_black]")
162
162
 
163
163
  # Show input schema if verbose
164
164
  if tool.get("inputSchema"):
@@ -170,11 +170,11 @@ def display_interactive(analysis: dict) -> None:
170
170
 
171
171
  # Resources
172
172
  if analysis["resources"]:
173
- design.section_title("📚 Available Resources")
173
+ hud_console.section_title("📚 Available Resources")
174
174
  resources_table = Table()
175
- resources_table.add_column("URI", style="default")
176
- resources_table.add_column("Name", style="white")
177
- resources_table.add_column("Type", style="dim")
175
+ resources_table.add_column("URI", style="bright_white")
176
+ resources_table.add_column("Name", style="bright_white")
177
+ resources_table.add_column("Type", style="bright_black")
178
178
 
179
179
  for resource in analysis["resources"][:10]:
180
180
  resources_table.add_row(
@@ -184,11 +184,12 @@ def display_interactive(analysis: dict) -> None:
184
184
  console.print(resources_table)
185
185
 
186
186
  if len(analysis["resources"]) > 10:
187
- console.print(f"[dim]... and {len(analysis['resources']) - 10} more resources[/dim]")
187
+ remaining = len(analysis["resources"]) - 10
188
+ console.print(f"[bright_black]... and {remaining} more resources[/bright_black]")
188
189
 
189
190
  # Telemetry (only for live analysis)
190
191
  if analysis.get("telemetry"):
191
- design.section_title("📡 Telemetry Data")
192
+ hud_console.section_title("📡 Telemetry Data")
192
193
  telemetry_table = Table(show_header=False, box=None)
193
194
  telemetry_table.add_column("Key", style="dim")
194
195
  telemetry_table.add_column("Value")
@@ -206,7 +207,7 @@ def display_interactive(analysis: dict) -> None:
206
207
 
207
208
  # Environment variables (for metadata-only analysis)
208
209
  if analysis.get("env_vars"):
209
- design.section_title("🔑 Environment Variables")
210
+ hud_console.section_title("🔑 Environment Variables")
210
211
  env_table = Table(show_header=False, box=None)
211
212
  env_table.add_column("Type", style="dim")
212
213
  env_table.add_column("Variables")
@@ -309,7 +310,7 @@ async def analyze_environment_from_config(
309
310
  config_path: Path, output_format: str, verbose: bool
310
311
  ) -> None:
311
312
  """Analyze MCP environment from a JSON config file."""
312
- design.header("MCP Environment Analysis", icon="🔍")
313
+ hud_console.header("MCP Environment Analysis", icon="🔍")
313
314
 
314
315
  # Load config from file
315
316
  try:
@@ -327,7 +328,7 @@ async def analyze_environment_from_mcp_config(
327
328
  mcp_config: dict[str, Any], output_format: str, verbose: bool
328
329
  ) -> None:
329
330
  """Analyze MCP environment from MCP config dict."""
330
- design.header("MCP Environment Analysis", icon="🔍")
331
+ hud_console.header("MCP Environment Analysis", icon="🔍")
331
332
  await _analyze_with_config(mcp_config, output_format, verbose)
332
333
 
333
334