hud-python 0.4.13__py3-none-any.whl → 0.4.15__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.



hud/agents/base.py CHANGED
@@ -6,11 +6,12 @@ import asyncio
 import json
 import logging
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any, Literal
+from typing import TYPE_CHECKING, Any, ClassVar, Literal
 
 import mcp.types as types
 
 from hud.types import AgentResponse, MCPToolCall, MCPToolResult, Trace
+from hud.utils.design import HUDDesign
 from hud.utils.mcp import MCPConfigPatch, patch_mcp_config, setup_hud_telemetry
 
 if TYPE_CHECKING:
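
Note: most of the remaining changes in this file replace direct module-logger calls with methods on the new HUDDesign helper. The facade's shape below is inferred from the call sites in this diff; the level semantics in the comments are an assumption, not documented API:

    design = HUDDesign(logger=logger)
    design.set_verbose(True)       # opt in to INFO-level output
    design.debug("...")            # verbose-only detail (steps, tool calls, messages)
    design.info_log("...")         # standard progress output
    design.progress_log("...")     # phase banners (setup/evaluate)
    design.success_log("...")      # completion messages
    design.warning_log("...")      # interruptions, cleanup failures
    design.error_log("...")        # failures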
@@ -35,6 +36,7 @@ class MCPAgent(ABC):
     """
 
     metadata: dict[str, Any]
+    required_tools: ClassVar[list[str]] = []  # Tools that must be available
 
     def __init__(
         self,
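
The new required_tools class variable lets a subclass declare tools that must be exposed by the connected MCP server; _filter_tools (later in this diff) raises ValueError when any are missing. A minimal sketch of how a subclass might use it (the class and tool names here are hypothetical):

    from typing import Any, ClassVar

    from hud.agents.base import MCPAgent

    class MyComputerAgent(MCPAgent):  # hypothetical subclass; abstract methods omitted
        # Initialization fails fast with ValueError if the server does not
        # expose these tools after filtering.
        required_tools: ClassVar[list[str]] = ["computer", "screenshot"]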
@@ -51,6 +53,7 @@ class MCPAgent(ABC):
         model_name: str = "mcp-agent",
         response_agent: ResponseAgent | None = None,
         auto_trace: bool = True,
+        verbose: bool = False,
     ) -> None:
         """
         Initialize the base MCP agent.
@@ -63,12 +66,18 @@ class MCPAgent(ABC):
             initial_screenshot: Whether to capture screenshot before first prompt
             system_prompt: System prompt to use
             append_setup_output: Whether to append setup tool output to initial messages
+            verbose: If True, sets logging level to INFO. If False, only WARNING and above.
         """
 
         self.mcp_client = mcp_client
         self._auto_created_client = False  # Track if we created the client
 
         self.model_name = model_name
+        self.design = HUDDesign(logger=logger)
+
+        # Set verbose mode if requested
+        if verbose:
+            self.design.set_verbose(True)
 
         # Filtering
         self.allowed_tools = allowed_tools
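
The verbose flag gives callers a one-line opt-in to INFO-level output. For example, assuming a concrete subclass such as ClaudeAgent forwards **kwargs to this constructor (its signature later in this diff suggests it does):

    from hud.agents.claude import ClaudeAgent

    agent = ClaudeAgent(verbose=True)   # step and tool logs at INFO
    quiet = ClaudeAgent()               # WARNING and above only (default)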
@@ -101,7 +110,7 @@ class MCPAgent(ABC):
 
             self.mcp_client = MCPClient(mcp_config=task.mcp_config)
             self._auto_created_client = True
-            logger.info("Auto-created MCPClient from task.mcp_config")
+            self.design.info_log("Auto-created MCPClient from task.mcp_config")
 
         # Ensure we have a client
         if self.mcp_client is None:
@@ -112,7 +121,10 @@ class MCPAgent(ABC):
         await self._setup_config(self.mcp_client.mcp_config)
 
         # Initialize client if needed
-        await self.mcp_client.initialize()
+        try:
+            await self.mcp_client.initialize()
+        except Exception as e:
+            self._handle_connection_error(e)
 
         # If task is provided, add lifecycle tools
         if isinstance(task, Task):
@@ -134,9 +146,9 @@ class MCPAgent(ABC):
         # Re-apply filtering with updated lifecycle tools
         await self._filter_tools()
 
-        logger.info(
-            "Agent initialized with %d available tools (after filtering)",
-            len(self._available_tools),
+        num_tools = len(self._available_tools)
+        self.design.success_log(
+            f"Agent initialized with {num_tools} available tools (after filtering)"
         )
 
     async def run(self, prompt_or_task: str | Task | dict[str, Any], max_steps: int = 10) -> Trace:
@@ -173,6 +185,16 @@ class MCPAgent(ABC):
 
             else:
                 raise TypeError(f"prompt_or_task must be str or Task, got {type(prompt_or_task)}")
+        except Exception as e:
+            if self._is_connection_error(e):
+                # Return error trace for connection failures
+                return Trace(
+                    reward=0.0,
+                    done=True,
+                    content=self._get_connection_error_message(e),
+                    isError=True,
+                )
+            raise
         finally:
             # Cleanup auto-created resources
             await self._cleanup()
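
With this hunk, run() converts recognized connection failures into an error Trace instead of letting the exception escape, so callers can branch on the result rather than wrapping every call in try/except. A sketch (the prompt and URL are illustrative):

    trace = await agent.run("Open the dashboard", max_steps=5)
    if trace.isError:
        # e.g. "Connection failed: Could not connect to http://localhost:8080.
        # Is your MCP client/server running?"
        print(trace.content)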
@@ -200,7 +222,7 @@ class MCPAgent(ABC):
 
         # Execute the setup tool and append the initial observation to the context
         if task.setup_tool is not None:
-            logger.info("Setting up tool phase: %s", task.setup_tool)
+            self.design.progress_log(f"Setting up tool phase: {task.setup_tool}")
             results = await self.call_tools(task.setup_tool)
             if any(result.isError for result in results):
                 raise RuntimeError(f"{results}")
@@ -214,7 +236,7 @@ class MCPAgent(ABC):
             prompt_result = await self._run_context(start_context, max_steps=max_steps)
 
         except Exception as e:
-            logger.error("Task execution failed: %s", e)
+            self.design.error_log(f"Task execution failed: {e}")
             # Create an error result but don't return yet - we still want to evaluate
             prompt_result = Trace(reward=0.0, done=True, content=str(e), isError=True)
             prompt_result.populate_from_context()
@@ -222,7 +244,7 @@ class MCPAgent(ABC):
         # Always evaluate if we have a prompt result and evaluate tool
         if prompt_result is not None and task.evaluate_tool is not None:
             try:
-                logger.info("Evaluating tool phase: %s", task.evaluate_tool)
+                self.design.progress_log(f"Evaluating tool phase: {task.evaluate_tool}")
                 results = await self.call_tools(task.evaluate_tool)
 
                 if any(result.isError for result in results):
@@ -245,7 +267,7 @@ class MCPAgent(ABC):
                     prompt_result.content = eval_content
 
             except Exception as e:
-                logger.error("Evaluation phase failed: %s", e)
+                self.design.error_log(f"Evaluation phase failed: {e}")
                 # Continue with the prompt result even if evaluation failed
 
         return (
@@ -276,21 +298,21 @@ class MCPAgent(ABC):
 
         # Add initial context
         messages.extend(await self.format_message(context))
-        logger.debug("Messages: %s", messages)
+        self.design.debug(f"Messages: {messages}")
 
         step_count = 0
         while max_steps == -1 or step_count < max_steps:
             step_count += 1
             if max_steps == -1:
-                logger.info("Step %s (unlimited)", step_count)
+                self.design.debug(f"Step {step_count} (unlimited)")
             else:
-                logger.info("Step %s/%s", step_count, max_steps)
+                self.design.debug(f"Step {step_count}/{max_steps}")
 
             try:
                 # 1. Get model response
                 response = await self.get_response(messages)
 
-                logger.info("Agent:\n%s", response)
+                self.design.debug(f"Agent:\n{response}")
 
                 # Check if we should stop
                 if response.done or not response.tool_calls:
@@ -302,16 +324,16 @@ class MCPAgent(ABC):
                             response.content
                         )
                     except Exception as e:
-                        logger.warning("ResponseAgent failed: %s", e)
+                        self.design.warning_log(f"ResponseAgent failed: {e}")
                     if decision == "STOP":
                         # Try to submit response through lifecycle tool
                         await self._maybe_submit_response(response, messages)
 
-                        logger.info("Stopping execution")
+                        self.design.debug("Stopping execution")
                         final_response = response
                         break
                     else:
-                        logger.info("Continuing execution")
+                        self.design.debug("Continuing execution")
                         messages.extend(await self.format_message(decision))
                         continue
 
@@ -323,19 +345,31 @@ class MCPAgent(ABC):
                 tool_messages = await self.format_tool_results(tool_calls, tool_results)
                 messages.extend(tool_messages)
 
+                # Compact step completion display
+                step_info = f"\n[bold]Step {step_count}"
+                if max_steps != -1:
+                    step_info += f"/{max_steps}"
+                step_info += "[/bold]"
+
+                # Show tool calls and results in compact format
+                for call, result in zip(tool_calls, tool_results, strict=False):
+                    step_info += f"\n{call}\n{result}"
+
+                self.design.info_log(step_info)
+
             except Exception as e:
-                logger.error("Step failed: %s", e)
+                self.design.error_log(f"Step failed: {e}")
                 error = str(e)
                 break
 
         except KeyboardInterrupt:
-            logger.info("Agent execution interrupted by user")
+            self.design.warning_log("Agent execution interrupted by user")
            error = "Interrupted by user"
         except asyncio.CancelledError:
-            logger.info("Agent execution cancelled")
+            self.design.warning_log("Agent execution cancelled")
            error = "Cancelled"
         except Exception as e:
-            logger.error("Unexpected error: %s", e)
+            self.design.error_log(f"Unexpected error: {e}")
            error = str(e)
 
         # Build result
@@ -376,17 +410,17 @@ class MCPAgent(ABC):
         results: list[MCPToolResult] = []
         for tc in tool_call:
             try:
-                logger.info("Calling tool: %s", tc)
+                self.design.debug(f"Calling tool: {tc}")
                 results.append(await self.mcp_client.call_tool(tc))
             except TimeoutError as e:
-                logger.error("Tool execution timed out: %s", e)
+                self.design.error_log(f"Tool execution timed out: {e}")
                 try:
                     await self.mcp_client.shutdown()
                 except Exception as close_err:
-                    logger.debug("Failed to close MCP client cleanly: %s", close_err)
+                    self.design.debug(f"Failed to close MCP client cleanly: {close_err}")
                 raise
             except Exception as e:
-                logger.error("Tool execution failed: %s", e)
+                self.design.error_log(f"Tool execution failed: {e}")
                 results.append(_format_error_result(str(e)))
         return results
 
@@ -490,9 +524,21 @@ class MCPAgent(ABC):
 
             # Auto-detect response tool as a lifecycle tool
             if tool.name == "response" and "response" not in self.lifecycle_tools:
-                logger.debug("Auto-detected 'response' tool as a lifecycle tool")
+                self.design.debug("Auto-detected 'response' tool as a lifecycle tool")
                 self.lifecycle_tools.append("response")
 
+        # Check if all required tools are available
+        if self.required_tools:
+            available_tool_names = {tool.name for tool in self._available_tools}
+            missing_tools = [
+                tool for tool in self.required_tools if tool not in available_tool_names
+            ]
+            if missing_tools:
+                raise ValueError(
+                    f"Required tools not available: {missing_tools}. "
+                    f"Available tools: {list(available_tool_names)}"
+                )
+
     async def _maybe_submit_response(self, response: AgentResponse, messages: list[Any]) -> None:
         """Submit response through lifecycle tool if available.
 
@@ -502,7 +548,7 @@ class MCPAgent(ABC):
         """
         # Check if we have a response lifecycle tool
         if "response" in self.lifecycle_tools and "response" in self._tool_map:
-            logger.debug("Calling response lifecycle tool")
+            self.design.debug("Calling response lifecycle tool")
             try:
                 # Call the response tool with the agent's response
                 response_tool_call = MCPToolCall(
@@ -517,9 +563,9 @@ class MCPAgent(ABC):
                 messages.extend(response_messages)
 
                 # Mark the task as done
-                logger.info("Response lifecycle tool executed, marking task as done")
+                self.design.debug("Response lifecycle tool executed, marking task as done")
             except Exception as e:
-                logger.error("Response lifecycle tool failed: %s", e)
+                self.design.error_log(f"Response lifecycle tool failed: {e}")
 
     async def _setup_config(self, mcp_config: dict[str, dict[str, Any]]) -> None:
         """Inject metadata into the metadata of the initialize request."""
@@ -573,9 +619,9 @@ class MCPAgent(ABC):
         if self._auto_trace_cm:
             try:
                 self._auto_trace_cm.__exit__(None, None, None)
-                logger.info("Closed auto-created trace")
+                self.design.debug("Closed auto-created trace")
             except Exception as e:
-                logger.warning("Failed to close auto-created trace: %s", e)
+                self.design.warning_log(f"Failed to close auto-created trace: {e}")
             finally:
                 self._auto_trace_cm = None
 
@@ -583,13 +629,52 @@ class MCPAgent(ABC):
         if self._auto_created_client and self.mcp_client:
             try:
                 await self.mcp_client.shutdown()
-                logger.info("Closed auto-created MCPClient")
+                self.design.debug("Closed auto-created MCPClient")
             except Exception as e:
-                logger.warning("Failed to close auto-created client: %s", e)
+                self.design.warning_log(f"Failed to close auto-created client: {e}")
             finally:
                 self.mcp_client = None
                 self._auto_created_client = False
 
+    def _is_connection_error(self, e: Exception) -> bool:
+        """Check if an exception is a connection error."""
+        error_msg = str(e).lower()
+        return any(
+            pattern in error_msg
+            for pattern in [
+                "connection",
+                "connect",
+                "refused",
+                "failed",
+                "could not connect",
+                "mcp server",
+            ]
+        )
+
+    def _get_connection_error_message(self, e: Exception) -> str:
+        """Extract a helpful connection error message."""
+        import re
+
+        url_match = re.search(r"https?://[^\s]+", str(e))
+        url = url_match.group(0) if url_match else "the MCP server"
+        return f"Connection failed: Could not connect to {url}. Is your MCP client/server running?"
+
+    def _handle_connection_error(self, e: Exception) -> None:
+        """Handle connection errors with helpful messages."""
+        if self._is_connection_error(e):
+            msg = self._get_connection_error_message(e)
+            # Always show connection errors, not just when logging is enabled
+            self.design.error(f"❌ {msg}")
+            self.design.info("💡 Make sure the MCP server is started before running the agent.")
+
+            # For localhost, provide specific instructions
+            error_str = str(e).lower()
+            if "localhost" in error_str or "127.0.0.1" in error_str:
+                self.design.info(" Run 'hud dev' in another terminal to start the MCP server")
+
+            raise RuntimeError(msg) from e
+        raise
+
 
 def _format_error_result(error_message: str) -> MCPToolResult:
     return MCPToolResult(content=text_to_blocks(error_message), isError=True)
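
Note that _is_connection_error is a substring heuristic over the exception text, so the broad "failed" pattern also matches errors that have nothing to do with connectivity. Illustrative behavior, under a plain reading of the code above:

    agent._is_connection_error(ConnectionRefusedError("Connection refused"))    # True ("refused")
    agent._is_connection_error(RuntimeError("model failed to produce output"))  # True ("failed")
    agent._is_connection_error(ValueError("invalid tool arguments"))            # False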
hud/agents/claude.py CHANGED
@@ -50,7 +50,7 @@ class ClaudeAgent(MCPAgent):
     def __init__(
         self,
         model_client: AsyncAnthropic | None = None,
-        model: str = "claude-3-7-sonnet-20250219",
+        model: str = "claude-sonnet-4-20250514",
         max_tokens: int = 4096,
         use_computer_beta: bool = True,
         **kwargs: Any,
hud/agents/openai.py CHANGED
@@ -38,6 +38,7 @@ class OperatorAgent(MCPAgent):
         "display_width": computer_settings.OPENAI_COMPUTER_WIDTH,
         "display_height": computer_settings.OPENAI_COMPUTER_HEIGHT,
     }
+    required_tools: ClassVar[list[str]] = ["openai_computer"]
 
     def __init__(
         self,
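
Declaring required_tools here moves the availability check from every get_response() call (see the removed fallback in the next hunk) to a single fail-fast ValueError during agent setup. A sketch of the new failure mode (client construction and prompt are illustrative):

    agent = OperatorAgent(mcp_client=client)
    # If the connected server does not expose "openai_computer", setup now raises
    # ValueError("Required tools not available: ['openai_computer']. ...")
    result = await agent.run("Click the submit button")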
@@ -143,20 +144,8 @@ class OperatorAgent(MCPAgent):
         """Get response from OpenAI including any tool calls."""
         # OpenAI's API is stateful, so we handle messages differently
 
-        # Check if we have computer tools available
-        computer_tool_name = None
-        for tool in self._available_tools:
-            if tool.name in ["openai_computer", "computer"]:
-                computer_tool_name = tool.name
-                break
-
-        if not computer_tool_name:
-            # No computer tools available, just return a text response
-            return AgentResponse(
-                content="No computer use tools available",
-                tool_calls=[],
-                done=True,
-            )
+        # Get the computer tool (guaranteed to exist due to required_tools)
+        computer_tool_name = "openai_computer"
 
         # Define the computer use tool
         computer_tool: ToolParam = {  # type: ignore[reportAssignmentType]
@@ -209,7 +198,7 @@ class OperatorAgent(MCPAgent):
                 break
 
         if not latest_screenshot:
-            logger.warning("No screenshot provided for response to action")
+            self.design.warning_log("No screenshot provided for response to action")
             return AgentResponse(
                 content="No screenshot available for next action",
                 tool_calls=[],
@@ -332,7 +321,7 @@ class OperatorAgent(MCPAgent):
         for content in result.content:
             if isinstance(content, types.TextContent):
                 # Don't add error text as input_text, just track it
-                logger.error("Tool error: %s", content.text)
+                self.design.error_log(f"Tool error: {content.text}")
             elif isinstance(content, types.ImageContent):
                 # Even error results might have images
                 latest_screenshot = content.data
@@ -20,6 +20,15 @@ class TestOperatorAgent:
         mcp_client = AsyncMock()
         # Set up the mcp_config attribute as a regular dict, not a coroutine
         mcp_client.mcp_config = {"test_server": {"url": "http://test"}}
+        # Mock list_tools to return the required openai_computer tool
+        mcp_client.list_tools = AsyncMock(
+            return_value=[
+                types.Tool(
+                    name="openai_computer", description="OpenAI computer use tool", inputSchema={}
+                )
+            ]
+        )
+        mcp_client.initialize = AsyncMock()
         return mcp_client
 
     @pytest.fixture
@@ -129,91 +138,27 @@ class TestOperatorAgent:
             types.Tool(name="computer_openai", description="Computer tool", inputSchema={})
         ]
 
-        # Since OpenAI checks isinstance() on response types, we need to mock that
-        # For now, let's just test that we get the expected "No computer use tools available"
-        # when there are no matching tools
-        agent._available_tools = [
-            types.Tool(name="other_tool", description="Other tool", inputSchema={})
-        ]
-
-        messages = [{"prompt": "What's on the screen?", "screenshot": None}]
-        response = await agent.get_response(messages)
-
-        assert response.content == "No computer use tools available"
-        assert response.tool_calls == []
-        assert response.done is True
-
-    @pytest.mark.asyncio
-    async def test_get_model_response_text_only(self, mock_mcp_client, mock_openai):
-        """Test getting text-only response when no computer tools available."""
-        agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
+        # Mock OpenAI API response for a successful computer use response
+        mock_response = MagicMock()
+        mock_response.id = "response_123"
+        mock_response.state = "completed"
+        # Mock the output message structure
+        mock_output_text = MagicMock()
+        mock_output_text.type = "output_text"
+        mock_output_text.text = "I can see the screen content."
+        mock_output_message = MagicMock()
+        mock_output_message.type = "message"
+        mock_output_message.content = [mock_output_text]
+        mock_response.output = [mock_output_message]
 
-        # Set up with no computer tools
-        agent._available_tools = []
+        mock_openai.responses.create = AsyncMock(return_value=mock_response)
 
-        messages = [{"prompt": "Hi", "screenshot": None}]
+        messages = [{"prompt": "What's on the screen?", "screenshot": None}]
         response = await agent.get_response(messages)
 
-        assert response.content == "No computer use tools available"
-        assert response.tool_calls == []
+        assert response.content == "I can see the screen content."
         assert response.done is True
 
-    @pytest.mark.asyncio
-    async def test_run_with_tools(self, mock_mcp_client, mock_openai):
-        """Test running agent with tool usage."""
-        agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
-
-        # Mock tool availability
-        agent._available_tools = [
-            types.Tool(name="search", description="Search tool", inputSchema={"type": "object"})
-        ]
-        # Base agent doesn't require server mapping for tool execution
-
-        # Mock initial response with tool use
-        initial_choice = MagicMock()
-        initial_choice.message = MagicMock(
-            content=None,
-            tool_calls=[
-                MagicMock(
-                    id="call_search",
-                    function=MagicMock(name="search", arguments='{"query": "OpenAI news"}'),
-                )
-            ],
-        )
-
-        initial_response = MagicMock()
-        initial_response.choices = [initial_choice]
-        initial_response.usage = MagicMock(prompt_tokens=10, completion_tokens=15, total_tokens=25)
-
-        # Mock follow-up response
-        final_choice = MagicMock()
-        final_choice.message = MagicMock(
-            content="Here are the latest OpenAI news...", tool_calls=None
-        )
-
-        final_response = MagicMock()
-        final_response.choices = [final_choice]
-        final_response.usage = MagicMock(prompt_tokens=20, completion_tokens=10, total_tokens=30)
-
-        mock_openai.chat.completions.create = AsyncMock(
-            side_effect=[initial_response, final_response]
-        )
-
-        # Mock tool execution
-        mock_mcp_client.call_tool = AsyncMock(
-            return_value=MCPToolResult(
-                content=[types.TextContent(type="text", text="Search results...")], isError=False
-            )
-        )
-
-        # Use a string prompt instead of a task
-        result = await agent.run("Search for OpenAI news")
-
-        # Since OpenAI integration currently returns "No computer use tools available"
-        # when the tool isn't a computer tool, we expect this
-        assert result.content == "No computer use tools available"
-        assert result.done is True
-
     @pytest.mark.asyncio
     async def test_handle_empty_response(self, mock_mcp_client, mock_openai):
         """Test handling empty response from API."""