hud-python 0.4.18__py3-none-any.whl → 0.4.20__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

hud/agents/base.py CHANGED
@@ -30,9 +30,19 @@ class MCPAgent(ABC):
30
30
  """
31
31
  Base class for MCP-enabled agents.
32
32
 
33
- This class provides the foundation for agents that interact with MCP servers,
34
- handling tool discovery and filtering while leaving provider-specific
35
- implementation details to subclasses.
33
+ Provides common behavior for agents that interact with MCP servers, including:
34
+ - Client management: accepts an `AgentMCPClient` or auto-creates one at
35
+ runtime when `run()` is called with a `Task` that includes `mcp_config`.
36
+ - Tool lifecycle: discovery, filtering (`allowed_tools`, `disallowed_tools`),
37
+ and automatic marking of lifecycle tools (setup/evaluate) from a `Task`.
38
+ - Messaging: system prompt handling, optional inclusion of setup output on
39
+ the first turn, and control over initial screenshots.
40
+ - Telemetry & UX: standardized logging/printing via `HUDDesign` and optional
41
+ automatic tracing (`auto_trace`).
42
+
43
+ Subclasses implement provider-specific formatting and response fetching
44
+ by overriding these abstract methods: `get_system_messages`, `get_response`,
45
+ `format_blocks`, and `format_tool_results`.
36
46
  """
37
47
 
38
48
  metadata: dict[str, Any]
@@ -59,14 +69,23 @@ class MCPAgent(ABC):
59
69
  Initialize the base MCP agent.
60
70
 
61
71
  Args:
62
- mcp_client: AgentMCPClient instance for server connections
63
- allowed_tools: List of tool names to allow (None = all tools)
64
- disallowed_tools: List of tool names to disallow
65
- lifecycle_tools: List of tool names to use for lifecycle tools
66
- initial_screenshot: Whether to capture screenshot before first prompt
67
- system_prompt: System prompt to use
68
- append_setup_output: Whether to append setup tool output to initial messages
69
- verbose: If True, sets logging level to INFO. If False, only WARNING and above.
72
+ mcp_client: Client for connecting to MCP servers. If None, a client
73
+ is auto-created at runtime when `run()` is called with a `Task`
74
+ that provides `mcp_config`.
75
+ allowed_tools: Names of tools to allow (None means allow all).
76
+ disallowed_tools: Names of tools to always exclude.
77
+ lifecycle_tools: Tools reserved for lifecycle phases (e.g., setup,
78
+ evaluate). These are hidden from normal tool calling.
79
+ system_prompt: System prompt to seed the conversation.
80
+ append_setup_output: Whether to append setup tool output to the
81
+ first turn's messages.
82
+ initial_screenshot: Whether to include an initial screenshot before
83
+ the first prompt (when supported by the environment).
84
+ model_name: Label used in telemetry/logging to identify the model.
85
+ response_agent: Optional automation that can respond to the model's
86
+ outputs to keep the loop going (e.g., auto-continue/stop).
87
+ auto_trace: If True, automatically creates a trace/span for runs.
88
+ verbose: If True, increases logging verbosity for developer UX.
70
89
  """
71
90
 
72
91
  self.mcp_client = mcp_client
hud/agents/claude.py CHANGED
@@ -306,35 +306,49 @@ class ClaudeAgent(MCPAgent):
306
306
  """Convert MCP tools to Claude tool format."""
307
307
  claude_tools = []
308
308
  self._claude_to_mcp_tool_map = {} # Reset mapping
309
-
309
+
310
+ # Find computer tool by priority
311
+ computer_tool_priority = ["anthropic_computer", "computer_anthropic", "computer"]
312
+ selected_computer_tool = None
313
+
314
+ for priority_name in computer_tool_priority:
315
+ for tool in self._available_tools:
316
+ if tool.name == priority_name:
317
+ selected_computer_tool = tool
318
+ break
319
+ if selected_computer_tool:
320
+ break
321
+
322
+ # Add the selected computer tool if found
323
+ if selected_computer_tool:
324
+ claude_tool = {
325
+ "type": "computer_20250124",
326
+ "name": "computer",
327
+ "display_width_px": self.metadata["display_width"],
328
+ "display_height_px": self.metadata["display_height"],
329
+ }
330
+ # Map Claude's "computer" back to the actual MCP tool name
331
+ self._claude_to_mcp_tool_map["computer"] = selected_computer_tool.name
332
+ claude_tools.append(claude_tool)
333
+ logger.debug(f"Using {selected_computer_tool.name} as computer tool for Claude")
334
+
335
+ # Add other non-computer tools
310
336
  for tool in self._available_tools:
311
- # Special handling for computer use tools
312
- if tool.name in ["computer", "computer_anthropic", "anthropic_computer"]:
313
- # Use Claude's native computer use format with configurable dimensions
314
- claude_tool = {
315
- "type": "computer_20250124",
316
- "name": "computer",
317
- "display_width_px": self.metadata["display_width"],
318
- "display_height_px": self.metadata["display_height"],
319
- }
320
- # Map Claude's "computer" back to the actual MCP tool name
321
- self._claude_to_mcp_tool_map["computer"] = tool.name
322
- elif tool.name not in self.lifecycle_tools:
323
- # Convert regular tools
324
- claude_tool = {
325
- "name": tool.name,
326
- "description": tool.description or f"Execute {tool.name}",
327
- "input_schema": tool.inputSchema
328
- or {
329
- "type": "object",
330
- "properties": {},
331
- },
332
- }
333
- # Direct mapping for non-computer tools
334
- self._claude_to_mcp_tool_map[tool.name] = tool.name
335
- else:
337
+ # Skip computer tools (already handled) and lifecycle tools
338
+ if tool.name in computer_tool_priority or tool.name in self.lifecycle_tools:
336
339
  continue
337
-
340
+
341
+ claude_tool = {
342
+ "name": tool.name,
343
+ "description": tool.description or f"Execute {tool.name}",
344
+ "input_schema": tool.inputSchema
345
+ or {
346
+ "type": "object",
347
+ "properties": {},
348
+ },
349
+ }
350
+ # Direct mapping for non-computer tools
351
+ self._claude_to_mcp_tool_map[tool.name] = tool.name
338
352
  claude_tools.append(claude_tool)
339
353
 
340
354
  self.claude_tools = claude_tools
@@ -7,7 +7,7 @@ through the existing :class:`hud.agent.MCPAgent` scaffolding.
7
7
  Key points:
8
8
  - Stateless, no special server-side conversation state is assumed.
9
9
  - Accepts an :class:`openai.AsyncOpenAI` client, caller can supply their own
10
- base_url / api_key (e.g. ART, llama.cpp, together.ai, …)
10
+ base_url / api_key (e.g. llama.cpp, together.ai, …)
11
11
  - All HUD features (step_count, OTel spans, tool filtering, screenshots, …)
12
12
  come from the ``MCPAgent`` base class, we only implement the three abstract
13
13
  methods
@@ -30,8 +30,6 @@ if TYPE_CHECKING:
30
30
  from openai import AsyncOpenAI
31
31
  from openai.types.chat import ChatCompletionToolParam
32
32
 
33
- from hud.clients import AgentMCPClient
34
-
35
33
  logger = logging.getLogger(__name__)
36
34
 
37
35
 
@@ -40,19 +38,19 @@ class GenericOpenAIChatAgent(MCPAgent):
40
38
 
41
39
  def __init__(
42
40
  self,
43
- mcp_client: AgentMCPClient,
44
41
  *,
45
42
  openai_client: AsyncOpenAI,
46
43
  model_name: str = "gpt-4o-mini",
47
44
  parallel_tool_calls: bool = False,
48
- logprobs: bool = False,
45
+ completion_kwargs: dict[str, Any] | None = None,
49
46
  **agent_kwargs: Any,
50
47
  ) -> None:
51
- super().__init__(mcp_client=mcp_client, **agent_kwargs)
48
+ # Accept base-agent settings via **agent_kwargs (e.g., mcp_client, system_prompt, etc.)
49
+ super().__init__(**agent_kwargs)
52
50
  self.oai = openai_client
53
51
  self.model_name = model_name
54
52
  self.parallel_tool_calls = parallel_tool_calls
55
- self.logprobs = logprobs
53
+ self.completion_kwargs: dict[str, Any] = completion_kwargs or {}
56
54
  self.conversation_history = []
57
55
 
58
56
  @staticmethod
@@ -177,12 +175,15 @@ class GenericOpenAIChatAgent(MCPAgent):
177
175
  # Convert MCP tool schemas to OpenAI format
178
176
  mcp_schemas = self.get_tool_schemas()
179
177
 
178
+ protected_keys = {"model", "messages", "tools", "parallel_tool_calls"}
179
+ extra = {k: v for k, v in (self.completion_kwargs or {}).items() if k not in protected_keys}
180
+
180
181
  response = await self.oai.chat.completions.create(
181
182
  model=self.model_name,
182
183
  messages=messages,
183
184
  tools=cast("list[ChatCompletionToolParam]", mcp_schemas),
184
185
  parallel_tool_calls=self.parallel_tool_calls,
185
- logprobs=self.logprobs,
186
+ **extra,
186
187
  )
187
188
 
188
189
  choice = response.choices[0]
@@ -247,9 +248,7 @@ class GenericOpenAIChatAgent(MCPAgent):
247
248
  image_parts.append(
248
249
  {
249
250
  "type": "image_url",
250
- "image_url": {
251
- "url": f"data:{mime_type};base64,{data}"
252
- },
251
+ "image_url": {"url": f"data:{mime_type};base64,{data}"},
253
252
  }
254
253
  )
255
254
  elif isinstance(item, types.TextContent):
@@ -276,7 +275,7 @@ class GenericOpenAIChatAgent(MCPAgent):
276
275
  # Add a user message with the images
277
276
  content_with_images = [
278
277
  {"type": "text", "text": "Tool returned the following:"},
279
- *image_parts
278
+ *image_parts,
280
279
  ]
281
280
  rendered.append(
282
281
  {
hud/clients/base.py CHANGED
@@ -130,31 +130,19 @@ class BaseHUDClient(AgentMCPClient):
130
130
  logger.debug("Initializing MCP client...")
131
131
 
132
132
  try:
133
+ # Check if API key is set for HUD API
134
+ for server_config in self._mcp_config.values():
135
+ url = server_config.get("url", "")
136
+ headers = server_config.get("headers", {})
137
+ if "mcp.hud.so" in url and len(headers.get("Authorization", "")) < 10:
138
+ raise RuntimeError(
139
+ "Please ensure your HUD_API_KEY environment variable is set correctly."
140
+ "You can get an API key at https://app.hud.so"
141
+ )
133
142
  # Subclasses implement connection
134
143
  await self._connect(self._mcp_config)
135
- except RuntimeError as e:
136
- # Re-raise authentication errors with clear message
137
- if "Authentication failed" in str(e):
138
- raise
139
- raise
140
144
  except Exception as e:
141
- # Check for authentication errors in the exception chain
142
- error_msg = str(e)
143
- if "401" in error_msg or "Unauthorized" in error_msg:
144
- # Check if connecting to HUD API
145
- for server_config in self._mcp_config.values():
146
- url = server_config.get("url", "")
147
- if "mcp.hud.so" in url:
148
- raise RuntimeError(
149
- "Authentication failed for HUD API. "
150
- "Please ensure your HUD_API_KEY environment variable is set correctly. "
151
- "You can get an API key at https://app.hud.so"
152
- ) from e
153
- raise RuntimeError(
154
- "Authentication failed (401 Unauthorized). "
155
- "Please check your credentials or API key."
156
- ) from e
157
- raise
145
+ raise e
158
146
 
159
147
  # Common hud behavior - fetch telemetry
160
148
  await self._fetch_telemetry()
hud/datasets/task.py CHANGED
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import json
6
+ import logging
6
7
  from collections import defaultdict
7
8
  from string import Template
8
9
  from typing import Any
@@ -12,6 +13,8 @@ from pydantic import BaseModel, Field, field_validator
12
13
  from hud.settings import settings
13
14
  from hud.types import MCPToolCall
14
15
 
16
+ logger = logging.getLogger(__name__)
17
+
15
18
 
16
19
  class Task(BaseModel):
17
20
  """
@@ -90,6 +93,8 @@ class Task(BaseModel):
90
93
 
91
94
  if settings.api_key:
92
95
  mapping["HUD_API_KEY"] = settings.api_key
96
+ else:
97
+ logger.error("HUD_API_KEY is not set, tracing and remote training will not work")
93
98
 
94
99
  def substitute_in_value(obj: Any) -> Any:
95
100
  """Recursively substitute variables in nested structures."""
hud/tools/playwright.py CHANGED
@@ -153,7 +153,7 @@ class PlaywrightTool(BaseTool):
153
153
  """Ensure browser is launched and ready."""
154
154
  if self._browser is None or not self._browser.is_connected():
155
155
  if self._cdp_url:
156
- logger.info("Connecting to remote browser via CDP: %s", self._cdp_url)
156
+ logger.info("Connecting to remote browser via CDP")
157
157
  else:
158
158
  logger.info("Launching Playwright browser...")
159
159
 
@@ -5,4 +5,4 @@ def test_import():
5
5
  """Test that the package can be imported."""
6
6
  import hud
7
7
 
8
- assert hud.__version__ == "0.4.18"
8
+ assert hud.__version__ == "0.4.20"
hud/version.py CHANGED
@@ -4,4 +4,4 @@ Version information for the HUD SDK.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.4.18"
7
+ __version__ = "0.4.20"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.4.18
3
+ Version: 0.4.20
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -2,13 +2,13 @@ hud/__init__.py,sha256=BjAhZtsHbGN371Q8t3o4v4jltedkmDE85xW0yOILU9g,397
2
2
  hud/__main__.py,sha256=YR8Dq8OhINOsVfQ55PmRXXg4fEK84Rt_-rMtJ5rvhWo,145
3
3
  hud/settings.py,sha256=q9aZiHjvbL4oLE-N8AttTW4rmzS8zPMnsca-iMGyEGc,2362
4
4
  hud/types.py,sha256=gNnyS1G7aYHIR5sT3k3bOfSTFnPylUO6lNGLWbjbeYk,5149
5
- hud/version.py,sha256=8Ag1N-qzwxUt5QwVLTJ5Z43L6M6O6FLpCKva6zONOfc,105
5
+ hud/version.py,sha256=pgW9sHjEdTZlk7884zAV7kzAGXkPVC1P6p_MTwNJTSI,105
6
6
  hud/agents/__init__.py,sha256=UoIkljWdbq4bM0LD-mSaw6w826EqdEjOk7r6glNYwYQ,286
7
- hud/agents/base.py,sha256=rbwYP_a6XTwhY_5CaBlE7SWflnTq1EOuDiNY2XeUWdM,28275
8
- hud/agents/claude.py,sha256=_eD_XKZhVJ6grkHQfbS6JskztueomQcmJeGJMbfNdmE,14534
7
+ hud/agents/base.py,sha256=t3bPRTKzGuejhSeo1jLNprlUv6zNU9ezQfP16tX_pXw,29562
8
+ hud/agents/claude.py,sha256=v061ulKO4n-1dIm3iuY5E1PcEQiErFQbeKsP0GynIWA,15062
9
9
  hud/agents/langchain.py,sha256=1EgCy8jfjunsWxlPC5XfvfLS6_XZVrIF1ZjtHcrvhYw,9584
10
10
  hud/agents/openai.py,sha256=tvFYsZ5yaoLkfjMnHe-COxRttMsLRXBLPdSqgeipQRk,14257
11
- hud/agents/openai_chat_generic.py,sha256=Q6eKlKQIF2o04eGpIcBAyqpdcgRvuolbxmgWTT6ktEQ,10478
11
+ hud/agents/openai_chat_generic.py,sha256=PQAD4GGE6sHs8R95qpgDBHEbSOJ7WXCYGYFmd3Nic1g,10628
12
12
  hud/agents/misc/__init__.py,sha256=BYi4Ytp9b_vycpZFXnr5Oyw6ncKLNNGml8Jrb7bWUb4,136
13
13
  hud/agents/misc/response_agent.py,sha256=pnaomb4H-QJm1YKU3tC1YnZXxOlDbTHIXaIH-6Nkb6I,3102
14
14
  hud/agents/tests/__init__.py,sha256=W-O-_4i34d9TTyEHV-O_q1Ai1gLhzwDaaPo02_TWQIY,34
@@ -66,7 +66,7 @@ hud/cli/utils/runner.py,sha256=qZI1lFNZIFn6d919awUkMtjQ36TfhAvyqGRzQmkal8c,4269
66
66
  hud/cli/utils/server.py,sha256=uSx2DjG5vX-PFoD8zNH-gBHbkTNSHveFSVdAfmp09Tc,7341
67
67
  hud/clients/README.md,sha256=XNE3mch95ozDgVqfwCGcrhlHY9CwT1GKfNANNboowto,3826
68
68
  hud/clients/__init__.py,sha256=bcPIa7dwH5ENsjh7CzjsJ84fm7Ma93NBc2lGfSjGAKM,328
69
- hud/clients/base.py,sha256=F8wq-UGoW1J_MguHq5w_Tcr0mJ4awSWbFOE8xP7sSDA,14129
69
+ hud/clients/base.py,sha256=rWh6PbB53HRrbuVJhv1-zuLeEE0bJMWJf9zUCSoii2Q,13592
70
70
  hud/clients/fastmcp.py,sha256=KJGi8bmds0Q6rHnkTXb_Hw9ZqWmSo0OfjW05SSuyEJU,9182
71
71
  hud/clients/mcp_use.py,sha256=tgvQ5MyY1cJeCR1M7dwYMfDmPnxOQuXPjZeKCr98CJc,11962
72
72
  hud/clients/tests/__init__.py,sha256=sKOtJFFa4mDIXh1U6O8ZUHjigE8CiRMQ2PzJTIBZuVE,33
@@ -76,7 +76,7 @@ hud/clients/tests/test_protocol.py,sha256=aK4CS4g3j1D5jPo83ykzZuHUvcZFAulYtIq9T9
76
76
  hud/clients/utils/__init__.py,sha256=ucYJqOVpEsN-D9OFE2YTNLG628MgxcZAzfYhnbzx02k,32
77
77
  hud/clients/utils/retry_transport.py,sha256=Rsq25eiKKt_pM1bas78QEZvO0illK97X_3opmaS3A3w,6809
78
78
  hud/datasets/__init__.py,sha256=74T4mrjELKtE04XkZKwU8QAJcg2wjqXLqRO9s4GlPr4,678
79
- hud/datasets/task.py,sha256=V82HzRb2_c2MO9EG5ZcY-PMsLt3234Uks7WlkMta5HY,3615
79
+ hud/datasets/task.py,sha256=HjkUS6uFfQkQ1Is3fbsfw0a3pq7FBwfqcnzFVv6txZA,3776
80
80
  hud/datasets/utils.py,sha256=3hKvZTkZuCRkTeITB86nNdA1dtHZAqFfAdSPMtcTUhs,4275
81
81
  hud/datasets/execution/__init__.py,sha256=4m1AEpMQaUSJFVN_iAXvY6zFttVgZKwE6oQtC0Rrk7U,330
82
82
  hud/datasets/execution/parallel.py,sha256=4aL1XpS3vOBqZjgs0vrMZJ4eAoi86Td8C-m5SUtVxMs,25231
@@ -116,7 +116,7 @@ hud/tools/__init__.py,sha256=dT-s4zs2B5GsOZ_K2tZZLKuSIp4u3RIvNYMJ_eUpkrE,960
116
116
  hud/tools/base.py,sha256=4qm5LS3SAkrq_lyfToWYCN9tNvTHohKJNH2siHkE364,15824
117
117
  hud/tools/bash.py,sha256=LJViMGb3lTGBm_gequVVTM7ySh1Xh9bOOIZXU29Lmrw,5209
118
118
  hud/tools/edit.py,sha256=N0AYFXp07-vAJy2li7lvHOL6hfgJOU4LL3iLSZrbRWU,12745
119
- hud/tools/playwright.py,sha256=lF7NxyEu8YbB7tpmCoTf8p9HxIrejahC67x3Xs0Jjb4,15007
119
+ hud/tools/playwright.py,sha256=iyMrQ-ZKyeFia2fBp0yguXswTcXfGqdZcTXXCfUupFU,14988
120
120
  hud/tools/response.py,sha256=t6Oc8NM4u951A1XMCBaIkFyu3VNEQ8dcWURyTygfZmA,2228
121
121
  hud/tools/types.py,sha256=g-CWnUUDSxxIfUy54S1bpY1nfTzdYO1R_nPKYReABjQ,2734
122
122
  hud/tools/utils.py,sha256=bfVyYMcBOJvr1QdptCjVb6jaHVGIL5WUxmY59kzMekQ,1447
@@ -157,10 +157,10 @@ hud/utils/tests/test_init.py,sha256=2QLQSGgyP9wJhOvPCusm_zjJad0qApOZi1BXpxcdHXQ,
157
157
  hud/utils/tests/test_mcp.py,sha256=0pUa16mL-bqbZDXp5NHBnt1gO5o10BOg7zTMHZ1DNPM,4023
158
158
  hud/utils/tests/test_progress.py,sha256=QSF7Kpi03Ff_l3mAeqW9qs1nhK50j9vBiSobZq7T4f4,7394
159
159
  hud/utils/tests/test_telemetry.py,sha256=5jl7bEx8C8b-FfFUko5pf4UY-mPOR-9HaeL98dGtVHM,2781
160
- hud/utils/tests/test_version.py,sha256=Ur5o4UVJbPy4rYJUIc3yBCTK-mk9CAf_7bHv2qSPJEI,160
160
+ hud/utils/tests/test_version.py,sha256=UgaAapQpzHdJPFqy5Mhn8AT45nMCWwiy75_dhLcUlic,160
161
161
  hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
162
- hud_python-0.4.18.dist-info/METADATA,sha256=vvUR4EBJmH6WqrLg2OxsupIJLs_6S8aVPaCRJjN3sJI,20287
163
- hud_python-0.4.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
164
- hud_python-0.4.18.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
165
- hud_python-0.4.18.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
166
- hud_python-0.4.18.dist-info/RECORD,,
162
+ hud_python-0.4.20.dist-info/METADATA,sha256=YSbi6IhmvzoZl2h_RR_XyoHXe5caBtiUFlkEn7PjTnQ,20287
163
+ hud_python-0.4.20.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
164
+ hud_python-0.4.20.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
165
+ hud_python-0.4.20.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
166
+ hud_python-0.4.20.dist-info/RECORD,,