hud-python 0.4.22__py3-none-any.whl → 0.4.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release of hud-python has been flagged as potentially problematic.

Files changed (53)
  1. hud/agents/base.py +85 -59
  2. hud/agents/claude.py +5 -1
  3. hud/agents/grounded_openai.py +3 -1
  4. hud/agents/misc/response_agent.py +3 -2
  5. hud/agents/openai.py +2 -2
  6. hud/agents/openai_chat_generic.py +3 -1
  7. hud/cli/__init__.py +34 -24
  8. hud/cli/analyze.py +27 -26
  9. hud/cli/build.py +50 -46
  10. hud/cli/debug.py +7 -7
  11. hud/cli/dev.py +107 -99
  12. hud/cli/eval.py +31 -29
  13. hud/cli/hf.py +53 -53
  14. hud/cli/init.py +28 -28
  15. hud/cli/list_func.py +22 -22
  16. hud/cli/pull.py +36 -36
  17. hud/cli/push.py +76 -74
  18. hud/cli/remove.py +42 -40
  19. hud/cli/rl/__init__.py +2 -2
  20. hud/cli/rl/init.py +41 -41
  21. hud/cli/rl/pod.py +97 -91
  22. hud/cli/rl/ssh.py +42 -40
  23. hud/cli/rl/train.py +75 -73
  24. hud/cli/rl/utils.py +10 -10
  25. hud/cli/tests/test_analyze.py +1 -1
  26. hud/cli/tests/test_analyze_metadata.py +2 -2
  27. hud/cli/tests/test_pull.py +45 -45
  28. hud/cli/tests/test_push.py +31 -29
  29. hud/cli/tests/test_registry.py +15 -15
  30. hud/cli/utils/environment.py +11 -11
  31. hud/cli/utils/interactive.py +17 -17
  32. hud/cli/utils/logging.py +12 -12
  33. hud/cli/utils/metadata.py +12 -12
  34. hud/cli/utils/registry.py +5 -5
  35. hud/cli/utils/runner.py +23 -23
  36. hud/cli/utils/server.py +16 -16
  37. hud/clients/mcp_use.py +19 -5
  38. hud/clients/utils/__init__.py +25 -0
  39. hud/clients/utils/retry.py +186 -0
  40. hud/datasets/execution/parallel.py +71 -46
  41. hud/shared/hints.py +7 -7
  42. hud/tools/grounding/grounder.py +2 -1
  43. hud/types.py +4 -4
  44. hud/utils/__init__.py +3 -3
  45. hud/utils/{design.py → hud_console.py} +39 -33
  46. hud/utils/pretty_errors.py +6 -6
  47. hud/utils/tests/test_version.py +1 -1
  48. hud/version.py +1 -1
  49. {hud_python-0.4.22.dist-info → hud_python-0.4.24.dist-info}/METADATA +3 -1
  50. {hud_python-0.4.22.dist-info → hud_python-0.4.24.dist-info}/RECORD +53 -52
  51. {hud_python-0.4.22.dist-info → hud_python-0.4.24.dist-info}/WHEEL +0 -0
  52. {hud_python-0.4.22.dist-info → hud_python-0.4.24.dist-info}/entry_points.txt +0 -0
  53. {hud_python-0.4.22.dist-info → hud_python-0.4.24.dist-info}/licenses/LICENSE +0 -0
hud/clients/mcp_use.py CHANGED
@@ -15,6 +15,7 @@ from hud.types import MCPToolCall, MCPToolResult
 from hud.version import __version__ as hud_version
 
 from .base import BaseHUDClient
+from .utils.retry import retry_with_backoff
 
 logger = logging.getLogger(__name__)
 
@@ -127,8 +128,11 @@ class MCPUseHUDClient(BaseHUDClient):
                 logger.warning("Client session not initialized for %s", server_name)
                 continue
 
-            # List tools
-            tools_result = await session.connector.client_session.list_tools()
+            # List tools with retry logic for HTTP errors
+            tools_result = await retry_with_backoff(
+                session.connector.client_session.list_tools,
+                operation_name=f"list_tools_{server_name}",
+            )
 
             logger.info(
                 "Discovered %d tools from '%s': %s",
@@ -202,9 +206,12 @@ class MCPUseHUDClient(BaseHUDClient):
         if session.connector.client_session is None:
             raise ValueError(f"Client session not initialized for {server_name}")
 
-        result = await session.connector.client_session.call_tool(
+        # Call tool with retry logic for HTTP errors (502, 503, 504)
+        result = await retry_with_backoff(
+            session.connector.client_session.call_tool,
            name=original_tool.name,  # Use original tool name, not prefixed
            arguments=tool_call.arguments or {},
+            operation_name=f"call_tool_{original_tool.name}",
        )
 
        if self.verbose:
@@ -232,7 +239,10 @@ class MCPUseHUDClient(BaseHUDClient):
                continue
            # Prefer standard method name if available
            if hasattr(session.connector.client_session, "list_resources"):
-                resources = await session.connector.client_session.list_resources()
+                resources = await retry_with_backoff(
+                    session.connector.client_session.list_resources,
+                    operation_name=f"list_resources_{server_name}",
+                )
            else:
                # If the client doesn't support resource listing, skip
                continue
@@ -262,7 +272,11 @@ class MCPUseHUDClient(BaseHUDClient):
            resource_uri = AnyUrl(uri) if isinstance(uri, str) else uri
            # Prefer read_resource; fall back to list_resources if needed
            if hasattr(session.connector.client_session, "read_resource"):
-                result = await session.connector.client_session.read_resource(resource_uri)
+                result = await retry_with_backoff(
+                    session.connector.client_session.read_resource,
+                    resource_uri,
+                    operation_name=f"read_resource_{server_name}",
+                )
            else:
                # Fallback path for older clients: not supported in strict typing
                raise AttributeError("read_resource not available")
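
The pattern in each hunk above is the same: a direct await on a client-session method becomes a call routed through retry_with_backoff, with positional arguments passed through unchanged and operation_name used only for logging. A minimal sketch of that calling convention, assuming the new hud.clients.utils.retry module imports as packaged; the flaky coroutine is a stand-in for a session call such as list_tools and is not part of hud:

import asyncio

from httpx import HTTPStatusError, Request, Response

from hud.clients.utils.retry import retry_with_backoff

attempts = 0


async def flaky_list_tools() -> str:
    """Stand-in for client_session.list_tools: fail twice with a 502, then succeed."""
    global attempts
    attempts += 1
    if attempts < 3:
        request = Request("POST", "https://example.invalid/mcp")
        raise HTTPStatusError(
            "Bad Gateway", request=request, response=Response(502, request=request)
        )
    return "tools"


async def main() -> None:
    result = await retry_with_backoff(
        flaky_list_tools,
        retry_delay=0.1,  # keep the demo fast; the packaged default is 2.0 seconds
        operation_name="list_tools_demo",
    )
    print(result)  # "tools", after two retried 502 responses


asyncio.run(main())
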
hud/clients/utils/__init__.py CHANGED
@@ -1 +1,26 @@
 """HUD MCP client utilities."""
+
+from __future__ import annotations
+
+from .retry import (
+    DEFAULT_BACKOFF_FACTOR,
+    DEFAULT_MAX_RETRIES,
+    DEFAULT_RETRY_DELAY,
+    DEFAULT_RETRY_STATUS_CODES,
+    is_retryable_error,
+    retry_with_backoff,
+    with_retry,
+)
+from .retry_transport import RetryTransport, create_retry_httpx_client
+
+__all__ = [
+    "DEFAULT_BACKOFF_FACTOR",
+    "DEFAULT_MAX_RETRIES",
+    "DEFAULT_RETRY_DELAY",
+    "DEFAULT_RETRY_STATUS_CODES",
+    "RetryTransport",
+    "create_retry_httpx_client",
+    "is_retryable_error",
+    "retry_with_backoff",
+    "with_retry",
+]
hud/clients/utils/retry.py ADDED
@@ -0,0 +1,186 @@
+"""Shared retry utilities for MCP client operations."""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from functools import wraps
+from typing import TYPE_CHECKING, Any, TypeVar
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+from httpx import HTTPStatusError
+from mcp.shared.exceptions import McpError
+
+logger = logging.getLogger(__name__)
+
+T = TypeVar("T")
+
+# Default retry configuration matching requests.py
+DEFAULT_MAX_RETRIES = 4
+DEFAULT_RETRY_DELAY = 2.0
+DEFAULT_RETRY_STATUS_CODES = {502, 503, 504}
+DEFAULT_BACKOFF_FACTOR = 2.0
+
+
+def is_retryable_error(error: Exception, retry_status_codes: set[int]) -> bool:
+    """
+    Check if an error is retryable based on status codes.
+
+    Args:
+        error: The exception to check
+        retry_status_codes: Set of HTTP status codes to retry on
+
+    Returns:
+        True if the error is retryable, False otherwise
+    """
+    # Check for HTTP status errors with retryable status codes
+    if isinstance(error, HTTPStatusError):
+        return error.response.status_code in retry_status_codes
+
+    # Check for MCP errors that might wrap HTTP errors
+    if isinstance(error, McpError):
+        error_msg = str(error).lower()
+        # Check for common gateway error patterns in the message
+        for code in retry_status_codes:
+            if str(code) in error_msg:
+                return True
+        # Check for gateway error keywords
+        if any(
+            keyword in error_msg
+            for keyword in ["bad gateway", "service unavailable", "gateway timeout"]
+        ):
+            return True
+
+    # Check for generic errors with status codes in the message
+    error_msg = str(error)
+    for code in retry_status_codes:
+        if f"{code}" in error_msg or f"status {code}" in error_msg.lower():
+            return True
+
+    return False
+
+
+async def retry_with_backoff(
+    func: Callable[..., Any],
+    *args: Any,
+    max_retries: int = DEFAULT_MAX_RETRIES,
+    retry_delay: float = DEFAULT_RETRY_DELAY,
+    retry_status_codes: set[int] | None = None,
+    backoff_factor: float = DEFAULT_BACKOFF_FACTOR,
+    operation_name: str | None = None,
+    **kwargs: Any,
+) -> Any:
+    """
+    Execute an async function with retry logic and exponential backoff.
+
+    This matches the retry behavior in requests.py but can be applied
+    to any async function, particularly MCP client operations.
+
+    Args:
+        func: The async function to retry
+        *args: Positional arguments for the function
+        max_retries: Maximum number of retry attempts
+        retry_delay: Initial delay between retries in seconds
+        retry_status_codes: HTTP status codes to retry on
+        backoff_factor: Multiplier for exponential backoff
+        operation_name: Name of the operation for logging
+        **kwargs: Keyword arguments for the function
+
+    Returns:
+        The result of the function call
+
+    Raises:
+        The last exception if all retries are exhausted
+    """
+    if retry_status_codes is None:
+        retry_status_codes = DEFAULT_RETRY_STATUS_CODES
+
+    operation = operation_name or func.__name__
+    last_error = None
+
+    for attempt in range(max_retries + 1):
+        try:
+            result = await func(*args, **kwargs)
+            return result
+        except Exception as e:
+            last_error = e
+
+            # Check if this is a retryable error
+            if not is_retryable_error(e, retry_status_codes):
+                # Not retryable, raise immediately
+                raise
+
+            # Don't retry if we've exhausted attempts
+            if attempt >= max_retries:
+                logger.debug(
+                    "Operation '%s' failed after %d retries: %s",
+                    operation,
+                    max_retries,
+                    e,
+                )
+                raise
+
+            # Calculate backoff delay (exponential backoff)
+            delay = retry_delay * (backoff_factor**attempt)
+
+            logger.warning(
+                "Operation '%s' failed with retryable error, "
+                "retrying in %.2f seconds (attempt %d/%d): %s",
+                operation,
+                delay,
+                attempt + 1,
+                max_retries,
+                e,
+            )
+
+            await asyncio.sleep(delay)
+
+    # This should never be reached, but just in case
+    if last_error:
+        raise last_error
+    raise RuntimeError(f"Unexpected retry loop exit for operation '{operation}'")
+
+
+def with_retry(
+    max_retries: int = DEFAULT_MAX_RETRIES,
+    retry_delay: float = DEFAULT_RETRY_DELAY,
+    retry_status_codes: set[int] | None = None,
+    backoff_factor: float = DEFAULT_BACKOFF_FACTOR,
+) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
+    """
+    Decorator to add retry logic to async methods.
+
+    Usage:
+        @with_retry(max_retries=3)
+        async def my_method(self, ...):
+            ...
+
+    Args:
+        max_retries: Maximum number of retry attempts
+        retry_delay: Initial delay between retries
+        retry_status_codes: HTTP status codes to retry on
+        backoff_factor: Multiplier for exponential backoff
+
+    Returns:
+        Decorated function with retry logic
+    """
+
+    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
+        @wraps(func)
+        async def wrapper(*args: Any, **kwargs: Any) -> Any:
+            return await retry_with_backoff(
+                func,
+                *args,
+                max_retries=max_retries,
+                retry_delay=retry_delay,
+                retry_status_codes=retry_status_codes,
+                backoff_factor=backoff_factor,
+                operation_name=func.__name__,
+                **kwargs,
+            )
+
+        return wrapper
+
+    return decorator
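
With the defaults above (max_retries=4, retry_delay=2.0, backoff_factor=2.0), a persistently failing call sleeps 2, 4, 8, and 16 seconds between its five attempts, roughly 30 seconds of backoff in total, before the final exception is re-raised. A minimal sketch of the decorator path follows; the connector class is hypothetical and only shows where @with_retry attaches, assuming the module imports as packaged:

import asyncio

from hud.clients.utils.retry import with_retry


class ExampleConnector:
    """Hypothetical class, not part of hud; shows the decorator on an async method."""

    @with_retry(max_retries=3, retry_delay=0.5)
    async def fetch_manifest(self) -> dict:
        # Any exception that is_retryable_error() accepts (502/503/504 patterns)
        # is retried with exponential backoff; everything else propagates at once.
        return {"tools": []}


async def main() -> None:
    print(await ExampleConnector().fetch_manifest())  # {'tools': []}


asyncio.run(main())
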
hud/datasets/execution/parallel.py CHANGED
@@ -114,36 +114,58 @@ def _process_worker(
             task_name = task_dict.get("prompt") or f"Task {index}"
 
             # Use the job_id to group all tasks under the same job
-            with hud.trace(task_name, job_id=job_id, task_id=task_dict.get("id")):
-                # Convert dict to Task
-                task = Task(**task_dict)
-
-                # Create agent instance
-                agent = agent_class(**(agent_config or {}))
-
-                if auto_respond:
-                    agent.response_agent = ResponseAgent()
-
-                # Run the task
-                result = await agent.run(task, max_steps=max_steps)
-
-                # Extract and print evaluation score for visibility
-                reward = getattr(result, "reward", "N/A")
-                logger.info(
-                    "[Worker %s] Task %s: Completed (reward: %s)",
-                    worker_id,
-                    index,
-                    reward,
-                )
-
-                logger.info(
-                    "[Worker %s] Completed task %s (reward: %s)",
-                    worker_id,
-                    index,
-                    reward,
-                )
-
-                return (index, result)
+            with hud.trace(
+                task_name, job_id=job_id, task_id=task_dict.get("id")
+            ):
+                try:
+                    # Convert dict to Task
+                    task = Task(**task_dict)
+
+                    # Create agent instance
+                    agent = agent_class(**(agent_config or {}))
+
+                    if auto_respond:
+                        agent.response_agent = ResponseAgent()
+
+                    # Run the task - this should ALWAYS return a result, even on error
+                    result = await agent.run(task, max_steps=max_steps)
+
+                    # Extract and print evaluation score for visibility
+                    reward = getattr(result, "reward", "N/A")
+                    logger.info(
+                        "[Worker %s] Task %s: ✓ Completed (reward: %s)",
+                        worker_id,
+                        index,
+                        reward,
+                    )
+
+                    logger.info(
+                        "[Worker %s] Completed task %s (reward: %s)",
+                        worker_id,
+                        index,
+                        reward,
+                    )
+
+                    return (index, result)
+                except Exception as e:
+                    # Even if there's an exception, ensure we have a proper result
+                    logger.error(
+                        "[Worker %s] Task %s failed during execution: %s",
+                        worker_id,
+                        index,
+                        str(e)[:200],
+                    )
+                    # Create a proper Trace result for errors
+                    from hud.types import Trace
+
+                    error_result = Trace(
+                        reward=0.0,
+                        done=True,
+                        content=f"Task execution failed: {e}",
+                        isError=True,
+                        info={"error": str(e), "traceback": traceback.format_exc()},
+                    )
+                    return (index, error_result)
 
         except Exception as e:
             error_msg = f"Worker {worker_id}: Task {index} failed: {e}"
@@ -190,22 +212,6 @@ def _process_worker(
     try:
        # Run the async batch processing
        results = loop.run_until_complete(process_batch())
-
-        # CRITICAL: Ensure telemetry is fully sent before process exits
-        # Two things need to complete:
-        # 1. The trace context's __exit__ already called _update_task_status_sync (blocking)
-        # 2. But spans are buffered in BatchSpanProcessor and need explicit flush
-
-        from opentelemetry import trace as otel_trace
-
-        provider = otel_trace.get_tracer_provider()
-        if provider and hasattr(provider, "force_flush"):
-            # This forces BatchSpanProcessor to export all buffered spans NOW
-            # The method returns True if successful, False if timeout
-            success = provider.force_flush(timeout_millis=5000)  # 5 second timeout  # type: ignore
-            if not success:
-                logger.warning("Worker %s: Telemetry flush timed out", worker_id)
-
        return results
    except KeyboardInterrupt:
        logger.info("Worker %s: Interrupted by user, stopping gracefully", worker_id)
@@ -230,6 +236,25 @@ def _process_worker(
        logger.error("Worker %s batch processing failed: %s", worker_id, e)
        return [(idx, {"error": str(e), "isError": True}) for idx, _ in task_batch]
    finally:
+        # CRITICAL: Always ensure telemetry is fully sent before process exits
+        # This must happen in finally block to ensure it runs even on errors
+        try:
+            from opentelemetry import trace as otel_trace
+
+            provider = otel_trace.get_tracer_provider()
+            if provider and hasattr(provider, "force_flush"):
+                # This forces BatchSpanProcessor to export all buffered spans NOW
+                # The method returns True if successful, False if timeout
+                success = provider.force_flush(
+                    timeout_millis=10000
+                )  # 10 second timeout  # type: ignore
+                if not success:
+                    logger.warning("Worker %s: Telemetry flush timed out", worker_id)
+                else:
+                    logger.debug("Worker %s: Telemetry flushed successfully", worker_id)
+        except Exception as flush_error:
+            logger.error("Worker %s: Failed to flush telemetry: %s", worker_id, flush_error)
+
        # Clean up the event loop
        try:
            loop.close()
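
The net effect of these three hunks: a per-task failure now yields an (index, Trace) tuple with reward=0.0 and isError=True instead of escaping the trace context, batch-level failures still fall back to plain error dicts, and the telemetry flush moved into the finally block with a longer 10-second timeout. A hypothetical aggregation helper (not part of hud) showing how a caller could split those mixed results:

from typing import Any


def summarize(results: list[tuple[int, Any]]) -> dict[str, Any]:
    """Split worker output into failures and a mean reward over successes.

    Items may be hud.types.Trace objects (a success, or the error Trace built
    above) or the plain {"error": ..., "isError": True} dicts from the
    batch-level fallback, so the fields are read defensively.
    """

    def is_error(item: Any) -> bool:
        if isinstance(item, dict):
            return bool(item.get("isError"))
        return bool(getattr(item, "isError", False))

    failed = [index for index, item in results if is_error(item)]
    rewards = [
        float(getattr(item, "reward", 0.0) or 0.0)
        for _, item in results
        if not is_error(item)
    ]
    return {
        "failed_indices": failed,
        "mean_reward": sum(rewards) / len(rewards) if rewards else 0.0,
    }
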
hud/shared/hints.py CHANGED
@@ -144,9 +144,9 @@ def render_hints(hints: Iterable[Hint] | None, *, design: Any | None = None) ->
 
     try:
         if design is None:
-            from hud.utils.design import design as default_design  # lazy import
+            from hud.utils.hud_console import hud_console as default_design  # lazy import
 
-            design = default_design
+            hud_console = default_design
     except Exception:
         # If design is unavailable (non-CLI contexts), silently skip rendering
         return
@@ -155,23 +155,23 @@ def render_hints(hints: Iterable[Hint] | None, *, design: Any | None = None) ->
         try:
             # Compact rendering - skip title if same as message
             if hint.title and hint.title != hint.message:
-                design.warning(f"{hint.title}: {hint.message}")
+                hud_console.warning(f"{hint.title}: {hint.message}")
             else:
-                design.warning(hint.message)
+                hud_console.warning(hint.message)
 
             # Tips as bullet points
             if hint.tips:
                 for tip in hint.tips:
-                    design.info(f" • {tip}")
+                    hud_console.info(f" • {tip}")
 
             # Only show command examples if provided
             if hint.command_examples:
                 for cmd in hint.command_examples:
-                    design.command_example(cmd)
+                    hud_console.command_example(cmd)
 
             # Only show docs URL if provided
             if hint.docs_url:
-                design.link(hint.docs_url)
+                hud_console.link(hint.docs_url)
         except Exception:
             logger.warning("Failed to render hint: %s", hint)
             continue
hud/tools/grounding/grounder.py CHANGED
@@ -9,7 +9,6 @@ import re
 
 from openai import AsyncOpenAI
 from opentelemetry import trace
-from PIL import Image
 
 from hud import instrument
 from hud.tools.grounding.config import GrounderConfig  # noqa: TC001
@@ -45,6 +44,8 @@ class Grounder:
             (processed_width, processed_height))
         """
         # Decode image
+        from PIL import Image
+
         image_bytes = base64.b64decode(image_b64)
         img = Image.open(io.BytesIO(image_bytes))
         original_size = (img.width, img.height)
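
Deferring the Pillow import into the method means importing the grounding module no longer requires Pillow at import time; the cost is a one-off module import on the first call. A standalone toy version of the decode step (not the hud.Grounder API) showing the same deferred-import pattern:

import base64
import io


def decode_image(image_b64: str) -> tuple[int, int]:
    """Toy stand-in for the decode step above; not part of hud."""
    from PIL import Image  # deferred: Pillow is only needed once an image arrives

    image_bytes = base64.b64decode(image_b64)
    img = Image.open(io.BytesIO(image_bytes))
    return (img.width, img.height)
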
hud/types.py CHANGED
@@ -29,9 +29,9 @@ class MCPToolCall(CallToolRequestParams):
 
     def __rich__(self) -> str:
         """Rich representation with color formatting."""
-        from hud.utils.design import design
+        from hud.utils.hud_console import hud_console
 
-        return design.format_tool_call(self.name, self.arguments)
+        return hud_console.format_tool_call(self.name, self.arguments)
 
 
 class MCPToolResult(CallToolResult):
@@ -74,10 +74,10 @@ class MCPToolResult(CallToolResult):
 
     def __rich__(self) -> str:
         """Rich representation with color formatting."""
-        from hud.utils.design import design
+        from hud.utils.hud_console import hud_console
 
         content_summary = self._get_content_summary()
-        return design.format_tool_result(content_summary, self.isError)
+        return hud_console.format_tool_result(content_summary, self.isError)
 
 
 class AgentResponse(BaseModel):
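
Both __rich__ hooks keep their signatures, so anything rendering these models through Rich keeps working; only the formatter behind them changed. A hedged sketch, where the tool name and arguments are purely illustrative and MCPToolCall is assumed to take name/arguments from the MCP CallToolRequestParams base model:

from rich import print as rprint

from hud.types import MCPToolCall

call = MCPToolCall(name="computer", arguments={"action": "screenshot"})
rprint(call)  # Rich calls __rich__, which now delegates to hud_console.format_tool_call
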
hud/utils/__init__.py CHANGED
@@ -1,10 +1,10 @@
 from __future__ import annotations
 
-from .design import HUDDesign, design
+from .hud_console import HUDConsole, hud_console
 from .telemetry import stream
 
 __all__ = [
-    "HUDDesign",
-    "design",
+    "HUDConsole",
+    "hud_console",
     "stream",
 ]
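
The old HUDDesign/design names are no longer re-exported from hud.utils, so downstream imports need a one-line update; HUDConsole is the class and hud_console the shared instance. A short sketch (only the re-exported names are confirmed by the diff; direct construction of HUDConsole is an assumption):

from hud.utils import HUDConsole, hud_console

hud_console.info("using the renamed console helper")  # info() is shown in hints.py above
console = HUDConsole()  # assumed: separate instance, if no-arg construction is supported
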