hud-python 0.4.35__py3-none-any.whl → 0.4.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (63)
  1. hud/agents/__init__.py +2 -0
  2. hud/agents/lite_llm.py +72 -0
  3. hud/agents/openai_chat_generic.py +21 -7
  4. hud/agents/tests/test_claude.py +32 -7
  5. hud/agents/tests/test_openai.py +29 -6
  6. hud/cli/__init__.py +228 -79
  7. hud/cli/build.py +26 -6
  8. hud/cli/dev.py +21 -40
  9. hud/cli/eval.py +96 -15
  10. hud/cli/flows/tasks.py +198 -65
  11. hud/cli/init.py +222 -629
  12. hud/cli/pull.py +6 -0
  13. hud/cli/push.py +11 -1
  14. hud/cli/rl/__init__.py +14 -4
  15. hud/cli/rl/celebrate.py +187 -0
  16. hud/cli/rl/config.py +15 -8
  17. hud/cli/rl/local_runner.py +44 -20
  18. hud/cli/rl/remote_runner.py +166 -87
  19. hud/cli/rl/viewer.py +141 -0
  20. hud/cli/rl/wait_utils.py +89 -0
  21. hud/cli/tests/test_build.py +3 -27
  22. hud/cli/tests/test_mcp_server.py +1 -12
  23. hud/cli/utils/config.py +85 -0
  24. hud/cli/utils/docker.py +21 -39
  25. hud/cli/utils/env_check.py +196 -0
  26. hud/cli/utils/environment.py +4 -3
  27. hud/cli/utils/interactive.py +2 -1
  28. hud/cli/utils/local_runner.py +204 -0
  29. hud/cli/utils/metadata.py +3 -1
  30. hud/cli/utils/package_runner.py +292 -0
  31. hud/cli/utils/remote_runner.py +4 -1
  32. hud/cli/utils/source_hash.py +108 -0
  33. hud/clients/base.py +1 -1
  34. hud/clients/fastmcp.py +1 -1
  35. hud/clients/mcp_use.py +30 -7
  36. hud/datasets/parallel.py +3 -1
  37. hud/datasets/runner.py +4 -1
  38. hud/otel/config.py +1 -1
  39. hud/otel/context.py +40 -6
  40. hud/rl/buffer.py +3 -0
  41. hud/rl/tests/test_learner.py +1 -1
  42. hud/rl/vllm_adapter.py +1 -1
  43. hud/server/server.py +234 -7
  44. hud/server/tests/test_add_tool.py +60 -0
  45. hud/server/tests/test_context.py +128 -0
  46. hud/server/tests/test_mcp_server_handlers.py +44 -0
  47. hud/server/tests/test_mcp_server_integration.py +405 -0
  48. hud/server/tests/test_mcp_server_more.py +247 -0
  49. hud/server/tests/test_run_wrapper.py +53 -0
  50. hud/server/tests/test_server_extra.py +166 -0
  51. hud/server/tests/test_sigterm_runner.py +78 -0
  52. hud/settings.py +38 -0
  53. hud/shared/hints.py +2 -2
  54. hud/telemetry/job.py +2 -2
  55. hud/types.py +9 -2
  56. hud/utils/tasks.py +32 -24
  57. hud/utils/tests/test_version.py +1 -1
  58. hud/version.py +1 -1
  59. {hud_python-0.4.35.dist-info → hud_python-0.4.37.dist-info}/METADATA +43 -23
  60. {hud_python-0.4.35.dist-info → hud_python-0.4.37.dist-info}/RECORD +63 -46
  61. {hud_python-0.4.35.dist-info → hud_python-0.4.37.dist-info}/WHEEL +0 -0
  62. {hud_python-0.4.35.dist-info → hud_python-0.4.37.dist-info}/entry_points.txt +0 -0
  63. {hud_python-0.4.35.dist-info → hud_python-0.4.37.dist-info}/licenses/LICENSE +0 -0
hud/agents/__init__.py CHANGED
@@ -2,12 +2,14 @@ from __future__ import annotations
2
2
 
3
3
  from .base import MCPAgent
4
4
  from .claude import ClaudeAgent
5
+ from .lite_llm import LiteAgent
5
6
  from .openai import OperatorAgent
6
7
  from .openai_chat_generic import GenericOpenAIChatAgent
7
8
 
8
9
  __all__ = [
9
10
  "ClaudeAgent",
10
11
  "GenericOpenAIChatAgent",
12
+ "LiteAgent",
11
13
  "MCPAgent",
12
14
  "OperatorAgent",
13
15
  ]
hud/agents/lite_llm.py ADDED
@@ -0,0 +1,72 @@
1
+ """LiteLLM MCP Agent implementation.
2
+
3
+ Same OpenAI chat-completions shape + MCP tool plumbing,
4
+ but transport is LiteLLM and (optionally) tools are shaped by LiteLLM's MCP transformer.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ from typing import Any, ClassVar
11
+
12
+ import litellm
13
+
14
+ from .openai_chat_generic import GenericOpenAIChatAgent
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ # Prefer LiteLLM's built-in MCP -> OpenAI tool transformer (handles Bedrock nuances)
19
+ try:
20
+ from litellm.experimental_mcp_client.tools import (
21
+ transform_mcp_tool_to_openai_tool,
22
+ )
23
+ except Exception: # pragma: no cover - optional dependency
24
+ transform_mcp_tool_to_openai_tool = None # type: ignore
25
+
26
+
27
+ class LiteAgent(GenericOpenAIChatAgent):
28
+ """
29
+ Same OpenAI chat-completions shape + MCP tool plumbing,
30
+ but transport is LiteLLM and (optionally) tools are shaped by LiteLLM's MCP transformer.
31
+ """
32
+
33
+ metadata: ClassVar[dict[str, Any]] = {}
34
+
35
+ def __init__(
36
+ self,
37
+ *,
38
+ model_name: str = "gpt-4o-mini",
39
+ completion_kwargs: dict[str, Any] | None = None,
40
+ **agent_kwargs: Any,
41
+ ) -> None:
42
+ # We don't need an OpenAI client; pass None
43
+ super().__init__(
44
+ openai_client=None,
45
+ model_name=model_name,
46
+ completion_kwargs=completion_kwargs,
47
+ **agent_kwargs,
48
+ )
49
+
50
+ def get_tool_schemas(self) -> list[dict]:
51
+ # Prefer LiteLLM's stricter transformer (handles Bedrock & friends)
52
+ if transform_mcp_tool_to_openai_tool is not None:
53
+ return [
54
+ transform_mcp_tool_to_openai_tool(t) # returns ChatCompletionToolParam-like dict
55
+ for t in self.get_available_tools()
56
+ ]
57
+ # Fallback to the generic OpenAI sanitizer
58
+ return GenericOpenAIChatAgent.get_tool_schemas(self)
59
+
60
+ async def _invoke_chat_completion(
61
+ self,
62
+ *,
63
+ messages: list[Any],
64
+ tools: list[dict] | None,
65
+ extra: dict[str, Any],
66
+ ):
67
+ return await litellm.acompletion(
68
+ model=self.model_name,
69
+ messages=messages,
70
+ tools=tools or None, # LiteLLM tolerates None better than []
71
+ **extra,
72
+ )
hud/agents/openai_chat_generic.py CHANGED
@@ -42,7 +42,7 @@ class GenericOpenAIChatAgent(MCPAgent):
42
42
  def __init__(
43
43
  self,
44
44
  *,
45
- openai_client: AsyncOpenAI,
45
+ openai_client: AsyncOpenAI | None,
46
46
  model_name: str = "gpt-4o-mini",
47
47
  completion_kwargs: dict[str, Any] | None = None,
48
48
  **agent_kwargs: Any,
@@ -171,6 +171,23 @@ class GenericOpenAIChatAgent(MCPAgent):
171
171
  openai_tools.append(openai_tool)
172
172
  return openai_tools
173
173
 
174
+ async def _invoke_chat_completion(
175
+ self,
176
+ *,
177
+ messages: list[Any],
178
+ tools: list[dict] | None,
179
+ extra: dict[str, Any],
180
+ ):
181
+ if self.oai is None:
182
+ raise ValueError("openai_client is required for GenericOpenAIChatAgent")
183
+ # default transport = OpenAI SDK
184
+ return await self.oai.chat.completions.create(
185
+ model=self.model_name,
186
+ messages=messages,
187
+ tools=tools, # already ChatCompletionToolParam-shaped
188
+ **extra,
189
+ )
190
+
174
191
  @instrument(
175
192
  span_type="agent",
176
193
  record_args=False,
@@ -180,17 +197,14 @@ class GenericOpenAIChatAgent(MCPAgent):
180
197
  """Send chat request to OpenAI and convert the response."""
181
198
 
182
199
  # Convert MCP tool schemas to OpenAI format
183
- mcp_schemas = self.get_tool_schemas()
200
+ tools = cast("list[ChatCompletionToolParam]", self.get_tool_schemas())
184
201
 
185
202
  protected_keys = {"model", "messages", "tools"}
186
203
  extra = {k: v for k, v in (self.completion_kwargs or {}).items() if k not in protected_keys}
187
204
 
188
205
  try:
189
- response = await self.oai.chat.completions.create(
190
- model=self.model_name,
191
- messages=messages,
192
- tools=cast("list[ChatCompletionToolParam]", mcp_schemas),
193
- **extra,
206
+ response = await self._invoke_chat_completion(
207
+ messages=messages, tools=tools, extra=extra
194
208
  )
195
209
  except Exception as e:
196
210
  error_content = f"Error getting response {e}"
hud/agents/tests/test_claude.py CHANGED
@@ -86,6 +86,7 @@ class TestClaudeAgent:
86
86
  model_client=mock_model_client,
87
87
  model="claude-3-opus-20240229",
88
88
  max_tokens=1000,
89
+ validate_api_key=False, # Skip validation in tests
89
90
  )
90
91
 
91
92
  assert agent.model_name == "claude-3-opus-20240229"
@@ -93,10 +94,14 @@ class TestClaudeAgent:
93
94
  assert agent.anthropic_client == mock_model_client
94
95
 
95
96
  @pytest.mark.asyncio
96
- async def test_init_without_model_client(self, mock_mcp_client):
97
+ async def test_init_without_model_client(self, mock_mcp_client, mock_anthropic):
97
98
  """Test agent initialization without model client."""
98
99
  with patch("hud.settings.settings.anthropic_api_key", "test_key"):
99
- agent = ClaudeAgent(mcp_client=mock_mcp_client, model="claude-3-opus-20240229")
100
+ agent = ClaudeAgent(
101
+ mcp_client=mock_mcp_client,
102
+ model="claude-3-opus-20240229",
103
+ validate_api_key=False, # Skip validation in tests
104
+ )
100
105
 
101
106
  assert agent.model_name == "claude-3-opus-20240229"
102
107
  assert agent.anthropic_client is not None
@@ -105,7 +110,11 @@ class TestClaudeAgent:
105
110
  async def test_format_blocks(self, mock_mcp_client):
106
111
  """Test formatting content blocks into Claude messages."""
107
112
  mock_model_client = MagicMock()
108
- agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_model_client)
113
+ agent = ClaudeAgent(
114
+ mcp_client=mock_mcp_client,
115
+ model_client=mock_model_client,
116
+ validate_api_key=False, # Skip validation in tests
117
+ )
109
118
 
110
119
  # Test with text only
111
120
  text_blocks: list[types.ContentBlock] = [
@@ -141,7 +150,11 @@ class TestClaudeAgent:
141
150
  async def test_format_tool_results_method(self, mock_mcp_client):
142
151
  """Test the agent's format_tool_results method."""
143
152
  mock_model_client = MagicMock()
144
- agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_model_client)
153
+ agent = ClaudeAgent(
154
+ mcp_client=mock_mcp_client,
155
+ model_client=mock_model_client,
156
+ validate_api_key=False, # Skip validation in tests
157
+ )
145
158
 
146
159
  tool_calls = [
147
160
  MCPToolCall(name="test_tool", arguments={}, id="id1"),
@@ -171,7 +184,11 @@ class TestClaudeAgent:
171
184
  """Test getting model response from Claude API."""
172
185
  # Disable telemetry for this test to avoid backend configuration issues
173
186
  with patch("hud.settings.settings.telemetry_enabled", False):
174
- agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_anthropic)
187
+ agent = ClaudeAgent(
188
+ mcp_client=mock_mcp_client,
189
+ model_client=mock_anthropic,
190
+ validate_api_key=False, # Skip validation in tests
191
+ )
175
192
 
176
193
  # Mock the API response
177
194
  mock_response = MagicMock()
@@ -215,7 +232,11 @@ class TestClaudeAgent:
215
232
  """Test getting text-only response."""
216
233
  # Disable telemetry for this test to avoid backend configuration issues
217
234
  with patch("hud.settings.settings.telemetry_enabled", False):
218
- agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_anthropic)
235
+ agent = ClaudeAgent(
236
+ mcp_client=mock_mcp_client,
237
+ model_client=mock_anthropic,
238
+ validate_api_key=False, # Skip validation in tests
239
+ )
219
240
 
220
241
  mock_response = MagicMock()
221
242
  # Create text block
@@ -242,7 +263,11 @@ class TestClaudeAgent:
242
263
  """Test handling API errors."""
243
264
  # Disable telemetry for this test to avoid backend configuration issues
244
265
  with patch("hud.settings.settings.telemetry_enabled", False):
245
- agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_anthropic)
266
+ agent = ClaudeAgent(
267
+ mcp_client=mock_mcp_client,
268
+ model_client=mock_anthropic,
269
+ validate_api_key=False, # Skip validation in tests
270
+ )
246
271
 
247
272
  # Mock API error
248
273
  mock_anthropic.beta.messages.create = AsyncMock(
hud/agents/tests/test_openai.py CHANGED
@@ -44,7 +44,10 @@ class TestOperatorAgent:
44
44
  """Test agent initialization."""
45
45
  mock_model_client = MagicMock()
46
46
  agent = OperatorAgent(
47
- mcp_client=mock_mcp_client, model_client=mock_model_client, model="gpt-4"
47
+ mcp_client=mock_mcp_client,
48
+ model_client=mock_model_client,
49
+ model="gpt-4",
50
+ validate_api_key=False, # Skip validation in tests
48
51
  )
49
52
 
50
53
  assert agent.model_name == "openai-gpt-4"
@@ -55,7 +58,11 @@ class TestOperatorAgent:
55
58
  async def test_format_blocks(self, mock_mcp_client):
56
59
  """Test formatting content blocks."""
57
60
  mock_model_client = MagicMock()
58
- agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_model_client)
61
+ agent = OperatorAgent(
62
+ mcp_client=mock_mcp_client,
63
+ model_client=mock_model_client,
64
+ validate_api_key=False, # Skip validation in tests
65
+ )
59
66
 
60
67
  # Test with text blocks
61
68
  blocks: list[types.ContentBlock] = [
@@ -85,7 +92,11 @@ class TestOperatorAgent:
85
92
  @pytest.mark.asyncio
86
93
  async def test_format_tool_results(self, mock_mcp_client, mock_openai):
87
94
  """Test formatting tool results."""
88
- agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
95
+ agent = OperatorAgent(
96
+ mcp_client=mock_mcp_client,
97
+ model_client=mock_openai,
98
+ validate_api_key=False, # Skip validation in tests
99
+ )
89
100
 
90
101
  tool_calls = [
91
102
  MCPToolCall(name="test_tool", arguments={}, id="call_123"), # type: ignore
@@ -111,7 +122,11 @@ class TestOperatorAgent:
111
122
  @pytest.mark.asyncio
112
123
  async def test_format_tool_results_with_error(self, mock_mcp_client, mock_openai):
113
124
  """Test formatting tool results with errors."""
114
- agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
125
+ agent = OperatorAgent(
126
+ mcp_client=mock_mcp_client,
127
+ model_client=mock_openai,
128
+ validate_api_key=False, # Skip validation in tests
129
+ )
115
130
 
116
131
  tool_calls = [
117
132
  MCPToolCall(name="failing_tool", arguments={}, id="call_error"), # type: ignore
@@ -131,7 +146,11 @@ class TestOperatorAgent:
131
146
  @pytest.mark.asyncio
132
147
  async def test_get_model_response(self, mock_mcp_client, mock_openai):
133
148
  """Test getting model response from OpenAI API."""
134
- agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
149
+ agent = OperatorAgent(
150
+ mcp_client=mock_mcp_client,
151
+ model_client=mock_openai,
152
+ validate_api_key=False, # Skip validation in tests
153
+ )
135
154
 
136
155
  # Set up available tools so agent doesn't return "No computer use tools available"
137
156
  agent._available_tools = [
@@ -162,7 +181,11 @@ class TestOperatorAgent:
162
181
  @pytest.mark.asyncio
163
182
  async def test_handle_empty_response(self, mock_mcp_client, mock_openai):
164
183
  """Test handling empty response from API."""
165
- agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
184
+ agent = OperatorAgent(
185
+ mcp_client=mock_mcp_client,
186
+ model_client=mock_openai,
187
+ validate_api_key=False, # Skip validation in tests
188
+ )
166
189
 
167
190
  # Set up available tools
168
191
  agent._available_tools = [