hud-python 0.4.35__py3-none-any.whl → 0.4.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/agents/__init__.py +2 -0
- hud/agents/lite_llm.py +72 -0
- hud/agents/openai_chat_generic.py +21 -7
- hud/agents/tests/test_claude.py +32 -7
- hud/agents/tests/test_openai.py +29 -6
- hud/cli/__init__.py +228 -79
- hud/cli/build.py +26 -6
- hud/cli/dev.py +21 -40
- hud/cli/eval.py +96 -15
- hud/cli/flows/tasks.py +198 -65
- hud/cli/init.py +222 -629
- hud/cli/pull.py +6 -0
- hud/cli/push.py +11 -1
- hud/cli/rl/__init__.py +14 -4
- hud/cli/rl/celebrate.py +187 -0
- hud/cli/rl/config.py +15 -8
- hud/cli/rl/local_runner.py +44 -20
- hud/cli/rl/remote_runner.py +166 -87
- hud/cli/rl/viewer.py +141 -0
- hud/cli/rl/wait_utils.py +89 -0
- hud/cli/tests/test_build.py +3 -27
- hud/cli/tests/test_mcp_server.py +1 -12
- hud/cli/utils/config.py +85 -0
- hud/cli/utils/docker.py +21 -39
- hud/cli/utils/env_check.py +196 -0
- hud/cli/utils/environment.py +4 -3
- hud/cli/utils/interactive.py +2 -1
- hud/cli/utils/local_runner.py +204 -0
- hud/cli/utils/metadata.py +3 -1
- hud/cli/utils/package_runner.py +292 -0
- hud/cli/utils/remote_runner.py +4 -1
- hud/cli/utils/source_hash.py +108 -0
- hud/clients/base.py +1 -1
- hud/clients/fastmcp.py +1 -1
- hud/clients/mcp_use.py +30 -7
- hud/datasets/parallel.py +3 -1
- hud/datasets/runner.py +4 -1
- hud/otel/config.py +1 -1
- hud/otel/context.py +40 -6
- hud/rl/buffer.py +3 -0
- hud/rl/tests/test_learner.py +1 -1
- hud/rl/vllm_adapter.py +1 -1
- hud/server/server.py +234 -7
- hud/server/tests/test_add_tool.py +60 -0
- hud/server/tests/test_context.py +128 -0
- hud/server/tests/test_mcp_server_handlers.py +44 -0
- hud/server/tests/test_mcp_server_integration.py +405 -0
- hud/server/tests/test_mcp_server_more.py +247 -0
- hud/server/tests/test_run_wrapper.py +53 -0
- hud/server/tests/test_server_extra.py +166 -0
- hud/server/tests/test_sigterm_runner.py +78 -0
- hud/settings.py +38 -0
- hud/shared/hints.py +2 -2
- hud/telemetry/job.py +2 -2
- hud/types.py +9 -2
- hud/utils/tasks.py +32 -24
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.35.dist-info → hud_python-0.4.37.dist-info}/METADATA +43 -23
- {hud_python-0.4.35.dist-info → hud_python-0.4.37.dist-info}/RECORD +63 -46
- {hud_python-0.4.35.dist-info → hud_python-0.4.37.dist-info}/WHEEL +0 -0
- {hud_python-0.4.35.dist-info → hud_python-0.4.37.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.35.dist-info → hud_python-0.4.37.dist-info}/licenses/LICENSE +0 -0
hud/agents/__init__.py
CHANGED
|
@@ -2,12 +2,14 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from .base import MCPAgent
|
|
4
4
|
from .claude import ClaudeAgent
|
|
5
|
+
from .lite_llm import LiteAgent
|
|
5
6
|
from .openai import OperatorAgent
|
|
6
7
|
from .openai_chat_generic import GenericOpenAIChatAgent
|
|
7
8
|
|
|
8
9
|
__all__ = [
|
|
9
10
|
"ClaudeAgent",
|
|
10
11
|
"GenericOpenAIChatAgent",
|
|
12
|
+
"LiteAgent",
|
|
11
13
|
"MCPAgent",
|
|
12
14
|
"OperatorAgent",
|
|
13
15
|
]
|
hud/agents/lite_llm.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""LiteLLM MCP Agent implementation.
|
|
2
|
+
|
|
3
|
+
Same OpenAI chat-completions shape + MCP tool plumbing,
|
|
4
|
+
but transport is LiteLLM and (optionally) tools are shaped by LiteLLM's MCP transformer.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from typing import Any, ClassVar
|
|
11
|
+
|
|
12
|
+
import litellm
|
|
13
|
+
|
|
14
|
+
from .openai_chat_generic import GenericOpenAIChatAgent
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
# Prefer LiteLLM's built-in MCP -> OpenAI tool transformer (handles Bedrock nuances)
|
|
19
|
+
try:
|
|
20
|
+
from litellm.experimental_mcp_client.tools import (
|
|
21
|
+
transform_mcp_tool_to_openai_tool,
|
|
22
|
+
)
|
|
23
|
+
except Exception: # pragma: no cover - optional dependency
|
|
24
|
+
transform_mcp_tool_to_openai_tool = None # type: ignore
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class LiteAgent(GenericOpenAIChatAgent):
|
|
28
|
+
"""
|
|
29
|
+
Same OpenAI chat-completions shape + MCP tool plumbing,
|
|
30
|
+
but transport is LiteLLM and (optionally) tools are shaped by LiteLLM's MCP transformer.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
metadata: ClassVar[dict[str, Any]] = {}
|
|
34
|
+
|
|
35
|
+
def __init__(
|
|
36
|
+
self,
|
|
37
|
+
*,
|
|
38
|
+
model_name: str = "gpt-4o-mini",
|
|
39
|
+
completion_kwargs: dict[str, Any] | None = None,
|
|
40
|
+
**agent_kwargs: Any,
|
|
41
|
+
) -> None:
|
|
42
|
+
# We don't need an OpenAI client; pass None
|
|
43
|
+
super().__init__(
|
|
44
|
+
openai_client=None,
|
|
45
|
+
model_name=model_name,
|
|
46
|
+
completion_kwargs=completion_kwargs,
|
|
47
|
+
**agent_kwargs,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
def get_tool_schemas(self) -> list[dict]:
|
|
51
|
+
# Prefer LiteLLM's stricter transformer (handles Bedrock & friends)
|
|
52
|
+
if transform_mcp_tool_to_openai_tool is not None:
|
|
53
|
+
return [
|
|
54
|
+
transform_mcp_tool_to_openai_tool(t) # returns ChatCompletionToolParam-like dict
|
|
55
|
+
for t in self.get_available_tools()
|
|
56
|
+
]
|
|
57
|
+
# Fallback to the generic OpenAI sanitizer
|
|
58
|
+
return GenericOpenAIChatAgent.get_tool_schemas(self)
|
|
59
|
+
|
|
60
|
+
async def _invoke_chat_completion(
|
|
61
|
+
self,
|
|
62
|
+
*,
|
|
63
|
+
messages: list[Any],
|
|
64
|
+
tools: list[dict] | None,
|
|
65
|
+
extra: dict[str, Any],
|
|
66
|
+
):
|
|
67
|
+
return await litellm.acompletion(
|
|
68
|
+
model=self.model_name,
|
|
69
|
+
messages=messages,
|
|
70
|
+
tools=tools or None, # LiteLLM tolerates None better than []
|
|
71
|
+
**extra,
|
|
72
|
+
)
|
hud/agents/openai_chat_generic.py
CHANGED
|
@@ -42,7 +42,7 @@ class GenericOpenAIChatAgent(MCPAgent):
|
|
|
42
42
|
def __init__(
|
|
43
43
|
self,
|
|
44
44
|
*,
|
|
45
|
-
openai_client: AsyncOpenAI,
|
|
45
|
+
openai_client: AsyncOpenAI | None,
|
|
46
46
|
model_name: str = "gpt-4o-mini",
|
|
47
47
|
completion_kwargs: dict[str, Any] | None = None,
|
|
48
48
|
**agent_kwargs: Any,
|
|
@@ -171,6 +171,23 @@ class GenericOpenAIChatAgent(MCPAgent):
|
|
|
171
171
|
openai_tools.append(openai_tool)
|
|
172
172
|
return openai_tools
|
|
173
173
|
|
|
174
|
+
async def _invoke_chat_completion(
|
|
175
|
+
self,
|
|
176
|
+
*,
|
|
177
|
+
messages: list[Any],
|
|
178
|
+
tools: list[dict] | None,
|
|
179
|
+
extra: dict[str, Any],
|
|
180
|
+
):
|
|
181
|
+
if self.oai is None:
|
|
182
|
+
raise ValueError("openai_client is required for GenericOpenAIChatAgent")
|
|
183
|
+
# default transport = OpenAI SDK
|
|
184
|
+
return await self.oai.chat.completions.create(
|
|
185
|
+
model=self.model_name,
|
|
186
|
+
messages=messages,
|
|
187
|
+
tools=tools, # already ChatCompletionToolParam-shaped
|
|
188
|
+
**extra,
|
|
189
|
+
)
|
|
190
|
+
|
|
174
191
|
@instrument(
|
|
175
192
|
span_type="agent",
|
|
176
193
|
record_args=False,
|
|
@@ -180,17 +197,14 @@ class GenericOpenAIChatAgent(MCPAgent):
|
|
|
180
197
|
"""Send chat request to OpenAI and convert the response."""
|
|
181
198
|
|
|
182
199
|
# Convert MCP tool schemas to OpenAI format
|
|
183
|
-
|
|
200
|
+
tools = cast("list[ChatCompletionToolParam]", self.get_tool_schemas())
|
|
184
201
|
|
|
185
202
|
protected_keys = {"model", "messages", "tools"}
|
|
186
203
|
extra = {k: v for k, v in (self.completion_kwargs or {}).items() if k not in protected_keys}
|
|
187
204
|
|
|
188
205
|
try:
|
|
189
|
-
response = await self.
|
|
190
|
-
|
|
191
|
-
messages=messages,
|
|
192
|
-
tools=cast("list[ChatCompletionToolParam]", mcp_schemas),
|
|
193
|
-
**extra,
|
|
206
|
+
response = await self._invoke_chat_completion(
|
|
207
|
+
messages=messages, tools=tools, extra=extra
|
|
194
208
|
)
|
|
195
209
|
except Exception as e:
|
|
196
210
|
error_content = f"Error getting response {e}"
|
hud/agents/tests/test_claude.py
CHANGED
|
@@ -86,6 +86,7 @@ class TestClaudeAgent:
|
|
|
86
86
|
model_client=mock_model_client,
|
|
87
87
|
model="claude-3-opus-20240229",
|
|
88
88
|
max_tokens=1000,
|
|
89
|
+
validate_api_key=False, # Skip validation in tests
|
|
89
90
|
)
|
|
90
91
|
|
|
91
92
|
assert agent.model_name == "claude-3-opus-20240229"
|
|
@@ -93,10 +94,14 @@ class TestClaudeAgent:
|
|
|
93
94
|
assert agent.anthropic_client == mock_model_client
|
|
94
95
|
|
|
95
96
|
@pytest.mark.asyncio
|
|
96
|
-
async def test_init_without_model_client(self, mock_mcp_client):
|
|
97
|
+
async def test_init_without_model_client(self, mock_mcp_client, mock_anthropic):
|
|
97
98
|
"""Test agent initialization without model client."""
|
|
98
99
|
with patch("hud.settings.settings.anthropic_api_key", "test_key"):
|
|
99
|
-
agent = ClaudeAgent(
|
|
100
|
+
agent = ClaudeAgent(
|
|
101
|
+
mcp_client=mock_mcp_client,
|
|
102
|
+
model="claude-3-opus-20240229",
|
|
103
|
+
validate_api_key=False, # Skip validation in tests
|
|
104
|
+
)
|
|
100
105
|
|
|
101
106
|
assert agent.model_name == "claude-3-opus-20240229"
|
|
102
107
|
assert agent.anthropic_client is not None
|
|
@@ -105,7 +110,11 @@ class TestClaudeAgent:
|
|
|
105
110
|
async def test_format_blocks(self, mock_mcp_client):
|
|
106
111
|
"""Test formatting content blocks into Claude messages."""
|
|
107
112
|
mock_model_client = MagicMock()
|
|
108
|
-
agent = ClaudeAgent(
|
|
113
|
+
agent = ClaudeAgent(
|
|
114
|
+
mcp_client=mock_mcp_client,
|
|
115
|
+
model_client=mock_model_client,
|
|
116
|
+
validate_api_key=False, # Skip validation in tests
|
|
117
|
+
)
|
|
109
118
|
|
|
110
119
|
# Test with text only
|
|
111
120
|
text_blocks: list[types.ContentBlock] = [
|
|
@@ -141,7 +150,11 @@ class TestClaudeAgent:
|
|
|
141
150
|
async def test_format_tool_results_method(self, mock_mcp_client):
|
|
142
151
|
"""Test the agent's format_tool_results method."""
|
|
143
152
|
mock_model_client = MagicMock()
|
|
144
|
-
agent = ClaudeAgent(
|
|
153
|
+
agent = ClaudeAgent(
|
|
154
|
+
mcp_client=mock_mcp_client,
|
|
155
|
+
model_client=mock_model_client,
|
|
156
|
+
validate_api_key=False, # Skip validation in tests
|
|
157
|
+
)
|
|
145
158
|
|
|
146
159
|
tool_calls = [
|
|
147
160
|
MCPToolCall(name="test_tool", arguments={}, id="id1"),
|
|
@@ -171,7 +184,11 @@ class TestClaudeAgent:
|
|
|
171
184
|
"""Test getting model response from Claude API."""
|
|
172
185
|
# Disable telemetry for this test to avoid backend configuration issues
|
|
173
186
|
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
174
|
-
agent = ClaudeAgent(
|
|
187
|
+
agent = ClaudeAgent(
|
|
188
|
+
mcp_client=mock_mcp_client,
|
|
189
|
+
model_client=mock_anthropic,
|
|
190
|
+
validate_api_key=False, # Skip validation in tests
|
|
191
|
+
)
|
|
175
192
|
|
|
176
193
|
# Mock the API response
|
|
177
194
|
mock_response = MagicMock()
|
|
@@ -215,7 +232,11 @@ class TestClaudeAgent:
|
|
|
215
232
|
"""Test getting text-only response."""
|
|
216
233
|
# Disable telemetry for this test to avoid backend configuration issues
|
|
217
234
|
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
218
|
-
agent = ClaudeAgent(
|
|
235
|
+
agent = ClaudeAgent(
|
|
236
|
+
mcp_client=mock_mcp_client,
|
|
237
|
+
model_client=mock_anthropic,
|
|
238
|
+
validate_api_key=False, # Skip validation in tests
|
|
239
|
+
)
|
|
219
240
|
|
|
220
241
|
mock_response = MagicMock()
|
|
221
242
|
# Create text block
|
|
@@ -242,7 +263,11 @@ class TestClaudeAgent:
|
|
|
242
263
|
"""Test handling API errors."""
|
|
243
264
|
# Disable telemetry for this test to avoid backend configuration issues
|
|
244
265
|
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
245
|
-
agent = ClaudeAgent(
|
|
266
|
+
agent = ClaudeAgent(
|
|
267
|
+
mcp_client=mock_mcp_client,
|
|
268
|
+
model_client=mock_anthropic,
|
|
269
|
+
validate_api_key=False, # Skip validation in tests
|
|
270
|
+
)
|
|
246
271
|
|
|
247
272
|
# Mock API error
|
|
248
273
|
mock_anthropic.beta.messages.create = AsyncMock(
|
hud/agents/tests/test_openai.py
CHANGED
|
@@ -44,7 +44,10 @@ class TestOperatorAgent:
|
|
|
44
44
|
"""Test agent initialization."""
|
|
45
45
|
mock_model_client = MagicMock()
|
|
46
46
|
agent = OperatorAgent(
|
|
47
|
-
mcp_client=mock_mcp_client,
|
|
47
|
+
mcp_client=mock_mcp_client,
|
|
48
|
+
model_client=mock_model_client,
|
|
49
|
+
model="gpt-4",
|
|
50
|
+
validate_api_key=False, # Skip validation in tests
|
|
48
51
|
)
|
|
49
52
|
|
|
50
53
|
assert agent.model_name == "openai-gpt-4"
|
|
@@ -55,7 +58,11 @@ class TestOperatorAgent:
|
|
|
55
58
|
async def test_format_blocks(self, mock_mcp_client):
|
|
56
59
|
"""Test formatting content blocks."""
|
|
57
60
|
mock_model_client = MagicMock()
|
|
58
|
-
agent = OperatorAgent(
|
|
61
|
+
agent = OperatorAgent(
|
|
62
|
+
mcp_client=mock_mcp_client,
|
|
63
|
+
model_client=mock_model_client,
|
|
64
|
+
validate_api_key=False, # Skip validation in tests
|
|
65
|
+
)
|
|
59
66
|
|
|
60
67
|
# Test with text blocks
|
|
61
68
|
blocks: list[types.ContentBlock] = [
|
|
@@ -85,7 +92,11 @@ class TestOperatorAgent:
|
|
|
85
92
|
@pytest.mark.asyncio
|
|
86
93
|
async def test_format_tool_results(self, mock_mcp_client, mock_openai):
|
|
87
94
|
"""Test formatting tool results."""
|
|
88
|
-
agent = OperatorAgent(
|
|
95
|
+
agent = OperatorAgent(
|
|
96
|
+
mcp_client=mock_mcp_client,
|
|
97
|
+
model_client=mock_openai,
|
|
98
|
+
validate_api_key=False, # Skip validation in tests
|
|
99
|
+
)
|
|
89
100
|
|
|
90
101
|
tool_calls = [
|
|
91
102
|
MCPToolCall(name="test_tool", arguments={}, id="call_123"), # type: ignore
|
|
@@ -111,7 +122,11 @@ class TestOperatorAgent:
|
|
|
111
122
|
@pytest.mark.asyncio
|
|
112
123
|
async def test_format_tool_results_with_error(self, mock_mcp_client, mock_openai):
|
|
113
124
|
"""Test formatting tool results with errors."""
|
|
114
|
-
agent = OperatorAgent(
|
|
125
|
+
agent = OperatorAgent(
|
|
126
|
+
mcp_client=mock_mcp_client,
|
|
127
|
+
model_client=mock_openai,
|
|
128
|
+
validate_api_key=False, # Skip validation in tests
|
|
129
|
+
)
|
|
115
130
|
|
|
116
131
|
tool_calls = [
|
|
117
132
|
MCPToolCall(name="failing_tool", arguments={}, id="call_error"), # type: ignore
|
|
@@ -131,7 +146,11 @@ class TestOperatorAgent:
|
|
|
131
146
|
@pytest.mark.asyncio
|
|
132
147
|
async def test_get_model_response(self, mock_mcp_client, mock_openai):
|
|
133
148
|
"""Test getting model response from OpenAI API."""
|
|
134
|
-
agent = OperatorAgent(
|
|
149
|
+
agent = OperatorAgent(
|
|
150
|
+
mcp_client=mock_mcp_client,
|
|
151
|
+
model_client=mock_openai,
|
|
152
|
+
validate_api_key=False, # Skip validation in tests
|
|
153
|
+
)
|
|
135
154
|
|
|
136
155
|
# Set up available tools so agent doesn't return "No computer use tools available"
|
|
137
156
|
agent._available_tools = [
|
|
@@ -162,7 +181,11 @@ class TestOperatorAgent:
|
|
|
162
181
|
@pytest.mark.asyncio
|
|
163
182
|
async def test_handle_empty_response(self, mock_mcp_client, mock_openai):
|
|
164
183
|
"""Test handling empty response from API."""
|
|
165
|
-
agent = OperatorAgent(
|
|
184
|
+
agent = OperatorAgent(
|
|
185
|
+
mcp_client=mock_mcp_client,
|
|
186
|
+
model_client=mock_openai,
|
|
187
|
+
validate_api_key=False, # Skip validation in tests
|
|
188
|
+
)
|
|
166
189
|
|
|
167
190
|
# Set up available tools
|
|
168
191
|
agent._available_tools = [
|