hud-python 0.4.22__py3-none-any.whl → 0.4.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/agents/base.py +85 -59
- hud/agents/claude.py +5 -1
- hud/agents/grounded_openai.py +3 -1
- hud/agents/misc/response_agent.py +3 -2
- hud/agents/openai.py +2 -2
- hud/agents/openai_chat_generic.py +3 -1
- hud/cli/__init__.py +34 -24
- hud/cli/analyze.py +27 -26
- hud/cli/build.py +50 -46
- hud/cli/debug.py +7 -7
- hud/cli/dev.py +107 -99
- hud/cli/eval.py +31 -29
- hud/cli/hf.py +53 -53
- hud/cli/init.py +28 -28
- hud/cli/list_func.py +22 -22
- hud/cli/pull.py +36 -36
- hud/cli/push.py +76 -74
- hud/cli/remove.py +42 -40
- hud/cli/rl/__init__.py +2 -2
- hud/cli/rl/init.py +41 -41
- hud/cli/rl/pod.py +97 -91
- hud/cli/rl/ssh.py +42 -40
- hud/cli/rl/train.py +75 -73
- hud/cli/rl/utils.py +10 -10
- hud/cli/tests/test_analyze.py +1 -1
- hud/cli/tests/test_analyze_metadata.py +2 -2
- hud/cli/tests/test_pull.py +45 -45
- hud/cli/tests/test_push.py +31 -29
- hud/cli/tests/test_registry.py +15 -15
- hud/cli/utils/environment.py +11 -11
- hud/cli/utils/interactive.py +17 -17
- hud/cli/utils/logging.py +12 -12
- hud/cli/utils/metadata.py +12 -12
- hud/cli/utils/registry.py +5 -5
- hud/cli/utils/runner.py +23 -23
- hud/cli/utils/server.py +16 -16
- hud/clients/mcp_use.py +19 -5
- hud/clients/utils/__init__.py +25 -0
- hud/clients/utils/retry.py +186 -0
- hud/datasets/execution/parallel.py +71 -46
- hud/shared/hints.py +7 -7
- hud/tools/grounding/grounder.py +2 -1
- hud/types.py +4 -4
- hud/utils/__init__.py +3 -3
- hud/utils/{design.py → hud_console.py} +39 -33
- hud/utils/pretty_errors.py +6 -6
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.22.dist-info → hud_python-0.4.24.dist-info}/METADATA +3 -1
- {hud_python-0.4.22.dist-info → hud_python-0.4.24.dist-info}/RECORD +53 -52
- {hud_python-0.4.22.dist-info → hud_python-0.4.24.dist-info}/WHEEL +0 -0
- {hud_python-0.4.22.dist-info → hud_python-0.4.24.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.22.dist-info → hud_python-0.4.24.dist-info}/licenses/LICENSE +0 -0
hud/agents/base.py
CHANGED
|
@@ -11,7 +11,7 @@ from typing import TYPE_CHECKING, Any, ClassVar, Literal
|
|
|
11
11
|
import mcp.types as types
|
|
12
12
|
|
|
13
13
|
from hud.types import AgentResponse, MCPToolCall, MCPToolResult, Trace
|
|
14
|
-
from hud.utils.
|
|
14
|
+
from hud.utils.hud_console import HUDConsole
|
|
15
15
|
from hud.utils.mcp import MCPConfigPatch, patch_mcp_config, setup_hud_telemetry
|
|
16
16
|
|
|
17
17
|
if TYPE_CHECKING:
|
|
@@ -37,7 +37,7 @@ class MCPAgent(ABC):
|
|
|
37
37
|
and automatic marking of lifecycle tools (setup/evaluate) from a `Task`.
|
|
38
38
|
- Messaging: system prompt handling, optional inclusion of setup output on
|
|
39
39
|
the first turn, and control over initial screenshots.
|
|
40
|
-
- Telemetry & UX: standardized logging/printing via `
|
|
40
|
+
- Telemetry & UX: standardized logging/printing via `HUDConsole` and optional
|
|
41
41
|
automatic tracing (`auto_trace`).
|
|
42
42
|
|
|
43
43
|
Subclasses implement provider-specific formatting and response fetching
|
|
@@ -92,13 +92,11 @@ class MCPAgent(ABC):
|
|
|
92
92
|
self._auto_created_client = False # Track if we created the client
|
|
93
93
|
|
|
94
94
|
self.model_name = model_name
|
|
95
|
-
self.
|
|
96
|
-
|
|
97
|
-
self.metadata = {}
|
|
95
|
+
self.console = HUDConsole(logger=logger)
|
|
98
96
|
|
|
99
97
|
# Set verbose mode if requested
|
|
100
98
|
if verbose:
|
|
101
|
-
self.
|
|
99
|
+
self.console.set_verbose(True)
|
|
102
100
|
|
|
103
101
|
# Filtering
|
|
104
102
|
self.allowed_tools = allowed_tools
|
|
@@ -133,7 +131,7 @@ class MCPAgent(ABC):
|
|
|
133
131
|
|
|
134
132
|
self.mcp_client = MCPClient(mcp_config=task.mcp_config)
|
|
135
133
|
self._auto_created_client = True
|
|
136
|
-
self.
|
|
134
|
+
self.console.info_log("Auto-created MCPClient from task.mcp_config")
|
|
137
135
|
|
|
138
136
|
# Ensure we have a client
|
|
139
137
|
if self.mcp_client is None:
|
|
@@ -170,7 +168,7 @@ class MCPAgent(ABC):
|
|
|
170
168
|
await self._filter_tools()
|
|
171
169
|
|
|
172
170
|
num_tools = len(self._available_tools)
|
|
173
|
-
self.
|
|
171
|
+
self.console.success_log(
|
|
174
172
|
f"Agent initialized with {num_tools} available tools (after filtering)"
|
|
175
173
|
)
|
|
176
174
|
|
|
@@ -209,6 +207,7 @@ class MCPAgent(ABC):
|
|
|
209
207
|
else:
|
|
210
208
|
raise TypeError(f"prompt_or_task must be str or Task, got {type(prompt_or_task)}")
|
|
211
209
|
except Exception as e:
|
|
210
|
+
# Always return a Trace object for any exception
|
|
212
211
|
if self._is_connection_error(e):
|
|
213
212
|
# Return error trace for connection failures
|
|
214
213
|
return Trace(
|
|
@@ -217,7 +216,15 @@ class MCPAgent(ABC):
|
|
|
217
216
|
content=self._get_connection_error_message(e),
|
|
218
217
|
isError=True,
|
|
219
218
|
)
|
|
220
|
-
|
|
219
|
+
else:
|
|
220
|
+
# Return error trace for any other exception
|
|
221
|
+
return Trace(
|
|
222
|
+
reward=0.0,
|
|
223
|
+
done=True,
|
|
224
|
+
content=f"Task failed with error: {e}",
|
|
225
|
+
isError=True,
|
|
226
|
+
info={"error": str(e)},
|
|
227
|
+
)
|
|
221
228
|
finally:
|
|
222
229
|
# Cleanup auto-created resources
|
|
223
230
|
await self._cleanup()
|
|
@@ -245,7 +252,7 @@ class MCPAgent(ABC):
|
|
|
245
252
|
|
|
246
253
|
# Execute the setup tool and append the initial observation to the context
|
|
247
254
|
if task.setup_tool is not None:
|
|
248
|
-
self.
|
|
255
|
+
self.console.progress_log(f"Setting up tool phase: {task.setup_tool}")
|
|
249
256
|
results = await self.call_tools(task.setup_tool)
|
|
250
257
|
if any(result.isError for result in results):
|
|
251
258
|
raise RuntimeError(f"{results}")
|
|
@@ -259,39 +266,58 @@ class MCPAgent(ABC):
|
|
|
259
266
|
prompt_result = await self._run_context(start_context, max_steps=max_steps)
|
|
260
267
|
|
|
261
268
|
except Exception as e:
|
|
262
|
-
self.
|
|
269
|
+
self.console.error_log(f"Task execution failed: {e}")
|
|
263
270
|
# Create an error result but don't return yet - we still want to evaluate
|
|
264
271
|
prompt_result = Trace(reward=0.0, done=True, content=str(e), isError=True)
|
|
265
272
|
prompt_result.populate_from_context()
|
|
266
273
|
|
|
267
|
-
# Always evaluate if we have
|
|
268
|
-
if
|
|
274
|
+
# Always evaluate if we have evaluate tool, regardless of errors
|
|
275
|
+
if task.evaluate_tool is not None:
|
|
269
276
|
try:
|
|
270
|
-
self.
|
|
277
|
+
self.console.progress_log(f"Evaluating tool phase: {task.evaluate_tool}")
|
|
271
278
|
results = await self.call_tools(task.evaluate_tool)
|
|
272
279
|
|
|
273
280
|
if any(result.isError for result in results):
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
281
|
+
self.console.warning_log(f"Evaluate tool returned error: {results}")
|
|
282
|
+
# Still extract what we can from the error response
|
|
283
|
+
if prompt_result is None:
|
|
284
|
+
prompt_result = Trace(
|
|
285
|
+
reward=0.0,
|
|
286
|
+
done=True,
|
|
287
|
+
content="Task failed before evaluation",
|
|
288
|
+
isError=True,
|
|
289
|
+
)
|
|
290
|
+
prompt_result.reward = 0.0 # Default to 0 on error
|
|
291
|
+
else:
|
|
292
|
+
# Extract reward and content from evaluation
|
|
293
|
+
if results:
|
|
294
|
+
reward = find_reward(results[0])
|
|
295
|
+
eval_content = find_content(results[0])
|
|
296
|
+
|
|
297
|
+
# Update the prompt result with evaluation reward
|
|
298
|
+
if prompt_result is None:
|
|
299
|
+
prompt_result = Trace(
|
|
300
|
+
reward=reward, done=True, content=eval_content or "", isError=False
|
|
301
|
+
)
|
|
289
302
|
else:
|
|
290
|
-
prompt_result.
|
|
303
|
+
prompt_result.reward = reward
|
|
304
|
+
|
|
305
|
+
# Update the prompt result with evaluation content (if available)
|
|
306
|
+
if eval_content:
|
|
307
|
+
# Prompt result may already have final response content,
|
|
308
|
+
# so we append to it
|
|
309
|
+
if prompt_result.content:
|
|
310
|
+
prompt_result.content += "\n\n" + eval_content
|
|
311
|
+
else:
|
|
312
|
+
prompt_result.content = eval_content
|
|
291
313
|
|
|
292
314
|
except Exception as e:
|
|
293
|
-
self.
|
|
294
|
-
#
|
|
315
|
+
self.console.error_log(f"Evaluation phase failed: {e}")
|
|
316
|
+
# Ensure we have a result even if evaluation failed
|
|
317
|
+
if prompt_result is None:
|
|
318
|
+
prompt_result = Trace(
|
|
319
|
+
reward=0.0, done=True, content=f"Evaluation failed: {e}", isError=True
|
|
320
|
+
)
|
|
295
321
|
|
|
296
322
|
return (
|
|
297
323
|
prompt_result
|
|
@@ -321,21 +347,21 @@ class MCPAgent(ABC):
|
|
|
321
347
|
|
|
322
348
|
# Add initial context
|
|
323
349
|
messages.extend(await self.format_message(context))
|
|
324
|
-
self.
|
|
350
|
+
self.console.debug(f"Messages: {messages}")
|
|
325
351
|
|
|
326
352
|
step_count = 0
|
|
327
353
|
while max_steps == -1 or step_count < max_steps:
|
|
328
354
|
step_count += 1
|
|
329
355
|
if max_steps == -1:
|
|
330
|
-
self.
|
|
356
|
+
self.console.debug(f"Step {step_count} (unlimited)")
|
|
331
357
|
else:
|
|
332
|
-
self.
|
|
358
|
+
self.console.debug(f"Step {step_count}/{max_steps}")
|
|
333
359
|
|
|
334
360
|
try:
|
|
335
361
|
# 1. Get model response
|
|
336
362
|
response = await self.get_response(messages)
|
|
337
363
|
|
|
338
|
-
self.
|
|
364
|
+
self.console.debug(f"Agent:\n{response}")
|
|
339
365
|
|
|
340
366
|
# Check if we should stop
|
|
341
367
|
if response.done or not response.tool_calls:
|
|
@@ -347,16 +373,16 @@ class MCPAgent(ABC):
|
|
|
347
373
|
response.content
|
|
348
374
|
)
|
|
349
375
|
except Exception as e:
|
|
350
|
-
self.
|
|
376
|
+
self.console.warning_log(f"ResponseAgent failed: {e}")
|
|
351
377
|
if decision == "STOP":
|
|
352
378
|
# Try to submit response through lifecycle tool
|
|
353
379
|
await self._maybe_submit_response(response, messages)
|
|
354
380
|
|
|
355
|
-
self.
|
|
381
|
+
self.console.debug("Stopping execution")
|
|
356
382
|
final_response = response
|
|
357
383
|
break
|
|
358
384
|
else:
|
|
359
|
-
self.
|
|
385
|
+
self.console.debug("Continuing execution")
|
|
360
386
|
messages.extend(await self.format_message(decision))
|
|
361
387
|
continue
|
|
362
388
|
|
|
@@ -378,21 +404,21 @@ class MCPAgent(ABC):
|
|
|
378
404
|
for call, result in zip(tool_calls, tool_results, strict=False):
|
|
379
405
|
step_info += f"\n{call}\n{result}"
|
|
380
406
|
|
|
381
|
-
self.
|
|
407
|
+
self.console.info_log(step_info)
|
|
382
408
|
|
|
383
409
|
except Exception as e:
|
|
384
|
-
self.
|
|
410
|
+
self.console.error_log(f"Step failed: {e}")
|
|
385
411
|
error = str(e)
|
|
386
412
|
break
|
|
387
413
|
|
|
388
414
|
except KeyboardInterrupt:
|
|
389
|
-
self.
|
|
415
|
+
self.console.warning_log("Agent execution interrupted by user")
|
|
390
416
|
error = "Interrupted by user"
|
|
391
417
|
except asyncio.CancelledError:
|
|
392
|
-
self.
|
|
418
|
+
self.console.warning_log("Agent execution cancelled")
|
|
393
419
|
error = "Cancelled"
|
|
394
420
|
except Exception as e:
|
|
395
|
-
self.
|
|
421
|
+
self.console.error_log(f"Unexpected error: {e}")
|
|
396
422
|
error = str(e)
|
|
397
423
|
|
|
398
424
|
# Build result
|
|
@@ -433,17 +459,17 @@ class MCPAgent(ABC):
|
|
|
433
459
|
results: list[MCPToolResult] = []
|
|
434
460
|
for tc in tool_call:
|
|
435
461
|
try:
|
|
436
|
-
self.
|
|
462
|
+
self.console.debug(f"Calling tool: {tc}")
|
|
437
463
|
results.append(await self.mcp_client.call_tool(tc))
|
|
438
464
|
except TimeoutError as e:
|
|
439
|
-
self.
|
|
465
|
+
self.console.error_log(f"Tool execution timed out: {e}")
|
|
440
466
|
try:
|
|
441
467
|
await self.mcp_client.shutdown()
|
|
442
468
|
except Exception as close_err:
|
|
443
|
-
self.
|
|
469
|
+
self.console.debug(f"Failed to close MCP client cleanly: {close_err}")
|
|
444
470
|
raise
|
|
445
471
|
except Exception as e:
|
|
446
|
-
self.
|
|
472
|
+
self.console.error_log(f"Tool execution failed: {e}")
|
|
447
473
|
results.append(_format_error_result(str(e)))
|
|
448
474
|
return results
|
|
449
475
|
|
|
@@ -575,7 +601,7 @@ class MCPAgent(ABC):
|
|
|
575
601
|
|
|
576
602
|
# Add to lifecycle tools if found
|
|
577
603
|
if response_tool_name and response_tool_name not in self.lifecycle_tools:
|
|
578
|
-
self.
|
|
604
|
+
self.console.debug(f"Auto-detected '{response_tool_name}' tool as a lifecycle tool")
|
|
579
605
|
self.response_tool_name = response_tool_name
|
|
580
606
|
self.lifecycle_tools.append(response_tool_name)
|
|
581
607
|
|
|
@@ -599,7 +625,7 @@ class MCPAgent(ABC):
|
|
|
599
625
|
messages: The current message history (will be modified in-place)
|
|
600
626
|
"""
|
|
601
627
|
if self.response_tool_name:
|
|
602
|
-
self.
|
|
628
|
+
self.console.debug(f"Calling response lifecycle tool: {self.response_tool_name}")
|
|
603
629
|
try:
|
|
604
630
|
# Call the response tool with the agent's response
|
|
605
631
|
response_tool_call = MCPToolCall(
|
|
@@ -614,9 +640,9 @@ class MCPAgent(ABC):
|
|
|
614
640
|
messages.extend(response_messages)
|
|
615
641
|
|
|
616
642
|
# Mark the task as done
|
|
617
|
-
self.
|
|
643
|
+
self.console.debug("Response lifecycle tool executed, marking task as done")
|
|
618
644
|
except Exception as e:
|
|
619
|
-
self.
|
|
645
|
+
self.console.error_log(f"Response lifecycle tool failed: {e}")
|
|
620
646
|
|
|
621
647
|
async def _setup_config(self, mcp_config: dict[str, dict[str, Any]]) -> None:
|
|
622
648
|
"""Inject metadata into the metadata of the initialize request."""
|
|
@@ -670,9 +696,9 @@ class MCPAgent(ABC):
|
|
|
670
696
|
if self._auto_trace_cm:
|
|
671
697
|
try:
|
|
672
698
|
self._auto_trace_cm.__exit__(None, None, None)
|
|
673
|
-
self.
|
|
699
|
+
self.console.debug("Closed auto-created trace")
|
|
674
700
|
except Exception as e:
|
|
675
|
-
self.
|
|
701
|
+
self.console.warning_log(f"Failed to close auto-created trace: {e}")
|
|
676
702
|
finally:
|
|
677
703
|
self._auto_trace_cm = None
|
|
678
704
|
|
|
@@ -680,9 +706,9 @@ class MCPAgent(ABC):
|
|
|
680
706
|
if self._auto_created_client and self.mcp_client:
|
|
681
707
|
try:
|
|
682
708
|
await self.mcp_client.shutdown()
|
|
683
|
-
self.
|
|
709
|
+
self.console.debug("Closed auto-created MCPClient")
|
|
684
710
|
except Exception as e:
|
|
685
|
-
self.
|
|
711
|
+
self.console.warning_log(f"Failed to close auto-created client: {e}")
|
|
686
712
|
finally:
|
|
687
713
|
self.mcp_client = None
|
|
688
714
|
self._auto_created_client = False
|
|
@@ -715,13 +741,13 @@ class MCPAgent(ABC):
|
|
|
715
741
|
if self._is_connection_error(e):
|
|
716
742
|
msg = self._get_connection_error_message(e)
|
|
717
743
|
# Always show connection errors, not just when logging is enabled
|
|
718
|
-
self.
|
|
719
|
-
self.
|
|
744
|
+
self.console.error(f"❌ {msg}")
|
|
745
|
+
self.console.info("💡 Make sure the MCP server is started before running the agent.")
|
|
720
746
|
|
|
721
747
|
# For localhost, provide specific instructions
|
|
722
748
|
error_str = str(e).lower()
|
|
723
749
|
if "localhost" in error_str or "127.0.0.1" in error_str:
|
|
724
|
-
self.
|
|
750
|
+
self.console.info(" Run 'hud dev' in another terminal to start the MCP server")
|
|
725
751
|
|
|
726
752
|
raise RuntimeError(msg) from e
|
|
727
753
|
raise
|
hud/agents/claude.py
CHANGED
|
@@ -196,7 +196,11 @@ class ClaudeAgent(MCPAgent):
|
|
|
196
196
|
response = await self.anthropic_client.beta.messages.create(**create_kwargs)
|
|
197
197
|
break
|
|
198
198
|
except BadRequestError as e:
|
|
199
|
-
if
|
|
199
|
+
if (
|
|
200
|
+
"prompt is too long" in str(e)
|
|
201
|
+
or "request_too_large" in str(e)
|
|
202
|
+
or e.status_code == 413
|
|
203
|
+
):
|
|
200
204
|
logger.warning("Prompt too long, truncating message history")
|
|
201
205
|
# Keep first message and last 20 messages
|
|
202
206
|
if len(current_messages) > 21:
|
hud/agents/grounded_openai.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import json
|
|
6
|
-
from typing import Any
|
|
6
|
+
from typing import Any, ClassVar
|
|
7
7
|
|
|
8
8
|
from hud import instrument
|
|
9
9
|
from hud.tools.grounding import GroundedComputerTool, Grounder, GrounderConfig
|
|
@@ -26,6 +26,8 @@ class GroundedOpenAIChatAgent(GenericOpenAIChatAgent):
|
|
|
26
26
|
- Grounding model (Qwen2-VL etc) handles visual element detection
|
|
27
27
|
"""
|
|
28
28
|
|
|
29
|
+
metadata: ClassVar[dict[str, Any]] = {}
|
|
30
|
+
|
|
29
31
|
def __init__(
|
|
30
32
|
self,
|
|
31
33
|
*,
|
hud/agents/misc/response_agent.py
CHANGED
|
@@ -16,7 +16,7 @@ class ResponseAgent:
|
|
|
16
16
|
based on the agent's final response message.
|
|
17
17
|
"""
|
|
18
18
|
|
|
19
|
-
def __init__(self, api_key: str | None = None) -> None:
|
|
19
|
+
def __init__(self, api_key: str | None = None, model: str = "gpt-4o") -> None:
|
|
20
20
|
self.api_key = api_key or settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
|
|
21
21
|
if not self.api_key:
|
|
22
22
|
raise ValueError(
|
|
@@ -24,6 +24,7 @@ class ResponseAgent:
|
|
|
24
24
|
)
|
|
25
25
|
|
|
26
26
|
self.client = AsyncOpenAI(api_key=self.api_key)
|
|
27
|
+
self.model = model
|
|
27
28
|
|
|
28
29
|
self.system_prompt = """
|
|
29
30
|
You are an assistant that helps determine the appropriate response to an agent's message.
|
|
@@ -54,7 +55,7 @@ class ResponseAgent:
|
|
|
54
55
|
"""
|
|
55
56
|
try:
|
|
56
57
|
response = await self.client.chat.completions.create(
|
|
57
|
-
model=
|
|
58
|
+
model=self.model,
|
|
58
59
|
messages=[
|
|
59
60
|
{"role": "system", "content": self.system_prompt},
|
|
60
61
|
{
|
hud/agents/openai.py
CHANGED
|
@@ -204,7 +204,7 @@ class OperatorAgent(MCPAgent):
|
|
|
204
204
|
break
|
|
205
205
|
|
|
206
206
|
if not latest_screenshot:
|
|
207
|
-
self.
|
|
207
|
+
self.console.warning_log("No screenshot provided for response to action")
|
|
208
208
|
return AgentResponse(
|
|
209
209
|
content="No screenshot available for next action",
|
|
210
210
|
tool_calls=[],
|
|
@@ -327,7 +327,7 @@ class OperatorAgent(MCPAgent):
|
|
|
327
327
|
for content in result.content:
|
|
328
328
|
if isinstance(content, types.TextContent):
|
|
329
329
|
# Don't add error text as input_text, just track it
|
|
330
|
-
self.
|
|
330
|
+
self.console.error_log(f"Tool error: {content.text}")
|
|
331
331
|
elif isinstance(content, types.ImageContent):
|
|
332
332
|
# Even error results might have images
|
|
333
333
|
latest_screenshot = content.data
|
hud/agents/openai_chat_generic.py
CHANGED
|
@@ -17,7 +17,7 @@ from __future__ import annotations
|
|
|
17
17
|
|
|
18
18
|
import json
|
|
19
19
|
import logging
|
|
20
|
-
from typing import TYPE_CHECKING, Any, cast
|
|
20
|
+
from typing import TYPE_CHECKING, Any, ClassVar, cast
|
|
21
21
|
|
|
22
22
|
import mcp.types as types
|
|
23
23
|
|
|
@@ -36,6 +36,8 @@ logger = logging.getLogger(__name__)
|
|
|
36
36
|
class GenericOpenAIChatAgent(MCPAgent):
|
|
37
37
|
"""MCP-enabled agent that speaks the OpenAI *chat.completions* protocol."""
|
|
38
38
|
|
|
39
|
+
metadata: ClassVar[dict[str, Any]] = {}
|
|
40
|
+
|
|
39
41
|
def __init__(
|
|
40
42
|
self,
|
|
41
43
|
*,
|
hud/cli/__init__.py
CHANGED
|
@@ -184,7 +184,7 @@ def debug(
|
|
|
184
184
|
hud debug . --max-phase 3 # Stop after phase 3
|
|
185
185
|
"""
|
|
186
186
|
# Import here to avoid circular imports
|
|
187
|
-
from hud.utils.
|
|
187
|
+
from hud.utils.hud_console import HUDConsole
|
|
188
188
|
|
|
189
189
|
from .utils.environment import (
|
|
190
190
|
build_environment,
|
|
@@ -193,7 +193,7 @@ def debug(
|
|
|
193
193
|
is_environment_directory,
|
|
194
194
|
)
|
|
195
195
|
|
|
196
|
-
|
|
196
|
+
hud_console = HUDConsole()
|
|
197
197
|
|
|
198
198
|
# Determine the command to run
|
|
199
199
|
command = None
|
|
@@ -227,7 +227,7 @@ def debug(
|
|
|
227
227
|
image_name, source = get_image_name(directory)
|
|
228
228
|
|
|
229
229
|
if source == "auto":
|
|
230
|
-
|
|
230
|
+
hud_console.info(f"Auto-generated image name: {image_name}")
|
|
231
231
|
|
|
232
232
|
# Build if requested or if image doesn't exist
|
|
233
233
|
if build or not image_exists(image_name):
|
|
@@ -263,20 +263,20 @@ def debug(
|
|
|
263
263
|
phases_completed = asyncio.run(debug_mcp_stdio(command, logger, max_phase=max_phase))
|
|
264
264
|
|
|
265
265
|
# Show summary using design system
|
|
266
|
-
from hud.utils.
|
|
266
|
+
from hud.utils.hud_console import HUDConsole
|
|
267
267
|
|
|
268
|
-
|
|
268
|
+
hud_console = HUDConsole()
|
|
269
269
|
|
|
270
|
-
|
|
271
|
-
|
|
270
|
+
hud_console.info("") # Empty line
|
|
271
|
+
hud_console.section_title("Debug Summary")
|
|
272
272
|
|
|
273
273
|
if phases_completed == max_phase:
|
|
274
|
-
|
|
274
|
+
hud_console.success(f"All {max_phase} phases completed successfully!")
|
|
275
275
|
if max_phase == 5:
|
|
276
|
-
|
|
276
|
+
hud_console.info("Your MCP server is fully functional and ready for production use.")
|
|
277
277
|
else:
|
|
278
|
-
|
|
279
|
-
|
|
278
|
+
hud_console.warning(f"Completed {phases_completed} out of {max_phase} phases")
|
|
279
|
+
hud_console.info("Check the errors above for troubleshooting.")
|
|
280
280
|
|
|
281
281
|
# Exit with appropriate code
|
|
282
282
|
if phases_completed < max_phase:
|
|
@@ -831,9 +831,9 @@ def eval(
|
|
|
831
831
|
),
|
|
832
832
|
) -> None:
|
|
833
833
|
"""🚀 Run evaluation on datasets or individual tasks with agents."""
|
|
834
|
-
from hud.utils.
|
|
834
|
+
from hud.utils.hud_console import HUDConsole
|
|
835
835
|
|
|
836
|
-
|
|
836
|
+
hud_console = HUDConsole()
|
|
837
837
|
|
|
838
838
|
# If no source provided, look for task/eval JSON files in current directory
|
|
839
839
|
if source is None:
|
|
@@ -863,30 +863,30 @@ def eval(
|
|
|
863
863
|
json_files = sorted(set(json_files))
|
|
864
864
|
|
|
865
865
|
if not json_files:
|
|
866
|
-
|
|
866
|
+
hud_console.error(
|
|
867
867
|
"No source provided and no task/eval JSON files found in current directory"
|
|
868
868
|
)
|
|
869
|
-
|
|
869
|
+
hud_console.info(
|
|
870
870
|
"Usage: hud eval <source> or create a task JSON file "
|
|
871
871
|
"(e.g., task.json, eval_config.json)"
|
|
872
872
|
)
|
|
873
873
|
raise typer.Exit(1)
|
|
874
874
|
elif len(json_files) == 1:
|
|
875
875
|
source = str(json_files[0])
|
|
876
|
-
|
|
876
|
+
hud_console.info(f"Found task file: {source}")
|
|
877
877
|
else:
|
|
878
878
|
# Multiple files found, let user choose
|
|
879
|
-
|
|
880
|
-
file_choice =
|
|
879
|
+
hud_console.info("Multiple task files found:")
|
|
880
|
+
file_choice = hud_console.select(
|
|
881
881
|
"Select a task file to run:",
|
|
882
882
|
choices=[str(f) for f in json_files],
|
|
883
883
|
)
|
|
884
884
|
source = file_choice
|
|
885
|
-
|
|
885
|
+
hud_console.success(f"Selected: {source}")
|
|
886
886
|
|
|
887
887
|
# If no agent specified, prompt for selection
|
|
888
888
|
if agent is None:
|
|
889
|
-
agent =
|
|
889
|
+
agent = hud_console.select(
|
|
890
890
|
"Select an agent to use:",
|
|
891
891
|
choices=[
|
|
892
892
|
{"name": "Claude 4 Sonnet", "value": "claude"},
|
|
@@ -898,14 +898,14 @@ def eval(
|
|
|
898
898
|
# Validate agent choice
|
|
899
899
|
valid_agents = ["claude", "openai"]
|
|
900
900
|
if agent not in valid_agents:
|
|
901
|
-
|
|
901
|
+
hud_console.error(f"Invalid agent: {agent}. Must be one of: {', '.join(valid_agents)}")
|
|
902
902
|
raise typer.Exit(1)
|
|
903
903
|
|
|
904
904
|
# Import eval_command lazily to avoid importing agent dependencies
|
|
905
905
|
try:
|
|
906
906
|
from .eval import eval_command
|
|
907
907
|
except ImportError as e:
|
|
908
|
-
|
|
908
|
+
hud_console.error(
|
|
909
909
|
"Evaluation dependencies are not installed. "
|
|
910
910
|
"Please install with: pip install 'hud-python[agent]'"
|
|
911
911
|
)
|
|
@@ -962,6 +962,16 @@ def hf(
|
|
|
962
962
|
|
|
963
963
|
def main() -> None:
|
|
964
964
|
"""Main entry point for the CLI."""
|
|
965
|
+
# Handle --version flag before Typer parses args
|
|
966
|
+
if "--version" in sys.argv:
|
|
967
|
+
try:
|
|
968
|
+
from hud import __version__
|
|
969
|
+
|
|
970
|
+
console.print(f"HUD CLI version: [cyan]{__version__}[/cyan]")
|
|
971
|
+
except ImportError:
|
|
972
|
+
console.print("HUD CLI version: [cyan]unknown[/cyan]")
|
|
973
|
+
return
|
|
974
|
+
|
|
965
975
|
try:
|
|
966
976
|
# Show header for main help
|
|
967
977
|
if len(sys.argv) == 1 or (len(sys.argv) == 2 and sys.argv[1] in ["--help", "-h"]):
|
|
@@ -995,9 +1005,9 @@ def main() -> None:
|
|
|
995
1005
|
except Exception:
|
|
996
1006
|
exit_code = 1
|
|
997
1007
|
if exit_code != 0:
|
|
998
|
-
from hud.utils.
|
|
1008
|
+
from hud.utils.hud_console import hud_console
|
|
999
1009
|
|
|
1000
|
-
|
|
1010
|
+
hud_console.info(SUPPORT_HINT)
|
|
1001
1011
|
raise
|
|
1002
1012
|
except Exception:
|
|
1003
1013
|
raise
|