hud-python 0.4.22__py3-none-any.whl → 0.4.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/agents/base.py +37 -39
- hud/agents/grounded_openai.py +3 -1
- hud/agents/misc/response_agent.py +3 -2
- hud/agents/openai.py +2 -2
- hud/agents/openai_chat_generic.py +3 -1
- hud/cli/__init__.py +34 -24
- hud/cli/analyze.py +27 -26
- hud/cli/build.py +50 -46
- hud/cli/debug.py +7 -7
- hud/cli/dev.py +107 -99
- hud/cli/eval.py +31 -29
- hud/cli/hf.py +53 -53
- hud/cli/init.py +28 -28
- hud/cli/list_func.py +22 -22
- hud/cli/pull.py +36 -36
- hud/cli/push.py +76 -74
- hud/cli/remove.py +42 -40
- hud/cli/rl/__init__.py +2 -2
- hud/cli/rl/init.py +41 -41
- hud/cli/rl/pod.py +97 -91
- hud/cli/rl/ssh.py +42 -40
- hud/cli/rl/train.py +75 -73
- hud/cli/rl/utils.py +10 -10
- hud/cli/tests/test_analyze.py +1 -1
- hud/cli/tests/test_analyze_metadata.py +2 -2
- hud/cli/tests/test_pull.py +45 -45
- hud/cli/tests/test_push.py +31 -29
- hud/cli/tests/test_registry.py +15 -15
- hud/cli/utils/environment.py +11 -11
- hud/cli/utils/interactive.py +17 -17
- hud/cli/utils/logging.py +12 -12
- hud/cli/utils/metadata.py +12 -12
- hud/cli/utils/registry.py +5 -5
- hud/cli/utils/runner.py +23 -23
- hud/cli/utils/server.py +16 -16
- hud/shared/hints.py +7 -7
- hud/tools/grounding/grounder.py +2 -1
- hud/types.py +4 -4
- hud/utils/__init__.py +3 -3
- hud/utils/{design.py → hud_console.py} +39 -33
- hud/utils/pretty_errors.py +6 -6
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.22.dist-info → hud_python-0.4.23.dist-info}/METADATA +3 -1
- {hud_python-0.4.22.dist-info → hud_python-0.4.23.dist-info}/RECORD +48 -48
- {hud_python-0.4.22.dist-info → hud_python-0.4.23.dist-info}/WHEEL +0 -0
- {hud_python-0.4.22.dist-info → hud_python-0.4.23.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.22.dist-info → hud_python-0.4.23.dist-info}/licenses/LICENSE +0 -0
hud/cli/dev.py
CHANGED
|
@@ -12,7 +12,7 @@ from typing import Any
|
|
|
12
12
|
import click
|
|
13
13
|
from fastmcp import FastMCP
|
|
14
14
|
|
|
15
|
-
from hud.utils.
|
|
15
|
+
from hud.utils.hud_console import HUDConsole
|
|
16
16
|
|
|
17
17
|
from .utils.docker import get_docker_cmd, inject_supervisor
|
|
18
18
|
from .utils.environment import (
|
|
@@ -22,8 +22,8 @@ from .utils.environment import (
|
|
|
22
22
|
update_pyproject_toml,
|
|
23
23
|
)
|
|
24
24
|
|
|
25
|
-
# Global
|
|
26
|
-
|
|
25
|
+
# Global hud_console instance
|
|
26
|
+
hud_console = HUDConsole()
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
def build_and_update(directory: str | Path, image_name: str, no_cache: bool = False) -> None:
|
|
@@ -47,7 +47,7 @@ def create_proxy_server(
|
|
|
47
47
|
# Get the original CMD from the image
|
|
48
48
|
original_cmd = get_docker_cmd(image_name)
|
|
49
49
|
if not original_cmd:
|
|
50
|
-
|
|
50
|
+
hud_console.warning(f"Could not extract CMD from {image_name}, using default")
|
|
51
51
|
original_cmd = ["python", "-m", "hud_controller.server"]
|
|
52
52
|
|
|
53
53
|
# Generate unique container name from image to avoid conflicts between multiple instances
|
|
@@ -81,7 +81,7 @@ def create_proxy_server(
|
|
|
81
81
|
|
|
82
82
|
# Validate reload options
|
|
83
83
|
if no_reload and full_reload:
|
|
84
|
-
|
|
84
|
+
hud_console.warning("Cannot use --full-reload with --no-reload, ignoring --full-reload")
|
|
85
85
|
full_reload = False
|
|
86
86
|
|
|
87
87
|
if not no_reload and not full_reload:
|
|
@@ -109,30 +109,32 @@ def create_proxy_server(
|
|
|
109
109
|
# Debug output - only if verbose
|
|
110
110
|
if verbose:
|
|
111
111
|
if not no_reload and not full_reload:
|
|
112
|
-
|
|
113
|
-
|
|
112
|
+
hud_console.info("Mode: Hot-reload (server restart within container)")
|
|
113
|
+
hud_console.info("Watching: /app/src for changes")
|
|
114
114
|
elif full_reload:
|
|
115
|
-
|
|
116
|
-
|
|
115
|
+
hud_console.info("Mode: Full reload (container restart on file changes)")
|
|
116
|
+
hud_console.info(
|
|
117
|
+
"Note: Full container restart not yet implemented, using no-reload mode"
|
|
118
|
+
)
|
|
117
119
|
else:
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
120
|
+
hud_console.info("Mode: No reload")
|
|
121
|
+
hud_console.info("Container will run without hot-reload")
|
|
122
|
+
hud_console.command_example(f"docker logs -f {container_name}", "View container logs")
|
|
121
123
|
|
|
122
124
|
# Show the full Docker command if there are environment variables
|
|
123
125
|
if docker_args and any(arg == "-e" or arg.startswith("--env") for arg in docker_args):
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
126
|
+
hud_console.info("")
|
|
127
|
+
hud_console.info("Docker command with environment variables:")
|
|
128
|
+
hud_console.info(" ".join(docker_cmd))
|
|
127
129
|
|
|
128
130
|
# Create the HTTP proxy server using config
|
|
129
131
|
try:
|
|
130
132
|
proxy = FastMCP.as_proxy(config, name=f"HUD Dev Proxy - {image_name}")
|
|
131
133
|
except Exception as e:
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
134
|
+
hud_console.error(f"Failed to create proxy server: {e}")
|
|
135
|
+
hud_console.info("")
|
|
136
|
+
hud_console.info("💡 Tip: Run the following command to debug the container:")
|
|
137
|
+
hud_console.info(f" hud debug {image_name}")
|
|
136
138
|
raise
|
|
137
139
|
|
|
138
140
|
return proxy
|
|
@@ -228,7 +230,7 @@ async def start_mcp_proxy(
|
|
|
228
230
|
# Now check for src directory
|
|
229
231
|
src_path = Path(directory) / "src"
|
|
230
232
|
if not src_path.exists():
|
|
231
|
-
|
|
233
|
+
hud_console.error(f"Source directory not found: {src_path}")
|
|
232
234
|
raise click.Abort
|
|
233
235
|
|
|
234
236
|
# Extract container name from the proxy configuration (must match create_proxy_server naming)
|
|
@@ -252,16 +254,16 @@ async def start_mcp_proxy(
|
|
|
252
254
|
|
|
253
255
|
if transport == "stdio":
|
|
254
256
|
if verbose:
|
|
255
|
-
|
|
257
|
+
hud_console.info("Starting stdio proxy (each connection gets its own container)")
|
|
256
258
|
else:
|
|
257
259
|
# Find available port for HTTP
|
|
258
260
|
actual_port = find_free_port(port)
|
|
259
261
|
if actual_port is None:
|
|
260
|
-
|
|
262
|
+
hud_console.error(f"No available ports found starting from {port}")
|
|
261
263
|
raise click.Abort
|
|
262
264
|
|
|
263
265
|
if actual_port != port and verbose:
|
|
264
|
-
|
|
266
|
+
hud_console.warning(f"Port {port} in use, using port {actual_port} instead")
|
|
265
267
|
|
|
266
268
|
# Launch MCP Inspector if requested
|
|
267
269
|
if inspector:
|
|
@@ -284,8 +286,8 @@ async def start_mcp_proxy(
|
|
|
284
286
|
)
|
|
285
287
|
|
|
286
288
|
# Print inspector info cleanly
|
|
287
|
-
|
|
288
|
-
|
|
289
|
+
hud_console.section_title("MCP Inspector")
|
|
290
|
+
hud_console.link(inspector_url)
|
|
289
291
|
|
|
290
292
|
# Set environment to disable auth (for development only)
|
|
291
293
|
env = os.environ.copy()
|
|
@@ -311,7 +313,7 @@ async def start_mcp_proxy(
|
|
|
311
313
|
|
|
312
314
|
except (FileNotFoundError, Exception):
|
|
313
315
|
# Silently fail - inspector is optional
|
|
314
|
-
|
|
316
|
+
hud_console.error("Failed to launch inspector")
|
|
315
317
|
|
|
316
318
|
# Launch inspector asynchronously so it doesn't block
|
|
317
319
|
asyncio.create_task(launch_inspector()) # noqa: RUF006
|
|
@@ -319,7 +321,7 @@ async def start_mcp_proxy(
|
|
|
319
321
|
# Launch interactive mode if requested
|
|
320
322
|
if interactive:
|
|
321
323
|
if transport != "http":
|
|
322
|
-
|
|
324
|
+
hud_console.warning("Interactive mode only works with HTTP transport")
|
|
323
325
|
else:
|
|
324
326
|
server_url = f"http://localhost:{actual_port}/mcp"
|
|
325
327
|
|
|
@@ -332,9 +334,9 @@ async def start_mcp_proxy(
|
|
|
332
334
|
time.sleep(3)
|
|
333
335
|
|
|
334
336
|
try:
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
337
|
+
hud_console.section_title("Interactive Mode")
|
|
338
|
+
hud_console.info("Starting interactive testing mode...")
|
|
339
|
+
hud_console.info("Press Ctrl+C in the interactive session to exit")
|
|
338
340
|
|
|
339
341
|
# Import and run interactive mode in a new event loop
|
|
340
342
|
from .utils.interactive import run_interactive_mode
|
|
@@ -350,7 +352,7 @@ async def start_mcp_proxy(
|
|
|
350
352
|
except Exception as e:
|
|
351
353
|
# Log error but don't crash the server
|
|
352
354
|
if verbose:
|
|
353
|
-
|
|
355
|
+
hud_console.error(f"Interactive mode error: {e}")
|
|
354
356
|
|
|
355
357
|
# Launch interactive mode in a separate thread
|
|
356
358
|
import threading
|
|
@@ -365,12 +367,14 @@ async def start_mcp_proxy(
|
|
|
365
367
|
Note: The Docker container is created on-demand when the first client connects.
|
|
366
368
|
Any environment variables passed via -e flags are included when the container starts.
|
|
367
369
|
"""
|
|
368
|
-
|
|
370
|
+
log_hud_console = hud_console
|
|
369
371
|
|
|
370
372
|
# Always show waiting message
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
373
|
+
log_hud_console.info("") # Empty line for spacing
|
|
374
|
+
log_hud_console.progress_message(
|
|
375
|
+
"⏳ Waiting for first client connection to start container..."
|
|
376
|
+
)
|
|
377
|
+
log_hud_console.info(f"📋 Looking for container: {container_name}") # noqa: G004
|
|
374
378
|
|
|
375
379
|
# Keep trying to stream logs - container is created on demand
|
|
376
380
|
has_shown_started = False
|
|
@@ -395,7 +399,7 @@ async def start_mcp_proxy(
|
|
|
395
399
|
|
|
396
400
|
# Container exists! Show success if first time
|
|
397
401
|
if not has_shown_started:
|
|
398
|
-
|
|
402
|
+
log_hud_console.success("Container started! Streaming logs...")
|
|
399
403
|
has_shown_started = True
|
|
400
404
|
|
|
401
405
|
# Now stream the logs
|
|
@@ -422,7 +426,7 @@ async def start_mcp_proxy(
|
|
|
422
426
|
# Show all logs with gold formatting like hud debug
|
|
423
427
|
# Format all logs in gold/dim style like hud debug's stderr
|
|
424
428
|
# Use stdout console to avoid stderr redirection when not verbose
|
|
425
|
-
|
|
429
|
+
log_hud_console._stdout_console.print(
|
|
426
430
|
f"[rgb(192,150,12)]■[/rgb(192,150,12)] {decoded_line}", highlight=False
|
|
427
431
|
)
|
|
428
432
|
|
|
@@ -435,11 +439,11 @@ async def start_mcp_proxy(
|
|
|
435
439
|
|
|
436
440
|
except Exception as e:
|
|
437
441
|
# Some unexpected error - show it so we can debug
|
|
438
|
-
|
|
442
|
+
log_hud_console.warning(f"Failed to stream Docker logs: {e}") # noqa: G004
|
|
439
443
|
if verbose:
|
|
440
444
|
import traceback
|
|
441
445
|
|
|
442
|
-
|
|
446
|
+
log_hud_console.warning(f"Traceback: {traceback.format_exc()}") # noqa: G004
|
|
443
447
|
await asyncio.sleep(1)
|
|
444
448
|
|
|
445
449
|
# Import contextlib here so it's available in the finally block
|
|
@@ -456,7 +460,7 @@ async def start_mcp_proxy(
|
|
|
456
460
|
|
|
457
461
|
def signal_handler(signum: int, frame: Any) -> None:
|
|
458
462
|
"""Handle signals by setting shutdown event."""
|
|
459
|
-
|
|
463
|
+
hud_console.info(f"\n📡 Received signal {signum}, shutting down gracefully...")
|
|
460
464
|
shutdown_event.set()
|
|
461
465
|
|
|
462
466
|
# Register signal handlers - SIGINT is available on all platforms
|
|
@@ -524,7 +528,7 @@ async def start_mcp_proxy(
|
|
|
524
528
|
stdout, _ = await check_result.communicate()
|
|
525
529
|
|
|
526
530
|
if container_name in stdout.decode():
|
|
527
|
-
|
|
531
|
+
hud_console.info("🛑 Stopping container gracefully...")
|
|
528
532
|
# Stop with 30 second timeout before SIGKILL
|
|
529
533
|
stop_result = await asyncio.create_subprocess_exec(
|
|
530
534
|
"docker",
|
|
@@ -535,10 +539,10 @@ async def start_mcp_proxy(
|
|
|
535
539
|
stderr=asyncio.subprocess.DEVNULL,
|
|
536
540
|
)
|
|
537
541
|
await stop_result.communicate()
|
|
538
|
-
|
|
542
|
+
hud_console.success("✅ Container stopped successfully")
|
|
539
543
|
container_stopped = True
|
|
540
544
|
except Exception as e:
|
|
541
|
-
|
|
545
|
+
hud_console.warning(f"Failed to stop container: {e}")
|
|
542
546
|
|
|
543
547
|
try:
|
|
544
548
|
# Start Docker logs streaming if enabled
|
|
@@ -578,36 +582,36 @@ async def start_mcp_proxy(
|
|
|
578
582
|
show_banner=False,
|
|
579
583
|
)
|
|
580
584
|
except (ConnectionError, OSError) as e:
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
585
|
+
hud_console.error(f"Failed to connect to Docker container: {e}")
|
|
586
|
+
hud_console.info("")
|
|
587
|
+
hud_console.info("💡 Tip: Run the following command to debug the container:")
|
|
588
|
+
hud_console.info(f" hud debug {image_name}")
|
|
589
|
+
hud_console.info("")
|
|
590
|
+
hud_console.info("Common issues:")
|
|
591
|
+
hud_console.info(" • Container failed to start or crashed immediately")
|
|
592
|
+
hud_console.info(" • Server initialization failed")
|
|
593
|
+
hud_console.info(" • Port binding conflicts")
|
|
590
594
|
raise
|
|
591
595
|
except KeyboardInterrupt:
|
|
592
|
-
|
|
596
|
+
hud_console.info("\n👋 Shutting down...")
|
|
593
597
|
|
|
594
598
|
# Stop the container before showing next steps
|
|
595
599
|
await stop_container()
|
|
596
600
|
|
|
597
601
|
# Show next steps tutorial
|
|
598
602
|
if not interactive: # Only show if not in interactive mode
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
603
|
+
hud_console.section_title("Next Steps")
|
|
604
|
+
hud_console.info("🏗️ Ready to test with real agents? Run:")
|
|
605
|
+
hud_console.info(f" [cyan]hud build {directory}[/cyan]")
|
|
606
|
+
hud_console.info("")
|
|
607
|
+
hud_console.info("This will:")
|
|
608
|
+
hud_console.info(" 1. Build your environment image")
|
|
609
|
+
hud_console.info(" 2. Generate a hud.lock.yaml file")
|
|
610
|
+
hud_console.info(" 3. Prepare it for testing with agents")
|
|
611
|
+
hud_console.info("")
|
|
612
|
+
hud_console.info("Then you can:")
|
|
613
|
+
hud_console.info(" • Test locally: [cyan]hud run <image>[/cyan]")
|
|
614
|
+
hud_console.info(" • Push to registry: [cyan]hud push --image <registry/name>[/cyan]")
|
|
611
615
|
except Exception as e:
|
|
612
616
|
# Suppress the graceful shutdown error and other FastMCP/uvicorn internal errors
|
|
613
617
|
error_msg = str(e)
|
|
@@ -619,7 +623,7 @@ async def start_mcp_proxy(
|
|
|
619
623
|
"Application shutdown complete",
|
|
620
624
|
]
|
|
621
625
|
):
|
|
622
|
-
|
|
626
|
+
hud_console.error(f"Unexpected error: {e}")
|
|
623
627
|
finally:
|
|
624
628
|
# Cancel log streaming task if it exists
|
|
625
629
|
if log_task and not log_task.done():
|
|
@@ -663,7 +667,7 @@ def run_mcp_dev_server(
|
|
|
663
667
|
"""
|
|
664
668
|
# Ensure directory exists
|
|
665
669
|
if not Path(directory).exists():
|
|
666
|
-
|
|
670
|
+
hud_console.error(f"Directory not found: {directory}")
|
|
667
671
|
raise click.Abort
|
|
668
672
|
|
|
669
673
|
# No external dependencies needed for hot-reload anymore!
|
|
@@ -696,10 +700,10 @@ def run_mcp_dev_server(
|
|
|
696
700
|
|
|
697
701
|
actual_port = find_free_port(port)
|
|
698
702
|
if actual_port is None:
|
|
699
|
-
|
|
703
|
+
hud_console.error(f"No available ports found starting from {port}")
|
|
700
704
|
raise click.Abort
|
|
701
705
|
if actual_port != port and verbose:
|
|
702
|
-
|
|
706
|
+
hud_console.warning(f"Port {port} in use, using port {actual_port}")
|
|
703
707
|
|
|
704
708
|
# Create config
|
|
705
709
|
if transport == "stdio":
|
|
@@ -722,38 +726,40 @@ def run_mcp_dev_server(
|
|
|
722
726
|
)
|
|
723
727
|
|
|
724
728
|
# Show header with gold border
|
|
725
|
-
|
|
726
|
-
|
|
729
|
+
hud_console.info("") # Empty line before header
|
|
730
|
+
hud_console.header("HUD Development Server")
|
|
727
731
|
|
|
728
732
|
# Always show the Docker image being used as the first thing after header
|
|
729
|
-
|
|
733
|
+
hud_console.section_title("Docker Image")
|
|
730
734
|
if source == "cache":
|
|
731
|
-
|
|
735
|
+
hud_console.info(f"📦 {resolved_image}")
|
|
732
736
|
elif source == "auto":
|
|
733
|
-
|
|
737
|
+
hud_console.info(f"🔧 {resolved_image} (auto-generated)")
|
|
734
738
|
elif source == "override":
|
|
735
|
-
|
|
739
|
+
hud_console.info(f"🎯 {resolved_image} (specified)")
|
|
736
740
|
else:
|
|
737
|
-
|
|
741
|
+
hud_console.info(f"🐳 {resolved_image}")
|
|
738
742
|
|
|
739
|
-
|
|
743
|
+
hud_console.progress_message(
|
|
740
744
|
f"❗ If any issues arise, run `hud debug {resolved_image}` to debug the container"
|
|
741
745
|
)
|
|
742
746
|
|
|
743
747
|
# Show environment variables if provided
|
|
744
748
|
if docker_args and any(arg == "-e" or arg.startswith("--env") for arg in docker_args):
|
|
745
|
-
|
|
746
|
-
|
|
749
|
+
hud_console.section_title("Environment Variables")
|
|
750
|
+
hud_console.info(
|
|
751
|
+
"The following environment variables will be passed to the Docker container:"
|
|
752
|
+
)
|
|
747
753
|
i = 0
|
|
748
754
|
while i < len(docker_args):
|
|
749
755
|
if docker_args[i] == "-e" and i + 1 < len(docker_args):
|
|
750
|
-
|
|
756
|
+
hud_console.info(f" • {docker_args[i + 1]}")
|
|
751
757
|
i += 2
|
|
752
758
|
elif docker_args[i].startswith("--env="):
|
|
753
|
-
|
|
759
|
+
hud_console.info(f" • {docker_args[i][6:]}")
|
|
754
760
|
i += 1
|
|
755
761
|
elif docker_args[i] == "--env" and i + 1 < len(docker_args):
|
|
756
|
-
|
|
762
|
+
hud_console.info(f" • {docker_args[i + 1]}")
|
|
757
763
|
i += 2
|
|
758
764
|
else:
|
|
759
765
|
i += 1
|
|
@@ -761,35 +767,37 @@ def run_mcp_dev_server(
|
|
|
761
767
|
# Show hints about inspector and interactive mode
|
|
762
768
|
if transport == "http":
|
|
763
769
|
if not inspector and not interactive:
|
|
764
|
-
|
|
765
|
-
|
|
770
|
+
hud_console.progress_message("💡 Run with --inspector to launch MCP Inspector")
|
|
771
|
+
hud_console.progress_message("🧪 Run with --interactive for interactive testing mode")
|
|
766
772
|
elif not inspector:
|
|
767
|
-
|
|
773
|
+
hud_console.progress_message("💡 Run with --inspector to launch MCP Inspector")
|
|
768
774
|
elif not interactive:
|
|
769
|
-
|
|
775
|
+
hud_console.progress_message("🧪 Run with --interactive for interactive testing mode")
|
|
770
776
|
|
|
771
777
|
# Disable logs and hot-reload if interactive mode is enabled
|
|
772
778
|
if interactive:
|
|
773
779
|
if not no_logs:
|
|
774
|
-
|
|
780
|
+
hud_console.warning("Docker logs disabled in interactive mode for better UI experience")
|
|
775
781
|
no_logs = True
|
|
776
782
|
if not no_reload:
|
|
777
|
-
|
|
783
|
+
hud_console.warning(
|
|
784
|
+
"Hot-reload disabled in interactive mode to prevent output interference"
|
|
785
|
+
)
|
|
778
786
|
no_reload = True
|
|
779
787
|
|
|
780
788
|
# Show configuration as JSON (just the server config, not wrapped)
|
|
781
789
|
full_config = {}
|
|
782
790
|
full_config[server_name] = server_config
|
|
783
791
|
|
|
784
|
-
|
|
785
|
-
|
|
792
|
+
hud_console.section_title("MCP Configuration (add this to any agent/client)")
|
|
793
|
+
hud_console.json_config(json.dumps(full_config, indent=2))
|
|
786
794
|
|
|
787
795
|
# Show connection info
|
|
788
|
-
|
|
796
|
+
hud_console.section_title(
|
|
789
797
|
"Connect to Cursor (be careful with multiple windows as that may interfere with the proxy)"
|
|
790
798
|
)
|
|
791
|
-
|
|
792
|
-
|
|
799
|
+
hud_console.link(deeplink)
|
|
800
|
+
hud_console.info("") # Empty line
|
|
793
801
|
|
|
794
802
|
# Start the proxy (pass original port, start_mcp_proxy will find actual port again)
|
|
795
803
|
try:
|
|
@@ -809,10 +817,10 @@ def run_mcp_dev_server(
|
|
|
809
817
|
)
|
|
810
818
|
)
|
|
811
819
|
except Exception as e:
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
820
|
+
hud_console.error(f"Failed to start MCP server: {e}")
|
|
821
|
+
hud_console.info("")
|
|
822
|
+
hud_console.info("💡 Tip: Run the following command to debug the container:")
|
|
823
|
+
hud_console.info(f" hud debug {resolved_image}")
|
|
824
|
+
hud_console.info("")
|
|
825
|
+
hud_console.info("This will help identify connection issues or initialization failures.")
|
|
818
826
|
raise
|
hud/cli/eval.py
CHANGED
|
@@ -11,10 +11,10 @@ from typing import Any, Literal
|
|
|
11
11
|
import typer
|
|
12
12
|
|
|
13
13
|
import hud
|
|
14
|
-
from hud.utils.
|
|
14
|
+
from hud.utils.hud_console import HUDConsole
|
|
15
15
|
|
|
16
16
|
logger = logging.getLogger(__name__)
|
|
17
|
-
|
|
17
|
+
hud_console = HUDConsole()
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
def build_agent(
|
|
@@ -31,7 +31,7 @@ def build_agent(
|
|
|
31
31
|
try:
|
|
32
32
|
from hud.agents import OperatorAgent
|
|
33
33
|
except ImportError as e:
|
|
34
|
-
|
|
34
|
+
hud_console.error(
|
|
35
35
|
"OpenAI agent dependencies are not installed. "
|
|
36
36
|
"Please install with: pip install 'hud-python[agent]'"
|
|
37
37
|
)
|
|
@@ -49,7 +49,7 @@ def build_agent(
|
|
|
49
49
|
try:
|
|
50
50
|
from hud.agents import ClaudeAgent
|
|
51
51
|
except ImportError as e:
|
|
52
|
-
|
|
52
|
+
hud_console.error(
|
|
53
53
|
"Claude agent dependencies are not installed. "
|
|
54
54
|
"Please install with: pip install 'hud-python[agent]'"
|
|
55
55
|
)
|
|
@@ -85,7 +85,7 @@ async def run_single_task(
|
|
|
85
85
|
try:
|
|
86
86
|
from hud.datasets import Task, run_dataset
|
|
87
87
|
except ImportError as e:
|
|
88
|
-
|
|
88
|
+
hud_console.error(
|
|
89
89
|
"Dataset dependencies are not installed. "
|
|
90
90
|
"Please install with: pip install 'hud-python[agent]'"
|
|
91
91
|
)
|
|
@@ -94,13 +94,13 @@ async def run_single_task(
|
|
|
94
94
|
# Check if it's a JSON file
|
|
95
95
|
path = Path(source)
|
|
96
96
|
if path.exists() and path.suffix == ".json":
|
|
97
|
-
|
|
97
|
+
hud_console.info("📊 Loading task file…")
|
|
98
98
|
with open(path) as f: # noqa: ASYNC230
|
|
99
99
|
json_data = json.load(f)
|
|
100
100
|
|
|
101
101
|
# Check if JSON contains multiple tasks (list with more than 1 task)
|
|
102
102
|
if isinstance(json_data, list) and len(json_data) > 1:
|
|
103
|
-
|
|
103
|
+
hud_console.info(f"Found {len(json_data)} tasks in JSON file, running as dataset…")
|
|
104
104
|
|
|
105
105
|
# Build agent class and config for run_dataset
|
|
106
106
|
if agent_type == "openai":
|
|
@@ -109,7 +109,7 @@ async def run_single_task(
|
|
|
109
109
|
|
|
110
110
|
agent_class = OperatorAgent
|
|
111
111
|
except ImportError as e:
|
|
112
|
-
|
|
112
|
+
hud_console.error(
|
|
113
113
|
"OpenAI agent dependencies are not installed. "
|
|
114
114
|
"Please install with: pip install 'hud-python[agent]'"
|
|
115
115
|
)
|
|
@@ -125,7 +125,7 @@ async def run_single_task(
|
|
|
125
125
|
|
|
126
126
|
agent_class = ClaudeAgent
|
|
127
127
|
except ImportError as e:
|
|
128
|
-
|
|
128
|
+
hud_console.error(
|
|
129
129
|
"Claude agent dependencies are not installed. "
|
|
130
130
|
"Please install with: pip install 'hud-python[agent]'"
|
|
131
131
|
)
|
|
@@ -151,25 +151,25 @@ async def run_single_task(
|
|
|
151
151
|
|
|
152
152
|
# Display summary
|
|
153
153
|
successful = sum(1 for r in results if getattr(r, "reward", 0) > 0)
|
|
154
|
-
|
|
154
|
+
hud_console.success(f"Completed {len(results)} tasks: {successful} successful")
|
|
155
155
|
return
|
|
156
156
|
|
|
157
157
|
# Single task JSON (either direct object or list with 1 task)
|
|
158
158
|
if isinstance(json_data, list) and len(json_data) == 1:
|
|
159
|
-
|
|
159
|
+
hud_console.info("Found 1 task in JSON file, running as single task…")
|
|
160
160
|
task = Task(**json_data[0])
|
|
161
161
|
elif isinstance(json_data, dict):
|
|
162
162
|
task = Task(**json_data)
|
|
163
163
|
else:
|
|
164
|
-
|
|
164
|
+
hud_console.error("JSON file must contain a list of tasks when using --full flag")
|
|
165
165
|
raise typer.Exit(1)
|
|
166
166
|
else:
|
|
167
167
|
# Load from HuggingFace dataset
|
|
168
|
-
|
|
168
|
+
hud_console.info(f"📊 Loading dataset from HuggingFace: {source}…")
|
|
169
169
|
try:
|
|
170
170
|
from datasets import load_dataset
|
|
171
171
|
except ImportError as e:
|
|
172
|
-
|
|
172
|
+
hud_console.error(
|
|
173
173
|
"Datasets library is not installed. "
|
|
174
174
|
"Please install with: pip install 'hud-python[agent]'"
|
|
175
175
|
)
|
|
@@ -190,9 +190,9 @@ async def run_single_task(
|
|
|
190
190
|
allowed_tools=allowed_tools,
|
|
191
191
|
verbose=verbose,
|
|
192
192
|
)
|
|
193
|
-
|
|
193
|
+
hud_console.info(task.prompt)
|
|
194
194
|
result = await agent.run(task, max_steps=max_steps)
|
|
195
|
-
|
|
195
|
+
hud_console.success(f"Reward: {result.reward}")
|
|
196
196
|
|
|
197
197
|
|
|
198
198
|
async def run_full_dataset(
|
|
@@ -217,7 +217,7 @@ async def run_full_dataset(
|
|
|
217
217
|
try:
|
|
218
218
|
from hud.datasets import run_dataset, run_dataset_parallel, run_dataset_parallel_manual
|
|
219
219
|
except ImportError as e:
|
|
220
|
-
|
|
220
|
+
hud_console.error(
|
|
221
221
|
"Dataset dependencies are not installed. "
|
|
222
222
|
"Please install with: pip install 'hud-python[agent]'"
|
|
223
223
|
)
|
|
@@ -235,9 +235,9 @@ async def run_full_dataset(
|
|
|
235
235
|
if isinstance(json_data, list):
|
|
236
236
|
dataset_or_tasks = json_data
|
|
237
237
|
dataset_name = f"JSON Dataset: {path.name}"
|
|
238
|
-
|
|
238
|
+
hud_console.info(f"Found {len(json_data)} tasks in JSON file")
|
|
239
239
|
else:
|
|
240
|
-
|
|
240
|
+
hud_console.error("JSON file must contain a list of tasks when using --full flag")
|
|
241
241
|
raise typer.Exit(1)
|
|
242
242
|
|
|
243
243
|
# Build agent class + config for run_dataset
|
|
@@ -247,7 +247,7 @@ async def run_full_dataset(
|
|
|
247
247
|
|
|
248
248
|
agent_class = OperatorAgent
|
|
249
249
|
except ImportError as e:
|
|
250
|
-
|
|
250
|
+
hud_console.error(
|
|
251
251
|
"OpenAI agent dependencies are not installed. "
|
|
252
252
|
"Please install with: pip install 'hud-python[agent]'"
|
|
253
253
|
)
|
|
@@ -263,7 +263,7 @@ async def run_full_dataset(
|
|
|
263
263
|
|
|
264
264
|
agent_class = ClaudeAgent
|
|
265
265
|
except ImportError as e:
|
|
266
|
-
|
|
266
|
+
hud_console.error(
|
|
267
267
|
"Claude agent dependencies are not installed. "
|
|
268
268
|
"Please install with: pip install 'hud-python[agent]'"
|
|
269
269
|
)
|
|
@@ -277,7 +277,7 @@ async def run_full_dataset(
|
|
|
277
277
|
agent_config["allowed_tools"] = allowed_tools
|
|
278
278
|
|
|
279
279
|
if parallel:
|
|
280
|
-
|
|
280
|
+
hud_console.info(
|
|
281
281
|
f"🚀 Running PARALLEL evaluation (workers: {max_workers or 'auto'}, max_concurrent: {max_concurrent})…" # noqa: E501
|
|
282
282
|
)
|
|
283
283
|
if max_workers is None:
|
|
@@ -307,7 +307,7 @@ async def run_full_dataset(
|
|
|
307
307
|
auto_respond=True,
|
|
308
308
|
)
|
|
309
309
|
else:
|
|
310
|
-
|
|
310
|
+
hud_console.info(f"🚀 Running evaluation (max_concurrent: {max_concurrent})…")
|
|
311
311
|
return await run_dataset(
|
|
312
312
|
name=f"Evaluation {dataset_name}",
|
|
313
313
|
dataset=dataset_or_tasks,
|
|
@@ -410,18 +410,20 @@ def eval_command(
|
|
|
410
410
|
# Check for required API keys
|
|
411
411
|
if agent == "claude":
|
|
412
412
|
if not settings.anthropic_api_key:
|
|
413
|
-
|
|
414
|
-
|
|
413
|
+
hud_console.error("ANTHROPIC_API_KEY is required for Claude agent")
|
|
414
|
+
hud_console.info(
|
|
415
|
+
"Set it in your environment or .env file: ANTHROPIC_API_KEY=your-key-here"
|
|
416
|
+
)
|
|
415
417
|
raise typer.Exit(1)
|
|
416
418
|
elif agent == "openai" and not settings.openai_api_key:
|
|
417
|
-
|
|
418
|
-
|
|
419
|
+
hud_console.error("OPENAI_API_KEY is required for OpenAI agent")
|
|
420
|
+
hud_console.info("Set it in your environment or .env file: OPENAI_API_KEY=your-key-here")
|
|
419
421
|
raise typer.Exit(1)
|
|
420
422
|
|
|
421
423
|
# Check for HUD_API_KEY if using HUD services
|
|
422
424
|
if not settings.api_key:
|
|
423
|
-
|
|
424
|
-
|
|
425
|
+
hud_console.warning("HUD_API_KEY not set. Some features may be limited.")
|
|
426
|
+
hud_console.info("Get your API key at: https://app.hud.so")
|
|
425
427
|
|
|
426
428
|
# Parse allowed tools
|
|
427
429
|
allowed_tools_list = (
|