hud-python 0.4.57__py3-none-any.whl → 0.4.59__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

hud/server/server.py CHANGED
@@ -16,7 +16,9 @@ from fastmcp.server.server import FastMCP, Transport
16
16
  from starlette.requests import Request
17
17
  from starlette.responses import JSONResponse, Response
18
18
 
19
+ from hud.cli.eval import run_full_dataset
19
20
  from hud.server.low_level import LowLevelServerWithInit
21
+ from hud.types import Task
20
22
 
21
23
  if TYPE_CHECKING:
22
24
  from collections.abc import AsyncGenerator, Callable
@@ -486,6 +488,84 @@ class MCPServer(FastMCP):
486
488
  self._prompt_manager._prompts[new_key] = prompt
487
489
  # await self.import_server(hidden_router, prefix=None, **kwargs)
488
490
 
491
+ def _get_docker_logs(
492
+ self,
493
+ tail: int = 100,
494
+ since: str | None = None,
495
+ until: str | None = None,
496
+ timestamps: bool = False,
497
+ ) -> dict[str, Any]:
498
+ """Helper function to get Docker container logs.
499
+
500
+ Args:
501
+ tail: Number of lines to show from the end of the logs
502
+ since: Show logs since timestamp or relative time
503
+ until: Show logs before a timestamp or relative time
504
+ timestamps: Show timestamps in log output
505
+
506
+ Returns:
507
+ Dictionary with logs data or error information
508
+ """
509
+ import subprocess
510
+
511
+ container_name = os.environ.get("_HUD_DEV_DOCKER_CONTAINER")
512
+ if not container_name:
513
+ return {"items": [], "container_name": None, "error": "No container name found"}
514
+
515
+ # Build docker logs command
516
+ cmd = ["docker", "logs", "--tail", str(tail)]
517
+
518
+ if since:
519
+ cmd.extend(["--since", since])
520
+ if until:
521
+ cmd.extend(["--until", until])
522
+ if timestamps:
523
+ cmd.append("--timestamps")
524
+
525
+ cmd.append(container_name)
526
+
527
+ try:
528
+ # Run docker logs to get output
529
+ result = subprocess.run( # noqa: S603
530
+ cmd,
531
+ stdout=subprocess.PIPE,
532
+ stderr=subprocess.STDOUT,
533
+ text=True,
534
+ encoding="utf-8",
535
+ errors="replace",
536
+ timeout=5,
537
+ )
538
+
539
+ # Parse logs into items
540
+ items = []
541
+ lines = result.stdout.strip().split("\n") if result.stdout else []
542
+
543
+ for i, line in enumerate(lines):
544
+ if line.strip():
545
+ items.append(
546
+ {
547
+ "id": i,
548
+ "stream": "mixed",
549
+ "log": line,
550
+ "container_name": container_name,
551
+ }
552
+ )
553
+
554
+ return {
555
+ "items": items,
556
+ "container_name": container_name,
557
+ "total_lines": len(items),
558
+ }
559
+
560
+ except subprocess.TimeoutExpired:
561
+ return {"error": "Docker logs timeout", "container_name": container_name, "items": []}
562
+ except Exception as e:
563
+ return {
564
+ "error": f"Failed to get logs: {e!s}",
565
+ "container_name": container_name,
566
+ "items": [],
567
+ }
568
+
489
569
  def _register_hud_helpers(self) -> None:
490
570
  """Register development helper endpoints.
491
571
 
@@ -494,6 +574,7 @@ class MCPServer(FastMCP):
494
574
  - POST /api/tools/{name} - REST wrappers for MCP tools
495
575
  - GET /openapi.json - OpenAPI spec for REST endpoints
496
576
  - GET /logs - Development log endpoint (when provided by dev runtime)
577
+ - hud-logs tool - MCP tool for fetching logs (when in Docker mode)
497
578
  """
498
579
 
499
580
  # Register REST wrapper for each tool
@@ -544,7 +625,7 @@ class MCPServer(FastMCP):
544
625
  endpoint = create_tool_endpoint(tool_key)
545
626
  self.custom_route(f"/api/tools/{tool_key}", methods=["POST"])(endpoint)
546
627
 
547
- # Development log endpoint - only if dev runtime set a provider
628
+ # Development endpoints - only if dev runtime set a provider
548
629
  provider = os.environ.get("_HUD_DEV_LOGS_PROVIDER")
549
630
  if provider == "enabled":
550
631
 
@@ -556,50 +637,182 @@ class MCPServer(FastMCP):
556
637
  - limit: max number of lines to return (default 100)
557
638
  - tail: number of lines from end to return (default 100)
558
639
  """
559
- import subprocess
560
-
561
- # Get container name from environment
562
- container_name = os.environ.get("_HUD_DEV_DOCKER_CONTAINER")
563
- if not container_name:
564
- return JSONResponse({"items": [], "next": None})
565
-
566
640
  # Get query params
567
641
  params = request.query_params
568
- tail = params.get("tail", "100")
642
+ tail = int(params.get("tail", "100"))
643
+
644
+ # Use helper function to get logs
645
+ result = self._get_docker_logs(tail=tail)
646
+
647
+ # Add 'next' field for compatibility with existing API
648
+ if "error" in result:
649
+ return JSONResponse(result, status_code=500)
650
+ else:
651
+ items = result.get("items", [])
652
+ return JSONResponse(
653
+ {
654
+ "items": items,
655
+ "next": len(items) - 1 if items else None,
656
+ }
657
+ )
658
+
659
+ # Import existing types from the codebase
660
+ from pydantic import BaseModel
661
+
662
+ from hud.types import AgentType
663
+
664
+ class EvalRequest(BaseModel):
665
+ """Request model for /eval endpoint."""
666
+
667
+ tasks: list[dict[str, Any]] = []
668
+ agent: str = "claude"
669
+ model: str | None = None
670
+ max_steps: int = 10
671
+ verbose: bool = False
672
+ group_size: int = 1
673
+ name: str | None = None
674
+
675
+ @self.custom_route("/eval", methods=["POST"])
676
+ async def run_eval(request: Request) -> Response:
677
+ """Run evaluation on tasks using the current Docker environment."""
678
+ import asyncio
679
+ import json
569
680
 
570
681
  try:
571
- # Run docker logs to get recent output
572
- result = subprocess.run( # noqa: S603, ASYNC221
573
- ["docker", "logs", "--tail", tail, container_name], # noqa: S607
574
- stdout=subprocess.PIPE,
575
- stderr=subprocess.STDOUT,
576
- text=True,
577
- encoding="utf-8",
578
- errors="replace",
579
- timeout=5,
580
- )
682
+ body = await request.body()
683
+ data = json.loads(body)
684
+
685
+ # Validate request using Pydantic model
686
+ try:
687
+ eval_request = EvalRequest(**data)
688
+ except Exception as e:
689
+ return JSONResponse({"error": f"Invalid request: {e!s}"}, status_code=400)
690
+
691
+ # Get the Docker MCP config from environment
692
+ docker_mcp_config = os.environ.get("_HUD_DEV_DOCKER_MCP_CONFIG")
693
+ if not docker_mcp_config:
694
+ return JSONResponse(
695
+ {"error": "Docker MCP config not available"}, status_code=500
696
+ )
581
697
 
582
- # Parse logs into items
583
- items = []
584
- lines = result.stdout.strip().split("\n") if result.stdout else []
585
-
586
- for i, line in enumerate(lines):
587
- if line.strip():
588
- items.append(
589
- {
590
- "id": i,
591
- "stream": "mixed",
592
- "log": line,
593
- "container_name": container_name,
594
- }
698
+ docker_config = json.loads(docker_mcp_config)
699
+
700
+ # Simplify Docker config for evaluation
701
+ if "docker" in docker_config and "args" in docker_config["docker"]:
702
+ original_args = docker_config["docker"]["args"]
703
+ filtered_args = []
704
+ i = 0
705
+
706
+ while i < len(original_args):
707
+ arg = original_args[i]
708
+
709
+ # Skip volume mounts and their values
710
+ if arg in ["-v", "--volume"]:
711
+ i += 2 # Skip the flag and its value
712
+ continue
713
+
714
+ # Skip combined volume mount args
715
+ if arg.startswith(("-v", "--volume=")):
716
+ i += 1
717
+ continue
718
+
719
+ # Skip explicit container name to avoid collisions
720
+ if arg == "--name" and i + 1 < len(original_args):
721
+ i += 2 # Skip the --name and its value
722
+ continue
723
+
724
+ # Skip dev-specific environment variables
725
+ if arg == "-e" and i + 1 < len(original_args):
726
+ next_arg = original_args[i + 1]
727
+ if next_arg in [
728
+ "PYTHONPATH=/app",
729
+ "HUD_DEV=1",
730
+ "PYTHONUNBUFFERED=1",
731
+ ]:
732
+ i += 2 # Skip the -e and its value
733
+ continue
734
+
735
+ filtered_args.append(arg)
736
+ i += 1
737
+
738
+ # Update the docker args with filtered version
739
+ docker_config["docker"]["args"] = filtered_args
740
+
741
+ try:
742
+ agent_type = AgentType(eval_request.agent.lower())
743
+ except ValueError:
744
+ valid_agents = [
745
+ a.value for a in AgentType if a != AgentType.INTEGRATION_TEST
746
+ ]
747
+ return JSONResponse(
748
+ {
749
+ "error": f"Invalid agent type: {eval_request.agent}",
750
+ "valid_agents": valid_agents,
751
+ },
752
+ status_code=400,
753
+ )
754
+
755
+ # Add MCP config to each task and validate basic structure
756
+ tasks = []
757
+ for task_data in eval_request.tasks:
758
+ task_data["mcp_config"] = docker_config
759
+ tasks.append(Task.model_validate(task_data).model_dump())
760
+
761
+ # Save tasks to temporary file
762
+ import tempfile
763
+
764
+ with tempfile.NamedTemporaryFile(
765
+ mode="w", prefix="hud-eval-", suffix=".json", delete=False
766
+ ) as f:
767
+ json.dump(tasks, f)
768
+ task_file = f.name
769
+
770
+ # Fire and forget - launch evaluation in background
771
+ async def run_eval_background() -> None:
772
+ try:
773
+ await run_full_dataset(
774
+ task_file,
775
+ agent_type=agent_type,
776
+ model=eval_request.model,
777
+ max_steps=eval_request.max_steps,
778
+ verbose=eval_request.verbose,
779
+ group_size=eval_request.group_size,
595
780
  )
781
+ except Exception as e:
782
+ raise e
783
+ finally:
784
+ # Clean up temp file
785
+ import os
786
+
787
+ if os.path.exists(task_file):
788
+ os.unlink(task_file)
789
+
790
+ # Start the evaluation in the background (fire and forget)
791
+ asyncio.create_task(run_eval_background()) # noqa: RUF006
792
+
793
+ # Return immediately
794
+ response_data = {
795
+ "status": "started",
796
+ "message": f"Evaluation launched with {len(tasks)} task(s)",
797
+ "agent": eval_request.agent,
798
+ "model": eval_request.model,
799
+ "max_steps": eval_request.max_steps,
800
+ "verbose": eval_request.verbose,
801
+ }
802
+
803
+ # Include group_size if > 1
804
+ if eval_request.group_size > 1:
805
+ response_data["group_size"] = eval_request.group_size
806
+ response_data["total_episodes"] = len(tasks) * eval_request.group_size
596
807
 
597
- return JSONResponse({"items": items, "next": len(items) - 1 if items else None})
808
+ return JSONResponse(response_data)
598
809
 
599
- except subprocess.TimeoutExpired:
600
- return JSONResponse({"error": "Docker logs timeout"}, status_code=500)
810
+ except json.JSONDecodeError:
811
+ return JSONResponse({"error": "Invalid JSON in request body"}, status_code=400)
601
812
  except Exception as e:
602
- return JSONResponse({"error": f"Failed to get logs: {e!s}"}, status_code=500)
813
+ return JSONResponse(
814
+ {"error": f"Failed to run evaluation: {e!s}"}, status_code=500
815
+ )
603
816
 
604
817
  @self.custom_route("/openapi.json", methods=["GET"])
605
818
  async def openapi_spec(request: Request) -> Response:
@@ -656,6 +869,40 @@ class MCPServer(FastMCP):
656
869
 
657
870
  return JSONResponse(spec)
658
871
 
872
+ # Register hud-logs tool when in Docker dev mode
873
+ container_name = os.environ.get("_HUD_DEV_DOCKER_CONTAINER")
874
+ if container_name:
875
+
876
+ @self.tool("hud-logs")
877
+ async def get_docker_logs(
878
+ tail: int = 100,
879
+ since: str | None = None,
880
+ until: str | None = None,
881
+ timestamps: bool = False,
882
+ ) -> dict[str, Any]:
883
+ """Get logs from the Docker container running the HUD environment.
884
+
885
+ Args:
886
+ tail: Number of lines to show from the end of the logs (default: 100)
887
+ since: Show logs since timestamp (e.g. 2013-01-02T13:23:37Z) or relative (42m)
888
+ until: Show logs before timestamp (e.g. 2013-01-02T13:23:37Z) or relative (42m)
889
+ timestamps: Show timestamps in log output
890
+
891
+ Returns:
892
+ Dictionary with:
893
+ - items: List of log entries
894
+ - container_name: Name of the container
895
+ - total_lines: Total number of log lines returned
896
+ - error: Error message if logs could not be retrieved
897
+ """
898
+ # Use helper function to get logs
899
+ return self._get_docker_logs(
900
+ tail=tail,
901
+ since=since,
902
+ until=until,
903
+ timestamps=timestamps,
904
+ )
905
+
659
906
  @self.custom_route("/docs", methods=["GET"])
660
907
  async def docs_page(request: Request) -> Response:
661
908
  """Interactive documentation page."""
hud/settings.py CHANGED
@@ -94,6 +94,12 @@ class Settings(BaseSettings):
94
94
  validation_alias="OPENAI_API_KEY",
95
95
  )
96
96
 
97
+ gemini_api_key: str | None = Field(
98
+ default=None,
99
+ description="API key for Google Gemini models",
100
+ validation_alias="GEMINI_API_KEY",
101
+ )
102
+
97
103
  openrouter_api_key: str | None = Field(
98
104
  default=None,
99
105
  description="API key for OpenRouter models",
hud/shared/hints.py CHANGED
@@ -38,7 +38,7 @@ HUD_API_KEY_MISSING = Hint(
38
38
  message="Missing or invalid HUD_API_KEY.",
39
39
  tips=[
40
40
  "Set HUD_API_KEY in your environment or run: hud set HUD_API_KEY=your-key-here",
41
- "Get a key at https://hud.so",
41
+ "Get a key at https://hud.ai",
42
42
  "Check for whitespace or truncation",
43
43
  ],
44
44
  docs_url=None,
@@ -68,7 +68,7 @@ PRO_PLAN_REQUIRED = Hint(
68
68
  tips=[
69
69
  "Upgrade your plan to continue",
70
70
  ],
71
- docs_url="https://hud.so/project/billing",
71
+ docs_url="https://hud.ai/project/billing",
72
72
  command_examples=None,
73
73
  code="PRO_PLAN_REQUIRED",
74
74
  context=["billing", "plan"],
@@ -80,7 +80,7 @@ CREDITS_EXHAUSTED = Hint(
80
80
  tips=[
81
81
  "Top up credits or upgrade your plan",
82
82
  ],
83
- docs_url="https://hud.so/project/billing",
83
+ docs_url="https://hud.ai/project/billing",
84
84
  command_examples=None,
85
85
  code="CREDITS_EXHAUSTED",
86
86
  context=["billing", "credits"],
hud/telemetry/job.py CHANGED
@@ -170,7 +170,7 @@ def _print_job_url(job_id: str, job_name: str) -> None:
170
170
  if not (settings.telemetry_enabled and settings.api_key):
171
171
  return
172
172
 
173
- url = f"https://hud.so/jobs/{job_id}"
173
+ url = f"https://hud.ai/jobs/{job_id}"
174
174
  header = f"🚀 Job '{job_name}' started:"
175
175
 
176
176
  # ANSI color codes
@@ -209,7 +209,7 @@ def _print_job_complete_url(job_id: str, job_name: str, error_occurred: bool = F
209
209
  if not (settings.telemetry_enabled and settings.api_key):
210
210
  return
211
211
 
212
- url = f"https://hud.so/jobs/{job_id}"
212
+ url = f"https://hud.ai/jobs/{job_id}"
213
213
 
214
214
  # ANSI color codes
215
215
  GREEN = "\033[92m"
hud/tools/__init__.py CHANGED
@@ -12,7 +12,12 @@ from .response import ResponseTool
12
12
  from .submit import SubmitTool
13
13
 
14
14
  if TYPE_CHECKING:
15
- from .computer import AnthropicComputerTool, HudComputerTool, OpenAIComputerTool
15
+ from .computer import (
16
+ AnthropicComputerTool,
17
+ GeminiComputerTool,
18
+ HudComputerTool,
19
+ OpenAIComputerTool,
20
+ )
16
21
 
17
22
  __all__ = [
18
23
  "AnthropicComputerTool",
@@ -20,6 +25,7 @@ __all__ = [
20
25
  "BaseTool",
21
26
  "BashTool",
22
27
  "EditTool",
28
+ "GeminiComputerTool",
23
29
  "HudComputerTool",
24
30
  "OpenAIComputerTool",
25
31
  "PlaywrightTool",
@@ -30,7 +36,12 @@ __all__ = [
30
36
 
31
37
  def __getattr__(name: str) -> Any:
32
38
  """Lazy import computer tools to avoid importing pyautogui unless needed."""
33
- if name in ("AnthropicComputerTool", "HudComputerTool", "OpenAIComputerTool"):
39
+ if name in (
40
+ "AnthropicComputerTool",
41
+ "HudComputerTool",
42
+ "OpenAIComputerTool",
43
+ "GeminiComputerTool",
44
+ ):
34
45
  from . import computer
35
46
 
36
47
  return getattr(computer, name)
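hud/tools/computer/__init__.py CHANGED (file header missing from this extract; path inferred from the relative imports below)
@@ -3,6 +3,7 @@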
3
3
  from __future__ import annotations
4
4
 
5
5
  from .anthropic import AnthropicComputerTool
6
+ from .gemini import GeminiComputerTool
6
7
  from .hud import HudComputerTool
7
8
  from .openai import OpenAIComputerTool
8
9
  from .qwen import QwenComputerTool
@@ -10,6 +11,7 @@ from .settings import computer_settings
10
11
 
11
12
  __all__ = [
12
13
  "AnthropicComputerTool",
14
+ "GeminiComputerTool",
13
15
  "HudComputerTool",
14
16
  "OpenAIComputerTool",
15
17
  "QwenComputerTool",