hud-python 0.4.14__py3-none-any.whl → 0.4.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/agents/base.py +118 -33
- hud/agents/claude.py +1 -1
- hud/agents/openai.py +5 -16
- hud/agents/tests/test_openai.py +24 -79
- hud/cli/__init__.py +137 -15
- hud/cli/analyze.py +2 -4
- hud/cli/build.py +6 -2
- hud/cli/dev.py +67 -0
- hud/cli/eval.py +90 -35
- hud/cli/hf.py +406 -0
- hud/cli/init.py +38 -19
- hud/cli/rl/README.md +243 -0
- hud/cli/rl/__init__.py +82 -0
- hud/cli/rl/init.py +370 -0
- hud/cli/rl/pod.py +491 -0
- hud/cli/rl/ssh.py +288 -0
- hud/cli/rl/train.py +421 -0
- hud/cli/rl/utils.py +165 -0
- hud/cli/tests/test_mcp_server.py +1 -4
- hud/clients/base.py +2 -0
- hud/clients/fastmcp.py +7 -2
- hud/clients/mcp_use.py +3 -1
- hud/clients/utils/retry_transport.py +34 -8
- hud/datasets/__init__.py +32 -0
- hud/datasets/execution/__init__.py +13 -0
- hud/datasets/execution/parallel.py +592 -0
- hud/datasets/execution/runner.py +123 -0
- hud/datasets/task.py +107 -0
- hud/datasets/utils.py +118 -0
- hud/otel/instrumentation.py +2 -1
- hud/server/server.py +58 -21
- hud/settings.py +12 -0
- hud/types.py +31 -10
- hud/utils/design.py +168 -2
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.14.dist-info → hud_python-0.4.16.dist-info}/METADATA +4 -3
- {hud_python-0.4.14.dist-info → hud_python-0.4.16.dist-info}/RECORD +41 -28
- hud/datasets.py +0 -327
- {hud_python-0.4.14.dist-info → hud_python-0.4.16.dist-info}/WHEEL +0 -0
- {hud_python-0.4.14.dist-info → hud_python-0.4.16.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.14.dist-info → hud_python-0.4.16.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""Standard asyncio-based dataset runner."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
8
|
+
|
|
9
|
+
from datasets import Dataset, load_dataset
|
|
10
|
+
|
|
11
|
+
from hud.agents.misc import ResponseAgent
|
|
12
|
+
from hud.datasets.task import Task
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from hud.agents import MCPAgent
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger("hud.datasets")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
async def run_dataset(
    name: str,
    dataset: str | Dataset | list[dict[str, Any]],
    agent_class: type[MCPAgent],
    agent_config: dict[str, Any] | None = None,
    max_concurrent: int = 50,
    metadata: dict[str, Any] | None = None,
    max_steps: int = 10,
    split: str = "train",
    auto_respond: bool = False,
    custom_system_prompt: str | None = None,
) -> list[Any]:
    """
    Run all tasks in a dataset with automatic job tracking.

    Args:
        name: Name for the job
        dataset: HuggingFace dataset identifier (e.g. "hud-evals/SheetBench-50"),
            Dataset object, OR list of task dictionaries (each is passed to
            ``Task(**task_dict)``)
        agent_class: Agent class to instantiate (e.g., ClaudeAgent)
        agent_config: Configuration/kwargs for agent (model, etc.)
        max_concurrent: Maximum parallel task execution
        metadata: Optional metadata for the job. The dict is copied, so the
            caller's object is never modified.
        max_steps: Maximum steps per task
        split: Dataset split to use when loading from string (default: "train")
        auto_respond: Whether to use auto-response agent
        custom_system_prompt: System prompt applied to every task that does not
            carry a non-empty ``system_prompt`` of its own

    Returns:
        List of results from agent.run() in dataset order. The slot of a task
        that raised stays ``None``; the exception is logged, not re-raised.

    Example:
        >>> from hud.agents import ClaudeAgent
        >>> # Option 1: From dataset string identifier
        >>> results = await run_dataset(
        ...     "SheetBench Eval",
        ...     "hud-evals/SheetBench-50",
        ...     ClaudeAgent,
        ...     {"model": "claude-3-5-sonnet-20241022"},
        ... )
        >>> # Option 2: From HuggingFace dataset object
        >>> from datasets import load_dataset
        >>> dataset = load_dataset("hud-evals/SheetBench-50", split="train")
        >>> results = await run_dataset("my_eval", dataset, ClaudeAgent)
        >>> # Option 3: From list of dicts
        >>> tasks = [{"prompt": "...", "mcp_config": {...}, ...}, ...]
        >>> results = await run_dataset("browser_eval", tasks, ClaudeAgent)
    """
    # Import here to avoid circular imports
    import hud

    dataset_link = None

    # Load dataset from string if needed
    if isinstance(dataset, str):
        logger.info("Loading dataset %s from HuggingFace...", dataset)
        dataset_link = dataset

        # Load dataset from HuggingFace
        dataset = cast("Dataset", load_dataset(dataset, split=split))

    # Create job context. Copy first: writing into `metadata` itself would leak
    # the agent_class/agent_config keys back into the caller's dict.
    job_metadata: dict[str, Any] = dict(metadata) if metadata else {}
    job_metadata["agent_class"] = agent_class.__name__
    job_metadata["agent_config"] = agent_config

    # Extract dataset verification info if available
    if isinstance(dataset, Dataset) and not dataset_link:
        try:
            # Best effort: derive "<project>/<dataset>" from the first checksum key.
            # NOTE(review): assumes that key is a hub URL whose 4th and 5th
            # "/"-separated parts are the project and "<dataset>@<revision>" - confirm.
            general_info = next(iter(dataset.info.__dict__["download_checksums"].keys())).split("/")
            project = general_info[3]
            dataset_name = general_info[4].split("@")[0]
            dataset_link = f"{project}/{dataset_name}"
        except Exception:
            logger.warning("Failed to extract dataset verification info")

    with hud.job(name, metadata=job_metadata, dataset_link=dataset_link) as job_obj:
        # Run tasks with semaphore for concurrency control
        sem = asyncio.Semaphore(max_concurrent)
        results: list[Any | None] = [None] * len(dataset)

        async def _worker(index: int, task_dict: Any) -> None:
            async with sem:
                # Create trace for this task
                task_name = task_dict.get("prompt") or f"Task {index}"
                # A missing key and a None/empty value both mean "no prompt of its
                # own". Build a new dict rather than writing into the caller's.
                if custom_system_prompt and not task_dict.get("system_prompt"):
                    task_dict = {**task_dict, "system_prompt": custom_system_prompt}
                with hud.trace(task_name, job_id=job_obj.id, task_id=task_dict.get("id")):
                    # Convert dict to Task here, at trace level
                    task = Task(**task_dict)

                    agent = agent_class(**(agent_config or {}))

                    if auto_respond:
                        agent.response_agent = ResponseAgent()
                    results[index] = await agent.run(task, max_steps=max_steps)

        # Execute all tasks
        outcomes = await asyncio.gather(
            *[_worker(i, task) for i, task in enumerate(dataset)],
            return_exceptions=True,  # Don't fail entire batch on one error
        )

        # gather() hands failures back as values; surface them so a None slot
        # in `results` can be traced to its cause.
        for index, outcome in enumerate(outcomes):
            if isinstance(outcome, BaseException):
                logger.error("Task %d failed: %s", index, outcome, exc_info=outcome)

    return results
|
hud/datasets/task.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""Task model for HUD datasets."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
from string import Template
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel, Field, field_validator
|
|
11
|
+
|
|
12
|
+
from hud.settings import settings
|
|
13
|
+
from hud.types import MCPToolCall
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class _EnvTemplate(Template):
    """``string.Template`` whose braced form also accepts ``${NAME:default}``."""

    # An identifier, optionally followed by ":" and a default running up to the
    # closing brace. The whole "NAME:default" text is what gets looked up.
    braceidpattern = r"(?a:[_a-z][_a-z0-9]*)(?::[^}]*)?"


class _EnvMapping(dict):
    """Substitution mapping for placeholders that are not plain known keys."""

    def __missing__(self, key: str) -> Any:
        """Resolve an unknown name to "" and ``NAME:default`` to value-or-default."""
        name, has_default, default = key.partition(":")
        if not has_default:
            # Missing variables resolve to empty strings
            return ""
        value = self.get(name)
        # An unset or empty variable falls back to the default
        return default if value is None or value == "" else value


class Task(BaseModel):
    """
    A task configuration that can be used to create a task.

    The mcp_config field supports environment variable substitution using
    template placeholders in the format ${VAR_NAME} or ${VAR_NAME:default_value}.
    The default is used when the variable is unset or empty; without a default,
    an unset variable resolves to an empty string.

    Example:
        mcp_config: {
            "hud": {
                "url": "${HUD_MCP_URL:https://mcp.hud.so/v3/mcp}",
                "headers": {
                    "Authorization": "Bearer ${HUD_API_KEY}",
                    "Mcp-Image": "your-mcp-image"
                }
            }
        }
    """

    id: str | None = None
    prompt: str
    mcp_config: dict[str, Any]
    setup_tool: MCPToolCall | list[MCPToolCall] | None = None
    evaluate_tool: MCPToolCall | list[MCPToolCall] | None = None
    system_prompt: str | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)

    @field_validator("mcp_config", "metadata", mode="before")
    @classmethod
    def parse_json_strings(cls, v: Any) -> Any:
        """Parse JSON strings into dictionaries; other values pass through.

        Raises:
            ValueError: If ``v`` is a string that is not valid JSON.
        """
        if isinstance(v, str):
            try:
                return json.loads(v)
            except json.JSONDecodeError as e:
                raise ValueError(f"Invalid JSON string: {e}") from e
        return v

    @field_validator("setup_tool", "evaluate_tool", mode="before")
    @classmethod
    def convert_dict_to_tool_call(cls, v: Any) -> Any:
        """Convert dict to MCPToolCall instance, parsing JSON strings first.

        A dict becomes one MCPToolCall; in a list, each dict item is converted
        and other items are left unchanged. None is returned as None.

        Raises:
            ValueError: If ``v`` is a string that is not valid JSON.
        """
        if v is None:
            return None

        # Parse JSON string if needed
        if isinstance(v, str):
            try:
                v = json.loads(v)
            except json.JSONDecodeError as e:
                raise ValueError(f"Invalid JSON string: {e}") from e

        if isinstance(v, dict):
            return MCPToolCall(**v)
        if isinstance(v, list):
            return [MCPToolCall(**item) if isinstance(item, dict) else item for item in v]
        return v

    @field_validator("mcp_config", mode="before")
    @classmethod
    def resolve_env_vars(cls, v: dict[str, Any]) -> dict[str, Any]:
        """
        Automatically resolve environment variables in mcp_config using Template.

        Supports ``$VAR_NAME``, ``${VAR_NAME}`` and ``${VAR_NAME:default}``, with
        values taken from the system environment, overlaid with the non-None
        fields of ``settings`` (and ``HUD_API_KEY`` from ``settings.api_key``).

        Missing variables resolve to empty strings, or to the default when one
        is given. ``$$`` yields a literal ``$``; any other malformed ``$``
        expression raises ValueError, as ``Template.substitute`` does.

        NOTE(review): if mcp_config arrives as a JSON string, this relies on
        parse_json_strings having run first - confirm pydantic's ordering of
        "before" validators.
        """
        import os

        # Start with current environment variables
        mapping = _EnvMapping(os.environ)
        # Drop unset (None) settings: they would shadow real environment
        # variables and render as the literal text "None".
        mapping.update(
            {key: value for key, value in settings.model_dump().items() if value is not None}
        )

        if settings.api_key:
            mapping["HUD_API_KEY"] = settings.api_key

        def substitute_in_value(obj: Any) -> Any:
            """Recursively substitute variables in nested structures."""
            if isinstance(obj, str):
                return _EnvTemplate(obj).substitute(mapping)
            if isinstance(obj, dict):
                return {k: substitute_in_value(item) for k, item in obj.items()}
            if isinstance(obj, list):
                return [substitute_in_value(item) for item in obj]
            return obj

        return substitute_in_value(v)
|
hud/datasets/utils.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""Dataset utilities for loading, saving, and fetching datasets."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from datasets import Dataset
|
|
10
|
+
|
|
11
|
+
from .task import Task
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger("hud.datasets")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
async def fetch_system_prompt_from_dataset(dataset_id: str) -> str | None:
    """
    Fetch system_prompt.txt from a HuggingFace dataset repository.

    The download and the file read are blocking calls, so they run in a worker
    thread instead of stalling the event loop.

    Args:
        dataset_id: HuggingFace dataset identifier (e.g., "hud-evals/SheetBench-50")

    Returns:
        Stripped system prompt text if found and non-empty, None otherwise
        (file missing or empty, huggingface_hub not installed, or any other
        error - errors are logged, never raised).
    """
    # Local import: only this function needs asyncio in this module
    import asyncio

    try:
        # Import here to avoid unnecessary dependency
        from huggingface_hub import hf_hub_download
        from huggingface_hub.errors import EntryNotFoundError

        def _download_and_read() -> str:
            """Download system_prompt.txt and return its stripped content."""
            file_path = hf_hub_download(
                repo_id=dataset_id, filename="system_prompt.txt", repo_type="dataset"
            )
            with open(file_path, encoding="utf-8") as f:
                return f.read().strip()

        # Try to download the system_prompt.txt file
        try:
            content = await asyncio.to_thread(_download_and_read)
        except EntryNotFoundError:
            logger.debug("No system_prompt.txt found in dataset %s", dataset_id)
            return None

        if content:
            logger.info(
                "Loaded system prompt from %s (length: %d chars)", dataset_id, len(content)
            )
            return content

        logger.warning("System prompt file is empty in %s", dataset_id)
        return None

    except ImportError:
        logger.warning(
            "huggingface_hub not installed. Install it to fetch system prompts from datasets."
        )
        return None
    except Exception as e:
        logger.error("Error fetching system prompt from %s: %s", dataset_id, e)
        return None
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def save_tasks(
    tasks: list[dict[str, Any]], repo_id: str, fields: list[str] | None = None, **kwargs: Any
) -> None:
    """
    Push a list of task dictionaries to a HuggingFace dataset repository.

    Dict and list values are stored as JSON strings, None becomes an empty
    string, str/int/float/bool values are kept as they are, and anything else
    is stored as ``str(value)``.

    Args:
        tasks: List of dictionaries to save
        repo_id: HuggingFace repository ID (e.g., "hud-evals/my-tasks")
        fields: Optional list of fields to save; a listed field that a dict
            lacks is left out of that row. If None, every key of each dict is saved.
        **kwargs: Additional arguments passed to dataset.push_to_hub()

    Raises:
        ValueError: If any item is a Task object instead of a dictionary.
    """

    def _encode(value: Any) -> Any:
        """Turn one field value into its stored representation."""
        if value is None:
            return ""
        if isinstance(value, (dict, list)):
            return json.dumps(value)
        if isinstance(value, (str, int, float, bool)):
            return value
        return str(value)

    # Task objects carry resolved environment variables; refuse them up front
    if tasks and isinstance(tasks[0], Task):
        raise ValueError(
            "save_tasks expects dictionaries, not Task objects. "
            "Task objects have resolved environment variables which would expose secrets. "
            "Please pass raw dictionaries with template strings like '${HUD_API_KEY}' preserved."
        )

    rows = []
    for position, entry in enumerate(tasks):
        # Same guard for every later item
        if isinstance(entry, Task):
            raise ValueError(
                f"Item {position} is a Task object, not a dictionary. "
                "This would expose resolved environment variables. "
                "Please convert to dictionary format with template strings preserved."
            )

        selected = list(entry.keys()) if fields is None else fields
        rows.append({key: _encode(entry[key]) for key in selected if key in entry})

    # Create and push dataset
    Dataset.from_list(rows).push_to_hub(repo_id, **kwargs)
|
hud/otel/instrumentation.py
CHANGED
|
@@ -32,8 +32,9 @@ def install_mcp_instrumentation(provider: TracerProvider) -> None:
|
|
|
32
32
|
try:
|
|
33
33
|
# First, patch the _instruments to use our fork
|
|
34
34
|
import opentelemetry.instrumentation.mcp.instrumentation as mcp_inst
|
|
35
|
+
|
|
35
36
|
mcp_inst._instruments = ("hud-mcp-python-sdk >= 3.13.1",)
|
|
36
|
-
|
|
37
|
+
|
|
37
38
|
from opentelemetry.instrumentation.mcp.instrumentation import (
|
|
38
39
|
McpInstrumentor,
|
|
39
40
|
)
|
hud/server/server.py
CHANGED
|
@@ -33,6 +33,8 @@ def _run_with_sigterm(coro_fn: Callable[..., Any], *args: Any, **kwargs: Any) ->
|
|
|
33
33
|
"""Run *coro_fn* via anyio.run() and cancel on SIGTERM or SIGINT (POSIX)."""
|
|
34
34
|
global _sigterm_received
|
|
35
35
|
|
|
36
|
+
sys.stderr.flush()
|
|
37
|
+
|
|
36
38
|
async def _runner() -> None:
|
|
37
39
|
stop_evt: asyncio.Event | None = None
|
|
38
40
|
if sys.platform != "win32" and os.getenv("FASTMCP_DISABLE_SIGTERM_HANDLER") != "1":
|
|
@@ -43,28 +45,46 @@ def _run_with_sigterm(coro_fn: Callable[..., Any], *args: Any, **kwargs: Any) ->
|
|
|
43
45
|
def handle_sigterm() -> None:
|
|
44
46
|
global _sigterm_received
|
|
45
47
|
_sigterm_received = True
|
|
46
|
-
logger.info("Received SIGTERM signal")
|
|
48
|
+
logger.info("Received SIGTERM signal, setting shutdown flag")
|
|
49
|
+
stop_evt.set()
|
|
50
|
+
|
|
51
|
+
# Handle SIGINT for hot-reload
|
|
52
|
+
def handle_sigint() -> None:
|
|
53
|
+
logger.info("Received SIGINT signal, triggering hot reload...")
|
|
54
|
+
# Don't set _sigterm_received for SIGINT
|
|
47
55
|
stop_evt.set()
|
|
48
56
|
|
|
49
57
|
# Handle both SIGTERM and SIGINT for graceful shutdown
|
|
50
|
-
|
|
58
|
+
# In Docker containers, we always want to register our handlers
|
|
59
|
+
try:
|
|
51
60
|
loop.add_signal_handler(signal.SIGTERM, handle_sigterm)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
61
|
+
logger.info("SIGTERM handler registered")
|
|
62
|
+
except (ValueError, OSError) as e:
|
|
63
|
+
logger.warning("Could not register SIGTERM handler: %s", e)
|
|
64
|
+
|
|
65
|
+
try:
|
|
66
|
+
loop.add_signal_handler(signal.SIGINT, handle_sigint)
|
|
67
|
+
logger.info("SIGINT handler registered")
|
|
68
|
+
except (ValueError, OSError) as e:
|
|
69
|
+
logger.warning("Could not register SIGINT handler: %s", e)
|
|
70
|
+
|
|
71
|
+
try:
|
|
72
|
+
async with anyio.create_task_group() as tg:
|
|
73
|
+
tg.start_soon(coro_fn, *args, **kwargs)
|
|
74
|
+
|
|
75
|
+
if stop_evt is not None:
|
|
76
|
+
|
|
77
|
+
async def _watch() -> None:
|
|
78
|
+
logger.info("Signal handler ready, waiting for SIGTERM or SIGINT")
|
|
79
|
+
if stop_evt is not None:
|
|
80
|
+
await stop_evt.wait()
|
|
81
|
+
logger.info("Shutdown signal received, initiating graceful shutdown...")
|
|
82
|
+
tg.cancel_scope.cancel()
|
|
83
|
+
|
|
84
|
+
tg.start_soon(_watch)
|
|
85
|
+
except* asyncio.CancelledError:
|
|
86
|
+
# This ensures the task group cleans up properly
|
|
87
|
+
logger.info("Task group cancelled, cleaning up...")
|
|
68
88
|
|
|
69
89
|
anyio.run(_runner)
|
|
70
90
|
|
|
@@ -101,12 +121,29 @@ class MCPServer(FastMCP):
|
|
|
101
121
|
yield {}
|
|
102
122
|
finally:
|
|
103
123
|
# Only call shutdown handler if SIGTERM was received
|
|
124
|
+
logger.info("Lifespan `finally` block reached. Checking for SIGTERM.")
|
|
125
|
+
# Force flush logs to ensure they're visible
|
|
126
|
+
sys.stderr.flush()
|
|
127
|
+
|
|
104
128
|
if self._shutdown_fn is not None and _sigterm_received:
|
|
105
|
-
logger.info("SIGTERM
|
|
106
|
-
|
|
129
|
+
logger.info("SIGTERM detected! Calling @mcp.shutdown handler...")
|
|
130
|
+
sys.stderr.flush()
|
|
131
|
+
try:
|
|
132
|
+
await self._shutdown_fn()
|
|
133
|
+
logger.info("@mcp.shutdown handler completed successfully.")
|
|
134
|
+
sys.stderr.flush()
|
|
135
|
+
except Exception as e:
|
|
136
|
+
logger.error("Error during @mcp.shutdown: %s", e)
|
|
137
|
+
sys.stderr.flush()
|
|
107
138
|
_sigterm_received = False
|
|
108
139
|
elif self._shutdown_fn is not None:
|
|
109
|
-
logger.
|
|
140
|
+
logger.info(
|
|
141
|
+
"No SIGTERM. This is a hot reload (SIGINT) or normal exit. Skipping @mcp.shutdown handler." # noqa: E501
|
|
142
|
+
)
|
|
143
|
+
sys.stderr.flush()
|
|
144
|
+
else:
|
|
145
|
+
logger.info("No shutdown handler registered.")
|
|
146
|
+
sys.stderr.flush()
|
|
110
147
|
|
|
111
148
|
fastmcp_kwargs["lifespan"] = _lifespan
|
|
112
149
|
|
hud/settings.py
CHANGED
|
@@ -44,6 +44,18 @@ class Settings(BaseSettings):
|
|
|
44
44
|
validation_alias="OPENAI_API_KEY",
|
|
45
45
|
)
|
|
46
46
|
|
|
47
|
+
wandb_api_key: str | None = Field(
|
|
48
|
+
default=None,
|
|
49
|
+
description="API key for Weights & Biases",
|
|
50
|
+
validation_alias="WANDB_API_KEY",
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
prime_api_key: str | None = Field(
|
|
54
|
+
default=None,
|
|
55
|
+
description="API key for Prime Intellect",
|
|
56
|
+
validation_alias="PRIME_API_KEY",
|
|
57
|
+
)
|
|
58
|
+
|
|
47
59
|
telemetry_enabled: bool = Field(
|
|
48
60
|
default=True,
|
|
49
61
|
description="Enable telemetry for the HUD SDK",
|
hud/types.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import json
|
|
3
4
|
import uuid
|
|
4
5
|
from typing import Any, Literal
|
|
5
6
|
|
|
7
|
+
import mcp.types as types
|
|
6
8
|
from mcp.types import CallToolRequestParams, CallToolResult
|
|
7
9
|
from pydantic import BaseModel, ConfigDict, Field
|
|
8
10
|
|
|
@@ -13,22 +15,41 @@ class MCPToolCall(CallToolRequestParams):
|
|
|
13
15
|
id: str = Field(default_factory=lambda: str(uuid.uuid4())) # Unique identifier for reference
|
|
14
16
|
|
|
15
17
|
def __str__(self) -> str:
    """Render this tool call via the HUD design formatter."""
    # Imported at call time, as in the original
    from hud.utils.design import design

    tool_name, tool_arguments = self.name, self.arguments
    return design.format_tool_call(tool_name, tool_arguments)
|
|
20
22
|
|
|
21
23
|
|
|
22
24
|
class MCPToolResult(CallToolResult):
    """A tool result."""

    def __str__(self) -> str:
        """Render a compact one-line summary via the HUD design formatter.

        The summary is taken from the first text or image block in ``content``:
        the first line of the stripped text, or an image marker. If that leaves
        the summary empty, ``structuredContent`` is used as compact JSON, or as
        ``str()`` when it cannot be JSON-encoded.
        """
        from hud.utils.design import design

        summary = ""
        for block in self.content or ():
            if isinstance(block, types.TextContent):
                # Keep only the first line of the text
                summary = block.text.strip().split("\n")[0]
                break
            if isinstance(block, types.ImageContent):
                summary = "📷 Image"
                break

        # Fall back to structured content when no summary was found
        if not summary and self.structuredContent:
            try:
                summary = json.dumps(self.structuredContent, separators=(",", ":"))
            except (TypeError, ValueError):
                summary = str(self.structuredContent)

        return design.format_tool_result(summary, self.isError)
|
|
32
53
|
|
|
33
54
|
|
|
34
55
|
class AgentResponse(BaseModel):
|