hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +27 -7
- hud/agents/__init__.py +11 -5
- hud/agents/base.py +220 -500
- hud/agents/claude.py +200 -240
- hud/agents/gemini.py +275 -0
- hud/agents/gemini_cua.py +335 -0
- hud/agents/grounded_openai.py +98 -100
- hud/agents/misc/integration_test_agent.py +51 -20
- hud/agents/misc/response_agent.py +41 -36
- hud/agents/openai.py +291 -292
- hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
- hud/agents/operator.py +211 -0
- hud/agents/tests/conftest.py +133 -0
- hud/agents/tests/test_base.py +300 -622
- hud/agents/tests/test_base_runtime.py +233 -0
- hud/agents/tests/test_claude.py +379 -210
- hud/agents/tests/test_client.py +9 -10
- hud/agents/tests/test_gemini.py +369 -0
- hud/agents/tests/test_grounded_openai_agent.py +65 -50
- hud/agents/tests/test_openai.py +376 -140
- hud/agents/tests/test_operator.py +362 -0
- hud/agents/tests/test_run_eval.py +179 -0
- hud/cli/__init__.py +461 -545
- hud/cli/analyze.py +43 -5
- hud/cli/build.py +664 -110
- hud/cli/debug.py +8 -5
- hud/cli/dev.py +882 -734
- hud/cli/eval.py +782 -668
- hud/cli/flows/dev.py +167 -0
- hud/cli/flows/init.py +191 -0
- hud/cli/flows/tasks.py +153 -56
- hud/cli/flows/templates.py +151 -0
- hud/cli/flows/tests/__init__.py +1 -0
- hud/cli/flows/tests/test_dev.py +126 -0
- hud/cli/init.py +60 -58
- hud/cli/push.py +29 -11
- hud/cli/rft.py +311 -0
- hud/cli/rft_status.py +145 -0
- hud/cli/tests/test_analyze.py +5 -5
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +108 -6
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_init.py +6 -1
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +140 -0
- hud/cli/tests/test_convert.py +361 -0
- hud/cli/tests/test_debug.py +12 -10
- hud/cli/tests/test_dev.py +197 -0
- hud/cli/tests/test_eval.py +251 -0
- hud/cli/tests/test_eval_bedrock.py +51 -0
- hud/cli/tests/test_init.py +124 -0
- hud/cli/tests/test_main_module.py +11 -5
- hud/cli/tests/test_mcp_server.py +12 -100
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/tests/test_registry.py +1 -1
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/{rl → utils}/celebrate.py +14 -12
- hud/cli/utils/config.py +18 -1
- hud/cli/utils/docker.py +130 -4
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/git.py +136 -0
- hud/cli/utils/interactive.py +39 -5
- hud/cli/utils/metadata.py +69 -0
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/server.py +2 -2
- hud/cli/utils/source_hash.py +3 -3
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_git.py +142 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +258 -0
- hud/cli/{rl → utils}/viewer.py +2 -2
- hud/clients/README.md +12 -11
- hud/clients/__init__.py +4 -3
- hud/clients/base.py +166 -26
- hud/clients/environment.py +51 -0
- hud/clients/fastmcp.py +13 -6
- hud/clients/mcp_use.py +40 -15
- hud/clients/tests/test_analyze_scenarios.py +206 -0
- hud/clients/tests/test_protocol.py +9 -3
- hud/datasets/__init__.py +23 -20
- hud/datasets/loader.py +327 -0
- hud/datasets/runner.py +192 -105
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_loader.py +221 -0
- hud/datasets/tests/test_utils.py +315 -0
- hud/datasets/utils.py +270 -90
- hud/environment/__init__.py +50 -0
- hud/environment/connection.py +206 -0
- hud/environment/connectors/__init__.py +33 -0
- hud/environment/connectors/base.py +68 -0
- hud/environment/connectors/local.py +177 -0
- hud/environment/connectors/mcp_config.py +109 -0
- hud/environment/connectors/openai.py +101 -0
- hud/environment/connectors/remote.py +172 -0
- hud/environment/environment.py +694 -0
- hud/environment/integrations/__init__.py +45 -0
- hud/environment/integrations/adk.py +67 -0
- hud/environment/integrations/anthropic.py +196 -0
- hud/environment/integrations/gemini.py +92 -0
- hud/environment/integrations/langchain.py +82 -0
- hud/environment/integrations/llamaindex.py +68 -0
- hud/environment/integrations/openai.py +238 -0
- hud/environment/mock.py +306 -0
- hud/environment/router.py +112 -0
- hud/environment/scenarios.py +493 -0
- hud/environment/tests/__init__.py +1 -0
- hud/environment/tests/test_connection.py +317 -0
- hud/environment/tests/test_connectors.py +218 -0
- hud/environment/tests/test_environment.py +161 -0
- hud/environment/tests/test_integrations.py +257 -0
- hud/environment/tests/test_local_connectors.py +201 -0
- hud/environment/tests/test_scenarios.py +280 -0
- hud/environment/tests/test_tools.py +208 -0
- hud/environment/types.py +23 -0
- hud/environment/utils/__init__.py +35 -0
- hud/environment/utils/formats.py +215 -0
- hud/environment/utils/schema.py +171 -0
- hud/environment/utils/tool_wrappers.py +113 -0
- hud/eval/__init__.py +67 -0
- hud/eval/context.py +674 -0
- hud/eval/display.py +299 -0
- hud/eval/instrument.py +185 -0
- hud/eval/manager.py +466 -0
- hud/eval/parallel.py +268 -0
- hud/eval/task.py +340 -0
- hud/eval/tests/__init__.py +1 -0
- hud/eval/tests/test_context.py +178 -0
- hud/eval/tests/test_eval.py +210 -0
- hud/eval/tests/test_manager.py +152 -0
- hud/eval/tests/test_parallel.py +168 -0
- hud/eval/tests/test_task.py +145 -0
- hud/eval/types.py +63 -0
- hud/eval/utils.py +183 -0
- hud/patches/__init__.py +19 -0
- hud/patches/mcp_patches.py +151 -0
- hud/patches/warnings.py +54 -0
- hud/samples/browser.py +4 -4
- hud/server/__init__.py +2 -1
- hud/server/low_level.py +2 -1
- hud/server/router.py +164 -0
- hud/server/server.py +567 -80
- hud/server/tests/test_mcp_server_integration.py +11 -11
- hud/server/tests/test_mcp_server_more.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/settings.py +45 -3
- hud/shared/exceptions.py +36 -10
- hud/shared/hints.py +26 -1
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +40 -31
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +20 -19
- hud/telemetry/exporter.py +201 -0
- hud/telemetry/instrument.py +158 -253
- hud/telemetry/tests/test_eval_telemetry.py +356 -0
- hud/telemetry/tests/test_exporter.py +258 -0
- hud/telemetry/tests/test_instrument.py +401 -0
- hud/tools/__init__.py +16 -2
- hud/tools/apply_patch.py +639 -0
- hud/tools/base.py +54 -4
- hud/tools/bash.py +2 -2
- hud/tools/computer/__init__.py +4 -0
- hud/tools/computer/anthropic.py +2 -2
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/hud.py +23 -6
- hud/tools/computer/openai.py +20 -21
- hud/tools/computer/qwen.py +434 -0
- hud/tools/computer/settings.py +37 -0
- hud/tools/edit.py +3 -7
- hud/tools/executors/base.py +4 -2
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/grounding/grounded_tool.py +13 -18
- hud/tools/grounding/grounder.py +10 -31
- hud/tools/grounding/tests/test_grounded_tool.py +26 -44
- hud/tools/jupyter.py +330 -0
- hud/tools/playwright.py +18 -3
- hud/tools/shell.py +308 -0
- hud/tools/tests/test_apply_patch.py +718 -0
- hud/tools/tests/test_computer.py +4 -9
- hud/tools/tests/test_computer_actions.py +24 -2
- hud/tools/tests/test_jupyter_tool.py +181 -0
- hud/tools/tests/test_shell.py +596 -0
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/tools/types.py +21 -1
- hud/types.py +167 -57
- hud/utils/__init__.py +2 -0
- hud/utils/env.py +67 -0
- hud/utils/hud_console.py +61 -3
- hud/utils/mcp.py +15 -58
- hud/utils/strict_schema.py +162 -0
- hud/utils/tests/test_init.py +1 -2
- hud/utils/tests/test_mcp.py +1 -28
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/utils/types.py +20 -0
- hud/version.py +1 -1
- hud_python-0.5.1.dist-info/METADATA +264 -0
- hud_python-0.5.1.dist-info/RECORD +299 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
- hud/agents/langchain.py +0 -261
- hud/agents/lite_llm.py +0 -72
- hud/cli/rl/__init__.py +0 -180
- hud/cli/rl/config.py +0 -101
- hud/cli/rl/display.py +0 -133
- hud/cli/rl/gpu.py +0 -63
- hud/cli/rl/gpu_utils.py +0 -321
- hud/cli/rl/local_runner.py +0 -595
- hud/cli/rl/presets.py +0 -96
- hud/cli/rl/remote_runner.py +0 -463
- hud/cli/rl/rl_api.py +0 -150
- hud/cli/rl/vllm.py +0 -177
- hud/cli/rl/wait_utils.py +0 -89
- hud/datasets/parallel.py +0 -687
- hud/misc/__init__.py +0 -1
- hud/misc/claude_plays_pokemon.py +0 -292
- hud/otel/__init__.py +0 -35
- hud/otel/collector.py +0 -142
- hud/otel/config.py +0 -181
- hud/otel/context.py +0 -570
- hud/otel/exporters.py +0 -369
- hud/otel/instrumentation.py +0 -135
- hud/otel/processors.py +0 -121
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_processors.py +0 -197
- hud/rl/README.md +0 -30
- hud/rl/__init__.py +0 -1
- hud/rl/actor.py +0 -176
- hud/rl/buffer.py +0 -405
- hud/rl/chat_template.jinja +0 -101
- hud/rl/config.py +0 -192
- hud/rl/distributed.py +0 -132
- hud/rl/learner.py +0 -637
- hud/rl/tests/__init__.py +0 -1
- hud/rl/tests/test_learner.py +0 -186
- hud/rl/train.py +0 -382
- hud/rl/types.py +0 -101
- hud/rl/utils/start_vllm_server.sh +0 -30
- hud/rl/utils.py +0 -524
- hud/rl/vllm_adapter.py +0 -143
- hud/telemetry/job.py +0 -352
- hud/telemetry/replay.py +0 -74
- hud/telemetry/tests/test_replay.py +0 -40
- hud/telemetry/tests/test_trace.py +0 -63
- hud/telemetry/trace.py +0 -158
- hud/utils/agent_factories.py +0 -86
- hud/utils/async_utils.py +0 -65
- hud/utils/group_eval.py +0 -223
- hud/utils/progress.py +0 -149
- hud/utils/tasks.py +0 -127
- hud/utils/tests/test_async_utils.py +0 -173
- hud/utils/tests/test_progress.py +0 -261
- hud_python-0.4.45.dist-info/METADATA +0 -552
- hud_python-0.4.45.dist-info/RECORD +0 -228
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0
hud/utils/hud_console.py
CHANGED
|
@@ -20,8 +20,6 @@ import time
|
|
|
20
20
|
import traceback
|
|
21
21
|
from typing import TYPE_CHECKING, Any, Literal, Self
|
|
22
22
|
|
|
23
|
-
import questionary
|
|
24
|
-
import typer
|
|
25
23
|
from rich.console import Console
|
|
26
24
|
from rich.panel import Panel
|
|
27
25
|
from rich.table import Table
|
|
@@ -38,9 +36,26 @@ TEXT = "bright_white" # Off-white that's readable on dark, not too bright on li
|
|
|
38
36
|
SECONDARY = "rgb(108,113,196)" # Muted blue-purple for secondary text
|
|
39
37
|
|
|
40
38
|
|
|
39
|
+
# HUD Symbol System - Minimal 3-category system with default colors
|
|
40
|
+
class Symbols:
|
|
41
|
+
"""Unicode symbols for consistent CLI output with default colors."""
|
|
42
|
+
|
|
43
|
+
# Info/Items - Use for all informational lines (gold)
|
|
44
|
+
ITEM = f"[{GOLD}]•[/{GOLD}]"
|
|
45
|
+
|
|
46
|
+
# Status - Use for state/completion (green)
|
|
47
|
+
SUCCESS = f"[{GREEN}]●[/{GREEN}]"
|
|
48
|
+
|
|
49
|
+
# Flow/Special - Use for transitions and important notes (gold)
|
|
50
|
+
FLOW = f"[{GOLD}]⟿[/{GOLD}]"
|
|
51
|
+
|
|
52
|
+
|
|
41
53
|
class HUDConsole:
|
|
42
54
|
"""Design system for HUD CLI output."""
|
|
43
55
|
|
|
56
|
+
# Make symbols easily accessible
|
|
57
|
+
sym = Symbols
|
|
58
|
+
|
|
44
59
|
def __init__(self, logger: logging.Logger | None = None) -> None:
|
|
45
60
|
"""Initialize the design system.
|
|
46
61
|
|
|
@@ -470,6 +485,9 @@ class HUDConsole:
|
|
|
470
485
|
Returns:
|
|
471
486
|
The selected choice value
|
|
472
487
|
"""
|
|
488
|
+
import questionary
|
|
489
|
+
from questionary import Style
|
|
490
|
+
|
|
473
491
|
# Convert choices to questionary format
|
|
474
492
|
q_choices = []
|
|
475
493
|
|
|
@@ -481,15 +499,27 @@ class HUDConsole:
|
|
|
481
499
|
else:
|
|
482
500
|
q_choices.append(choice)
|
|
483
501
|
|
|
502
|
+
# Custom style for better visibility of selection
|
|
503
|
+
custom_style = Style(
|
|
504
|
+
[
|
|
505
|
+
("qmark", "fg:cyan bold"),
|
|
506
|
+
("question", "bold"),
|
|
507
|
+
("pointer", "fg:cyan bold"),
|
|
508
|
+
("highlighted", "fg:cyan bold"),
|
|
509
|
+
]
|
|
510
|
+
)
|
|
511
|
+
|
|
484
512
|
result = questionary.select(
|
|
485
513
|
message,
|
|
486
514
|
choices=q_choices,
|
|
487
|
-
default=q_choices[default] if default is not None else None,
|
|
488
515
|
instruction="(Use ↑/↓ arrows, Enter to select)",
|
|
516
|
+
style=custom_style,
|
|
489
517
|
).ask()
|
|
490
518
|
|
|
491
519
|
# If no selection made (Ctrl+C or ESC), exit
|
|
492
520
|
if result is None:
|
|
521
|
+
import typer
|
|
522
|
+
|
|
493
523
|
raise typer.Exit(1)
|
|
494
524
|
|
|
495
525
|
return result
|
|
@@ -545,8 +575,36 @@ class HUDConsole:
|
|
|
545
575
|
message: The confirmation message
|
|
546
576
|
default: If True, the default choice is True
|
|
547
577
|
"""
|
|
578
|
+
import questionary
|
|
579
|
+
|
|
548
580
|
return questionary.confirm(message, default=default).ask()
|
|
549
581
|
|
|
582
|
+
# Symbol-based output methods
|
|
583
|
+
def symbol(self, symbol: str, message: str, color: str = GOLD, stderr: bool = True) -> None:
|
|
584
|
+
"""Print a message with a colored symbol prefix.
|
|
585
|
+
|
|
586
|
+
Args:
|
|
587
|
+
symbol: Symbol to use (use Symbols.* constants)
|
|
588
|
+
message: Message text
|
|
589
|
+
color: Color for the symbol (default: gold)
|
|
590
|
+
stderr: If True, output to stderr
|
|
591
|
+
"""
|
|
592
|
+
console = self._stderr_console if stderr else self._stdout_console
|
|
593
|
+
console.print(f"[{color}]{symbol}[/{color}] {message}")
|
|
594
|
+
|
|
595
|
+
def detail(self, message: str, stderr: bool = True) -> None:
|
|
596
|
+
"""Print an indented detail line with gold pointer symbol."""
|
|
597
|
+
console = self._stderr_console if stderr else self._stdout_console
|
|
598
|
+
console.print(f" [{GOLD}]{Symbols.ITEM}[/{GOLD}] {message}")
|
|
599
|
+
|
|
600
|
+
def flow(self, message: str, stderr: bool = True) -> None:
|
|
601
|
+
"""Print a flow/transition message with wave symbol."""
|
|
602
|
+
self.symbol(Symbols.FLOW, message, GOLD, stderr)
|
|
603
|
+
|
|
604
|
+
def note(self, message: str, stderr: bool = True) -> None:
|
|
605
|
+
"""Print an important note with asterism symbol."""
|
|
606
|
+
self.symbol(Symbols.ITEM, message, GOLD, stderr)
|
|
607
|
+
|
|
550
608
|
|
|
551
609
|
# Global design instance for convenience
|
|
552
610
|
class _ProgressContext:
|
hud/utils/mcp.py
CHANGED
|
@@ -5,8 +5,6 @@ from typing import Any
|
|
|
5
5
|
|
|
6
6
|
from pydantic import BaseModel, Field
|
|
7
7
|
|
|
8
|
-
from hud.settings import settings
|
|
9
|
-
|
|
10
8
|
logger = logging.getLogger(__name__)
|
|
11
9
|
|
|
12
10
|
|
|
@@ -17,15 +15,27 @@ class MCPConfigPatch(BaseModel):
|
|
|
17
15
|
meta: dict[str, Any] | None = Field(default_factory=dict, alias="meta")
|
|
18
16
|
|
|
19
17
|
|
|
18
|
+
def _is_hud_server(url: str) -> bool:
|
|
19
|
+
"""Check if a URL is a HUD MCP server.
|
|
20
|
+
|
|
21
|
+
Matches:
|
|
22
|
+
- Any mcp.hud.* domain (including .ai, .so, and future domains)
|
|
23
|
+
- Staging servers (orcstaging.hud.so)
|
|
24
|
+
- Any *.hud.ai or *.hud.so domain
|
|
25
|
+
"""
|
|
26
|
+
if not url:
|
|
27
|
+
return False
|
|
28
|
+
url_lower = url.lower()
|
|
29
|
+
return "mcp.hud." in url_lower or ".hud.ai" in url_lower or ".hud.so" in url_lower
|
|
30
|
+
|
|
31
|
+
|
|
20
32
|
def patch_mcp_config(mcp_config: dict[str, dict[str, Any]], patch: MCPConfigPatch) -> None:
|
|
21
33
|
"""Patch MCP config with additional values."""
|
|
22
|
-
hud_mcp_url = settings.hud_mcp_url
|
|
23
|
-
|
|
24
34
|
for server_cfg in mcp_config.values():
|
|
25
35
|
url = server_cfg.get("url", "")
|
|
26
36
|
|
|
27
37
|
# 1) HTTP header lane (only for hud MCP servers)
|
|
28
|
-
if
|
|
38
|
+
if _is_hud_server(url) and patch.headers:
|
|
29
39
|
for key, value in patch.headers.items():
|
|
30
40
|
headers = server_cfg.setdefault("headers", {})
|
|
31
41
|
headers.setdefault(key, value)
|
|
@@ -35,56 +45,3 @@ def patch_mcp_config(mcp_config: dict[str, dict[str, Any]], patch: MCPConfigPatc
|
|
|
35
45
|
for key, value in patch.meta.items():
|
|
36
46
|
meta = server_cfg.setdefault("meta", {})
|
|
37
47
|
meta.setdefault(key, value)
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def setup_hud_telemetry(
|
|
41
|
-
mcp_config: dict[str, dict[str, Any]], auto_trace: bool = True
|
|
42
|
-
) -> Any | None:
|
|
43
|
-
"""Setup telemetry for hud servers.
|
|
44
|
-
|
|
45
|
-
Returns:
|
|
46
|
-
The auto-created trace context manager if one was created, None otherwise.
|
|
47
|
-
Caller is responsible for exiting the context manager.
|
|
48
|
-
"""
|
|
49
|
-
if not mcp_config:
|
|
50
|
-
raise ValueError("Please run initialize() before setting up client-side telemetry")
|
|
51
|
-
|
|
52
|
-
# Check if there are any HUD servers to setup telemetry for
|
|
53
|
-
hud_mcp_url = settings.hud_mcp_url
|
|
54
|
-
has_hud_servers = any(
|
|
55
|
-
hud_mcp_url in server_cfg.get("url", "") for server_cfg in mcp_config.values()
|
|
56
|
-
)
|
|
57
|
-
|
|
58
|
-
# If no HUD servers, no need for telemetry setup
|
|
59
|
-
if not has_hud_servers:
|
|
60
|
-
return None
|
|
61
|
-
|
|
62
|
-
from hud.otel import get_current_task_run_id
|
|
63
|
-
from hud.telemetry import trace
|
|
64
|
-
|
|
65
|
-
run_id = get_current_task_run_id()
|
|
66
|
-
auto_trace_cm = None
|
|
67
|
-
|
|
68
|
-
if not run_id and auto_trace:
|
|
69
|
-
# Start an auto trace and capture its ID for headers/metadata
|
|
70
|
-
auto_trace_cm = trace("My Trace")
|
|
71
|
-
_trace_obj = auto_trace_cm.__enter__()
|
|
72
|
-
try:
|
|
73
|
-
run_id = getattr(_trace_obj, "id", None) or str(_trace_obj)
|
|
74
|
-
except Exception: # pragma: no cover - fallback shouldn't fail lint
|
|
75
|
-
run_id = None
|
|
76
|
-
|
|
77
|
-
# Patch HUD servers with run-id (works whether auto or user trace)
|
|
78
|
-
if run_id:
|
|
79
|
-
patch_mcp_config(
|
|
80
|
-
mcp_config,
|
|
81
|
-
MCPConfigPatch(headers={"Run-Id": run_id}, meta={"run_id": run_id}),
|
|
82
|
-
)
|
|
83
|
-
|
|
84
|
-
if settings.api_key:
|
|
85
|
-
patch_mcp_config(
|
|
86
|
-
mcp_config,
|
|
87
|
-
MCPConfigPatch(headers={"Authorization": f"Bearer {settings.api_key}"}),
|
|
88
|
-
)
|
|
89
|
-
|
|
90
|
-
return auto_trace_cm
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""Utilities to convert JSON schemas into OpenAI's strict format."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, TypeGuard
|
|
6
|
+
|
|
7
|
+
_EMPTY_SCHEMA = {
|
|
8
|
+
"additionalProperties": False,
|
|
9
|
+
"type": "object",
|
|
10
|
+
"properties": {},
|
|
11
|
+
"required": [],
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def ensure_strict_json_schema(schema: dict[str, Any]) -> dict[str, Any]:
|
|
16
|
+
"""Ensure a JSON schema conforms to OpenAI's strict requirements.
|
|
17
|
+
|
|
18
|
+
This mutates the provided schema in-place and returns it for convenience.
|
|
19
|
+
"""
|
|
20
|
+
if schema == {}:
|
|
21
|
+
return _EMPTY_SCHEMA.copy()
|
|
22
|
+
return _ensure_strict_json_schema(schema, path=(), root=schema)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _ensure_strict_json_schema(
|
|
26
|
+
json_schema: object,
|
|
27
|
+
*,
|
|
28
|
+
path: tuple[str, ...],
|
|
29
|
+
root: dict[str, Any],
|
|
30
|
+
) -> dict[str, Any]:
|
|
31
|
+
if not _is_dict(json_schema):
|
|
32
|
+
raise TypeError(f"Expected {json_schema} to be a dictionary; path={path}")
|
|
33
|
+
|
|
34
|
+
defs = json_schema.get("$defs")
|
|
35
|
+
if _is_dict(defs):
|
|
36
|
+
for def_name, def_schema in defs.items():
|
|
37
|
+
_ensure_strict_json_schema(def_schema, path=(*path, "$defs", def_name), root=root)
|
|
38
|
+
|
|
39
|
+
definitions = json_schema.get("definitions")
|
|
40
|
+
if _is_dict(definitions):
|
|
41
|
+
for definition_name, definition_schema in definitions.items():
|
|
42
|
+
_ensure_strict_json_schema(
|
|
43
|
+
definition_schema, path=(*path, "definitions", definition_name), root=root
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
typ = json_schema.get("type")
|
|
47
|
+
if typ == "object":
|
|
48
|
+
if "additionalProperties" not in json_schema or json_schema["additionalProperties"] is True:
|
|
49
|
+
json_schema["additionalProperties"] = False
|
|
50
|
+
elif (
|
|
51
|
+
json_schema["additionalProperties"] and json_schema["additionalProperties"] is not False
|
|
52
|
+
):
|
|
53
|
+
raise ValueError(
|
|
54
|
+
"additionalProperties should not be set for object types in strict mode."
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
properties = json_schema.get("properties")
|
|
58
|
+
if _is_dict(properties):
|
|
59
|
+
json_schema["required"] = list(properties.keys())
|
|
60
|
+
json_schema["properties"] = {
|
|
61
|
+
key: _ensure_strict_json_schema(prop_schema, path=(*path, "properties", key), root=root)
|
|
62
|
+
for key, prop_schema in properties.items()
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
items = json_schema.get("items")
|
|
66
|
+
if _is_dict(items):
|
|
67
|
+
json_schema["items"] = _ensure_strict_json_schema(items, path=(*path, "items"), root=root)
|
|
68
|
+
|
|
69
|
+
prefix_items = json_schema.get("prefixItems")
|
|
70
|
+
if _is_list(prefix_items) and prefix_items:
|
|
71
|
+
item_types = set()
|
|
72
|
+
for item in prefix_items:
|
|
73
|
+
if _is_dict(item) and "type" in item:
|
|
74
|
+
item_types.add(item["type"])
|
|
75
|
+
|
|
76
|
+
if len(item_types) == 1:
|
|
77
|
+
item_type = item_types.pop()
|
|
78
|
+
json_schema["items"] = {"type": item_type}
|
|
79
|
+
else:
|
|
80
|
+
json_schema["items"] = {"type": "integer"}
|
|
81
|
+
|
|
82
|
+
tuple_length = len(prefix_items)
|
|
83
|
+
json_schema["minItems"] = tuple_length
|
|
84
|
+
json_schema["maxItems"] = tuple_length
|
|
85
|
+
json_schema.pop("prefixItems")
|
|
86
|
+
|
|
87
|
+
any_of = json_schema.get("anyOf")
|
|
88
|
+
if _is_list(any_of):
|
|
89
|
+
json_schema["anyOf"] = [
|
|
90
|
+
_ensure_strict_json_schema(variant, path=(*path, "anyOf", str(i)), root=root)
|
|
91
|
+
for i, variant in enumerate(any_of)
|
|
92
|
+
]
|
|
93
|
+
|
|
94
|
+
one_of = json_schema.get("oneOf")
|
|
95
|
+
if _is_list(one_of):
|
|
96
|
+
existing_any_of = json_schema.get("anyOf", [])
|
|
97
|
+
if not _is_list(existing_any_of):
|
|
98
|
+
existing_any_of = []
|
|
99
|
+
json_schema["anyOf"] = existing_any_of + [
|
|
100
|
+
_ensure_strict_json_schema(variant, path=(*path, "oneOf", str(i)), root=root)
|
|
101
|
+
for i, variant in enumerate(one_of)
|
|
102
|
+
]
|
|
103
|
+
json_schema.pop("oneOf")
|
|
104
|
+
|
|
105
|
+
all_of = json_schema.get("allOf")
|
|
106
|
+
if _is_list(all_of):
|
|
107
|
+
if len(all_of) == 1:
|
|
108
|
+
json_schema.update(
|
|
109
|
+
_ensure_strict_json_schema(all_of[0], path=(*path, "allOf", "0"), root=root)
|
|
110
|
+
)
|
|
111
|
+
json_schema.pop("allOf")
|
|
112
|
+
else:
|
|
113
|
+
json_schema["allOf"] = [
|
|
114
|
+
_ensure_strict_json_schema(entry, path=(*path, "allOf", str(i)), root=root)
|
|
115
|
+
for i, entry in enumerate(all_of)
|
|
116
|
+
]
|
|
117
|
+
|
|
118
|
+
if "default" in json_schema:
|
|
119
|
+
json_schema.pop("default")
|
|
120
|
+
|
|
121
|
+
for keyword in ("title", "examples"):
|
|
122
|
+
json_schema.pop(keyword, None)
|
|
123
|
+
|
|
124
|
+
ref = json_schema.get("$ref")
|
|
125
|
+
if ref and _has_more_than_n_keys(json_schema, 1):
|
|
126
|
+
if not isinstance(ref, str):
|
|
127
|
+
raise ValueError(f"Received non-string $ref - {ref}")
|
|
128
|
+
resolved = _resolve_ref(root=root, ref=ref)
|
|
129
|
+
if not _is_dict(resolved):
|
|
130
|
+
raise ValueError(
|
|
131
|
+
f"Expected `$ref: {ref}` to resolve to a dictionary but got {resolved}"
|
|
132
|
+
)
|
|
133
|
+
json_schema.update({**resolved, **json_schema})
|
|
134
|
+
json_schema.pop("$ref")
|
|
135
|
+
return _ensure_strict_json_schema(json_schema, path=path, root=root)
|
|
136
|
+
|
|
137
|
+
return json_schema
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _resolve_ref(*, root: dict[str, Any], ref: str) -> object:
|
|
141
|
+
if not ref.startswith("#/"):
|
|
142
|
+
raise ValueError(f"Unexpected $ref format {ref!r}; does not start with #/")
|
|
143
|
+
|
|
144
|
+
path = ref[2:].split("/")
|
|
145
|
+
resolved: object = root
|
|
146
|
+
for key in path:
|
|
147
|
+
assert _is_dict(resolved), f"Encountered non-dictionary entry while resolving {ref}"
|
|
148
|
+
resolved = resolved[key]
|
|
149
|
+
|
|
150
|
+
return resolved
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _is_dict(obj: object) -> TypeGuard[dict[str, Any]]:
|
|
154
|
+
return isinstance(obj, dict)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _is_list(obj: object) -> TypeGuard[list[object]]:
|
|
158
|
+
return isinstance(obj, list)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _has_more_than_n_keys(obj: dict[str, object], n: int) -> bool:
|
|
162
|
+
return any(count > n for count, _ in enumerate(obj, start=1))
|
hud/utils/tests/test_init.py
CHANGED
hud/utils/tests/test_mcp.py
CHANGED
|
@@ -2,9 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
import
|
|
6
|
-
|
|
7
|
-
from hud.utils.mcp import MCPConfigPatch, patch_mcp_config, setup_hud_telemetry
|
|
5
|
+
from hud.utils.mcp import MCPConfigPatch, patch_mcp_config
|
|
8
6
|
|
|
9
7
|
|
|
10
8
|
class TestPatchMCPConfig:
|
|
@@ -85,28 +83,3 @@ class TestPatchMCPConfig:
|
|
|
85
83
|
# Existing meta should be preserved, new one added
|
|
86
84
|
assert mcp_config["test_server"]["meta"]["existing_key"] == "existing_value"
|
|
87
85
|
assert mcp_config["test_server"]["meta"]["test_key"] == "test_value"
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
class TestSetupHUDTelemetry:
|
|
91
|
-
"""Tests for setup_hud_telemetry function."""
|
|
92
|
-
|
|
93
|
-
def test_empty_config_raises_error(self):
|
|
94
|
-
"""Test that empty config raises ValueError."""
|
|
95
|
-
with pytest.raises(
|
|
96
|
-
ValueError, match="Please run initialize\\(\\) before setting up client-side telemetry"
|
|
97
|
-
):
|
|
98
|
-
setup_hud_telemetry({})
|
|
99
|
-
|
|
100
|
-
def test_none_config_raises_error(self):
|
|
101
|
-
"""Test that None config raises ValueError."""
|
|
102
|
-
with pytest.raises(
|
|
103
|
-
ValueError, match="Please run initialize\\(\\) before setting up client-side telemetry"
|
|
104
|
-
):
|
|
105
|
-
setup_hud_telemetry(None) # type: ignore[arg-type]
|
|
106
|
-
|
|
107
|
-
def test_valid_config_returns_none_when_no_hud_servers(self):
|
|
108
|
-
"""Test that valid config with no HUD servers returns None."""
|
|
109
|
-
mcp_config = {"test_server": {"url": "http://example.com"}}
|
|
110
|
-
|
|
111
|
-
result = setup_hud_telemetry(mcp_config)
|
|
112
|
-
assert result is None
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from unittest.mock import MagicMock, patch
|
|
5
|
+
|
|
6
|
+
from hud.utils.pretty_errors import (
|
|
7
|
+
_async_exception_handler,
|
|
8
|
+
_render_and_fallback,
|
|
9
|
+
install_pretty_errors,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_render_and_fallback_hud_exception():
|
|
14
|
+
"""Test _render_and_fallback with HudException."""
|
|
15
|
+
from hud.shared.exceptions import HudException
|
|
16
|
+
|
|
17
|
+
exc = HudException("Test error")
|
|
18
|
+
|
|
19
|
+
with (
|
|
20
|
+
patch("sys.__excepthook__") as mock_excepthook,
|
|
21
|
+
patch("hud.utils.pretty_errors.hud_console") as mock_console,
|
|
22
|
+
patch("sys.stderr.flush"),
|
|
23
|
+
):
|
|
24
|
+
_render_and_fallback(HudException, exc, None)
|
|
25
|
+
|
|
26
|
+
mock_excepthook.assert_called_once()
|
|
27
|
+
mock_console.render_exception.assert_called_once_with(exc)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_render_and_fallback_non_hud_exception():
|
|
31
|
+
"""Test _render_and_fallback with non-HudException."""
|
|
32
|
+
exc = ValueError("Test error")
|
|
33
|
+
|
|
34
|
+
with (
|
|
35
|
+
patch("sys.__excepthook__") as mock_excepthook,
|
|
36
|
+
patch("hud.utils.pretty_errors.hud_console") as mock_console,
|
|
37
|
+
):
|
|
38
|
+
_render_and_fallback(ValueError, exc, None)
|
|
39
|
+
|
|
40
|
+
mock_excepthook.assert_called_once()
|
|
41
|
+
# Should not render for non-HudException
|
|
42
|
+
mock_console.render_exception.assert_not_called()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_render_and_fallback_rendering_error():
|
|
46
|
+
"""Test _render_and_fallback handles rendering errors gracefully."""
|
|
47
|
+
from hud.shared.exceptions import HudException
|
|
48
|
+
|
|
49
|
+
exc = HudException("Test error")
|
|
50
|
+
|
|
51
|
+
with (
|
|
52
|
+
patch("sys.__excepthook__") as mock_excepthook,
|
|
53
|
+
patch("hud.utils.pretty_errors.hud_console") as mock_console,
|
|
54
|
+
):
|
|
55
|
+
mock_console.render_exception.side_effect = Exception("Render failed")
|
|
56
|
+
|
|
57
|
+
# Should not raise
|
|
58
|
+
_render_and_fallback(HudException, exc, None)
|
|
59
|
+
|
|
60
|
+
mock_excepthook.assert_called_once()
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def test_async_exception_handler_with_exception():
|
|
64
|
+
"""Test _async_exception_handler with exception in context."""
|
|
65
|
+
mock_loop = MagicMock()
|
|
66
|
+
context = {"exception": ValueError("Test error")}
|
|
67
|
+
|
|
68
|
+
with patch("hud.utils.pretty_errors.hud_console") as mock_console:
|
|
69
|
+
_async_exception_handler(mock_loop, context)
|
|
70
|
+
|
|
71
|
+
mock_console.render_exception.assert_called_once()
|
|
72
|
+
mock_loop.default_exception_handler.assert_called_once_with(context)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def test_async_exception_handler_with_message():
|
|
76
|
+
"""Test _async_exception_handler with message only."""
|
|
77
|
+
mock_loop = MagicMock()
|
|
78
|
+
context = {"message": "Error message"}
|
|
79
|
+
|
|
80
|
+
with patch("hud.utils.pretty_errors.hud_console") as mock_console:
|
|
81
|
+
_async_exception_handler(mock_loop, context)
|
|
82
|
+
|
|
83
|
+
mock_console.error.assert_called_once_with("Error message")
|
|
84
|
+
mock_console.render_support_hint.assert_called_once()
|
|
85
|
+
mock_loop.default_exception_handler.assert_called_once()
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def test_async_exception_handler_rendering_error():
|
|
89
|
+
"""Test _async_exception_handler handles rendering errors."""
|
|
90
|
+
mock_loop = MagicMock()
|
|
91
|
+
context = {"exception": ValueError("Test")}
|
|
92
|
+
|
|
93
|
+
with patch("hud.utils.pretty_errors.hud_console") as mock_console:
|
|
94
|
+
mock_console.render_exception.side_effect = Exception("Render failed")
|
|
95
|
+
|
|
96
|
+
# Should not raise, should call default handler
|
|
97
|
+
_async_exception_handler(mock_loop, context)
|
|
98
|
+
|
|
99
|
+
mock_loop.default_exception_handler.assert_called_once()
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def test_install_pretty_errors_with_running_loop():
|
|
103
|
+
"""Test install_pretty_errors with a running event loop."""
|
|
104
|
+
mock_loop = MagicMock()
|
|
105
|
+
|
|
106
|
+
with patch("asyncio.get_running_loop", return_value=mock_loop):
|
|
107
|
+
install_pretty_errors()
|
|
108
|
+
|
|
109
|
+
assert sys.excepthook == _render_and_fallback
|
|
110
|
+
mock_loop.set_exception_handler.assert_called_once_with(_async_exception_handler)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def test_install_pretty_errors_no_running_loop():
|
|
114
|
+
"""Test install_pretty_errors without a running loop."""
|
|
115
|
+
with (
|
|
116
|
+
patch("asyncio.get_running_loop", side_effect=RuntimeError("No running loop")),
|
|
117
|
+
patch("asyncio.new_event_loop") as mock_new_loop,
|
|
118
|
+
):
|
|
119
|
+
mock_loop = MagicMock()
|
|
120
|
+
mock_new_loop.return_value = mock_loop
|
|
121
|
+
|
|
122
|
+
install_pretty_errors()
|
|
123
|
+
|
|
124
|
+
assert sys.excepthook == _render_and_fallback
|
|
125
|
+
mock_loop.set_exception_handler.assert_called_once()
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def test_install_pretty_errors_new_loop_fails():
|
|
129
|
+
"""Test install_pretty_errors when creating new loop fails."""
|
|
130
|
+
with (
|
|
131
|
+
patch("asyncio.get_running_loop", side_effect=RuntimeError("No running loop")),
|
|
132
|
+
patch("asyncio.new_event_loop", side_effect=Exception("Can't create loop")),
|
|
133
|
+
):
|
|
134
|
+
# Should not raise
|
|
135
|
+
install_pretty_errors()
|
|
136
|
+
|
|
137
|
+
assert sys.excepthook == _render_and_fallback
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def test_install_pretty_errors_set_handler_fails():
|
|
141
|
+
"""Test install_pretty_errors when set_exception_handler fails."""
|
|
142
|
+
mock_loop = MagicMock()
|
|
143
|
+
mock_loop.set_exception_handler.side_effect = Exception("Can't set handler")
|
|
144
|
+
|
|
145
|
+
with patch("asyncio.get_running_loop", return_value=mock_loop):
|
|
146
|
+
# Should not raise
|
|
147
|
+
install_pretty_errors()
|
|
148
|
+
|
|
149
|
+
assert sys.excepthook == _render_and_fallback
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def test_async_exception_handler_no_exception_or_message():
|
|
153
|
+
"""Test _async_exception_handler with empty context."""
|
|
154
|
+
mock_loop = MagicMock()
|
|
155
|
+
context = {}
|
|
156
|
+
|
|
157
|
+
with patch("hud.utils.pretty_errors.hud_console") as mock_console:
|
|
158
|
+
_async_exception_handler(mock_loop, context)
|
|
159
|
+
|
|
160
|
+
mock_console.render_exception.assert_not_called()
|
|
161
|
+
mock_console.error.assert_not_called()
|
|
162
|
+
mock_loop.default_exception_handler.assert_called_once()
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def test_render_and_fallback_with_traceback():
|
|
166
|
+
"""Test _render_and_fallback includes traceback."""
|
|
167
|
+
from hud.shared.exceptions import HudException
|
|
168
|
+
|
|
169
|
+
exc = HudException("Test error")
|
|
170
|
+
|
|
171
|
+
# Create a fake traceback
|
|
172
|
+
try:
|
|
173
|
+
raise exc
|
|
174
|
+
except HudException as e:
|
|
175
|
+
tb = e.__traceback__
|
|
176
|
+
|
|
177
|
+
with (
|
|
178
|
+
patch("sys.__excepthook__") as mock_excepthook,
|
|
179
|
+
patch("hud.utils.pretty_errors.hud_console"),
|
|
180
|
+
patch("sys.stderr.flush"),
|
|
181
|
+
):
|
|
182
|
+
_render_and_fallback(HudException, exc, tb)
|
|
183
|
+
|
|
184
|
+
# Should call excepthook with traceback
|
|
185
|
+
call_args = mock_excepthook.call_args[0]
|
|
186
|
+
assert call_args[2] == tb
|