hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +27 -7
- hud/agents/__init__.py +70 -5
- hud/agents/base.py +238 -500
- hud/agents/claude.py +236 -247
- hud/agents/gateway.py +42 -0
- hud/agents/gemini.py +264 -0
- hud/agents/gemini_cua.py +324 -0
- hud/agents/grounded_openai.py +98 -100
- hud/agents/misc/integration_test_agent.py +51 -20
- hud/agents/misc/response_agent.py +48 -36
- hud/agents/openai.py +282 -296
- hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
- hud/agents/operator.py +199 -0
- hud/agents/resolver.py +70 -0
- hud/agents/tests/conftest.py +133 -0
- hud/agents/tests/test_base.py +300 -622
- hud/agents/tests/test_base_runtime.py +233 -0
- hud/agents/tests/test_claude.py +381 -214
- hud/agents/tests/test_client.py +9 -10
- hud/agents/tests/test_gemini.py +369 -0
- hud/agents/tests/test_grounded_openai_agent.py +65 -50
- hud/agents/tests/test_openai.py +377 -140
- hud/agents/tests/test_operator.py +362 -0
- hud/agents/tests/test_resolver.py +192 -0
- hud/agents/tests/test_run_eval.py +179 -0
- hud/agents/types.py +148 -0
- hud/cli/__init__.py +493 -546
- hud/cli/analyze.py +43 -5
- hud/cli/build.py +699 -113
- hud/cli/debug.py +8 -5
- hud/cli/dev.py +889 -732
- hud/cli/eval.py +793 -667
- hud/cli/flows/dev.py +167 -0
- hud/cli/flows/init.py +191 -0
- hud/cli/flows/tasks.py +153 -56
- hud/cli/flows/templates.py +151 -0
- hud/cli/flows/tests/__init__.py +1 -0
- hud/cli/flows/tests/test_dev.py +126 -0
- hud/cli/init.py +60 -58
- hud/cli/pull.py +1 -1
- hud/cli/push.py +38 -13
- hud/cli/rft.py +311 -0
- hud/cli/rft_status.py +145 -0
- hud/cli/tests/test_analyze.py +5 -5
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +110 -8
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_init.py +6 -1
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +140 -0
- hud/cli/tests/test_convert.py +361 -0
- hud/cli/tests/test_debug.py +12 -10
- hud/cli/tests/test_dev.py +197 -0
- hud/cli/tests/test_eval.py +251 -0
- hud/cli/tests/test_eval_bedrock.py +51 -0
- hud/cli/tests/test_init.py +124 -0
- hud/cli/tests/test_main_module.py +11 -5
- hud/cli/tests/test_mcp_server.py +12 -100
- hud/cli/tests/test_push.py +1 -1
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/tests/test_registry.py +1 -1
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/{rl → utils}/celebrate.py +14 -12
- hud/cli/utils/config.py +18 -1
- hud/cli/utils/docker.py +130 -4
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/git.py +136 -0
- hud/cli/utils/interactive.py +39 -5
- hud/cli/utils/metadata.py +70 -1
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/server.py +2 -2
- hud/cli/utils/source_hash.py +3 -3
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_git.py +142 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +258 -0
- hud/cli/{rl → utils}/viewer.py +2 -2
- hud/clients/README.md +12 -11
- hud/clients/__init__.py +4 -3
- hud/clients/base.py +166 -26
- hud/clients/environment.py +51 -0
- hud/clients/fastmcp.py +13 -6
- hud/clients/mcp_use.py +45 -15
- hud/clients/tests/test_analyze_scenarios.py +206 -0
- hud/clients/tests/test_protocol.py +9 -3
- hud/datasets/__init__.py +23 -20
- hud/datasets/loader.py +326 -0
- hud/datasets/runner.py +198 -105
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_loader.py +221 -0
- hud/datasets/tests/test_utils.py +315 -0
- hud/datasets/utils.py +270 -90
- hud/environment/__init__.py +52 -0
- hud/environment/connection.py +258 -0
- hud/environment/connectors/__init__.py +33 -0
- hud/environment/connectors/base.py +68 -0
- hud/environment/connectors/local.py +177 -0
- hud/environment/connectors/mcp_config.py +137 -0
- hud/environment/connectors/openai.py +101 -0
- hud/environment/connectors/remote.py +172 -0
- hud/environment/environment.py +835 -0
- hud/environment/integrations/__init__.py +45 -0
- hud/environment/integrations/adk.py +67 -0
- hud/environment/integrations/anthropic.py +196 -0
- hud/environment/integrations/gemini.py +92 -0
- hud/environment/integrations/langchain.py +82 -0
- hud/environment/integrations/llamaindex.py +68 -0
- hud/environment/integrations/openai.py +238 -0
- hud/environment/mock.py +306 -0
- hud/environment/router.py +263 -0
- hud/environment/scenarios.py +620 -0
- hud/environment/tests/__init__.py +1 -0
- hud/environment/tests/test_connection.py +317 -0
- hud/environment/tests/test_connectors.py +205 -0
- hud/environment/tests/test_environment.py +593 -0
- hud/environment/tests/test_integrations.py +257 -0
- hud/environment/tests/test_local_connectors.py +242 -0
- hud/environment/tests/test_scenarios.py +1086 -0
- hud/environment/tests/test_tools.py +208 -0
- hud/environment/types.py +23 -0
- hud/environment/utils/__init__.py +35 -0
- hud/environment/utils/formats.py +215 -0
- hud/environment/utils/schema.py +171 -0
- hud/environment/utils/tool_wrappers.py +113 -0
- hud/eval/__init__.py +67 -0
- hud/eval/context.py +727 -0
- hud/eval/display.py +299 -0
- hud/eval/instrument.py +187 -0
- hud/eval/manager.py +533 -0
- hud/eval/parallel.py +268 -0
- hud/eval/task.py +372 -0
- hud/eval/tests/__init__.py +1 -0
- hud/eval/tests/test_context.py +178 -0
- hud/eval/tests/test_eval.py +210 -0
- hud/eval/tests/test_manager.py +152 -0
- hud/eval/tests/test_parallel.py +168 -0
- hud/eval/tests/test_task.py +291 -0
- hud/eval/types.py +65 -0
- hud/eval/utils.py +194 -0
- hud/patches/__init__.py +19 -0
- hud/patches/mcp_patches.py +308 -0
- hud/patches/warnings.py +54 -0
- hud/samples/browser.py +4 -4
- hud/server/__init__.py +2 -1
- hud/server/low_level.py +2 -1
- hud/server/router.py +164 -0
- hud/server/server.py +567 -80
- hud/server/tests/test_mcp_server_integration.py +11 -11
- hud/server/tests/test_mcp_server_more.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/settings.py +45 -3
- hud/shared/exceptions.py +36 -10
- hud/shared/hints.py +26 -1
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +40 -31
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +20 -19
- hud/telemetry/exporter.py +201 -0
- hud/telemetry/instrument.py +165 -253
- hud/telemetry/tests/test_eval_telemetry.py +356 -0
- hud/telemetry/tests/test_exporter.py +258 -0
- hud/telemetry/tests/test_instrument.py +401 -0
- hud/tools/__init__.py +18 -2
- hud/tools/agent.py +223 -0
- hud/tools/apply_patch.py +639 -0
- hud/tools/base.py +54 -4
- hud/tools/bash.py +2 -2
- hud/tools/computer/__init__.py +36 -3
- hud/tools/computer/anthropic.py +2 -2
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/hud.py +23 -6
- hud/tools/computer/openai.py +20 -21
- hud/tools/computer/qwen.py +434 -0
- hud/tools/computer/settings.py +37 -0
- hud/tools/edit.py +3 -7
- hud/tools/executors/base.py +4 -2
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/grounding/grounded_tool.py +13 -18
- hud/tools/grounding/grounder.py +10 -31
- hud/tools/grounding/tests/test_grounded_tool.py +26 -44
- hud/tools/jupyter.py +330 -0
- hud/tools/playwright.py +18 -3
- hud/tools/shell.py +308 -0
- hud/tools/tests/test_agent_tool.py +355 -0
- hud/tools/tests/test_apply_patch.py +718 -0
- hud/tools/tests/test_computer.py +4 -9
- hud/tools/tests/test_computer_actions.py +24 -2
- hud/tools/tests/test_jupyter_tool.py +181 -0
- hud/tools/tests/test_shell.py +596 -0
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/tools/types.py +21 -1
- hud/types.py +194 -56
- hud/utils/__init__.py +2 -0
- hud/utils/env.py +67 -0
- hud/utils/hud_console.py +89 -18
- hud/utils/mcp.py +15 -58
- hud/utils/strict_schema.py +162 -0
- hud/utils/tests/test_init.py +1 -2
- hud/utils/tests/test_mcp.py +1 -28
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/utils/types.py +20 -0
- hud/version.py +1 -1
- hud_python-0.5.13.dist-info/METADATA +264 -0
- hud_python-0.5.13.dist-info/RECORD +305 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
- hud/agents/langchain.py +0 -261
- hud/agents/lite_llm.py +0 -72
- hud/cli/rl/__init__.py +0 -180
- hud/cli/rl/config.py +0 -101
- hud/cli/rl/display.py +0 -133
- hud/cli/rl/gpu.py +0 -63
- hud/cli/rl/gpu_utils.py +0 -321
- hud/cli/rl/local_runner.py +0 -595
- hud/cli/rl/presets.py +0 -96
- hud/cli/rl/remote_runner.py +0 -463
- hud/cli/rl/rl_api.py +0 -150
- hud/cli/rl/vllm.py +0 -177
- hud/cli/rl/wait_utils.py +0 -89
- hud/datasets/parallel.py +0 -687
- hud/misc/__init__.py +0 -1
- hud/misc/claude_plays_pokemon.py +0 -292
- hud/otel/__init__.py +0 -35
- hud/otel/collector.py +0 -142
- hud/otel/config.py +0 -181
- hud/otel/context.py +0 -570
- hud/otel/exporters.py +0 -369
- hud/otel/instrumentation.py +0 -135
- hud/otel/processors.py +0 -121
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_processors.py +0 -197
- hud/rl/README.md +0 -30
- hud/rl/__init__.py +0 -1
- hud/rl/actor.py +0 -176
- hud/rl/buffer.py +0 -405
- hud/rl/chat_template.jinja +0 -101
- hud/rl/config.py +0 -192
- hud/rl/distributed.py +0 -132
- hud/rl/learner.py +0 -637
- hud/rl/tests/__init__.py +0 -1
- hud/rl/tests/test_learner.py +0 -186
- hud/rl/train.py +0 -382
- hud/rl/types.py +0 -101
- hud/rl/utils/start_vllm_server.sh +0 -30
- hud/rl/utils.py +0 -524
- hud/rl/vllm_adapter.py +0 -143
- hud/telemetry/job.py +0 -352
- hud/telemetry/replay.py +0 -74
- hud/telemetry/tests/test_replay.py +0 -40
- hud/telemetry/tests/test_trace.py +0 -63
- hud/telemetry/trace.py +0 -158
- hud/utils/agent_factories.py +0 -86
- hud/utils/async_utils.py +0 -65
- hud/utils/group_eval.py +0 -223
- hud/utils/progress.py +0 -149
- hud/utils/tasks.py +0 -127
- hud/utils/tests/test_async_utils.py +0 -173
- hud/utils/tests/test_progress.py +0 -261
- hud_python-0.4.45.dist-info/METADATA +0 -552
- hud_python-0.4.45.dist-info/RECORD +0 -228
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""Shared tool wrapper utilities for agent framework integrations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from collections.abc import Callable
|
|
10
|
+
|
|
11
|
+
import mcp.types as mcp_types
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"create_async_tool_fn",
|
|
15
|
+
"create_sync_tool_fn",
|
|
16
|
+
"create_tool_fns",
|
|
17
|
+
"stringify_result",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def stringify_result(result: Any) -> str:
    """Render a tool result as text.

    Strings are passed through untouched (including the empty string). Any
    other truthy value is serialized with ``json.dumps``. Every falsy
    non-string value (``None``, ``0``, ``False``, empty containers) collapses
    to the empty string.

    Args:
        result: The tool result (str, dict, or other).

    Returns:
        The textual form of ``result``.
    """
    if not isinstance(result, str):
        if not result:
            # Falsy non-string results are reported as "no output".
            return ""
        return json.dumps(result)
    return result
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def create_async_tool_fn(
    env: Any,
    tool_name: str,
    description: str | None = None,
) -> Callable[..., Any]:
    """Build a coroutine function that forwards to ``env.call_tool``.

    The returned function accepts keyword arguments only, awaits
    ``env.call_tool(tool_name, **kwargs)`` and passes the outcome through
    ``stringify_result``.

    Args:
        env: Environment with call_tool method.
        tool_name: Name of the tool to call; also used as the function name.
        description: Optional docstring for the generated function. When
            missing or empty, ``"Tool: <tool_name>"`` is used instead.

    Returns:
        Async function that calls the tool and returns string result.
    """

    async def async_fn(**kwargs: Any) -> str:
        return stringify_result(await env.call_tool(tool_name, **kwargs))

    # Present the wrapper under the tool's own name and description.
    async_fn.__doc__ = description if description else f"Tool: {tool_name}"
    async_fn.__name__ = tool_name
    return async_fn
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def create_sync_tool_fn(
    env: Any,
    tool_name: str,
    description: str | None = None,
) -> Callable[..., Any]:
    """Create a sync function that calls a tool on the environment.

    This handles the complexity of running async code from sync context:

    * If the calling thread is already inside a running event loop, the
      coroutine is executed with ``asyncio.run`` on a helper thread and the
      caller blocks until it finishes.
    * If the calling thread has an idle, open event loop, that loop is reused
      via ``run_until_complete``.
    * If the calling thread has no usable event loop (worker threads, a loop
      that was closed, or Python versions where ``asyncio.get_event_loop()``
      no longer creates one), a temporary loop is created with
      ``asyncio.run``.

    Args:
        env: Environment with call_tool method.
        tool_name: Name of the tool to call; also used as the function name.
        description: Optional description for the function docstring. When
            missing or empty, ``"Tool: <tool_name>"`` is used instead.

    Returns:
        Sync function that calls the tool and returns string result.
    """
    import asyncio

    def sync_fn(**kwargs: Any) -> str:
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            # No current event loop in this thread (e.g. a framework invoked
            # the tool from a worker thread, or asyncio.run() cleared it).
            loop = None

        if loop is not None and loop.is_running():
            import concurrent.futures

            # A running loop cannot be re-entered from sync code, so drive the
            # coroutine on a fresh loop in another thread and wait for it.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                future = executor.submit(asyncio.run, env.call_tool(tool_name, **kwargs))
                result = future.result()
        elif loop is None or loop.is_closed():
            # Nothing reusable here: let asyncio manage a temporary loop.
            result = asyncio.run(env.call_tool(tool_name, **kwargs))
        else:
            result = loop.run_until_complete(env.call_tool(tool_name, **kwargs))

        return stringify_result(result)

    sync_fn.__name__ = tool_name
    sync_fn.__doc__ = description or f"Tool: {tool_name}"
    return sync_fn
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def create_tool_fns(
    env: Any,
    tool: mcp_types.Tool,
) -> tuple[Callable[..., str], Callable[..., Any]]:
    """Build the sync and async wrappers for one MCP tool.

    Both wrappers are named after ``tool.name`` and documented with
    ``tool.description``.

    Args:
        env: Environment with call_tool method.
        tool: MCP tool definition; its ``name`` and ``description`` are read.

    Returns:
        Tuple of (sync_fn, async_fn).
    """
    name, description = tool.name, tool.description
    return (
        create_sync_tool_fn(env, name, description),
        create_async_tool_fn(env, name, description),
    )
|
hud/eval/__init__.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""HUD Eval - Evaluation context and management.
|
|
2
|
+
|
|
3
|
+
This module provides:
|
|
4
|
+
- Task: A runnable evaluation unit (from env())
|
|
5
|
+
- EvalContext: Environment with evaluation tracking (trace_id, reward, etc.)
|
|
6
|
+
- eval(): Standalone context manager for task-based evaluation
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
# Using env() to create Task
|
|
10
|
+
env = Environment("my-env").connect_hub("browser")
|
|
11
|
+
|
|
12
|
+
async with env() as ctx:
|
|
13
|
+
await ctx.call_tool("navigate", url="...")
|
|
14
|
+
|
|
15
|
+
async with env("checkout", user_id="alice") as ctx:
|
|
16
|
+
await agent.run(ctx.prompt)
|
|
17
|
+
|
|
18
|
+
# Standalone with task slugs
|
|
19
|
+
async with hud.eval("my-org/task:1") as ctx:
|
|
20
|
+
await agent.run(ctx)
|
|
21
|
+
|
|
22
|
+
# Orchestrated with Task objects
|
|
23
|
+
tasks = [env("checkout", user_id="alice"), env("checkout", user_id="bob")]
|
|
24
|
+
async with hud.eval(tasks, variants={"model": ["gpt-4o"]}, group=4) as ctx:
|
|
25
|
+
await agent.run(ctx.prompt)
|
|
26
|
+
|
|
27
|
+
# Blank eval for manual reward
|
|
28
|
+
async with hud.eval() as ctx:
|
|
29
|
+
ctx.reward = compute_reward()
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
from __future__ import annotations
|
|
33
|
+
|
|
34
|
+
from typing import TYPE_CHECKING
|
|
35
|
+
|
|
36
|
+
# Auto-instrument httpx on import
|
|
37
|
+
import hud.eval.instrument # noqa: F401
|
|
38
|
+
|
|
39
|
+
# run_eval is safe to import (uses lazy imports internally)
|
|
40
|
+
from hud.eval.manager import run_eval
|
|
41
|
+
|
|
42
|
+
# Task is safe to import
|
|
43
|
+
from hud.eval.task import Task
|
|
44
|
+
|
|
45
|
+
# Utils for v4 format handling
|
|
46
|
+
from hud.eval.utils import build_env_from_v4, is_v4_format, validate_v4_task
|
|
47
|
+
|
|
48
|
+
if TYPE_CHECKING:
|
|
49
|
+
from hud.eval.context import EvalContext
|
|
50
|
+
|
|
51
|
+
__all__ = [
|
|
52
|
+
"EvalContext",
|
|
53
|
+
"Task",
|
|
54
|
+
"build_env_from_v4",
|
|
55
|
+
"is_v4_format",
|
|
56
|
+
"run_eval",
|
|
57
|
+
"validate_v4_task",
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def __getattr__(name: str) -> object:
    """Resolve ``EvalContext`` on first access to avoid circular imports.

    Any other attribute name raises ``AttributeError``.
    """
    if name != "EvalContext":
        raise AttributeError(f"module 'hud.eval' has no attribute {name!r}")

    # Deferred import: only pay for (and risk) it when actually requested.
    from hud.eval.context import EvalContext

    return EvalContext
|