hud-python 0.5.8__tar.gz → 0.5.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hud_python-0.5.8 → hud_python-0.5.18}/PKG-INFO +2 -2
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/__init__.py +10 -13
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/base.py +236 -13
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/claude.py +183 -81
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/gemini.py +134 -43
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/gemini_cua.py +48 -22
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/misc/integration_test_agent.py +6 -1
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/openai.py +162 -60
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/openai_chat.py +26 -20
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/operator.py +17 -27
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/resolver.py +8 -14
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/test_base.py +137 -1
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/test_base_runtime.py +6 -1
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/test_claude.py +3 -0
- hud_python-0.5.18/hud/agents/tests/test_resolver.py +284 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/test_run_eval.py +6 -1
- hud_python-0.5.18/hud/agents/types.py +148 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/__init__.py +43 -12
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/analyze.py +24 -16
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/build.py +90 -35
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/debug.py +12 -11
- hud_python-0.5.18/hud/cli/deploy.py +586 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/dev.py +26 -1
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/eval.py +76 -8
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/init.py +16 -4
- hud_python-0.5.18/hud/cli/link.py +200 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/push.py +9 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_analyze.py +40 -26
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_analyze_module.py +35 -28
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_build.py +37 -40
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_debug.py +31 -23
- hud_python-0.5.18/hud/cli/tests/test_debug_directory_mode.py +32 -0
- hud_python-0.5.18/hud/cli/tests/test_deploy.py +288 -0
- hud_python-0.5.18/hud/cli/utils/build_display.py +227 -0
- hud_python-0.5.18/hud/cli/utils/build_logs.py +261 -0
- hud_python-0.5.18/hud/cli/utils/context.py +274 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/environment.py +111 -5
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/interactive.py +13 -16
- hud_python-0.5.18/hud/cli/utils/mcp.py +194 -0
- hud_python-0.5.18/hud/cli/utils/tests/test_environment.py +81 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_interactive_module.py +6 -4
- hud_python-0.5.18/hud/cli/utils/validation.py +304 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/version_check.py +15 -1
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/datasets/loader.py +4 -8
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/datasets/runner.py +8 -1
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/datasets/tests/test_loader.py +14 -14
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/datasets/tests/test_utils.py +3 -2
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/datasets/utils.py +16 -15
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/connection.py +51 -6
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/connectors/mcp_config.py +29 -1
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/environment.py +54 -10
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/scenarios.py +142 -32
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/tests/test_connection.py +3 -3
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/tests/test_connectors.py +10 -23
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/tests/test_environment.py +248 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/tests/test_local_connectors.py +81 -40
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/tests/test_scenarios.py +350 -11
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/context.py +76 -9
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/manager.py +105 -27
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/task.py +31 -2
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/tests/test_context.py +11 -0
- hud_python-0.5.18/hud/eval/tests/test_task.py +291 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/types.py +3 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/utils.py +14 -3
- hud_python-0.5.18/hud/patches/mcp_patches.py +320 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/server.py +71 -29
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/tests/test_context.py +2 -2
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/tests/test_mcp_server_integration.py +36 -36
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/tests/test_mcp_server_more.py +4 -4
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/shared/hints.py +15 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/shared/tests/test_exceptions.py +1 -3
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/telemetry/tests/test_eval_telemetry.py +7 -7
- hud_python-0.5.18/hud/tools/__init__.py +137 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/base.py +121 -27
- hud_python-0.5.18/hud/tools/coding/__init__.py +64 -0
- {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/coding}/apply_patch.py +31 -3
- {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/coding}/bash.py +90 -30
- {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/coding}/edit.py +66 -86
- hud_python-0.5.18/hud/tools/coding/gemini_edit.py +252 -0
- hud_python-0.5.18/hud/tools/coding/gemini_shell.py +228 -0
- hud_python-0.5.18/hud/tools/coding/session.py +253 -0
- hud_python-0.5.18/hud/tools/coding/shell.py +176 -0
- hud_python-0.5.18/hud/tools/coding/tests/__init__.py +1 -0
- {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/coding}/tests/test_apply_patch.py +1 -1
- {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/coding}/tests/test_bash.py +49 -13
- {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/coding}/tests/test_bash_extended.py +2 -1
- {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/coding}/tests/test_edit.py +13 -28
- hud_python-0.5.18/hud/tools/coding/tests/test_gemini_tools.py +231 -0
- {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/coding}/tests/test_shell.py +19 -14
- hud_python-0.5.18/hud/tools/coding/utils.py +198 -0
- hud_python-0.5.18/hud/tools/computer/__init__.py +48 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/computer/anthropic.py +33 -5
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/computer/gemini.py +43 -1
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/computer/hud.py +3 -1
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/computer/openai.py +41 -3
- hud_python-0.5.18/hud/tools/computer/tests/__init__.py +1 -0
- hud_python-0.5.18/hud/tools/filesystem/__init__.py +82 -0
- hud_python-0.5.18/hud/tools/filesystem/base.py +703 -0
- hud_python-0.5.18/hud/tools/filesystem/gemini.py +460 -0
- hud_python-0.5.18/hud/tools/filesystem/glob.py +128 -0
- hud_python-0.5.18/hud/tools/filesystem/grep.py +135 -0
- hud_python-0.5.18/hud/tools/filesystem/list.py +170 -0
- hud_python-0.5.18/hud/tools/filesystem/read.py +143 -0
- hud_python-0.5.18/hud/tools/filesystem/tests/__init__.py +1 -0
- hud_python-0.5.18/hud/tools/filesystem/tests/test_glob.py +100 -0
- hud_python-0.5.18/hud/tools/filesystem/tests/test_grep.py +114 -0
- hud_python-0.5.18/hud/tools/filesystem/tests/test_list.py +115 -0
- hud_python-0.5.18/hud/tools/filesystem/tests/test_read.py +132 -0
- hud_python-0.5.18/hud/tools/hosted/__init__.py +24 -0
- hud_python-0.5.18/hud/tools/hosted/base.py +54 -0
- hud_python-0.5.18/hud/tools/hosted/code_execution.py +75 -0
- hud_python-0.5.18/hud/tools/hosted/google_search.py +107 -0
- hud_python-0.5.18/hud/tools/hosted/url_context.py +32 -0
- hud_python-0.5.18/hud/tools/hosted/web_fetch.py +81 -0
- hud_python-0.5.18/hud/tools/hosted/web_search.py +73 -0
- hud_python-0.5.18/hud/tools/memory/__init__.py +50 -0
- hud_python-0.5.18/hud/tools/memory/base.py +222 -0
- hud_python-0.5.18/hud/tools/memory/claude.py +291 -0
- hud_python-0.5.18/hud/tools/memory/gemini.py +200 -0
- hud_python-0.5.18/hud/tools/memory/session.py +223 -0
- hud_python-0.5.18/hud/tools/memory/tests/__init__.py +1 -0
- hud_python-0.5.18/hud/tools/memory/tests/test_claude.py +329 -0
- hud_python-0.5.18/hud/tools/memory/tests/test_gemini.py +85 -0
- hud_python-0.5.18/hud/tools/memory/tests/test_session.py +249 -0
- hud_python-0.5.18/hud/tools/native_types.py +101 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_init.py +2 -1
- hud_python-0.5.18/hud/tools/tests/test_native_tool_e2e.py +863 -0
- hud_python-0.5.18/hud/tools/tests/test_native_types.py +336 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_tools.py +6 -3
- hud_python-0.5.18/hud/tools/tests/test_types.py +418 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/types.py +76 -19
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/types.py +58 -32
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/tests/test_version.py +1 -1
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/version.py +1 -1
- {hud_python-0.5.8 → hud_python-0.5.18}/pyproject.toml +2 -2
- hud_python-0.5.8/hud/agents/tests/test_client.py +0 -346
- hud_python-0.5.8/hud/agents/tests/test_resolver.py +0 -192
- hud_python-0.5.8/hud/cli/utils/tests/test_environment.py +0 -42
- hud_python-0.5.8/hud/clients/README.md +0 -144
- hud_python-0.5.8/hud/clients/__init__.py +0 -18
- hud_python-0.5.8/hud/clients/base.py +0 -529
- hud_python-0.5.8/hud/clients/environment.py +0 -51
- hud_python-0.5.8/hud/clients/fastmcp.py +0 -230
- hud_python-0.5.8/hud/clients/mcp_use.py +0 -366
- hud_python-0.5.8/hud/clients/tests/__init__.py +0 -1
- hud_python-0.5.8/hud/clients/tests/test_analyze_scenarios.py +0 -206
- hud_python-0.5.8/hud/clients/tests/test_client_integration.py +0 -111
- hud_python-0.5.8/hud/clients/tests/test_fastmcp.py +0 -342
- hud_python-0.5.8/hud/clients/tests/test_mcp_use_retry.py +0 -378
- hud_python-0.5.8/hud/clients/tests/test_protocol.py +0 -194
- hud_python-0.5.8/hud/clients/utils/__init__.py +0 -26
- hud_python-0.5.8/hud/clients/utils/mcp_use_retry.py +0 -201
- hud_python-0.5.8/hud/clients/utils/retry.py +0 -186
- hud_python-0.5.8/hud/clients/utils/retry_transport.py +0 -186
- hud_python-0.5.8/hud/eval/tests/test_task.py +0 -145
- hud_python-0.5.8/hud/patches/mcp_patches.py +0 -151
- hud_python-0.5.8/hud/tools/__init__.py +0 -53
- hud_python-0.5.8/hud/tools/computer/__init__.py +0 -19
- hud_python-0.5.8/hud/tools/shell.py +0 -308
- hud_python-0.5.8/hud/tools/tests/test_types.py +0 -193
- {hud_python-0.5.8 → hud_python-0.5.18}/.gitignore +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/LICENSE +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/README.md +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/examples/README.md +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/__main__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/gateway.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/grounded_openai.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/misc/response_agent.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/conftest.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/test_gemini.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/test_openai.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/test_operator.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/__main__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/clone.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/flows/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/flows/dev.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/flows/init.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/flows/tasks.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/flows/templates.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/flows/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/flows/tests/test_dev.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/get.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/list_func.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/pull.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/remove.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/rft.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/rft_status.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_analyze_metadata.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_build_failure.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_build_module.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_cli_root.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_clone.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_convert.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_cursor.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_dev.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_eval.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_eval_bedrock.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_init.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_list_func.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_mcp_server.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_pull.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_push.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_push_happy.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_push_wrapper.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_registry.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_utils.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/celebrate.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/config.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/cursor.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/docker.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/env_check.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/git.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/local_runner.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/logging.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/metadata.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/package_runner.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/registry.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/remote_runner.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/runner.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/server.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/source_hash.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_config.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_docker.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_docker_hints.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_env_check.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_git.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_local_runner.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_logging_utils.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_metadata.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_package_runner.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_registry_utils.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_remote_runner.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_runner_modules.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_source_hash.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_tasks.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/viewer.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/datasets/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/datasets/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/connectors/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/connectors/base.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/connectors/local.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/connectors/openai.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/connectors/remote.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/integrations/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/integrations/adk.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/integrations/anthropic.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/integrations/gemini.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/integrations/langchain.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/integrations/llamaindex.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/integrations/openai.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/mock.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/router.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/tests/test_integrations.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/tests/test_tools.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/types.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/utils/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/utils/formats.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/utils/schema.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/utils/tool_wrappers.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/display.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/instrument.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/parallel.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/tests/test_eval.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/tests/test_manager.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/tests/test_parallel.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/native/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/native/comparator.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/native/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/native/tests/test_comparator.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/native/tests/test_native_init.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/patches/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/patches/warnings.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/py.typed +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/samples/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/samples/browser.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/context.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/helper/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/low_level.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/router.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/tests/test_add_tool.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/tests/test_mcp_server_handlers.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/tests/test_run_wrapper.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/tests/test_server_extra.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/tests/test_sigterm_runner.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/settings.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/shared/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/shared/exceptions.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/shared/requests.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/shared/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/shared/tests/test_hints.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/shared/tests/test_requests.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/telemetry/exporter.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/telemetry/tests/test_exporter.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/telemetry/tests/test_instrument.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/agent.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/computer/qwen.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/computer/settings.py +0 -0
- {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/computer}/tests/test_computer.py +0 -0
- {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/computer}/tests/test_computer_actions.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/executors/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/executors/base.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/executors/pyautogui.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/executors/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/executors/tests/test_base_executor.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/executors/xdo.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/grounding/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/grounding/config.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/grounding/grounded_tool.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/grounding/grounder.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/grounding/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/jupyter.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/playwright.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/response.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/submit.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_agent_tool.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_base.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_jupyter_tool.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_playwright_tool.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_response.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_submit.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_tools_init.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_utils.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/utils.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/env.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/hud_console.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/mcp.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/pretty_errors.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/strict_schema.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/telemetry.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/tests/test_init.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/tests/test_mcp.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/tests/test_pretty_errors.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/tests/test_telemetry.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/tests/test_tool_shorthand.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/tool_shorthand.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/types.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hud-python
|
|
3
|
-
Version: 0.5.8
|
|
3
|
+
Version: 0.5.18
|
|
4
4
|
Summary: SDK for the HUD platform.
|
|
5
5
|
Project-URL: Homepage, https://github.com/hud-evals/hud-python
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
|
|
@@ -91,7 +91,7 @@ Requires-Dist: pyright==1.1.407; extra == 'dev'
|
|
|
91
91
|
Requires-Dist: pytest-asyncio; extra == 'dev'
|
|
92
92
|
Requires-Dist: pytest-cov; extra == 'dev'
|
|
93
93
|
Requires-Dist: pytest-mock; extra == 'dev'
|
|
94
|
-
Requires-Dist: pytest
|
|
94
|
+
Requires-Dist: pytest>=8.1.1; extra == 'dev'
|
|
95
95
|
Requires-Dist: ruff>=0.11.8; extra == 'dev'
|
|
96
96
|
Requires-Dist: tornado>=6.5.2; extra == 'dev'
|
|
97
97
|
Description-Content-Type: text/markdown
|
|
@@ -2,12 +2,13 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from typing import Any
|
|
4
4
|
|
|
5
|
-
from .base import MCPAgent
|
|
5
|
+
from .base import CategorizedTools, MCPAgent
|
|
6
6
|
from .openai import OpenAIAgent
|
|
7
7
|
from .openai_chat import OpenAIChatAgent
|
|
8
8
|
from .operator import OperatorAgent
|
|
9
9
|
|
|
10
10
|
__all__ = [
|
|
11
|
+
"CategorizedTools",
|
|
11
12
|
"MCPAgent",
|
|
12
13
|
"OpenAIAgent",
|
|
13
14
|
"OpenAIChatAgent",
|
|
@@ -47,24 +48,20 @@ def create_agent(model: str, **kwargs: Any) -> MCPAgent:
|
|
|
47
48
|
# Resolve class and gateway info
|
|
48
49
|
agent_cls, gateway_info = resolve_cls(model)
|
|
49
50
|
|
|
50
|
-
# Get model
|
|
51
|
+
# Get model name from gateway info or use input
|
|
51
52
|
model_id = model
|
|
52
53
|
if gateway_info:
|
|
53
|
-
model_id = gateway_info.get("
|
|
54
|
+
model_id = gateway_info.get("model_name") or model
|
|
54
55
|
|
|
55
56
|
# Determine provider: from gateway info, or infer from agent class
|
|
56
57
|
if gateway_info:
|
|
57
|
-
provider = gateway_info
|
|
58
|
+
provider = gateway_info["provider"]["name"]
|
|
58
59
|
else:
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
ClaudeAgent: "anthropic",
|
|
65
|
-
GeminiAgent: "google",
|
|
66
|
-
}
|
|
67
|
-
provider = _AGENT_TO_PROVIDER.get(agent_cls, "openai")
|
|
60
|
+
provider = "openai"
|
|
61
|
+
if agent_cls.__name__ == "ClaudeAgent":
|
|
62
|
+
provider = "anthropic"
|
|
63
|
+
elif agent_cls.__name__ in ("GeminiAgent", "GeminiCUAAgent"):
|
|
64
|
+
provider = "gemini"
|
|
68
65
|
|
|
69
66
|
client = build_gateway_client(provider)
|
|
70
67
|
|
|
@@ -6,14 +6,17 @@ import asyncio
|
|
|
6
6
|
import json
|
|
7
7
|
import logging
|
|
8
8
|
from abc import ABC, abstractmethod
|
|
9
|
+
from dataclasses import dataclass, field
|
|
9
10
|
from typing import TYPE_CHECKING, Any, ClassVar, Literal
|
|
10
11
|
|
|
11
12
|
import mcp.types as types
|
|
12
|
-
from pydantic import BaseModel, ConfigDict
|
|
13
13
|
|
|
14
|
-
from hud.
|
|
14
|
+
from hud.tools.native_types import NativeToolSpec
|
|
15
|
+
from hud.types import AgentResponse, AgentType, BaseAgentConfig, MCPToolCall, MCPToolResult, Trace
|
|
15
16
|
from hud.utils.hud_console import HUDConsole
|
|
16
17
|
|
|
18
|
+
from .types import BaseCreateParams
|
|
19
|
+
|
|
17
20
|
if TYPE_CHECKING:
|
|
18
21
|
from hud.environment import Environment
|
|
19
22
|
from hud.eval.context import EvalContext
|
|
@@ -22,16 +25,28 @@ if TYPE_CHECKING:
|
|
|
22
25
|
logger = logging.getLogger(__name__)
|
|
23
26
|
|
|
24
27
|
|
|
25
|
-
|
|
26
|
-
|
|
28
|
+
@dataclass
|
|
29
|
+
class CategorizedTools:
|
|
30
|
+
"""Result of categorizing tools by native spec availability.
|
|
31
|
+
|
|
32
|
+
Used by agents to efficiently process tools with shared logic for
|
|
33
|
+
role-based mutual exclusion.
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
native: list[tuple[types.Tool, NativeToolSpec]] = field(default_factory=list)
|
|
37
|
+
"""Tools with native specs for this agent (tool, spec) pairs."""
|
|
38
|
+
|
|
39
|
+
hosted: list[tuple[types.Tool, NativeToolSpec]] = field(default_factory=list)
|
|
40
|
+
"""Hosted tools with native specs for this agent (tool, spec) pairs."""
|
|
27
41
|
|
|
28
|
-
|
|
42
|
+
generic: list[types.Tool] = field(default_factory=list)
|
|
43
|
+
"""Tools without native specs that aren't role-blocked."""
|
|
29
44
|
|
|
30
|
-
|
|
31
|
-
|
|
45
|
+
claimed_roles: set[str] = field(default_factory=set)
|
|
46
|
+
"""Roles claimed by native tools."""
|
|
32
47
|
|
|
33
|
-
|
|
34
|
-
|
|
48
|
+
skipped: list[tuple[types.Tool, str]] = field(default_factory=list)
|
|
49
|
+
"""Tools skipped due to role conflicts (tool, reason) pairs."""
|
|
35
50
|
|
|
36
51
|
|
|
37
52
|
class MCPAgent(ABC):
|
|
@@ -52,6 +67,185 @@ class MCPAgent(ABC):
|
|
|
52
67
|
required_tools: ClassVar[list[str]] = [] # Tools that must be available
|
|
53
68
|
config_cls: ClassVar[type[BaseAgentConfig]] = BaseAgentConfig
|
|
54
69
|
|
|
70
|
+
@classmethod
|
|
71
|
+
@abstractmethod
|
|
72
|
+
def agent_type(cls) -> AgentType:
|
|
73
|
+
"""Return the AgentType for this agent.
|
|
74
|
+
|
|
75
|
+
Subclasses must implement this to return their corresponding AgentType enum value.
|
|
76
|
+
This is used for resolving native tool specifications.
|
|
77
|
+
|
|
78
|
+
Returns:
|
|
79
|
+
AgentType enum value for this agent
|
|
80
|
+
"""
|
|
81
|
+
raise NotImplementedError
|
|
82
|
+
|
|
83
|
+
def resolve_native_spec(self, tool: types.Tool) -> NativeToolSpec | None:
    """Check if a tool has a native spec for this agent type and model.

    Resolution order:

    1. Metadata: ``tool.meta["native_tools"]`` is looked up under this
       agent's ``agent_type().value``. A dict entry found there is turned
       into a NativeToolSpec; keys outside the known field set are passed
       through as ``extra``.
    2. Legacy: if metadata yields nothing, ``_legacy_native_spec_fallback``
       gets a chance to recognise the tool, for backwards compatibility
       with old environments that don't emit native_tools metadata.

    Whatever spec was found is then validated against ``self.model`` via
    ``spec.supports_model``.

    Malformed metadata (``native_tools`` missing, ``None``, or not a dict)
    is treated as "no metadata" rather than raising.

    Args:
        tool: MCP Tool object to check for native specs

    Returns:
        NativeToolSpec if the tool has a native spec for this agent and the
        current model supports it, None otherwise. When the model doesn't
        match supported_models, returns None so the tool falls back to
        generic function calling.
    """
    spec: NativeToolSpec | None = None

    # First try metadata-based resolution. The metadata is supplied by the
    # environment, so guard against a non-dict "native_tools" value (for
    # example an explicit null) instead of failing on .get().
    native_tools = tool.meta.get("native_tools") if tool.meta else None
    spec_dict = (
        native_tools.get(self.agent_type().value) if isinstance(native_tools, dict) else None
    )

    if spec_dict and isinstance(spec_dict, dict):
        # Extract known fields and put the rest in extra
        known_fields = {
            "api_type",
            "api_name",
            "beta",
            "hosted",
            "role",
            "supported_models",
        }
        extra = {k: v for k, v in spec_dict.items() if k not in known_fields}

        # Convert supported_models to a tuple for the spec. A bare string is
        # wrapped as a single entry; tuple("abc") would otherwise split it
        # into individual characters.
        supported_models_raw = spec_dict.get("supported_models")
        supported_models: tuple[str, ...] | None = None
        if supported_models_raw:
            if isinstance(supported_models_raw, str):
                supported_models = (supported_models_raw,)
            else:
                supported_models = tuple(supported_models_raw)

        spec = NativeToolSpec(
            api_type=spec_dict.get("api_type"),
            api_name=spec_dict.get("api_name"),
            beta=spec_dict.get("beta"),
            hosted=spec_dict.get("hosted", False),
            role=spec_dict.get("role"),
            supported_models=supported_models,
            extra=extra,
        )

    # Fall back to legacy name-based detection for old environments
    if spec is None:
        spec = self._legacy_native_spec_fallback(tool)

    # Check if current model supports this native spec
    if spec is not None and not spec.supports_model(self.model):
        logger.debug(
            "Model %s not in supported_models for native spec %s, falling back to functions",
            self.model,
            spec.api_type,
        )
        return None

    return spec
|
|
152
|
+
|
|
153
|
+
def _legacy_native_spec_fallback(self, tool: types.Tool) -> NativeToolSpec | None:
    """Name-based native tool detection hook kept for backwards compatibility.

    The base implementation recognises nothing and always answers None.
    Subclasses override it so that old environments, which expose tools
    without native_tools metadata, can still be matched.

    Args:
        tool: MCP Tool object to check

    Returns:
        NativeToolSpec if the tool matches a known legacy pattern, None otherwise
    """
    return None
|
|
166
|
+
|
|
167
|
+
def get_tool_role(self, tool: types.Tool) -> str | None:
    """Get the role of a tool from any of its native specs.

    The role is used for mutual exclusion - when an agent accepts a tool
    natively, other tools with the same role are excluded.

    Only ``tool.meta["native_tools"]`` is consulted. Missing, empty, or
    non-dict metadata yields None instead of raising.

    Args:
        tool: MCP Tool object to check

    Returns:
        The role string if any native spec defines one, None otherwise
    """
    if not tool.meta:
        return None

    native_tools = tool.meta.get("native_tools")
    # Guard the container the same way individual specs are guarded below:
    # anything that is not a non-empty dict carries no role information.
    if not native_tools or not isinstance(native_tools, dict):
        return None

    # Check all specs for a role (they should all have the same role);
    # the first truthy one wins.
    for spec_dict in native_tools.values():
        if isinstance(spec_dict, dict) and spec_dict.get("role"):
            return spec_dict["role"]

    return None
|
|
192
|
+
|
|
193
|
+
def categorize_tools(self, tools: list[types.Tool] | None = None) -> CategorizedTools:
    """Categorize tools by native spec availability with role-based exclusion.

    This shared method implements the two-pass tool processing logic:
    1. First pass: identify native/hosted tools and claim their roles
    2. Second pass: include generic tools if their role isn't claimed

    Each tool's native spec is resolved exactly once; tools that resolve
    to no spec are carried over to the second pass.

    Args:
        tools: List of MCP tools to categorize. If None, uses get_available_tools()

    Returns:
        CategorizedTools with native, hosted, generic, and skipped tools
    """
    if tools is None:
        tools = self.get_available_tools()

    result = CategorizedTools()
    # Tools with no native spec, kept in input order for the second pass.
    without_spec: list[types.Tool] = []

    # First pass: process tools with native specs for this agent
    for tool in tools:
        spec = self.resolve_native_spec(tool)
        if not spec:
            without_spec.append(tool)
            continue

        # Check for role conflicts between native tools
        if spec.role:
            if spec.role in result.claimed_roles:
                # Another native tool already claimed this role - skip this one
                result.skipped.append(
                    (tool, f"role '{spec.role}' already claimed by another native tool")
                )
                continue
            result.claimed_roles.add(spec.role)

        if spec.hosted:
            result.hosted.append((tool, spec))
        else:
            result.native.append((tool, spec))

    # Second pass: process tools without native specs (generic function tools).
    # This has to run after the first pass so every native role is already claimed.
    for tool in without_spec:
        # Check if this tool's role is already claimed by a native tool
        tool_role = self.get_tool_role(tool)
        if tool_role and tool_role in result.claimed_roles:
            result.skipped.append((tool, f"role '{tool_role}' already claimed by native tool"))
            continue

        result.generic.append(tool)

    return result
|
|
248
|
+
|
|
55
249
|
def __init__(self, params: BaseCreateParams | None = None, **kwargs: Any) -> None:
|
|
56
250
|
if params is None:
|
|
57
251
|
import warnings
|
|
@@ -129,8 +323,8 @@ class MCPAgent(ABC):
|
|
|
129
323
|
f"Available tools: {sorted(available_tool_names)}"
|
|
130
324
|
)
|
|
131
325
|
|
|
132
|
-
self.console.
|
|
133
|
-
f"
|
|
326
|
+
self.console.debug(
|
|
327
|
+
f"Discovered {len(self._available_tools)} tools from environment: "
|
|
134
328
|
f"{', '.join([t.name for t in self._available_tools])}"
|
|
135
329
|
)
|
|
136
330
|
|
|
@@ -208,7 +402,21 @@ class MCPAgent(ABC):
|
|
|
208
402
|
await self._initialize_from_ctx(ctx)
|
|
209
403
|
|
|
210
404
|
try:
|
|
211
|
-
|
|
405
|
+
# Build initial context - optionally append setup tool output
|
|
406
|
+
# Check ctx first (task-level override), then fall back to agent config
|
|
407
|
+
append_setup = getattr(ctx, "append_setup_output", False) or getattr(
|
|
408
|
+
self.config, "append_setup_output", False
|
|
409
|
+
)
|
|
410
|
+
initial_prompt = ctx.prompt
|
|
411
|
+
if append_setup:
|
|
412
|
+
setup_output = getattr(ctx, "setup_output", None)
|
|
413
|
+
if setup_output:
|
|
414
|
+
initial_prompt = f"{initial_prompt}\n\n{setup_output}"
|
|
415
|
+
|
|
416
|
+
# Build initial blocks (text prompt + optional screenshot)
|
|
417
|
+
initial_blocks = text_to_blocks(initial_prompt)
|
|
418
|
+
|
|
419
|
+
result = await self._run_context(initial_blocks, max_steps=max_steps)
|
|
212
420
|
|
|
213
421
|
# Propagate error state to context for platform visibility
|
|
214
422
|
if result.isError and hasattr(ctx, "error"):
|
|
@@ -342,8 +550,17 @@ class MCPAgent(ABC):
|
|
|
342
550
|
is_error = False
|
|
343
551
|
|
|
344
552
|
# Ensure all parameters are the correct type
|
|
553
|
+
# Use ctx.reward if already set (e.g., from scenario evaluate), otherwise 0.0
|
|
554
|
+
# Note: For v4 tasks with evaluate_tool, reward is set in __aexit__ after this returns,
|
|
555
|
+
# so callers should prefer ctx.reward over Trace.reward for the final result.
|
|
556
|
+
reward = 0.0
|
|
557
|
+
if self.ctx is not None:
|
|
558
|
+
ctx_reward = getattr(self.ctx, "reward", None)
|
|
559
|
+
if ctx_reward is not None:
|
|
560
|
+
reward = ctx_reward
|
|
561
|
+
|
|
345
562
|
trace_params = {
|
|
346
|
-
"reward":
|
|
563
|
+
"reward": reward,
|
|
347
564
|
"done": True,
|
|
348
565
|
"messages": messages,
|
|
349
566
|
"content": final_response.content if final_response else error,
|
|
@@ -519,8 +736,14 @@ def find_reward(result: MCPToolResult) -> float:
|
|
|
519
736
|
|
|
520
737
|
Agent accepts "reward", "grade", "score", or weighted subscores
|
|
521
738
|
|
|
739
|
+
If isError is True, return 0.0 (error results should not contribute positive reward).
|
|
522
740
|
If not found, return 0.0
|
|
523
741
|
"""
|
|
742
|
+
# Error results should return 0.0 - don't extract reward from error responses
|
|
743
|
+
if result.isError:
|
|
744
|
+
logger.warning("Evaluate tool returned error, using reward=0.0")
|
|
745
|
+
return 0.0
|
|
746
|
+
|
|
524
747
|
accept_keys = ["reward", "grade", "score"]
|
|
525
748
|
|
|
526
749
|
# Check for direct reward/grade/score keys
|