hud-python 0.5.8__tar.gz → 0.5.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (362)
  1. {hud_python-0.5.8 → hud_python-0.5.18}/PKG-INFO +2 -2
  2. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/__init__.py +10 -13
  3. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/base.py +236 -13
  4. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/claude.py +183 -81
  5. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/gemini.py +134 -43
  6. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/gemini_cua.py +48 -22
  7. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/misc/integration_test_agent.py +6 -1
  8. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/openai.py +162 -60
  9. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/openai_chat.py +26 -20
  10. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/operator.py +17 -27
  11. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/resolver.py +8 -14
  12. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/test_base.py +137 -1
  13. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/test_base_runtime.py +6 -1
  14. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/test_claude.py +3 -0
  15. hud_python-0.5.18/hud/agents/tests/test_resolver.py +284 -0
  16. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/test_run_eval.py +6 -1
  17. hud_python-0.5.18/hud/agents/types.py +148 -0
  18. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/__init__.py +43 -12
  19. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/analyze.py +24 -16
  20. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/build.py +90 -35
  21. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/debug.py +12 -11
  22. hud_python-0.5.18/hud/cli/deploy.py +586 -0
  23. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/dev.py +26 -1
  24. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/eval.py +76 -8
  25. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/init.py +16 -4
  26. hud_python-0.5.18/hud/cli/link.py +200 -0
  27. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/push.py +9 -0
  28. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_analyze.py +40 -26
  29. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_analyze_module.py +35 -28
  30. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_build.py +37 -40
  31. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_debug.py +31 -23
  32. hud_python-0.5.18/hud/cli/tests/test_debug_directory_mode.py +32 -0
  33. hud_python-0.5.18/hud/cli/tests/test_deploy.py +288 -0
  34. hud_python-0.5.18/hud/cli/utils/build_display.py +227 -0
  35. hud_python-0.5.18/hud/cli/utils/build_logs.py +261 -0
  36. hud_python-0.5.18/hud/cli/utils/context.py +274 -0
  37. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/environment.py +111 -5
  38. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/interactive.py +13 -16
  39. hud_python-0.5.18/hud/cli/utils/mcp.py +194 -0
  40. hud_python-0.5.18/hud/cli/utils/tests/test_environment.py +81 -0
  41. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_interactive_module.py +6 -4
  42. hud_python-0.5.18/hud/cli/utils/validation.py +304 -0
  43. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/version_check.py +15 -1
  44. {hud_python-0.5.8 → hud_python-0.5.18}/hud/datasets/loader.py +4 -8
  45. {hud_python-0.5.8 → hud_python-0.5.18}/hud/datasets/runner.py +8 -1
  46. {hud_python-0.5.8 → hud_python-0.5.18}/hud/datasets/tests/test_loader.py +14 -14
  47. {hud_python-0.5.8 → hud_python-0.5.18}/hud/datasets/tests/test_utils.py +3 -2
  48. {hud_python-0.5.8 → hud_python-0.5.18}/hud/datasets/utils.py +16 -15
  49. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/connection.py +51 -6
  50. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/connectors/mcp_config.py +29 -1
  51. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/environment.py +54 -10
  52. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/scenarios.py +142 -32
  53. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/tests/test_connection.py +3 -3
  54. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/tests/test_connectors.py +10 -23
  55. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/tests/test_environment.py +248 -0
  56. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/tests/test_local_connectors.py +81 -40
  57. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/tests/test_scenarios.py +350 -11
  58. {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/context.py +76 -9
  59. {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/manager.py +105 -27
  60. {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/task.py +31 -2
  61. {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/tests/test_context.py +11 -0
  62. hud_python-0.5.18/hud/eval/tests/test_task.py +291 -0
  63. {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/types.py +3 -0
  64. {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/utils.py +14 -3
  65. hud_python-0.5.18/hud/patches/mcp_patches.py +320 -0
  66. {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/server.py +71 -29
  67. {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/tests/test_context.py +2 -2
  68. {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/tests/test_mcp_server_integration.py +36 -36
  69. {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/tests/test_mcp_server_more.py +4 -4
  70. {hud_python-0.5.8 → hud_python-0.5.18}/hud/shared/hints.py +15 -0
  71. {hud_python-0.5.8 → hud_python-0.5.18}/hud/shared/tests/test_exceptions.py +1 -3
  72. {hud_python-0.5.8 → hud_python-0.5.18}/hud/telemetry/tests/test_eval_telemetry.py +7 -7
  73. hud_python-0.5.18/hud/tools/__init__.py +137 -0
  74. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/base.py +121 -27
  75. hud_python-0.5.18/hud/tools/coding/__init__.py +64 -0
  76. {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/coding}/apply_patch.py +31 -3
  77. {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/coding}/bash.py +90 -30
  78. {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/coding}/edit.py +66 -86
  79. hud_python-0.5.18/hud/tools/coding/gemini_edit.py +252 -0
  80. hud_python-0.5.18/hud/tools/coding/gemini_shell.py +228 -0
  81. hud_python-0.5.18/hud/tools/coding/session.py +253 -0
  82. hud_python-0.5.18/hud/tools/coding/shell.py +176 -0
  83. hud_python-0.5.18/hud/tools/coding/tests/__init__.py +1 -0
  84. {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/coding}/tests/test_apply_patch.py +1 -1
  85. {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/coding}/tests/test_bash.py +49 -13
  86. {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/coding}/tests/test_bash_extended.py +2 -1
  87. {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/coding}/tests/test_edit.py +13 -28
  88. hud_python-0.5.18/hud/tools/coding/tests/test_gemini_tools.py +231 -0
  89. {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/coding}/tests/test_shell.py +19 -14
  90. hud_python-0.5.18/hud/tools/coding/utils.py +198 -0
  91. hud_python-0.5.18/hud/tools/computer/__init__.py +48 -0
  92. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/computer/anthropic.py +33 -5
  93. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/computer/gemini.py +43 -1
  94. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/computer/hud.py +3 -1
  95. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/computer/openai.py +41 -3
  96. hud_python-0.5.18/hud/tools/computer/tests/__init__.py +1 -0
  97. hud_python-0.5.18/hud/tools/filesystem/__init__.py +82 -0
  98. hud_python-0.5.18/hud/tools/filesystem/base.py +703 -0
  99. hud_python-0.5.18/hud/tools/filesystem/gemini.py +460 -0
  100. hud_python-0.5.18/hud/tools/filesystem/glob.py +128 -0
  101. hud_python-0.5.18/hud/tools/filesystem/grep.py +135 -0
  102. hud_python-0.5.18/hud/tools/filesystem/list.py +170 -0
  103. hud_python-0.5.18/hud/tools/filesystem/read.py +143 -0
  104. hud_python-0.5.18/hud/tools/filesystem/tests/__init__.py +1 -0
  105. hud_python-0.5.18/hud/tools/filesystem/tests/test_glob.py +100 -0
  106. hud_python-0.5.18/hud/tools/filesystem/tests/test_grep.py +114 -0
  107. hud_python-0.5.18/hud/tools/filesystem/tests/test_list.py +115 -0
  108. hud_python-0.5.18/hud/tools/filesystem/tests/test_read.py +132 -0
  109. hud_python-0.5.18/hud/tools/hosted/__init__.py +24 -0
  110. hud_python-0.5.18/hud/tools/hosted/base.py +54 -0
  111. hud_python-0.5.18/hud/tools/hosted/code_execution.py +75 -0
  112. hud_python-0.5.18/hud/tools/hosted/google_search.py +107 -0
  113. hud_python-0.5.18/hud/tools/hosted/url_context.py +32 -0
  114. hud_python-0.5.18/hud/tools/hosted/web_fetch.py +81 -0
  115. hud_python-0.5.18/hud/tools/hosted/web_search.py +73 -0
  116. hud_python-0.5.18/hud/tools/memory/__init__.py +50 -0
  117. hud_python-0.5.18/hud/tools/memory/base.py +222 -0
  118. hud_python-0.5.18/hud/tools/memory/claude.py +291 -0
  119. hud_python-0.5.18/hud/tools/memory/gemini.py +200 -0
  120. hud_python-0.5.18/hud/tools/memory/session.py +223 -0
  121. hud_python-0.5.18/hud/tools/memory/tests/__init__.py +1 -0
  122. hud_python-0.5.18/hud/tools/memory/tests/test_claude.py +329 -0
  123. hud_python-0.5.18/hud/tools/memory/tests/test_gemini.py +85 -0
  124. hud_python-0.5.18/hud/tools/memory/tests/test_session.py +249 -0
  125. hud_python-0.5.18/hud/tools/native_types.py +101 -0
  126. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_init.py +2 -1
  127. hud_python-0.5.18/hud/tools/tests/test_native_tool_e2e.py +863 -0
  128. hud_python-0.5.18/hud/tools/tests/test_native_types.py +336 -0
  129. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_tools.py +6 -3
  130. hud_python-0.5.18/hud/tools/tests/test_types.py +418 -0
  131. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/types.py +76 -19
  132. {hud_python-0.5.8 → hud_python-0.5.18}/hud/types.py +58 -32
  133. {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/tests/test_version.py +1 -1
  134. {hud_python-0.5.8 → hud_python-0.5.18}/hud/version.py +1 -1
  135. {hud_python-0.5.8 → hud_python-0.5.18}/pyproject.toml +2 -2
  136. hud_python-0.5.8/hud/agents/tests/test_client.py +0 -346
  137. hud_python-0.5.8/hud/agents/tests/test_resolver.py +0 -192
  138. hud_python-0.5.8/hud/cli/utils/tests/test_environment.py +0 -42
  139. hud_python-0.5.8/hud/clients/README.md +0 -144
  140. hud_python-0.5.8/hud/clients/__init__.py +0 -18
  141. hud_python-0.5.8/hud/clients/base.py +0 -529
  142. hud_python-0.5.8/hud/clients/environment.py +0 -51
  143. hud_python-0.5.8/hud/clients/fastmcp.py +0 -230
  144. hud_python-0.5.8/hud/clients/mcp_use.py +0 -366
  145. hud_python-0.5.8/hud/clients/tests/__init__.py +0 -1
  146. hud_python-0.5.8/hud/clients/tests/test_analyze_scenarios.py +0 -206
  147. hud_python-0.5.8/hud/clients/tests/test_client_integration.py +0 -111
  148. hud_python-0.5.8/hud/clients/tests/test_fastmcp.py +0 -342
  149. hud_python-0.5.8/hud/clients/tests/test_mcp_use_retry.py +0 -378
  150. hud_python-0.5.8/hud/clients/tests/test_protocol.py +0 -194
  151. hud_python-0.5.8/hud/clients/utils/__init__.py +0 -26
  152. hud_python-0.5.8/hud/clients/utils/mcp_use_retry.py +0 -201
  153. hud_python-0.5.8/hud/clients/utils/retry.py +0 -186
  154. hud_python-0.5.8/hud/clients/utils/retry_transport.py +0 -186
  155. hud_python-0.5.8/hud/eval/tests/test_task.py +0 -145
  156. hud_python-0.5.8/hud/patches/mcp_patches.py +0 -151
  157. hud_python-0.5.8/hud/tools/__init__.py +0 -53
  158. hud_python-0.5.8/hud/tools/computer/__init__.py +0 -19
  159. hud_python-0.5.8/hud/tools/shell.py +0 -308
  160. hud_python-0.5.8/hud/tools/tests/test_types.py +0 -193
  161. {hud_python-0.5.8 → hud_python-0.5.18}/.gitignore +0 -0
  162. {hud_python-0.5.8 → hud_python-0.5.18}/LICENSE +0 -0
  163. {hud_python-0.5.8 → hud_python-0.5.18}/README.md +0 -0
  164. {hud_python-0.5.8 → hud_python-0.5.18}/examples/README.md +0 -0
  165. {hud_python-0.5.8 → hud_python-0.5.18}/hud/__init__.py +0 -0
  166. {hud_python-0.5.8 → hud_python-0.5.18}/hud/__main__.py +0 -0
  167. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/gateway.py +0 -0
  168. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/grounded_openai.py +0 -0
  169. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/misc/__init__.py +0 -0
  170. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/misc/response_agent.py +0 -0
  171. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/__init__.py +0 -0
  172. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/conftest.py +0 -0
  173. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/test_gemini.py +0 -0
  174. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
  175. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/test_openai.py +0 -0
  176. {hud_python-0.5.8 → hud_python-0.5.18}/hud/agents/tests/test_operator.py +0 -0
  177. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/__main__.py +0 -0
  178. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/clone.py +0 -0
  179. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/flows/__init__.py +0 -0
  180. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/flows/dev.py +0 -0
  181. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/flows/init.py +0 -0
  182. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/flows/tasks.py +0 -0
  183. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/flows/templates.py +0 -0
  184. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/flows/tests/__init__.py +0 -0
  185. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/flows/tests/test_dev.py +0 -0
  186. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/get.py +0 -0
  187. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/list_func.py +0 -0
  188. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/pull.py +0 -0
  189. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/remove.py +0 -0
  190. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/rft.py +0 -0
  191. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/rft_status.py +0 -0
  192. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/__init__.py +0 -0
  193. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_analyze_metadata.py +0 -0
  194. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_build_failure.py +0 -0
  195. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_build_module.py +0 -0
  196. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_cli_init.py +0 -0
  197. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_cli_main.py +0 -0
  198. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
  199. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_cli_root.py +0 -0
  200. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_clone.py +0 -0
  201. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_convert.py +0 -0
  202. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_cursor.py +0 -0
  203. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_dev.py +0 -0
  204. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_eval.py +0 -0
  205. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_eval_bedrock.py +0 -0
  206. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_init.py +0 -0
  207. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_list_func.py +0 -0
  208. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_main_module.py +0 -0
  209. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_mcp_server.py +0 -0
  210. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_pull.py +0 -0
  211. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_push.py +0 -0
  212. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_push_happy.py +0 -0
  213. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_push_wrapper.py +0 -0
  214. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_registry.py +0 -0
  215. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/tests/test_utils.py +0 -0
  216. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/__init__.py +0 -0
  217. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/celebrate.py +0 -0
  218. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/config.py +0 -0
  219. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/cursor.py +0 -0
  220. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/docker.py +0 -0
  221. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/env_check.py +0 -0
  222. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/git.py +0 -0
  223. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/local_runner.py +0 -0
  224. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/logging.py +0 -0
  225. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/metadata.py +0 -0
  226. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/package_runner.py +0 -0
  227. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/registry.py +0 -0
  228. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/remote_runner.py +0 -0
  229. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/runner.py +0 -0
  230. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/server.py +0 -0
  231. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/source_hash.py +0 -0
  232. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tasks.py +0 -0
  233. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/__init__.py +0 -0
  234. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_config.py +0 -0
  235. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_docker.py +0 -0
  236. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_docker_hints.py +0 -0
  237. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_env_check.py +0 -0
  238. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_git.py +0 -0
  239. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_local_runner.py +0 -0
  240. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_logging_utils.py +0 -0
  241. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_metadata.py +0 -0
  242. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_package_runner.py +0 -0
  243. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_registry_utils.py +0 -0
  244. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_remote_runner.py +0 -0
  245. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_runner_modules.py +0 -0
  246. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_source_hash.py +0 -0
  247. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/tests/test_tasks.py +0 -0
  248. {hud_python-0.5.8 → hud_python-0.5.18}/hud/cli/utils/viewer.py +0 -0
  249. {hud_python-0.5.8 → hud_python-0.5.18}/hud/datasets/__init__.py +0 -0
  250. {hud_python-0.5.8 → hud_python-0.5.18}/hud/datasets/tests/__init__.py +0 -0
  251. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/__init__.py +0 -0
  252. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/connectors/__init__.py +0 -0
  253. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/connectors/base.py +0 -0
  254. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/connectors/local.py +0 -0
  255. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/connectors/openai.py +0 -0
  256. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/connectors/remote.py +0 -0
  257. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/integrations/__init__.py +0 -0
  258. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/integrations/adk.py +0 -0
  259. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/integrations/anthropic.py +0 -0
  260. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/integrations/gemini.py +0 -0
  261. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/integrations/langchain.py +0 -0
  262. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/integrations/llamaindex.py +0 -0
  263. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/integrations/openai.py +0 -0
  264. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/mock.py +0 -0
  265. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/router.py +0 -0
  266. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/tests/__init__.py +0 -0
  267. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/tests/test_integrations.py +0 -0
  268. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/tests/test_tools.py +0 -0
  269. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/types.py +0 -0
  270. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/utils/__init__.py +0 -0
  271. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/utils/formats.py +0 -0
  272. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/utils/schema.py +0 -0
  273. {hud_python-0.5.8 → hud_python-0.5.18}/hud/environment/utils/tool_wrappers.py +0 -0
  274. {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/__init__.py +0 -0
  275. {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/display.py +0 -0
  276. {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/instrument.py +0 -0
  277. {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/parallel.py +0 -0
  278. {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/tests/__init__.py +0 -0
  279. {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/tests/test_eval.py +0 -0
  280. {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/tests/test_manager.py +0 -0
  281. {hud_python-0.5.8 → hud_python-0.5.18}/hud/eval/tests/test_parallel.py +0 -0
  282. {hud_python-0.5.8 → hud_python-0.5.18}/hud/native/__init__.py +0 -0
  283. {hud_python-0.5.8 → hud_python-0.5.18}/hud/native/comparator.py +0 -0
  284. {hud_python-0.5.8 → hud_python-0.5.18}/hud/native/tests/__init__.py +0 -0
  285. {hud_python-0.5.8 → hud_python-0.5.18}/hud/native/tests/test_comparator.py +0 -0
  286. {hud_python-0.5.8 → hud_python-0.5.18}/hud/native/tests/test_native_init.py +0 -0
  287. {hud_python-0.5.8 → hud_python-0.5.18}/hud/patches/__init__.py +0 -0
  288. {hud_python-0.5.8 → hud_python-0.5.18}/hud/patches/warnings.py +0 -0
  289. {hud_python-0.5.8 → hud_python-0.5.18}/hud/py.typed +0 -0
  290. {hud_python-0.5.8 → hud_python-0.5.18}/hud/samples/__init__.py +0 -0
  291. {hud_python-0.5.8 → hud_python-0.5.18}/hud/samples/browser.py +0 -0
  292. {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/__init__.py +0 -0
  293. {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/context.py +0 -0
  294. {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/helper/__init__.py +0 -0
  295. {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/low_level.py +0 -0
  296. {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/router.py +0 -0
  297. {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/tests/__init__.py +0 -0
  298. {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/tests/test_add_tool.py +0 -0
  299. {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/tests/test_mcp_server_handlers.py +0 -0
  300. {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/tests/test_run_wrapper.py +0 -0
  301. {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/tests/test_server_extra.py +0 -0
  302. {hud_python-0.5.8 → hud_python-0.5.18}/hud/server/tests/test_sigterm_runner.py +0 -0
  303. {hud_python-0.5.8 → hud_python-0.5.18}/hud/settings.py +0 -0
  304. {hud_python-0.5.8 → hud_python-0.5.18}/hud/shared/__init__.py +0 -0
  305. {hud_python-0.5.8 → hud_python-0.5.18}/hud/shared/exceptions.py +0 -0
  306. {hud_python-0.5.8 → hud_python-0.5.18}/hud/shared/requests.py +0 -0
  307. {hud_python-0.5.8 → hud_python-0.5.18}/hud/shared/tests/__init__.py +0 -0
  308. {hud_python-0.5.8 → hud_python-0.5.18}/hud/shared/tests/test_hints.py +0 -0
  309. {hud_python-0.5.8 → hud_python-0.5.18}/hud/shared/tests/test_requests.py +0 -0
  310. {hud_python-0.5.8 → hud_python-0.5.18}/hud/telemetry/__init__.py +0 -0
  311. {hud_python-0.5.8 → hud_python-0.5.18}/hud/telemetry/exporter.py +0 -0
  312. {hud_python-0.5.8 → hud_python-0.5.18}/hud/telemetry/instrument.py +0 -0
  313. {hud_python-0.5.8 → hud_python-0.5.18}/hud/telemetry/tests/__init__.py +0 -0
  314. {hud_python-0.5.8 → hud_python-0.5.18}/hud/telemetry/tests/test_exporter.py +0 -0
  315. {hud_python-0.5.8 → hud_python-0.5.18}/hud/telemetry/tests/test_instrument.py +0 -0
  316. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/agent.py +0 -0
  317. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/computer/qwen.py +0 -0
  318. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/computer/settings.py +0 -0
  319. {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/computer}/tests/test_computer.py +0 -0
  320. {hud_python-0.5.8/hud/tools → hud_python-0.5.18/hud/tools/computer}/tests/test_computer_actions.py +0 -0
  321. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/executors/__init__.py +0 -0
  322. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/executors/base.py +0 -0
  323. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/executors/pyautogui.py +0 -0
  324. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/executors/tests/__init__.py +0 -0
  325. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/executors/tests/test_base_executor.py +0 -0
  326. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  327. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/executors/xdo.py +0 -0
  328. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/grounding/__init__.py +0 -0
  329. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/grounding/config.py +0 -0
  330. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/grounding/grounded_tool.py +0 -0
  331. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/grounding/grounder.py +0 -0
  332. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/grounding/tests/__init__.py +0 -0
  333. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
  334. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/jupyter.py +0 -0
  335. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/playwright.py +0 -0
  336. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/response.py +0 -0
  337. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/submit.py +0 -0
  338. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/__init__.py +0 -0
  339. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_agent_tool.py +0 -0
  340. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_base.py +0 -0
  341. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_jupyter_tool.py +0 -0
  342. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_playwright_tool.py +0 -0
  343. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_response.py +0 -0
  344. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_submit.py +0 -0
  345. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_tools_init.py +0 -0
  346. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/tests/test_utils.py +0 -0
  347. {hud_python-0.5.8 → hud_python-0.5.18}/hud/tools/utils.py +0 -0
  348. {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/__init__.py +0 -0
  349. {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/env.py +0 -0
  350. {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/hud_console.py +0 -0
  351. {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/mcp.py +0 -0
  352. {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/pretty_errors.py +0 -0
  353. {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/strict_schema.py +0 -0
  354. {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/telemetry.py +0 -0
  355. {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/tests/__init__.py +0 -0
  356. {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/tests/test_init.py +0 -0
  357. {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/tests/test_mcp.py +0 -0
  358. {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/tests/test_pretty_errors.py +0 -0
  359. {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/tests/test_telemetry.py +0 -0
  360. {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/tests/test_tool_shorthand.py +0 -0
  361. {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/tool_shorthand.py +0 -0
  362. {hud_python-0.5.8 → hud_python-0.5.18}/hud/utils/types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.5.8
3
+ Version: 0.5.18
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -91,7 +91,7 @@ Requires-Dist: pyright==1.1.407; extra == 'dev'
91
91
  Requires-Dist: pytest-asyncio; extra == 'dev'
92
92
  Requires-Dist: pytest-cov; extra == 'dev'
93
93
  Requires-Dist: pytest-mock; extra == 'dev'
94
- Requires-Dist: pytest<9,>=8.1.1; extra == 'dev'
94
+ Requires-Dist: pytest>=8.1.1; extra == 'dev'
95
95
  Requires-Dist: ruff>=0.11.8; extra == 'dev'
96
96
  Requires-Dist: tornado>=6.5.2; extra == 'dev'
97
97
  Description-Content-Type: text/markdown
@@ -2,12 +2,13 @@ from __future__ import annotations
2
2
 
3
3
  from typing import Any
4
4
 
5
- from .base import MCPAgent
5
+ from .base import CategorizedTools, MCPAgent
6
6
  from .openai import OpenAIAgent
7
7
  from .openai_chat import OpenAIChatAgent
8
8
  from .operator import OperatorAgent
9
9
 
10
10
  __all__ = [
11
+ "CategorizedTools",
11
12
  "MCPAgent",
12
13
  "OpenAIAgent",
13
14
  "OpenAIChatAgent",
@@ -47,24 +48,20 @@ def create_agent(model: str, **kwargs: Any) -> MCPAgent:
47
48
  # Resolve class and gateway info
48
49
  agent_cls, gateway_info = resolve_cls(model)
49
50
 
50
- # Get model ID from gateway info or use input
51
+ # Get model name from gateway info or use input
51
52
  model_id = model
52
53
  if gateway_info:
53
- model_id = gateway_info.get("model") or gateway_info.get("id") or model
54
+ model_id = gateway_info.get("model_name") or model
54
55
 
55
56
  # Determine provider: from gateway info, or infer from agent class
56
57
  if gateway_info:
57
- provider = gateway_info.get("provider") or "openai"
58
+ provider = gateway_info["provider"]["name"]
58
59
  else:
59
- # Map agent class to provider for known types
60
- from hud.agents.claude import ClaudeAgent
61
- from hud.agents.gemini import GeminiAgent
62
-
63
- _AGENT_TO_PROVIDER = {
64
- ClaudeAgent: "anthropic",
65
- GeminiAgent: "google",
66
- }
67
- provider = _AGENT_TO_PROVIDER.get(agent_cls, "openai")
60
+ provider = "openai"
61
+ if agent_cls.__name__ == "ClaudeAgent":
62
+ provider = "anthropic"
63
+ elif agent_cls.__name__ in ("GeminiAgent", "GeminiCUAAgent"):
64
+ provider = "gemini"
68
65
 
69
66
  client = build_gateway_client(provider)
70
67
 
@@ -6,14 +6,17 @@ import asyncio
6
6
  import json
7
7
  import logging
8
8
  from abc import ABC, abstractmethod
9
+ from dataclasses import dataclass, field
9
10
  from typing import TYPE_CHECKING, Any, ClassVar, Literal
10
11
 
11
12
  import mcp.types as types
12
- from pydantic import BaseModel, ConfigDict
13
13
 
14
- from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult, Trace
14
+ from hud.tools.native_types import NativeToolSpec
15
+ from hud.types import AgentResponse, AgentType, BaseAgentConfig, MCPToolCall, MCPToolResult, Trace
15
16
  from hud.utils.hud_console import HUDConsole
16
17
 
18
+ from .types import BaseCreateParams
19
+
17
20
  if TYPE_CHECKING:
18
21
  from hud.environment import Environment
19
22
  from hud.eval.context import EvalContext
@@ -22,16 +25,28 @@ if TYPE_CHECKING:
22
25
  logger = logging.getLogger(__name__)
23
26
 
24
27
 
25
@dataclass
class CategorizedTools:
    """Buckets that an agent's tools are sorted into by ``categorize_tools``.

    Every field starts out as its own empty container, so instances never
    share state with one another.
    """

    # (tool, spec) pairs whose resolved native spec is not marked as hosted.
    native: list[tuple[types.Tool, NativeToolSpec]] = field(default_factory=list)

    # (tool, spec) pairs whose resolved native spec is marked as hosted.
    hosted: list[tuple[types.Tool, NativeToolSpec]] = field(default_factory=list)

    # Tools with no native spec for this agent that were not role-blocked.
    generic: list[types.Tool] = field(default_factory=list)

    # Roles already taken by an accepted native or hosted tool.
    claimed_roles: set[str] = field(default_factory=set)

    # (tool, reason) pairs for tools dropped because of a role conflict.
    skipped: list[tuple[types.Tool, str]] = field(default_factory=list)
35
50
 
36
51
 
37
52
  class MCPAgent(ABC):
@@ -52,6 +67,185 @@ class MCPAgent(ABC):
52
67
  required_tools: ClassVar[list[str]] = [] # Tools that must be available
53
68
  config_cls: ClassVar[type[BaseAgentConfig]] = BaseAgentConfig
54
69
 
70
@classmethod
@abstractmethod
def agent_type(cls) -> AgentType:
    """Identify which ``AgentType`` this agent class corresponds to.

    Concrete agents must override this hook. The ``value`` of the returned
    enum member is the key used to look up this agent's entry in a tool's
    ``native_tools`` metadata.

    Returns:
        The AgentType enum member for this agent.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError
82
+
83
def resolve_native_spec(self, tool: types.Tool) -> NativeToolSpec | None:
    """Return the native spec this agent should use for ``tool``, if any.

    Resolution order:
      1. ``tool.meta["native_tools"][<agent type value>]`` when that entry is a
         non-empty dict. Recognised keys become NativeToolSpec fields and
         every other key is collected into ``extra``.
      2. ``_legacy_native_spec_fallback(tool)`` for environments that do not
         emit ``native_tools`` metadata.

    Whatever spec is found is then checked with ``spec.supports_model``; a
    spec the current model does not support is discarded so the tool falls
    back to generic function calling.

    Malformed metadata (``native_tools`` missing, null, or not a mapping) is
    treated as "no metadata" instead of raising.

    Args:
        tool: MCP Tool object to check for native specs.

    Returns:
        A NativeToolSpec if one applies to this agent and model, else None.
    """
    spec: NativeToolSpec | None = None

    # First try metadata-based resolution. `.get("native_tools", {})` would
    # still hand back None when the key is present with a null value, so the
    # type is checked explicitly before any lookup.
    native_tools = tool.meta.get("native_tools") if tool.meta else None
    spec_dict = (
        native_tools.get(self.agent_type().value) if isinstance(native_tools, dict) else None
    )

    if spec_dict and isinstance(spec_dict, dict):
        # Extract known fields and put the rest in extra
        known_fields = {
            "api_type",
            "api_name",
            "beta",
            "hosted",
            "role",
            "supported_models",
        }
        extra = {k: v for k, v in spec_dict.items() if k not in known_fields}

        # Convert supported_models to a tuple for the frozen model. A bare
        # string is taken as a single entry; tuple("abc") would otherwise
        # split it into individual characters.
        supported_models_raw = spec_dict.get("supported_models")
        supported_models: tuple[str, ...] | None = None
        if isinstance(supported_models_raw, str):
            supported_models = (supported_models_raw,)
        elif supported_models_raw:
            supported_models = tuple(supported_models_raw)

        spec = NativeToolSpec(
            api_type=spec_dict.get("api_type"),
            api_name=spec_dict.get("api_name"),
            beta=spec_dict.get("beta"),
            hosted=spec_dict.get("hosted", False),
            role=spec_dict.get("role"),
            supported_models=supported_models,
            extra=extra,
        )

    # Fall back to legacy name-based detection for old environments
    if spec is None:
        spec = self._legacy_native_spec_fallback(tool)

    # Check if current model supports this native spec
    if spec is not None and not spec.supports_model(self.model):
        logger.debug(
            "Model %s not in supported_models for native spec %s, falling back to functions",
            self.model,
            spec.api_type,
        )
        return None

    return spec
152
+
153
def _legacy_native_spec_fallback(self, tool: types.Tool) -> NativeToolSpec | None:
    """Hook for recognising native tools in environments without metadata.

    The base implementation recognises nothing. Subclasses may override it to
    build a spec for tools exposed without ``native_tools`` metadata, for
    example by matching on the tool name.

    Args:
        tool: MCP Tool object to inspect.

    Returns:
        A NativeToolSpec when the tool matches a known legacy pattern; the
        base implementation always returns None.
    """
    return None
166
+
167
def get_tool_role(self, tool: types.Tool) -> str | None:
    """Return the role declared by any of the tool's native specs.

    Roles drive mutual exclusion: once an agent accepts a tool natively,
    other tools carrying the same role are excluded.

    Every entry under ``tool.meta["native_tools"]`` is inspected, regardless
    of which agent type it targets, and the first truthy ``role`` wins (the
    entries are expected to agree on the role). Metadata that is missing,
    empty, or not a mapping yields None instead of raising.

    Args:
        tool: MCP Tool object to check.

    Returns:
        The role string if any native spec defines one, None otherwise.
    """
    native_tools = tool.meta.get("native_tools") if tool.meta else None
    # A non-mapping value (e.g. a list from a malformed environment) has no
    # .values(); treat it the same as absent metadata.
    if not isinstance(native_tools, dict):
        return None

    return next(
        (
            spec_dict["role"]
            for spec_dict in native_tools.values()
            if isinstance(spec_dict, dict) and spec_dict.get("role")
        ),
        None,
    )
192
+
193
def categorize_tools(self, tools: list[types.Tool] | None = None) -> CategorizedTools:
    """Categorize tools by native spec availability with role-based exclusion.

    Each tool's native spec is resolved exactly once and reused by both passes:
      1. First pass: tools with a native spec claim their role and are filed
         under ``hosted`` or ``native``. A later native tool whose role is
         already claimed is recorded in ``skipped``.
      2. Second pass: tools without a native spec are filed under ``generic``
         unless their role (from ``get_tool_role``) was claimed in pass one,
         in which case they are recorded in ``skipped``.

    Args:
        tools: List of MCP tools to categorize. If None, uses
            ``get_available_tools()``.

    Returns:
        CategorizedTools with native, hosted, generic, and skipped tools plus
        the set of claimed roles.
    """
    if tools is None:
        tools = self.get_available_tools()

    result = CategorizedTools()

    # Resolve up front so both passes see the same answer and the lookup
    # (and its debug logging) is not repeated for every tool.
    resolved = [(tool, self.resolve_native_spec(tool)) for tool in tools]

    # First pass: process tools with native specs for this agent
    for tool, spec in resolved:
        if not spec:
            continue

        # Check for role conflicts between native tools
        if spec.role:
            if spec.role in result.claimed_roles:
                # Another native tool already claimed this role - skip this one
                result.skipped.append(
                    (tool, f"role '{spec.role}' already claimed by another native tool")
                )
                continue
            result.claimed_roles.add(spec.role)

        if spec.hosted:
            result.hosted.append((tool, spec))
        else:
            result.native.append((tool, spec))

    # Second pass: process tools without native specs (generic function tools)
    for tool, spec in resolved:
        if spec:
            # Already processed in first pass
            continue

        # Check if this tool's role is already claimed by a native tool
        tool_role = self.get_tool_role(tool)
        if tool_role and tool_role in result.claimed_roles:
            result.skipped.append((tool, f"role '{tool_role}' already claimed by native tool"))
            continue

        result.generic.append(tool)

    return result
248
+
55
249
  def __init__(self, params: BaseCreateParams | None = None, **kwargs: Any) -> None:
56
250
  if params is None:
57
251
  import warnings
@@ -129,8 +323,8 @@ class MCPAgent(ABC):
129
323
  f"Available tools: {sorted(available_tool_names)}"
130
324
  )
131
325
 
132
- self.console.info(
133
- f"Agent initialized with {len(self._available_tools)} tools: "
326
+ self.console.debug(
327
+ f"Discovered {len(self._available_tools)} tools from environment: "
134
328
  f"{', '.join([t.name for t in self._available_tools])}"
135
329
  )
136
330
 
@@ -208,7 +402,21 @@ class MCPAgent(ABC):
208
402
  await self._initialize_from_ctx(ctx)
209
403
 
210
404
  try:
211
- result = await self._run_context(text_to_blocks(ctx.prompt), max_steps=max_steps)
405
+ # Build initial context - optionally append setup tool output
406
+ # Check ctx first (task-level override), then fall back to agent config
407
+ append_setup = getattr(ctx, "append_setup_output", False) or getattr(
408
+ self.config, "append_setup_output", False
409
+ )
410
+ initial_prompt = ctx.prompt
411
+ if append_setup:
412
+ setup_output = getattr(ctx, "setup_output", None)
413
+ if setup_output:
414
+ initial_prompt = f"{initial_prompt}\n\n{setup_output}"
415
+
416
+ # Build initial blocks (text prompt + optional screenshot)
417
+ initial_blocks = text_to_blocks(initial_prompt)
418
+
419
+ result = await self._run_context(initial_blocks, max_steps=max_steps)
212
420
 
213
421
  # Propagate error state to context for platform visibility
214
422
  if result.isError and hasattr(ctx, "error"):
@@ -342,8 +550,17 @@ class MCPAgent(ABC):
342
550
  is_error = False
343
551
 
344
552
  # Ensure all parameters are the correct type
553
+ # Use ctx.reward if already set (e.g., from scenario evaluate), otherwise 0.0
554
+ # Note: For v4 tasks with evaluate_tool, reward is set in __aexit__ after this returns,
555
+ # so callers should prefer ctx.reward over Trace.reward for the final result.
556
+ reward = 0.0
557
+ if self.ctx is not None:
558
+ ctx_reward = getattr(self.ctx, "reward", None)
559
+ if ctx_reward is not None:
560
+ reward = ctx_reward
561
+
345
562
  trace_params = {
346
- "reward": 0.0,
563
+ "reward": reward,
347
564
  "done": True,
348
565
  "messages": messages,
349
566
  "content": final_response.content if final_response else error,
@@ -519,8 +736,14 @@ def find_reward(result: MCPToolResult) -> float:
519
736
 
520
737
  Agent accepts "reward", "grade", "score", or weighted subscores
521
738
 
739
+ If isError is True, return 0.0 (error results should not contribute positive reward).
522
740
  If not found, return 0.0
523
741
  """
742
+ # Error results should return 0.0 - don't extract reward from error responses
743
+ if result.isError:
744
+ logger.warning("Evaluate tool returned error, using reward=0.0")
745
+ return 0.0
746
+
524
747
  accept_keys = ["reward", "grade", "score"]
525
748
 
526
749
  # Check for direct reward/grade/score keys