hud-python 0.4.24__tar.gz → 0.4.26__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (202)
  1. {hud_python-0.4.24 → hud_python-0.4.26}/PKG-INFO +1 -1
  2. {hud_python-0.4.24 → hud_python-0.4.26}/hud/agents/base.py +36 -17
  3. {hud_python-0.4.24 → hud_python-0.4.26}/hud/agents/misc/response_agent.py +2 -1
  4. {hud_python-0.4.24 → hud_python-0.4.26}/hud/clients/mcp_use.py +13 -19
  5. hud_python-0.4.26/hud/clients/tests/test_mcp_use_retry.py +378 -0
  6. hud_python-0.4.26/hud/clients/utils/mcp_use_retry.py +201 -0
  7. {hud_python-0.4.24 → hud_python-0.4.26}/hud/datasets/execution/parallel.py +56 -64
  8. {hud_python-0.4.24 → hud_python-0.4.26}/hud/otel/config.py +19 -2
  9. {hud_python-0.4.24 → hud_python-0.4.26}/hud/utils/tests/test_version.py +1 -1
  10. {hud_python-0.4.24 → hud_python-0.4.26}/hud/version.py +1 -1
  11. {hud_python-0.4.24 → hud_python-0.4.26}/pyproject.toml +1 -1
  12. {hud_python-0.4.24 → hud_python-0.4.26}/.gitignore +0 -0
  13. {hud_python-0.4.24 → hud_python-0.4.26}/LICENSE +0 -0
  14. {hud_python-0.4.24 → hud_python-0.4.26}/README.md +0 -0
  15. {hud_python-0.4.24 → hud_python-0.4.26}/environments/README.md +0 -0
  16. {hud_python-0.4.24 → hud_python-0.4.26}/environments/browser/README.md +0 -0
  17. {hud_python-0.4.24 → hud_python-0.4.26}/environments/browser/apps/2048/README.md +0 -0
  18. {hud_python-0.4.24 → hud_python-0.4.26}/environments/browser/apps/2048/backend/pyproject.toml +0 -0
  19. {hud_python-0.4.24 → hud_python-0.4.26}/environments/browser/apps/README.md +0 -0
  20. {hud_python-0.4.24 → hud_python-0.4.26}/environments/browser/apps/todo/README.md +0 -0
  21. {hud_python-0.4.24 → hud_python-0.4.26}/environments/browser/apps/todo/backend/pyproject.toml +0 -0
  22. {hud_python-0.4.24 → hud_python-0.4.26}/environments/browser/pyproject.toml +0 -0
  23. {hud_python-0.4.24 → hud_python-0.4.26}/environments/remote_browser/README.md +0 -0
  24. {hud_python-0.4.24 → hud_python-0.4.26}/environments/remote_browser/pyproject.toml +0 -0
  25. {hud_python-0.4.24 → hud_python-0.4.26}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
  26. {hud_python-0.4.24 → hud_python-0.4.26}/environments/text_2048/README.md +0 -0
  27. {hud_python-0.4.24 → hud_python-0.4.26}/environments/text_2048/pyproject.toml +0 -0
  28. {hud_python-0.4.24 → hud_python-0.4.26}/examples/README.md +0 -0
  29. {hud_python-0.4.24 → hud_python-0.4.26}/hud/__init__.py +0 -0
  30. {hud_python-0.4.24 → hud_python-0.4.26}/hud/__main__.py +0 -0
  31. {hud_python-0.4.24 → hud_python-0.4.26}/hud/agents/__init__.py +0 -0
  32. {hud_python-0.4.24 → hud_python-0.4.26}/hud/agents/claude.py +0 -0
  33. {hud_python-0.4.24 → hud_python-0.4.26}/hud/agents/grounded_openai.py +0 -0
  34. {hud_python-0.4.24 → hud_python-0.4.26}/hud/agents/langchain.py +0 -0
  35. {hud_python-0.4.24 → hud_python-0.4.26}/hud/agents/misc/__init__.py +0 -0
  36. {hud_python-0.4.24 → hud_python-0.4.26}/hud/agents/openai.py +0 -0
  37. {hud_python-0.4.24 → hud_python-0.4.26}/hud/agents/openai_chat_generic.py +0 -0
  38. {hud_python-0.4.24 → hud_python-0.4.26}/hud/agents/tests/__init__.py +0 -0
  39. {hud_python-0.4.24 → hud_python-0.4.26}/hud/agents/tests/test_base.py +0 -0
  40. {hud_python-0.4.24 → hud_python-0.4.26}/hud/agents/tests/test_claude.py +0 -0
  41. {hud_python-0.4.24 → hud_python-0.4.26}/hud/agents/tests/test_client.py +0 -0
  42. {hud_python-0.4.24 → hud_python-0.4.26}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
  43. {hud_python-0.4.24 → hud_python-0.4.26}/hud/agents/tests/test_openai.py +0 -0
  44. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/__init__.py +0 -0
  45. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/__main__.py +0 -0
  46. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/analyze.py +0 -0
  47. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/build.py +0 -0
  48. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/clone.py +0 -0
  49. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/debug.py +0 -0
  50. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/dev.py +0 -0
  51. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/eval.py +0 -0
  52. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/hf.py +0 -0
  53. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/init.py +0 -0
  54. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/list_func.py +0 -0
  55. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/pull.py +0 -0
  56. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/push.py +0 -0
  57. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/remove.py +0 -0
  58. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/rl/README.md +0 -0
  59. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/rl/__init__.py +0 -0
  60. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/rl/init.py +0 -0
  61. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/rl/pod.py +0 -0
  62. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/rl/ssh.py +0 -0
  63. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/rl/train.py +0 -0
  64. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/rl/utils.py +0 -0
  65. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/tests/__init__.py +0 -0
  66. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/tests/test_analyze.py +0 -0
  67. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/tests/test_analyze_metadata.py +0 -0
  68. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/tests/test_build.py +0 -0
  69. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/tests/test_cli_init.py +0 -0
  70. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/tests/test_cli_main.py +0 -0
  71. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/tests/test_clone.py +0 -0
  72. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/tests/test_cursor.py +0 -0
  73. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/tests/test_debug.py +0 -0
  74. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/tests/test_list_func.py +0 -0
  75. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/tests/test_main_module.py +0 -0
  76. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/tests/test_mcp_server.py +0 -0
  77. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/tests/test_pull.py +0 -0
  78. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/tests/test_push.py +0 -0
  79. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/tests/test_registry.py +0 -0
  80. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/tests/test_utils.py +0 -0
  81. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/utils/__init__.py +0 -0
  82. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/utils/cursor.py +0 -0
  83. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/utils/docker.py +0 -0
  84. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/utils/environment.py +0 -0
  85. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/utils/interactive.py +0 -0
  86. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/utils/logging.py +0 -0
  87. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/utils/metadata.py +0 -0
  88. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/utils/registry.py +0 -0
  89. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/utils/remote_runner.py +0 -0
  90. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/utils/runner.py +0 -0
  91. {hud_python-0.4.24 → hud_python-0.4.26}/hud/cli/utils/server.py +0 -0
  92. {hud_python-0.4.24 → hud_python-0.4.26}/hud/clients/README.md +0 -0
  93. {hud_python-0.4.24 → hud_python-0.4.26}/hud/clients/__init__.py +0 -0
  94. {hud_python-0.4.24 → hud_python-0.4.26}/hud/clients/base.py +0 -0
  95. {hud_python-0.4.24 → hud_python-0.4.26}/hud/clients/fastmcp.py +0 -0
  96. {hud_python-0.4.24 → hud_python-0.4.26}/hud/clients/tests/__init__.py +0 -0
  97. {hud_python-0.4.24 → hud_python-0.4.26}/hud/clients/tests/test_client_integration.py +0 -0
  98. {hud_python-0.4.24 → hud_python-0.4.26}/hud/clients/tests/test_fastmcp.py +0 -0
  99. {hud_python-0.4.24 → hud_python-0.4.26}/hud/clients/tests/test_protocol.py +0 -0
  100. {hud_python-0.4.24 → hud_python-0.4.26}/hud/clients/utils/__init__.py +0 -0
  101. {hud_python-0.4.24 → hud_python-0.4.26}/hud/clients/utils/retry.py +0 -0
  102. {hud_python-0.4.24 → hud_python-0.4.26}/hud/clients/utils/retry_transport.py +0 -0
  103. {hud_python-0.4.24 → hud_python-0.4.26}/hud/datasets/__init__.py +0 -0
  104. {hud_python-0.4.24 → hud_python-0.4.26}/hud/datasets/execution/__init__.py +0 -0
  105. {hud_python-0.4.24 → hud_python-0.4.26}/hud/datasets/execution/runner.py +0 -0
  106. {hud_python-0.4.24 → hud_python-0.4.26}/hud/datasets/task.py +0 -0
  107. {hud_python-0.4.24 → hud_python-0.4.26}/hud/datasets/utils.py +0 -0
  108. {hud_python-0.4.24 → hud_python-0.4.26}/hud/misc/__init__.py +0 -0
  109. {hud_python-0.4.24 → hud_python-0.4.26}/hud/misc/claude_plays_pokemon.py +0 -0
  110. {hud_python-0.4.24 → hud_python-0.4.26}/hud/native/__init__.py +0 -0
  111. {hud_python-0.4.24 → hud_python-0.4.26}/hud/native/comparator.py +0 -0
  112. {hud_python-0.4.24 → hud_python-0.4.26}/hud/native/tests/__init__.py +0 -0
  113. {hud_python-0.4.24 → hud_python-0.4.26}/hud/native/tests/test_comparator.py +0 -0
  114. {hud_python-0.4.24 → hud_python-0.4.26}/hud/native/tests/test_native_init.py +0 -0
  115. {hud_python-0.4.24 → hud_python-0.4.26}/hud/otel/__init__.py +0 -0
  116. {hud_python-0.4.24 → hud_python-0.4.26}/hud/otel/collector.py +0 -0
  117. {hud_python-0.4.24 → hud_python-0.4.26}/hud/otel/context.py +0 -0
  118. {hud_python-0.4.24 → hud_python-0.4.26}/hud/otel/exporters.py +0 -0
  119. {hud_python-0.4.24 → hud_python-0.4.26}/hud/otel/instrumentation.py +0 -0
  120. {hud_python-0.4.24 → hud_python-0.4.26}/hud/otel/processors.py +0 -0
  121. {hud_python-0.4.24 → hud_python-0.4.26}/hud/otel/tests/__init__.py +0 -0
  122. {hud_python-0.4.24 → hud_python-0.4.26}/hud/otel/tests/test_processors.py +0 -0
  123. {hud_python-0.4.24 → hud_python-0.4.26}/hud/py.typed +0 -0
  124. {hud_python-0.4.24 → hud_python-0.4.26}/hud/server/__init__.py +0 -0
  125. {hud_python-0.4.24 → hud_python-0.4.26}/hud/server/context.py +0 -0
  126. {hud_python-0.4.24 → hud_python-0.4.26}/hud/server/helper/__init__.py +0 -0
  127. {hud_python-0.4.24 → hud_python-0.4.26}/hud/server/low_level.py +0 -0
  128. {hud_python-0.4.24 → hud_python-0.4.26}/hud/server/server.py +0 -0
  129. {hud_python-0.4.24 → hud_python-0.4.26}/hud/server/tests/__init__.py +0 -0
  130. {hud_python-0.4.24 → hud_python-0.4.26}/hud/settings.py +0 -0
  131. {hud_python-0.4.24 → hud_python-0.4.26}/hud/shared/__init__.py +0 -0
  132. {hud_python-0.4.24 → hud_python-0.4.26}/hud/shared/exceptions.py +0 -0
  133. {hud_python-0.4.24 → hud_python-0.4.26}/hud/shared/hints.py +0 -0
  134. {hud_python-0.4.24 → hud_python-0.4.26}/hud/shared/requests.py +0 -0
  135. {hud_python-0.4.24 → hud_python-0.4.26}/hud/shared/tests/__init__.py +0 -0
  136. {hud_python-0.4.24 → hud_python-0.4.26}/hud/shared/tests/test_exceptions.py +0 -0
  137. {hud_python-0.4.24 → hud_python-0.4.26}/hud/shared/tests/test_requests.py +0 -0
  138. {hud_python-0.4.24 → hud_python-0.4.26}/hud/telemetry/__init__.py +0 -0
  139. {hud_python-0.4.24 → hud_python-0.4.26}/hud/telemetry/instrument.py +0 -0
  140. {hud_python-0.4.24 → hud_python-0.4.26}/hud/telemetry/job.py +0 -0
  141. {hud_python-0.4.24 → hud_python-0.4.26}/hud/telemetry/replay.py +0 -0
  142. {hud_python-0.4.24 → hud_python-0.4.26}/hud/telemetry/tests/__init__.py +0 -0
  143. {hud_python-0.4.24 → hud_python-0.4.26}/hud/telemetry/tests/test_replay.py +0 -0
  144. {hud_python-0.4.24 → hud_python-0.4.26}/hud/telemetry/tests/test_trace.py +0 -0
  145. {hud_python-0.4.24 → hud_python-0.4.26}/hud/telemetry/trace.py +0 -0
  146. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/__init__.py +0 -0
  147. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/base.py +0 -0
  148. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/bash.py +0 -0
  149. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/computer/__init__.py +0 -0
  150. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/computer/anthropic.py +0 -0
  151. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/computer/hud.py +0 -0
  152. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/computer/openai.py +0 -0
  153. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/computer/settings.py +0 -0
  154. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/edit.py +0 -0
  155. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/executors/__init__.py +0 -0
  156. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/executors/base.py +0 -0
  157. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/executors/pyautogui.py +0 -0
  158. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/executors/tests/__init__.py +0 -0
  159. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/executors/tests/test_base_executor.py +0 -0
  160. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  161. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/executors/xdo.py +0 -0
  162. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/grounding/__init__.py +0 -0
  163. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/grounding/config.py +0 -0
  164. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/grounding/grounded_tool.py +0 -0
  165. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/grounding/grounder.py +0 -0
  166. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/grounding/tests/__init__.py +0 -0
  167. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
  168. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/playwright.py +0 -0
  169. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/response.py +0 -0
  170. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/submit.py +0 -0
  171. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/tests/__init__.py +0 -0
  172. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/tests/test_base.py +0 -0
  173. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/tests/test_bash.py +0 -0
  174. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/tests/test_bash_extended.py +0 -0
  175. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/tests/test_computer.py +0 -0
  176. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/tests/test_computer_actions.py +0 -0
  177. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/tests/test_edit.py +0 -0
  178. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/tests/test_init.py +0 -0
  179. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/tests/test_playwright_tool.py +0 -0
  180. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/tests/test_response.py +0 -0
  181. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/tests/test_tools.py +0 -0
  182. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/tests/test_tools_init.py +0 -0
  183. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/tests/test_utils.py +0 -0
  184. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/types.py +0 -0
  185. {hud_python-0.4.24 → hud_python-0.4.26}/hud/tools/utils.py +0 -0
  186. {hud_python-0.4.24 → hud_python-0.4.26}/hud/types.py +0 -0
  187. {hud_python-0.4.24 → hud_python-0.4.26}/hud/utils/__init__.py +0 -0
  188. {hud_python-0.4.24 → hud_python-0.4.26}/hud/utils/agent_factories.py +0 -0
  189. {hud_python-0.4.24 → hud_python-0.4.26}/hud/utils/async_utils.py +0 -0
  190. {hud_python-0.4.24 → hud_python-0.4.26}/hud/utils/hud_console.py +0 -0
  191. {hud_python-0.4.24 → hud_python-0.4.26}/hud/utils/mcp.py +0 -0
  192. {hud_python-0.4.24 → hud_python-0.4.26}/hud/utils/pretty_errors.py +0 -0
  193. {hud_python-0.4.24 → hud_python-0.4.26}/hud/utils/progress.py +0 -0
  194. {hud_python-0.4.24 → hud_python-0.4.26}/hud/utils/telemetry.py +0 -0
  195. {hud_python-0.4.24 → hud_python-0.4.26}/hud/utils/tests/__init__.py +0 -0
  196. {hud_python-0.4.24 → hud_python-0.4.26}/hud/utils/tests/test_async_utils.py +0 -0
  197. {hud_python-0.4.24 → hud_python-0.4.26}/hud/utils/tests/test_init.py +0 -0
  198. {hud_python-0.4.24 → hud_python-0.4.26}/hud/utils/tests/test_mcp.py +0 -0
  199. {hud_python-0.4.24 → hud_python-0.4.26}/hud/utils/tests/test_progress.py +0 -0
  200. {hud_python-0.4.24 → hud_python-0.4.26}/hud/utils/tests/test_telemetry.py +0 -0
  201. {hud_python-0.4.24 → hud_python-0.4.26}/rl/README.md +0 -0
  202. {hud_python-0.4.24 → hud_python-0.4.26}/rl/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.4.24
3
+ Version: 0.4.26
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -556,26 +556,11 @@ class MCPAgent(ABC):
556
556
 
557
557
  all_tools = await self.mcp_client.list_tools()
558
558
 
559
- # Filter tools
560
- self._available_tools = []
561
- self._tool_map = {}
562
559
 
563
- # Track response tools by server
564
560
  response_tools_by_server: dict[str, str] = {} # server_name -> tool_name
565
-
566
561
  for tool in all_tools:
567
- # Check if tool should be included
568
- if self.allowed_tools and tool.name not in self.allowed_tools:
569
- continue
570
- if tool.name in self.disallowed_tools:
571
- continue
572
-
573
- self._available_tools.append(tool)
574
- # Simplified mapping - just tool name to tool
575
- self._tool_map[tool.name] = tool
576
-
577
- # Track response tools
578
562
  if "response" in tool.name or tool.name == "response":
563
+ self.console.debug(f"Found response tool: '{tool.name}'")
579
564
  # Extract server name from tool name (e.g., "grader_response" -> "grader")
580
565
  if "_" in tool.name:
581
566
  server_name = tool.name.split("_", 1)[0]
@@ -583,27 +568,61 @@ class MCPAgent(ABC):
583
568
  else:
584
569
  response_tools_by_server["_default"] = tool.name
585
570
 
586
- # Find the response tool to use (prioritize last server in config)
571
+ # Add response tool to lifecycle tools BEFORE filtering
587
572
  if response_tools_by_server and hasattr(self.mcp_client, "mcp_config"):
588
573
  # Get server names in order from mcp_config
589
574
  server_names = list(self.mcp_client.mcp_config.keys())
575
+ self.console.debug(f"Server names: {server_names}")
590
576
 
591
577
  # Try to find response tool from last server first
592
578
  response_tool_name = None
593
579
  for server_name in reversed(server_names):
594
580
  if server_name in response_tools_by_server:
595
581
  response_tool_name = response_tools_by_server[server_name]
582
+ self.console.debug(f"Found response tool '{response_tool_name}' from server '{server_name}'")
596
583
  break
597
584
 
598
585
  # Fallback to any response tool
599
586
  if not response_tool_name and response_tools_by_server:
600
587
  response_tool_name = next(iter(response_tools_by_server.values()))
588
+ self.console.debug(f"Using fallback response tool '{response_tool_name}'")
601
589
 
602
590
  # Add to lifecycle tools if found
603
591
  if response_tool_name and response_tool_name not in self.lifecycle_tools:
604
592
  self.console.debug(f"Auto-detected '{response_tool_name}' tool as a lifecycle tool")
605
593
  self.response_tool_name = response_tool_name
606
594
  self.lifecycle_tools.append(response_tool_name)
595
+ elif response_tool_name:
596
+ self.console.debug(f"Response tool '{response_tool_name}' already in lifecycle_tools")
597
+ self.response_tool_name = response_tool_name
598
+ else:
599
+ self.console.debug(f"No response tools found or no mcp_config")
600
+
601
+ # Filter tools
602
+ self._available_tools = []
603
+ self._tool_map = {}
604
+
605
+ self.console.debug(f"All tools: {[t.name for t in all_tools]}")
606
+ self.console.debug(f"Allowed tools: {self.allowed_tools}")
607
+ self.console.debug(f"Disallowed tools: {self.disallowed_tools}")
608
+ self.console.debug(f"Lifecycle tools: {self.lifecycle_tools}")
609
+
610
+ for tool in all_tools:
611
+ # Lifecycle tools (setup, evaluate, response) should always be included
612
+ is_lifecycle = tool.name in self.lifecycle_tools
613
+
614
+ # Check if tool should be included
615
+ if not is_lifecycle:
616
+ if self.allowed_tools and tool.name not in self.allowed_tools:
617
+ self.console.debug(f"Skipping tool '{tool.name}' - not in allowed_tools")
618
+ continue
619
+ if tool.name in self.disallowed_tools:
620
+ self.console.debug(f"Skipping tool '{tool.name}' - in disallowed_tools")
621
+ continue
622
+
623
+ self.console.debug(f"Adding tool '{tool.name}' to available tools (lifecycle={is_lifecycle})")
624
+ self._available_tools.append(tool)
625
+ self._tool_map[tool.name] = tool
607
626
 
608
627
  # Check if all required tools are available
609
628
  if self.required_tools:
@@ -34,7 +34,8 @@ class ResponseAgent:
34
34
 
35
35
  - STOP: If the agent indicates it has successfully completed a task, even if phrased as a question
36
36
  like "I have entered the right values into this form. Would you like me to do anything else?"
37
- or "Here is the website. Is there any other information you need?"
37
+ or "Here is the website. Is there any other information you need?" or if the agent has
38
+ strongly determined it wants to stop the task.
38
39
 
39
40
  - CONTINUE: If the agent is asking for clarification before proceeding with a task
40
41
  like "I'm about to clear cookies from this website. Would you like me to proceed?"
@@ -15,7 +15,7 @@ from hud.types import MCPToolCall, MCPToolResult
15
15
  from hud.version import __version__ as hud_version
16
16
 
17
17
  from .base import BaseHUDClient
18
- from .utils.retry import retry_with_backoff
18
+ from .utils.mcp_use_retry import patch_all_sessions
19
19
 
20
20
  logger = logging.getLogger(__name__)
21
21
 
@@ -64,6 +64,10 @@ class MCPUseHUDClient(BaseHUDClient):
64
64
  self._sessions = await self._client.create_all_sessions()
65
65
  logger.info("Created %d MCP sessions", len(self._sessions))
66
66
 
67
+ # Patch all sessions with retry logic
68
+ patch_all_sessions(self._sessions)
69
+ logger.debug("Applied retry logic to all MCP sessions")
70
+
67
71
  # Configure validation for all sessions based on client setting
68
72
  try:
69
73
  for session in self._sessions.values():
@@ -128,11 +132,8 @@ class MCPUseHUDClient(BaseHUDClient):
128
132
  logger.warning("Client session not initialized for %s", server_name)
129
133
  continue
130
134
 
131
- # List tools with retry logic for HTTP errors
132
- tools_result = await retry_with_backoff(
133
- session.connector.client_session.list_tools,
134
- operation_name=f"list_tools_{server_name}",
135
- )
135
+ # List tools (retry logic is handled at transport level)
136
+ tools_result = await session.connector.client_session.list_tools()
136
137
 
137
138
  logger.info(
138
139
  "Discovered %d tools from '%s': %s",
@@ -206,12 +207,10 @@ class MCPUseHUDClient(BaseHUDClient):
206
207
  if session.connector.client_session is None:
207
208
  raise ValueError(f"Client session not initialized for {server_name}")
208
209
 
209
- # Call tool with retry logic for HTTP errors (502, 503, 504)
210
- result = await retry_with_backoff(
211
- session.connector.client_session.call_tool,
210
+ # Call tool (retry logic is handled at transport level)
211
+ result = await session.connector.client_session.call_tool(
212
212
  name=original_tool.name, # Use original tool name, not prefixed
213
213
  arguments=tool_call.arguments or {},
214
- operation_name=f"call_tool_{original_tool.name}",
215
214
  )
216
215
 
217
216
  if self.verbose:
@@ -239,10 +238,8 @@ class MCPUseHUDClient(BaseHUDClient):
239
238
  continue
240
239
  # Prefer standard method name if available
241
240
  if hasattr(session.connector.client_session, "list_resources"):
242
- resources = await retry_with_backoff(
243
- session.connector.client_session.list_resources,
244
- operation_name=f"list_resources_{server_name}",
245
- )
241
+ # List resources (retry logic is handled at transport level)
242
+ resources = await session.connector.client_session.list_resources()
246
243
  else:
247
244
  # If the client doesn't support resource listing, skip
248
245
  continue
@@ -272,11 +269,8 @@ class MCPUseHUDClient(BaseHUDClient):
272
269
  resource_uri = AnyUrl(uri) if isinstance(uri, str) else uri
273
270
  # Prefer read_resource; fall back to list_resources if needed
274
271
  if hasattr(session.connector.client_session, "read_resource"):
275
- result = await retry_with_backoff(
276
- session.connector.client_session.read_resource,
277
- resource_uri,
278
- operation_name=f"read_resource_{server_name}",
279
- )
272
+ # Read resource (retry logic is handled at transport level)
273
+ result = await session.connector.client_session.read_resource(resource_uri)
280
274
  else:
281
275
  # Fallback path for older clients: not supported in strict typing
282
276
  raise AttributeError("read_resource not available")
@@ -0,0 +1,378 @@
1
+ """Tests for MCP-use client retry functionality."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from unittest.mock import AsyncMock, Mock, patch
6
+
7
+ import pytest
8
+ import requests
9
+ from mcp import types
10
+
11
+ from hud.clients.mcp_use import MCPUseHUDClient
12
+ from hud.clients.utils.mcp_use_retry import (
13
+ create_async_retry_wrapper,
14
+ create_retry_session,
15
+ patch_all_sessions,
16
+ patch_mcp_session_http_client,
17
+ )
18
+ from hud.types import MCPToolCall
19
+
20
+
21
+ class TestRetrySession:
22
+ """Test the retry session creation."""
23
+
24
+ def test_create_retry_session(self):
25
+ """Test that retry session is configured correctly."""
26
+ session = create_retry_session(
27
+ max_retries=5,
28
+ retry_status_codes=(500, 502, 503, 504),
29
+ retry_delay=0.5,
30
+ backoff_factor=2.0,
31
+ )
32
+
33
+ # Check that session has adapters mounted
34
+ assert "http://" in session.adapters
35
+ assert "https://" in session.adapters
36
+
37
+ # Check adapter configuration
38
+ adapter = session.adapters["http://"]
39
+ assert adapter.max_retries.total == 5
40
+ assert 500 in adapter.max_retries.status_forcelist
41
+ assert 502 in adapter.max_retries.status_forcelist
42
+ assert adapter.max_retries.backoff_factor == 2.0
43
+
44
+ def test_retry_session_default_values(self):
45
+ """Test retry session with default values."""
46
+ session = create_retry_session()
47
+
48
+ adapter = session.adapters["https://"]
49
+ assert adapter.max_retries.total == 3
50
+ assert 502 in adapter.max_retries.status_forcelist
51
+ assert 503 in adapter.max_retries.status_forcelist
52
+ assert 504 in adapter.max_retries.status_forcelist
53
+
54
+
55
+ class TestAsyncRetryWrapper:
56
+ """Test the async retry wrapper functionality."""
57
+
58
+ @pytest.mark.asyncio
59
+ async def test_retry_on_error_status_codes(self):
60
+ """Test that async wrapper retries on specific status codes."""
61
+ call_count = 0
62
+
63
+ async def mock_func(*args, **kwargs):
64
+ nonlocal call_count
65
+ call_count += 1
66
+
67
+ # First two calls fail, third succeeds
68
+ if call_count < 3:
69
+ result = Mock()
70
+ result.status_code = 503 # Service unavailable
71
+ return result
72
+
73
+ result = Mock()
74
+ result.status_code = 200
75
+ return result
76
+
77
+ wrapped = create_async_retry_wrapper(
78
+ mock_func,
79
+ max_retries=3,
80
+ retry_status_codes=(503,),
81
+ retry_delay=0.01, # Short delay for testing
82
+ )
83
+
84
+ result = await wrapped()
85
+ assert call_count == 3
86
+ assert result.status_code == 200
87
+
88
+ @pytest.mark.asyncio
89
+ async def test_retry_on_exception(self):
90
+ """Test that async wrapper retries on exceptions with status codes."""
91
+ call_count = 0
92
+
93
+ async def mock_func(*args, **kwargs):
94
+ nonlocal call_count
95
+ call_count += 1
96
+
97
+ if call_count < 3:
98
+ raise Exception("HTTP 503 Service Unavailable")
99
+
100
+ return Mock(status_code=200)
101
+
102
+ wrapped = create_async_retry_wrapper(
103
+ mock_func,
104
+ max_retries=3,
105
+ retry_status_codes=(503,),
106
+ retry_delay=0.01,
107
+ )
108
+
109
+ result = await wrapped()
110
+ assert call_count == 3
111
+ assert result.status_code == 200
112
+
113
+ @pytest.mark.asyncio
114
+ async def test_no_retry_on_success(self):
115
+ """Test that successful calls don't trigger retries."""
116
+ call_count = 0
117
+
118
+ async def mock_func(*args, **kwargs):
119
+ nonlocal call_count
120
+ call_count += 1
121
+ return Mock(status_code=200)
122
+
123
+ wrapped = create_async_retry_wrapper(mock_func)
124
+
125
+ result = await wrapped()
126
+ assert call_count == 1
127
+ assert result.status_code == 200
128
+
129
+ @pytest.mark.asyncio
130
+ async def test_max_retries_exceeded(self):
131
+ """Test that retries stop after max attempts."""
132
+ call_count = 0
133
+
134
+ async def mock_func(*args, **kwargs):
135
+ nonlocal call_count
136
+ call_count += 1
137
+ raise Exception("HTTP 503 Service Unavailable")
138
+
139
+ wrapped = create_async_retry_wrapper(
140
+ mock_func,
141
+ max_retries=2,
142
+ retry_status_codes=(503,),
143
+ retry_delay=0.01,
144
+ )
145
+
146
+ with pytest.raises(Exception) as exc_info:
147
+ await wrapped()
148
+
149
+ assert "503" in str(exc_info.value)
150
+ assert call_count == 3 # Initial + 2 retries
151
+
152
+
153
+ class TestSessionPatching:
154
+ """Test the session patching functionality."""
155
+
156
+ def test_patch_sync_session(self):
157
+ """Test patching a synchronous session."""
158
+ # Create mock session with connector
159
+ mock_session = Mock()
160
+ mock_session.connector = Mock()
161
+ mock_session.connector._connection_manager = Mock()
162
+ mock_session.connector._connection_manager._session = requests.Session()
163
+
164
+ # Patch the session
165
+ patch_mcp_session_http_client(mock_session)
166
+
167
+ # Verify the session was replaced with retry-enabled one
168
+ patched_session = mock_session.connector._connection_manager._session
169
+ assert "http://" in patched_session.adapters
170
+ assert "https://" in patched_session.adapters
171
+
172
+ # Check that it has retry configuration
173
+ adapter = patched_session.adapters["http://"]
174
+ assert hasattr(adapter, "max_retries")
175
+
176
+ @pytest.mark.asyncio
177
+ async def test_patch_async_session(self):
178
+ """Test patching an async session."""
179
+ # Create mock async session
180
+ mock_session = Mock()
181
+ mock_session.connector = Mock()
182
+ mock_session.connector.client_session = Mock()
183
+
184
+ async def mock_send_request(*args, **kwargs):
185
+ return Mock(status_code=200)
186
+
187
+ mock_session.connector.client_session._send_request = mock_send_request
188
+
189
+ # Patch the session
190
+ patch_mcp_session_http_client(mock_session)
191
+
192
+ # Verify _send_request was wrapped
193
+ wrapped_func = mock_session.connector.client_session._send_request
194
+ assert wrapped_func != mock_send_request # Function was replaced
195
+
196
+ # Test that wrapped function still works
197
+ result = await wrapped_func()
198
+ assert result.status_code == 200
199
+
200
+ def test_patch_all_sessions(self):
201
+ """Test patching multiple sessions."""
202
+ # Create mock sessions
203
+ session1 = Mock()
204
+ session1.connector = Mock()
205
+ session1.connector._connection_manager = Mock()
206
+ session1.connector._connection_manager.session = requests.Session()
207
+
208
+ session2 = Mock()
209
+ session2.connector = Mock()
210
+ session2.connector.client_session = Mock()
211
+ session2.connector.client_session._send_request = AsyncMock()
212
+
213
+ sessions = {"server1": session1, "server2": session2}
214
+
215
+ # Patch all sessions
216
+ patch_all_sessions(sessions)
217
+
218
+ # Verify both were patched
219
+ assert "http://" in session1.connector._connection_manager.session.adapters
220
+ assert session2.connector.client_session._send_request != AsyncMock
221
+
222
+
223
class TestMCPUseClientRetry:
    """Exercise MCPUseHUDClient together with the async retry wrapper."""

    @pytest.mark.asyncio
    async def test_client_applies_retry_on_connect(self):
        """Initializing the client registers the session produced by the mocked MCP client."""
        hud_client = MCPUseHUDClient({"test_server": {"url": "http://localhost:8080"}})

        # Replace the underlying MCPUseClient so no real connection is attempted.
        with patch("hud.clients.mcp_use.MCPUseClient") as mcp_client_cls:
            inner_client = Mock()
            mcp_client_cls.from_dict.return_value = inner_client

            # Session double with an async _send_request and an empty tool list.
            client_session = Mock()
            client_session._send_request = AsyncMock()
            client_session.list_tools = AsyncMock(return_value=Mock(tools=[]))
            session = Mock()
            session.connector = Mock()
            session.connector.client_session = client_session

            inner_client.create_all_sessions = AsyncMock(return_value={"test_server": session})

            # Initialize client (which applies retry logic)
            await hud_client.initialize()

            # Exactly one session, keyed by the configured server name.
            assert len(hud_client._sessions) == 1
            assert "test_server" in hud_client._sessions

    @pytest.mark.asyncio
    async def test_tool_call_with_retry(self):
        """A tool call that fails once with a 503 message succeeds on the retried attempt."""
        hud_client = MCPUseHUDClient({"test_server": {"url": "http://localhost:8080"}})

        with patch("hud.clients.mcp_use.MCPUseClient") as mcp_client_cls:
            inner_client = Mock()
            mcp_client_cls.from_dict.return_value = inner_client

            client_session = Mock()
            session = Mock()
            session.connector = Mock()
            session.connector.client_session = client_session

            # Advertise a single tool so the client knows about "test_tool".
            advertised_tool = types.Tool(
                name="test_tool",
                description="Test tool",
                inputSchema={"type": "object"},
            )
            client_session.list_tools = AsyncMock(return_value=Mock(tools=[advertised_tool]))

            # One entry is recorded per invocation of the fake tool call.
            attempts = []

            async def flaky_call_tool(name, arguments):
                attempts.append(name)

                # First call fails, second succeeds
                if len(attempts) == 1:
                    raise Exception("HTTP 503 Service Unavailable")

                return Mock(
                    content=[types.TextContent(type="text", text="Success")],
                    isError=False,
                    structuredContent=None,
                )

            client_session.call_tool = flaky_call_tool
            client_session._send_request = AsyncMock()

            inner_client.create_all_sessions = AsyncMock(return_value={"test_server": session})

            await hud_client.initialize()

            # Wrap call_tool with the retry helper for this test only.
            client_session.call_tool = create_async_retry_wrapper(
                client_session.call_tool,
                max_retries=2,
                retry_status_codes=(503,),
                retry_delay=0.01,
            )

            outcome = await hud_client.call_tool(MCPToolCall(name="test_tool", arguments={}))

            # Failed once, then succeeded.
            assert len(attempts) == 2
            assert not outcome.isError
            assert outcome.content[0].text == "Success"

    @pytest.mark.asyncio
    async def test_resource_read_with_retry(self):
        """A resource read that fails once with a 502 message succeeds on the retried attempt."""
        hud_client = MCPUseHUDClient({"test_server": {"url": "http://localhost:8080"}})

        with patch("hud.clients.mcp_use.MCPUseClient") as mcp_client_cls:
            inner_client = Mock()
            mcp_client_cls.from_dict.return_value = inner_client

            client_session = Mock()
            session = Mock()
            session.connector = Mock()
            session.connector.client_session = client_session
            client_session.list_tools = AsyncMock(return_value=Mock(tools=[]))

            # One entry is recorded per invocation of the fake resource read.
            attempts = []

            async def flaky_read_resource(uri):
                attempts.append(uri)

                # First call fails, second succeeds
                if len(attempts) == 1:
                    raise Exception("HTTP 502 Bad Gateway")

                return Mock(contents=[Mock(text='{"status": "ok"}')])

            client_session.read_resource = flaky_read_resource
            client_session._send_request = AsyncMock()

            inner_client.create_all_sessions = AsyncMock(return_value={"test_server": session})

            await hud_client.initialize()

            # Wrap read_resource with the retry helper for this test only.
            client_session.read_resource = create_async_retry_wrapper(
                client_session.read_resource,
                max_retries=2,
                retry_status_codes=(502,),
                retry_delay=0.01,
            )

            outcome = await hud_client.read_resource("test://resource")

            # Failed once, then succeeded.
            assert len(attempts) == 2
            assert outcome is not None
            assert outcome.contents[0].text == '{"status": "ok"}'
375
+
376
+
377
if __name__ == "__main__":
    # pytest.main() returns the exit code instead of exiting the process;
    # propagate it so a failing run does not finish with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))