hud-python 0.5.31__tar.gz → 0.5.33__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (335)
  1. {hud_python-0.5.31 → hud_python-0.5.33}/PKG-INFO +1 -1
  2. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/claude.py +11 -3
  3. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_build.py +2 -2
  4. {hud_python-0.5.31 → hud_python-0.5.33}/hud/datasets/loader.py +9 -10
  5. {hud_python-0.5.31 → hud_python-0.5.33}/hud/datasets/tests/test_loader.py +9 -9
  6. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/connection.py +3 -0
  7. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/connectors/mcp_config.py +23 -12
  8. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/tests/test_connection.py +29 -0
  9. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/tests/test_connectors.py +43 -1
  10. {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/context.py +40 -14
  11. {hud_python-0.5.31 → hud_python-0.5.33}/hud/telemetry/instrument.py +50 -6
  12. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/bash.py +28 -15
  13. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/edit.py +6 -6
  14. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/tests/test_bash.py +13 -20
  15. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/tests/test_bash_extended.py +28 -2
  16. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/anthropic.py +69 -38
  17. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/gemini.py +0 -23
  18. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/hud.py +20 -1
  19. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/openai.py +0 -21
  20. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/settings.py +5 -0
  21. hud_python-0.5.33/hud/tools/computer/tests/test_compression.py +164 -0
  22. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_native_types.py +1 -1
  23. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/types.py +2 -1
  24. {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/tests/test_version.py +1 -1
  25. {hud_python-0.5.31 → hud_python-0.5.33}/hud/version.py +1 -1
  26. {hud_python-0.5.31 → hud_python-0.5.33}/pyproject.toml +1 -1
  27. {hud_python-0.5.31 → hud_python-0.5.33}/.gitignore +0 -0
  28. {hud_python-0.5.31 → hud_python-0.5.33}/LICENSE +0 -0
  29. {hud_python-0.5.31 → hud_python-0.5.33}/README.md +0 -0
  30. {hud_python-0.5.31 → hud_python-0.5.33}/examples/README.md +0 -0
  31. {hud_python-0.5.31 → hud_python-0.5.33}/hud/__init__.py +0 -0
  32. {hud_python-0.5.31 → hud_python-0.5.33}/hud/__main__.py +0 -0
  33. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/__init__.py +0 -0
  34. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/base.py +0 -0
  35. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/gateway.py +0 -0
  36. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/gemini.py +0 -0
  37. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/gemini_cua.py +0 -0
  38. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/grounded_openai.py +0 -0
  39. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/misc/__init__.py +0 -0
  40. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/misc/integration_test_agent.py +0 -0
  41. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/misc/response_agent.py +0 -0
  42. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/openai.py +0 -0
  43. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/openai_chat.py +0 -0
  44. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/operator.py +0 -0
  45. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/resolver.py +0 -0
  46. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/__init__.py +0 -0
  47. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/conftest.py +0 -0
  48. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_base.py +0 -0
  49. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_base_runtime.py +0 -0
  50. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_claude.py +0 -0
  51. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_gemini.py +0 -0
  52. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
  53. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_integration_test_agent.py +0 -0
  54. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_openai.py +0 -0
  55. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_operator.py +0 -0
  56. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_resolver.py +0 -0
  57. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_run_eval.py +0 -0
  58. {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/types.py +0 -0
  59. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/__init__.py +0 -0
  60. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/__main__.py +0 -0
  61. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/analyze.py +0 -0
  62. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/build.py +0 -0
  63. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/cancel.py +0 -0
  64. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/convert/__init__.py +0 -0
  65. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/convert/base.py +0 -0
  66. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/convert/harbor.py +0 -0
  67. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/convert/tests/__init__.py +0 -0
  68. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/convert/tests/conftest.py +0 -0
  69. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/convert/tests/test_harbor.py +0 -0
  70. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/debug.py +0 -0
  71. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/deploy.py +0 -0
  72. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/dev.py +0 -0
  73. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/eval.py +0 -0
  74. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/flows/__init__.py +0 -0
  75. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/flows/dev.py +0 -0
  76. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/flows/init.py +0 -0
  77. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/flows/tasks.py +0 -0
  78. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/flows/templates.py +0 -0
  79. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/flows/tests/__init__.py +0 -0
  80. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/flows/tests/test_dev.py +0 -0
  81. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/init.py +0 -0
  82. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/link.py +0 -0
  83. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/models.py +0 -0
  84. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/push.py +0 -0
  85. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/rft.py +0 -0
  86. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/rft_status.py +0 -0
  87. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/__init__.py +0 -0
  88. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_analyze.py +0 -0
  89. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_analyze_metadata.py +0 -0
  90. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_analyze_module.py +0 -0
  91. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_build_failure.py +0 -0
  92. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_build_module.py +0 -0
  93. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_cli_init.py +0 -0
  94. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_cli_main.py +0 -0
  95. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
  96. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_cli_root.py +0 -0
  97. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_convert.py +0 -0
  98. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_debug.py +0 -0
  99. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_debug_directory_mode.py +0 -0
  100. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_deploy.py +0 -0
  101. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_dev.py +0 -0
  102. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_eval.py +0 -0
  103. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_eval_bedrock.py +0 -0
  104. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_init.py +0 -0
  105. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_main_module.py +0 -0
  106. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_mcp_server.py +0 -0
  107. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_push.py +0 -0
  108. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_push_happy.py +0 -0
  109. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_push_wrapper.py +0 -0
  110. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_utils.py +0 -0
  111. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/__init__.py +0 -0
  112. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/api.py +0 -0
  113. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/args.py +0 -0
  114. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/build_display.py +0 -0
  115. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/build_logs.py +0 -0
  116. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/config.py +0 -0
  117. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/context.py +0 -0
  118. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/docker.py +0 -0
  119. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/env_check.py +0 -0
  120. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/environment.py +0 -0
  121. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/git.py +0 -0
  122. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/interactive.py +0 -0
  123. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/lockfile.py +0 -0
  124. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/logging.py +0 -0
  125. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/mcp.py +0 -0
  126. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/metadata.py +0 -0
  127. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/server.py +0 -0
  128. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/source_hash.py +0 -0
  129. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tasks.py +0 -0
  130. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/__init__.py +0 -0
  131. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_config.py +0 -0
  132. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_docker.py +0 -0
  133. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_docker_hints.py +0 -0
  134. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_env_check.py +0 -0
  135. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_environment.py +0 -0
  136. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_git.py +0 -0
  137. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_interactive_module.py +0 -0
  138. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_logging_utils.py +0 -0
  139. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_metadata.py +0 -0
  140. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_source_hash.py +0 -0
  141. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_tasks.py +0 -0
  142. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/validation.py +0 -0
  143. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/version_check.py +0 -0
  144. {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/viewer.py +0 -0
  145. {hud_python-0.5.31 → hud_python-0.5.33}/hud/datasets/__init__.py +0 -0
  146. {hud_python-0.5.31 → hud_python-0.5.33}/hud/datasets/runner.py +0 -0
  147. {hud_python-0.5.31 → hud_python-0.5.33}/hud/datasets/tests/__init__.py +0 -0
  148. {hud_python-0.5.31 → hud_python-0.5.33}/hud/datasets/tests/test_utils.py +0 -0
  149. {hud_python-0.5.31 → hud_python-0.5.33}/hud/datasets/utils.py +0 -0
  150. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/__init__.py +0 -0
  151. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/connectors/__init__.py +0 -0
  152. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/connectors/base.py +0 -0
  153. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/connectors/local.py +0 -0
  154. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/connectors/openai.py +0 -0
  155. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/connectors/remote.py +0 -0
  156. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/environment.py +0 -0
  157. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/integrations/__init__.py +0 -0
  158. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/integrations/adk.py +0 -0
  159. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/integrations/anthropic.py +0 -0
  160. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/integrations/gemini.py +0 -0
  161. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/integrations/langchain.py +0 -0
  162. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/integrations/llamaindex.py +0 -0
  163. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/integrations/openai.py +0 -0
  164. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/mock.py +0 -0
  165. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/router.py +0 -0
  166. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/scenarios.py +0 -0
  167. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/tests/__init__.py +0 -0
  168. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/tests/test_environment.py +0 -0
  169. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/tests/test_integrations.py +0 -0
  170. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/tests/test_local_connectors.py +0 -0
  171. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/tests/test_scenarios.py +0 -0
  172. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/tests/test_session_id.py +0 -0
  173. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/tests/test_tools.py +0 -0
  174. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/types.py +0 -0
  175. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/utils/__init__.py +0 -0
  176. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/utils/formats.py +0 -0
  177. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/utils/schema.py +0 -0
  178. {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/utils/tool_wrappers.py +0 -0
  179. {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/__init__.py +0 -0
  180. {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/display.py +0 -0
  181. {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/instrument.py +0 -0
  182. {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/manager.py +0 -0
  183. {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/parallel.py +0 -0
  184. {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/task.py +0 -0
  185. {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/tests/__init__.py +0 -0
  186. {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/tests/test_context.py +0 -0
  187. {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/tests/test_eval.py +0 -0
  188. {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/tests/test_manager.py +0 -0
  189. {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/tests/test_parallel.py +0 -0
  190. {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/tests/test_task.py +0 -0
  191. {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/types.py +0 -0
  192. {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/utils.py +0 -0
  193. {hud_python-0.5.31 → hud_python-0.5.33}/hud/native/__init__.py +0 -0
  194. {hud_python-0.5.31 → hud_python-0.5.33}/hud/native/chat.py +0 -0
  195. {hud_python-0.5.31 → hud_python-0.5.33}/hud/patches/__init__.py +0 -0
  196. {hud_python-0.5.31 → hud_python-0.5.33}/hud/patches/mcp_patches.py +0 -0
  197. {hud_python-0.5.31 → hud_python-0.5.33}/hud/patches/warnings.py +0 -0
  198. {hud_python-0.5.31 → hud_python-0.5.33}/hud/py.typed +0 -0
  199. {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/__init__.py +0 -0
  200. {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/context.py +0 -0
  201. {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/helper/__init__.py +0 -0
  202. {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/low_level.py +0 -0
  203. {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/router.py +0 -0
  204. {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/server.py +0 -0
  205. {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/__init__.py +0 -0
  206. {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/test_add_tool.py +0 -0
  207. {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/test_context.py +0 -0
  208. {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/test_mcp_server_handlers.py +0 -0
  209. {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/test_mcp_server_integration.py +0 -0
  210. {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/test_mcp_server_more.py +0 -0
  211. {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/test_prefix_naming.py +0 -0
  212. {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/test_run_wrapper.py +0 -0
  213. {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/test_server_extra.py +0 -0
  214. {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/test_sigterm_runner.py +0 -0
  215. {hud_python-0.5.31 → hud_python-0.5.33}/hud/services/__init__.py +0 -0
  216. {hud_python-0.5.31 → hud_python-0.5.33}/hud/services/chat.py +0 -0
  217. {hud_python-0.5.31 → hud_python-0.5.33}/hud/services/chat_service.py +0 -0
  218. {hud_python-0.5.31 → hud_python-0.5.33}/hud/services/reply_metadata.py +0 -0
  219. {hud_python-0.5.31 → hud_python-0.5.33}/hud/services/tests/__init__.py +0 -0
  220. {hud_python-0.5.31 → hud_python-0.5.33}/hud/services/tests/test_chat.py +0 -0
  221. {hud_python-0.5.31 → hud_python-0.5.33}/hud/services/tests/test_chat_service.py +0 -0
  222. {hud_python-0.5.31 → hud_python-0.5.33}/hud/settings.py +0 -0
  223. {hud_python-0.5.31 → hud_python-0.5.33}/hud/shared/__init__.py +0 -0
  224. {hud_python-0.5.31 → hud_python-0.5.33}/hud/shared/exceptions.py +0 -0
  225. {hud_python-0.5.31 → hud_python-0.5.33}/hud/shared/hints.py +0 -0
  226. {hud_python-0.5.31 → hud_python-0.5.33}/hud/shared/requests.py +0 -0
  227. {hud_python-0.5.31 → hud_python-0.5.33}/hud/shared/tests/__init__.py +0 -0
  228. {hud_python-0.5.31 → hud_python-0.5.33}/hud/shared/tests/test_exceptions.py +0 -0
  229. {hud_python-0.5.31 → hud_python-0.5.33}/hud/shared/tests/test_hints.py +0 -0
  230. {hud_python-0.5.31 → hud_python-0.5.33}/hud/shared/tests/test_requests.py +0 -0
  231. {hud_python-0.5.31 → hud_python-0.5.33}/hud/telemetry/__init__.py +0 -0
  232. {hud_python-0.5.31 → hud_python-0.5.33}/hud/telemetry/exporter.py +0 -0
  233. {hud_python-0.5.31 → hud_python-0.5.33}/hud/telemetry/tests/__init__.py +0 -0
  234. {hud_python-0.5.31 → hud_python-0.5.33}/hud/telemetry/tests/test_eval_telemetry.py +0 -0
  235. {hud_python-0.5.31 → hud_python-0.5.33}/hud/telemetry/tests/test_exporter.py +0 -0
  236. {hud_python-0.5.31 → hud_python-0.5.33}/hud/telemetry/tests/test_instrument.py +0 -0
  237. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/__init__.py +0 -0
  238. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/agent.py +0 -0
  239. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/base.py +0 -0
  240. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/__init__.py +0 -0
  241. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/apply_patch.py +0 -0
  242. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/gemini_edit.py +0 -0
  243. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/gemini_shell.py +0 -0
  244. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/session.py +0 -0
  245. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/shell.py +0 -0
  246. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/tests/__init__.py +0 -0
  247. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/tests/test_apply_patch.py +0 -0
  248. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/tests/test_bash_integration.py +0 -0
  249. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/tests/test_edit.py +0 -0
  250. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/tests/test_gemini_tools.py +0 -0
  251. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/tests/test_shell.py +0 -0
  252. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/utils.py +0 -0
  253. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/__init__.py +0 -0
  254. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/glm.py +0 -0
  255. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/qwen.py +0 -0
  256. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/tests/__init__.py +0 -0
  257. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/tests/test_computer.py +0 -0
  258. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/tests/test_computer_actions.py +0 -0
  259. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/tests/test_glm_computer.py +0 -0
  260. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/elicitation.py +0 -0
  261. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/executors/__init__.py +0 -0
  262. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/executors/base.py +0 -0
  263. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/executors/pyautogui.py +0 -0
  264. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/executors/tests/__init__.py +0 -0
  265. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/executors/tests/test_base_executor.py +0 -0
  266. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  267. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/executors/xdo.py +0 -0
  268. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/__init__.py +0 -0
  269. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/base.py +0 -0
  270. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/gemini.py +0 -0
  271. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/glob.py +0 -0
  272. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/grep.py +0 -0
  273. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/list.py +0 -0
  274. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/read.py +0 -0
  275. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/tests/__init__.py +0 -0
  276. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/tests/test_glob.py +0 -0
  277. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/tests/test_grep.py +0 -0
  278. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/tests/test_list.py +0 -0
  279. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/tests/test_read.py +0 -0
  280. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/grounding/__init__.py +0 -0
  281. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/grounding/config.py +0 -0
  282. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/grounding/grounded_tool.py +0 -0
  283. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/grounding/grounder.py +0 -0
  284. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/grounding/tests/__init__.py +0 -0
  285. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
  286. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/hosted/__init__.py +0 -0
  287. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/hosted/base.py +0 -0
  288. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/hosted/code_execution.py +0 -0
  289. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/hosted/google_search.py +0 -0
  290. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/hosted/tool_search.py +0 -0
  291. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/hosted/url_context.py +0 -0
  292. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/hosted/web_fetch.py +0 -0
  293. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/hosted/web_search.py +0 -0
  294. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/jupyter.py +0 -0
  295. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/memory/__init__.py +0 -0
  296. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/memory/base.py +0 -0
  297. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/memory/claude.py +0 -0
  298. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/memory/gemini.py +0 -0
  299. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/memory/session.py +0 -0
  300. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/memory/tests/__init__.py +0 -0
  301. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/memory/tests/test_claude.py +0 -0
  302. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/memory/tests/test_gemini.py +0 -0
  303. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/memory/tests/test_session.py +0 -0
  304. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/native_types.py +0 -0
  305. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/playwright.py +0 -0
  306. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/response.py +0 -0
  307. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/submit.py +0 -0
  308. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/__init__.py +0 -0
  309. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_agent_tool.py +0 -0
  310. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_base.py +0 -0
  311. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_elicitation.py +0 -0
  312. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_init.py +0 -0
  313. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_jupyter_tool.py +0 -0
  314. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_native_tool_e2e.py +0 -0
  315. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_playwright_tool.py +0 -0
  316. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_response.py +0 -0
  317. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_submit.py +0 -0
  318. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_tools.py +0 -0
  319. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_tools_init.py +0 -0
  320. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_types.py +0 -0
  321. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_utils.py +0 -0
  322. {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/utils.py +0 -0
  323. {hud_python-0.5.31 → hud_python-0.5.33}/hud/types.py +0 -0
  324. {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/__init__.py +0 -0
  325. {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/env.py +0 -0
  326. {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/hud_console.py +0 -0
  327. {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/mcp.py +0 -0
  328. {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/pretty_errors.py +0 -0
  329. {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/strict_schema.py +0 -0
  330. {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/tests/__init__.py +0 -0
  331. {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/tests/test_init.py +0 -0
  332. {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/tests/test_pretty_errors.py +0 -0
  333. {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/tests/test_tool_shorthand.py +0 -0
  334. {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/tool_shorthand.py +0 -0
  335. {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.5.31
3
+ Version: 0.5.33
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -445,7 +445,9 @@ class ClaudeAgent(MCPAgent):
445
445
  text_document_block(content.text, title=tool_call.name)
446
446
  )
447
447
  elif isinstance(content, types.ImageContent):
448
- claude_blocks.append(base64_to_content_block(content.data))
448
+ claude_blocks.append(
449
+ base64_to_content_block(content.data, content.mimeType)
450
+ )
449
451
  elif isinstance(content, types.EmbeddedResource):
450
452
  resource = content.resource
451
453
  if (
@@ -683,13 +685,19 @@ class ClaudeAgent(MCPAgent):
683
685
  return messages_cached
684
686
 
685
687
 
686
- def base64_to_content_block(base64: str) -> BetaImageBlockParam:
688
+ def base64_to_content_block(
689
+ base64: str,
690
+ media_type: str = "image/png",
691
+ ) -> BetaImageBlockParam:
687
692
  """Convert base64 image to Claude content block."""
688
693
  return BetaImageBlockParam(
689
694
  type="image",
690
695
  source=BetaBase64ImageSourceParam(
691
696
  type="base64",
692
- media_type="image/png",
697
+ media_type=cast(
698
+ "Literal['image/jpeg', 'image/png', 'image/gif', 'image/webp']",
699
+ media_type,
700
+ ),
693
701
  data=base64,
694
702
  ),
695
703
  )
@@ -61,12 +61,12 @@ class TestIncrementVersion:
61
61
  def test_increment_minor(self):
62
62
  """Test incrementing minor version."""
63
63
  assert increment_version("1.2.3", "minor") == "1.3.0"
64
- assert increment_version("0.5.31", "minor") == "0.6.0"
64
+ assert increment_version("0.5.33", "minor") == "0.6.0"
65
65
 
66
66
  def test_increment_major(self):
67
67
  """Test incrementing major version."""
68
68
  assert increment_version("1.2.3", "major") == "2.0.0"
69
- assert increment_version("0.5.31", "major") == "1.0.0"
69
+ assert increment_version("0.5.33", "major") == "1.0.0"
70
70
 
71
71
  def test_increment_with_v_prefix(self):
72
72
  """Test incrementing version with v prefix."""
@@ -70,15 +70,15 @@ def _load_from_file(path: Path) -> list[Task]:
70
70
  return [Task(**{**item, "args": item.get("args") or {}}) for item in raw_items]
71
71
 
72
72
 
73
- def resolve_taskset_id(slug: str) -> str:
74
- """Resolve a taskset slug/name to its UUID via the HUD API."""
73
+ def resolve_taskset_id(name: str) -> str:
74
+ """Resolve a taskset name to its UUID via the HUD API."""
75
75
  headers = {}
76
76
  if settings.api_key:
77
77
  headers["Authorization"] = f"Bearer {settings.api_key}"
78
78
 
79
79
  with httpx.Client() as client:
80
80
  response = client.get(
81
- f"{settings.hud_api_url}/tasks/evalset/{slug}",
81
+ f"{settings.hud_api_url}/tasks/evalset/{name}",
82
82
  headers=headers,
83
83
  )
84
84
  response.raise_for_status()
@@ -86,7 +86,7 @@ def resolve_taskset_id(slug: str) -> str:
86
86
 
87
87
  evalset_id = data.get("evalset_id")
88
88
  if not evalset_id:
89
- raise ValueError(f"Could not resolve taskset '{slug}' — not found or no access")
89
+ raise ValueError(f"Could not resolve taskset '{name}' — not found or no access")
90
90
  return evalset_id
91
91
 
92
92
 
@@ -146,14 +146,14 @@ def load_tasks(source: str, *, raw: bool = False) -> list[Task] | list[dict[str,
146
146
 
147
147
  Supports multiple sources with auto-detection:
148
148
  - Local file path (JSON or JSONL)
149
- - HUD API dataset slug (e.g., "hud-evals/SheetBench-50")
149
+ - HUD API evalset name (e.g., "SheetBench-50")
150
150
 
151
151
  Automatically detects and converts v4 LegacyTask format to v5 Task.
152
152
 
153
153
  Args:
154
154
  source: Task source. Can be:
155
155
  - Path to a local JSON/JSONL file
156
- - HUD API dataset slug (e.g., "hud-evals/SheetBench-50")
156
+ - HUD API evalset name (e.g., "SheetBench-50")
157
157
  raw: If True, return raw dicts without validation or env var substitution.
158
158
  Useful for preserving template strings like "${HUD_API_KEY}".
159
159
 
@@ -193,8 +193,7 @@ def save_tasks(
193
193
  Creates or updates a taskset with the given tasks.
194
194
 
195
195
  Args:
196
- name: Taskset name/slug (e.g., "my-evals/benchmark-v1").
197
- If no org prefix, uses user's default org.
196
+ name: Evalset name (e.g., "benchmark-v1").
198
197
  tasks: List of Task objects (v5 format) to save.
199
198
 
200
199
  Returns:
@@ -214,10 +213,10 @@ def save_tasks(
214
213
  ]
215
214
 
216
215
  # Save to HUD API
217
- taskset_id = save_tasks("my-evals/benchmark-v1", tasks)
216
+ taskset_id = save_tasks("benchmark-v1", tasks)
218
217
 
219
218
  # Later, load them back
220
- loaded = load_tasks("my-evals/benchmark-v1")
219
+ loaded = load_tasks("benchmark-v1")
221
220
  ```
222
221
 
223
222
  Raises:
@@ -50,7 +50,7 @@ class TestLoadTasks:
50
50
  mock_client.__exit__.return_value = None
51
51
  mock_client_class.return_value = mock_client
52
52
 
53
- tasks = load_tasks("test-org/test-dataset")
53
+ tasks = load_tasks("test-dataset")
54
54
 
55
55
  assert len(tasks) == 2
56
56
  # Tasks are keyed by ID in dict, order may vary
@@ -61,7 +61,7 @@ class TestLoadTasks:
61
61
  # Platform IDs are internal and should not be inferred from dict keys
62
62
  assert all(t.id is None for t in tasks)
63
63
  mock_client.get.assert_called_once_with(
64
- "https://api.hud.ai/tasks/evalset/test-org/test-dataset",
64
+ "https://api.hud.ai/tasks/evalset/test-dataset",
65
65
  headers={"Authorization": "Bearer test_key"},
66
66
  params={"all": "true"},
67
67
  )
@@ -96,7 +96,7 @@ class TestLoadTasks:
96
96
  mock_client.__exit__.return_value = None
97
97
  mock_client_class.return_value = mock_client
98
98
 
99
- tasks = load_tasks("test-org/test-dataset")
99
+ tasks = load_tasks("test-dataset")
100
100
 
101
101
  assert len(tasks) == 1
102
102
  assert tasks[0].scenario == "checkout"
@@ -126,11 +126,11 @@ class TestLoadTasks:
126
126
  mock_client.__exit__.return_value = None
127
127
  mock_client_class.return_value = mock_client
128
128
 
129
- tasks = load_tasks("test-org/test-dataset")
129
+ tasks = load_tasks("test-dataset")
130
130
 
131
131
  assert len(tasks) == 0
132
132
  mock_client.get.assert_called_once_with(
133
- "https://api.hud.ai/tasks/evalset/test-org/test-dataset",
133
+ "https://api.hud.ai/tasks/evalset/test-dataset",
134
134
  headers={},
135
135
  params={"all": "true"},
136
136
  )
@@ -198,7 +198,7 @@ class TestLoadTasks:
198
198
  mock_client.__exit__.return_value = None
199
199
  mock_client_class.return_value = mock_client
200
200
 
201
- tasks = load_tasks("test-org/test-dataset")
201
+ tasks = load_tasks("test-dataset")
202
202
 
203
203
  assert len(tasks) == 0
204
204
 
@@ -223,7 +223,7 @@ class TestLoadTasks:
223
223
  mock_client.__exit__.return_value = None
224
224
  mock_client_class.return_value = mock_client
225
225
 
226
- tasks = load_tasks("test-org/test-dataset")
226
+ tasks = load_tasks("test-dataset")
227
227
 
228
228
  assert len(tasks) == 1
229
229
  assert tasks[0].scenario == "test"
@@ -259,7 +259,7 @@ class TestSaveTasks:
259
259
  mock_client_class.return_value = mock_client
260
260
 
261
261
  taskset_id = save_tasks(
262
- "test-org/test-dataset",
262
+ "test-dataset",
263
263
  [
264
264
  Task(
265
265
  env={"name": "test-env"},
@@ -276,6 +276,6 @@ class TestSaveTasks:
276
276
  call_args = mock_client.post.call_args
277
277
  assert call_args.args[0] == "https://api.hud.ai/tasks/upload"
278
278
  payload = call_args.kwargs["json"]
279
- assert payload["name"] == "test-org/test-dataset"
279
+ assert payload["name"] == "test-dataset"
280
280
  assert payload["tasks"][0]["slug"] == "checkout-smoke"
281
281
  assert "id" not in payload["tasks"][0]
@@ -159,6 +159,9 @@ class Connector:
159
159
  "transport": self._transport,
160
160
  "auth": self._auth,
161
161
  }
162
+ client_timeout = getattr(self._transport, "_hud_client_timeout", None)
163
+ if client_timeout is not None:
164
+ client_kwargs["timeout"] = client_timeout
162
165
  if self._elicitation_handler is not None:
163
166
  client_kwargs["elicitation_handler"] = self._elicitation_handler
164
167
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from typing import TYPE_CHECKING, Any
5
+ from typing import TYPE_CHECKING, Any, cast
6
6
 
7
7
  from hud.environment.connectors.base import BaseConnectorMixin
8
8
 
@@ -66,8 +66,7 @@ class MCPConfigConnectorMixin(BaseConnectorMixin):
66
66
  if settings.client_timeout > 0
67
67
  else min(request_timeout, settings.__class__.model_fields["client_timeout"].default)
68
68
  )
69
- server_config.setdefault("sse_read_timeout", timeout)
70
- transport = _build_transport(server_config)
69
+ transport = _build_transport(server_config, timeout=timeout)
71
70
 
72
71
  return self._add_connection(
73
72
  name,
@@ -121,17 +120,29 @@ class MCPConfigConnectorMixin(BaseConnectorMixin):
121
120
  return self
122
121
 
123
122
 
124
- def _build_transport(server_config: dict[str, Any]) -> Any:
123
+ def _build_transport(server_config: dict[str, Any], *, timeout: float | None = None) -> Any:
125
124
  from fastmcp.client.transports import SSETransport, StreamableHttpTransport
126
125
  from fastmcp.mcp_config import infer_transport_type_from_url
127
126
 
128
127
  url = server_config["url"]
129
128
  transport_type = server_config.get("transport") or infer_transport_type_from_url(url)
130
- transport_cls = SSETransport if transport_type == "sse" else StreamableHttpTransport
131
-
132
- return transport_cls(
133
- url=url,
134
- headers=server_config.get("headers"),
135
- auth=server_config.get("auth"),
136
- sse_read_timeout=server_config.get("sse_read_timeout"),
137
- )
129
+ transport_timeout = timeout if timeout is not None else server_config.get("sse_read_timeout")
130
+ transport_kwargs = {
131
+ "url": url,
132
+ "headers": server_config.get("headers"),
133
+ "auth": server_config.get("auth"),
134
+ "httpx_client_factory": server_config.get("httpx_client_factory"),
135
+ }
136
+
137
+ if transport_type == "sse":
138
+ return SSETransport(
139
+ **transport_kwargs,
140
+ sse_read_timeout=transport_timeout,
141
+ )
142
+
143
+ transport = StreamableHttpTransport(**transport_kwargs)
144
+ if transport_timeout is not None:
145
+ # FastMCP 3.x wants streamable HTTP timeouts on the client/session,
146
+ # not on the transport constructor.
147
+ cast("Any", transport)._hud_client_timeout = transport_timeout
148
+ return transport
@@ -140,6 +140,35 @@ class TestConnector:
140
140
  # Client is now set
141
141
  assert connector.client is mock_client
142
142
 
143
+ @pytest.mark.asyncio
144
+ async def test_connect_passes_transport_timeout_to_client(self) -> None:
145
+ """connect() forwards transport timeout to FastMCP client session kwargs."""
146
+
147
+ class Transport:
148
+ _hud_client_timeout = 300
149
+
150
+ transport = Transport()
151
+ connector = Connector(
152
+ transport=transport,
153
+ config=ConnectionConfig(),
154
+ name="test",
155
+ connection_type=ConnectionType.REMOTE,
156
+ auth="test-token",
157
+ )
158
+
159
+ mock_client = MagicMock()
160
+ mock_client.__aenter__ = AsyncMock(return_value=mock_client)
161
+ mock_client.is_connected = MagicMock(return_value=True)
162
+
163
+ with patch("fastmcp.client.Client", return_value=mock_client) as mock_cls:
164
+ await connector.connect()
165
+
166
+ mock_cls.assert_called_once_with(
167
+ transport=transport,
168
+ auth="test-token",
169
+ timeout=300,
170
+ )
171
+
143
172
  @pytest.mark.asyncio
144
173
  async def test_disconnect_clears_client(self) -> None:
145
174
  """disconnect() closes client and clears state."""
@@ -197,7 +197,7 @@ class TestRemoteConnectorMixin:
197
197
  env = TestEnv()
198
198
  with patch("hud.settings.settings", spec=Settings) as mock_settings:
199
199
  mock_settings.hud_mcp_url = "https://mcp.hud.ai"
200
- mock_settings.client_timeout = 300 # Used in connect_mcp for sse_read_timeout
200
+ mock_settings.client_timeout = 300 # Used in connect_mcp transport timeout logic
201
201
 
202
202
  env.connect_hub("browser")
203
203
 
@@ -205,3 +205,45 @@ class TestRemoteConnectorMixin:
205
205
  assert "hud" in env._connections
206
206
  # Verify hub config is stored for serialization
207
207
  assert env._hub_config == {"name": "browser"}
208
+
209
+ def test_connect_mcp_streamable_transport_uses_client_timeout(self) -> None:
210
+ """Streamable HTTP uses FastMCP client timeout instead of deprecated transport arg."""
211
+ from fastmcp.client.transports import StreamableHttpTransport
212
+
213
+ from hud.environment.connectors.mcp_config import MCPConfigConnectorMixin
214
+ from hud.settings import Settings
215
+
216
+ class TestEnv(MCPConfigConnectorMixin):
217
+ def __init__(self) -> None:
218
+ self._connections: dict[str, Connector] = {}
219
+
220
+ env = TestEnv()
221
+ with patch("hud.settings.settings", spec=Settings) as mock_settings:
222
+ mock_settings.client_timeout = 300
223
+ env.connect_mcp({"browser": {"url": "https://mcp.hud.ai/browser"}})
224
+
225
+ transport = env._connections["browser"]._transport
226
+ assert isinstance(transport, StreamableHttpTransport)
227
+ assert transport.sse_read_timeout is None
228
+ assert getattr(transport, "_hud_client_timeout", None) == 300
229
+
230
+ def test_connect_mcp_sse_transport_keeps_sse_timeout(self) -> None:
231
+ """SSE transports should continue to receive sse_read_timeout directly."""
232
+ from fastmcp.client.transports import SSETransport
233
+
234
+ from hud.environment.connectors.mcp_config import MCPConfigConnectorMixin
235
+ from hud.settings import Settings
236
+
237
+ class TestEnv(MCPConfigConnectorMixin):
238
+ def __init__(self) -> None:
239
+ self._connections: dict[str, Connector] = {}
240
+
241
+ env = TestEnv()
242
+ with patch("hud.settings.settings", spec=Settings) as mock_settings:
243
+ mock_settings.client_timeout = 300
244
+ env.connect_mcp({"browser": {"url": "https://mcp.hud.ai/browser", "transport": "sse"}})
245
+
246
+ transport = env._connections["browser"]._transport
247
+ assert isinstance(transport, SSETransport)
248
+ assert transport.sse_read_timeout is not None
249
+ assert transport.sse_read_timeout.total_seconds() == 300
@@ -723,21 +723,18 @@ class EvalContext(Environment):
723
723
  return False
724
724
 
725
725
  # =========================================================================
726
- # Tool Call Instrumentation
726
+ # MCP Telemetry Instrumentation
727
727
  # =========================================================================
728
728
 
729
- async def _execute_tool(self, name: str, arguments: dict[str, Any]) -> MCPToolResult:
730
- """Execute a tool with automatic telemetry recording.
729
+ def _should_instrument(self) -> bool:
730
+ """Whether local MCP instrumentation should be applied.
731
731
 
732
- Overrides Environment._execute_tool to record MCP spans for the eval context.
733
- Instrumentation is disabled when connected to a remote HUD server (telemetry is
734
- recorded server-side in that case).
732
+ Returns False when telemetry is handled server-side (remote hub or HUD MCP).
735
733
  """
736
- # Skip instrumentation when connected to a remote hub - telemetry is handled server-side
734
+ if not self._trace_enabled:
735
+ return False
737
736
  if self._hub_config is not None:
738
- return await super()._execute_tool(name, arguments)
739
-
740
- # Skip instrumentation for v4 tasks with HUD MCP config (remote server)
737
+ return False
741
738
  if self._mcp_config is not None:
742
739
  from hud.utils.mcp import _is_hud_server
743
740
 
@@ -745,18 +742,47 @@ class EvalContext(Environment):
745
742
  if isinstance(server_cfg, dict):
746
743
  url = server_cfg.get("url", "")
747
744
  if url and _is_hud_server(url):
748
- return await super()._execute_tool(name, arguments)
745
+ return False
746
+ return True
749
747
 
750
- # For local environments, record MCP spans
748
+ async def _execute_tool(self, name: str, arguments: dict[str, Any]) -> MCPToolResult:
749
+ if not self._should_instrument():
750
+ return await super()._execute_tool(name, arguments)
751
751
  return await self._execute_tool_instrumented(name, arguments)
752
752
 
753
- @instrument(category="mcp")
753
+ @instrument(method="tools/call")
754
754
  async def _execute_tool_instrumented(
755
755
  self, name: str, arguments: dict[str, Any]
756
756
  ) -> MCPToolResult:
757
- """Instrumented version of _execute_tool for local environments."""
758
757
  return await super()._execute_tool(name, arguments)
759
758
 
759
+ async def run_scenario_setup(
760
+ self,
761
+ scenario_name: str,
762
+ args: dict[str, Any],
763
+ session_id: str | None = None,
764
+ ) -> str | None:
765
+ if not self._should_instrument():
766
+ return await super().run_scenario_setup(scenario_name, args, session_id)
767
+ return await self._run_setup_instrumented(scenario_name, args)
768
+
769
+ @instrument(method="prompts/get")
770
+ async def _run_setup_instrumented(self, name: str, arguments: dict[str, Any]) -> str | None:
771
+ return await super().run_scenario_setup(name, arguments)
772
+
773
+ async def run_scenario_evaluate(
774
+ self,
775
+ scenario_name: str,
776
+ session_id: str | None = None,
777
+ ) -> EvaluationResult:
778
+ if not self._should_instrument():
779
+ return await super().run_scenario_evaluate(scenario_name, session_id)
780
+ return await self._run_evaluate_instrumented(scenario_name)
781
+
782
+ @instrument(method="resources/read")
783
+ async def _run_evaluate_instrumented(self, uri: str) -> EvaluationResult:
784
+ return await super().run_scenario_evaluate(uri)
785
+
760
786
  def __repr__(self) -> str:
761
787
  return f"EvalContext({self.trace_id[:8]}..., name={self.eval_name!r}, reward={self.reward})"
762
788
 
@@ -100,6 +100,7 @@ def instrument(
100
100
  name: str | None = None,
101
101
  category: str = "function",
102
102
  span_type: str | None = None,
103
+ method: str | None = None,
103
104
  internal_type: str | None = None,
104
105
  record_args: bool = True,
105
106
  record_result: bool = True,
@@ -113,6 +114,7 @@ def instrument(
113
114
  name: str | None = None,
114
115
  category: str = "function",
115
116
  span_type: str | None = None,
117
+ method: str | None = None,
116
118
  internal_type: str | None = None,
117
119
  record_args: bool = True,
118
120
  record_result: bool = True,
@@ -126,6 +128,7 @@ def instrument(
126
128
  name: str | None = None,
127
129
  category: str = "function",
128
130
  span_type: str | None = None,
131
+ method: str | None = None,
129
132
  internal_type: str | None = None,
130
133
  record_args: bool = True,
131
134
  record_result: bool = True,
@@ -138,6 +141,7 @@ def instrument(
138
141
  name: str | None = None,
139
142
  category: str = "function",
140
143
  span_type: str | None = None,
144
+ method: str | None = None,
141
145
  internal_type: str | None = None,
142
146
  record_args: bool = True,
143
147
  record_result: bool = True,
@@ -151,6 +155,10 @@ def instrument(
151
155
  name: Custom span name (defaults to module.function)
152
156
  category: Span category (e.g., "agent", "tool", "function", "mcp")
153
157
  span_type: Alias for category (deprecated, use category instead)
158
+ method: MCP method name (e.g., "tools/call", "resources/read").
159
+ When set, produces MCP spans: name becomes "{method}.mcp",
160
+ type becomes "SERVER", and request is structured as
161
+ {"method": ..., "params": ...}.
154
162
  internal_type: Internal span type (e.g., "user-message")
155
163
  record_args: Whether to record function arguments
156
164
  record_result: Whether to record function result
@@ -168,6 +176,7 @@ def instrument(
168
176
  return await model.generate(messages)
169
177
  """
170
178
  effective_category = span_type if span_type is not None else category
179
+ effective_method = method
171
180
 
172
181
  def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
173
182
  if hasattr(func, "_hud_instrumented"):
@@ -193,13 +202,19 @@ def instrument(
193
202
  error: str | None = None,
194
203
  ) -> dict[str, Any]:
195
204
  """Build a HudSpan-compatible span record."""
196
- # Build attributes using TraceStep
205
+ is_mcp = effective_method is not None
206
+
207
+ extra_attrs: dict[str, Any] = {}
208
+ if is_mcp:
209
+ extra_attrs["method_name"] = effective_method
210
+
197
211
  attributes = TraceStep(
198
212
  task_run_id=task_run_id,
199
- category=effective_category,
200
- type="CLIENT",
213
+ category="mcp" if is_mcp else effective_category,
214
+ type="SERVER" if is_mcp else "CLIENT",
201
215
  start_timestamp=start_time,
202
216
  end_timestamp=end_time,
217
+ **extra_attrs,
203
218
  )
204
219
 
205
220
  # Record arguments as request
@@ -213,21 +228,50 @@ def instrument(
213
228
  if k not in ("self", "cls")
214
229
  }
215
230
  if args_dict:
216
- attributes.request = args_dict
231
+ if is_mcp:
232
+ attributes.request = {
233
+ "method": effective_method,
234
+ "params": args_dict,
235
+ }
236
+ else:
237
+ attributes.request = args_dict
217
238
  except Exception as e:
218
239
  logger.debug("Failed to serialize args: %s", e)
219
240
 
220
241
  # Record result
221
242
  if record_result and result is not None and error is None:
222
243
  try:
223
- attributes.result = _serialize_value(result)
244
+ serialized = _serialize_value(result)
245
+ if is_mcp and effective_method == "prompts/get":
246
+ if isinstance(serialized, str):
247
+ serialized = {
248
+ "messages": [
249
+ {
250
+ "role": "user",
251
+ "content": {
252
+ "type": "text",
253
+ "text": serialized,
254
+ },
255
+ }
256
+ ]
257
+ }
258
+ elif is_mcp and effective_method == "resources/read":
259
+ if isinstance(serialized, list):
260
+ serialized = {"contents": serialized}
261
+ elif isinstance(serialized, dict) and "reward" in serialized:
262
+ uri = args_dict.get("uri", "") if args_dict else ""
263
+ serialized = {
264
+ "contents": [{"uri": uri, "text": json.dumps(serialized)}]
265
+ }
266
+ attributes.result = serialized
224
267
  except Exception as e:
225
268
  logger.debug("Failed to serialize result: %s", e)
226
269
 
227
270
  # Build span
228
271
  span_id = uuid.uuid4().hex[:16]
272
+ effective_name = f"{effective_method}.mcp" if is_mcp else span_name
229
273
  span: dict[str, Any] = {
230
- "name": span_name,
274
+ "name": effective_name,
231
275
  "trace_id": _normalize_trace_id(task_run_id),
232
276
  "span_id": span_id,
233
277
  "parent_span_id": None,