hud-python 0.5.1__tar.gz → 0.5.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (308) hide show
  1. {hud_python-0.5.1 → hud_python-0.5.3}/PKG-INFO +1 -1
  2. {hud_python-0.5.1 → hud_python-0.5.3}/hud/__init__.py +1 -1
  3. hud_python-0.5.3/hud/agents/__init__.py +82 -0
  4. hud_python-0.5.3/hud/agents/gateway.py +42 -0
  5. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/misc/response_agent.py +7 -0
  6. hud_python-0.5.3/hud/agents/resolver.py +70 -0
  7. hud_python-0.5.3/hud/agents/tests/test_resolver.py +192 -0
  8. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/eval.py +17 -37
  9. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/flows/init.py +1 -1
  10. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/pull.py +1 -1
  11. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/push.py +9 -2
  12. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_push.py +1 -1
  13. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/metadata.py +1 -1
  14. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_metadata.py +1 -1
  15. {hud_python-0.5.1 → hud_python-0.5.3}/hud/datasets/loader.py +13 -10
  16. {hud_python-0.5.1 → hud_python-0.5.3}/hud/datasets/runner.py +9 -10
  17. {hud_python-0.5.1 → hud_python-0.5.3}/hud/datasets/tests/test_loader.py +1 -1
  18. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/environment.py +37 -0
  19. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/scenarios.py +53 -4
  20. hud_python-0.5.3/hud/environment/tests/test_environment.py +329 -0
  21. hud_python-0.5.3/hud/environment/tests/test_scenarios.py +749 -0
  22. {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/context.py +11 -1
  23. {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/instrument.py +4 -2
  24. {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/task.py +5 -2
  25. {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/tests/test_eval.py +1 -1
  26. {hud_python-0.5.1 → hud_python-0.5.3}/hud/telemetry/instrument.py +8 -1
  27. {hud_python-0.5.1 → hud_python-0.5.3}/hud/telemetry/tests/test_eval_telemetry.py +8 -8
  28. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/__init__.py +2 -0
  29. hud_python-0.5.3/hud/tools/agent.py +216 -0
  30. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/shell.py +3 -3
  31. hud_python-0.5.3/hud/tools/tests/test_agent_tool.py +355 -0
  32. {hud_python-0.5.1 → hud_python-0.5.3}/hud/types.py +5 -3
  33. {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/strict_schema.py +1 -1
  34. {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/tests/test_version.py +1 -1
  35. {hud_python-0.5.1 → hud_python-0.5.3}/hud/version.py +1 -1
  36. {hud_python-0.5.1 → hud_python-0.5.3}/pyproject.toml +1 -1
  37. hud_python-0.5.1/hud/agents/__init__.py +0 -19
  38. hud_python-0.5.1/hud/environment/tests/test_environment.py +0 -161
  39. hud_python-0.5.1/hud/environment/tests/test_scenarios.py +0 -280
  40. {hud_python-0.5.1 → hud_python-0.5.3}/.gitignore +0 -0
  41. {hud_python-0.5.1 → hud_python-0.5.3}/LICENSE +0 -0
  42. {hud_python-0.5.1 → hud_python-0.5.3}/README.md +0 -0
  43. {hud_python-0.5.1 → hud_python-0.5.3}/examples/README.md +0 -0
  44. {hud_python-0.5.1 → hud_python-0.5.3}/hud/__main__.py +0 -0
  45. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/base.py +0 -0
  46. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/claude.py +0 -0
  47. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/gemini.py +0 -0
  48. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/gemini_cua.py +0 -0
  49. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/grounded_openai.py +0 -0
  50. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/misc/__init__.py +0 -0
  51. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/misc/integration_test_agent.py +0 -0
  52. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/openai.py +0 -0
  53. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/openai_chat.py +0 -0
  54. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/operator.py +0 -0
  55. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/__init__.py +0 -0
  56. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/conftest.py +0 -0
  57. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/test_base.py +0 -0
  58. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/test_base_runtime.py +0 -0
  59. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/test_claude.py +0 -0
  60. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/test_client.py +0 -0
  61. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/test_gemini.py +0 -0
  62. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
  63. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/test_openai.py +0 -0
  64. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/test_operator.py +0 -0
  65. {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/test_run_eval.py +0 -0
  66. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/__init__.py +0 -0
  67. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/__main__.py +0 -0
  68. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/analyze.py +0 -0
  69. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/build.py +0 -0
  70. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/clone.py +0 -0
  71. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/debug.py +0 -0
  72. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/dev.py +0 -0
  73. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/flows/__init__.py +0 -0
  74. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/flows/dev.py +0 -0
  75. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/flows/tasks.py +0 -0
  76. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/flows/templates.py +0 -0
  77. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/flows/tests/__init__.py +0 -0
  78. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/flows/tests/test_dev.py +0 -0
  79. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/get.py +0 -0
  80. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/init.py +0 -0
  81. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/list_func.py +0 -0
  82. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/remove.py +0 -0
  83. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/rft.py +0 -0
  84. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/rft_status.py +0 -0
  85. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/__init__.py +0 -0
  86. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_analyze.py +0 -0
  87. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_analyze_metadata.py +0 -0
  88. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_analyze_module.py +0 -0
  89. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_build.py +0 -0
  90. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_build_failure.py +0 -0
  91. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_build_module.py +0 -0
  92. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_cli_init.py +0 -0
  93. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_cli_main.py +0 -0
  94. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
  95. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_cli_root.py +0 -0
  96. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_clone.py +0 -0
  97. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_convert.py +0 -0
  98. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_cursor.py +0 -0
  99. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_debug.py +0 -0
  100. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_dev.py +0 -0
  101. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_eval.py +0 -0
  102. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_eval_bedrock.py +0 -0
  103. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_init.py +0 -0
  104. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_list_func.py +0 -0
  105. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_main_module.py +0 -0
  106. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_mcp_server.py +0 -0
  107. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_pull.py +0 -0
  108. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_push_happy.py +0 -0
  109. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_push_wrapper.py +0 -0
  110. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_registry.py +0 -0
  111. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_utils.py +0 -0
  112. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/__init__.py +0 -0
  113. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/celebrate.py +0 -0
  114. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/config.py +0 -0
  115. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/cursor.py +0 -0
  116. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/docker.py +0 -0
  117. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/env_check.py +0 -0
  118. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/environment.py +0 -0
  119. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/git.py +0 -0
  120. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/interactive.py +0 -0
  121. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/local_runner.py +0 -0
  122. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/logging.py +0 -0
  123. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/package_runner.py +0 -0
  124. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/registry.py +0 -0
  125. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/remote_runner.py +0 -0
  126. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/runner.py +0 -0
  127. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/server.py +0 -0
  128. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/source_hash.py +0 -0
  129. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tasks.py +0 -0
  130. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/__init__.py +0 -0
  131. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_config.py +0 -0
  132. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_docker.py +0 -0
  133. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_docker_hints.py +0 -0
  134. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_env_check.py +0 -0
  135. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_environment.py +0 -0
  136. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_git.py +0 -0
  137. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_interactive_module.py +0 -0
  138. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_local_runner.py +0 -0
  139. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_logging_utils.py +0 -0
  140. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_package_runner.py +0 -0
  141. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_registry_utils.py +0 -0
  142. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_remote_runner.py +0 -0
  143. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_runner_modules.py +0 -0
  144. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_source_hash.py +0 -0
  145. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_tasks.py +0 -0
  146. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/version_check.py +0 -0
  147. {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/viewer.py +0 -0
  148. {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/README.md +0 -0
  149. {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/__init__.py +0 -0
  150. {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/base.py +0 -0
  151. {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/environment.py +0 -0
  152. {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/fastmcp.py +0 -0
  153. {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/mcp_use.py +0 -0
  154. {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/tests/__init__.py +0 -0
  155. {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/tests/test_analyze_scenarios.py +0 -0
  156. {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/tests/test_client_integration.py +0 -0
  157. {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/tests/test_fastmcp.py +0 -0
  158. {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/tests/test_mcp_use_retry.py +0 -0
  159. {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/tests/test_protocol.py +0 -0
  160. {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/utils/__init__.py +0 -0
  161. {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/utils/mcp_use_retry.py +0 -0
  162. {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/utils/retry.py +0 -0
  163. {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/utils/retry_transport.py +0 -0
  164. {hud_python-0.5.1 → hud_python-0.5.3}/hud/datasets/__init__.py +0 -0
  165. {hud_python-0.5.1 → hud_python-0.5.3}/hud/datasets/tests/__init__.py +0 -0
  166. {hud_python-0.5.1 → hud_python-0.5.3}/hud/datasets/tests/test_utils.py +0 -0
  167. {hud_python-0.5.1 → hud_python-0.5.3}/hud/datasets/utils.py +0 -0
  168. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/__init__.py +0 -0
  169. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/connection.py +0 -0
  170. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/connectors/__init__.py +0 -0
  171. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/connectors/base.py +0 -0
  172. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/connectors/local.py +0 -0
  173. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/connectors/mcp_config.py +0 -0
  174. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/connectors/openai.py +0 -0
  175. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/connectors/remote.py +0 -0
  176. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/integrations/__init__.py +0 -0
  177. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/integrations/adk.py +0 -0
  178. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/integrations/anthropic.py +0 -0
  179. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/integrations/gemini.py +0 -0
  180. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/integrations/langchain.py +0 -0
  181. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/integrations/llamaindex.py +0 -0
  182. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/integrations/openai.py +0 -0
  183. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/mock.py +0 -0
  184. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/router.py +0 -0
  185. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/tests/__init__.py +0 -0
  186. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/tests/test_connection.py +0 -0
  187. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/tests/test_connectors.py +0 -0
  188. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/tests/test_integrations.py +0 -0
  189. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/tests/test_local_connectors.py +0 -0
  190. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/tests/test_tools.py +0 -0
  191. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/types.py +0 -0
  192. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/utils/__init__.py +0 -0
  193. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/utils/formats.py +0 -0
  194. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/utils/schema.py +0 -0
  195. {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/utils/tool_wrappers.py +0 -0
  196. {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/__init__.py +0 -0
  197. {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/display.py +0 -0
  198. {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/manager.py +0 -0
  199. {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/parallel.py +0 -0
  200. {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/tests/__init__.py +0 -0
  201. {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/tests/test_context.py +0 -0
  202. {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/tests/test_manager.py +0 -0
  203. {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/tests/test_parallel.py +0 -0
  204. {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/tests/test_task.py +0 -0
  205. {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/types.py +0 -0
  206. {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/utils.py +0 -0
  207. {hud_python-0.5.1 → hud_python-0.5.3}/hud/native/__init__.py +0 -0
  208. {hud_python-0.5.1 → hud_python-0.5.3}/hud/native/comparator.py +0 -0
  209. {hud_python-0.5.1 → hud_python-0.5.3}/hud/native/tests/__init__.py +0 -0
  210. {hud_python-0.5.1 → hud_python-0.5.3}/hud/native/tests/test_comparator.py +0 -0
  211. {hud_python-0.5.1 → hud_python-0.5.3}/hud/native/tests/test_native_init.py +0 -0
  212. {hud_python-0.5.1 → hud_python-0.5.3}/hud/patches/__init__.py +0 -0
  213. {hud_python-0.5.1 → hud_python-0.5.3}/hud/patches/mcp_patches.py +0 -0
  214. {hud_python-0.5.1 → hud_python-0.5.3}/hud/patches/warnings.py +0 -0
  215. {hud_python-0.5.1 → hud_python-0.5.3}/hud/py.typed +0 -0
  216. {hud_python-0.5.1 → hud_python-0.5.3}/hud/samples/__init__.py +0 -0
  217. {hud_python-0.5.1 → hud_python-0.5.3}/hud/samples/browser.py +0 -0
  218. {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/__init__.py +0 -0
  219. {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/context.py +0 -0
  220. {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/helper/__init__.py +0 -0
  221. {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/low_level.py +0 -0
  222. {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/router.py +0 -0
  223. {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/server.py +0 -0
  224. {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/tests/__init__.py +0 -0
  225. {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/tests/test_add_tool.py +0 -0
  226. {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/tests/test_context.py +0 -0
  227. {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/tests/test_mcp_server_handlers.py +0 -0
  228. {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/tests/test_mcp_server_integration.py +0 -0
  229. {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/tests/test_mcp_server_more.py +0 -0
  230. {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/tests/test_run_wrapper.py +0 -0
  231. {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/tests/test_server_extra.py +0 -0
  232. {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/tests/test_sigterm_runner.py +0 -0
  233. {hud_python-0.5.1 → hud_python-0.5.3}/hud/settings.py +0 -0
  234. {hud_python-0.5.1 → hud_python-0.5.3}/hud/shared/__init__.py +0 -0
  235. {hud_python-0.5.1 → hud_python-0.5.3}/hud/shared/exceptions.py +0 -0
  236. {hud_python-0.5.1 → hud_python-0.5.3}/hud/shared/hints.py +0 -0
  237. {hud_python-0.5.1 → hud_python-0.5.3}/hud/shared/requests.py +0 -0
  238. {hud_python-0.5.1 → hud_python-0.5.3}/hud/shared/tests/__init__.py +0 -0
  239. {hud_python-0.5.1 → hud_python-0.5.3}/hud/shared/tests/test_exceptions.py +0 -0
  240. {hud_python-0.5.1 → hud_python-0.5.3}/hud/shared/tests/test_hints.py +0 -0
  241. {hud_python-0.5.1 → hud_python-0.5.3}/hud/shared/tests/test_requests.py +0 -0
  242. {hud_python-0.5.1 → hud_python-0.5.3}/hud/telemetry/__init__.py +0 -0
  243. {hud_python-0.5.1 → hud_python-0.5.3}/hud/telemetry/exporter.py +0 -0
  244. {hud_python-0.5.1 → hud_python-0.5.3}/hud/telemetry/tests/__init__.py +0 -0
  245. {hud_python-0.5.1 → hud_python-0.5.3}/hud/telemetry/tests/test_exporter.py +0 -0
  246. {hud_python-0.5.1 → hud_python-0.5.3}/hud/telemetry/tests/test_instrument.py +0 -0
  247. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/apply_patch.py +0 -0
  248. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/base.py +0 -0
  249. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/bash.py +0 -0
  250. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/computer/__init__.py +0 -0
  251. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/computer/anthropic.py +0 -0
  252. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/computer/gemini.py +0 -0
  253. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/computer/hud.py +0 -0
  254. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/computer/openai.py +0 -0
  255. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/computer/qwen.py +0 -0
  256. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/computer/settings.py +0 -0
  257. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/edit.py +0 -0
  258. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/executors/__init__.py +0 -0
  259. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/executors/base.py +0 -0
  260. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/executors/pyautogui.py +0 -0
  261. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/executors/tests/__init__.py +0 -0
  262. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/executors/tests/test_base_executor.py +0 -0
  263. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  264. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/executors/xdo.py +0 -0
  265. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/grounding/__init__.py +0 -0
  266. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/grounding/config.py +0 -0
  267. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/grounding/grounded_tool.py +0 -0
  268. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/grounding/grounder.py +0 -0
  269. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/grounding/tests/__init__.py +0 -0
  270. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
  271. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/jupyter.py +0 -0
  272. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/playwright.py +0 -0
  273. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/response.py +0 -0
  274. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/submit.py +0 -0
  275. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/__init__.py +0 -0
  276. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_apply_patch.py +0 -0
  277. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_base.py +0 -0
  278. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_bash.py +0 -0
  279. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_bash_extended.py +0 -0
  280. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_computer.py +0 -0
  281. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_computer_actions.py +0 -0
  282. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_edit.py +0 -0
  283. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_init.py +0 -0
  284. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_jupyter_tool.py +0 -0
  285. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_playwright_tool.py +0 -0
  286. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_response.py +0 -0
  287. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_shell.py +0 -0
  288. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_submit.py +0 -0
  289. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_tools.py +0 -0
  290. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_tools_init.py +0 -0
  291. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_types.py +0 -0
  292. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_utils.py +0 -0
  293. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/types.py +0 -0
  294. {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/utils.py +0 -0
  295. {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/__init__.py +0 -0
  296. {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/env.py +0 -0
  297. {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/hud_console.py +0 -0
  298. {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/mcp.py +0 -0
  299. {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/pretty_errors.py +0 -0
  300. {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/telemetry.py +0 -0
  301. {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/tests/__init__.py +0 -0
  302. {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/tests/test_init.py +0 -0
  303. {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/tests/test_mcp.py +0 -0
  304. {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/tests/test_pretty_errors.py +0 -0
  305. {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/tests/test_telemetry.py +0 -0
  306. {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/tests/test_tool_shorthand.py +0 -0
  307. {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/tool_shorthand.py +0 -0
  308. {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.5.1
3
+ Version: 0.5.3
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -18,7 +18,7 @@ from .telemetry.instrument import instrument
18
18
  def trace(*args: object, **kwargs: object) -> EvalContext:
19
19
  """Deprecated: Use hud.eval() instead.
20
20
 
21
- .. deprecated:: 0.5.1
21
+ .. deprecated:: 0.5.2
22
22
  hud.trace() is deprecated. Use hud.eval() or env.eval() instead.
23
23
  """
24
24
  warnings.warn(
@@ -0,0 +1,82 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from .base import MCPAgent
6
+ from .openai import OpenAIAgent
7
+ from .openai_chat import OpenAIChatAgent
8
+ from .operator import OperatorAgent
9
+
10
+ __all__ = [
11
+ "MCPAgent",
12
+ "OpenAIAgent",
13
+ "OpenAIChatAgent",
14
+ "OperatorAgent",
15
+ "create_agent",
16
+ ]
17
+
18
+
19
def create_agent(model: str, **kwargs: Any) -> MCPAgent:
    """Create an agent whose model calls are routed through the HUD gateway.

    For direct API access (using your own API keys), use the agent classes
    directly instead of this factory.

    Args:
        model: Model name (e.g., "gpt-4o", "claude-sonnet-4-5") or a known
            agent type string understood by ``resolve_cls``.
        **kwargs: Additional params passed to ``agent_cls.create()``. Values
            supplied by the caller win over the defaults set here, because
            every default is applied with ``setdefault``.

    Returns:
        Configured MCPAgent instance with gateway routing.

    Raises:
        ValueError: Propagated from ``resolve_cls`` when the model cannot be
            resolved.

    Example:
        ```python
        # Gateway routing (recommended)
        agent = create_agent("gpt-4o")
        agent = create_agent("claude-sonnet-4-5", temperature=0.7)

        # Direct API access (use agent classes)
        from hud.agents.claude import ClaudeAgent

        agent = ClaudeAgent.create(model="claude-sonnet-4-5")
        ```
    """
    from hud.agents.gateway import build_gateway_client
    from hud.agents.resolver import resolve_cls

    # Resolve class and gateway info
    agent_cls, gateway_info = resolve_cls(model)

    if gateway_info:
        # Prefer the gateway's canonical model ID; fall back to the input.
        model_id = gateway_info.get("model") or gateway_info.get("id") or model
        provider = gateway_info.get("provider") or "openai"
    else:
        # No gateway record: infer the provider from the resolved agent class.
        from hud.agents.claude import ClaudeAgent
        from hud.agents.gemini import GeminiAgent

        model_id = model
        # The values must match the provider names that build_gateway_client
        # recognises ("anthropic", "gemini"); anything else is treated as
        # OpenAI-compatible there.
        agent_to_provider = {
            ClaudeAgent: "anthropic",
            GeminiAgent: "gemini",
        }
        provider = agent_to_provider.get(agent_cls, "openai")

    client = build_gateway_client(provider)

    # Set up kwargs
    kwargs.setdefault("model", model_id)

    # Use correct client key based on agent type
    if agent_cls == OpenAIChatAgent:
        kwargs.setdefault("openai_client", client)
    else:
        # Claude and other agents use model_client and validate_api_key
        kwargs.setdefault("model_client", client)
        kwargs.setdefault("validate_api_key", False)

    return agent_cls.create(**kwargs)
@@ -0,0 +1,42 @@
1
+ """Gateway client utilities for HUD inference gateway."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+
8
def build_gateway_client(provider: str) -> Any:
    """Build a client configured for HUD gateway routing.

    Every client is pointed at ``settings.hud_gateway_url`` and authenticated
    with ``settings.api_key``.

    Args:
        provider: Provider name, matched case-insensitively. "anthropic"
            yields an Anthropic client, "gemini" (or its alias "google")
            yields a Google GenAI client, and any other value is treated as
            OpenAI-compatible.

    Returns:
        Configured client for the provider.
    """
    from hud.settings import settings

    provider = provider.lower()

    if provider == "anthropic":
        from anthropic import AsyncAnthropic

        return AsyncAnthropic(api_key=settings.api_key, base_url=settings.hud_gateway_url)

    # "google" is accepted as an alias so callers that name the vendor rather
    # than the model family do not silently receive an OpenAI client.
    if provider in ("gemini", "google"):
        from google import genai
        from google.genai.types import HttpOptions

        return genai.Client(
            # The HUD key is sent in the Authorization header below, so the
            # SDK-level api_key is only a dummy value.
            # NOTE(review): presumably the SDK requires a non-empty key - confirm.
            api_key="PLACEHOLDER",
            http_options=HttpOptions(
                api_version="v1beta",
                base_url=settings.hud_gateway_url,
                headers={"Authorization": f"Bearer {settings.api_key}"},
            ),
        )

    # OpenAI-compatible (openai, azure, together, groq, fireworks, etc.)
    from openai import AsyncOpenAI

    return AsyncOpenAI(api_key=settings.api_key, base_url=settings.hud_gateway_url)
@@ -6,6 +6,7 @@ from typing import Literal
6
6
  from openai import AsyncOpenAI
7
7
 
8
8
  from hud.settings import settings
9
+ from hud.telemetry import instrument
9
10
 
10
11
  logger = logging.getLogger(__name__)
11
12
 
@@ -64,6 +65,11 @@ class ResponseAgent:
64
65
  self.model = model
65
66
  self.system_prompt = system_prompt or DEFAULT_SYSTEM_PROMPT
66
67
 
68
+ @instrument(
69
+ category="agent",
70
+ name="response_agent",
71
+ internal_type="user-message",
72
+ )
67
73
  async def determine_response(self, agent_message: str) -> ResponseType:
68
74
  """
69
75
  Determine whether the agent should stop or continue based on its message.
@@ -86,6 +92,7 @@ class ResponseAgent:
86
92
  ],
87
93
  temperature=0.1,
88
94
  max_tokens=5,
95
+ extra_headers={"Trace-Id": ""},
89
96
  )
90
97
 
91
98
  response_text = response.choices[0].message.content
@@ -0,0 +1,70 @@
1
+ """Model resolution - maps model strings to agent classes."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING, Any
6
+
7
+ if TYPE_CHECKING:
8
+ from hud.agents.base import MCPAgent
9
+
10
+ __all__ = ["resolve_cls"]
11
+
12
+ _models_cache: list[dict[str, Any]] | None = None
13
+
14
+ # Provider name → AgentType value (only anthropic differs)
15
+ _PROVIDER_TO_AGENT = {"anthropic": "claude"}
16
+
17
+
18
def _fetch_gateway_models() -> list[dict[str, Any]]:
    """Fetch available models from HUD gateway (cached).

    The lookup is best-effort: a missing API key, a failed request, or an
    unexpected payload all produce an empty list rather than an exception.
    Only a well-formed list is stored in the module-level cache, so a failed
    attempt is retried on the next call.

    Returns:
        The gateway's model records (dict entries only), or ``[]`` when they
        could not be obtained.
    """
    global _models_cache
    if _models_cache is not None:
        return _models_cache

    import logging

    import httpx

    from hud.settings import settings

    if not settings.api_key:
        return []

    try:
        resp = httpx.get(
            f"{settings.hud_gateway_url}/models",
            headers={"Authorization": f"Bearer {settings.api_key}"},
            timeout=10.0,
        )
        resp.raise_for_status()
        data = resp.json()
    except Exception as exc:
        # Deliberately broad: model discovery must never crash the caller.
        # Log the cause so a later "Model not found" error can be diagnosed.
        logging.getLogger(__name__).warning("Could not fetch gateway models: %s", exc)
        return []

    # The payload is either a bare list or an envelope of the form {"data": [...]}.
    models = data.get("data", data) if isinstance(data, dict) else data
    if not isinstance(models, list):
        # Unexpected shape (e.g. an error object): do not cache it.
        return []

    # Keep only dict entries so callers can safely use .get() on each record.
    _models_cache = [entry for entry in models if isinstance(entry, dict)]
    return _models_cache
43
+
44
+
45
def resolve_cls(model: str) -> tuple[type[MCPAgent], dict[str, Any] | None]:
    """Resolve model string to (agent_class, gateway_info).

    Args:
        model: Either an ``AgentType`` value or a model identifier that the
            gateway lists under its "id", "name" or "model" field.

    Returns:
        (agent_class, None) for known AgentTypes
        (agent_class, gateway_model_info) for gateway models

    Raises:
        ValueError: If ``model`` is neither an AgentType value nor present in
            the gateway's model list.
    """
    from hud.types import AgentType

    # Known AgentType → no gateway info
    try:
        return AgentType(model).cls, None
    except ValueError:
        pass

    # Gateway lookup
    for entry in _fetch_gateway_models():
        if not isinstance(entry, dict):
            # Ignore malformed catalogue entries instead of failing on .get().
            continue
        if model not in (entry.get("id"), entry.get("name"), entry.get("model")):
            continue

        provider = (entry.get("provider") or "openai_compatible").lower()
        agent_name = _PROVIDER_TO_AGENT.get(provider, provider)
        try:
            return AgentType(agent_name).cls, entry
        except ValueError:
            # Provider has no dedicated agent: use the OpenAI-compatible one.
            return AgentType.OPENAI_COMPATIBLE.cls, entry

    raise ValueError(f"Model '{model}' not found")
@@ -0,0 +1,192 @@
1
+ """Tests for model resolution and create_agent."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from unittest.mock import MagicMock, patch
6
+
7
+ import pytest
8
+
9
+ from hud.agents import create_agent
10
+ from hud.agents.resolver import resolve_cls
11
+
12
+
13
class TestResolveCls:
    """Checks for resolve_cls: built-in agent names and gateway lookups."""

    def test_resolves_known_agent_type(self) -> None:
        """A built-in AgentType name yields its class and no gateway info."""
        from hud.agents.claude import ClaudeAgent

        agent_cls, info = resolve_cls("claude")

        assert agent_cls == ClaudeAgent
        assert info is None

    def test_resolves_openai(self) -> None:
        """The name 'openai' resolves to OpenAIAgent."""
        from hud.agents import OpenAIAgent

        agent_cls, _info = resolve_cls("openai")

        assert agent_cls == OpenAIAgent

    def test_resolves_gemini(self) -> None:
        """The name 'gemini' resolves to GeminiAgent."""
        from hud.agents.gemini import GeminiAgent

        agent_cls, _info = resolve_cls("gemini")

        assert agent_cls == GeminiAgent

    def test_unknown_model_without_gateway_raises(self) -> None:
        """An unknown name with an empty gateway listing raises ValueError."""
        with (
            patch("hud.agents.resolver._fetch_gateway_models", return_value=[]),
            pytest.raises(ValueError, match="not found"),
        ):
            resolve_cls("unknown-model-xyz")

    def test_resolves_gateway_model(self) -> None:
        """A model listed by the gateway resolves and returns its entry."""
        from hud.agents import OpenAIAgent

        listing = [{"id": "gpt-4o", "model": "gpt-4o", "provider": "openai"}]

        with patch("hud.agents.resolver._fetch_gateway_models", return_value=listing):
            agent_cls, info = resolve_cls("gpt-4o")

        assert agent_cls == OpenAIAgent
        assert info is not None
        assert info["id"] == "gpt-4o"

    def test_resolves_anthropic_provider_to_claude(self) -> None:
        """A gateway entry with provider 'anthropic' maps to ClaudeAgent."""
        from hud.agents.claude import ClaudeAgent

        listing = [
            {"id": "claude-sonnet", "model": "claude-3-sonnet", "provider": "anthropic"},
        ]

        with patch("hud.agents.resolver._fetch_gateway_models", return_value=listing):
            agent_cls, _info = resolve_cls("claude-sonnet")

        assert agent_cls == ClaudeAgent

    def test_resolves_unknown_provider_to_openai_compatible(self) -> None:
        """A gateway entry with an unrecognized provider maps to OpenAIChatAgent."""
        from hud.agents.openai_chat import OpenAIChatAgent

        listing = [
            {"id": "custom-model", "model": "custom", "provider": "custom-provider"},
        ]

        with patch("hud.agents.resolver._fetch_gateway_models", return_value=listing):
            agent_cls, _info = resolve_cls("custom-model")

        assert agent_cls == OpenAIChatAgent
83
+
84
+
85
class TestCreateAgent:
    """Checks for create_agent, which always routes through the gateway."""

    def test_creates_with_gateway_client(self) -> None:
        """create_agent forwards the model name and a gateway client."""
        from hud.agents import OpenAIAgent

        listing = [{"id": "gpt-4o", "model": "gpt-4o", "provider": "openai"}]
        fake_agent = MagicMock()

        with (
            patch("hud.agents.resolver._fetch_gateway_models", return_value=listing),
            patch.object(OpenAIAgent, "create", return_value=fake_agent) as create_mock,
            patch("hud.agents.gateway.build_gateway_client", return_value=MagicMock()),
        ):
            built = create_agent("gpt-4o")

            # The agent factory must receive both the model and a client.
            passed = create_mock.call_args.kwargs
            assert passed["model"] == "gpt-4o"
            assert "model_client" in passed
            assert built == fake_agent

    def test_passes_kwargs_to_create(self) -> None:
        """Additional keyword arguments reach the agent's create()."""
        from hud.agents import OpenAIAgent

        listing = [{"id": "gpt-4o", "model": "gpt-4o", "provider": "openai"}]

        with (
            patch("hud.agents.resolver._fetch_gateway_models", return_value=listing),
            patch.object(OpenAIAgent, "create", return_value=MagicMock()) as create_mock,
            patch("hud.agents.gateway.build_gateway_client"),
        ):
            create_agent("gpt-4o", temperature=0.5, max_tokens=1000)

            passed = create_mock.call_args.kwargs
            assert passed["temperature"] == 0.5
            assert passed["max_tokens"] == 1000

    def test_known_agent_type_also_uses_gateway(self) -> None:
        """A bare agent-type name such as 'claude' still gets a gateway client."""
        from hud.agents.claude import ClaudeAgent

        with (
            patch.object(ClaudeAgent, "create", return_value=MagicMock()) as create_mock,
            patch(
                "hud.agents.gateway.build_gateway_client", return_value=MagicMock()
            ) as build_mock,
        ):
            create_agent("claude")

            # The gateway client is built even without a gateway lookup.
            build_mock.assert_called_once()
            passed = create_mock.call_args.kwargs
            assert "model_client" in passed
153
+
154
+
155
class TestBuildGatewayClient:
    """Checks that build_gateway_client instantiates the right SDK client."""

    @staticmethod
    def _assert_client_built(provider: str, client_path: str) -> None:
        """Call build_gateway_client(provider) and expect one client construction.

        ``client_path`` is the dotted path of the SDK client class that is
        patched and expected to be instantiated exactly once.
        """
        from hud.agents.gateway import build_gateway_client

        with patch("hud.settings.settings") as fake_settings:
            fake_settings.api_key = "test-key"
            fake_settings.hud_gateway_url = "https://gateway.hud.ai"

            with patch(client_path) as client_cls:
                build_gateway_client(provider)
                client_cls.assert_called_once()

    def test_builds_anthropic_client(self) -> None:
        """The 'anthropic' provider constructs an AsyncAnthropic client."""
        self._assert_client_built("anthropic", "anthropic.AsyncAnthropic")

    def test_builds_openai_client_for_openai(self) -> None:
        """The 'openai' provider constructs an AsyncOpenAI client."""
        self._assert_client_built("openai", "openai.AsyncOpenAI")

    def test_builds_openai_client_for_unknown(self) -> None:
        """An unrecognized provider falls back to an AsyncOpenAI client."""
        self._assert_client_built("together", "openai.AsyncOpenAI")
@@ -338,47 +338,27 @@ class EvalConfig(BaseModel):
338
338
 
339
339
  # Configure gateway mode - route LLM API calls through HUD gateway
340
340
  if self.gateway:
341
- hud_api_key = settings.api_key
342
- if not hud_api_key:
341
+ if not settings.api_key:
343
342
  raise typer.Exit(1) # Already validated in validate_api_keys()
344
343
 
345
- if self.agent_type == AgentType.CLAUDE:
346
- from anthropic import AsyncAnthropic
347
-
348
- kwargs["model_client"] = AsyncAnthropic(
349
- api_key=hud_api_key,
350
- base_url=settings.hud_gateway_url,
351
- )
352
- hud_console.info("🌐 Using HUD Gateway for Claude API")
353
- elif self.agent_type in (AgentType.OPENAI, AgentType.OPERATOR):
354
- from openai import AsyncOpenAI
344
+ from hud.agents.gateway import build_gateway_client
355
345
 
356
- kwargs["model_client"] = AsyncOpenAI(
357
- api_key=hud_api_key,
358
- base_url=settings.hud_gateway_url,
359
- )
360
- hud_console.info("🌐 Using HUD Gateway for OpenAI API")
361
- elif self.agent_type == AgentType.OPENAI_COMPATIBLE:
362
- from openai import AsyncOpenAI
346
+ # Map AgentType to provider
347
+ agent_to_provider = {
348
+ AgentType.CLAUDE: "anthropic",
349
+ AgentType.OPENAI: "openai",
350
+ AgentType.OPERATOR: "openai",
351
+ AgentType.GEMINI: "gemini",
352
+ AgentType.GEMINI_CUA: "gemini",
353
+ AgentType.OPENAI_COMPATIBLE: "openai",
354
+ }
355
+ provider = agent_to_provider.get(self.agent_type, "openai")
356
+ client = build_gateway_client(provider)
363
357
 
364
- kwargs["openai_client"] = AsyncOpenAI(
365
- api_key=hud_api_key,
366
- base_url=settings.hud_gateway_url,
367
- )
368
- hud_console.info("🌐 Using HUD Gateway for OpenAI-compatible API")
369
- elif self.agent_type in (AgentType.GEMINI, AgentType.GEMINI_CUA):
370
- from google import genai
371
- from google.genai.types import HttpOptions
372
-
373
- kwargs["model_client"] = genai.Client(
374
- api_key="PLACEHOLDER",
375
- http_options=HttpOptions(
376
- api_version="v1beta",
377
- base_url=settings.hud_gateway_url,
378
- headers={"Authorization": f"Bearer {hud_api_key}"},
379
- ),
380
- )
381
- hud_console.info("🌐 Using HUD Gateway for Gemini API")
358
+ # OpenAI-compatible uses openai_client key
359
+ is_oai_compat = self.agent_type == AgentType.OPENAI_COMPATIBLE
360
+ kwargs["openai_client" if is_oai_compat else "model_client"] = client
361
+ hud_console.info(f"🌐 Using HUD Gateway for {provider} API")
382
362
 
383
363
  return kwargs
384
364
 
@@ -102,7 +102,7 @@ def smart_init(
102
102
  hud_console.info(" hud set HUD_API_KEY=your-key-here")
103
103
  hud_console.info(" Or: export HUD_API_KEY=your-key")
104
104
  hud_console.info("")
105
- hud_console.info("Get your key at: https://hud.ai/settings/api-keys")
105
+ hud_console.info("Get your key at: https://hud.ai/project/api-keys")
106
106
  return
107
107
 
108
108
  target = Path(directory).resolve()
@@ -63,7 +63,7 @@ def fetch_lock_from_registry(reference: str) -> dict | None:
63
63
 
64
64
  # URL-encode the path segments to handle special characters in tags
65
65
  url_safe_path = "/".join(quote(part, safe="") for part in reference.split("/"))
66
- registry_url = f"{settings.hud_telemetry_url.rstrip('/')}/registry/envs/{url_safe_path}"
66
+ registry_url = f"{settings.hud_api_url.rstrip('/')}/registry/envs/{url_safe_path}"
67
67
 
68
68
  headers = {}
69
69
  if settings.api_key:
@@ -420,13 +420,20 @@ def push_environment(
420
420
 
421
421
  # URL-encode the path segments to handle special characters in tags
422
422
  url_safe_path = "/".join(quote(part, safe="") for part in name_with_tag.split("/"))
423
- registry_url = f"{settings.hud_telemetry_url.rstrip('/')}/registry/envs/{url_safe_path}"
423
+ registry_url = f"{settings.hud_api_url.rstrip('/')}/registry/envs/{url_safe_path}"
424
+
425
+ # Detect git remote URL for matching existing GitHub-connected registries
426
+ from hud.cli.utils.git import get_git_remote_url
427
+
428
+ github_url = get_git_remote_url(Path(directory))
424
429
 
425
430
  # Prepare the payload
426
- payload = {
431
+ payload: dict[str, str | None] = {
427
432
  "lock": yaml.dump(lock_data, default_flow_style=False, sort_keys=False),
428
433
  "digest": pushed_digest.split("@")[-1] if "@" in pushed_digest else None,
429
434
  }
435
+ if github_url:
436
+ payload["github_url"] = github_url
430
437
 
431
438
  headers = {"Authorization": f"Bearer {settings.api_key}"}
432
439
 
@@ -160,7 +160,7 @@ class TestPushEnvironment:
160
160
  mock_hud_console = mock.Mock()
161
161
  mock_hud_console_class.return_value = mock_hud_console
162
162
  mock_settings.api_key = "test-key"
163
- mock_settings.hud_telemetry_url = "https://api.hud.test"
163
+ mock_settings.hud_api_url = "https://api.hud.test"
164
164
  mock_get_username.return_value = "testuser"
165
165
 
166
166
  # Create lock file
@@ -32,7 +32,7 @@ def fetch_lock_from_registry(reference: str) -> dict | None:
32
32
 
33
33
  # URL-encode the path segments to handle special characters in tags
34
34
  url_safe_path = "/".join(quote(part, safe="") for part in reference.split("/"))
35
- registry_url = f"{settings.hud_telemetry_url.rstrip('/')}/registry/envs/{url_safe_path}"
35
+ registry_url = f"{settings.hud_api_url.rstrip('/')}/registry/envs/{url_safe_path}"
36
36
 
37
37
  headers = {}
38
38
  if settings.api_key:
@@ -18,7 +18,7 @@ if TYPE_CHECKING:
18
18
  @patch("hud.cli.utils.metadata.settings")
19
19
  @patch("requests.get")
20
20
  def test_fetch_lock_from_registry_success(mock_get, mock_settings):
21
- mock_settings.hud_telemetry_url = "https://api.example.com"
21
+ mock_settings.hud_api_url = "https://api.example.com"
22
22
  mock_settings.api_key = None
23
23
  resp = MagicMock(status_code=200)
24
24
  resp.json.return_value = {"lock": "image: img\n"}
@@ -63,7 +63,8 @@ def _load_from_file(path: Path) -> list[Task]:
63
63
  from hud.eval.task import Task
64
64
 
65
65
  raw_items = _load_raw_from_file(path)
66
- return [Task(**item) for item in raw_items]
66
+ # Default args to {} for runnable tasks (None = template)
67
+ return [Task(**{**item, "args": item.get("args") or {}}) for item in raw_items]
67
68
 
68
69
 
69
70
  def _load_raw_from_huggingface(dataset_name: str) -> list[dict[str, Any]]:
@@ -99,7 +100,8 @@ def _load_from_huggingface(dataset_name: str) -> list[Task]:
99
100
  raw_items = _load_raw_from_huggingface(dataset_name)
100
101
  from hud.eval.task import Task
101
102
 
102
- return [Task(**item) for item in raw_items]
103
+ # Default args to {} for runnable tasks (None = template)
104
+ return [Task(**{**item, "args": item.get("args") or {}}) for item in raw_items]
103
105
 
104
106
 
105
107
  def _load_raw_from_api(dataset_name: str) -> list[dict[str, Any]]:
@@ -138,7 +140,8 @@ def _load_from_api(dataset_name: str) -> list[Task]:
138
140
  from hud.eval.task import Task
139
141
 
140
142
  raw_items = _load_raw_from_api(dataset_name)
141
- return [Task(**item) for item in raw_items]
143
+ # Default args to {} for runnable tasks (None = template)
144
+ return [Task(**{**item, "args": item.get("args") or {}}) for item in raw_items]
142
145
 
143
146
 
144
147
  @overload
@@ -234,15 +237,15 @@ def save_tasks(
234
237
  ) -> str:
235
238
  """Save tasks to the HUD API.
236
239
 
237
- Creates or updates an evalset with the given tasks.
240
+ Creates or updates a taskset with the given tasks.
238
241
 
239
242
  Args:
240
- name: Evalset name/slug (e.g., "my-evals/benchmark-v1").
243
+ name: Taskset name/slug (e.g., "my-evals/benchmark-v1").
241
244
  If no org prefix, uses user's default org.
242
245
  tasks: List of Task objects (v5 format) to save.
243
246
 
244
247
  Returns:
245
- The evalset ID of the created/updated evalset.
248
+ The taskset ID of the created/updated taskset.
246
249
 
247
250
  Example:
248
251
  ```python
@@ -258,7 +261,7 @@ def save_tasks(
258
261
  ]
259
262
 
260
263
  # Save to HUD API
261
- evalset_id = save_tasks("my-evals/benchmark-v1", tasks)
264
+ taskset_id = save_tasks("my-evals/benchmark-v1", tasks)
262
265
 
263
266
  # Later, load them back
264
267
  loaded = load_tasks("my-evals/benchmark-v1")
@@ -303,9 +306,9 @@ def save_tasks(
303
306
  )
304
307
  response.raise_for_status()
305
308
  data = response.json()
306
- evalset_id = data.get("evalset_id") or data.get("id") or name
307
- logger.info("Saved %d tasks to evalset: %s", len(tasks), evalset_id)
308
- return evalset_id
309
+ taskset_id = data.get("evalset_id") or data.get("id") or name
310
+ logger.info("Saved %d tasks to taskset: %s", len(tasks), taskset_id)
311
+ return taskset_id
309
312
  except httpx.HTTPStatusError as e:
310
313
  raise ValueError(f"Failed to save tasks: {e.response.text}") from e
311
314
  except Exception as e: