hud-python 0.4.74__tar.gz → 0.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (405)
  1. hud_python-0.5.1/PKG-INFO +264 -0
  2. hud_python-0.5.1/README.md +166 -0
  3. hud_python-0.5.1/examples/README.md +127 -0
  4. hud_python-0.5.1/hud/__init__.py +50 -0
  5. hud_python-0.5.1/hud/agents/__init__.py +19 -0
  6. hud_python-0.5.1/hud/agents/base.py +589 -0
  7. {hud_python-0.4.74 → hud_python-0.5.1}/hud/agents/claude.py +9 -23
  8. {hud_python-0.4.74 → hud_python-0.5.1}/hud/agents/gemini.py +9 -23
  9. {hud_python-0.4.74 → hud_python-0.5.1}/hud/agents/gemini_cua.py +8 -5
  10. {hud_python-0.4.74 → hud_python-0.5.1}/hud/agents/grounded_openai.py +10 -21
  11. hud_python-0.5.1/hud/agents/misc/integration_test_agent.py +87 -0
  12. {hud_python-0.4.74 → hud_python-0.5.1}/hud/agents/misc/response_agent.py +5 -1
  13. {hud_python-0.4.74 → hud_python-0.5.1}/hud/agents/openai.py +9 -17
  14. {hud_python-0.4.74 → hud_python-0.5.1}/hud/agents/openai_chat.py +28 -14
  15. {hud_python-0.4.74 → hud_python-0.5.1}/hud/agents/operator.py +6 -6
  16. hud_python-0.5.1/hud/agents/tests/conftest.py +133 -0
  17. hud_python-0.5.1/hud/agents/tests/test_base.py +416 -0
  18. hud_python-0.5.1/hud/agents/tests/test_base_runtime.py +233 -0
  19. hud_python-0.5.1/hud/agents/tests/test_claude.py +518 -0
  20. {hud_python-0.4.74 → hud_python-0.5.1}/hud/agents/tests/test_client.py +9 -10
  21. hud_python-0.5.1/hud/agents/tests/test_gemini.py +369 -0
  22. {hud_python-0.4.74 → hud_python-0.5.1}/hud/agents/tests/test_grounded_openai_agent.py +17 -6
  23. hud_python-0.5.1/hud/agents/tests/test_openai.py +449 -0
  24. {hud_python-0.4.74 → hud_python-0.5.1}/hud/agents/tests/test_operator.py +105 -51
  25. hud_python-0.5.1/hud/agents/tests/test_run_eval.py +179 -0
  26. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/__init__.py +111 -128
  27. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/analyze.py +43 -5
  28. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/build.py +74 -12
  29. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/debug.py +8 -5
  30. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/dev.py +84 -33
  31. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/eval.py +182 -136
  32. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/flows/dev.py +2 -2
  33. hud_python-0.5.1/hud/cli/flows/init.py +191 -0
  34. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/flows/tasks.py +25 -27
  35. hud_python-0.5.1/hud/cli/flows/templates.py +151 -0
  36. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/init.py +4 -0
  37. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/rft.py +4 -8
  38. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/rft_status.py +1 -1
  39. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_analyze.py +5 -5
  40. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_analyze_module.py +4 -4
  41. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_build.py +7 -3
  42. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_cli_root.py +13 -7
  43. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_convert.py +19 -25
  44. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_debug.py +12 -10
  45. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_dev.py +36 -2
  46. hud_python-0.5.1/hud/cli/tests/test_eval.py +251 -0
  47. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_registry.py +1 -1
  48. {hud_python-0.4.74/hud/cli/rl → hud_python-0.5.1/hud/cli/utils}/celebrate.py +14 -12
  49. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/config.py +18 -1
  50. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/docker.py +8 -4
  51. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/interactive.py +7 -3
  52. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/metadata.py +69 -0
  53. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/server.py +2 -2
  54. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/source_hash.py +2 -2
  55. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/tests/test_env_check.py +1 -1
  56. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/tests/test_interactive_module.py +1 -1
  57. {hud_python-0.4.74/hud/cli/rl → hud_python-0.5.1/hud/cli/utils}/viewer.py +2 -2
  58. {hud_python-0.4.74 → hud_python-0.5.1}/hud/clients/__init__.py +4 -3
  59. {hud_python-0.4.74 → hud_python-0.5.1}/hud/clients/base.py +133 -19
  60. hud_python-0.5.1/hud/clients/environment.py +51 -0
  61. {hud_python-0.4.74 → hud_python-0.5.1}/hud/clients/fastmcp.py +7 -1
  62. {hud_python-0.4.74 → hud_python-0.5.1}/hud/clients/mcp_use.py +29 -11
  63. hud_python-0.5.1/hud/clients/tests/test_analyze_scenarios.py +206 -0
  64. hud_python-0.5.1/hud/datasets/__init__.py +36 -0
  65. hud_python-0.5.1/hud/datasets/loader.py +327 -0
  66. hud_python-0.5.1/hud/datasets/runner.py +213 -0
  67. hud_python-0.5.1/hud/datasets/tests/test_loader.py +221 -0
  68. {hud_python-0.4.74 → hud_python-0.5.1}/hud/datasets/tests/test_utils.py +61 -65
  69. hud_python-0.5.1/hud/datasets/utils.py +298 -0
  70. hud_python-0.5.1/hud/environment/__init__.py +50 -0
  71. hud_python-0.5.1/hud/environment/connection.py +206 -0
  72. hud_python-0.5.1/hud/environment/connectors/__init__.py +33 -0
  73. hud_python-0.5.1/hud/environment/connectors/base.py +68 -0
  74. hud_python-0.5.1/hud/environment/connectors/local.py +177 -0
  75. hud_python-0.5.1/hud/environment/connectors/mcp_config.py +109 -0
  76. hud_python-0.5.1/hud/environment/connectors/openai.py +101 -0
  77. hud_python-0.5.1/hud/environment/connectors/remote.py +172 -0
  78. hud_python-0.5.1/hud/environment/environment.py +694 -0
  79. hud_python-0.5.1/hud/environment/integrations/__init__.py +45 -0
  80. hud_python-0.5.1/hud/environment/integrations/adk.py +67 -0
  81. hud_python-0.5.1/hud/environment/integrations/anthropic.py +196 -0
  82. hud_python-0.5.1/hud/environment/integrations/gemini.py +92 -0
  83. hud_python-0.5.1/hud/environment/integrations/langchain.py +82 -0
  84. hud_python-0.5.1/hud/environment/integrations/llamaindex.py +68 -0
  85. hud_python-0.5.1/hud/environment/integrations/openai.py +238 -0
  86. hud_python-0.5.1/hud/environment/mock.py +306 -0
  87. hud_python-0.5.1/hud/environment/router.py +112 -0
  88. hud_python-0.5.1/hud/environment/scenarios.py +493 -0
  89. hud_python-0.5.1/hud/environment/tests/__init__.py +1 -0
  90. hud_python-0.5.1/hud/environment/tests/test_connection.py +317 -0
  91. hud_python-0.5.1/hud/environment/tests/test_connectors.py +218 -0
  92. hud_python-0.5.1/hud/environment/tests/test_environment.py +161 -0
  93. hud_python-0.5.1/hud/environment/tests/test_integrations.py +257 -0
  94. hud_python-0.5.1/hud/environment/tests/test_local_connectors.py +201 -0
  95. hud_python-0.5.1/hud/environment/tests/test_scenarios.py +280 -0
  96. hud_python-0.5.1/hud/environment/tests/test_tools.py +208 -0
  97. hud_python-0.5.1/hud/environment/types.py +23 -0
  98. hud_python-0.5.1/hud/environment/utils/__init__.py +35 -0
  99. hud_python-0.5.1/hud/environment/utils/formats.py +215 -0
  100. hud_python-0.5.1/hud/environment/utils/schema.py +171 -0
  101. hud_python-0.5.1/hud/environment/utils/tool_wrappers.py +113 -0
  102. hud_python-0.5.1/hud/eval/__init__.py +67 -0
  103. hud_python-0.5.1/hud/eval/context.py +674 -0
  104. hud_python-0.5.1/hud/eval/display.py +299 -0
  105. hud_python-0.5.1/hud/eval/instrument.py +185 -0
  106. hud_python-0.5.1/hud/eval/manager.py +466 -0
  107. hud_python-0.5.1/hud/eval/parallel.py +268 -0
  108. hud_python-0.5.1/hud/eval/task.py +340 -0
  109. hud_python-0.5.1/hud/eval/tests/__init__.py +1 -0
  110. hud_python-0.5.1/hud/eval/tests/test_context.py +178 -0
  111. hud_python-0.5.1/hud/eval/tests/test_eval.py +210 -0
  112. hud_python-0.5.1/hud/eval/tests/test_manager.py +152 -0
  113. hud_python-0.5.1/hud/eval/tests/test_parallel.py +168 -0
  114. hud_python-0.5.1/hud/eval/tests/test_task.py +145 -0
  115. hud_python-0.5.1/hud/eval/types.py +63 -0
  116. hud_python-0.5.1/hud/eval/utils.py +183 -0
  117. hud_python-0.5.1/hud/patches/__init__.py +19 -0
  118. hud_python-0.5.1/hud/patches/mcp_patches.py +151 -0
  119. hud_python-0.5.1/hud/patches/warnings.py +54 -0
  120. {hud_python-0.4.74 → hud_python-0.5.1}/hud/samples/browser.py +4 -4
  121. {hud_python-0.4.74 → hud_python-0.5.1}/hud/server/low_level.py +2 -1
  122. {hud_python-0.4.74 → hud_python-0.5.1}/hud/server/router.py +6 -2
  123. {hud_python-0.4.74 → hud_python-0.5.1}/hud/server/server.py +10 -9
  124. {hud_python-0.4.74 → hud_python-0.5.1}/hud/server/tests/test_mcp_server_integration.py +11 -11
  125. {hud_python-0.4.74 → hud_python-0.5.1}/hud/server/tests/test_mcp_server_more.py +1 -1
  126. hud_python-0.5.1/hud/telemetry/__init__.py +27 -0
  127. hud_python-0.5.1/hud/telemetry/exporter.py +201 -0
  128. hud_python-0.5.1/hud/telemetry/instrument.py +284 -0
  129. hud_python-0.5.1/hud/telemetry/tests/test_eval_telemetry.py +356 -0
  130. hud_python-0.5.1/hud/telemetry/tests/test_exporter.py +258 -0
  131. {hud_python-0.4.74 → hud_python-0.5.1}/hud/telemetry/tests/test_instrument.py +8 -21
  132. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/__init__.py +3 -0
  133. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/base.py +6 -2
  134. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/computer/anthropic.py +2 -2
  135. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/computer/hud.py +6 -5
  136. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/computer/openai.py +2 -11
  137. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/computer/qwen.py +1 -1
  138. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/grounding/grounded_tool.py +13 -18
  139. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/grounding/grounder.py +10 -31
  140. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  141. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/jupyter.py +44 -27
  142. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/shell.py +13 -6
  143. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/tests/test_apply_patch.py +3 -1
  144. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/tests/test_computer.py +4 -1
  145. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/tests/test_computer_actions.py +2 -1
  146. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/tests/test_jupyter_tool.py +13 -8
  147. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/types.py +12 -0
  148. {hud_python-0.4.74 → hud_python-0.5.1}/hud/types.py +89 -24
  149. {hud_python-0.4.74 → hud_python-0.5.1}/hud/utils/hud_console.py +7 -3
  150. hud_python-0.5.1/hud/utils/mcp.py +47 -0
  151. {hud_python-0.4.74 → hud_python-0.5.1}/hud/utils/tests/test_mcp.py +1 -26
  152. {hud_python-0.4.74 → hud_python-0.5.1}/hud/utils/tests/test_version.py +1 -1
  153. {hud_python-0.4.74 → hud_python-0.5.1}/hud/version.py +1 -1
  154. {hud_python-0.4.74 → hud_python-0.5.1}/pyproject.toml +30 -41
  155. hud_python-0.4.74/PKG-INFO +0 -535
  156. hud_python-0.4.74/README.md +0 -415
  157. hud_python-0.4.74/environments/README.md +0 -956
  158. hud_python-0.4.74/environments/blank/README.md +0 -128
  159. hud_python-0.4.74/environments/blank/environment/README.md +0 -16
  160. hud_python-0.4.74/environments/blank/environment/pyproject.toml +0 -16
  161. hud_python-0.4.74/environments/blank/server/README.md +0 -21
  162. hud_python-0.4.74/environments/blank/server/pyproject.toml +0 -19
  163. hud_python-0.4.74/environments/browser/README.md +0 -191
  164. hud_python-0.4.74/environments/browser/browser-base/README.md +0 -58
  165. hud_python-0.4.74/environments/browser/environment/2048/README.md +0 -103
  166. hud_python-0.4.74/environments/browser/environment/2048/backend/pyproject.toml +0 -9
  167. hud_python-0.4.74/environments/browser/environment/README.md +0 -135
  168. hud_python-0.4.74/environments/browser/environment/pyproject.toml +0 -23
  169. hud_python-0.4.74/environments/browser/environment/todo/README.md +0 -85
  170. hud_python-0.4.74/environments/browser/environment/todo/backend/pyproject.toml +0 -15
  171. hud_python-0.4.74/environments/browser/pyproject.toml +0 -22
  172. hud_python-0.4.74/environments/browser/server/pyproject.toml +0 -21
  173. hud_python-0.4.74/environments/deepresearch/README.md +0 -165
  174. hud_python-0.4.74/environments/deepresearch/environment/pyproject.toml +0 -17
  175. hud_python-0.4.74/environments/deepresearch/pyproject.toml +0 -19
  176. hud_python-0.4.74/environments/deepresearch/server/pyproject.toml +0 -19
  177. hud_python-0.4.74/environments/jupyter/README.md +0 -68
  178. hud_python-0.4.74/environments/jupyter/server/pyproject.toml +0 -34
  179. hud_python-0.4.74/environments/online_mind2web/README.md +0 -36
  180. hud_python-0.4.74/environments/online_mind2web/pyproject.toml +0 -22
  181. hud_python-0.4.74/environments/online_mind2web/src/hud_controller/providers/README.md +0 -110
  182. hud_python-0.4.74/environments/remote_browser/README.md +0 -225
  183. hud_python-0.4.74/environments/remote_browser/pyproject.toml +0 -22
  184. hud_python-0.4.74/environments/remote_browser/src/hud_controller/providers/README.md +0 -110
  185. hud_python-0.4.74/environments/rubrics/README.md +0 -239
  186. hud_python-0.4.74/environments/rubrics/environment/pyproject.toml +0 -19
  187. hud_python-0.4.74/environments/rubrics/pyproject.toml +0 -19
  188. hud_python-0.4.74/environments/rubrics/server/pyproject.toml +0 -19
  189. hud_python-0.4.74/environments/text_2048/README.md +0 -102
  190. hud_python-0.4.74/environments/text_2048/pyproject.toml +0 -22
  191. hud_python-0.4.74/examples/README.md +0 -74
  192. hud_python-0.4.74/hud/__init__.py +0 -42
  193. hud_python-0.4.74/hud/agents/__init__.py +0 -19
  194. hud_python-0.4.74/hud/agents/base.py +0 -840
  195. hud_python-0.4.74/hud/agents/misc/integration_test_agent.py +0 -66
  196. hud_python-0.4.74/hud/agents/tests/conftest.py +0 -124
  197. hud_python-0.4.74/hud/agents/tests/test_base.py +0 -737
  198. hud_python-0.4.74/hud/agents/tests/test_base_runtime.py +0 -177
  199. hud_python-0.4.74/hud/agents/tests/test_claude.py +0 -523
  200. hud_python-0.4.74/hud/agents/tests/test_gemini.py +0 -600
  201. hud_python-0.4.74/hud/agents/tests/test_openai.py +0 -1083
  202. hud_python-0.4.74/hud/agents/utils.py +0 -50
  203. hud_python-0.4.74/hud/cli/rl/__init__.py +0 -180
  204. hud_python-0.4.74/hud/cli/rl/config.py +0 -101
  205. hud_python-0.4.74/hud/cli/rl/display.py +0 -133
  206. hud_python-0.4.74/hud/cli/rl/gpu.py +0 -63
  207. hud_python-0.4.74/hud/cli/rl/gpu_utils.py +0 -321
  208. hud_python-0.4.74/hud/cli/rl/local_runner.py +0 -607
  209. hud_python-0.4.74/hud/cli/rl/presets.py +0 -96
  210. hud_python-0.4.74/hud/cli/rl/remote_runner.py +0 -463
  211. hud_python-0.4.74/hud/cli/rl/rl_api.py +0 -150
  212. hud_python-0.4.74/hud/cli/rl/vllm.py +0 -179
  213. hud_python-0.4.74/hud/cli/rl/wait_utils.py +0 -89
  214. hud_python-0.4.74/hud/cli/tests/test_eval.py +0 -539
  215. hud_python-0.4.74/hud/datasets/__init__.py +0 -33
  216. hud_python-0.4.74/hud/datasets/runner.py +0 -298
  217. hud_python-0.4.74/hud/datasets/tests/test_runner.py +0 -67
  218. hud_python-0.4.74/hud/datasets/utils.py +0 -441
  219. hud_python-0.4.74/hud/misc/__init__.py +0 -1
  220. hud_python-0.4.74/hud/misc/claude_plays_pokemon.py +0 -292
  221. hud_python-0.4.74/hud/otel/__init__.py +0 -35
  222. hud_python-0.4.74/hud/otel/collector.py +0 -142
  223. hud_python-0.4.74/hud/otel/config.py +0 -183
  224. hud_python-0.4.74/hud/otel/context.py +0 -572
  225. hud_python-0.4.74/hud/otel/exporters.py +0 -543
  226. hud_python-0.4.74/hud/otel/instrumentation.py +0 -135
  227. hud_python-0.4.74/hud/otel/processors.py +0 -121
  228. hud_python-0.4.74/hud/otel/tests/test_instrumentation.py +0 -207
  229. hud_python-0.4.74/hud/otel/tests/test_processors.py +0 -197
  230. hud_python-0.4.74/hud/rl/README.md +0 -30
  231. hud_python-0.4.74/hud/rl/__init__.py +0 -1
  232. hud_python-0.4.74/hud/rl/actor.py +0 -178
  233. hud_python-0.4.74/hud/rl/buffer.py +0 -405
  234. hud_python-0.4.74/hud/rl/chat_template.jinja +0 -101
  235. hud_python-0.4.74/hud/rl/config.py +0 -193
  236. hud_python-0.4.74/hud/rl/distributed.py +0 -132
  237. hud_python-0.4.74/hud/rl/learner.py +0 -648
  238. hud_python-0.4.74/hud/rl/tests/__init__.py +0 -1
  239. hud_python-0.4.74/hud/rl/tests/test_learner.py +0 -186
  240. hud_python-0.4.74/hud/rl/train.py +0 -394
  241. hud_python-0.4.74/hud/rl/types.py +0 -101
  242. hud_python-0.4.74/hud/rl/utils/start_vllm_server.sh +0 -30
  243. hud_python-0.4.74/hud/rl/utils.py +0 -524
  244. hud_python-0.4.74/hud/rl/vllm_adapter.py +0 -143
  245. hud_python-0.4.74/hud/telemetry/__init__.py +0 -47
  246. hud_python-0.4.74/hud/telemetry/async_context.py +0 -345
  247. hud_python-0.4.74/hud/telemetry/instrument.py +0 -379
  248. hud_python-0.4.74/hud/telemetry/job.py +0 -355
  249. hud_python-0.4.74/hud/telemetry/replay.py +0 -74
  250. hud_python-0.4.74/hud/telemetry/tests/test_async_context.py +0 -515
  251. hud_python-0.4.74/hud/telemetry/tests/test_job.py +0 -555
  252. hud_python-0.4.74/hud/telemetry/tests/test_replay.py +0 -40
  253. hud_python-0.4.74/hud/telemetry/tests/test_trace.py +0 -241
  254. hud_python-0.4.74/hud/telemetry/trace.py +0 -166
  255. hud_python-0.4.74/hud/telemetry/utils.py +0 -42
  256. hud_python-0.4.74/hud/utils/mcp.py +0 -97
  257. hud_python-0.4.74/hud/utils/tasks.py +0 -186
  258. hud_python-0.4.74/hud/utils/tests/__init__.py +0 -0
  259. hud_python-0.4.74/hud/utils/tests/test_tasks.py +0 -356
  260. {hud_python-0.4.74 → hud_python-0.5.1}/.gitignore +0 -0
  261. {hud_python-0.4.74 → hud_python-0.5.1}/LICENSE +0 -0
  262. {hud_python-0.4.74 → hud_python-0.5.1}/hud/__main__.py +0 -0
  263. {hud_python-0.4.74 → hud_python-0.5.1}/hud/agents/misc/__init__.py +0 -0
  264. {hud_python-0.4.74 → hud_python-0.5.1}/hud/agents/tests/__init__.py +0 -0
  265. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/__main__.py +0 -0
  266. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/clone.py +0 -0
  267. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/flows/__init__.py +0 -0
  268. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/flows/tests/__init__.py +0 -0
  269. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/flows/tests/test_dev.py +0 -0
  270. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/get.py +0 -0
  271. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/list_func.py +0 -0
  272. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/pull.py +0 -0
  273. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/push.py +0 -0
  274. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/remove.py +0 -0
  275. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/__init__.py +0 -0
  276. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_analyze_metadata.py +0 -0
  277. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_build_failure.py +0 -0
  278. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_build_module.py +0 -0
  279. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_cli_init.py +0 -0
  280. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_cli_main.py +0 -0
  281. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
  282. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_clone.py +0 -0
  283. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_cursor.py +0 -0
  284. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_eval_bedrock.py +0 -0
  285. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_init.py +0 -0
  286. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_list_func.py +0 -0
  287. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_main_module.py +0 -0
  288. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_mcp_server.py +0 -0
  289. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_pull.py +0 -0
  290. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_push.py +0 -0
  291. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_push_happy.py +0 -0
  292. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_push_wrapper.py +0 -0
  293. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/tests/test_utils.py +0 -0
  294. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/__init__.py +0 -0
  295. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/cursor.py +0 -0
  296. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/env_check.py +0 -0
  297. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/environment.py +0 -0
  298. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/git.py +0 -0
  299. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/local_runner.py +0 -0
  300. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/logging.py +0 -0
  301. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/package_runner.py +0 -0
  302. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/registry.py +0 -0
  303. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/remote_runner.py +0 -0
  304. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/runner.py +0 -0
  305. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/tasks.py +0 -0
  306. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/tests/__init__.py +0 -0
  307. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/tests/test_config.py +0 -0
  308. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/tests/test_docker.py +0 -0
  309. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/tests/test_docker_hints.py +0 -0
  310. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/tests/test_environment.py +0 -0
  311. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/tests/test_git.py +0 -0
  312. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/tests/test_local_runner.py +0 -0
  313. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/tests/test_logging_utils.py +0 -0
  314. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/tests/test_metadata.py +0 -0
  315. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/tests/test_package_runner.py +0 -0
  316. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/tests/test_registry_utils.py +0 -0
  317. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/tests/test_remote_runner.py +0 -0
  318. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/tests/test_runner_modules.py +0 -0
  319. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/tests/test_source_hash.py +0 -0
  320. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/tests/test_tasks.py +0 -0
  321. {hud_python-0.4.74 → hud_python-0.5.1}/hud/cli/utils/version_check.py +0 -0
  322. {hud_python-0.4.74 → hud_python-0.5.1}/hud/clients/README.md +0 -0
  323. {hud_python-0.4.74 → hud_python-0.5.1}/hud/clients/tests/__init__.py +0 -0
  324. {hud_python-0.4.74 → hud_python-0.5.1}/hud/clients/tests/test_client_integration.py +0 -0
  325. {hud_python-0.4.74 → hud_python-0.5.1}/hud/clients/tests/test_fastmcp.py +0 -0
  326. {hud_python-0.4.74 → hud_python-0.5.1}/hud/clients/tests/test_mcp_use_retry.py +0 -0
  327. {hud_python-0.4.74 → hud_python-0.5.1}/hud/clients/tests/test_protocol.py +0 -0
  328. {hud_python-0.4.74 → hud_python-0.5.1}/hud/clients/utils/__init__.py +0 -0
  329. {hud_python-0.4.74 → hud_python-0.5.1}/hud/clients/utils/mcp_use_retry.py +0 -0
  330. {hud_python-0.4.74 → hud_python-0.5.1}/hud/clients/utils/retry.py +0 -0
  331. {hud_python-0.4.74 → hud_python-0.5.1}/hud/clients/utils/retry_transport.py +0 -0
  332. {hud_python-0.4.74 → hud_python-0.5.1}/hud/datasets/tests/__init__.py +0 -0
  333. {hud_python-0.4.74 → hud_python-0.5.1}/hud/native/__init__.py +0 -0
  334. {hud_python-0.4.74 → hud_python-0.5.1}/hud/native/comparator.py +0 -0
  335. {hud_python-0.4.74 → hud_python-0.5.1}/hud/native/tests/__init__.py +0 -0
  336. {hud_python-0.4.74 → hud_python-0.5.1}/hud/native/tests/test_comparator.py +0 -0
  337. {hud_python-0.4.74 → hud_python-0.5.1}/hud/native/tests/test_native_init.py +0 -0
  338. {hud_python-0.4.74 → hud_python-0.5.1}/hud/py.typed +0 -0
  339. {hud_python-0.4.74 → hud_python-0.5.1}/hud/samples/__init__.py +0 -0
  340. {hud_python-0.4.74 → hud_python-0.5.1}/hud/server/__init__.py +0 -0
  341. {hud_python-0.4.74 → hud_python-0.5.1}/hud/server/context.py +0 -0
  342. {hud_python-0.4.74 → hud_python-0.5.1}/hud/server/helper/__init__.py +0 -0
  343. {hud_python-0.4.74 → hud_python-0.5.1}/hud/server/tests/__init__.py +0 -0
  344. {hud_python-0.4.74 → hud_python-0.5.1}/hud/server/tests/test_add_tool.py +0 -0
  345. {hud_python-0.4.74 → hud_python-0.5.1}/hud/server/tests/test_context.py +0 -0
  346. {hud_python-0.4.74 → hud_python-0.5.1}/hud/server/tests/test_mcp_server_handlers.py +0 -0
  347. {hud_python-0.4.74 → hud_python-0.5.1}/hud/server/tests/test_run_wrapper.py +0 -0
  348. {hud_python-0.4.74 → hud_python-0.5.1}/hud/server/tests/test_server_extra.py +0 -0
  349. {hud_python-0.4.74 → hud_python-0.5.1}/hud/server/tests/test_sigterm_runner.py +0 -0
  350. {hud_python-0.4.74 → hud_python-0.5.1}/hud/settings.py +0 -0
  351. {hud_python-0.4.74 → hud_python-0.5.1}/hud/shared/__init__.py +0 -0
  352. {hud_python-0.4.74 → hud_python-0.5.1}/hud/shared/exceptions.py +0 -0
  353. {hud_python-0.4.74 → hud_python-0.5.1}/hud/shared/hints.py +0 -0
  354. {hud_python-0.4.74 → hud_python-0.5.1}/hud/shared/requests.py +0 -0
  355. {hud_python-0.4.74/hud/otel → hud_python-0.5.1/hud/shared}/tests/__init__.py +0 -0
  356. {hud_python-0.4.74 → hud_python-0.5.1}/hud/shared/tests/test_exceptions.py +0 -0
  357. {hud_python-0.4.74 → hud_python-0.5.1}/hud/shared/tests/test_hints.py +0 -0
  358. {hud_python-0.4.74 → hud_python-0.5.1}/hud/shared/tests/test_requests.py +0 -0
  359. {hud_python-0.4.74/hud/shared → hud_python-0.5.1/hud/telemetry}/tests/__init__.py +0 -0
  360. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/apply_patch.py +0 -0
  361. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/bash.py +0 -0
  362. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/computer/__init__.py +0 -0
  363. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/computer/gemini.py +0 -0
  364. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/computer/settings.py +0 -0
  365. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/edit.py +0 -0
  366. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/executors/__init__.py +0 -0
  367. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/executors/base.py +0 -0
  368. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/executors/pyautogui.py +0 -0
  369. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/executors/tests/__init__.py +0 -0
  370. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/executors/tests/test_base_executor.py +0 -0
  371. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  372. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/executors/xdo.py +0 -0
  373. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/grounding/__init__.py +0 -0
  374. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/grounding/config.py +0 -0
  375. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/grounding/tests/__init__.py +0 -0
  376. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/playwright.py +0 -0
  377. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/response.py +0 -0
  378. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/submit.py +0 -0
  379. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/tests/__init__.py +0 -0
  380. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/tests/test_base.py +0 -0
  381. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/tests/test_bash.py +0 -0
  382. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/tests/test_bash_extended.py +0 -0
  383. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/tests/test_edit.py +0 -0
  384. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/tests/test_init.py +0 -0
  385. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/tests/test_playwright_tool.py +0 -0
  386. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/tests/test_response.py +0 -0
  387. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/tests/test_shell.py +0 -0
  388. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/tests/test_submit.py +0 -0
  389. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/tests/test_tools.py +0 -0
  390. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/tests/test_tools_init.py +0 -0
  391. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/tests/test_types.py +0 -0
  392. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/tests/test_utils.py +0 -0
  393. {hud_python-0.4.74 → hud_python-0.5.1}/hud/tools/utils.py +0 -0
  394. {hud_python-0.4.74 → hud_python-0.5.1}/hud/utils/__init__.py +0 -0
  395. {hud_python-0.4.74 → hud_python-0.5.1}/hud/utils/env.py +0 -0
  396. {hud_python-0.4.74 → hud_python-0.5.1}/hud/utils/pretty_errors.py +0 -0
  397. {hud_python-0.4.74 → hud_python-0.5.1}/hud/utils/strict_schema.py +0 -0
  398. {hud_python-0.4.74 → hud_python-0.5.1}/hud/utils/telemetry.py +0 -0
  399. {hud_python-0.4.74/hud/telemetry → hud_python-0.5.1/hud/utils}/tests/__init__.py +0 -0
  400. {hud_python-0.4.74 → hud_python-0.5.1}/hud/utils/tests/test_init.py +0 -0
  401. {hud_python-0.4.74 → hud_python-0.5.1}/hud/utils/tests/test_pretty_errors.py +0 -0
  402. {hud_python-0.4.74 → hud_python-0.5.1}/hud/utils/tests/test_telemetry.py +0 -0
  403. {hud_python-0.4.74 → hud_python-0.5.1}/hud/utils/tests/test_tool_shorthand.py +0 -0
  404. {hud_python-0.4.74 → hud_python-0.5.1}/hud/utils/tool_shorthand.py +0 -0
  405. {hud_python-0.4.74 → hud_python-0.5.1}/hud/utils/types.py +0 -0
@@ -0,0 +1,264 @@
1
+ Metadata-Version: 2.4
2
+ Name: hud-python
3
+ Version: 0.5.1
4
+ Summary: SDK for the HUD platform.
5
+ Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
+ Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
7
+ Project-URL: Documentation, https://docs.hud.ai
8
+ Author-email: HUD <founders@hud.ai>
9
+ License: MIT License
10
+
11
+ Copyright (c) 2025 Human Union Data, Inc
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Classifier: Development Status :: 4 - Beta
32
+ Classifier: Intended Audience :: Developers
33
+ Classifier: Programming Language :: Python :: 3
34
+ Classifier: Programming Language :: Python :: 3.11
35
+ Classifier: Programming Language :: Python :: 3.12
36
+ Classifier: Programming Language :: Python :: 3.13
37
+ Requires-Python: <3.13,>=3.11
38
+ Requires-Dist: blessed>=1.20.0
39
+ Requires-Dist: fastmcp==2.13.3
40
+ Requires-Dist: httpx<1,>=0.23.0
41
+ Requires-Dist: mcp<1.23,>1.21.1
42
+ Requires-Dist: openai>=2.8.1
43
+ Requires-Dist: packaging>=21.0
44
+ Requires-Dist: prompt-toolkit==3.0.51
45
+ Requires-Dist: pydantic-settings<3,>=2.2
46
+ Requires-Dist: pydantic<3,>=2.6
47
+ Requires-Dist: questionary==2.1.0
48
+ Requires-Dist: rich>=13.0.0
49
+ Requires-Dist: scarf-sdk>=0.1.0
50
+ Requires-Dist: toml>=0.10.2
51
+ Requires-Dist: typer>=0.9.0
52
+ Requires-Dist: watchfiles>=0.21.0
53
+ Provides-Extra: agent
54
+ Requires-Dist: anthropic>=0.75; extra == 'agent'
55
+ Requires-Dist: datasets>=2.14.0; extra == 'agent'
56
+ Requires-Dist: google-genai; extra == 'agent'
57
+ Requires-Dist: langchain>=1.1.0; extra == 'agent'
58
+ Requires-Dist: mcp-use==1.5.0; extra == 'agent'
59
+ Requires-Dist: openai-agents; extra == 'agent'
60
+ Requires-Dist: pillow>=11.1.0; extra == 'agent'
61
+ Requires-Dist: tornado>=6.5.2; extra == 'agent'
62
+ Provides-Extra: agents
63
+ Requires-Dist: anthropic>=0.75; extra == 'agents'
64
+ Requires-Dist: datasets>=2.14.0; extra == 'agents'
65
+ Requires-Dist: google-genai; extra == 'agents'
66
+ Requires-Dist: langchain>=1.1.0; extra == 'agents'
67
+ Requires-Dist: mcp-use==1.5.0; extra == 'agents'
68
+ Requires-Dist: openai-agents; extra == 'agents'
69
+ Requires-Dist: pillow>=11.1.0; extra == 'agents'
70
+ Requires-Dist: tornado>=6.5.2; extra == 'agents'
71
+ Provides-Extra: bedrock
72
+ Requires-Dist: anthropic[bedrock]>=0.75; extra == 'bedrock'
73
+ Provides-Extra: dev
74
+ Requires-Dist: anthropic>=0.75; extra == 'dev'
75
+ Requires-Dist: datasets>=2.14.0; extra == 'dev'
76
+ Requires-Dist: dotenv>=0.9.9; extra == 'dev'
77
+ Requires-Dist: google-adk; extra == 'dev'
78
+ Requires-Dist: google-genai; extra == 'dev'
79
+ Requires-Dist: ipykernel; extra == 'dev'
80
+ Requires-Dist: ipython<9; extra == 'dev'
81
+ Requires-Dist: jupyter-client; extra == 'dev'
82
+ Requires-Dist: jupyter-core; extra == 'dev'
83
+ Requires-Dist: langchain>=1.1.0; extra == 'dev'
84
+ Requires-Dist: llama-index-core; extra == 'dev'
85
+ Requires-Dist: mcp-use==1.5.0; extra == 'dev'
86
+ Requires-Dist: openai-agents; extra == 'dev'
87
+ Requires-Dist: pillow>=11.1.0; extra == 'dev'
88
+ Requires-Dist: playwright; extra == 'dev'
89
+ Requires-Dist: pyautogui>=0.9.54; extra == 'dev'
90
+ Requires-Dist: pyright==1.1.407; extra == 'dev'
91
+ Requires-Dist: pytest-asyncio; extra == 'dev'
92
+ Requires-Dist: pytest-cov; extra == 'dev'
93
+ Requires-Dist: pytest-mock; extra == 'dev'
94
+ Requires-Dist: pytest<9,>=8.1.1; extra == 'dev'
95
+ Requires-Dist: ruff>=0.11.8; extra == 'dev'
96
+ Requires-Dist: tornado>=6.5.2; extra == 'dev'
97
+ Description-Content-Type: text/markdown
98
+
99
+ <div align="left">
100
+ <picture>
101
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo_dark.svg">
102
+ <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg">
103
+ <img src="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg" alt="HUD" width="150" style="margin-bottom: 24px;"/>
104
+ </picture>
105
+ </div>
106
+
107
+ The HUD SDK is an open-source Python toolkit for building, evaluating, and training AI agents. Use a unified API for any model provider, wrap your code as MCP environments, run A/B evals at scale, and train with reinforcement learning.
108
+
109
+ To learn more, check out our [Documentation](https://docs.hud.ai) and [API Reference](https://docs.hud.ai/reference).
110
+
111
+ [![PyPI](https://img.shields.io/pypi/v/hud-python?style=flat-square)](https://pypi.org/project/hud-python/)
112
+ [![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
113
+ [![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
114
+ [![Discord](https://img.shields.io/discord/1327447144772407390?label=Discord&logo=discord&style=flat-square)](https://discord.gg/wkjtmHYYjm)
115
+ [![X Follow](https://img.shields.io/twitter/follow/hud_evals?style=social)](https://x.com/intent/user?screen_name=hud_evals)
116
+ [![Shop](https://img.shields.io/badge/_-white.svg?label=shop&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAJCAYAAAAywQxIAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAACxMAAAsTAQCanBgAAAF6SURBVChTlZA9ixNhFIWf8yaTpFHRRMXCKpAZhCAYFvwoLHZhwUKw9A9YCJb+Bq0sxGbBQrTxX1j41dvIRAjGZbdwRUUGIzPMeyw2swS3WZ/ynHvP5VylafoAWAd+5Xm+wX+SpukmcMf29RDCZrD9BViz3f53+CjYngKZpD5A2/Y7SQBMJpOkKIprdV1vdzqdHzHGblmW9Ww2+5pl2TmAxWKxmM/nP8fj8cmqqtZijJ9sb0u6ABBWjh0riuIt8CqE8LGu66e2d5MkeQ8QY3xme7fb7T4ZjUbrZVl+jjFuSXoEXGxCDgIl9WzfAO5LSmzvNB771R6vzG4Bx0MIt/M8vwV8aLyDQNt70+n0G1AspaTxVln+aghQluVsKbvxVysflT9NQK/XO7R/SGiQ9Nt2aftElmWXJd1kv0kbeANQVdWl4XB4XtJouXaqNRgMHkrqS+r0+/3XwD1JXdungRfAVWBi+6WkK8D3EMJz22cl3W21WgNgx3YAzvwFd0Chdq03gKUAAAAASUVORK5CYII=&style=social)](https://shop.hud.ai)
117
+ [![Scarf](https://static.scarf.sh/a.png?x-pxid=6530ff33-4945-452b-81f9-626872593933)](https://scarf.sh)
118
+ [![Docs](https://img.shields.io/badge/docs-hud.ai-blue?style=flat-square)](https://docs.hud.ai)
119
+
120
+ ## Install
121
+
122
+ ```bash
123
+ pip install hud-python
124
+ ```
125
+
126
+ Get your API key at [hud.ai](https://hud.ai) and set it:
127
+
128
+ ```bash
129
+ export HUD_API_KEY=your-key-here
130
+ ```
131
+
132
+ > For CLI tools (`hud init`, `hud dev`, etc.): `uv tool install hud-python --python 3.12`
133
+
134
+ ![Agent running on SheetBench](https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/src/images/trace_sheet.gif)
135
+
136
+ ## Usage
137
+
138
+ ### Unified Model API
139
+
140
+ Use Claude, GPT, Gemini, or Grok through one OpenAI-compatible endpoint:
141
+
142
+ ```python
143
+ from openai import AsyncOpenAI
144
+ import os
145
+
146
+ client = AsyncOpenAI(
147
+ base_url="https://inference.hud.ai",
148
+ api_key=os.environ["HUD_API_KEY"]
149
+ )
150
+
151
+ response = await client.chat.completions.create(
152
+ model="claude-sonnet-4-5", # or gpt-4o, gemini-2.5-pro (https://hud.ai/models)
153
+ messages=[{"role": "user", "content": "Hello!"}]
154
+ )
155
+ ```
156
+
157
+ Every call is traced at [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/gateway)
158
+
159
+ ### Environments
160
+
161
+ Turn your code into tools agents can call. Define how to evaluate them:
162
+
163
+ ```python
164
+ from hud import Environment
165
+
166
+ env = Environment("my-env")
167
+
168
+ @env.tool()
169
+ def add(a: int, b: int) -> int:
170
+ """Add two numbers."""
171
+ return a + b
172
+
173
+ @env.scenario("solve-math")
174
+ async def solve_math(problem: str, answer: int):
175
+ response = yield problem # Prompt
176
+ yield 1.0 if str(answer) in response else 0.0 # Reward
177
+
178
+ async with env("solve-math", problem="What is 2+2?", answer=4) as ctx:
179
+ # Your agent logic here - call tools, get response
180
+ result = await ctx.call_tool("add", a=2, b=2)
181
+ await ctx.submit(f"The answer is {result}")
182
+
183
+ print(ctx.reward) # 1.0
184
+ ```
185
+
186
+ The agent runs between the yields. First yield sends the prompt, second yield scores the result. → [Docs](https://docs.hud.ai/quick-links/environments) · [Templates](https://hud.ai/environments)
187
+
188
+ ### A/B Evals
189
+
190
+ Test different models. Repeat runs to see the distribution:
191
+
192
+ ```python
193
+ from openai import AsyncOpenAI
194
+ import os
195
+
196
+ client = AsyncOpenAI(
197
+ base_url="https://inference.hud.ai",
198
+ api_key=os.environ["HUD_API_KEY"]
199
+ )
200
+
201
+ # Using the env from above
202
+ async with env("solve-math", problem="What is 2+2?", answer=4, variants={"model": ["gpt-4o", "claude-sonnet-4-5"]}, group=5) as ctx:
203
+ response = await client.chat.completions.create(
204
+ model=ctx.variants["model"],
205
+ messages=[{"role": "user", "content": ctx.prompt}],
206
+ tools=ctx.tools # Environment tools available to the model
207
+ )
208
+ await ctx.submit(response.choices[0].message.content)
209
+ ```
210
+
211
+ **Variants** test configurations. **Groups** repeat for distribution. Results stream to [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/ab-testing)
212
+
213
+ ### Deploy & Train
214
+
215
+ Push to GitHub, connect on hud.ai, run at scale:
216
+
217
+ ```bash
218
+ hud init # Scaffold environment
219
+ git push # Push to GitHub
220
+ # Connect on hud.ai → New → Environment
221
+ hud eval my-eval --model gpt-4o --group-size 100
222
+ # Or create and run tasks on the platform
223
+ ```
224
+
225
+ Every run generates training data. Use it to fine-tune or run RL. → [Docs](https://docs.hud.ai/quick-links/deploy)
226
+
227
+ ## Links
228
+
229
+ - 📖 [Documentation](https://docs.hud.ai)
230
+ - ⌨️ [CLI Reference](https://docs.hud.ai/reference/cli/overview)
231
+ - 🏆 [Leaderboards](https://hud.ai/leaderboards)
232
+ - 🌐 [Environment Templates](https://hud.ai/environments)
233
+ - 🤖 [Supported Models](https://hud.ai/models)
234
+ - 💬 [Discord](https://discord.gg/wkjtmHYYjm)
235
+
236
+ ## Enterprise
237
+
238
+ Building agents at scale? We work with teams on custom environments, benchmarks, and training.
239
+
240
+ [📅 Book a call](https://cal.com/jay-hud) · [📧 founders@hud.ai](mailto:founders@hud.ai)
241
+
242
+ ## Contributing
243
+
244
+ We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md).
245
+
246
+ Key areas: [Agents](hud/agents/) · [Tools](hud/tools/) · [Environments](https://hud.ai/environments)
247
+
248
+ <a href="https://github.com/hud-evals/hud-python/graphs/contributors">
249
+ <img src="https://contrib.rocks/image?repo=hud-evals/hud-python&max=50" />
250
+ </a>
251
+
252
+ ## Citation
253
+
254
+ ```bibtex
255
+ @software{hud2025agentevalplatform,
256
+ author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
257
+ title = {HUD: An Evaluation and RL Environments Platform for Agents},
258
+ date = {2025-04},
259
+ url = {https://github.com/hud-evals/hud-python},
260
+ langid = {en}
261
+ }
262
+ ```
263
+
264
+ MIT License · [LICENSE](LICENSE)
@@ -0,0 +1,166 @@
1
+ <div align="left">
2
+ <picture>
3
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo_dark.svg">
4
+ <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg">
5
+ <img src="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg" alt="HUD" width="150" style="margin-bottom: 24px;"/>
6
+ </picture>
7
+ </div>
8
+
9
+ The HUD SDK is an open-source Python toolkit for building, evaluating, and training AI agents. Use a unified API for any model provider, wrap your code as MCP environments, run A/B evals at scale, and train with reinforcement learning.
10
+
11
+ To learn more, check out our [Documentation](https://docs.hud.ai) and [API Reference](https://docs.hud.ai/reference).
12
+
13
+ [![PyPI](https://img.shields.io/pypi/v/hud-python?style=flat-square)](https://pypi.org/project/hud-python/)
14
+ [![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
15
+ [![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
16
+ [![Discord](https://img.shields.io/discord/1327447144772407390?label=Discord&logo=discord&style=flat-square)](https://discord.gg/wkjtmHYYjm)
17
+ [![X Follow](https://img.shields.io/twitter/follow/hud_evals?style=social)](https://x.com/intent/user?screen_name=hud_evals)
18
+ [![Shop](https://img.shields.io/badge/_-white.svg?label=shop&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAJCAYAAAAywQxIAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAACxMAAAsTAQCanBgAAAF6SURBVChTlZA9ixNhFIWf8yaTpFHRRMXCKpAZhCAYFvwoLHZhwUKw9A9YCJb+Bq0sxGbBQrTxX1j41dvIRAjGZbdwRUUGIzPMeyw2swS3WZ/ynHvP5VylafoAWAd+5Xm+wX+SpukmcMf29RDCZrD9BViz3f53+CjYngKZpD5A2/Y7SQBMJpOkKIprdV1vdzqdHzHGblmW9Ww2+5pl2TmAxWKxmM/nP8fj8cmqqtZijJ9sb0u6ABBWjh0riuIt8CqE8LGu66e2d5MkeQ8QY3xme7fb7T4ZjUbrZVl+jjFuSXoEXGxCDgIl9WzfAO5LSmzvNB771R6vzG4Bx0MIt/M8vwV8aLyDQNt70+n0G1AspaTxVln+aghQluVsKbvxVysflT9NQK/XO7R/SGiQ9Nt2aftElmWXJd1kv0kbeANQVdWl4XB4XtJouXaqNRgMHkrqS+r0+/3XwD1JXdungRfAVWBi+6WkK8D3EMJz22cl3W21WgNgx3YAzvwFd0Chdq03gKUAAAAASUVORK5CYII=&style=social)](https://shop.hud.ai)
19
+ [![Scarf](https://static.scarf.sh/a.png?x-pxid=6530ff33-4945-452b-81f9-626872593933)](https://scarf.sh)
20
+ [![Docs](https://img.shields.io/badge/docs-hud.ai-blue?style=flat-square)](https://docs.hud.ai)
21
+
22
+ ## Install
23
+
24
+ ```bash
25
+ pip install hud-python
26
+ ```
27
+
28
+ Get your API key at [hud.ai](https://hud.ai) and set it:
29
+
30
+ ```bash
31
+ export HUD_API_KEY=your-key-here
32
+ ```
33
+
34
+ > For CLI tools (`hud init`, `hud dev`, etc.): `uv tool install hud-python --python 3.12`
35
+
36
+ ![Agent running on SheetBench](https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/src/images/trace_sheet.gif)
37
+
38
+ ## Usage
39
+
40
+ ### Unified Model API
41
+
42
+ Use Claude, GPT, Gemini, or Grok through one OpenAI-compatible endpoint:
43
+
44
+ ```python
45
+ from openai import AsyncOpenAI
46
+ import os
47
+
48
+ client = AsyncOpenAI(
49
+ base_url="https://inference.hud.ai",
50
+ api_key=os.environ["HUD_API_KEY"]
51
+ )
52
+
53
+ response = await client.chat.completions.create(
54
+ model="claude-sonnet-4-5", # or gpt-4o, gemini-2.5-pro (https://hud.ai/models)
55
+ messages=[{"role": "user", "content": "Hello!"}]
56
+ )
57
+ ```
58
+
59
+ Every call is traced at [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/gateway)
60
+
61
+ ### Environments
62
+
63
+ Turn your code into tools agents can call. Define how to evaluate them:
64
+
65
+ ```python
66
+ from hud import Environment
67
+
68
+ env = Environment("my-env")
69
+
70
+ @env.tool()
71
+ def add(a: int, b: int) -> int:
72
+ """Add two numbers."""
73
+ return a + b
74
+
75
+ @env.scenario("solve-math")
76
+ async def solve_math(problem: str, answer: int):
77
+ response = yield problem # Prompt
78
+ yield 1.0 if str(answer) in response else 0.0 # Reward
79
+
80
+ async with env("solve-math", problem="What is 2+2?", answer=4) as ctx:
81
+ # Your agent logic here - call tools, get response
82
+ result = await ctx.call_tool("add", a=2, b=2)
83
+ await ctx.submit(f"The answer is {result}")
84
+
85
+ print(ctx.reward) # 1.0
86
+ ```
87
+
88
+ The agent runs between the yields. First yield sends the prompt, second yield scores the result. → [Docs](https://docs.hud.ai/quick-links/environments) · [Templates](https://hud.ai/environments)
89
+
90
+ ### A/B Evals
91
+
92
+ Test different models. Repeat runs to see the distribution:
93
+
94
+ ```python
95
+ from openai import AsyncOpenAI
96
+ import os
97
+
98
+ client = AsyncOpenAI(
99
+ base_url="https://inference.hud.ai",
100
+ api_key=os.environ["HUD_API_KEY"]
101
+ )
102
+
103
+ # Using the env from above
104
+ async with env("solve-math", problem="What is 2+2?", answer=4, variants={"model": ["gpt-4o", "claude-sonnet-4-5"]}, group=5) as ctx:
105
+ response = await client.chat.completions.create(
106
+ model=ctx.variants["model"],
107
+ messages=[{"role": "user", "content": ctx.prompt}],
108
+ tools=ctx.tools # Environment tools available to the model
109
+ )
110
+ await ctx.submit(response.choices[0].message.content)
111
+ ```
112
+
113
+ **Variants** test configurations. **Groups** repeat for distribution. Results stream to [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/ab-testing)
114
+
115
+ ### Deploy & Train
116
+
117
+ Push to GitHub, connect on hud.ai, run at scale:
118
+
119
+ ```bash
120
+ hud init # Scaffold environment
121
+ git push # Push to GitHub
122
+ # Connect on hud.ai → New → Environment
123
+ hud eval my-eval --model gpt-4o --group-size 100
124
+ # Or create and run tasks on the platform
125
+ ```
126
+
127
+ Every run generates training data. Use it to fine-tune or run RL. → [Docs](https://docs.hud.ai/quick-links/deploy)
128
+
129
+ ## Links
130
+
131
+ - 📖 [Documentation](https://docs.hud.ai)
132
+ - ⌨️ [CLI Reference](https://docs.hud.ai/reference/cli/overview)
133
+ - 🏆 [Leaderboards](https://hud.ai/leaderboards)
134
+ - 🌐 [Environment Templates](https://hud.ai/environments)
135
+ - 🤖 [Supported Models](https://hud.ai/models)
136
+ - 💬 [Discord](https://discord.gg/wkjtmHYYjm)
137
+
138
+ ## Enterprise
139
+
140
+ Building agents at scale? We work with teams on custom environments, benchmarks, and training.
141
+
142
+ [📅 Book a call](https://cal.com/jay-hud) · [📧 founders@hud.ai](mailto:founders@hud.ai)
143
+
144
+ ## Contributing
145
+
146
+ We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md).
147
+
148
+ Key areas: [Agents](hud/agents/) · [Tools](hud/tools/) · [Environments](https://hud.ai/environments)
149
+
150
+ <a href="https://github.com/hud-evals/hud-python/graphs/contributors">
151
+ <img src="https://contrib.rocks/image?repo=hud-evals/hud-python&max=50" />
152
+ </a>
153
+
154
+ ## Citation
155
+
156
+ ```bibtex
157
+ @software{hud2025agentevalplatform,
158
+ author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
159
+ title = {HUD: An Evaluation and RL Environments Platform for Agents},
160
+ date = {2025-04},
161
+ url = {https://github.com/hud-evals/hud-python},
162
+ langid = {en}
163
+ }
164
+ ```
165
+
166
+ MIT License · [LICENSE](LICENSE)
@@ -0,0 +1,127 @@
1
+ # Examples
2
+
3
+ A collection of examples demonstrating HUD SDK usage patterns.
4
+
5
+ ## Quick Start
6
+
7
+ ### 00_agent_env.py
8
+ Minimal MCP server and client in one file. Shows the basic agent-environment communication pattern using `hud.eval()`.
9
+
10
+ ```bash
11
+ python examples/00_agent_env.py
12
+ ```
13
+
14
+ ### 01_agent_lifecycle.py
15
+ Complete agent lifecycle demonstrating:
16
+ - v5 Task format with Environment and scenario
17
+ - `hud.eval()` context for connection and tracing
18
+ - Agent initialization and execution
19
+ - Automatic scenario setup/evaluation
20
+
21
+ ```bash
22
+ python examples/01_agent_lifecycle.py
23
+ ```
24
+
25
+ > Requires `HUD_API_KEY` and `ANTHROPIC_API_KEY` environment variables.
26
+
27
+ ## Agent Examples
28
+
29
+ ### 02_claude_agent.py
30
+ Claude agent with computer use capabilities for browser automation.
31
+
32
+ ```bash
33
+ python examples/02_claude_agent.py
34
+ ```
35
+
36
+ > Requires `HUD_API_KEY` and `ANTHROPIC_API_KEY`.
37
+
38
+ ### 03_openai_compatible_agent.py
39
+ OpenAI-compatible chat.completions agent with both text and browser 2048 environments.
40
+
41
+ ```bash
42
+ export OPENAI_API_KEY=your-key
43
+ # export OPENAI_BASE_URL=http://localhost:8000/v1 # for local servers (e.g., vllm)
44
+
45
+ python examples/03_openai_compatible_agent.py --mode text # text environment
46
+ python examples/03_openai_compatible_agent.py --mode browser # browser environment
47
+ ```
48
+
49
+ > Requires Docker for local environment execution.
50
+
51
+ ### 04_grounded_agent.py
52
+ Grounded agent that separates visual grounding (element detection) from high-level reasoning.
53
+
54
+ ```bash
55
+ export OPENAI_API_KEY=your-key
56
+ export OPENROUTER_API_KEY=your-key
57
+
58
+ python examples/04_grounded_agent.py
59
+ ```
60
+
61
+ > Requires Docker and API keys for both OpenAI and OpenRouter.
62
+
63
+ ### 05_custom_agent.py
64
+ Build a custom MCPAgent using HUD Gateway for unified model access:
65
+ - No need for individual provider API keys
66
+ - Works with Anthropic, OpenAI, Gemini, OpenRouter models
67
+ - Automatic tracing with `@hud.instrument`
68
+
69
+ ```bash
70
+ HUD_API_KEY=sk-hud-... python examples/05_custom_agent.py
71
+ ```
72
+
73
+ ## Dataset Evaluation
74
+
75
+ ### run_evaluation.py
76
+ Generic dataset evaluation runner using the programmatic API.
77
+
78
+ ```bash
79
+ # Run all tasks in a dataset
80
+ python examples/run_evaluation.py hud-evals/SheetBench-50
81
+
82
+ # Run specific tasks by index
83
+ python examples/run_evaluation.py hud-evals/SheetBench-50 --task-ids 0 1 2
84
+
85
+ # Use different agent and concurrency
86
+ python examples/run_evaluation.py hud-evals/OSWorld-Verified-Gold --agent operator --max-concurrent 50
87
+ ```
88
+
89
+ For production evaluations, prefer the CLI: `hud eval --help`
90
+
91
+ ## Key Concepts
92
+
93
+ ### v5 Task Format
94
+
95
+ The v5 Task format is the recommended way to define evaluation tasks:
96
+
97
+ ```python
98
+ from hud.eval.task import Task
99
+
100
+ # Simple task with hub environment
101
+ task = Task(
102
+ env={"name": "browser"}, # Connect to browser hub
103
+ scenario="checkout", # Scenario to run
104
+ args={"user_id": "alice"}, # Scenario arguments
105
+ )
106
+
107
+ # Task with local Docker environment
108
+ env = hud.Environment("my-env")
109
+ env.connect_local(command="docker", args=["run", "--rm", "-i", "my-image"])
110
+ task = Task(env=env, scenario="test")
111
+ ```
112
+
113
+ ### Using hud.eval()
114
+
115
+ All examples use `hud.eval()` as the primary entry point:
116
+
117
+ ```python
118
+ async with hud.eval(task, name="my-eval", variants={"model": "gpt-4o"}) as ctx:
119
+ result = await agent.run(ctx, max_steps=10)
120
+ print(f"Reward: {ctx.reward}")
121
+ ```
122
+
123
+ The context manager handles:
124
+ - Environment connection (MCP servers start)
125
+ - Scenario setup execution
126
+ - Telemetry and tracing
127
+ - Automatic scenario evaluation on exit
@@ -0,0 +1,50 @@
1
+ """hud-python.
2
+
3
+ tools for building, evaluating, and training AI agents.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import warnings
9
+
10
+ # Apply patches to third-party libraries early, before other imports
11
+ from . import patches as _patches # noqa: F401
12
+ from .environment import Environment
13
+ from .eval import EvalContext
14
+ from .eval import run_eval as eval
15
+ from .telemetry.instrument import instrument
16
+
17
+
18
+ def trace(*args: object, **kwargs: object) -> EvalContext:
19
+ """Deprecated: Use hud.eval() instead.
20
+
21
+ .. deprecated:: 0.5.1
22
+ hud.trace() is deprecated. Use hud.eval() or env.eval() instead.
23
+ """
24
+ warnings.warn(
25
+ "hud.trace() is deprecated. Use hud.eval() or env.eval() instead.",
26
+ DeprecationWarning,
27
+ stacklevel=2,
28
+ )
29
+ return eval(*args, **kwargs) # type: ignore[arg-type]
30
+
31
+
32
+ __all__ = [
33
+ "Environment",
34
+ "EvalContext",
35
+ "eval",
36
+ "instrument",
37
+ "trace", # Deprecated alias for eval
38
+ ]
39
+
40
+ try:
41
+ from .version import __version__
42
+ except ImportError:
43
+ __version__ = "unknown"
44
+
45
+ try:
46
+ from .utils.pretty_errors import install_pretty_errors
47
+
48
+ install_pretty_errors()
49
+ except Exception: # noqa: S110
50
+ pass
@@ -0,0 +1,19 @@
1
+ from __future__ import annotations
2
+
3
+ from .base import MCPAgent
4
+ from .openai import OpenAIAgent
5
+ from .openai_chat import OpenAIChatAgent
6
+ from .operator import OperatorAgent
7
+
8
+ # Note: These agents are not exported here to avoid requiring optional dependencies.
9
+ # Import directly if needed:
10
+ # from hud.agents.claude import ClaudeAgent # requires anthropic
11
+ # from hud.agents.gemini import GeminiAgent # requires google-genai
12
+ # from hud.agents.gemini_cua import GeminiCUAAgent # requires google-genai
13
+
14
+ __all__ = [
15
+ "MCPAgent",
16
+ "OpenAIAgent",
17
+ "OpenAIChatAgent",
18
+ "OperatorAgent",
19
+ ]