hud-python 0.4.74__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (405) hide show
  1. hud_python-0.5.0/PKG-INFO +251 -0
  2. hud_python-0.5.0/README.md +153 -0
  3. hud_python-0.5.0/examples/README.md +127 -0
  4. hud_python-0.5.0/hud/__init__.py +50 -0
  5. hud_python-0.5.0/hud/agents/__init__.py +19 -0
  6. hud_python-0.5.0/hud/agents/base.py +565 -0
  7. {hud_python-0.4.74 → hud_python-0.5.0}/hud/agents/claude.py +9 -23
  8. {hud_python-0.4.74 → hud_python-0.5.0}/hud/agents/gemini.py +9 -23
  9. {hud_python-0.4.74 → hud_python-0.5.0}/hud/agents/gemini_cua.py +8 -5
  10. {hud_python-0.4.74 → hud_python-0.5.0}/hud/agents/grounded_openai.py +10 -21
  11. hud_python-0.5.0/hud/agents/misc/integration_test_agent.py +87 -0
  12. {hud_python-0.4.74 → hud_python-0.5.0}/hud/agents/openai.py +9 -17
  13. {hud_python-0.4.74 → hud_python-0.5.0}/hud/agents/openai_chat.py +16 -14
  14. {hud_python-0.4.74 → hud_python-0.5.0}/hud/agents/operator.py +6 -6
  15. hud_python-0.5.0/hud/agents/tests/conftest.py +133 -0
  16. hud_python-0.5.0/hud/agents/tests/test_base.py +352 -0
  17. hud_python-0.5.0/hud/agents/tests/test_base_runtime.py +233 -0
  18. hud_python-0.5.0/hud/agents/tests/test_claude.py +518 -0
  19. {hud_python-0.4.74 → hud_python-0.5.0}/hud/agents/tests/test_client.py +9 -10
  20. hud_python-0.5.0/hud/agents/tests/test_gemini.py +369 -0
  21. {hud_python-0.4.74 → hud_python-0.5.0}/hud/agents/tests/test_grounded_openai_agent.py +17 -6
  22. hud_python-0.5.0/hud/agents/tests/test_openai.py +449 -0
  23. {hud_python-0.4.74 → hud_python-0.5.0}/hud/agents/tests/test_operator.py +105 -51
  24. hud_python-0.5.0/hud/agents/tests/test_run_eval.py +179 -0
  25. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/__init__.py +111 -128
  26. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/analyze.py +43 -5
  27. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/build.py +74 -12
  28. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/debug.py +8 -5
  29. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/dev.py +84 -33
  30. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/eval.py +130 -113
  31. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/flows/dev.py +2 -2
  32. hud_python-0.5.0/hud/cli/flows/init.py +191 -0
  33. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/flows/tasks.py +25 -27
  34. hud_python-0.5.0/hud/cli/flows/templates.py +151 -0
  35. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/rft.py +4 -8
  36. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/rft_status.py +1 -1
  37. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_analyze.py +5 -5
  38. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_analyze_module.py +4 -4
  39. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_build.py +7 -3
  40. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_cli_root.py +13 -7
  41. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_convert.py +19 -25
  42. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_debug.py +12 -10
  43. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_dev.py +36 -2
  44. hud_python-0.5.0/hud/cli/tests/test_eval.py +251 -0
  45. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_registry.py +1 -1
  46. {hud_python-0.4.74/hud/cli/rl → hud_python-0.5.0/hud/cli/utils}/celebrate.py +14 -12
  47. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/config.py +18 -1
  48. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/docker.py +8 -4
  49. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/interactive.py +7 -3
  50. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/metadata.py +69 -0
  51. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/server.py +2 -2
  52. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/source_hash.py +2 -2
  53. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/tests/test_env_check.py +1 -1
  54. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/tests/test_interactive_module.py +1 -1
  55. {hud_python-0.4.74/hud/cli/rl → hud_python-0.5.0/hud/cli/utils}/viewer.py +2 -2
  56. {hud_python-0.4.74 → hud_python-0.5.0}/hud/clients/__init__.py +4 -3
  57. {hud_python-0.4.74 → hud_python-0.5.0}/hud/clients/base.py +133 -19
  58. hud_python-0.5.0/hud/clients/environment.py +51 -0
  59. {hud_python-0.4.74 → hud_python-0.5.0}/hud/clients/fastmcp.py +7 -1
  60. {hud_python-0.4.74 → hud_python-0.5.0}/hud/clients/mcp_use.py +29 -11
  61. hud_python-0.5.0/hud/clients/tests/test_analyze_scenarios.py +206 -0
  62. hud_python-0.5.0/hud/datasets/__init__.py +36 -0
  63. hud_python-0.5.0/hud/datasets/loader.py +327 -0
  64. hud_python-0.5.0/hud/datasets/runner.py +212 -0
  65. hud_python-0.5.0/hud/datasets/tests/test_loader.py +221 -0
  66. {hud_python-0.4.74 → hud_python-0.5.0}/hud/datasets/tests/test_utils.py +61 -65
  67. hud_python-0.5.0/hud/datasets/utils.py +291 -0
  68. hud_python-0.5.0/hud/environment/__init__.py +50 -0
  69. hud_python-0.5.0/hud/environment/connection.py +206 -0
  70. hud_python-0.5.0/hud/environment/connectors/__init__.py +33 -0
  71. hud_python-0.5.0/hud/environment/connectors/base.py +68 -0
  72. hud_python-0.5.0/hud/environment/connectors/local.py +177 -0
  73. hud_python-0.5.0/hud/environment/connectors/mcp_config.py +109 -0
  74. hud_python-0.5.0/hud/environment/connectors/openai.py +101 -0
  75. hud_python-0.5.0/hud/environment/connectors/remote.py +173 -0
  76. hud_python-0.5.0/hud/environment/environment.py +686 -0
  77. hud_python-0.5.0/hud/environment/integrations/__init__.py +45 -0
  78. hud_python-0.5.0/hud/environment/integrations/adk.py +67 -0
  79. hud_python-0.5.0/hud/environment/integrations/anthropic.py +196 -0
  80. hud_python-0.5.0/hud/environment/integrations/gemini.py +92 -0
  81. hud_python-0.5.0/hud/environment/integrations/langchain.py +82 -0
  82. hud_python-0.5.0/hud/environment/integrations/llamaindex.py +68 -0
  83. hud_python-0.5.0/hud/environment/integrations/openai.py +238 -0
  84. hud_python-0.5.0/hud/environment/mock.py +306 -0
  85. hud_python-0.5.0/hud/environment/router.py +112 -0
  86. hud_python-0.5.0/hud/environment/scenarios.py +456 -0
  87. hud_python-0.5.0/hud/environment/tests/__init__.py +1 -0
  88. hud_python-0.5.0/hud/environment/tests/test_connection.py +317 -0
  89. hud_python-0.5.0/hud/environment/tests/test_connectors.py +218 -0
  90. hud_python-0.5.0/hud/environment/tests/test_environment.py +161 -0
  91. hud_python-0.5.0/hud/environment/tests/test_integrations.py +257 -0
  92. hud_python-0.5.0/hud/environment/tests/test_local_connectors.py +201 -0
  93. hud_python-0.5.0/hud/environment/tests/test_scenarios.py +280 -0
  94. hud_python-0.5.0/hud/environment/tests/test_tools.py +208 -0
  95. hud_python-0.5.0/hud/environment/types.py +23 -0
  96. hud_python-0.5.0/hud/environment/utils/__init__.py +35 -0
  97. hud_python-0.5.0/hud/environment/utils/formats.py +215 -0
  98. hud_python-0.5.0/hud/environment/utils/schema.py +171 -0
  99. hud_python-0.5.0/hud/environment/utils/tool_wrappers.py +113 -0
  100. hud_python-0.5.0/hud/eval/__init__.py +67 -0
  101. hud_python-0.5.0/hud/eval/context.py +671 -0
  102. hud_python-0.5.0/hud/eval/display.py +299 -0
  103. hud_python-0.5.0/hud/eval/instrument.py +115 -0
  104. hud_python-0.5.0/hud/eval/manager.py +466 -0
  105. hud_python-0.5.0/hud/eval/parallel.py +268 -0
  106. hud_python-0.5.0/hud/eval/task.py +340 -0
  107. hud_python-0.5.0/hud/eval/tests/__init__.py +1 -0
  108. hud_python-0.5.0/hud/eval/tests/test_context.py +178 -0
  109. hud_python-0.5.0/hud/eval/tests/test_eval.py +210 -0
  110. hud_python-0.5.0/hud/eval/tests/test_manager.py +152 -0
  111. hud_python-0.5.0/hud/eval/tests/test_parallel.py +168 -0
  112. hud_python-0.5.0/hud/eval/tests/test_task.py +145 -0
  113. hud_python-0.5.0/hud/eval/types.py +63 -0
  114. hud_python-0.5.0/hud/eval/utils.py +183 -0
  115. hud_python-0.5.0/hud/patches/__init__.py +19 -0
  116. hud_python-0.5.0/hud/patches/mcp_patches.py +151 -0
  117. hud_python-0.5.0/hud/patches/warnings.py +54 -0
  118. {hud_python-0.4.74 → hud_python-0.5.0}/hud/samples/browser.py +4 -4
  119. {hud_python-0.4.74 → hud_python-0.5.0}/hud/server/low_level.py +2 -1
  120. {hud_python-0.4.74 → hud_python-0.5.0}/hud/server/router.py +6 -2
  121. {hud_python-0.4.74 → hud_python-0.5.0}/hud/server/server.py +10 -9
  122. {hud_python-0.4.74 → hud_python-0.5.0}/hud/server/tests/test_mcp_server_integration.py +11 -11
  123. {hud_python-0.4.74 → hud_python-0.5.0}/hud/server/tests/test_mcp_server_more.py +1 -1
  124. hud_python-0.5.0/hud/telemetry/__init__.py +27 -0
  125. hud_python-0.5.0/hud/telemetry/exporter.py +201 -0
  126. hud_python-0.5.0/hud/telemetry/instrument.py +284 -0
  127. hud_python-0.5.0/hud/telemetry/tests/test_eval_telemetry.py +356 -0
  128. hud_python-0.5.0/hud/telemetry/tests/test_exporter.py +258 -0
  129. {hud_python-0.4.74 → hud_python-0.5.0}/hud/telemetry/tests/test_instrument.py +8 -21
  130. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/__init__.py +3 -0
  131. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/base.py +6 -2
  132. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/computer/anthropic.py +2 -2
  133. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/computer/hud.py +6 -5
  134. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/computer/openai.py +2 -11
  135. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/computer/qwen.py +1 -1
  136. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/grounding/grounded_tool.py +13 -18
  137. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/grounding/grounder.py +10 -31
  138. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  139. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/jupyter.py +44 -27
  140. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/shell.py +13 -6
  141. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/tests/test_apply_patch.py +3 -1
  142. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/tests/test_computer.py +4 -1
  143. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/tests/test_computer_actions.py +2 -1
  144. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/tests/test_jupyter_tool.py +13 -8
  145. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/types.py +12 -0
  146. {hud_python-0.4.74 → hud_python-0.5.0}/hud/types.py +89 -24
  147. hud_python-0.5.0/hud/utils/mcp.py +47 -0
  148. {hud_python-0.4.74 → hud_python-0.5.0}/hud/utils/tests/test_mcp.py +1 -26
  149. {hud_python-0.4.74 → hud_python-0.5.0}/hud/utils/tests/test_version.py +1 -1
  150. {hud_python-0.4.74 → hud_python-0.5.0}/hud/version.py +1 -1
  151. {hud_python-0.4.74 → hud_python-0.5.0}/pyproject.toml +30 -41
  152. hud_python-0.4.74/PKG-INFO +0 -535
  153. hud_python-0.4.74/README.md +0 -415
  154. hud_python-0.4.74/environments/README.md +0 -956
  155. hud_python-0.4.74/environments/blank/README.md +0 -128
  156. hud_python-0.4.74/environments/blank/environment/README.md +0 -16
  157. hud_python-0.4.74/environments/blank/environment/pyproject.toml +0 -16
  158. hud_python-0.4.74/environments/blank/server/README.md +0 -21
  159. hud_python-0.4.74/environments/blank/server/pyproject.toml +0 -19
  160. hud_python-0.4.74/environments/browser/README.md +0 -191
  161. hud_python-0.4.74/environments/browser/browser-base/README.md +0 -58
  162. hud_python-0.4.74/environments/browser/environment/2048/README.md +0 -103
  163. hud_python-0.4.74/environments/browser/environment/2048/backend/pyproject.toml +0 -9
  164. hud_python-0.4.74/environments/browser/environment/README.md +0 -135
  165. hud_python-0.4.74/environments/browser/environment/pyproject.toml +0 -23
  166. hud_python-0.4.74/environments/browser/environment/todo/README.md +0 -85
  167. hud_python-0.4.74/environments/browser/environment/todo/backend/pyproject.toml +0 -15
  168. hud_python-0.4.74/environments/browser/pyproject.toml +0 -22
  169. hud_python-0.4.74/environments/browser/server/pyproject.toml +0 -21
  170. hud_python-0.4.74/environments/deepresearch/README.md +0 -165
  171. hud_python-0.4.74/environments/deepresearch/environment/pyproject.toml +0 -17
  172. hud_python-0.4.74/environments/deepresearch/pyproject.toml +0 -19
  173. hud_python-0.4.74/environments/deepresearch/server/pyproject.toml +0 -19
  174. hud_python-0.4.74/environments/jupyter/README.md +0 -68
  175. hud_python-0.4.74/environments/jupyter/server/pyproject.toml +0 -34
  176. hud_python-0.4.74/environments/online_mind2web/README.md +0 -36
  177. hud_python-0.4.74/environments/online_mind2web/pyproject.toml +0 -22
  178. hud_python-0.4.74/environments/online_mind2web/src/hud_controller/providers/README.md +0 -110
  179. hud_python-0.4.74/environments/remote_browser/README.md +0 -225
  180. hud_python-0.4.74/environments/remote_browser/pyproject.toml +0 -22
  181. hud_python-0.4.74/environments/remote_browser/src/hud_controller/providers/README.md +0 -110
  182. hud_python-0.4.74/environments/rubrics/README.md +0 -239
  183. hud_python-0.4.74/environments/rubrics/environment/pyproject.toml +0 -19
  184. hud_python-0.4.74/environments/rubrics/pyproject.toml +0 -19
  185. hud_python-0.4.74/environments/rubrics/server/pyproject.toml +0 -19
  186. hud_python-0.4.74/environments/text_2048/README.md +0 -102
  187. hud_python-0.4.74/environments/text_2048/pyproject.toml +0 -22
  188. hud_python-0.4.74/examples/README.md +0 -74
  189. hud_python-0.4.74/hud/__init__.py +0 -42
  190. hud_python-0.4.74/hud/agents/__init__.py +0 -19
  191. hud_python-0.4.74/hud/agents/base.py +0 -840
  192. hud_python-0.4.74/hud/agents/misc/integration_test_agent.py +0 -66
  193. hud_python-0.4.74/hud/agents/tests/conftest.py +0 -124
  194. hud_python-0.4.74/hud/agents/tests/test_base.py +0 -737
  195. hud_python-0.4.74/hud/agents/tests/test_base_runtime.py +0 -177
  196. hud_python-0.4.74/hud/agents/tests/test_claude.py +0 -523
  197. hud_python-0.4.74/hud/agents/tests/test_gemini.py +0 -600
  198. hud_python-0.4.74/hud/agents/tests/test_openai.py +0 -1083
  199. hud_python-0.4.74/hud/agents/utils.py +0 -50
  200. hud_python-0.4.74/hud/cli/rl/__init__.py +0 -180
  201. hud_python-0.4.74/hud/cli/rl/config.py +0 -101
  202. hud_python-0.4.74/hud/cli/rl/display.py +0 -133
  203. hud_python-0.4.74/hud/cli/rl/gpu.py +0 -63
  204. hud_python-0.4.74/hud/cli/rl/gpu_utils.py +0 -321
  205. hud_python-0.4.74/hud/cli/rl/local_runner.py +0 -607
  206. hud_python-0.4.74/hud/cli/rl/presets.py +0 -96
  207. hud_python-0.4.74/hud/cli/rl/remote_runner.py +0 -463
  208. hud_python-0.4.74/hud/cli/rl/rl_api.py +0 -150
  209. hud_python-0.4.74/hud/cli/rl/vllm.py +0 -179
  210. hud_python-0.4.74/hud/cli/rl/wait_utils.py +0 -89
  211. hud_python-0.4.74/hud/cli/tests/test_eval.py +0 -539
  212. hud_python-0.4.74/hud/datasets/__init__.py +0 -33
  213. hud_python-0.4.74/hud/datasets/runner.py +0 -298
  214. hud_python-0.4.74/hud/datasets/tests/test_runner.py +0 -67
  215. hud_python-0.4.74/hud/datasets/utils.py +0 -441
  216. hud_python-0.4.74/hud/misc/__init__.py +0 -1
  217. hud_python-0.4.74/hud/misc/claude_plays_pokemon.py +0 -292
  218. hud_python-0.4.74/hud/otel/__init__.py +0 -35
  219. hud_python-0.4.74/hud/otel/collector.py +0 -142
  220. hud_python-0.4.74/hud/otel/config.py +0 -183
  221. hud_python-0.4.74/hud/otel/context.py +0 -572
  222. hud_python-0.4.74/hud/otel/exporters.py +0 -543
  223. hud_python-0.4.74/hud/otel/instrumentation.py +0 -135
  224. hud_python-0.4.74/hud/otel/processors.py +0 -121
  225. hud_python-0.4.74/hud/otel/tests/test_instrumentation.py +0 -207
  226. hud_python-0.4.74/hud/otel/tests/test_processors.py +0 -197
  227. hud_python-0.4.74/hud/rl/README.md +0 -30
  228. hud_python-0.4.74/hud/rl/__init__.py +0 -1
  229. hud_python-0.4.74/hud/rl/actor.py +0 -178
  230. hud_python-0.4.74/hud/rl/buffer.py +0 -405
  231. hud_python-0.4.74/hud/rl/chat_template.jinja +0 -101
  232. hud_python-0.4.74/hud/rl/config.py +0 -193
  233. hud_python-0.4.74/hud/rl/distributed.py +0 -132
  234. hud_python-0.4.74/hud/rl/learner.py +0 -648
  235. hud_python-0.4.74/hud/rl/tests/__init__.py +0 -1
  236. hud_python-0.4.74/hud/rl/tests/test_learner.py +0 -186
  237. hud_python-0.4.74/hud/rl/train.py +0 -394
  238. hud_python-0.4.74/hud/rl/types.py +0 -101
  239. hud_python-0.4.74/hud/rl/utils/start_vllm_server.sh +0 -30
  240. hud_python-0.4.74/hud/rl/utils.py +0 -524
  241. hud_python-0.4.74/hud/rl/vllm_adapter.py +0 -143
  242. hud_python-0.4.74/hud/telemetry/__init__.py +0 -47
  243. hud_python-0.4.74/hud/telemetry/async_context.py +0 -345
  244. hud_python-0.4.74/hud/telemetry/instrument.py +0 -379
  245. hud_python-0.4.74/hud/telemetry/job.py +0 -355
  246. hud_python-0.4.74/hud/telemetry/replay.py +0 -74
  247. hud_python-0.4.74/hud/telemetry/tests/test_async_context.py +0 -515
  248. hud_python-0.4.74/hud/telemetry/tests/test_job.py +0 -555
  249. hud_python-0.4.74/hud/telemetry/tests/test_replay.py +0 -40
  250. hud_python-0.4.74/hud/telemetry/tests/test_trace.py +0 -241
  251. hud_python-0.4.74/hud/telemetry/trace.py +0 -166
  252. hud_python-0.4.74/hud/telemetry/utils.py +0 -42
  253. hud_python-0.4.74/hud/utils/mcp.py +0 -97
  254. hud_python-0.4.74/hud/utils/tasks.py +0 -186
  255. hud_python-0.4.74/hud/utils/tests/__init__.py +0 -0
  256. hud_python-0.4.74/hud/utils/tests/test_tasks.py +0 -356
  257. {hud_python-0.4.74 → hud_python-0.5.0}/.gitignore +0 -0
  258. {hud_python-0.4.74 → hud_python-0.5.0}/LICENSE +0 -0
  259. {hud_python-0.4.74 → hud_python-0.5.0}/hud/__main__.py +0 -0
  260. {hud_python-0.4.74 → hud_python-0.5.0}/hud/agents/misc/__init__.py +0 -0
  261. {hud_python-0.4.74 → hud_python-0.5.0}/hud/agents/misc/response_agent.py +0 -0
  262. {hud_python-0.4.74 → hud_python-0.5.0}/hud/agents/tests/__init__.py +0 -0
  263. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/__main__.py +0 -0
  264. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/clone.py +0 -0
  265. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/flows/__init__.py +0 -0
  266. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/flows/tests/__init__.py +0 -0
  267. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/flows/tests/test_dev.py +0 -0
  268. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/get.py +0 -0
  269. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/init.py +0 -0
  270. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/list_func.py +0 -0
  271. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/pull.py +0 -0
  272. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/push.py +0 -0
  273. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/remove.py +0 -0
  274. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/__init__.py +0 -0
  275. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_analyze_metadata.py +0 -0
  276. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_build_failure.py +0 -0
  277. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_build_module.py +0 -0
  278. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_cli_init.py +0 -0
  279. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_cli_main.py +0 -0
  280. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
  281. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_clone.py +0 -0
  282. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_cursor.py +0 -0
  283. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_eval_bedrock.py +0 -0
  284. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_init.py +0 -0
  285. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_list_func.py +0 -0
  286. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_main_module.py +0 -0
  287. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_mcp_server.py +0 -0
  288. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_pull.py +0 -0
  289. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_push.py +0 -0
  290. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_push_happy.py +0 -0
  291. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_push_wrapper.py +0 -0
  292. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/tests/test_utils.py +0 -0
  293. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/__init__.py +0 -0
  294. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/cursor.py +0 -0
  295. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/env_check.py +0 -0
  296. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/environment.py +0 -0
  297. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/git.py +0 -0
  298. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/local_runner.py +0 -0
  299. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/logging.py +0 -0
  300. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/package_runner.py +0 -0
  301. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/registry.py +0 -0
  302. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/remote_runner.py +0 -0
  303. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/runner.py +0 -0
  304. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/tasks.py +0 -0
  305. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/tests/__init__.py +0 -0
  306. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/tests/test_config.py +0 -0
  307. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/tests/test_docker.py +0 -0
  308. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/tests/test_docker_hints.py +0 -0
  309. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/tests/test_environment.py +0 -0
  310. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/tests/test_git.py +0 -0
  311. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/tests/test_local_runner.py +0 -0
  312. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/tests/test_logging_utils.py +0 -0
  313. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/tests/test_metadata.py +0 -0
  314. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/tests/test_package_runner.py +0 -0
  315. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/tests/test_registry_utils.py +0 -0
  316. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/tests/test_remote_runner.py +0 -0
  317. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/tests/test_runner_modules.py +0 -0
  318. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/tests/test_source_hash.py +0 -0
  319. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/tests/test_tasks.py +0 -0
  320. {hud_python-0.4.74 → hud_python-0.5.0}/hud/cli/utils/version_check.py +0 -0
  321. {hud_python-0.4.74 → hud_python-0.5.0}/hud/clients/README.md +0 -0
  322. {hud_python-0.4.74 → hud_python-0.5.0}/hud/clients/tests/__init__.py +0 -0
  323. {hud_python-0.4.74 → hud_python-0.5.0}/hud/clients/tests/test_client_integration.py +0 -0
  324. {hud_python-0.4.74 → hud_python-0.5.0}/hud/clients/tests/test_fastmcp.py +0 -0
  325. {hud_python-0.4.74 → hud_python-0.5.0}/hud/clients/tests/test_mcp_use_retry.py +0 -0
  326. {hud_python-0.4.74 → hud_python-0.5.0}/hud/clients/tests/test_protocol.py +0 -0
  327. {hud_python-0.4.74 → hud_python-0.5.0}/hud/clients/utils/__init__.py +0 -0
  328. {hud_python-0.4.74 → hud_python-0.5.0}/hud/clients/utils/mcp_use_retry.py +0 -0
  329. {hud_python-0.4.74 → hud_python-0.5.0}/hud/clients/utils/retry.py +0 -0
  330. {hud_python-0.4.74 → hud_python-0.5.0}/hud/clients/utils/retry_transport.py +0 -0
  331. {hud_python-0.4.74 → hud_python-0.5.0}/hud/datasets/tests/__init__.py +0 -0
  332. {hud_python-0.4.74 → hud_python-0.5.0}/hud/native/__init__.py +0 -0
  333. {hud_python-0.4.74 → hud_python-0.5.0}/hud/native/comparator.py +0 -0
  334. {hud_python-0.4.74 → hud_python-0.5.0}/hud/native/tests/__init__.py +0 -0
  335. {hud_python-0.4.74 → hud_python-0.5.0}/hud/native/tests/test_comparator.py +0 -0
  336. {hud_python-0.4.74 → hud_python-0.5.0}/hud/native/tests/test_native_init.py +0 -0
  337. {hud_python-0.4.74 → hud_python-0.5.0}/hud/py.typed +0 -0
  338. {hud_python-0.4.74 → hud_python-0.5.0}/hud/samples/__init__.py +0 -0
  339. {hud_python-0.4.74 → hud_python-0.5.0}/hud/server/__init__.py +0 -0
  340. {hud_python-0.4.74 → hud_python-0.5.0}/hud/server/context.py +0 -0
  341. {hud_python-0.4.74 → hud_python-0.5.0}/hud/server/helper/__init__.py +0 -0
  342. {hud_python-0.4.74 → hud_python-0.5.0}/hud/server/tests/__init__.py +0 -0
  343. {hud_python-0.4.74 → hud_python-0.5.0}/hud/server/tests/test_add_tool.py +0 -0
  344. {hud_python-0.4.74 → hud_python-0.5.0}/hud/server/tests/test_context.py +0 -0
  345. {hud_python-0.4.74 → hud_python-0.5.0}/hud/server/tests/test_mcp_server_handlers.py +0 -0
  346. {hud_python-0.4.74 → hud_python-0.5.0}/hud/server/tests/test_run_wrapper.py +0 -0
  347. {hud_python-0.4.74 → hud_python-0.5.0}/hud/server/tests/test_server_extra.py +0 -0
  348. {hud_python-0.4.74 → hud_python-0.5.0}/hud/server/tests/test_sigterm_runner.py +0 -0
  349. {hud_python-0.4.74 → hud_python-0.5.0}/hud/settings.py +0 -0
  350. {hud_python-0.4.74 → hud_python-0.5.0}/hud/shared/__init__.py +0 -0
  351. {hud_python-0.4.74 → hud_python-0.5.0}/hud/shared/exceptions.py +0 -0
  352. {hud_python-0.4.74 → hud_python-0.5.0}/hud/shared/hints.py +0 -0
  353. {hud_python-0.4.74 → hud_python-0.5.0}/hud/shared/requests.py +0 -0
  354. {hud_python-0.4.74/hud/otel → hud_python-0.5.0/hud/shared}/tests/__init__.py +0 -0
  355. {hud_python-0.4.74 → hud_python-0.5.0}/hud/shared/tests/test_exceptions.py +0 -0
  356. {hud_python-0.4.74 → hud_python-0.5.0}/hud/shared/tests/test_hints.py +0 -0
  357. {hud_python-0.4.74 → hud_python-0.5.0}/hud/shared/tests/test_requests.py +0 -0
  358. {hud_python-0.4.74/hud/shared → hud_python-0.5.0/hud/telemetry}/tests/__init__.py +0 -0
  359. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/apply_patch.py +0 -0
  360. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/bash.py +0 -0
  361. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/computer/__init__.py +0 -0
  362. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/computer/gemini.py +0 -0
  363. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/computer/settings.py +0 -0
  364. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/edit.py +0 -0
  365. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/executors/__init__.py +0 -0
  366. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/executors/base.py +0 -0
  367. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/executors/pyautogui.py +0 -0
  368. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/executors/tests/__init__.py +0 -0
  369. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/executors/tests/test_base_executor.py +0 -0
  370. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  371. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/executors/xdo.py +0 -0
  372. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/grounding/__init__.py +0 -0
  373. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/grounding/config.py +0 -0
  374. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/grounding/tests/__init__.py +0 -0
  375. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/playwright.py +0 -0
  376. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/response.py +0 -0
  377. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/submit.py +0 -0
  378. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/tests/__init__.py +0 -0
  379. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/tests/test_base.py +0 -0
  380. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/tests/test_bash.py +0 -0
  381. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/tests/test_bash_extended.py +0 -0
  382. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/tests/test_edit.py +0 -0
  383. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/tests/test_init.py +0 -0
  384. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/tests/test_playwright_tool.py +0 -0
  385. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/tests/test_response.py +0 -0
  386. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/tests/test_shell.py +0 -0
  387. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/tests/test_submit.py +0 -0
  388. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/tests/test_tools.py +0 -0
  389. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/tests/test_tools_init.py +0 -0
  390. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/tests/test_types.py +0 -0
  391. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/tests/test_utils.py +0 -0
  392. {hud_python-0.4.74 → hud_python-0.5.0}/hud/tools/utils.py +0 -0
  393. {hud_python-0.4.74 → hud_python-0.5.0}/hud/utils/__init__.py +0 -0
  394. {hud_python-0.4.74 → hud_python-0.5.0}/hud/utils/env.py +0 -0
  395. {hud_python-0.4.74 → hud_python-0.5.0}/hud/utils/hud_console.py +0 -0
  396. {hud_python-0.4.74 → hud_python-0.5.0}/hud/utils/pretty_errors.py +0 -0
  397. {hud_python-0.4.74 → hud_python-0.5.0}/hud/utils/strict_schema.py +0 -0
  398. {hud_python-0.4.74 → hud_python-0.5.0}/hud/utils/telemetry.py +0 -0
  399. {hud_python-0.4.74/hud/telemetry → hud_python-0.5.0/hud/utils}/tests/__init__.py +0 -0
  400. {hud_python-0.4.74 → hud_python-0.5.0}/hud/utils/tests/test_init.py +0 -0
  401. {hud_python-0.4.74 → hud_python-0.5.0}/hud/utils/tests/test_pretty_errors.py +0 -0
  402. {hud_python-0.4.74 → hud_python-0.5.0}/hud/utils/tests/test_telemetry.py +0 -0
  403. {hud_python-0.4.74 → hud_python-0.5.0}/hud/utils/tests/test_tool_shorthand.py +0 -0
  404. {hud_python-0.4.74 → hud_python-0.5.0}/hud/utils/tool_shorthand.py +0 -0
  405. {hud_python-0.4.74 → hud_python-0.5.0}/hud/utils/types.py +0 -0
@@ -0,0 +1,251 @@
1
+ Metadata-Version: 2.4
2
+ Name: hud-python
3
+ Version: 0.5.0
4
+ Summary: SDK for the HUD platform.
5
+ Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
+ Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
7
+ Project-URL: Documentation, https://docs.hud.ai
8
+ Author-email: HUD <founders@hud.ai>
9
+ License: MIT License
10
+
11
+ Copyright (c) 2025 Human Union Data, Inc
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Classifier: Development Status :: 4 - Beta
32
+ Classifier: Intended Audience :: Developers
33
+ Classifier: Programming Language :: Python :: 3
34
+ Classifier: Programming Language :: Python :: 3.11
35
+ Classifier: Programming Language :: Python :: 3.12
36
+ Classifier: Programming Language :: Python :: 3.13
37
+ Requires-Python: <3.13,>=3.11
38
+ Requires-Dist: blessed>=1.20.0
39
+ Requires-Dist: fastmcp==2.13.3
40
+ Requires-Dist: httpx<1,>=0.23.0
41
+ Requires-Dist: mcp<1.23,>1.21.1
42
+ Requires-Dist: openai>=2.8.1
43
+ Requires-Dist: packaging>=21.0
44
+ Requires-Dist: prompt-toolkit==3.0.51
45
+ Requires-Dist: pydantic-settings<3,>=2.2
46
+ Requires-Dist: pydantic<3,>=2.6
47
+ Requires-Dist: questionary==2.1.0
48
+ Requires-Dist: rich>=13.0.0
49
+ Requires-Dist: scarf-sdk>=0.1.0
50
+ Requires-Dist: toml>=0.10.2
51
+ Requires-Dist: typer>=0.9.0
52
+ Requires-Dist: watchfiles>=0.21.0
53
+ Provides-Extra: agent
54
+ Requires-Dist: anthropic>=0.75; extra == 'agent'
55
+ Requires-Dist: datasets>=2.14.0; extra == 'agent'
56
+ Requires-Dist: google-genai; extra == 'agent'
57
+ Requires-Dist: langchain>=1.1.0; extra == 'agent'
58
+ Requires-Dist: mcp-use==1.5.0; extra == 'agent'
59
+ Requires-Dist: openai-agents; extra == 'agent'
60
+ Requires-Dist: pillow>=11.1.0; extra == 'agent'
61
+ Requires-Dist: tornado>=6.5.2; extra == 'agent'
62
+ Provides-Extra: agents
63
+ Requires-Dist: anthropic>=0.75; extra == 'agents'
64
+ Requires-Dist: datasets>=2.14.0; extra == 'agents'
65
+ Requires-Dist: google-genai; extra == 'agents'
66
+ Requires-Dist: langchain>=1.1.0; extra == 'agents'
67
+ Requires-Dist: mcp-use==1.5.0; extra == 'agents'
68
+ Requires-Dist: openai-agents; extra == 'agents'
69
+ Requires-Dist: pillow>=11.1.0; extra == 'agents'
70
+ Requires-Dist: tornado>=6.5.2; extra == 'agents'
71
+ Provides-Extra: bedrock
72
+ Requires-Dist: anthropic[bedrock]>=0.75; extra == 'bedrock'
73
+ Provides-Extra: dev
74
+ Requires-Dist: anthropic>=0.75; extra == 'dev'
75
+ Requires-Dist: datasets>=2.14.0; extra == 'dev'
76
+ Requires-Dist: dotenv>=0.9.9; extra == 'dev'
77
+ Requires-Dist: google-adk; extra == 'dev'
78
+ Requires-Dist: google-genai; extra == 'dev'
79
+ Requires-Dist: ipykernel; extra == 'dev'
80
+ Requires-Dist: ipython<9; extra == 'dev'
81
+ Requires-Dist: jupyter-client; extra == 'dev'
82
+ Requires-Dist: jupyter-core; extra == 'dev'
83
+ Requires-Dist: langchain>=1.1.0; extra == 'dev'
84
+ Requires-Dist: llama-index-core; extra == 'dev'
85
+ Requires-Dist: mcp-use==1.5.0; extra == 'dev'
86
+ Requires-Dist: openai-agents; extra == 'dev'
87
+ Requires-Dist: pillow>=11.1.0; extra == 'dev'
88
+ Requires-Dist: playwright; extra == 'dev'
89
+ Requires-Dist: pyautogui>=0.9.54; extra == 'dev'
90
+ Requires-Dist: pyright==1.1.407; extra == 'dev'
91
+ Requires-Dist: pytest-asyncio; extra == 'dev'
92
+ Requires-Dist: pytest-cov; extra == 'dev'
93
+ Requires-Dist: pytest-mock; extra == 'dev'
94
+ Requires-Dist: pytest<9,>=8.1.1; extra == 'dev'
95
+ Requires-Dist: ruff>=0.11.8; extra == 'dev'
96
+ Requires-Dist: tornado>=6.5.2; extra == 'dev'
97
+ Description-Content-Type: text/markdown
98
+
99
+ <div align="left">
100
+ <picture>
101
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo_dark.svg">
102
+ <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg">
103
+ <img src="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg" alt="HUD" width="150" style="margin-bottom: 24px;"/>
104
+ </picture>
105
+ </div>
106
+
107
+ The HUD SDK is an open-source Python toolkit for building, evaluating, and training AI agents. Use a unified API for any model provider, wrap your code as MCP environments, run A/B evals at scale, and train with reinforcement learning.
108
+
109
+ To learn more, check out our [Documentation](https://docs.hud.ai) and [API Reference](https://docs.hud.ai/reference).
110
+
111
+ [![PyPI](https://img.shields.io/pypi/v/hud-python?style=flat-square)](https://pypi.org/project/hud-python/)
112
+ [![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
113
+ [![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
114
+ [![Discord](https://img.shields.io/discord/1327447144772407390?label=Discord&logo=discord&style=flat-square)](https://discord.gg/wkjtmHYYjm)
115
+ [![X Follow](https://img.shields.io/twitter/follow/hud_evals?style=social)](https://x.com/intent/user?screen_name=hud_evals)
116
+ [![Shop](https://img.shields.io/badge/_-white.svg?label=shop&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAJCAYAAAAywQxIAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAACxMAAAsTAQCanBgAAAF6SURBVChTlZA9ixNhFIWf8yaTpFHRRMXCKpAZhCAYFvwoLHZhwUKw9A9YCJb+Bq0sxGbBQrTxX1j41dvIRAjGZbdwRUUGIzPMeyw2swS3WZ/ynHvP5VylafoAWAd+5Xm+wX+SpukmcMf29RDCZrD9BViz3f53+CjYngKZpD5A2/Y7SQBMJpOkKIprdV1vdzqdHzHGblmW9Ww2+5pl2TmAxWKxmM/nP8fj8cmqqtZijJ9sb0u6ABBWjh0riuIt8CqE8LGu66e2d5MkeQ8QY3xme7fb7T4ZjUbrZVl+jjFuSXoEXGxCDgIl9WzfAO5LSmzvNB771R6vzG4Bx0MIt/M8vwV8aLyDQNt70+n0G1AspaTxVln+aghQluVsKbvxVysflT9NQK/XO7R/SGiQ9Nt2aftElmWXJd1kv0kbeANQVdWl4XB4XtJouXaqNRgMHkrqS+r0+/3XwD1JXdungRfAVWBi+6WkK8D3EMJz22cl3W21WgNgx3YAzvwFd0Chdq03gKUAAAAASUVORK5CYII=&style=social)](https://shop.hud.ai)
117
+ [![Scarf](https://static.scarf.sh/a.png?x-pxid=6530ff33-4945-452b-81f9-626872593933)](https://scarf.sh)
118
+ [![Docs](https://img.shields.io/badge/docs-hud.ai-blue?style=flat-square)](https://docs.hud.ai)
119
+
120
+ ## Install
121
+
122
+ ```bash
123
+ pip install hud-python
124
+ ```
125
+
126
+ Get your API key at [hud.ai](https://hud.ai) and set it:
127
+
128
+ ```bash
129
+ export HUD_API_KEY=your-key-here
130
+ ```
131
+
132
+ > For CLI tools (`hud init`, `hud dev`, etc.): `uv tool install hud-python --python 3.12`
133
+
134
+ ![Agent running on SheetBench](https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/src/images/trace_sheet.gif)
135
+
136
+ ## Usage
137
+
138
+ ### Unified Model API
139
+
140
+ Use Claude, GPT, Gemini, or Grok through one OpenAI-compatible endpoint:
141
+
142
+ ```python
143
+ from openai import AsyncOpenAI
144
+ import os
145
+
146
+ client = AsyncOpenAI(
147
+ base_url="https://inference.hud.ai",
148
+ api_key=os.environ["HUD_API_KEY"]
149
+ )
150
+
151
+ response = await client.chat.completions.create(
152
+ model="claude-sonnet-4-5", # or gpt-4o, gemini-2.5-pro (https://hud.ai/models)
153
+ messages=[{"role": "user", "content": "Hello!"}]
154
+ )
155
+ ```
156
+
157
+ Every call is traced at [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/gateway)
158
+
159
+ ### Environments
160
+
161
+ Turn your code into tools agents can call. Define how to evaluate them:
162
+
163
+ ```python
164
+ from hud import Environment
165
+
166
+ env = Environment("my-env")
167
+
168
+ @env.tool()
169
+ def search(query: str) -> str:
170
+ """Search the knowledge base."""
171
+ return db.search(query)
172
+
173
+ @env.scenario("find-answer")
174
+ async def find_answer(question: str, answer: str):
175
+ response = yield f"Find: {question}" # Prompt
176
+ yield 1.0 if answer in response else 0.0 # Reward
177
+ ```
178
+
179
+ The agent runs between the yields. First yield sends the prompt, second yield scores the result. → [Docs](https://docs.hud.ai/quick-links/environments) · [Templates](https://hud.ai/environments)
180
+
181
+ ### A/B Evals
182
+
183
+ Test different models. Repeat runs to see the distribution:
184
+
185
+ ```python
186
+ import hud
187
+
188
+ task = env("find-answer", question="What is 2+2?", answer="4")
189
+
190
+ async with hud.eval(task, variants={"model": ["gpt-4o", "claude-sonnet-4-5"]}, group=5) as ctx:
191
+ response = await client.chat.completions.create(
192
+ model=ctx.variants["model"],
193
+ messages=[{"role": "user", "content": ctx.prompt}]
194
+ )
195
+ await ctx.submit(response.choices[0].message.content)
196
+ ```
197
+
198
+ **Variants** test configurations. **Groups** repeat for distribution. Results stream to [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/ab-testing)
199
+
200
+ ### Deploy & Train
201
+
202
+ Push to GitHub, connect on hud.ai, run at scale:
203
+
204
+ ```bash
205
+ hud init # Scaffold environment
206
+ git push # Push to GitHub
207
+ # Connect on hud.ai → New → Environment
208
+ hud eval my-org/my-eval --model gpt-4o --group-size 100
209
+ # Or create and run tasks on the platform
210
+ ```
211
+
212
+ Every run generates training data. Use it to fine-tune or run RL. → [Docs](https://docs.hud.ai/quick-links/deploy)
213
+
214
+ ## Links
215
+
216
+ - 📖 [Documentation](https://docs.hud.ai)
217
+ - ⌨️ [CLI Reference](https://docs.hud.ai/reference/cli/overview)
218
+ - 🏆 [Leaderboards](https://hud.ai/leaderboards)
219
+ - 🌐 [Environment Templates](https://hud.ai/environments)
220
+ - 🤖 [Supported Models](https://hud.ai/models)
221
+ - 💬 [Discord](https://discord.gg/wkjtmHYYjm)
222
+
223
+ ## Enterprise
224
+
225
+ Building agents at scale? We work with teams on custom environments, benchmarks, and training.
226
+
227
+ [📅 Book a call](https://cal.com/jay-hud) · [📧 founders@hud.ai](mailto:founders@hud.ai)
228
+
229
+ ## Contributing
230
+
231
+ We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md).
232
+
233
+ Key areas: [Agents](hud/agents/) · [Tools](hud/tools/) · [Environments](https://hud.ai/environments)
234
+
235
+ <a href="https://github.com/hud-evals/hud-python/graphs/contributors">
236
+ <img src="https://contrib.rocks/image?repo=hud-evals/hud-python&max=50" />
237
+ </a>
238
+
239
+ ## Citation
240
+
241
+ ```bibtex
242
+ @software{hud2025agentevalplatform,
243
+ author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
244
+ title = {HUD: An Evaluation and RL Environments Platform for Agents},
245
+ date = {2025-04},
246
+ url = {https://github.com/hud-evals/hud-python},
247
+ langid = {en}
248
+ }
249
+ ```
250
+
251
+ MIT License · [LICENSE](LICENSE)
@@ -0,0 +1,153 @@
1
+ <div align="left">
2
+ <picture>
3
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo_dark.svg">
4
+ <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg">
5
+ <img src="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg" alt="HUD" width="150" style="margin-bottom: 24px;"/>
6
+ </picture>
7
+ </div>
8
+
9
+ The HUD SDK is an open-source Python toolkit for building, evaluating, and training AI agents. Use a unified API for any model provider, wrap your code as MCP environments, run A/B evals at scale, and train with reinforcement learning.
10
+
11
+ To learn more, check out our [Documentation](https://docs.hud.ai) and [API Reference](https://docs.hud.ai/reference).
12
+
13
+ [![PyPI](https://img.shields.io/pypi/v/hud-python?style=flat-square)](https://pypi.org/project/hud-python/)
14
+ [![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
15
+ [![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
16
+ [![Discord](https://img.shields.io/discord/1327447144772407390?label=Discord&logo=discord&style=flat-square)](https://discord.gg/wkjtmHYYjm)
17
+ [![X Follow](https://img.shields.io/twitter/follow/hud_evals?style=social)](https://x.com/intent/user?screen_name=hud_evals)
18
+ [![Shop](https://img.shields.io/badge/_-white.svg?label=shop&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAJCAYAAAAywQxIAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAACxMAAAsTAQCanBgAAAF6SURBVChTlZA9ixNhFIWf8yaTpFHRRMXCKpAZhCAYFvwoLHZhwUKw9A9YCJb+Bq0sxGbBQrTxX1j41dvIRAjGZbdwRUUGIzPMeyw2swS3WZ/ynHvP5VylafoAWAd+5Xm+wX+SpukmcMf29RDCZrD9BViz3f53+CjYngKZpD5A2/Y7SQBMJpOkKIprdV1vdzqdHzHGblmW9Ww2+5pl2TmAxWKxmM/nP8fj8cmqqtZijJ9sb0u6ABBWjh0riuIt8CqE8LGu66e2d5MkeQ8QY3xme7fb7T4ZjUbrZVl+jjFuSXoEXGxCDgIl9WzfAO5LSmzvNB771R6vzG4Bx0MIt/M8vwV8aLyDQNt70+n0G1AspaTxVln+aghQluVsKbvxVysflT9NQK/XO7R/SGiQ9Nt2aftElmWXJd1kv0kbeANQVdWl4XB4XtJouXaqNRgMHkrqS+r0+/3XwD1JXdungRfAVWBi+6WkK8D3EMJz22cl3W21WgNgx3YAzvwFd0Chdq03gKUAAAAASUVORK5CYII=&style=social)](https://shop.hud.ai)
19
+ [![Scarf](https://static.scarf.sh/a.png?x-pxid=6530ff33-4945-452b-81f9-626872593933)](https://scarf.sh)
20
+ [![Docs](https://img.shields.io/badge/docs-hud.ai-blue?style=flat-square)](https://docs.hud.ai)
21
+
22
+ ## Install
23
+
24
+ ```bash
25
+ pip install hud-python
26
+ ```
27
+
28
+ Get your API key at [hud.ai](https://hud.ai) and set it:
29
+
30
+ ```bash
31
+ export HUD_API_KEY=your-key-here
32
+ ```
33
+
34
+ > For CLI tools (`hud init`, `hud dev`, etc.): `uv tool install hud-python --python 3.12`
35
+
36
+ ![Agent running on SheetBench](https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/src/images/trace_sheet.gif)
37
+
38
+ ## Usage
39
+
40
+ ### Unified Model API
41
+
42
+ Use Claude, GPT, Gemini, or Grok through one OpenAI-compatible endpoint:
43
+
44
+ ```python
45
+ from openai import AsyncOpenAI
46
+ import os
47
+
48
+ client = AsyncOpenAI(
49
+ base_url="https://inference.hud.ai",
50
+ api_key=os.environ["HUD_API_KEY"]
51
+ )
52
+
53
+ response = await client.chat.completions.create(
54
+ model="claude-sonnet-4-5", # or gpt-4o, gemini-2.5-pro (https://hud.ai/models)
55
+ messages=[{"role": "user", "content": "Hello!"}]
56
+ )
57
+ ```
58
+
59
+ Every call is traced at [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/gateway)
60
+
61
+ ### Environments
62
+
63
+ Turn your code into tools agents can call. Define how to evaluate them:
64
+
65
+ ```python
66
+ from hud import Environment
67
+
68
+ env = Environment("my-env")
69
+
70
+ @env.tool()
71
+ def search(query: str) -> str:
72
+ """Search the knowledge base."""
73
+ return db.search(query)
74
+
75
+ @env.scenario("find-answer")
76
+ async def find_answer(question: str, answer: str):
77
+ response = yield f"Find: {question}" # Prompt
78
+ yield 1.0 if answer in response else 0.0 # Reward
79
+ ```
80
+
81
+ The agent runs between the yields. First yield sends the prompt, second yield scores the result. → [Docs](https://docs.hud.ai/quick-links/environments) · [Templates](https://hud.ai/environments)
82
+
83
+ ### A/B Evals
84
+
85
+ Test different models. Repeat runs to see the distribution:
86
+
87
+ ```python
88
+ import hud
89
+
90
+ task = env("find-answer", question="What is 2+2?", answer="4")
91
+
92
+ async with hud.eval(task, variants={"model": ["gpt-4o", "claude-sonnet-4-5"]}, group=5) as ctx:
93
+ response = await client.chat.completions.create(
94
+ model=ctx.variants["model"],
95
+ messages=[{"role": "user", "content": ctx.prompt}]
96
+ )
97
+ await ctx.submit(response.choices[0].message.content)
98
+ ```
99
+
100
+ **Variants** test configurations. **Groups** repeat for distribution. Results stream to [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/ab-testing)
101
+
102
+ ### Deploy & Train
103
+
104
+ Push to GitHub, connect on hud.ai, run at scale:
105
+
106
+ ```bash
107
+ hud init # Scaffold environment
108
+ git push # Push to GitHub
109
+ # Connect on hud.ai → New → Environment
110
+ hud eval my-org/my-eval --model gpt-4o --group-size 100
111
+ # Or create and run tasks on the platform
112
+ ```
113
+
114
+ Every run generates training data. Use it to fine-tune or run RL. → [Docs](https://docs.hud.ai/quick-links/deploy)
115
+
116
+ ## Links
117
+
118
+ - 📖 [Documentation](https://docs.hud.ai)
119
+ - ⌨️ [CLI Reference](https://docs.hud.ai/reference/cli/overview)
120
+ - 🏆 [Leaderboards](https://hud.ai/leaderboards)
121
+ - 🌐 [Environment Templates](https://hud.ai/environments)
122
+ - 🤖 [Supported Models](https://hud.ai/models)
123
+ - 💬 [Discord](https://discord.gg/wkjtmHYYjm)
124
+
125
+ ## Enterprise
126
+
127
+ Building agents at scale? We work with teams on custom environments, benchmarks, and training.
128
+
129
+ [📅 Book a call](https://cal.com/jay-hud) · [📧 founders@hud.ai](mailto:founders@hud.ai)
130
+
131
+ ## Contributing
132
+
133
+ We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md).
134
+
135
+ Key areas: [Agents](hud/agents/) · [Tools](hud/tools/) · [Environments](https://hud.ai/environments)
136
+
137
+ <a href="https://github.com/hud-evals/hud-python/graphs/contributors">
138
+ <img src="https://contrib.rocks/image?repo=hud-evals/hud-python&max=50" />
139
+ </a>
140
+
141
+ ## Citation
142
+
143
+ ```bibtex
144
+ @software{hud2025agentevalplatform,
145
+ author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
146
+ title = {HUD: An Evaluation and RL Environments Platform for Agents},
147
+ date = {2025-04},
148
+ url = {https://github.com/hud-evals/hud-python},
149
+ langid = {en}
150
+ }
151
+ ```
152
+
153
+ MIT License · [LICENSE](LICENSE)
@@ -0,0 +1,127 @@
1
+ # Examples
2
+
3
+ A collection of examples demonstrating HUD SDK usage patterns.
4
+
5
+ ## Quick Start
6
+
7
+ ### 00_agent_env.py
8
+ Minimal MCP server and client in one file. Shows the basic agent-environment communication pattern using `hud.eval()`.
9
+
10
+ ```bash
11
+ python examples/00_agent_env.py
12
+ ```
13
+
14
+ ### 01_agent_lifecycle.py
15
+ Complete agent lifecycle demonstrating:
16
+ - v5 Task format with Environment and scenario
17
+ - `hud.eval()` context for connection and tracing
18
+ - Agent initialization and execution
19
+ - Automatic scenario setup/evaluation
20
+
21
+ ```bash
22
+ python examples/01_agent_lifecycle.py
23
+ ```
24
+
25
+ > Requires `HUD_API_KEY` and `ANTHROPIC_API_KEY` environment variables.
26
+
27
+ ## Agent Examples
28
+
29
+ ### 02_claude_agent.py
30
+ Claude agent with computer use capabilities for browser automation.
31
+
32
+ ```bash
33
+ python examples/02_claude_agent.py
34
+ ```
35
+
36
+ > Requires `HUD_API_KEY` and `ANTHROPIC_API_KEY`.
37
+
38
+ ### 03_openai_compatible_agent.py
39
+ OpenAI-compatible chat.completions agent with both text and browser 2048 environments.
40
+
41
+ ```bash
42
+ export OPENAI_API_KEY=your-key
43
+ # export OPENAI_BASE_URL=http://localhost:8000/v1 # for local servers (e.g., vllm)
44
+
45
+ python examples/03_openai_compatible_agent.py --mode text # text environment
46
+ python examples/03_openai_compatible_agent.py --mode browser # browser environment
47
+ ```
48
+
49
+ > Requires Docker for local environment execution.
50
+
51
+ ### 04_grounded_agent.py
52
+ Grounded agent that separates visual grounding (element detection) from high-level reasoning.
53
+
54
+ ```bash
55
+ export OPENAI_API_KEY=your-key
56
+ export OPENROUTER_API_KEY=your-key
57
+
58
+ python examples/04_grounded_agent.py
59
+ ```
60
+
61
+ > Requires Docker and API keys for both OpenAI and OpenRouter.
62
+
63
+ ### 05_custom_agent.py
64
+ Build a custom MCPAgent using HUD Gateway for unified model access:
65
+ - No need for individual provider API keys
66
+ - Works with Anthropic, OpenAI, Gemini, OpenRouter models
67
+ - Automatic tracing with `@hud.instrument`
68
+
69
+ ```bash
70
+ HUD_API_KEY=sk-hud-... python examples/05_custom_agent.py
71
+ ```
72
+
73
+ ## Dataset Evaluation
74
+
75
+ ### run_evaluation.py
76
+ Generic dataset evaluation runner using the programmatic API.
77
+
78
+ ```bash
79
+ # Run all tasks in a dataset
80
+ python examples/run_evaluation.py hud-evals/SheetBench-50
81
+
82
+ # Run specific tasks by index
83
+ python examples/run_evaluation.py hud-evals/SheetBench-50 --task-ids 0 1 2
84
+
85
+ # Use different agent and concurrency
86
+ python examples/run_evaluation.py hud-evals/OSWorld-Verified-Gold --agent operator --max-concurrent 50
87
+ ```
88
+
89
+ For production evaluations, prefer the CLI: `hud eval --help`
90
+
91
+ ## Key Concepts
92
+
93
+ ### v5 Task Format
94
+
95
+ The v5 Task format is the recommended way to define evaluation tasks:
96
+
97
+ ```python
98
+ from hud.eval.task import Task
99
+
100
+ # Simple task with hub environment
101
+ task = Task(
102
+ env={"name": "browser"}, # Connect to browser hub
103
+ scenario="checkout", # Scenario to run
104
+ args={"user_id": "alice"}, # Scenario arguments
105
+ )
106
+
107
+ # Task with local Docker environment
108
+ env = hud.Environment("my-env")
109
+ env.connect_local(command="docker", args=["run", "--rm", "-i", "my-image"])
110
+ task = Task(env=env, scenario="test")
111
+ ```
112
+
113
+ ### Using hud.eval()
114
+
115
+ All examples use `hud.eval()` as the primary entry point:
116
+
117
+ ```python
118
+ async with hud.eval(task, name="my-eval", variants={"model": "gpt-4o"}) as ctx:
119
+ result = await agent.run(ctx, max_steps=10)
120
+ print(f"Reward: {ctx.reward}")
121
+ ```
122
+
123
+ The context manager handles:
124
+ - Environment connection (MCP servers start)
125
+ - Scenario setup execution
126
+ - Telemetry and tracing
127
+ - Automatic scenario evaluation on exit
@@ -0,0 +1,50 @@
1
+ """hud-python.
2
+
3
+ Tools for building, evaluating, and training AI agents.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import warnings
9
+
10
+ # Apply patches to third-party libraries early, before other imports
11
+ from . import patches as _patches # noqa: F401
12
+ from .environment import Environment
13
+ from .eval import EvalContext
14
+ from .eval import run_eval as eval
15
+ from .telemetry.instrument import instrument
16
+
17
+
18
+ def trace(*args: object, **kwargs: object) -> EvalContext:
19
+ """Deprecated: Use hud.eval() instead.
20
+
21
+ .. deprecated:: 0.5.0
22
+ hud.trace() is deprecated. Use hud.eval() or env.eval() instead.
23
+ """
24
+ warnings.warn(
25
+ "hud.trace() is deprecated. Use hud.eval() or env.eval() instead.",
26
+ DeprecationWarning,
27
+ stacklevel=2,
28
+ )
29
+ return eval(*args, **kwargs) # type: ignore[arg-type]
30
+
31
+
32
+ __all__ = [
33
+ "Environment",
34
+ "EvalContext",
35
+ "eval",
36
+ "instrument",
37
+ "trace", # Deprecated alias for eval
38
+ ]
39
+
40
+ try:
41
+ from .version import __version__
42
+ except ImportError:
43
+ __version__ = "unknown"
44
+
45
+ try:
46
+ from .utils.pretty_errors import install_pretty_errors
47
+
48
+ install_pretty_errors()
49
+ except Exception: # noqa: S110
50
+ pass
@@ -0,0 +1,19 @@
1
+ from __future__ import annotations
2
+
3
+ from .base import MCPAgent
4
+ from .openai import OpenAIAgent
5
+ from .openai_chat import OpenAIChatAgent
6
+ from .operator import OperatorAgent
7
+
8
+ # Note: These agents are not exported here to avoid requiring optional dependencies.
9
+ # Import directly if needed:
10
+ # from hud.agents.claude import ClaudeAgent # requires anthropic
11
+ # from hud.agents.gemini import GeminiAgent # requires google-genai
12
+ # from hud.agents.gemini_cua import GeminiCUAAgent # requires google-genai
13
+
14
+ __all__ = [
15
+ "MCPAgent",
16
+ "OpenAIAgent",
17
+ "OpenAIChatAgent",
18
+ "OperatorAgent",
19
+ ]