hud-python 0.5.41__tar.gz → 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (545)
  1. {hud_python-0.5.41 → hud_python-0.6.1}/.gitignore +14 -2
  2. hud_python-0.6.1/PKG-INFO +278 -0
  3. hud_python-0.6.1/README.md +200 -0
  4. hud_python-0.6.1/cookbooks/a2a-chat/README.md +37 -0
  5. hud_python-0.6.1/cookbooks/a2a-chat/pyproject.toml +18 -0
  6. hud_python-0.6.1/cookbooks/codex-coding/README.md +23 -0
  7. hud_python-0.6.1/cookbooks/codex-coding/pyproject.toml +17 -0
  8. hud_python-0.6.1/cookbooks/rl-training/README.md +120 -0
  9. hud_python-0.6.1/cookbooks/rl-training/pyproject.toml +20 -0
  10. hud_python-0.6.1/hud/__init__.py +64 -0
  11. hud_python-0.6.1/hud/_legacy.py +300 -0
  12. hud_python-0.6.1/hud/agents/__init__.py +133 -0
  13. hud_python-0.6.1/hud/agents/base.py +22 -0
  14. hud_python-0.6.1/hud/agents/browser_use/__init__.py +5 -0
  15. hud_python-0.6.1/hud/agents/browser_use/agent.py +110 -0
  16. hud_python-0.6.1/hud/agents/claude/__init__.py +22 -0
  17. hud_python-0.6.1/hud/agents/claude/agent.py +369 -0
  18. hud_python-0.6.1/hud/agents/claude/sdk/__init__.py +5 -0
  19. hud_python-0.6.1/hud/agents/claude/sdk/agent.py +335 -0
  20. hud_python-0.6.1/hud/agents/claude/sdk/computer_mcp.py +136 -0
  21. hud_python-0.6.1/hud/agents/claude/tools/__init__.py +28 -0
  22. hud_python-0.6.1/hud/agents/claude/tools/base.py +17 -0
  23. hud_python-0.6.1/hud/agents/claude/tools/coding.py +141 -0
  24. hud_python-0.6.1/hud/agents/claude/tools/computer.py +362 -0
  25. hud_python-0.6.1/hud/agents/claude/tools/hosted.py +100 -0
  26. hud_python-0.6.1/hud/agents/claude/tools/mcp_proxy.py +43 -0
  27. hud_python-0.6.1/hud/agents/claude/tools/settings.py +36 -0
  28. hud_python-0.6.1/hud/agents/claude/tools/tests/test_computer.py +149 -0
  29. hud_python-0.6.1/hud/agents/gemini/__init__.py +6 -0
  30. hud_python-0.6.1/hud/agents/gemini/agent.py +297 -0
  31. hud_python-0.6.1/hud/agents/gemini/settings.py +21 -0
  32. hud_python-0.6.1/hud/agents/gemini/tools/__init__.py +33 -0
  33. hud_python-0.6.1/hud/agents/gemini/tools/base.py +9 -0
  34. hud_python-0.6.1/hud/agents/gemini/tools/coding.py +143 -0
  35. hud_python-0.6.1/hud/agents/gemini/tools/computer.py +200 -0
  36. hud_python-0.6.1/hud/agents/gemini/tools/filesystem.py +152 -0
  37. hud_python-0.6.1/hud/agents/gemini/tools/hosted.py +42 -0
  38. hud_python-0.6.1/hud/agents/gemini/tools/mcp_proxy.py +34 -0
  39. hud_python-0.6.1/hud/agents/gemini/tools/tests/test_computer.py +105 -0
  40. hud_python-0.6.1/hud/agents/misc/__init__.py +7 -0
  41. hud_python-0.6.1/hud/agents/misc/response_automation.py +103 -0
  42. hud_python-0.6.1/hud/agents/openai/__init__.py +5 -0
  43. hud_python-0.6.1/hud/agents/openai/agent.py +327 -0
  44. hud_python-0.6.1/hud/agents/openai/tools/__init__.py +21 -0
  45. hud_python-0.6.1/hud/agents/openai/tools/apply_patch.py +328 -0
  46. hud_python-0.6.1/hud/agents/openai/tools/base.py +87 -0
  47. hud_python-0.6.1/hud/agents/openai/tools/coding.py +111 -0
  48. hud_python-0.6.1/hud/agents/openai/tools/computer.py +226 -0
  49. hud_python-0.6.1/hud/agents/openai/tools/hosted.py +35 -0
  50. hud_python-0.6.1/hud/agents/openai/tools/mcp_proxy.py +53 -0
  51. {hud_python-0.5.41/hud/utils → hud_python-0.6.1/hud/agents/openai/tools}/strict_schema.py +5 -5
  52. hud_python-0.6.1/hud/agents/openai/tools/tests/test_computer.py +110 -0
  53. hud_python-0.6.1/hud/agents/openai/tools/tests/test_strict_schema.py +74 -0
  54. hud_python-0.6.1/hud/agents/openai_compatible/__init__.py +5 -0
  55. hud_python-0.6.1/hud/agents/openai_compatible/agent.py +238 -0
  56. hud_python-0.6.1/hud/agents/openai_compatible/tools/__init__.py +14 -0
  57. hud_python-0.6.1/hud/agents/openai_compatible/tools/base.py +170 -0
  58. hud_python-0.6.1/hud/agents/openai_compatible/tools/filesystem.py +138 -0
  59. hud_python-0.6.1/hud/agents/openai_compatible/tools/mcp_proxy.py +30 -0
  60. hud_python-0.6.1/hud/agents/robot/__init__.py +35 -0
  61. hud_python-0.6.1/hud/agents/robot/_types.py +12 -0
  62. hud_python-0.6.1/hud/agents/robot/adapter.py +95 -0
  63. hud_python-0.6.1/hud/agents/robot/agent.py +157 -0
  64. hud_python-0.6.1/hud/agents/robot/model.py +138 -0
  65. hud_python-0.6.1/hud/agents/tests/test_apply_patch.py +78 -0
  66. hud_python-0.6.1/hud/agents/tests/test_base.py +125 -0
  67. hud_python-0.6.1/hud/agents/tests/test_claude_agent.py +145 -0
  68. hud_python-0.6.1/hud/agents/tests/test_claude_sdk_agent.py +148 -0
  69. hud_python-0.6.1/hud/agents/tests/test_gemini_agent.py +148 -0
  70. hud_python-0.6.1/hud/agents/tests/test_openai_agent.py +126 -0
  71. hud_python-0.6.1/hud/agents/tests/test_openai_compatible_agent.py +83 -0
  72. hud_python-0.6.1/hud/agents/tests/test_provider_native_tools.py +248 -0
  73. hud_python-0.6.1/hud/agents/tests/test_tool_agent.py +144 -0
  74. hud_python-0.6.1/hud/agents/tests/test_trace.py +134 -0
  75. hud_python-0.6.1/hud/agents/tool_agent.py +307 -0
  76. hud_python-0.6.1/hud/agents/tools/__init__.py +31 -0
  77. hud_python-0.6.1/hud/agents/tools/base.py +93 -0
  78. hud_python-0.6.1/hud/agents/tools/hosted.py +31 -0
  79. hud_python-0.6.1/hud/agents/tools/mcp.py +45 -0
  80. hud_python-0.6.1/hud/agents/tools/rfb.py +196 -0
  81. hud_python-0.6.1/hud/agents/tools/ssh.py +66 -0
  82. hud_python-0.6.1/hud/agents/types.py +459 -0
  83. hud_python-0.6.1/hud/capabilities/__init__.py +37 -0
  84. hud_python-0.6.1/hud/capabilities/base.py +222 -0
  85. hud_python-0.6.1/hud/capabilities/cdp.py +148 -0
  86. hud_python-0.6.1/hud/capabilities/filetracking.py +85 -0
  87. hud_python-0.6.1/hud/capabilities/mcp.py +76 -0
  88. hud_python-0.6.1/hud/capabilities/rfb.py +137 -0
  89. hud_python-0.6.1/hud/capabilities/robot.py +148 -0
  90. hud_python-0.6.1/hud/capabilities/ssh.py +53 -0
  91. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/__init__.py +40 -48
  92. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/cancel.py +15 -26
  93. hud_python-0.6.1/hud/cli/client.py +82 -0
  94. hud_python-0.6.1/hud/cli/deploy.py +798 -0
  95. hud_python-0.6.1/hud/cli/eval.py +939 -0
  96. hud_python-0.6.1/hud/cli/init.py +147 -0
  97. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/login.py +59 -114
  98. hud_python-0.6.1/hud/cli/models.py +252 -0
  99. hud_python-0.6.1/hud/cli/presets.py +139 -0
  100. hud_python-0.6.1/hud/cli/serve.py +111 -0
  101. hud_python-0.6.1/hud/cli/sync.py +499 -0
  102. hud_python-0.6.1/hud/cli/task.py +210 -0
  103. hud_python-0.6.1/hud/cli/templates.py +142 -0
  104. hud_python-0.6.1/hud/cli/tests/test_cli_init.py +98 -0
  105. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/tests/test_deploy.py +174 -64
  106. hud_python-0.6.1/hud/cli/tests/test_eval_config.py +239 -0
  107. hud_python-0.6.1/hud/cli/tests/test_init.py +51 -0
  108. hud_python-0.6.1/hud/cli/tests/test_sync_export.py +27 -0
  109. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/utils/api.py +2 -18
  110. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/utils/build_display.py +53 -52
  111. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/utils/build_logs.py +18 -31
  112. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/utils/config.py +13 -0
  113. hud_python-0.6.1/hud/cli/utils/display.py +100 -0
  114. hud_python-0.6.1/hud/cli/utils/jobs.py +38 -0
  115. hud_python-0.6.1/hud/cli/utils/registry.py +100 -0
  116. hud_python-0.6.1/hud/cli/utils/source.py +567 -0
  117. hud_python-0.6.1/hud/cli/utils/tests/test_build_display.py +49 -0
  118. hud_python-0.6.1/hud/cli/utils/tests/test_context.py +74 -0
  119. hud_python-0.6.1/hud/cli/utils/tests/test_registry.py +76 -0
  120. hud_python-0.6.1/hud/cli/utils/tests/test_source.py +304 -0
  121. hud_python-0.6.1/hud/cli/utils/tests/test_version_check.py +121 -0
  122. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/utils/version_check.py +1 -1
  123. hud_python-0.6.1/hud/clients/__init__.py +13 -0
  124. hud_python-0.6.1/hud/clients/client.py +396 -0
  125. hud_python-0.6.1/hud/clients/tests/__init__.py +1 -0
  126. hud_python-0.6.1/hud/clients/tests/test_connect.py +111 -0
  127. hud_python-0.6.1/hud/conftest.py +30 -0
  128. hud_python-0.6.1/hud/environment/__init__.py +58 -0
  129. hud_python-0.6.1/hud/environment/env.py +336 -0
  130. hud_python-0.6.1/hud/environment/file_tracker.py +582 -0
  131. hud_python-0.6.1/hud/environment/file_tracking.py +75 -0
  132. hud_python-0.6.1/hud/environment/legacy.py +364 -0
  133. hud_python-0.6.1/hud/environment/robot/__init__.py +29 -0
  134. hud_python-0.6.1/hud/environment/robot/bridge.py +176 -0
  135. hud_python-0.6.1/hud/environment/robot/endpoint.py +210 -0
  136. hud_python-0.6.1/hud/environment/robot/sim_runner.py +111 -0
  137. hud_python-0.6.1/hud/environment/server.py +438 -0
  138. hud_python-0.6.1/hud/environment/tests/conftest.py +28 -0
  139. hud_python-0.6.1/hud/environment/tests/test_capability_backing.py +141 -0
  140. hud_python-0.6.1/hud/environment/tests/test_file_tracker.py +186 -0
  141. hud_python-0.6.1/hud/environment/tests/test_file_tracking.py +47 -0
  142. hud_python-0.6.1/hud/environment/tests/test_legacy.py +273 -0
  143. hud_python-0.6.1/hud/environment/tests/test_loader.py +31 -0
  144. hud_python-0.6.1/hud/environment/tests/test_manifest.py +88 -0
  145. hud_python-0.6.1/hud/environment/tests/test_server.py +81 -0
  146. hud_python-0.6.1/hud/environment/tests/test_tunnel.py +126 -0
  147. hud_python-0.6.1/hud/environment/utils.py +84 -0
  148. hud_python-0.6.1/hud/environment/workspace.py +585 -0
  149. hud_python-0.6.1/hud/eval/__init__.py +76 -0
  150. hud_python-0.6.1/hud/eval/chat.py +157 -0
  151. hud_python-0.6.1/hud/eval/file_tracking.py +112 -0
  152. hud_python-0.6.1/hud/eval/job.py +135 -0
  153. hud_python-0.6.1/hud/eval/run.py +393 -0
  154. hud_python-0.6.1/hud/eval/runtime.py +995 -0
  155. hud_python-0.6.1/hud/eval/sync.py +195 -0
  156. hud_python-0.6.1/hud/eval/task.py +109 -0
  157. hud_python-0.6.1/hud/eval/taskset.py +295 -0
  158. hud_python-0.6.1/hud/eval/tests/test_chat.py +133 -0
  159. hud_python-0.6.1/hud/eval/tests/test_docker_provider.py +742 -0
  160. hud_python-0.6.1/hud/eval/tests/test_file_tracking_observer.py +128 -0
  161. hud_python-0.6.1/hud/eval/tests/test_hosted.py +438 -0
  162. hud_python-0.6.1/hud/eval/tests/test_job.py +63 -0
  163. hud_python-0.6.1/hud/eval/tests/test_rollout.py +302 -0
  164. hud_python-0.6.1/hud/eval/tests/test_sync.py +150 -0
  165. hud_python-0.6.1/hud/eval/tests/test_task.py +271 -0
  166. hud_python-0.6.1/hud/graders/__init__.py +58 -0
  167. hud_python-0.6.1/hud/graders/base.py +49 -0
  168. hud_python-0.6.1/hud/graders/bash.py +79 -0
  169. hud_python-0.6.1/hud/graders/combine.py +172 -0
  170. hud_python-0.6.1/hud/graders/judge.py +176 -0
  171. hud_python-0.6.1/hud/graders/results.py +84 -0
  172. hud_python-0.6.1/hud/graders/text.py +164 -0
  173. hud_python-0.6.1/hud/patches/__init__.py +18 -0
  174. hud_python-0.6.1/hud/patches/tests/__init__.py +3 -0
  175. hud_python-0.6.1/hud/patches/tests/test_warnings.py +108 -0
  176. hud_python-0.6.1/hud/patches/warnings.py +38 -0
  177. hud_python-0.6.1/hud/server.py +32 -0
  178. {hud_python-0.5.41 → hud_python-0.6.1}/hud/settings.py +43 -23
  179. hud_python-0.6.1/hud/telemetry/__init__.py +25 -0
  180. hud_python-0.6.1/hud/telemetry/context.py +47 -0
  181. hud_python-0.6.1/hud/telemetry/exporter.py +236 -0
  182. hud_python-0.6.1/hud/telemetry/filetracking.py +76 -0
  183. hud_python-0.6.1/hud/telemetry/instrument.py +255 -0
  184. hud_python-0.6.1/hud/telemetry/span.py +93 -0
  185. hud_python-0.6.1/hud/telemetry/tests/test_exporter.py +132 -0
  186. hud_python-0.6.1/hud/telemetry/tests/test_filetracking.py +60 -0
  187. {hud_python-0.5.41 → hud_python-0.6.1}/hud/telemetry/tests/test_instrument.py +69 -43
  188. hud_python-0.6.1/hud/train/__init__.py +47 -0
  189. hud_python-0.6.1/hud/train/base.py +102 -0
  190. hud_python-0.6.1/hud/train/client.py +213 -0
  191. hud_python-0.6.1/hud/train/types.py +182 -0
  192. hud_python-0.6.1/hud/types.py +412 -0
  193. hud_python-0.6.1/hud/utils/__init__.py +13 -0
  194. hud_python-0.6.1/hud/utils/exceptions.py +229 -0
  195. hud_python-0.6.1/hud/utils/gateway.py +89 -0
  196. {hud_python-0.5.41/hud/shared → hud_python-0.6.1/hud/utils}/hints.py +3 -20
  197. {hud_python-0.5.41 → hud_python-0.6.1}/hud/utils/hud_console.py +16 -328
  198. hud_python-0.6.1/hud/utils/modules.py +79 -0
  199. hud_python-0.6.1/hud/utils/platform.py +62 -0
  200. {hud_python-0.5.41/hud/shared → hud_python-0.6.1/hud/utils}/requests.py +2 -2
  201. {hud_python-0.5.41 → hud_python-0.6.1}/hud/utils/serialization.py +7 -1
  202. hud_python-0.6.1/hud/utils/tests/test_exceptions.py +102 -0
  203. {hud_python-0.5.41/hud/shared → hud_python-0.6.1/hud/utils}/tests/test_hints.py +1 -1
  204. hud_python-0.6.1/hud/utils/tests/test_hud_console.py +62 -0
  205. hud_python-0.6.1/hud/utils/tests/test_platform.py +55 -0
  206. {hud_python-0.5.41/hud/shared → hud_python-0.6.1/hud/utils}/tests/test_requests.py +5 -5
  207. hud_python-0.6.1/hud/utils/time.py +13 -0
  208. {hud_python-0.5.41 → hud_python-0.6.1}/hud/version.py +1 -1
  209. {hud_python-0.5.41 → hud_python-0.6.1}/pyproject.toml +53 -40
  210. hud_python-0.5.41/PKG-INFO +0 -265
  211. hud_python-0.5.41/README.md +0 -170
  212. hud_python-0.5.41/examples/README.md +0 -62
  213. hud_python-0.5.41/hud/__init__.py +0 -52
  214. hud_python-0.5.41/hud/agents/__init__.py +0 -79
  215. hud_python-0.5.41/hud/agents/base.py +0 -971
  216. hud_python-0.5.41/hud/agents/claude.py +0 -753
  217. hud_python-0.5.41/hud/agents/gateway.py +0 -42
  218. hud_python-0.5.41/hud/agents/gemini.py +0 -593
  219. hud_python-0.5.41/hud/agents/gemini_cua.py +0 -43
  220. hud_python-0.5.41/hud/agents/grounded_openai.py +0 -280
  221. hud_python-0.5.41/hud/agents/misc/__init__.py +0 -8
  222. hud_python-0.5.41/hud/agents/misc/integration_test_agent.py +0 -92
  223. hud_python-0.5.41/hud/agents/misc/response_agent.py +0 -123
  224. hud_python-0.5.41/hud/agents/openai.py +0 -601
  225. hud_python-0.5.41/hud/agents/openai_chat.py +0 -391
  226. hud_python-0.5.41/hud/agents/operator.py +0 -144
  227. hud_python-0.5.41/hud/agents/resolver.py +0 -64
  228. hud_python-0.5.41/hud/agents/tests/conftest.py +0 -133
  229. hud_python-0.5.41/hud/agents/tests/test_base.py +0 -552
  230. hud_python-0.5.41/hud/agents/tests/test_base_runtime.py +0 -238
  231. hud_python-0.5.41/hud/agents/tests/test_claude.py +0 -1159
  232. hud_python-0.5.41/hud/agents/tests/test_gemini.py +0 -849
  233. hud_python-0.5.41/hud/agents/tests/test_grounded_openai_agent.py +0 -170
  234. hud_python-0.5.41/hud/agents/tests/test_integration_test_agent.py +0 -42
  235. hud_python-0.5.41/hud/agents/tests/test_openai.py +0 -610
  236. hud_python-0.5.41/hud/agents/tests/test_operator.py +0 -429
  237. hud_python-0.5.41/hud/agents/tests/test_resolver.py +0 -284
  238. hud_python-0.5.41/hud/agents/tests/test_run_eval.py +0 -271
  239. hud_python-0.5.41/hud/agents/types.py +0 -158
  240. hud_python-0.5.41/hud/cli/analyze.py +0 -518
  241. hud_python-0.5.41/hud/cli/build.py +0 -1047
  242. hud_python-0.5.41/hud/cli/convert/__init__.py +0 -317
  243. hud_python-0.5.41/hud/cli/convert/base.py +0 -78
  244. hud_python-0.5.41/hud/cli/convert/harbor.py +0 -565
  245. hud_python-0.5.41/hud/cli/convert/tests/conftest.py +0 -258
  246. hud_python-0.5.41/hud/cli/convert/tests/test_harbor.py +0 -751
  247. hud_python-0.5.41/hud/cli/debug.py +0 -537
  248. hud_python-0.5.41/hud/cli/deploy.py +0 -811
  249. hud_python-0.5.41/hud/cli/dev.py +0 -1156
  250. hud_python-0.5.41/hud/cli/eval.py +0 -944
  251. hud_python-0.5.41/hud/cli/flows/dev.py +0 -176
  252. hud_python-0.5.41/hud/cli/flows/init.py +0 -224
  253. hud_python-0.5.41/hud/cli/flows/tasks.py +0 -476
  254. hud_python-0.5.41/hud/cli/flows/templates.py +0 -151
  255. hud_python-0.5.41/hud/cli/flows/tests/__init__.py +0 -1
  256. hud_python-0.5.41/hud/cli/flows/tests/test_dev.py +0 -126
  257. hud_python-0.5.41/hud/cli/init.py +0 -388
  258. hud_python-0.5.41/hud/cli/link.py +0 -38
  259. hud_python-0.5.41/hud/cli/models.py +0 -82
  260. hud_python-0.5.41/hud/cli/push.py +0 -485
  261. hud_python-0.5.41/hud/cli/rl.py +0 -372
  262. hud_python-0.5.41/hud/cli/scenario.py +0 -187
  263. hud_python-0.5.41/hud/cli/sync.py +0 -966
  264. hud_python-0.5.41/hud/cli/tests/test_analysis_utils.py +0 -38
  265. hud_python-0.5.41/hud/cli/tests/test_analyze.py +0 -299
  266. hud_python-0.5.41/hud/cli/tests/test_analyze_metadata.py +0 -178
  267. hud_python-0.5.41/hud/cli/tests/test_analyze_module.py +0 -167
  268. hud_python-0.5.41/hud/cli/tests/test_build.py +0 -816
  269. hud_python-0.5.41/hud/cli/tests/test_build_failure.py +0 -41
  270. hud_python-0.5.41/hud/cli/tests/test_build_module.py +0 -50
  271. hud_python-0.5.41/hud/cli/tests/test_cli_init.py +0 -192
  272. hud_python-0.5.41/hud/cli/tests/test_cli_root.py +0 -83
  273. hud_python-0.5.41/hud/cli/tests/test_convert.py +0 -361
  274. hud_python-0.5.41/hud/cli/tests/test_debug.py +0 -463
  275. hud_python-0.5.41/hud/cli/tests/test_debug_directory_mode.py +0 -32
  276. hud_python-0.5.41/hud/cli/tests/test_dev.py +0 -326
  277. hud_python-0.5.41/hud/cli/tests/test_eval.py +0 -251
  278. hud_python-0.5.41/hud/cli/tests/test_init.py +0 -124
  279. hud_python-0.5.41/hud/cli/tests/test_lockfile_utils.py +0 -72
  280. hud_python-0.5.41/hud/cli/tests/test_mcp_server.py +0 -83
  281. hud_python-0.5.41/hud/cli/tests/test_push.py +0 -369
  282. hud_python-0.5.41/hud/cli/tests/test_push_happy.py +0 -74
  283. hud_python-0.5.41/hud/cli/tests/test_push_wrapper.py +0 -23
  284. hud_python-0.5.41/hud/cli/tests/test_rl.py +0 -154
  285. hud_python-0.5.41/hud/cli/tests/test_scenario.py +0 -283
  286. hud_python-0.5.41/hud/cli/tests/test_sync.py +0 -1432
  287. hud_python-0.5.41/hud/cli/tests/test_utils.py +0 -388
  288. hud_python-0.5.41/hud/cli/utils/analysis.py +0 -265
  289. hud_python-0.5.41/hud/cli/utils/args.py +0 -80
  290. hud_python-0.5.41/hud/cli/utils/collect.py +0 -292
  291. hud_python-0.5.41/hud/cli/utils/docker.py +0 -422
  292. hud_python-0.5.41/hud/cli/utils/env_check.py +0 -194
  293. hud_python-0.5.41/hud/cli/utils/environment.py +0 -214
  294. hud_python-0.5.41/hud/cli/utils/git.py +0 -136
  295. hud_python-0.5.41/hud/cli/utils/interactive.py +0 -444
  296. hud_python-0.5.41/hud/cli/utils/lockfile.py +0 -169
  297. hud_python-0.5.41/hud/cli/utils/logging.py +0 -263
  298. hud_python-0.5.41/hud/cli/utils/metadata.py +0 -233
  299. hud_python-0.5.41/hud/cli/utils/name_check.py +0 -140
  300. hud_python-0.5.41/hud/cli/utils/project_config.py +0 -106
  301. hud_python-0.5.41/hud/cli/utils/server.py +0 -250
  302. hud_python-0.5.41/hud/cli/utils/source_hash.py +0 -108
  303. hud_python-0.5.41/hud/cli/utils/taskset.py +0 -83
  304. hud_python-0.5.41/hud/cli/utils/tests/test_collect.py +0 -283
  305. hud_python-0.5.41/hud/cli/utils/tests/test_docker.py +0 -93
  306. hud_python-0.5.41/hud/cli/utils/tests/test_docker_hints.py +0 -71
  307. hud_python-0.5.41/hud/cli/utils/tests/test_env_check.py +0 -74
  308. hud_python-0.5.41/hud/cli/utils/tests/test_environment.py +0 -81
  309. hud_python-0.5.41/hud/cli/utils/tests/test_git.py +0 -142
  310. hud_python-0.5.41/hud/cli/utils/tests/test_interactive_module.py +0 -62
  311. hud_python-0.5.41/hud/cli/utils/tests/test_logging_utils.py +0 -23
  312. hud_python-0.5.41/hud/cli/utils/tests/test_metadata.py +0 -31
  313. hud_python-0.5.41/hud/cli/utils/tests/test_source_hash.py +0 -36
  314. hud_python-0.5.41/hud/cli/utils/validation.py +0 -312
  315. hud_python-0.5.41/hud/cli/utils/viewer.py +0 -141
  316. hud_python-0.5.41/hud/datasets/__init__.py +0 -36
  317. hud_python-0.5.41/hud/datasets/loader.py +0 -283
  318. hud_python-0.5.41/hud/datasets/runner.py +0 -263
  319. hud_python-0.5.41/hud/datasets/tests/test_loader.py +0 -281
  320. hud_python-0.5.41/hud/datasets/tests/test_utils.py +0 -316
  321. hud_python-0.5.41/hud/datasets/utils.py +0 -305
  322. hud_python-0.5.41/hud/environment/__init__.py +0 -53
  323. hud_python-0.5.41/hud/environment/connection.py +0 -340
  324. hud_python-0.5.41/hud/environment/connectors/__init__.py +0 -33
  325. hud_python-0.5.41/hud/environment/connectors/base.py +0 -68
  326. hud_python-0.5.41/hud/environment/connectors/local.py +0 -177
  327. hud_python-0.5.41/hud/environment/connectors/mcp_config.py +0 -191
  328. hud_python-0.5.41/hud/environment/connectors/openai.py +0 -101
  329. hud_python-0.5.41/hud/environment/connectors/remote.py +0 -179
  330. hud_python-0.5.41/hud/environment/environment.py +0 -1153
  331. hud_python-0.5.41/hud/environment/integrations/__init__.py +0 -45
  332. hud_python-0.5.41/hud/environment/integrations/adk.py +0 -67
  333. hud_python-0.5.41/hud/environment/integrations/anthropic.py +0 -196
  334. hud_python-0.5.41/hud/environment/integrations/gemini.py +0 -92
  335. hud_python-0.5.41/hud/environment/integrations/langchain.py +0 -82
  336. hud_python-0.5.41/hud/environment/integrations/llamaindex.py +0 -68
  337. hud_python-0.5.41/hud/environment/integrations/openai.py +0 -219
  338. hud_python-0.5.41/hud/environment/mock.py +0 -306
  339. hud_python-0.5.41/hud/environment/router.py +0 -263
  340. hud_python-0.5.41/hud/environment/scenarios.py +0 -1168
  341. hud_python-0.5.41/hud/environment/tests/__init__.py +0 -1
  342. hud_python-0.5.41/hud/environment/tests/test_connection.py +0 -377
  343. hud_python-0.5.41/hud/environment/tests/test_connectors.py +0 -325
  344. hud_python-0.5.41/hud/environment/tests/test_environment.py +0 -993
  345. hud_python-0.5.41/hud/environment/tests/test_integrations.py +0 -257
  346. hud_python-0.5.41/hud/environment/tests/test_local_connectors.py +0 -242
  347. hud_python-0.5.41/hud/environment/tests/test_scenarios.py +0 -2051
  348. hud_python-0.5.41/hud/environment/tests/test_session_id.py +0 -159
  349. hud_python-0.5.41/hud/environment/tests/test_tools.py +0 -278
  350. hud_python-0.5.41/hud/environment/types.py +0 -23
  351. hud_python-0.5.41/hud/environment/utils/__init__.py +0 -33
  352. hud_python-0.5.41/hud/environment/utils/formats.py +0 -214
  353. hud_python-0.5.41/hud/environment/utils/schema.py +0 -55
  354. hud_python-0.5.41/hud/environment/utils/tool_wrappers.py +0 -113
  355. hud_python-0.5.41/hud/eval/__init__.py +0 -67
  356. hud_python-0.5.41/hud/eval/context.py +0 -821
  357. hud_python-0.5.41/hud/eval/display.py +0 -304
  358. hud_python-0.5.41/hud/eval/instrument.py +0 -187
  359. hud_python-0.5.41/hud/eval/manager.py +0 -459
  360. hud_python-0.5.41/hud/eval/parallel.py +0 -268
  361. hud_python-0.5.41/hud/eval/task.py +0 -468
  362. hud_python-0.5.41/hud/eval/tests/__init__.py +0 -1
  363. hud_python-0.5.41/hud/eval/tests/test_context.py +0 -434
  364. hud_python-0.5.41/hud/eval/tests/test_eval.py +0 -245
  365. hud_python-0.5.41/hud/eval/tests/test_manager.py +0 -238
  366. hud_python-0.5.41/hud/eval/tests/test_parallel.py +0 -168
  367. hud_python-0.5.41/hud/eval/tests/test_task.py +0 -347
  368. hud_python-0.5.41/hud/eval/types.py +0 -66
  369. hud_python-0.5.41/hud/eval/utils.py +0 -194
  370. hud_python-0.5.41/hud/native/__init__.py +0 -36
  371. hud_python-0.5.41/hud/native/chat.py +0 -74
  372. hud_python-0.5.41/hud/native/graders.py +0 -581
  373. hud_python-0.5.41/hud/native/permissions.py +0 -170
  374. hud_python-0.5.41/hud/native/skills.py +0 -127
  375. hud_python-0.5.41/hud/native/tests/__init__.py +0 -1
  376. hud_python-0.5.41/hud/native/tests/test_graders.py +0 -233
  377. hud_python-0.5.41/hud/patches/__init__.py +0 -19
  378. hud_python-0.5.41/hud/patches/warnings.py +0 -54
  379. hud_python-0.5.41/hud/server/__init__.py +0 -6
  380. hud_python-0.5.41/hud/server/context.py +0 -114
  381. hud_python-0.5.41/hud/server/helper/__init__.py +0 -5
  382. hud_python-0.5.41/hud/server/low_level.py +0 -133
  383. hud_python-0.5.41/hud/server/router.py +0 -122
  384. hud_python-0.5.41/hud/server/server.py +0 -1011
  385. hud_python-0.5.41/hud/server/tests/__init__.py +0 -3
  386. hud_python-0.5.41/hud/server/tests/test_add_tool.py +0 -60
  387. hud_python-0.5.41/hud/server/tests/test_context.py +0 -128
  388. hud_python-0.5.41/hud/server/tests/test_mcp_server_handlers.py +0 -44
  389. hud_python-0.5.41/hud/server/tests/test_mcp_server_integration.py +0 -405
  390. hud_python-0.5.41/hud/server/tests/test_mcp_server_more.py +0 -249
  391. hud_python-0.5.41/hud/server/tests/test_prefix_naming.py +0 -100
  392. hud_python-0.5.41/hud/server/tests/test_run_wrapper.py +0 -53
  393. hud_python-0.5.41/hud/server/tests/test_server_extra.py +0 -169
  394. hud_python-0.5.41/hud/server/tests/test_sigterm_runner.py +0 -79
  395. hud_python-0.5.41/hud/services/__init__.py +0 -9
  396. hud_python-0.5.41/hud/services/chat.py +0 -366
  397. hud_python-0.5.41/hud/services/chat_service.py +0 -274
  398. hud_python-0.5.41/hud/services/reply_metadata.py +0 -50
  399. hud_python-0.5.41/hud/services/tests/test_chat.py +0 -265
  400. hud_python-0.5.41/hud/services/tests/test_chat_service.py +0 -152
  401. hud_python-0.5.41/hud/shared/__init__.py +0 -5
  402. hud_python-0.5.41/hud/shared/exceptions.py +0 -393
  403. hud_python-0.5.41/hud/shared/tests/test_exceptions.py +0 -427
  404. hud_python-0.5.41/hud/telemetry/__init__.py +0 -27
  405. hud_python-0.5.41/hud/telemetry/exporter.py +0 -196
  406. hud_python-0.5.41/hud/telemetry/instrument.py +0 -349
  407. hud_python-0.5.41/hud/telemetry/tests/test_eval_telemetry.py +0 -356
  408. hud_python-0.5.41/hud/telemetry/tests/test_exporter.py +0 -258
  409. hud_python-0.5.41/hud/tools/__init__.py +0 -146
  410. hud_python-0.5.41/hud/tools/agent.py +0 -223
  411. hud_python-0.5.41/hud/tools/base.py +0 -541
  412. hud_python-0.5.41/hud/tools/coding/__init__.py +0 -66
  413. hud_python-0.5.41/hud/tools/coding/apply_patch.py +0 -670
  414. hud_python-0.5.41/hud/tools/coding/bash.py +0 -231
  415. hud_python-0.5.41/hud/tools/coding/edit.py +0 -293
  416. hud_python-0.5.41/hud/tools/coding/gemini_edit.py +0 -340
  417. hud_python-0.5.41/hud/tools/coding/gemini_shell.py +0 -228
  418. hud_python-0.5.41/hud/tools/coding/gemini_write.py +0 -92
  419. hud_python-0.5.41/hud/tools/coding/session.py +0 -231
  420. hud_python-0.5.41/hud/tools/coding/shell.py +0 -179
  421. hud_python-0.5.41/hud/tools/coding/tests/__init__.py +0 -1
  422. hud_python-0.5.41/hud/tools/coding/tests/test_apply_patch.py +0 -718
  423. hud_python-0.5.41/hud/tools/coding/tests/test_bash.py +0 -268
  424. hud_python-0.5.41/hud/tools/coding/tests/test_bash_extended.py +0 -224
  425. hud_python-0.5.41/hud/tools/coding/tests/test_bash_integration.py +0 -80
  426. hud_python-0.5.41/hud/tools/coding/tests/test_edit.py +0 -244
  427. hud_python-0.5.41/hud/tools/coding/tests/test_gemini_tools.py +0 -295
  428. hud_python-0.5.41/hud/tools/coding/tests/test_shell.py +0 -724
  429. hud_python-0.5.41/hud/tools/coding/utils.py +0 -241
  430. hud_python-0.5.41/hud/tools/computer/__init__.py +0 -54
  431. hud_python-0.5.41/hud/tools/computer/anthropic.py +0 -721
  432. hud_python-0.5.41/hud/tools/computer/gemini.py +0 -425
  433. hud_python-0.5.41/hud/tools/computer/glm.py +0 -516
  434. hud_python-0.5.41/hud/tools/computer/hud.py +0 -491
  435. hud_python-0.5.41/hud/tools/computer/openai.py +0 -336
  436. hud_python-0.5.41/hud/tools/computer/qwen.py +0 -443
  437. hud_python-0.5.41/hud/tools/computer/settings.py +0 -139
  438. hud_python-0.5.41/hud/tools/computer/tests/__init__.py +0 -1
  439. hud_python-0.5.41/hud/tools/computer/tests/test_compression.py +0 -164
  440. hud_python-0.5.41/hud/tools/computer/tests/test_computer.py +0 -581
  441. hud_python-0.5.41/hud/tools/computer/tests/test_computer_actions.py +0 -56
  442. hud_python-0.5.41/hud/tools/computer/tests/test_glm_computer.py +0 -315
  443. hud_python-0.5.41/hud/tools/elicitation.py +0 -91
  444. hud_python-0.5.41/hud/tools/executors/__init__.py +0 -30
  445. hud_python-0.5.41/hud/tools/executors/base.py +0 -651
  446. hud_python-0.5.41/hud/tools/executors/pyautogui.py +0 -645
  447. hud_python-0.5.41/hud/tools/executors/tests/__init__.py +0 -1
  448. hud_python-0.5.41/hud/tools/executors/tests/test_base_executor.py +0 -365
  449. hud_python-0.5.41/hud/tools/executors/tests/test_pyautogui_executor.py +0 -172
  450. hud_python-0.5.41/hud/tools/executors/xdo.py +0 -558
  451. hud_python-0.5.41/hud/tools/filesystem/__init__.py +0 -84
  452. hud_python-0.5.41/hud/tools/filesystem/base.py +0 -719
  453. hud_python-0.5.41/hud/tools/filesystem/gemini.py +0 -556
  454. hud_python-0.5.41/hud/tools/filesystem/gemini_read_many.py +0 -207
  455. hud_python-0.5.41/hud/tools/filesystem/glob.py +0 -128
  456. hud_python-0.5.41/hud/tools/filesystem/grep.py +0 -135
  457. hud_python-0.5.41/hud/tools/filesystem/list.py +0 -170
  458. hud_python-0.5.41/hud/tools/filesystem/read.py +0 -143
  459. hud_python-0.5.41/hud/tools/filesystem/tests/__init__.py +0 -1
  460. hud_python-0.5.41/hud/tools/filesystem/tests/test_glob.py +0 -109
  461. hud_python-0.5.41/hud/tools/filesystem/tests/test_grep.py +0 -160
  462. hud_python-0.5.41/hud/tools/filesystem/tests/test_list.py +0 -115
  463. hud_python-0.5.41/hud/tools/filesystem/tests/test_read.py +0 -170
  464. hud_python-0.5.41/hud/tools/filesystem/tests/test_read_many.py +0 -121
  465. hud_python-0.5.41/hud/tools/grounding/__init__.py +0 -13
  466. hud_python-0.5.41/hud/tools/grounding/config.py +0 -54
  467. hud_python-0.5.41/hud/tools/grounding/grounded_tool.py +0 -309
  468. hud_python-0.5.41/hud/tools/grounding/grounder.py +0 -281
  469. hud_python-0.5.41/hud/tools/grounding/tests/__init__.py +0 -1
  470. hud_python-0.5.41/hud/tools/grounding/tests/test_grounded_tool.py +0 -178
  471. hud_python-0.5.41/hud/tools/hosted/__init__.py +0 -26
  472. hud_python-0.5.41/hud/tools/hosted/base.py +0 -47
  473. hud_python-0.5.41/hud/tools/hosted/code_execution.py +0 -90
  474. hud_python-0.5.41/hud/tools/hosted/google_search.py +0 -107
  475. hud_python-0.5.41/hud/tools/hosted/tool_search.py +0 -82
  476. hud_python-0.5.41/hud/tools/hosted/url_context.py +0 -32
  477. hud_python-0.5.41/hud/tools/hosted/web_fetch.py +0 -81
  478. hud_python-0.5.41/hud/tools/hosted/web_search.py +0 -73
  479. hud_python-0.5.41/hud/tools/jupyter.py +0 -330
  480. hud_python-0.5.41/hud/tools/memory/__init__.py +0 -50
  481. hud_python-0.5.41/hud/tools/memory/base.py +0 -222
  482. hud_python-0.5.41/hud/tools/memory/claude.py +0 -290
  483. hud_python-0.5.41/hud/tools/memory/gemini.py +0 -199
  484. hud_python-0.5.41/hud/tools/memory/session.py +0 -221
  485. hud_python-0.5.41/hud/tools/memory/tests/__init__.py +0 -1
  486. hud_python-0.5.41/hud/tools/memory/tests/test_claude.py +0 -329
  487. hud_python-0.5.41/hud/tools/memory/tests/test_gemini.py +0 -85
  488. hud_python-0.5.41/hud/tools/memory/tests/test_session.py +0 -249
  489. hud_python-0.5.41/hud/tools/native_types.py +0 -102
  490. hud_python-0.5.41/hud/tools/playwright.py +0 -427
  491. hud_python-0.5.41/hud/tools/response.py +0 -65
  492. hud_python-0.5.41/hud/tools/submit.py +0 -66
  493. hud_python-0.5.41/hud/tools/tests/__init__.py +0 -3
  494. hud_python-0.5.41/hud/tools/tests/test_agent_tool.py +0 -355
  495. hud_python-0.5.41/hud/tools/tests/test_base.py +0 -270
  496. hud_python-0.5.41/hud/tools/tests/test_elicitation.py +0 -118
  497. hud_python-0.5.41/hud/tools/tests/test_init.py +0 -28
  498. hud_python-0.5.41/hud/tools/tests/test_jupyter_tool.py +0 -181
  499. hud_python-0.5.41/hud/tools/tests/test_native_tool_e2e.py +0 -862
  500. hud_python-0.5.41/hud/tools/tests/test_native_types.py +0 -516
  501. hud_python-0.5.41/hud/tools/tests/test_playwright_tool.py +0 -183
  502. hud_python-0.5.41/hud/tools/tests/test_response.py +0 -60
  503. hud_python-0.5.41/hud/tools/tests/test_submit.py +0 -85
  504. hud_python-0.5.41/hud/tools/tests/test_tools.py +0 -148
  505. hud_python-0.5.41/hud/tools/tests/test_tools_init.py +0 -49
  506. hud_python-0.5.41/hud/tools/tests/test_types.py +0 -516
  507. hud_python-0.5.41/hud/tools/tests/test_utils.py +0 -156
  508. hud_python-0.5.41/hud/tools/types.py +0 -280
  509. hud_python-0.5.41/hud/tools/utils.py +0 -50
  510. hud_python-0.5.41/hud/types.py +0 -494
  511. hud_python-0.5.41/hud/utils/__init__.py +0 -10
  512. hud_python-0.5.41/hud/utils/env.py +0 -67
  513. hud_python-0.5.41/hud/utils/mcp.py +0 -15
  514. hud_python-0.5.41/hud/utils/pretty_errors.py +0 -68
  515. hud_python-0.5.41/hud/utils/tests/test_init.py +0 -10
  516. hud_python-0.5.41/hud/utils/tests/test_pretty_errors.py +0 -186
  517. hud_python-0.5.41/hud/utils/tests/test_tool_shorthand.py +0 -154
  518. hud_python-0.5.41/hud/utils/tests/test_version.py +0 -8
  519. hud_python-0.5.41/hud/utils/tool_shorthand.py +0 -62
  520. hud_python-0.5.41/hud/utils/types.py +0 -20
  521. {hud_python-0.5.41 → hud_python-0.6.1}/LICENSE +0 -0
  522. {hud_python-0.5.41 → hud_python-0.6.1}/hud/__main__.py +0 -0
  523. {hud_python-0.5.41/hud/cli/convert → hud_python-0.6.1/hud/agents/claude/tools}/tests/__init__.py +0 -0
  524. {hud_python-0.5.41/hud/cli/flows → hud_python-0.6.1/hud/agents/gemini/tools/tests}/__init__.py +0 -0
  525. {hud_python-0.5.41/hud/cli/utils → hud_python-0.6.1/hud/agents/openai/tools}/tests/__init__.py +0 -0
  526. {hud_python-0.5.41 → hud_python-0.6.1}/hud/agents/tests/__init__.py +0 -0
  527. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/__main__.py +0 -0
  528. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/tests/__init__.py +0 -0
  529. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/tests/test_cli_main.py +0 -0
  530. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
  531. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/tests/test_eval_bedrock.py +0 -0
  532. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/tests/test_main_module.py +0 -0
  533. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/utils/__init__.py +0 -0
  534. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/utils/context.py +0 -0
  535. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/utils/tasks.py +0 -0
  536. {hud_python-0.5.41/hud/datasets → hud_python-0.6.1/hud/cli/utils}/tests/__init__.py +0 -0
  537. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/utils/tests/test_config.py +0 -0
  538. {hud_python-0.5.41 → hud_python-0.6.1}/hud/cli/utils/tests/test_tasks.py +0 -0
  539. {hud_python-0.5.41/hud/services → hud_python-0.6.1/hud/environment}/tests/__init__.py +0 -0
  540. {hud_python-0.5.41/hud/shared → hud_python-0.6.1/hud/eval}/tests/__init__.py +0 -0
  541. {hud_python-0.5.41 → hud_python-0.6.1}/hud/patches/mcp_patches.py +0 -0
  542. {hud_python-0.5.41 → hud_python-0.6.1}/hud/py.typed +0 -0
  543. {hud_python-0.5.41 → hud_python-0.6.1}/hud/telemetry/tests/__init__.py +0 -0
  544. {hud_python-0.5.41 → hud_python-0.6.1}/hud/utils/tests/__init__.py +0 -0
  545. {hud_python-0.5.41 → hud_python-0.6.1}/hud/utils/tests/test_serialization.py +0 -0
@@ -7,6 +7,10 @@ __pycache__
7
7
  .pytest_cache
8
8
  dist/
9
9
  build/
10
+ # The broad build/ rule above also matches docs/v6/build/, which is real docs
11
+ # content (linked from docs.json). Keep tracking it so docs.hud.ai/v6/build/*
12
+ # does not 404.
13
+ !docs/v6/build/
10
14
  *.egg-info/
11
15
  uv.lock
12
16
 
@@ -34,7 +38,6 @@ TODO.md
34
38
  /dev/
35
39
 
36
40
  .claude
37
- CLAUDE.md
38
41
 
39
42
  *.csv
40
43
  .rl_config_*.json
@@ -54,4 +57,13 @@ hud/rl/checkpoints_test/
54
57
  .ck/
55
58
 
56
59
  .hud_eval_config
57
- .hud_eval.toml
60
+ .hud_eval.toml
61
+
62
+ docs/internal
63
+
64
+ environments/
65
+
66
+ experiments/
67
+ .memories/
68
+
69
+ .codex/
@@ -0,0 +1,278 @@
1
+ Metadata-Version: 2.4
2
+ Name: hud-python
3
+ Version: 0.6.1
4
+ Summary: SDK for the HUD platform.
5
+ Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
+ Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
7
+ Project-URL: Documentation, https://docs.hud.ai
8
+ Author-email: HUD <founders@hud.ai>
9
+ License: MIT License
10
+
11
+ Copyright (c) 2025 Human Union Data, Inc
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Classifier: Development Status :: 4 - Beta
32
+ Classifier: Intended Audience :: Developers
33
+ Classifier: Programming Language :: Python :: 3
34
+ Classifier: Programming Language :: Python :: 3.11
35
+ Classifier: Programming Language :: Python :: 3.12
36
+ Requires-Python: <3.13,>=3.11
37
+ Requires-Dist: anthropic>=0.78.0
38
+ Requires-Dist: asyncssh>=2.23.0
39
+ Requires-Dist: asyncvnc>=1.3.0
40
+ Requires-Dist: fastmcp==3.0.2
41
+ Requires-Dist: google-genai
42
+ Requires-Dist: httpx<1,>=0.23.0
43
+ Requires-Dist: mcp<2.0,>=1.24.0
44
+ Requires-Dist: openai>=2.26.0
45
+ Requires-Dist: packaging>=21.0
46
+ Requires-Dist: pillow>=11.0.0
47
+ Requires-Dist: prompt-toolkit==3.0.51
48
+ Requires-Dist: pydantic-settings<3,>=2.2
49
+ Requires-Dist: pydantic<3,>=2.6
50
+ Requires-Dist: questionary==2.1.0
51
+ Requires-Dist: rich>=13.0.0
52
+ Requires-Dist: typer>=0.9.0
53
+ Requires-Dist: websockets>=15.0.1
54
+ Provides-Extra: agent
55
+ Provides-Extra: agents
56
+ Provides-Extra: bedrock
57
+ Requires-Dist: anthropic[bedrock]>=0.78.0; extra == 'bedrock'
58
+ Provides-Extra: browseruse
59
+ Requires-Dist: browser-use>=0.11.13; extra == 'browseruse'
60
+ Provides-Extra: daytona
61
+ Requires-Dist: daytona>=0.100; extra == 'daytona'
62
+ Provides-Extra: dev
63
+ Requires-Dist: dotenv>=0.9.9; extra == 'dev'
64
+ Requires-Dist: pyright==1.1.407; extra == 'dev'
65
+ Requires-Dist: pytest-asyncio; extra == 'dev'
66
+ Requires-Dist: pytest-cov; extra == 'dev'
67
+ Requires-Dist: pytest-mock; extra == 'dev'
68
+ Requires-Dist: pytest>=8.1.1; extra == 'dev'
69
+ Requires-Dist: ruff<0.15.0,>=0.11.8; extra == 'dev'
70
+ Provides-Extra: modal
71
+ Requires-Dist: modal>=1.0; extra == 'modal'
72
+ Provides-Extra: robot
73
+ Requires-Dist: numpy>=1.24; extra == 'robot'
74
+ Requires-Dist: openpi-client>=0.1.2; extra == 'robot'
75
+ Provides-Extra: train
76
+ Requires-Dist: torch>=2; extra == 'train'
77
+ Description-Content-Type: text/markdown
78
+
79
+ <div align="left">
80
+ <picture>
81
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo_dark.svg">
82
+ <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg">
83
+ <img src="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg" alt="HUD" width="150" style="margin-bottom: 24px;"/>
84
+ </picture>
85
+ </div>
86
+
87
+ HUD is a platform for building RL environments for AI agents, across coding, browser, computer-use, and robotics. Define an environment, write tasks, and run them as evals and training across any model, at any scale.
88
+
89
+ To learn more, see the [documentation](https://docs.hud.ai) and [API reference](https://docs.hud.ai/reference/environment).
90
+
91
+ [![PyPI](https://img.shields.io/pypi/v/hud-python?style=flat-square)](https://pypi.org/project/hud-python/)
92
+ [![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
93
+ [![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
94
+ [![Discord](https://img.shields.io/discord/1327447144772407390?label=Discord&logo=discord&style=flat-square)](https://discord.gg/wkjtmHYYjm)
95
+ [![X Follow](https://img.shields.io/twitter/follow/hud_evals?style=social)](https://x.com/intent/user?screen_name=hud_evals)
96
+ [![Scarf](https://static.scarf.sh/a.png?x-pxid=6530ff33-4945-452b-81f9-626872593933)](https://scarf.sh)
97
+ [![Docs](https://img.shields.io/badge/docs-hud.ai-blue?style=flat-square)](https://docs.hud.ai)
98
+
99
+ ## Install
100
+
101
+ ```bash
102
+ # Install the CLI (recommended)
103
+ uv tool install hud-python --python 3.12
104
+
105
+ # …or as a library
106
+ pip install hud-python
107
+ ```
108
+
109
+ Get your API key at [hud.ai/project/api-keys](https://hud.ai/project/api-keys) and set it:
110
+
111
+ ```bash
112
+ hud set HUD_API_KEY=your-key-here
113
+ # or: export HUD_API_KEY=your-key-here
114
+ ```
115
+
116
+ Then scaffold your first environment:
117
+
118
+ ```bash
119
+ hud init my-env
120
+ ```
121
+
122
+ ![Agent running on SheetBench](https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/src/images/trace_sheet.gif)
123
+
124
+ ## The protocol
125
+
126
+ HUD is **protocol-first**. An agent and an environment exchange just three things: a **manifest** (the environment's capabilities and tasks), **`tasks.start`** that returns the prompt, and **`tasks.grade`** that returns the reward. In between, the agent just *works*, driving the capabilities itself. HUD owns only that thin envelope, so any model or harness plugs into any environment.
127
+
128
+ ```mermaid
129
+ sequenceDiagram
130
+ participant Agent
131
+ participant Env as Environment
132
+ participant Caps as Capabilities (ssh · mcp · cdp · rfb · robot)
133
+ Agent->>Env: manifest exchange
134
+ Env-->>Agent: capabilities + tasks
135
+ Agent->>Env: tasks.start
136
+ Env-->>Agent: prompt
137
+ rect rgb(238,238,238)
138
+ Note over Agent,Caps: the agent works, driving capabilities directly
139
+ Agent->>Caps: shell · browser · GUI · tools · robot
140
+ Caps-->>Agent: observations
141
+ end
142
+ Agent->>Env: tasks.grade
143
+ Env-->>Agent: reward
144
+ ```
145
+
146
+ Because the protocol only exposes **capabilities** (never a fixed agent), an environment outlives any single harness: new harnesses and models keep running against the same environments, benchmarks, and tasks.
147
+
148
+ ## Package & run anywhere
149
+
150
+ A built image is the **end product for your tasks**: one build packs every task from a single definition. The recommended path is **`hud deploy`**, which builds and registers your environment on HUD in one step; then sync a taskset and run remotely:
151
+
152
+ ```bash
153
+ hud deploy
154
+ hud sync tasks my-taskset
155
+ hud eval my-taskset --remote
156
+ ```
157
+
158
+ For local iteration, the same protocol works against a container on your laptop:
159
+
160
+ ```bash
161
+ hud build .
162
+ docker run -d --name run1 my-env
163
+ docker exec run1 hud task start fix_bug
164
+ docker exec run1 hud task grade fix_bug --answer "…"
165
+ docker rm -f run1
166
+ ```
167
+
168
+ → [Package & deploy](https://docs.hud.ai/run/deploy)
169
+
170
+ ## Environments & templates
171
+
172
+ A **template** is an async generator registered with `@env.template()`: `yield` a prompt, receive the agent's answer, `yield` a reward. Calling the template mints a runnable **Task**; one function spans a whole dataset of variants. The simplest template needs no capabilities — just a prompt and a grader:
173
+
174
+ ```python
175
+ from hud import Environment
176
+
177
+ env = Environment(name="letter-count")
178
+
179
+ @env.template()
180
+ async def count_letter(word: str = "strawberry", letter: str = "r"):
181
+ answer = yield f"How many '{letter}'s are in '{word}'? Reply with just the number."
182
+ yield 1.0 if answer and str(word.count(letter)) in answer else 0.0
183
+
184
+ tasks = [count_letter(word=w) for w in ("strawberry", "raspberry", "blueberry")]
185
+ ```
186
+
187
+ Run it immediately against any model:
188
+
189
+ ```bash
190
+ hud eval tasks.py claude --group 3
191
+ ```
192
+
193
+ Each graded evaluation is a **trace** (the SDK's live handle is a `Run`). With `HUD_API_KEY` set, every rollout is recorded on [hud.ai](https://hud.ai). Tasks that need a shell, browser, GUI, or robot declare **capabilities** (below); everything else — variants, grading, batching — stays identical.
194
+
195
+ → [Quickstart](https://docs.hud.ai/quickstart) · [Tasks & tasksets](https://docs.hud.ai/reference/tasks)
196
+
197
+ ## Capabilities & harnesses
198
+
199
+ A **capability** is a connection the environment exposes; a **harness** attaches its own tools to it. The same environment serves a one-shot Q&A or a full computer-use rollout, depending on which capabilities the harness opens.
200
+
201
+ | Protocol | What it exposes |
202
+ |----------|-----------------|
203
+ | **`ssh`** | Shell + files in a sandboxed workspace (`env.workspace(root)`) |
204
+ | **`mcp`** | Tools over the Model Context Protocol |
205
+ | **`cdp`** | Browser control over the Chrome DevTools Protocol |
206
+ | **`rfb`** | Full computer-use over VNC: screen + keyboard/mouse |
207
+ | **`robot`** *(beta)* | Schema-driven robot observation/action loop over WebSocket |
208
+
209
+ **Ships natively:** Claude, OpenAI (Responses), OpenAI-compatible endpoints, and Gemini via `create_agent("claude-sonnet-4-5")` (or `gpt-…`, `gemini-…`). The harness wires capability-backed tools for the model you choose at run time.
210
+
211
+ **Bring your own:** a harness attaches to a capability and defines a tool spec — wrap `browser-use` on `cdp`, a VLA policy on `robot`, or your own agent on `ssh` / `mcp`. No protocol work required.
212
+
213
+ → [Capabilities](https://docs.hud.ai/reference/capabilities) · [Models](https://docs.hud.ai/run/models) · [Robots](https://docs.hud.ai/reference/robots)
214
+
215
+ ## Deploy on the platform
216
+
217
+ From the [platform UI](https://hud.ai) you can run batches, compare models on the same taskset, and inspect every trace.
218
+
219
+ → [Deploy](https://docs.hud.ai/run/deploy) · [Leaderboards](https://hud.ai/leaderboards)
220
+
221
+ ## Train on rewards
222
+
223
+ Every rollout returns a `Run` carrying a `trace_id` and a `reward`, so the tasks you evaluate are already training data. Run a **group** per task and turn the rewards into GRPO advantages with `group_relative()`:
224
+
225
+ ```python
226
+ from hud.agents import create_agent
227
+ from hud.eval import Taskset, group_relative
228
+
229
+ agent = create_agent("claude-sonnet-4-5")
230
+ job = await Taskset(count_letter(word=w) for w in words).run(agent, group=16)
231
+ for runs in job.results.values():
232
+ advantages = group_relative([r.reward for r in runs], normalize_std=True)
233
+ ... # feed (run.trace_id, adv) into your optimizer
234
+ ```
235
+
236
+ HUD is the environment-and-reward source for your own GRPO/PPO loop — the same environment trains any model, text or multimodal, unchanged.
237
+
238
+ → [Training](https://docs.hud.ai/run/training) · [Designing tasks for signal](https://docs.hud.ai/run/signal)
239
+
240
+ ## Links
241
+
242
+ - [Documentation](https://docs.hud.ai)
243
+ - [Quickstart](https://docs.hud.ai/quickstart)
244
+ - [CLI reference](https://docs.hud.ai/reference/cli)
245
+ - [Leaderboards](https://hud.ai/leaderboards)
246
+ - [Environment templates](https://hud.ai/environments)
247
+ - [Supported models](https://hud.ai/models)
248
+ - [Discord](https://discord.gg/wkjtmHYYjm)
249
+
250
+ ## Enterprise
251
+
252
+ Building agents at scale? We work with teams on custom environments, benchmarks, and training.
253
+
254
+ [📅 Book a call](https://cal.com/jay-hud) · [📧 founders@hud.ai](mailto:founders@hud.ai)
255
+
256
+ ## Contributing
257
+
258
+ We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md).
259
+
260
+ Key areas: [Agents](hud/agents/) · [Environments](hud/environment/) · [Capabilities](hud/capabilities/) · [Eval](hud/eval/)
261
+
262
+ <a href="https://github.com/hud-evals/hud-python/graphs/contributors">
263
+ <img src="https://contrib.rocks/image?repo=hud-evals/hud-python&max=50" />
264
+ </a>
265
+
266
+ ## Citation
267
+
268
+ ```bibtex
269
+ @software{hud2025agentevalplatform,
270
+ author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
271
+ title = {HUD: An Evaluation and RL Environments Platform for Agents},
272
+ date = {2025-04},
273
+ url = {https://github.com/hud-evals/hud-python},
274
+ langid = {en}
275
+ }
276
+ ```
277
+
278
+ MIT License · [LICENSE](LICENSE)
@@ -0,0 +1,200 @@
1
+ <div align="left">
2
+ <picture>
3
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo_dark.svg">
4
+ <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg">
5
+ <img src="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg" alt="HUD" width="150" style="margin-bottom: 24px;"/>
6
+ </picture>
7
+ </div>
8
+
9
+ HUD is a platform for building RL environments for AI agents, across coding, browser, computer-use, and robotics. Define an environment, write tasks, and run them as evals and training across any model, at any scale.
10
+
11
+ To learn more, see the [documentation](https://docs.hud.ai) and [API reference](https://docs.hud.ai/reference/environment).
12
+
13
+ [![PyPI](https://img.shields.io/pypi/v/hud-python?style=flat-square)](https://pypi.org/project/hud-python/)
14
+ [![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
15
+ [![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
16
+ [![Discord](https://img.shields.io/discord/1327447144772407390?label=Discord&logo=discord&style=flat-square)](https://discord.gg/wkjtmHYYjm)
17
+ [![X Follow](https://img.shields.io/twitter/follow/hud_evals?style=social)](https://x.com/intent/user?screen_name=hud_evals)
18
+ [![Scarf](https://static.scarf.sh/a.png?x-pxid=6530ff33-4945-452b-81f9-626872593933)](https://scarf.sh)
19
+ [![Docs](https://img.shields.io/badge/docs-hud.ai-blue?style=flat-square)](https://docs.hud.ai)
20
+
21
+ ## Install
22
+
23
+ ```bash
24
+ # Install the CLI (recommended)
25
+ uv tool install hud-python --python 3.12
26
+
27
+ # …or as a library
28
+ pip install hud-python
29
+ ```
30
+
31
+ Get your API key at [hud.ai/project/api-keys](https://hud.ai/project/api-keys) and set it:
32
+
33
+ ```bash
34
+ hud set HUD_API_KEY=your-key-here
35
+ # or: export HUD_API_KEY=your-key-here
36
+ ```
37
+
38
+ Then scaffold your first environment:
39
+
40
+ ```bash
41
+ hud init my-env
42
+ ```
43
+
44
+ ![Agent running on SheetBench](https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/src/images/trace_sheet.gif)
45
+
46
+ ## The protocol
47
+
48
+ HUD is **protocol-first**. An agent and an environment exchange just three things: a **manifest** (the environment's capabilities and tasks), **`tasks.start`** that returns the prompt, and **`tasks.grade`** that returns the reward. In between, the agent just *works*, driving the capabilities itself. HUD owns only that thin envelope, so any model or harness plugs into any environment.
49
+
50
+ ```mermaid
51
+ sequenceDiagram
52
+ participant Agent
53
+ participant Env as Environment
54
+ participant Caps as Capabilities (ssh · mcp · cdp · rfb · robot)
55
+ Agent->>Env: manifest exchange
56
+ Env-->>Agent: capabilities + tasks
57
+ Agent->>Env: tasks.start
58
+ Env-->>Agent: prompt
59
+ rect rgb(238,238,238)
60
+ Note over Agent,Caps: the agent works, driving capabilities directly
61
+ Agent->>Caps: shell · browser · GUI · tools · robot
62
+ Caps-->>Agent: observations
63
+ end
64
+ Agent->>Env: tasks.grade
65
+ Env-->>Agent: reward
66
+ ```
67
+
68
+ Because the protocol only exposes **capabilities** (never a fixed agent), an environment outlives any single harness: new harnesses and models keep running against the same environments, benchmarks, and tasks.
69
+
70
+ ## Package & run anywhere
71
+
72
+ A built image is the **end product for your tasks**: one build packs every task from a single definition. The recommended path is **`hud deploy`**, which builds and registers your environment on HUD in one step; then sync a taskset and run remotely:
73
+
74
+ ```bash
75
+ hud deploy
76
+ hud sync tasks my-taskset
77
+ hud eval my-taskset --remote
78
+ ```
79
+
80
+ For local iteration, the same protocol works against a container on your laptop:
81
+
82
+ ```bash
83
+ hud build .
84
+ docker run -d --name run1 my-env
85
+ docker exec run1 hud task start fix_bug
86
+ docker exec run1 hud task grade fix_bug --answer "…"
87
+ docker rm -f run1
88
+ ```
89
+
90
+ → [Package & deploy](https://docs.hud.ai/run/deploy)
91
+
92
+ ## Environments & templates
93
+
94
+ A **template** is an async generator registered with `@env.template()`: `yield` a prompt, receive the agent's answer, `yield` a reward. Calling the template mints a runnable **Task**; one function spans a whole dataset of variants. The simplest template needs no capabilities — just a prompt and a grader:
95
+
96
+ ```python
97
+ from hud import Environment
98
+
99
+ env = Environment(name="letter-count")
100
+
101
+ @env.template()
102
+ async def count_letter(word: str = "strawberry", letter: str = "r"):
103
+ answer = yield f"How many '{letter}'s are in '{word}'? Reply with just the number."
104
+ yield 1.0 if answer and str(word.count(letter)) in answer else 0.0
105
+
106
+ tasks = [count_letter(word=w) for w in ("strawberry", "raspberry", "blueberry")]
107
+ ```
108
+
109
+ Run it immediately against any model:
110
+
111
+ ```bash
112
+ hud eval tasks.py claude --group 3
113
+ ```
114
+
115
+ Each graded evaluation is a **trace** (the SDK's live handle is a `Run`). With `HUD_API_KEY` set, every rollout is recorded on [hud.ai](https://hud.ai). Tasks that need a shell, browser, GUI, or robot declare **capabilities** (below); everything else — variants, grading, batching — stays identical.
116
+
117
+ → [Quickstart](https://docs.hud.ai/quickstart) · [Tasks & tasksets](https://docs.hud.ai/reference/tasks)
118
+
119
+ ## Capabilities & harnesses
120
+
121
+ A **capability** is a connection the environment exposes; a **harness** attaches its own tools to it. The same environment serves a one-shot Q&A or a full computer-use rollout, depending on which capabilities the harness opens.
122
+
123
+ | Protocol | What it exposes |
124
+ |----------|-----------------|
125
+ | **`ssh`** | Shell + files in a sandboxed workspace (`env.workspace(root)`) |
126
+ | **`mcp`** | Tools over the Model Context Protocol |
127
+ | **`cdp`** | Browser control over the Chrome DevTools Protocol |
128
+ | **`rfb`** | Full computer-use over VNC: screen + keyboard/mouse |
129
+ | **`robot`** *(beta)* | Schema-driven robot observation/action loop over WebSocket |
130
+
131
+ **Ships natively:** Claude, OpenAI (Responses), OpenAI-compatible endpoints, and Gemini via `create_agent("claude-sonnet-4-5")` (or `gpt-…`, `gemini-…`). The harness wires capability-backed tools for the model you choose at run time.
132
+
133
+ **Bring your own:** a harness attaches to a capability and defines a tool spec — wrap `browser-use` on `cdp`, a VLA policy on `robot`, or your own agent on `ssh` / `mcp`. No protocol work required.
134
+
135
+ → [Capabilities](https://docs.hud.ai/reference/capabilities) · [Models](https://docs.hud.ai/run/models) · [Robots](https://docs.hud.ai/reference/robots)
136
+
137
+ ## Deploy on the platform
138
+
139
+ From the [platform UI](https://hud.ai) you can run batches, compare models on the same taskset, and inspect every trace.
140
+
141
+ → [Deploy](https://docs.hud.ai/run/deploy) · [Leaderboards](https://hud.ai/leaderboards)
142
+
143
+ ## Train on rewards
144
+
145
+ Every rollout returns a `Run` carrying a `trace_id` and a `reward`, so the tasks you evaluate are already training data. Run a **group** per task and turn the rewards into GRPO advantages with `group_relative()`:
146
+
147
+ ```python
148
+ from hud.agents import create_agent
149
+ from hud.eval import Taskset, group_relative
150
+
151
+ agent = create_agent("claude-sonnet-4-5")
152
+ job = await Taskset(count_letter(word=w) for w in words).run(agent, group=16)
153
+ for runs in job.results.values():
154
+ advantages = group_relative([r.reward for r in runs], normalize_std=True)
155
+ ... # feed (run.trace_id, adv) into your optimizer
156
+ ```
157
+
158
+ HUD is the environment-and-reward source for your own GRPO/PPO loop — the same environment trains any model, text or multimodal, unchanged.
159
+
160
+ → [Training](https://docs.hud.ai/run/training) · [Designing tasks for signal](https://docs.hud.ai/run/signal)
161
+
162
+ ## Links
163
+
164
+ - [Documentation](https://docs.hud.ai)
165
+ - [Quickstart](https://docs.hud.ai/quickstart)
166
+ - [CLI reference](https://docs.hud.ai/reference/cli)
167
+ - [Leaderboards](https://hud.ai/leaderboards)
168
+ - [Environment templates](https://hud.ai/environments)
169
+ - [Supported models](https://hud.ai/models)
170
+ - [Discord](https://discord.gg/wkjtmHYYjm)
171
+
172
+ ## Enterprise
173
+
174
+ Building agents at scale? We work with teams on custom environments, benchmarks, and training.
175
+
176
+ [📅 Book a call](https://cal.com/jay-hud) · [📧 founders@hud.ai](mailto:founders@hud.ai)
177
+
178
+ ## Contributing
179
+
180
+ We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md).
181
+
182
+ Key areas: [Agents](hud/agents/) · [Environments](hud/environment/) · [Capabilities](hud/capabilities/) · [Eval](hud/eval/)
183
+
184
+ <a href="https://github.com/hud-evals/hud-python/graphs/contributors">
185
+ <img src="https://contrib.rocks/image?repo=hud-evals/hud-python&max=50" />
186
+ </a>
187
+
188
+ ## Citation
189
+
190
+ ```bibtex
191
+ @software{hud2025agentevalplatform,
192
+ author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
193
+ title = {HUD: An Evaluation and RL Environments Platform for Agents},
194
+ date = {2025-04},
195
+ url = {https://github.com/hud-evals/hud-python},
196
+ langid = {en}
197
+ }
198
+ ```
199
+
200
+ MIT License · [LICENSE](LICENSE)
@@ -0,0 +1,37 @@
1
+ # A2A Chat
2
+
3
+ Serve a HUD chat task over the [A2A protocol](https://github.com/google/a2a),
4
+ and talk to it from Python clients.
5
+
6
+ `hud.Chat` is protocol-agnostic — these scripts are the protocol layer, kept
7
+ outside the SDK on purpose. Copy and adapt them.
8
+
9
+ | File | What it does |
10
+ |------|--------------|
11
+ | `server.py` | A2A server: one `Chat` (conversation) per A2A context, agent card, citations artifact |
12
+ | `client.py` | Minimal A2A client: send messages, print replies |
13
+ | `llm_client.py` | LLM-fronted client: an OpenAI model decides when to call the A2A agent as a tool |
14
+ | `chat_env.py` | Sample chat environment with `messages`-style tasks to serve |
15
+
16
+ ## Run
17
+
18
+ From this directory (uv resolves the dependencies on first run):
19
+
20
+ ```bash
21
+ # Terminal 1: serve the bundled chat task (spawns chat_env.py per turn)
22
+ uv run server.py
23
+
24
+ # Terminal 2: talk to it
25
+ uv run client.py # plain client
26
+ uv run llm_client.py # LLM-fronted client
27
+ ```
28
+
29
+ Configuration is via env vars: `HUD_MODEL` picks the agent's model (gateway,
30
+ needs `HUD_API_KEY`), `HUD_TASK`/`HUD_ENV` pick the task row, `HUD_SOURCE`
31
+ spawns a different env source, and `HUD_ENV_URL` attaches each turn to an
32
+ already-served control channel (e.g. `hud serve chat_env.py` →
33
+ `HUD_ENV_URL=tcp://127.0.0.1:8765`) instead of spawning.
34
+
35
+ The server publishes an agent card at `/.well-known/agent-card.json` and
36
+ accepts A2A messages at the root endpoint. The configured task should accept a
37
+ `messages` argument for multi-turn history (see `chat_env.py`).
@@ -0,0 +1,18 @@
1
+ [project]
2
+ name = "a2a-chat"
3
+ version = "0.1.0"
4
+ description = "Serve a HUD chat task over the A2A protocol (cookbook)"
5
+ requires-python = ">=3.11,<3.13"
6
+ dependencies = [
7
+ "hud-python",
8
+ # The scripts are written against the 0.3.x server API.
9
+ "a2a-sdk==0.3.26",
10
+ ]
11
+
12
+ [tool.uv]
13
+ package = false
14
+
15
+ # Track the SDK from this repo. If you copied this folder out, delete this
16
+ # block to use the released hud-python from PyPI.
17
+ [tool.uv.sources]
18
+ hud-python = { path = "../..", editable = true }
@@ -0,0 +1,23 @@
1
+ # Codex Coding Agent
2
+
3
+ Build your own [Codex](https://github.com/openai/codex) with the HUD SDK: an
4
+ environment exposes an `ssh` capability backed by a `Workspace`, and
5
+ `OpenAIAgent` drives it with OpenAI's native `shell` and `apply_patch` tools —
6
+ the same protocol the `codex` CLI uses.
7
+
8
+ ## Run
9
+
10
+ From this directory (requires `HUD_API_KEY` for gateway inference):
11
+
12
+ ```bash
13
+ uv run codex_agent.py
14
+
15
+ # Custom task
16
+ uv run codex_agent.py --task "Create a Python script that prints the Fibonacci sequence"
17
+
18
+ # Custom working directory
19
+ uv run codex_agent.py --work-dir ./codex_output
20
+ ```
21
+
22
+ To run the same environment as a packaged, sandboxed box instead of on your
23
+ machine, see `hud deploy` and `RemoteSandbox` in the deploy docs.
@@ -0,0 +1,17 @@
1
+ [project]
2
+ name = "codex-coding"
3
+ version = "0.1.0"
4
+ description = "Build your own Codex with the HUD SDK (cookbook)"
5
+ requires-python = ">=3.11,<3.13"
6
+ dependencies = [
7
+ "hud-python",
8
+ "python-dotenv",
9
+ ]
10
+
11
+ [tool.uv]
12
+ package = false
13
+
14
+ # Track the SDK from this repo. If you copied this folder out, delete this
15
+ # block to use the released hud-python from PyPI.
16
+ [tool.uv.sources]
17
+ hud-python = { path = "../..", editable = true }