hud-python 0.5.33__tar.gz → 0.5.35__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (358)
  1. {hud_python-0.5.33 → hud_python-0.5.35}/PKG-INFO +68 -64
  2. {hud_python-0.5.33 → hud_python-0.5.35}/README.md +67 -63
  3. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/base.py +14 -15
  4. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/claude.py +6 -2
  5. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/gemini.py +6 -2
  6. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/openai.py +8 -5
  7. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/__init__.py +19 -14
  8. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/analyze.py +36 -17
  9. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/build.py +226 -462
  10. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/debug.py +5 -3
  11. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/deploy.py +131 -61
  12. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/eval.py +35 -9
  13. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/flows/init.py +74 -41
  14. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/flows/templates.py +2 -2
  15. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/init.py +2 -3
  16. hud_python-0.5.35/hud/cli/link.py +38 -0
  17. hud_python-0.5.35/hud/cli/rl.py +372 -0
  18. hud_python-0.5.35/hud/cli/scenario.py +187 -0
  19. hud_python-0.5.35/hud/cli/sync.py +969 -0
  20. hud_python-0.5.35/hud/cli/tests/test_analysis_utils.py +38 -0
  21. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_analyze.py +8 -8
  22. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_analyze_module.py +4 -4
  23. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_build.py +70 -27
  24. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_build_failure.py +2 -2
  25. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_deploy.py +4 -5
  26. hud_python-0.5.35/hud/cli/tests/test_lockfile_utils.py +72 -0
  27. hud_python-0.5.35/hud/cli/tests/test_rl.py +154 -0
  28. hud_python-0.5.35/hud/cli/tests/test_scenario.py +283 -0
  29. hud_python-0.5.35/hud/cli/tests/test_sync.py +1433 -0
  30. hud_python-0.5.33/hud/cli/utils/mcp.py → hud_python-0.5.35/hud/cli/utils/analysis.py +57 -15
  31. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/build_display.py +1 -3
  32. hud_python-0.5.35/hud/cli/utils/collect.py +292 -0
  33. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/environment.py +6 -28
  34. hud_python-0.5.35/hud/cli/utils/lockfile.py +169 -0
  35. hud_python-0.5.35/hud/cli/utils/name_check.py +140 -0
  36. hud_python-0.5.35/hud/cli/utils/project_config.py +106 -0
  37. hud_python-0.5.35/hud/cli/utils/taskset.py +83 -0
  38. hud_python-0.5.35/hud/cli/utils/tests/test_collect.py +283 -0
  39. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/connectors/mcp_config.py +51 -8
  40. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/tests/test_connectors.py +76 -0
  41. {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/task.py +43 -1
  42. hud_python-0.5.35/hud/native/__init__.py +36 -0
  43. hud_python-0.5.35/hud/native/graders.py +581 -0
  44. hud_python-0.5.35/hud/native/permissions.py +170 -0
  45. hud_python-0.5.35/hud/native/skills.py +127 -0
  46. hud_python-0.5.35/hud/native/tests/__init__.py +1 -0
  47. hud_python-0.5.35/hud/native/tests/test_graders.py +233 -0
  48. {hud_python-0.5.33 → hud_python-0.5.35}/hud/patches/mcp_patches.py +1 -1
  49. {hud_python-0.5.33 → hud_python-0.5.35}/hud/services/chat.py +6 -5
  50. {hud_python-0.5.33 → hud_python-0.5.35}/hud/services/tests/test_chat.py +6 -23
  51. {hud_python-0.5.33 → hud_python-0.5.35}/hud/telemetry/instrument.py +2 -5
  52. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/edit.py +6 -0
  53. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/utils.py +11 -0
  54. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/memory/base.py +2 -2
  55. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/memory/claude.py +2 -3
  56. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/memory/gemini.py +2 -3
  57. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/memory/session.py +1 -3
  58. {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/hud_console.py +96 -8
  59. hud_python-0.5.35/hud/utils/serialization.py +26 -0
  60. hud_python-0.5.35/hud/utils/tests/test_serialization.py +31 -0
  61. {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/tests/test_version.py +1 -1
  62. {hud_python-0.5.33 → hud_python-0.5.35}/hud/version.py +1 -1
  63. {hud_python-0.5.33 → hud_python-0.5.35}/pyproject.toml +2 -2
  64. hud_python-0.5.33/hud/cli/link.py +0 -199
  65. hud_python-0.5.33/hud/cli/rft.py +0 -350
  66. hud_python-0.5.33/hud/cli/rft_status.py +0 -162
  67. hud_python-0.5.33/hud/cli/utils/lockfile.py +0 -36
  68. hud_python-0.5.33/hud/native/__init__.py +0 -1
  69. {hud_python-0.5.33 → hud_python-0.5.35}/.gitignore +0 -0
  70. {hud_python-0.5.33 → hud_python-0.5.35}/LICENSE +0 -0
  71. {hud_python-0.5.33 → hud_python-0.5.35}/examples/README.md +0 -0
  72. {hud_python-0.5.33 → hud_python-0.5.35}/hud/__init__.py +0 -0
  73. {hud_python-0.5.33 → hud_python-0.5.35}/hud/__main__.py +0 -0
  74. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/__init__.py +0 -0
  75. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/gateway.py +0 -0
  76. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/gemini_cua.py +0 -0
  77. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/grounded_openai.py +0 -0
  78. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/misc/__init__.py +0 -0
  79. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/misc/integration_test_agent.py +0 -0
  80. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/misc/response_agent.py +0 -0
  81. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/openai_chat.py +0 -0
  82. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/operator.py +0 -0
  83. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/resolver.py +0 -0
  84. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/__init__.py +0 -0
  85. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/conftest.py +0 -0
  86. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_base.py +0 -0
  87. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_base_runtime.py +0 -0
  88. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_claude.py +0 -0
  89. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_gemini.py +0 -0
  90. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
  91. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_integration_test_agent.py +0 -0
  92. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_openai.py +0 -0
  93. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_operator.py +0 -0
  94. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_resolver.py +0 -0
  95. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_run_eval.py +0 -0
  96. {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/types.py +0 -0
  97. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/__main__.py +0 -0
  98. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/cancel.py +0 -0
  99. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/convert/__init__.py +0 -0
  100. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/convert/base.py +0 -0
  101. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/convert/harbor.py +0 -0
  102. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/convert/tests/__init__.py +0 -0
  103. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/convert/tests/conftest.py +0 -0
  104. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/convert/tests/test_harbor.py +0 -0
  105. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/dev.py +0 -0
  106. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/flows/__init__.py +0 -0
  107. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/flows/dev.py +0 -0
  108. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/flows/tasks.py +0 -0
  109. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/flows/tests/__init__.py +0 -0
  110. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/flows/tests/test_dev.py +0 -0
  111. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/models.py +0 -0
  112. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/push.py +0 -0
  113. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/__init__.py +0 -0
  114. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_analyze_metadata.py +0 -0
  115. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_build_module.py +0 -0
  116. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_cli_init.py +0 -0
  117. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_cli_main.py +0 -0
  118. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
  119. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_cli_root.py +0 -0
  120. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_convert.py +0 -0
  121. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_debug.py +0 -0
  122. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_debug_directory_mode.py +0 -0
  123. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_dev.py +0 -0
  124. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_eval.py +0 -0
  125. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_eval_bedrock.py +0 -0
  126. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_init.py +0 -0
  127. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_main_module.py +0 -0
  128. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_mcp_server.py +0 -0
  129. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_push.py +0 -0
  130. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_push_happy.py +0 -0
  131. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_push_wrapper.py +0 -0
  132. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_utils.py +0 -0
  133. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/__init__.py +0 -0
  134. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/api.py +0 -0
  135. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/args.py +0 -0
  136. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/build_logs.py +0 -0
  137. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/config.py +0 -0
  138. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/context.py +0 -0
  139. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/docker.py +0 -0
  140. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/env_check.py +0 -0
  141. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/git.py +0 -0
  142. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/interactive.py +0 -0
  143. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/logging.py +0 -0
  144. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/metadata.py +0 -0
  145. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/server.py +0 -0
  146. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/source_hash.py +0 -0
  147. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tasks.py +0 -0
  148. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/__init__.py +0 -0
  149. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_config.py +0 -0
  150. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_docker.py +0 -0
  151. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_docker_hints.py +0 -0
  152. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_env_check.py +0 -0
  153. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_environment.py +0 -0
  154. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_git.py +0 -0
  155. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_interactive_module.py +0 -0
  156. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_logging_utils.py +0 -0
  157. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_metadata.py +0 -0
  158. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_source_hash.py +0 -0
  159. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_tasks.py +0 -0
  160. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/validation.py +0 -0
  161. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/version_check.py +0 -0
  162. {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/viewer.py +0 -0
  163. {hud_python-0.5.33 → hud_python-0.5.35}/hud/datasets/__init__.py +0 -0
  164. {hud_python-0.5.33 → hud_python-0.5.35}/hud/datasets/loader.py +0 -0
  165. {hud_python-0.5.33 → hud_python-0.5.35}/hud/datasets/runner.py +0 -0
  166. {hud_python-0.5.33 → hud_python-0.5.35}/hud/datasets/tests/__init__.py +0 -0
  167. {hud_python-0.5.33 → hud_python-0.5.35}/hud/datasets/tests/test_loader.py +0 -0
  168. {hud_python-0.5.33 → hud_python-0.5.35}/hud/datasets/tests/test_utils.py +0 -0
  169. {hud_python-0.5.33 → hud_python-0.5.35}/hud/datasets/utils.py +0 -0
  170. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/__init__.py +0 -0
  171. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/connection.py +0 -0
  172. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/connectors/__init__.py +0 -0
  173. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/connectors/base.py +0 -0
  174. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/connectors/local.py +0 -0
  175. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/connectors/openai.py +0 -0
  176. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/connectors/remote.py +0 -0
  177. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/environment.py +0 -0
  178. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/integrations/__init__.py +0 -0
  179. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/integrations/adk.py +0 -0
  180. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/integrations/anthropic.py +0 -0
  181. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/integrations/gemini.py +0 -0
  182. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/integrations/langchain.py +0 -0
  183. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/integrations/llamaindex.py +0 -0
  184. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/integrations/openai.py +0 -0
  185. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/mock.py +0 -0
  186. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/router.py +0 -0
  187. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/scenarios.py +0 -0
  188. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/tests/__init__.py +0 -0
  189. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/tests/test_connection.py +0 -0
  190. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/tests/test_environment.py +0 -0
  191. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/tests/test_integrations.py +0 -0
  192. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/tests/test_local_connectors.py +0 -0
  193. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/tests/test_scenarios.py +0 -0
  194. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/tests/test_session_id.py +0 -0
  195. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/tests/test_tools.py +0 -0
  196. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/types.py +0 -0
  197. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/utils/__init__.py +0 -0
  198. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/utils/formats.py +0 -0
  199. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/utils/schema.py +0 -0
  200. {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/utils/tool_wrappers.py +0 -0
  201. {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/__init__.py +0 -0
  202. {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/context.py +0 -0
  203. {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/display.py +0 -0
  204. {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/instrument.py +0 -0
  205. {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/manager.py +0 -0
  206. {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/parallel.py +0 -0
  207. {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/tests/__init__.py +0 -0
  208. {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/tests/test_context.py +0 -0
  209. {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/tests/test_eval.py +0 -0
  210. {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/tests/test_manager.py +0 -0
  211. {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/tests/test_parallel.py +0 -0
  212. {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/tests/test_task.py +0 -0
  213. {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/types.py +0 -0
  214. {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/utils.py +0 -0
  215. {hud_python-0.5.33 → hud_python-0.5.35}/hud/native/chat.py +0 -0
  216. {hud_python-0.5.33 → hud_python-0.5.35}/hud/patches/__init__.py +0 -0
  217. {hud_python-0.5.33 → hud_python-0.5.35}/hud/patches/warnings.py +0 -0
  218. {hud_python-0.5.33 → hud_python-0.5.35}/hud/py.typed +0 -0
  219. {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/__init__.py +0 -0
  220. {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/context.py +0 -0
  221. {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/helper/__init__.py +0 -0
  222. {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/low_level.py +0 -0
  223. {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/router.py +0 -0
  224. {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/server.py +0 -0
  225. {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/__init__.py +0 -0
  226. {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/test_add_tool.py +0 -0
  227. {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/test_context.py +0 -0
  228. {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/test_mcp_server_handlers.py +0 -0
  229. {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/test_mcp_server_integration.py +0 -0
  230. {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/test_mcp_server_more.py +0 -0
  231. {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/test_prefix_naming.py +0 -0
  232. {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/test_run_wrapper.py +0 -0
  233. {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/test_server_extra.py +0 -0
  234. {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/test_sigterm_runner.py +0 -0
  235. {hud_python-0.5.33 → hud_python-0.5.35}/hud/services/__init__.py +0 -0
  236. {hud_python-0.5.33 → hud_python-0.5.35}/hud/services/chat_service.py +0 -0
  237. {hud_python-0.5.33 → hud_python-0.5.35}/hud/services/reply_metadata.py +0 -0
  238. {hud_python-0.5.33 → hud_python-0.5.35}/hud/services/tests/__init__.py +0 -0
  239. {hud_python-0.5.33 → hud_python-0.5.35}/hud/services/tests/test_chat_service.py +0 -0
  240. {hud_python-0.5.33 → hud_python-0.5.35}/hud/settings.py +0 -0
  241. {hud_python-0.5.33 → hud_python-0.5.35}/hud/shared/__init__.py +0 -0
  242. {hud_python-0.5.33 → hud_python-0.5.35}/hud/shared/exceptions.py +0 -0
  243. {hud_python-0.5.33 → hud_python-0.5.35}/hud/shared/hints.py +0 -0
  244. {hud_python-0.5.33 → hud_python-0.5.35}/hud/shared/requests.py +0 -0
  245. {hud_python-0.5.33 → hud_python-0.5.35}/hud/shared/tests/__init__.py +0 -0
  246. {hud_python-0.5.33 → hud_python-0.5.35}/hud/shared/tests/test_exceptions.py +0 -0
  247. {hud_python-0.5.33 → hud_python-0.5.35}/hud/shared/tests/test_hints.py +0 -0
  248. {hud_python-0.5.33 → hud_python-0.5.35}/hud/shared/tests/test_requests.py +0 -0
  249. {hud_python-0.5.33 → hud_python-0.5.35}/hud/telemetry/__init__.py +0 -0
  250. {hud_python-0.5.33 → hud_python-0.5.35}/hud/telemetry/exporter.py +0 -0
  251. {hud_python-0.5.33 → hud_python-0.5.35}/hud/telemetry/tests/__init__.py +0 -0
  252. {hud_python-0.5.33 → hud_python-0.5.35}/hud/telemetry/tests/test_eval_telemetry.py +0 -0
  253. {hud_python-0.5.33 → hud_python-0.5.35}/hud/telemetry/tests/test_exporter.py +0 -0
  254. {hud_python-0.5.33 → hud_python-0.5.35}/hud/telemetry/tests/test_instrument.py +0 -0
  255. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/__init__.py +0 -0
  256. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/agent.py +0 -0
  257. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/base.py +0 -0
  258. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/__init__.py +0 -0
  259. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/apply_patch.py +0 -0
  260. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/bash.py +0 -0
  261. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/gemini_edit.py +0 -0
  262. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/gemini_shell.py +0 -0
  263. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/session.py +0 -0
  264. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/shell.py +0 -0
  265. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/tests/__init__.py +0 -0
  266. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/tests/test_apply_patch.py +0 -0
  267. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/tests/test_bash.py +0 -0
  268. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/tests/test_bash_extended.py +0 -0
  269. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/tests/test_bash_integration.py +0 -0
  270. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/tests/test_edit.py +0 -0
  271. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/tests/test_gemini_tools.py +0 -0
  272. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/tests/test_shell.py +0 -0
  273. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/__init__.py +0 -0
  274. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/anthropic.py +0 -0
  275. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/gemini.py +0 -0
  276. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/glm.py +0 -0
  277. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/hud.py +0 -0
  278. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/openai.py +0 -0
  279. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/qwen.py +0 -0
  280. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/settings.py +0 -0
  281. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/tests/__init__.py +0 -0
  282. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/tests/test_compression.py +0 -0
  283. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/tests/test_computer.py +0 -0
  284. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/tests/test_computer_actions.py +0 -0
  285. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/tests/test_glm_computer.py +0 -0
  286. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/elicitation.py +0 -0
  287. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/executors/__init__.py +0 -0
  288. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/executors/base.py +0 -0
  289. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/executors/pyautogui.py +0 -0
  290. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/executors/tests/__init__.py +0 -0
  291. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/executors/tests/test_base_executor.py +0 -0
  292. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  293. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/executors/xdo.py +0 -0
  294. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/__init__.py +0 -0
  295. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/base.py +0 -0
  296. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/gemini.py +0 -0
  297. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/glob.py +0 -0
  298. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/grep.py +0 -0
  299. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/list.py +0 -0
  300. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/read.py +0 -0
  301. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/tests/__init__.py +0 -0
  302. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/tests/test_glob.py +0 -0
  303. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/tests/test_grep.py +0 -0
  304. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/tests/test_list.py +0 -0
  305. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/tests/test_read.py +0 -0
  306. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/grounding/__init__.py +0 -0
  307. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/grounding/config.py +0 -0
  308. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/grounding/grounded_tool.py +0 -0
  309. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/grounding/grounder.py +0 -0
  310. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/grounding/tests/__init__.py +0 -0
  311. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
  312. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/hosted/__init__.py +0 -0
  313. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/hosted/base.py +0 -0
  314. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/hosted/code_execution.py +0 -0
  315. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/hosted/google_search.py +0 -0
  316. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/hosted/tool_search.py +0 -0
  317. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/hosted/url_context.py +0 -0
  318. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/hosted/web_fetch.py +0 -0
  319. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/hosted/web_search.py +0 -0
  320. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/jupyter.py +0 -0
  321. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/memory/__init__.py +0 -0
  322. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/memory/tests/__init__.py +0 -0
  323. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/memory/tests/test_claude.py +0 -0
  324. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/memory/tests/test_gemini.py +0 -0
  325. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/memory/tests/test_session.py +0 -0
  326. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/native_types.py +0 -0
  327. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/playwright.py +0 -0
  328. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/response.py +0 -0
  329. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/submit.py +0 -0
  330. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/__init__.py +0 -0
  331. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_agent_tool.py +0 -0
  332. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_base.py +0 -0
  333. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_elicitation.py +0 -0
  334. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_init.py +0 -0
  335. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_jupyter_tool.py +0 -0
  336. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_native_tool_e2e.py +0 -0
  337. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_native_types.py +0 -0
  338. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_playwright_tool.py +0 -0
  339. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_response.py +0 -0
  340. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_submit.py +0 -0
  341. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_tools.py +0 -0
  342. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_tools_init.py +0 -0
  343. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_types.py +0 -0
  344. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_utils.py +0 -0
  345. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/types.py +0 -0
  346. {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/utils.py +0 -0
  347. {hud_python-0.5.33 → hud_python-0.5.35}/hud/types.py +0 -0
  348. {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/__init__.py +0 -0
  349. {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/env.py +0 -0
  350. {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/mcp.py +0 -0
  351. {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/pretty_errors.py +0 -0
  352. {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/strict_schema.py +0 -0
  353. {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/tests/__init__.py +0 -0
  354. {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/tests/test_init.py +0 -0
  355. {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/tests/test_pretty_errors.py +0 -0
  356. {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/tests/test_tool_shorthand.py +0 -0
  357. {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/tool_shorthand.py +0 -0
  358. {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.5.33
3
+ Version: 0.5.35
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -101,7 +101,7 @@ Description-Content-Type: text/markdown
101
101
  </picture>
102
102
  </div>
103
103
 
104
- The HUD SDK is an open-source Python toolkit for building, evaluating, and training AI agents. Use a unified API for any model provider, wrap your code as MCP environments, run A/B evals at scale, and train with reinforcement learning.
104
+ HUD is a platform for building RL environments for AI agents. Define agent-callable tools, write evaluation scenarios, run evals at scale, and train models on the results.
105
105
 
106
106
  To learn more, check out our [Documentation](https://docs.hud.ai) and [API Reference](https://docs.hud.ai/reference).
107
107
 
@@ -110,15 +110,14 @@ To learn more, check out our [Documentation](https://docs.hud.ai) and [API Refer
110
110
  [![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
111
111
  [![Discord](https://img.shields.io/discord/1327447144772407390?label=Discord&logo=discord&style=flat-square)](https://discord.gg/wkjtmHYYjm)
112
112
  [![X Follow](https://img.shields.io/twitter/follow/hud_evals?style=social)](https://x.com/intent/user?screen_name=hud_evals)
113
- [![Shop](https://img.shields.io/badge/_-white.svg?label=shop&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAJCAYAAAAywQxIAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAACxMAAAsTAQCanBgAAAF6SURBVChTlZA9ixNhFIWf8yaTpFHRRMXCKpAZhCAYFvwoLHZhwUKw9A9YCJb+Bq0sxGbBQrTxX1j41dvIRAjGZbdwRUUGIzPMeyw2swS3WZ/ynHvP5VylafoAWAd+5Xm+wX+SpukmcMf29RDCZrD9BViz3f53+CjYngKZpD5A2/Y7SQBMJpOkKIprdV1vdzqdHzHGblmW9Ww2+5pl2TmAxWKxmM/nP8fj8cmqqtZijJ9sb0u6ABBWjh0riuIt8CqE8LGu66e2d5MkeQ8QY3xme7fb7T4ZjUbrZVl+jjFuSXoEXGxCDgIl9WzfAO5LSmzvNB771R6vzG4Bx0MIt/M8vwV8aLyDQNt70+n0G1AspaTxVln+aghQluVsKbvxVysflT9NQK/XO7R/SGiQ9Nt2aftElmWXJd1kv0kbeANQVdWl4XB4XtJouXaqNRgMHkrqS+r0+/3XwD1JXdungRfAVWBi+6WkK8D3EMJz22cl3W21WgNgx3YAzvwFd0Chdq03gKUAAAAASUVORK5CYII=&style=social)](https://shop.hud.ai)
114
113
  [![Scarf](https://static.scarf.sh/a.png?x-pxid=6530ff33-4945-452b-81f9-626872593933)](https://scarf.sh)
115
114
  [![Docs](https://img.shields.io/badge/docs-hud.ai-blue?style=flat-square)](https://docs.hud.ai)
116
115
 
117
116
  ## Install
118
117
 
119
118
  ```bash
120
- pip install hud-python
121
- ```
119
+ # Install CLI (recommended)
120
+ uv tool install hud-python --python 3.12
122
121
 
123
122
  Get your API key at [hud.ai](https://hud.ai) and set it:
124
123
 
@@ -126,65 +125,88 @@ Get your API key at [hud.ai](https://hud.ai) and set it:
126
125
  export HUD_API_KEY=your-key-here
127
126
  ```
128
127
 
129
- > For CLI tools (`hud init`, `hud dev`, etc.): `uv tool install hud-python --python 3.12`
128
+ Get your API key at [hud.ai/project/api-keys](https://hud.ai/project/api-keys).
129
+
130
+ > Or install as a library: `pip install hud-python`
130
131
 
131
132
  ![Agent running on SheetBench](https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/src/images/trace_sheet.gif)
132
133
 
133
- ## Usage
134
+ ## Environments
134
135
 
135
- ### Unified Model API
136
+ An environment is the harness an agent operates in. It packages tools (functions agents can call) and scenarios (how agents are evaluated) into a single deployable unit. Each environment spins up fresh and isolated for every evaluation.
136
137
 
137
- Use Claude, GPT, Gemini, or Grok through one OpenAI-compatible endpoint:
138
+ ```python
139
+ from hud import Environment
140
+
141
+ env = Environment("my-env")
142
+
143
+ @env.scenario("count")
144
+ async def count(word: str, letter: str):
145
+ # PROMPT — send a question to the agent.
146
+ # The agent runs its reasoning loop and returns an answer.
147
+ answer = yield f"How many '{letter}' in '{word}'?"
148
+
149
+ # SCORE — check the agent's answer against the correct count.
150
+ # Return a reward: 1.0 for correct, 0.0 for wrong.
151
+ correct = str(word.lower().count(letter.lower()))
152
+ yield 1.0 if answer and correct in answer else 0.0
153
+ ```
154
+
155
+ A scenario has two yields. The first sends a prompt — the agent runs between the yields, calling tools and reasoning. The second checks the result and returns a reward (0.0 to 1.0). → [Core Concepts](https://docs.hud.ai/concepts)
156
+
157
+ ## Run an Agent
138
158
 
139
159
  ```python
140
- from openai import AsyncOpenAI
141
- import os
160
+ import hud
161
+ from hud.agents import create_agent
142
162
 
143
- client = AsyncOpenAI(
144
- base_url="https://inference.hud.ai",
145
- api_key=os.environ["HUD_API_KEY"]
146
- )
163
+ task = env("count", word="strawberry", letter="r")
164
+ agent = create_agent("claude-sonnet-4-5")
147
165
 
148
- response = await client.chat.completions.create(
149
- model="claude-sonnet-4-5", # or gpt-4o, gemini-2.5-pro (https://hud.ai/models)
150
- messages=[{"role": "user", "content": "Hello!"}]
151
- )
166
+ async with hud.eval(task) as ctx:
167
+ result = await agent.run(ctx)
168
+
169
+ print(f"Reward: {result.reward}") # 1.0 if agent answers "3"
152
170
  ```
153
171
 
154
- Every call is traced at [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/models)
172
+ `create_agent()` picks the right agent class and native tools for each model. → [Environments](https://docs.hud.ai/quick-links/environments)
155
173
 
156
- ### Environments
174
+ ## Workflow
157
175
 
158
- Turn your code into tools agents can call. Define how to evaluate them:
176
+ ```bash
177
+ hud init my-env # Scaffold environment
178
+ cd my-env
179
+ hud dev env:env -w env.py # Run locally with hot-reload
180
+ hud eval tasks.py claude # Run evals locally
181
+ hud deploy # Deploy to platform
182
+ hud sync tasks my-taskset # Sync tasks to platform
183
+ ```
159
184
 
160
- ```python
161
- from hud import Environment
185
+ Once deployed, run evals at scale from the CLI or the [platform UI](https://hud.ai):
162
186
 
163
- env = Environment("my-env")
187
+ ```bash
188
+ hud eval my-taskset claude --remote --full
189
+ ```
164
190
 
165
- @env.tool()
166
- def add(a: int, b: int) -> int:
167
- """Add two numbers."""
168
- return a + b
191
+ → [Deploy](https://docs.hud.ai/quick-links/deploy) · [Testing & Evaluation](https://docs.hud.ai/advanced/testing-environments)
169
192
 
170
- @env.scenario("solve-math")
171
- async def solve_math(problem: str, answer: int):
172
- response = yield problem # Prompt
173
- yield 1.0 if str(answer) in response else 0.0 # Reward
193
+ ## Pre-built Tools
174
194
 
175
- async with env("solve-math", problem="What is 2+2?", answer=4) as ctx:
176
- # Your agent logic here - call tools, get response
177
- result = await ctx.call_tool("add", a=2, b=2)
178
- await ctx.submit(f"The answer is {result}")
195
+ HUD ships tools for computer control, shell execution, file editing, browser automation, and web search. Add them to any environment:
179
196
 
180
- print(ctx.reward) # 1.0
197
+ ```python
198
+ from hud.tools import AnthropicComputerTool, BashTool, EditTool
199
+
200
+ env.add_tool(AnthropicComputerTool()) # Mouse, keyboard, screenshots
201
+ env.add_tool(BashTool()) # Persistent bash shell
202
+ env.add_tool(EditTool()) # File viewing and editing
181
203
  ```
182
204
 
183
- The agent runs between the yields. First yield sends the prompt, second yield scores the result. → [Docs](https://docs.hud.ai/quick-links/environments) · [Templates](https://hud.ai/environments)
205
+ HUD adapts each tool to the model's native format — Claude gets `computer_20250124`, OpenAI gets `computer_use_preview`, Gemini gets `ComputerUse`. → [Tools Reference](https://docs.hud.ai/tools/computer)
184
206
 
185
- ### A/B Evals
207
+ ## Model Gateway
186
208
 
187
- Test different models. Repeat runs to see the distribution:
209
+ Use Claude, GPT, Gemini, or Grok through one OpenAI-compatible endpoint:
188
210
 
189
211
  ```python
190
212
  from openai import AsyncOpenAI
@@ -195,31 +217,13 @@ client = AsyncOpenAI(
195
217
  api_key=os.environ["HUD_API_KEY"]
196
218
  )
197
219
 
198
- # Using the env from above
199
- async with env("solve-math", problem="What is 2+2?", answer=4, variants={"model": ["gpt-4o", "claude-sonnet-4-5"]}, group=5) as ctx:
200
- response = await client.chat.completions.create(
201
- model=ctx.variants["model"],
202
- messages=[{"role": "user", "content": ctx.prompt}],
203
- tools=ctx.tools # Environment tools available to the model
204
- )
205
- await ctx.submit(response.choices[0].message.content)
206
- ```
207
-
208
- **Variants** test configurations. **Groups** repeat for distribution. Results stream to [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/evals)
209
-
210
- ### Deploy & Train
211
-
212
- Push to GitHub, connect on hud.ai, run at scale:
213
-
214
- ```bash
215
- hud init # Scaffold environment
216
- git push # Push to GitHub
217
- # Connect on hud.ai → New → Environment
218
- hud eval my-eval --model gpt-4o --group-size 100
219
- # Or create and run tasks on the platform
220
+ response = await client.chat.completions.create(
221
+ model="claude-sonnet-4-5", # or gpt-4o, gemini-2.5-pro (https://hud.ai/models)
222
+ messages=[{"role": "user", "content": "Hello!"}]
223
+ )
220
224
  ```
221
225
 
222
- Every run generates training data. Use it to fine-tune or run RL. → [Docs](https://docs.hud.ai/quick-links/deploy)
226
+ Every call is traced at [hud.ai](https://hud.ai). → [Models](https://docs.hud.ai/quick-links/models)
223
227
 
224
228
  ## Links
225
229
 
@@ -6,7 +6,7 @@
6
6
  </picture>
7
7
  </div>
8
8
 
9
- The HUD SDK is an open-source Python toolkit for building, evaluating, and training AI agents. Use a unified API for any model provider, wrap your code as MCP environments, run A/B evals at scale, and train with reinforcement learning.
9
+ HUD is a platform for building RL environments for AI agents. Define agent-callable tools, write evaluation scenarios, run evals at scale, and train models on the results.
10
10
 
11
11
  To learn more, check out our [Documentation](https://docs.hud.ai) and [API Reference](https://docs.hud.ai/reference).
12
12
 
@@ -15,15 +15,14 @@ To learn more, check out our [Documentation](https://docs.hud.ai) and [API Refer
15
15
  [![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
16
16
  [![Discord](https://img.shields.io/discord/1327447144772407390?label=Discord&logo=discord&style=flat-square)](https://discord.gg/wkjtmHYYjm)
17
17
  [![X Follow](https://img.shields.io/twitter/follow/hud_evals?style=social)](https://x.com/intent/user?screen_name=hud_evals)
18
- [![Shop](https://img.shields.io/badge/_-white.svg?label=shop&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAJCAYAAAAywQxIAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAACxMAAAsTAQCanBgAAAF6SURBVChTlZA9ixNhFIWf8yaTpFHRRMXCKpAZhCAYFvwoLHZhwUKw9A9YCJb+Bq0sxGbBQrTxX1j41dvIRAjGZbdwRUUGIzPMeyw2swS3WZ/ynHvP5VylafoAWAd+5Xm+wX+SpukmcMf29RDCZrD9BViz3f53+CjYngKZpD5A2/Y7SQBMJpOkKIprdV1vdzqdHzHGblmW9Ww2+5pl2TmAxWKxmM/nP8fj8cmqqtZijJ9sb0u6ABBWjh0riuIt8CqE8LGu66e2d5MkeQ8QY3xme7fb7T4ZjUbrZVl+jjFuSXoEXGxCDgIl9WzfAO5LSmzvNB771R6vzG4Bx0MIt/M8vwV8aLyDQNt70+n0G1AspaTxVln+aghQluVsKbvxVysflT9NQK/XO7R/SGiQ9Nt2aftElmWXJd1kv0kbeANQVdWl4XB4XtJouXaqNRgMHkrqS+r0+/3XwD1JXdungRfAVWBi+6WkK8D3EMJz22cl3W21WgNgx3YAzvwFd0Chdq03gKUAAAAASUVORK5CYII=&style=social)](https://shop.hud.ai)
19
18
  [![Scarf](https://static.scarf.sh/a.png?x-pxid=6530ff33-4945-452b-81f9-626872593933)](https://scarf.sh)
20
19
  [![Docs](https://img.shields.io/badge/docs-hud.ai-blue?style=flat-square)](https://docs.hud.ai)
21
20
 
22
21
  ## Install
23
22
 
24
23
  ```bash
25
- pip install hud-python
26
- ```
24
+ # Install CLI (recommended)
25
+ uv tool install hud-python --python 3.12
27
26
 
28
27
  Get your API key at [hud.ai](https://hud.ai) and set it:
29
28
 
@@ -31,65 +30,88 @@ Get your API key at [hud.ai](https://hud.ai) and set it:
31
30
  export HUD_API_KEY=your-key-here
32
31
  ```
33
32
 
34
- > For CLI tools (`hud init`, `hud dev`, etc.): `uv tool install hud-python --python 3.12`
33
+ Get your API key at [hud.ai/project/api-keys](https://hud.ai/project/api-keys).
34
+
35
+ > Or install as a library: `pip install hud-python`
35
36
 
36
37
  ![Agent running on SheetBench](https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/src/images/trace_sheet.gif)
37
38
 
38
- ## Usage
39
+ ## Environments
39
40
 
40
- ### Unified Model API
41
+ An environment is the harness an agent operates in. It packages tools (functions agents can call) and scenarios (how agents are evaluated) into a single deployable unit. Each environment spins up fresh and isolated for every evaluation.
41
42
 
42
- Use Claude, GPT, Gemini, or Grok through one OpenAI-compatible endpoint:
43
+ ```python
44
+ from hud import Environment
45
+
46
+ env = Environment("my-env")
47
+
48
+ @env.scenario("count")
49
+ async def count(word: str, letter: str):
50
+ # PROMPT — send a question to the agent.
51
+ # The agent runs its reasoning loop and returns an answer.
52
+ answer = yield f"How many '{letter}' in '{word}'?"
53
+
54
+ # SCORE — check the agent's answer against the correct count.
55
+ # Return a reward: 1.0 for correct, 0.0 for wrong.
56
+ correct = str(word.lower().count(letter.lower()))
57
+ yield 1.0 if answer and correct in answer else 0.0
58
+ ```
59
+
60
+ A scenario has two yields. The first sends a prompt — the agent runs between the yields, calling tools and reasoning. The second checks the result and returns a reward (0.0 to 1.0). → [Core Concepts](https://docs.hud.ai/concepts)
61
+
62
+ ## Run an Agent
43
63
 
44
64
  ```python
45
- from openai import AsyncOpenAI
46
- import os
65
+ import hud
66
+ from hud.agents import create_agent
47
67
 
48
- client = AsyncOpenAI(
49
- base_url="https://inference.hud.ai",
50
- api_key=os.environ["HUD_API_KEY"]
51
- )
68
+ task = env("count", word="strawberry", letter="r")
69
+ agent = create_agent("claude-sonnet-4-5")
52
70
 
53
- response = await client.chat.completions.create(
54
- model="claude-sonnet-4-5", # or gpt-4o, gemini-2.5-pro (https://hud.ai/models)
55
- messages=[{"role": "user", "content": "Hello!"}]
56
- )
71
+ async with hud.eval(task) as ctx:
72
+ result = await agent.run(ctx)
73
+
74
+ print(f"Reward: {result.reward}") # 1.0 if agent answers "3"
57
75
  ```
58
76
 
59
- Every call is traced at [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/models)
77
+ `create_agent()` picks the right agent class and native tools for each model. → [Environments](https://docs.hud.ai/quick-links/environments)
60
78
 
61
- ### Environments
79
+ ## Workflow
62
80
 
63
- Turn your code into tools agents can call. Define how to evaluate them:
81
+ ```bash
82
+ hud init my-env # Scaffold environment
83
+ cd my-env
84
+ hud dev env:env -w env.py # Run locally with hot-reload
85
+ hud eval tasks.py claude # Run evals locally
86
+ hud deploy # Deploy to platform
87
+ hud sync tasks my-taskset # Sync tasks to platform
88
+ ```
64
89
 
65
- ```python
66
- from hud import Environment
90
+ Once deployed, run evals at scale from the CLI or the [platform UI](https://hud.ai):
67
91
 
68
- env = Environment("my-env")
92
+ ```bash
93
+ hud eval my-taskset claude --remote --full
94
+ ```
69
95
 
70
- @env.tool()
71
- def add(a: int, b: int) -> int:
72
- """Add two numbers."""
73
- return a + b
96
+ → [Deploy](https://docs.hud.ai/quick-links/deploy) · [Testing & Evaluation](https://docs.hud.ai/advanced/testing-environments)
74
97
 
75
- @env.scenario("solve-math")
76
- async def solve_math(problem: str, answer: int):
77
- response = yield problem # Prompt
78
- yield 1.0 if str(answer) in response else 0.0 # Reward
98
+ ## Pre-built Tools
79
99
 
80
- async with env("solve-math", problem="What is 2+2?", answer=4) as ctx:
81
- # Your agent logic here - call tools, get response
82
- result = await ctx.call_tool("add", a=2, b=2)
83
- await ctx.submit(f"The answer is {result}")
100
+ HUD ships tools for computer control, shell execution, file editing, browser automation, and web search. Add them to any environment:
84
101
 
85
- print(ctx.reward) # 1.0
102
+ ```python
103
+ from hud.tools import AnthropicComputerTool, BashTool, EditTool
104
+
105
+ env.add_tool(AnthropicComputerTool()) # Mouse, keyboard, screenshots
106
+ env.add_tool(BashTool()) # Persistent bash shell
107
+ env.add_tool(EditTool()) # File viewing and editing
86
108
  ```
87
109
 
88
- The agent runs between the yields. First yield sends the prompt, second yield scores the result. → [Docs](https://docs.hud.ai/quick-links/environments) · [Templates](https://hud.ai/environments)
110
+ HUD adapts each tool to the model's native format — Claude gets `computer_20250124`, OpenAI gets `computer_use_preview`, Gemini gets `ComputerUse`. → [Tools Reference](https://docs.hud.ai/tools/computer)
89
111
 
90
- ### A/B Evals
112
+ ## Model Gateway
91
113
 
92
- Test different models. Repeat runs to see the distribution:
114
+ Use Claude, GPT, Gemini, or Grok through one OpenAI-compatible endpoint:
93
115
 
94
116
  ```python
95
117
  from openai import AsyncOpenAI
@@ -100,31 +122,13 @@ client = AsyncOpenAI(
100
122
  api_key=os.environ["HUD_API_KEY"]
101
123
  )
102
124
 
103
- # Using the env from above
104
- async with env("solve-math", problem="What is 2+2?", answer=4, variants={"model": ["gpt-4o", "claude-sonnet-4-5"]}, group=5) as ctx:
105
- response = await client.chat.completions.create(
106
- model=ctx.variants["model"],
107
- messages=[{"role": "user", "content": ctx.prompt}],
108
- tools=ctx.tools # Environment tools available to the model
109
- )
110
- await ctx.submit(response.choices[0].message.content)
111
- ```
112
-
113
- **Variants** test configurations. **Groups** repeat for distribution. Results stream to [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/evals)
114
-
115
- ### Deploy & Train
116
-
117
- Push to GitHub, connect on hud.ai, run at scale:
118
-
119
- ```bash
120
- hud init # Scaffold environment
121
- git push # Push to GitHub
122
- # Connect on hud.ai → New → Environment
123
- hud eval my-eval --model gpt-4o --group-size 100
124
- # Or create and run tasks on the platform
125
+ response = await client.chat.completions.create(
126
+ model="claude-sonnet-4-5", # or gpt-4o, gemini-2.5-pro (https://hud.ai/models)
127
+ messages=[{"role": "user", "content": "Hello!"}]
128
+ )
125
129
  ```
126
130
 
127
- Every run generates training data. Use it to fine-tune or run RL. → [Docs](https://docs.hud.ai/quick-links/deploy)
131
+ Every call is traced at [hud.ai](https://hud.ai). → [Models](https://docs.hud.ai/quick-links/models)
128
132
 
129
133
  ## Links
130
134
 
@@ -336,12 +336,15 @@ class MCPAgent(ABC):
336
336
  f"Available tools: {sorted(available_tool_names)}"
337
337
  )
338
338
 
339
- self.console.debug(
340
- f"Discovered {len(self._available_tools)} tools from environment: "
341
- f"{', '.join([t.name for t in self._available_tools])}"
339
+ self._categorized_tools = self.categorize_tools()
340
+
341
+ # Show tool discovery table (visible at INFO level)
342
+ self.console.format_tool_discovery(
343
+ tools=self._available_tools,
344
+ native_tools=self._categorized_tools.native + self._categorized_tools.hosted,
345
+ skipped=self._categorized_tools.skipped,
342
346
  )
343
347
 
344
- self._categorized_tools = self.categorize_tools()
345
348
  for tool, reason in self._categorized_tools.skipped:
346
349
  logger.debug("Skipping tool %s: %s", tool.name, reason)
347
350
 
@@ -574,17 +577,13 @@ class MCPAgent(ABC):
574
577
  tool_messages = await self.format_tool_results(tool_calls, tool_results)
575
578
  messages.extend(tool_messages)
576
579
 
577
- # Compact step completion display
578
- step_info = f"\n[bold]Step {step_count}"
579
- if max_steps != -1:
580
- step_info += f"/{max_steps}"
581
- step_info += "[/bold]"
582
-
583
- # Show tool calls and results in compact format
584
- for call, result in zip(tool_calls, tool_results, strict=False):
585
- step_info += f"\n{call}\n{result}"
586
-
587
- self.console.info_log(step_info)
580
+ if logger.isEnabledFor(logging.INFO):
581
+ self.console.format_step(
582
+ step=step_count,
583
+ max_steps=max_steps,
584
+ tool_calls=tool_calls,
585
+ tool_results=tool_results,
586
+ )
588
587
 
589
588
  except Exception as e:
590
589
  self.console.error_log(f"Step failed: {e}")
@@ -145,8 +145,12 @@ class ClaudeAgent(MCPAgent):
145
145
  model_client = AsyncAnthropic(api_key=settings.anthropic_api_key)
146
146
  else:
147
147
  raise ValueError(
148
- "No API key found. Set HUD_API_KEY for HUD gateway, "
149
- "or ANTHROPIC_API_KEY for direct Anthropic access."
148
+ "No API key found for Claude.\n"
149
+ " Set HUD_API_KEY to use HUD Gateway"
150
+ " (add your Anthropic key at"
151
+ " hud.ai/project/secrets for BYOK)\n"
152
+ " • Or set ANTHROPIC_API_KEY for direct"
153
+ " access"
150
154
  )
151
155
 
152
156
  self.anthropic_client: AsyncAnthropic | AsyncAnthropicBedrock = model_client
@@ -94,8 +94,12 @@ class GeminiAgent(MCPAgent):
94
94
  raise ValueError(f"Gemini API key is invalid: {e}") from e
95
95
  else:
96
96
  raise ValueError(
97
- "No API key found. Set HUD_API_KEY for HUD gateway, "
98
- "or GEMINI_API_KEY for direct Gemini access."
97
+ "No API key found for Gemini.\n"
98
+ " Set HUD_API_KEY to use HUD Gateway"
99
+ " (add your Gemini key at"
100
+ " hud.ai/project/secrets for BYOK)\n"
101
+ " • Or set GEMINI_API_KEY for direct"
102
+ " access"
99
103
  )
100
104
 
101
105
  self.gemini_client: genai.Client = model_client
@@ -119,8 +119,12 @@ class OpenAIAgent(MCPAgent):
119
119
  raise ValueError(f"OpenAI API key is invalid: {exc}") from exc
120
120
  else:
121
121
  raise ValueError(
122
- "No API key found. Set HUD_API_KEY for HUD gateway, "
123
- "or OPENAI_API_KEY for direct OpenAI access."
122
+ "No API key found for OpenAI.\n"
123
+ " Set HUD_API_KEY to use HUD Gateway"
124
+ " (add your OpenAI key at"
125
+ " hud.ai/project/secrets for BYOK)\n"
126
+ " • Or set OPENAI_API_KEY for direct"
127
+ " access"
124
128
  )
125
129
 
126
130
  self.openai_client: AsyncOpenAI = model_client
@@ -485,10 +489,9 @@ class OpenAIAgent(MCPAgent):
485
489
  type="computer_screenshot",
486
490
  image_url=f"data:image/png;base64,{screenshot}",
487
491
  ),
488
- acknowledged_safety_checks=(
489
- acknowledged_checks if acknowledged_checks else None
490
- ),
491
492
  )
493
+ if acknowledged_checks:
494
+ output_payload["acknowledged_safety_checks"] = acknowledged_checks
492
495
  computer_outputs.append(output_payload)
493
496
  self.pending_call_id = None
494
497
  self.pending_safety_checks = []
@@ -11,7 +11,7 @@ from rich.panel import Panel
11
11
  # Create the main Typer app
12
12
  app = typer.Typer(
13
13
  name="hud",
14
- help="🚀 HUD CLI - build, test, and deploy RL environments",
14
+ help="HUD CLI - build, test, and deploy evaluation environments",
15
15
  add_completion=False,
16
16
  rich_markup_mode="rich",
17
17
  pretty_exceptions_enable=False,
@@ -40,8 +40,9 @@ from .init import init_command # noqa: E402
40
40
  from .link import link_command # noqa: E402
41
41
  from .models import models_command # noqa: E402
42
42
  from .push import push_command # noqa: E402
43
- from .rft import rft_run_command # noqa: E402
44
- from .rft_status import rft_status_typer_command # noqa: E402
43
+ from .rl import rl_run_command, rl_status_command # noqa: E402
44
+ from .scenario import scenario_app # noqa: E402
45
+ from .sync import sync_app # noqa: E402
45
46
 
46
47
  _EXTRA_ARGS = {"allow_extra_args": True, "ignore_unknown_options": True}
47
48
 
@@ -50,7 +51,7 @@ app.command(name="debug", context_settings=_EXTRA_ARGS)(debug_command)
50
51
  app.command(name="dev", context_settings=_EXTRA_ARGS)(dev_command)
51
52
  app.command(name="build", context_settings=_EXTRA_ARGS)(build_command)
52
53
  app.command(name="deploy")(deploy_command)
53
- app.command(name="link")(link_command)
54
+ app.command(name="link", hidden=True)(link_command)
54
55
  app.command(name="eval")(eval_command)
55
56
  app.command(name="push", hidden=True)(push_command)
56
57
  app.command(name="init")(init_command)
@@ -108,11 +109,17 @@ def version() -> None:
108
109
  console.print("HUD CLI version: [cyan]unknown[/cyan]")
109
110
 
110
111
 
111
- # RFT subcommand group
112
- rft_app = typer.Typer(help="🚀 Reinforcement Fine-Tuning (RFT) commands")
113
- rft_app.command("run")(rft_run_command)
114
- rft_app.command("status")(rft_status_typer_command)
115
- app.add_typer(rft_app, name="rft")
112
+ # Scenario subcommand group
113
+ app.add_typer(scenario_app, name="scenario")
114
+
115
+ # Sync subcommand group
116
+ app.add_typer(sync_app, name="sync")
117
+
118
+ # RL subcommand group
119
+ rl_app = typer.Typer(help="🚀 RL training commands\n\nExample: hud rl run my-taskset -m <model-id>")
120
+ rl_app.command("run")(rl_run_command)
121
+ rl_app.command("status")(rl_status_command)
122
+ app.add_typer(rl_app, name="rl")
116
123
 
117
124
 
118
125
  # ---------------------------------------------------------------------------
@@ -140,7 +147,7 @@ def main() -> None:
140
147
  if len(sys.argv) == 1 or (len(sys.argv) == 2 and sys.argv[1] in ["--help", "-h"]):
141
148
  console.print(
142
149
  Panel.fit(
143
- "[bold cyan]🚀 HUD CLI[/bold cyan]\nBuild, test, and deploy RL environments",
150
+ "[bold cyan]HUD CLI[/bold cyan]\nBuild, test, and deploy environments",
144
151
  border_style="cyan",
145
152
  )
146
153
  )
@@ -150,10 +157,8 @@ def main() -> None:
150
157
  )
151
158
  console.print(" 2. Start dev server: [cyan]hud dev[/cyan]")
152
159
  console.print(" 3. Deploy to HUD platform: [cyan]hud deploy[/cyan]")
153
- console.print(" 4. Run evaluations: [cyan]hud eval tasks.jsonl[/cyan]")
154
- console.print("\n[yellow]Training:[/yellow]")
155
- console.print(" [cyan]hud rft run tasks.jsonl[/cyan] Launch an RFT training job")
156
- console.print(" [cyan]hud rft status <model-id>[/cyan] Check training status\n")
160
+ console.print(" 4. Sync tasks: [cyan]hud sync tasks my-taskset[/cyan]")
161
+ console.print(" 5. Run evaluations: [cyan]hud eval tasks.py claude[/cyan]\n")
157
162
 
158
163
  app()
159
164
  except typer.Exit as e: