hud-python 0.4.66__tar.gz → 0.4.68__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (332)
  1. {hud_python-0.4.66 → hud_python-0.4.68}/.gitignore +2 -1
  2. {hud_python-0.4.66 → hud_python-0.4.68}/PKG-INFO +11 -35
  3. {hud_python-0.4.66 → hud_python-0.4.68}/README.md +5 -5
  4. {hud_python-0.4.66 → hud_python-0.4.68}/environments/README.md +1 -1
  5. {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/pyproject.toml +1 -1
  6. {hud_python-0.4.66 → hud_python-0.4.68}/environments/online_mind2web/pyproject.toml +1 -1
  7. {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/__init__.py +5 -3
  8. {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/base.py +105 -98
  9. {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/claude.py +76 -71
  10. {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/gemini.py +42 -43
  11. {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/grounded_openai.py +66 -67
  12. {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/misc/integration_test_agent.py +12 -4
  13. hud_python-0.4.68/hud/agents/misc/response_agent.py +101 -0
  14. hud_python-0.4.68/hud/agents/openai.py +362 -0
  15. hud_python-0.4.66/hud/agents/openai_chat_generic.py → hud_python-0.4.68/hud/agents/openai_chat.py +47 -32
  16. hud_python-0.4.68/hud/agents/operator.py +211 -0
  17. hud_python-0.4.68/hud/agents/tests/conftest.py +124 -0
  18. {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/tests/test_base.py +60 -64
  19. {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/tests/test_base_runtime.py +48 -35
  20. {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/tests/test_claude.py +22 -34
  21. {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/tests/test_gemini.py +46 -63
  22. {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/tests/test_grounded_openai_agent.py +10 -48
  23. hud_python-0.4.68/hud/agents/tests/test_openai.py +1083 -0
  24. hud_python-0.4.68/hud/agents/tests/test_operator.py +308 -0
  25. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/__init__.py +118 -244
  26. hud_python-0.4.68/hud/cli/eval.py +741 -0
  27. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_cli_init.py +6 -1
  28. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_eval.py +17 -156
  29. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_tasks.py +5 -5
  30. {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/README.md +12 -11
  31. hud_python-0.4.68/hud/datasets/__init__.py +33 -0
  32. hud_python-0.4.68/hud/datasets/runner.py +295 -0
  33. hud_python-0.4.68/hud/datasets/tests/test_utils.py +319 -0
  34. hud_python-0.4.68/hud/datasets/utils.py +411 -0
  35. {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/actor.py +3 -3
  36. {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/server.py +17 -30
  37. {hud_python-0.4.66 → hud_python-0.4.68}/hud/settings.py +6 -0
  38. {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/tests/test_job.py +0 -8
  39. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/computer/hud.py +17 -1
  40. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/computer/openai.py +14 -7
  41. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_computer.py +0 -8
  42. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_computer_actions.py +22 -1
  43. {hud_python-0.4.66 → hud_python-0.4.68}/hud/types.py +58 -4
  44. {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/__init__.py +2 -0
  45. {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/hud_console.py +12 -1
  46. hud_python-0.4.68/hud/utils/strict_schema.py +162 -0
  47. {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tasks.py +59 -0
  48. {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tests/test_init.py +1 -2
  49. {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tests/test_tasks.py +170 -1
  50. {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tests/test_version.py +1 -1
  51. hud_python-0.4.68/hud/utils/types.py +20 -0
  52. {hud_python-0.4.66 → hud_python-0.4.68}/hud/version.py +1 -1
  53. {hud_python-0.4.66 → hud_python-0.4.68}/pyproject.toml +4 -14
  54. hud_python-0.4.66/hud/agents/langchain.py +0 -264
  55. hud_python-0.4.66/hud/agents/lite_llm.py +0 -72
  56. hud_python-0.4.66/hud/agents/misc/response_agent.py +0 -100
  57. hud_python-0.4.66/hud/agents/openai.py +0 -356
  58. hud_python-0.4.66/hud/agents/tests/test_openai.py +0 -213
  59. hud_python-0.4.66/hud/cli/eval.py +0 -913
  60. hud_python-0.4.66/hud/cli/eval_config.py +0 -179
  61. hud_python-0.4.66/hud/datasets/__init__.py +0 -33
  62. hud_python-0.4.66/hud/datasets/parallel.py +0 -675
  63. hud_python-0.4.66/hud/datasets/runner.py +0 -135
  64. hud_python-0.4.66/hud/datasets/tests/test_utils.py +0 -228
  65. hud_python-0.4.66/hud/datasets/utils.py +0 -118
  66. hud_python-0.4.66/hud/utils/agent_factories.py +0 -84
  67. hud_python-0.4.66/hud/utils/async_utils.py +0 -65
  68. hud_python-0.4.66/hud/utils/group_eval.py +0 -243
  69. hud_python-0.4.66/hud/utils/progress.py +0 -149
  70. hud_python-0.4.66/hud/utils/task_tracking.py +0 -223
  71. hud_python-0.4.66/hud/utils/tests/test_agent_factories.py +0 -61
  72. hud_python-0.4.66/hud/utils/tests/test_async_utils.py +0 -173
  73. hud_python-0.4.66/hud/utils/tests/test_progress.py +0 -261
  74. {hud_python-0.4.66 → hud_python-0.4.68}/LICENSE +0 -0
  75. {hud_python-0.4.66 → hud_python-0.4.68}/environments/blank/README.md +0 -0
  76. {hud_python-0.4.66 → hud_python-0.4.68}/environments/blank/environment/README.md +0 -0
  77. {hud_python-0.4.66 → hud_python-0.4.68}/environments/blank/environment/pyproject.toml +0 -0
  78. {hud_python-0.4.66 → hud_python-0.4.68}/environments/blank/server/README.md +0 -0
  79. {hud_python-0.4.66 → hud_python-0.4.68}/environments/blank/server/pyproject.toml +0 -0
  80. {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/README.md +0 -0
  81. {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/browser-base/README.md +0 -0
  82. {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/environment/2048/README.md +0 -0
  83. {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/environment/2048/backend/pyproject.toml +0 -0
  84. {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/environment/README.md +0 -0
  85. {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/environment/pyproject.toml +0 -0
  86. {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/environment/todo/README.md +0 -0
  87. {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/environment/todo/backend/pyproject.toml +0 -0
  88. {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/server/pyproject.toml +0 -0
  89. {hud_python-0.4.66 → hud_python-0.4.68}/environments/deepresearch/README.md +0 -0
  90. {hud_python-0.4.66 → hud_python-0.4.68}/environments/deepresearch/environment/pyproject.toml +0 -0
  91. {hud_python-0.4.66 → hud_python-0.4.68}/environments/deepresearch/pyproject.toml +0 -0
  92. {hud_python-0.4.66 → hud_python-0.4.68}/environments/deepresearch/server/pyproject.toml +0 -0
  93. {hud_python-0.4.66 → hud_python-0.4.68}/environments/jupyter/README.md +0 -0
  94. {hud_python-0.4.66 → hud_python-0.4.68}/environments/jupyter/server/pyproject.toml +0 -0
  95. {hud_python-0.4.66 → hud_python-0.4.68}/environments/online_mind2web/README.md +0 -0
  96. {hud_python-0.4.66 → hud_python-0.4.68}/environments/online_mind2web/src/hud_controller/providers/README.md +0 -0
  97. {hud_python-0.4.66 → hud_python-0.4.68}/environments/remote_browser/README.md +0 -0
  98. {hud_python-0.4.66 → hud_python-0.4.68}/environments/remote_browser/pyproject.toml +0 -0
  99. {hud_python-0.4.66 → hud_python-0.4.68}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
  100. {hud_python-0.4.66 → hud_python-0.4.68}/environments/rubrics/README.md +0 -0
  101. {hud_python-0.4.66 → hud_python-0.4.68}/environments/rubrics/environment/pyproject.toml +0 -0
  102. {hud_python-0.4.66 → hud_python-0.4.68}/environments/rubrics/pyproject.toml +0 -0
  103. {hud_python-0.4.66 → hud_python-0.4.68}/environments/rubrics/server/pyproject.toml +0 -0
  104. {hud_python-0.4.66 → hud_python-0.4.68}/environments/text_2048/README.md +0 -0
  105. {hud_python-0.4.66 → hud_python-0.4.68}/environments/text_2048/pyproject.toml +0 -0
  106. {hud_python-0.4.66 → hud_python-0.4.68}/examples/README.md +0 -0
  107. {hud_python-0.4.66 → hud_python-0.4.68}/hud/__init__.py +0 -0
  108. {hud_python-0.4.66 → hud_python-0.4.68}/hud/__main__.py +0 -0
  109. {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/misc/__init__.py +0 -0
  110. {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/tests/__init__.py +0 -0
  111. {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/tests/test_client.py +0 -0
  112. {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/utils.py +0 -0
  113. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/__main__.py +0 -0
  114. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/analyze.py +0 -0
  115. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/build.py +0 -0
  116. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/clone.py +0 -0
  117. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/debug.py +0 -0
  118. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/dev.py +0 -0
  119. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/flows/__init__.py +0 -0
  120. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/flows/dev.py +0 -0
  121. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/flows/tasks.py +0 -0
  122. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/get.py +0 -0
  123. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/init.py +0 -0
  124. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/list_func.py +0 -0
  125. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/pull.py +0 -0
  126. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/push.py +0 -0
  127. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/remove.py +0 -0
  128. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rft.py +0 -0
  129. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rft_status.py +0 -0
  130. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/__init__.py +0 -0
  131. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/celebrate.py +0 -0
  132. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/config.py +0 -0
  133. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/display.py +0 -0
  134. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/gpu.py +0 -0
  135. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/gpu_utils.py +0 -0
  136. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/local_runner.py +0 -0
  137. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/presets.py +0 -0
  138. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/remote_runner.py +0 -0
  139. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/rl_api.py +0 -0
  140. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/viewer.py +0 -0
  141. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/vllm.py +0 -0
  142. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/wait_utils.py +0 -0
  143. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/__init__.py +0 -0
  144. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_analyze.py +0 -0
  145. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_analyze_metadata.py +0 -0
  146. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_analyze_module.py +0 -0
  147. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_build.py +0 -0
  148. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_build_failure.py +0 -0
  149. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_build_module.py +0 -0
  150. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_cli_main.py +0 -0
  151. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
  152. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_cli_root.py +0 -0
  153. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_clone.py +0 -0
  154. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_convert.py +0 -0
  155. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_cursor.py +0 -0
  156. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_debug.py +0 -0
  157. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_list_func.py +0 -0
  158. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_main_module.py +0 -0
  159. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_mcp_server.py +0 -0
  160. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_pull.py +0 -0
  161. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_push.py +0 -0
  162. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_push_happy.py +0 -0
  163. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_push_wrapper.py +0 -0
  164. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_registry.py +0 -0
  165. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_utils.py +0 -0
  166. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/__init__.py +0 -0
  167. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/config.py +0 -0
  168. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/cursor.py +0 -0
  169. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/docker.py +0 -0
  170. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/env_check.py +0 -0
  171. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/environment.py +0 -0
  172. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/interactive.py +0 -0
  173. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/local_runner.py +0 -0
  174. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/logging.py +0 -0
  175. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/metadata.py +0 -0
  176. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/package_runner.py +0 -0
  177. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/registry.py +0 -0
  178. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/remote_runner.py +0 -0
  179. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/runner.py +0 -0
  180. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/server.py +0 -0
  181. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/source_hash.py +0 -0
  182. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tasks.py +0 -0
  183. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/__init__.py +0 -0
  184. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_config.py +0 -0
  185. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_docker.py +0 -0
  186. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_docker_hints.py +0 -0
  187. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_env_check.py +0 -0
  188. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_environment.py +0 -0
  189. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_interactive_module.py +0 -0
  190. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_local_runner.py +0 -0
  191. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_logging_utils.py +0 -0
  192. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_metadata.py +0 -0
  193. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_package_runner.py +0 -0
  194. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_registry_utils.py +0 -0
  195. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_remote_runner.py +0 -0
  196. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_runner_modules.py +0 -0
  197. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_source_hash.py +0 -0
  198. {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/version_check.py +0 -0
  199. {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/__init__.py +0 -0
  200. {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/base.py +0 -0
  201. {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/fastmcp.py +0 -0
  202. {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/mcp_use.py +0 -0
  203. {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/tests/__init__.py +0 -0
  204. {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/tests/test_client_integration.py +0 -0
  205. {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/tests/test_fastmcp.py +0 -0
  206. {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/tests/test_mcp_use_retry.py +0 -0
  207. {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/tests/test_protocol.py +0 -0
  208. {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/utils/__init__.py +0 -0
  209. {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/utils/mcp_use_retry.py +0 -0
  210. {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/utils/retry.py +0 -0
  211. {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/utils/retry_transport.py +0 -0
  212. {hud_python-0.4.66 → hud_python-0.4.68}/hud/datasets/tests/__init__.py +0 -0
  213. {hud_python-0.4.66 → hud_python-0.4.68}/hud/datasets/tests/test_runner.py +0 -0
  214. {hud_python-0.4.66 → hud_python-0.4.68}/hud/misc/__init__.py +0 -0
  215. {hud_python-0.4.66 → hud_python-0.4.68}/hud/misc/claude_plays_pokemon.py +0 -0
  216. {hud_python-0.4.66 → hud_python-0.4.68}/hud/native/__init__.py +0 -0
  217. {hud_python-0.4.66 → hud_python-0.4.68}/hud/native/comparator.py +0 -0
  218. {hud_python-0.4.66 → hud_python-0.4.68}/hud/native/tests/__init__.py +0 -0
  219. {hud_python-0.4.66 → hud_python-0.4.68}/hud/native/tests/test_comparator.py +0 -0
  220. {hud_python-0.4.66 → hud_python-0.4.68}/hud/native/tests/test_native_init.py +0 -0
  221. {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/__init__.py +0 -0
  222. {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/collector.py +0 -0
  223. {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/config.py +0 -0
  224. {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/context.py +0 -0
  225. {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/exporters.py +0 -0
  226. {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/instrumentation.py +0 -0
  227. {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/processors.py +0 -0
  228. {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/tests/__init__.py +0 -0
  229. {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/tests/test_instrumentation.py +0 -0
  230. {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/tests/test_processors.py +0 -0
  231. {hud_python-0.4.66 → hud_python-0.4.68}/hud/py.typed +0 -0
  232. {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/README.md +0 -0
  233. {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/__init__.py +0 -0
  234. {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/buffer.py +0 -0
  235. {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/chat_template.jinja +0 -0
  236. {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/config.py +0 -0
  237. {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/distributed.py +0 -0
  238. {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/learner.py +0 -0
  239. {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/tests/__init__.py +0 -0
  240. {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/tests/test_learner.py +0 -0
  241. {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/train.py +0 -0
  242. {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/types.py +0 -0
  243. {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/utils/start_vllm_server.sh +0 -0
  244. {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/utils.py +0 -0
  245. {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/vllm_adapter.py +0 -0
  246. {hud_python-0.4.66 → hud_python-0.4.68}/hud/samples/__init__.py +0 -0
  247. {hud_python-0.4.66 → hud_python-0.4.68}/hud/samples/browser.py +0 -0
  248. {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/__init__.py +0 -0
  249. {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/context.py +0 -0
  250. {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/helper/__init__.py +0 -0
  251. {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/low_level.py +0 -0
  252. {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/router.py +0 -0
  253. {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/tests/__init__.py +0 -0
  254. {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/tests/test_add_tool.py +0 -0
  255. {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/tests/test_context.py +0 -0
  256. {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/tests/test_mcp_server_handlers.py +0 -0
  257. {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/tests/test_mcp_server_integration.py +0 -0
  258. {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/tests/test_mcp_server_more.py +0 -0
  259. {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/tests/test_run_wrapper.py +0 -0
  260. {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/tests/test_server_extra.py +0 -0
  261. {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/tests/test_sigterm_runner.py +0 -0
  262. {hud_python-0.4.66 → hud_python-0.4.68}/hud/shared/__init__.py +0 -0
  263. {hud_python-0.4.66 → hud_python-0.4.68}/hud/shared/exceptions.py +0 -0
  264. {hud_python-0.4.66 → hud_python-0.4.68}/hud/shared/hints.py +0 -0
  265. {hud_python-0.4.66 → hud_python-0.4.68}/hud/shared/requests.py +0 -0
  266. {hud_python-0.4.66 → hud_python-0.4.68}/hud/shared/tests/__init__.py +0 -0
  267. {hud_python-0.4.66 → hud_python-0.4.68}/hud/shared/tests/test_exceptions.py +0 -0
  268. {hud_python-0.4.66 → hud_python-0.4.68}/hud/shared/tests/test_hints.py +0 -0
  269. {hud_python-0.4.66 → hud_python-0.4.68}/hud/shared/tests/test_requests.py +0 -0
  270. {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/__init__.py +0 -0
  271. {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/async_context.py +0 -0
  272. {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/instrument.py +0 -0
  273. {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/job.py +0 -0
  274. {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/replay.py +0 -0
  275. {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/tests/__init__.py +0 -0
  276. {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/tests/test_async_context.py +0 -0
  277. {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/tests/test_instrument.py +0 -0
  278. {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/tests/test_replay.py +0 -0
  279. {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/tests/test_trace.py +0 -0
  280. {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/trace.py +0 -0
  281. {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/utils.py +0 -0
  282. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/__init__.py +0 -0
  283. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/base.py +0 -0
  284. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/bash.py +0 -0
  285. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/computer/__init__.py +0 -0
  286. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/computer/anthropic.py +0 -0
  287. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/computer/gemini.py +0 -0
  288. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/computer/qwen.py +0 -0
  289. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/computer/settings.py +0 -0
  290. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/edit.py +0 -0
  291. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/executors/__init__.py +0 -0
  292. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/executors/base.py +0 -0
  293. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/executors/pyautogui.py +0 -0
  294. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/executors/tests/__init__.py +0 -0
  295. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/executors/tests/test_base_executor.py +0 -0
  296. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  297. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/executors/xdo.py +0 -0
  298. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/grounding/__init__.py +0 -0
  299. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/grounding/config.py +0 -0
  300. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/grounding/grounded_tool.py +0 -0
  301. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/grounding/grounder.py +0 -0
  302. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/grounding/tests/__init__.py +0 -0
  303. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
  304. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/jupyter.py +0 -0
  305. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/playwright.py +0 -0
  306. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/response.py +0 -0
  307. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/submit.py +0 -0
  308. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/__init__.py +0 -0
  309. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_base.py +0 -0
  310. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_bash.py +0 -0
  311. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_bash_extended.py +0 -0
  312. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_edit.py +0 -0
  313. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_init.py +0 -0
  314. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_jupyter_tool.py +0 -0
  315. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_playwright_tool.py +0 -0
  316. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_response.py +0 -0
  317. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_submit.py +0 -0
  318. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_tools.py +0 -0
  319. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_tools_init.py +0 -0
  320. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_types.py +0 -0
  321. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_utils.py +0 -0
  322. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/types.py +0 -0
  323. {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/utils.py +0 -0
  324. {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/mcp.py +0 -0
  325. {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/pretty_errors.py +0 -0
  326. {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/telemetry.py +0 -0
  327. {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tests/__init__.py +0 -0
  328. {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tests/test_mcp.py +0 -0
  329. {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tests/test_pretty_errors.py +0 -0
  330. {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tests/test_telemetry.py +0 -0
  331. {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tests/test_tool_shorthand.py +0 -0
  332. {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tool_shorthand.py +0 -0
@@ -53,4 +53,5 @@ hud/rl/checkpoints_test/
53
53
 
54
54
  .ck/
55
55
 
56
- .hud_eval_config
56
+ .hud_eval_config
57
+ .hud_eval.toml
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.4.66
3
+ Version: 0.4.68
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -35,7 +35,7 @@ Classifier: Programming Language :: Python :: 3.11
35
35
  Classifier: Programming Language :: Python :: 3.12
36
36
  Classifier: Programming Language :: Python :: 3.13
37
37
  Requires-Python: <3.13,>=3.11
38
- Requires-Dist: anthropic
38
+ Requires-Dist: anthropic>=0.75
39
39
  Requires-Dist: blessed>=1.20.0
40
40
  Requires-Dist: datasets>=2.14.0
41
41
  Requires-Dist: google-genai
@@ -45,7 +45,7 @@ Requires-Dist: hud-mcp-python-sdk>=3.13.2
45
45
  Requires-Dist: hud-mcp-use-python-sdk==2.3.20
46
46
  Requires-Dist: langchain==0.3.27
47
47
  Requires-Dist: numpy>=1.24.0
48
- Requires-Dist: openai
48
+ Requires-Dist: openai>=2.8.1
49
49
  Requires-Dist: opentelemetry-api>=1.34.1
50
50
  Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.34.1
51
51
  Requires-Dist: opentelemetry-instrumentation-mcp==0.47.0
@@ -64,74 +64,50 @@ Requires-Dist: typer>=0.9.0
64
64
  Requires-Dist: watchfiles>=0.21.0
65
65
  Requires-Dist: wrapt>=1.14.0
66
66
  Provides-Extra: agent
67
- Requires-Dist: aiodocker>=0.24.0; extra == 'agent'
68
67
  Requires-Dist: dotenv>=0.9.9; extra == 'agent'
69
- Requires-Dist: inspect-ai>=0.3.80; extra == 'agent'
70
68
  Requires-Dist: ipykernel; extra == 'agent'
71
69
  Requires-Dist: ipython<9; extra == 'agent'
72
70
  Requires-Dist: jupyter-client; extra == 'agent'
73
71
  Requires-Dist: jupyter-core; extra == 'agent'
74
- Requires-Dist: langchain; extra == 'agent'
75
- Requires-Dist: langchain-anthropic; extra == 'agent'
76
- Requires-Dist: langchain-openai; extra == 'agent'
77
- Requires-Dist: litellm>=1.55.0; extra == 'agent'
78
72
  Requires-Dist: pillow>=11.1.0; extra == 'agent'
79
73
  Requires-Dist: playwright; extra == 'agent'
80
74
  Requires-Dist: pyautogui>=0.9.54; extra == 'agent'
81
- Requires-Dist: pyright==1.1.401; extra == 'agent'
75
+ Requires-Dist: pyright==1.1.407; extra == 'agent'
82
76
  Requires-Dist: pytest-asyncio; extra == 'agent'
83
77
  Requires-Dist: pytest-cov; extra == 'agent'
84
78
  Requires-Dist: pytest-mock; extra == 'agent'
85
79
  Requires-Dist: pytest<9,>=8.1.1; extra == 'agent'
86
80
  Requires-Dist: ruff>=0.11.8; extra == 'agent'
87
- Requires-Dist: setuptools; extra == 'agent'
88
- Requires-Dist: textdistance<5,>=4.5.0; extra == 'agent'
89
81
  Provides-Extra: agents
90
- Requires-Dist: aiodocker>=0.24.0; extra == 'agents'
91
82
  Requires-Dist: dotenv>=0.9.9; extra == 'agents'
92
- Requires-Dist: inspect-ai>=0.3.80; extra == 'agents'
93
83
  Requires-Dist: ipykernel; extra == 'agents'
94
84
  Requires-Dist: ipython<9; extra == 'agents'
95
85
  Requires-Dist: jupyter-client; extra == 'agents'
96
86
  Requires-Dist: jupyter-core; extra == 'agents'
97
- Requires-Dist: langchain; extra == 'agents'
98
- Requires-Dist: langchain-anthropic; extra == 'agents'
99
- Requires-Dist: langchain-openai; extra == 'agents'
100
- Requires-Dist: litellm>=1.55.0; extra == 'agents'
101
87
  Requires-Dist: pillow>=11.1.0; extra == 'agents'
102
88
  Requires-Dist: playwright; extra == 'agents'
103
89
  Requires-Dist: pyautogui>=0.9.54; extra == 'agents'
104
- Requires-Dist: pyright==1.1.401; extra == 'agents'
90
+ Requires-Dist: pyright==1.1.407; extra == 'agents'
105
91
  Requires-Dist: pytest-asyncio; extra == 'agents'
106
92
  Requires-Dist: pytest-cov; extra == 'agents'
107
93
  Requires-Dist: pytest-mock; extra == 'agents'
108
94
  Requires-Dist: pytest<9,>=8.1.1; extra == 'agents'
109
95
  Requires-Dist: ruff>=0.11.8; extra == 'agents'
110
- Requires-Dist: setuptools; extra == 'agents'
111
- Requires-Dist: textdistance<5,>=4.5.0; extra == 'agents'
112
96
  Provides-Extra: dev
113
- Requires-Dist: aiodocker>=0.24.0; extra == 'dev'
114
97
  Requires-Dist: dotenv>=0.9.9; extra == 'dev'
115
- Requires-Dist: inspect-ai>=0.3.80; extra == 'dev'
116
98
  Requires-Dist: ipykernel; extra == 'dev'
117
99
  Requires-Dist: ipython<9; extra == 'dev'
118
100
  Requires-Dist: jupyter-client; extra == 'dev'
119
101
  Requires-Dist: jupyter-core; extra == 'dev'
120
- Requires-Dist: langchain; extra == 'dev'
121
- Requires-Dist: langchain-anthropic; extra == 'dev'
122
- Requires-Dist: langchain-openai; extra == 'dev'
123
- Requires-Dist: litellm>=1.55.0; extra == 'dev'
124
102
  Requires-Dist: pillow>=11.1.0; extra == 'dev'
125
103
  Requires-Dist: playwright; extra == 'dev'
126
104
  Requires-Dist: pyautogui>=0.9.54; extra == 'dev'
127
- Requires-Dist: pyright==1.1.401; extra == 'dev'
105
+ Requires-Dist: pyright==1.1.407; extra == 'dev'
128
106
  Requires-Dist: pytest-asyncio; extra == 'dev'
129
107
  Requires-Dist: pytest-cov; extra == 'dev'
130
108
  Requires-Dist: pytest-mock; extra == 'dev'
131
109
  Requires-Dist: pytest<9,>=8.1.1; extra == 'dev'
132
110
  Requires-Dist: ruff>=0.11.8; extra == 'dev'
133
- Requires-Dist: setuptools; extra == 'dev'
134
- Requires-Dist: textdistance<5,>=4.5.0; extra == 'dev'
135
111
  Provides-Extra: rl
136
112
  Requires-Dist: bitsandbytes>=0.41.0; (sys_platform == 'linux') and extra == 'rl'
137
113
  Requires-Dist: liger-kernel>=0.5.0; (sys_platform == 'linux') and extra == 'rl'
@@ -151,15 +127,15 @@ OSS RL environment + evals toolkit. Wrap software as environments, run benchmark
151
127
 
152
128
  [![PyPI version](https://img.shields.io/pypi/v/hud-python?style=flat-square)](https://pypi.org/project/hud-python/)
153
129
  [![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
154
- [![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLnNvL21jcCJ9)
130
+ [![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
155
131
  [![Discord](https://img.shields.io/discord/1327447144772407390?label=Discord&logo=discord&style=flat-square)](https://discord.gg/wkjtmHYYjm)
156
132
  [![X Follow](https://img.shields.io/twitter/follow/hud_evals?style=social)](https://x.com/intent/user?screen_name=hud_evals)
157
133
  [![Shop](https://img.shields.io/badge/_-white.svg?label=shop&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAJCAYAAAAywQxIAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAACxMAAAsTAQCanBgAAAF6SURBVChTlZA9ixNhFIWf8yaTpFHRRMXCKpAZhCAYFvwoLHZhwUKw9A9YCJb+Bq0sxGbBQrTxX1j41dvIRAjGZbdwRUUGIzPMeyw2swS3WZ/ynHvP5VylafoAWAd+5Xm+wX+SpukmcMf29RDCZrD9BViz3f53+CjYngKZpD5A2/Y7SQBMJpOkKIprdV1vdzqdHzHGblmW9Ww2+5pl2TmAxWKxmM/nP8fj8cmqqtZijJ9sb0u6ABBWjh0riuIt8CqE8LGu66e2d5MkeQ8QY3xme7fb7T4ZjUbrZVl+jjFuSXoEXGxCDgIl9WzfAO5LSmzvNB771R6vzG4Bx0MIt/M8vwV8aLyDQNt70+n0G1AspaTxVln+aghQluVsKbvxVysflT9NQK/XO7R/SGiQ9Nt2aftElmWXJd1kv0kbeANQVdWl4XB4XtJouXaqNRgMHkrqS+r0+/3XwD1JXdungRfAVWBi+6WkK8D3EMJz22cl3W21WgNgx3YAzvwFd0Chdq03gKUAAAAASUVORK5CYII=&style=social)](https://shop.hud.ai)
158
134
 
159
135
 
160
- ### Are you a startup building agents?
136
+ ### Are you an enterprise building agents?
161
137
 
162
- [📅 Hop on a call](https://cal.com/jay-ram-z6st6w/demo) or [📧 founders@hud.ai](mailto:founders@hud.ai)
138
+ [📅 Hop on a call](https://cal.com/jay-hud) or [📧 founders@hud.ai](mailto:founders@hud.ai)
163
139
 
164
140
  ## Highlights
165
141
 
@@ -179,7 +155,7 @@ OSS RL environment + evals toolkit. Wrap software as environments, run benchmark
179
155
  pip install hud-python
180
156
 
181
157
  # CLI - RL pipeline, environment design
182
- uv tool install hud-python
158
+ uv tool install hud-python@latest
183
159
  # uv tool update-shell
184
160
  ```
185
161
 
@@ -439,7 +415,7 @@ Train with the new interactive `hud rl` flow:
439
415
 
440
416
  ```bash
441
417
  # Install CLI
442
- uv tool install hud-python
418
+ uv tool install hud-python@latest
443
419
 
444
420
  # Option A: Run directly from a HuggingFace dataset
445
421
  hud rl hud-evals/2048-basic
@@ -10,15 +10,15 @@ OSS RL environment + evals toolkit. Wrap software as environments, run benchmark
10
10
 
11
11
  [![PyPI version](https://img.shields.io/pypi/v/hud-python?style=flat-square)](https://pypi.org/project/hud-python/)
12
12
  [![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
13
- [![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLnNvL21jcCJ9)
13
+ [![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
14
14
  [![Discord](https://img.shields.io/discord/1327447144772407390?label=Discord&logo=discord&style=flat-square)](https://discord.gg/wkjtmHYYjm)
15
15
  [![X Follow](https://img.shields.io/twitter/follow/hud_evals?style=social)](https://x.com/intent/user?screen_name=hud_evals)
16
16
  [![Shop](https://img.shields.io/badge/_-white.svg?label=shop&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAJCAYAAAAywQxIAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAACxMAAAsTAQCanBgAAAF6SURBVChTlZA9ixNhFIWf8yaTpFHRRMXCKpAZhCAYFvwoLHZhwUKw9A9YCJb+Bq0sxGbBQrTxX1j41dvIRAjGZbdwRUUGIzPMeyw2swS3WZ/ynHvP5VylafoAWAd+5Xm+wX+SpukmcMf29RDCZrD9BViz3f53+CjYngKZpD5A2/Y7SQBMJpOkKIprdV1vdzqdHzHGblmW9Ww2+5pl2TmAxWKxmM/nP8fj8cmqqtZijJ9sb0u6ABBWjh0riuIt8CqE8LGu66e2d5MkeQ8QY3xme7fb7T4ZjUbrZVl+jjFuSXoEXGxCDgIl9WzfAO5LSmzvNB771R6vzG4Bx0MIt/M8vwV8aLyDQNt70+n0G1AspaTxVln+aghQluVsKbvxVysflT9NQK/XO7R/SGiQ9Nt2aftElmWXJd1kv0kbeANQVdWl4XB4XtJouXaqNRgMHkrqS+r0+/3XwD1JXdungRfAVWBi+6WkK8D3EMJz22cl3W21WgNgx3YAzvwFd0Chdq03gKUAAAAASUVORK5CYII=&style=social)](https://shop.hud.ai)
17
17
 
18
18
 
19
- ### Are you a startup building agents?
19
+ ### Are you an enterprise building agents?
20
20
 
21
- [📅 Hop on a call](https://cal.com/jay-ram-z6st6w/demo) or [📧 founders@hud.ai](mailto:founders@hud.ai)
21
+ [📅 Hop on a call](https://cal.com/jay-hud) or [📧 founders@hud.ai](mailto:founders@hud.ai)
22
22
 
23
23
  ## Highlights
24
24
 
@@ -38,7 +38,7 @@ OSS RL environment + evals toolkit. Wrap software as environments, run benchmark
38
38
  pip install hud-python
39
39
 
40
40
  # CLI - RL pipeline, environment design
41
- uv tool install hud-python
41
+ uv tool install hud-python@latest
42
42
  # uv tool update-shell
43
43
  ```
44
44
 
@@ -298,7 +298,7 @@ Train with the new interactive `hud rl` flow:
298
298
 
299
299
  ```bash
300
300
  # Install CLI
301
- uv tool install hud-python
301
+ uv tool install hud-python@latest
302
302
 
303
303
  # Option A: Run directly from a HuggingFace dataset
304
304
  hud rl hud-evals/2048-basic
@@ -60,7 +60,7 @@ The HUD SDK includes a powerful CLI for debugging and analyzing MCP environments
60
60
 
61
61
  ```bash
62
62
  # Install HUD CLI globally with uv (recommended)
63
- uv tool install hud-python
63
+ uv tool install hud-python@latest
64
64
 
65
65
  # Or use without installing
66
66
  uvx --from hud-python hud --help
@@ -3,7 +3,7 @@ name = "hud-browser-controller"
3
3
  version = "0.1.0"
4
4
  description = "HUD Browser Controller - MCP interface for browser environments"
5
5
  requires-python = ">=3.11,<3.14"
6
- dependencies = [ "pydantic>=2.6,<3", "pydantic-settings>=2.2,<3", "hud-python@git+https://github.com/hud-evals/hud-python@env-cli-improvements", "playwright", "pyautogui", "httpx", "typer", "fastapi>=0.104.1", "uvicorn[standard]>=0.24.0", "python-multipart>=0.0.6",]
6
+ dependencies = [ "pydantic>=2.6,<3", "pydantic-settings>=2.2,<3", "hud-python>=0.4.68", "playwright", "pyautogui", "httpx", "typer", "fastapi>=0.104.1", "uvicorn[standard]>=0.24.0", "python-multipart>=0.0.6",]
7
7
 
8
8
  [build-system]
9
9
  requires = [ "hatchling",]
@@ -3,7 +3,7 @@ name = "hud-om2w"
3
3
  version = "0.1.0"
4
4
  description = "HUD Remote Browser Controller with MCP tools for cloud browser providers"
5
5
  requires-python = ">=3.11,<3.13"
6
- dependencies = [ "hud-python==0.4.61", "pyautogui", "playwright", "httpx", "typer", "google-api-python-client", "google-auth",]
6
+ dependencies = [ "hud-python>=0.4.68", "anthropic>=0.74.0", "pyautogui", "playwright", "httpx", "typer", "google-api-python-client", "google-auth",]
7
7
 
8
8
  [build-system]
9
9
  requires = [ "hatchling",]
@@ -3,13 +3,15 @@ from __future__ import annotations
3
3
  from .base import MCPAgent
4
4
  from .claude import ClaudeAgent
5
5
  from .gemini import GeminiAgent
6
- from .openai import OperatorAgent
7
- from .openai_chat_generic import GenericOpenAIChatAgent
6
+ from .openai import OpenAIAgent
7
+ from .openai_chat import OpenAIChatAgent
8
+ from .operator import OperatorAgent
8
9
 
9
10
  __all__ = [
10
11
  "ClaudeAgent",
11
12
  "GeminiAgent",
12
- "GenericOpenAIChatAgent",
13
13
  "MCPAgent",
14
+ "OpenAIAgent",
15
+ "OpenAIChatAgent",
14
16
  "OperatorAgent",
15
17
  ]
@@ -10,22 +10,32 @@ from abc import ABC, abstractmethod
10
10
  from typing import TYPE_CHECKING, Any, ClassVar, Literal
11
11
 
12
12
  import mcp.types as types
13
+ from pydantic import BaseModel, ConfigDict
13
14
 
14
15
  from hud.agents.utils import log_agent_metadata_to_status, log_task_config_to_current_trace
15
- from hud.types import AgentResponse, MCPToolCall, MCPToolResult, Trace
16
+ from hud.clients.base import AgentMCPClient
17
+ from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult, Trace
16
18
  from hud.utils.hud_console import HUDConsole
17
19
  from hud.utils.mcp import MCPConfigPatch, patch_mcp_config, setup_hud_telemetry
18
20
 
19
21
  if TYPE_CHECKING:
20
- from hud.clients.base import AgentMCPClient
21
22
  from hud.datasets import Task
22
23
 
23
- from .misc import ResponseAgent
24
-
25
24
 
26
25
  logger = logging.getLogger(__name__)
27
26
 
28
27
 
28
+ class BaseCreateParams(BaseModel):
29
+ """Runtime parameters for agent creation."""
30
+
31
+ model_config = ConfigDict(arbitrary_types_allowed=True)
32
+
33
+ mcp_client: AgentMCPClient | None = None
34
+ auto_trace: bool = True
35
+ auto_respond: bool = False
36
+ verbose: bool = False
37
+
38
+
29
39
  class MCPAgent(ABC):
30
40
  """
31
41
  Base class for MCP-enabled agents.
@@ -45,80 +55,67 @@ class MCPAgent(ABC):
45
55
  `format_blocks`, and `format_tool_results`.
46
56
  """
47
57
 
48
- metadata: dict[str, Any] | None = None
58
+ metadata: ClassVar[dict[str, Any] | None] = None
49
59
  required_tools: ClassVar[list[str]] = [] # Tools that must be available
60
+ config_cls: ClassVar[type[BaseAgentConfig]] = BaseAgentConfig
50
61
 
51
- def __init__(
52
- self,
53
- mcp_client: AgentMCPClient | None = None,
54
- # Filtering
55
- allowed_tools: list[str] | None = None,
56
- disallowed_tools: list[str] | None = None,
57
- response_tool_name: str | None = None,
58
- # Messages
59
- system_prompt: str | None = None,
60
- append_setup_output: bool = True,
61
- initial_screenshot: bool = True,
62
- # Misc
63
- model_name: str = "mcp-agent",
64
- checkpoint_name: str | None = None,
65
- response_agent: ResponseAgent | None = None,
66
- auto_trace: bool = True,
67
- verbose: bool = False,
68
- ) -> None:
69
- """
70
- Initialize the base MCP agent.
62
+ def __init__(self, params: BaseCreateParams | None = None, **kwargs: Any) -> None:
63
+ if params is None:
64
+ import warnings
71
65
 
72
- Args:
73
- mcp_client: Client for connecting to MCP servers. If None, a client
74
- is auto-created at runtime when `run()` is called with a `Task`
75
- that provides `mcp_config`.
76
- allowed_tools: Names of tools to allow (None means allow all).
77
- disallowed_tools: Names of tools to always exclude.
78
- response_tool_name: Name of the tool to use for response.
79
- system_prompt: System prompt to seed the conversation.
80
- append_setup_output: Whether to append setup tool output to the
81
- first turn's messages.
82
- initial_screenshot: Whether to include an initial screenshot before
83
- the first prompt (when supported by the environment).
84
- model_name: Label used in telemetry/logging to identify the model.
85
- response_agent: Optional automation that can respond to the model's
86
- outputs to keep the loop going (e.g., auto-continue/stop).
87
- auto_trace: If True, automatically creates a trace/span for runs.
88
- verbose: If True, increases logging verbosity for developer UX.
89
- """
66
+ warnings.warn(
67
+ f"Passing kwargs to {self.__class__.__name__}() is deprecated. "
68
+ f"Use {self.__class__.__name__}.create(...) instead.",
69
+ DeprecationWarning,
70
+ stacklevel=2,
71
+ )
72
+ CreateParams = type(
73
+ f"{self.config_cls.__name__}CreateParams",
74
+ (BaseCreateParams, self.config_cls),
75
+ {"__module__": self.config_cls.__module__},
76
+ )
77
+ params = CreateParams(**kwargs)
78
+
79
+ config_kwargs = {
80
+ k: getattr(params, k) for k in self.config_cls.model_fields if hasattr(params, k)
81
+ }
82
+ self.config = self.config_cls(**config_kwargs)
90
83
 
91
- self.mcp_client = mcp_client
92
- self._auto_created_client = False # Track if we created the client
84
+ self.mcp_client = params.mcp_client
85
+ self.model_name: str = getattr(params, "model_name", "MCPAgent")
86
+ self.checkpoint_name: str = getattr(params, "checkpoint_name", "unknown")
87
+ self.auto_respond = params.auto_respond
93
88
 
94
- self.model_name = model_name
95
- self.checkpoint_name = checkpoint_name
96
89
  self.console = HUDConsole(logger=logger)
97
90
 
98
- # Set verbose mode if requested
99
- if verbose:
91
+ if params.verbose:
100
92
  self.console.set_verbose(True)
101
93
 
102
- # User filtering
103
- self.allowed_tools: list[str] | None = allowed_tools
104
- self.disallowed_tools: list[str] | None = disallowed_tools
105
- self._available_tools: list[types.Tool] | None = None
106
-
107
- # Messages
108
- self.system_prompt = system_prompt
109
- self.append_setup_output = append_setup_output
110
- self.initial_screenshot = initial_screenshot
94
+ self.allowed_tools = self.config.allowed_tools
95
+ self.disallowed_tools = self.config.disallowed_tools
96
+ self.system_prompt = self.config.system_prompt
97
+ self.append_setup_output = self.config.append_setup_output
98
+ self.initial_screenshot = self.config.initial_screenshot
99
+ self.response_tool_name = self.config.response_tool_name
111
100
 
112
- # Initialize these here so methods can be called before initialize()
113
- self._tool_map: dict[str, types.Tool] = {} # Simplified: just name to tool
114
- self.response_tool_name = response_tool_name
101
+ self._available_tools: list[types.Tool] | None = None
102
+ self._tool_map: dict[str, types.Tool] = {}
115
103
 
116
104
  # Trace
117
- self._auto_trace = auto_trace
118
- self._auto_trace_cm: Any | None = None # Store auto-created trace context manager
105
+ self._auto_trace = params.auto_trace
106
+ self._auto_trace_cm: Any | None = None
119
107
 
120
- # Response agent to automatically interact with the model
121
- self.response_agent = response_agent
108
+ @classmethod
109
+ def create(cls, **kwargs: Any) -> MCPAgent:
110
+ """
111
+ Factory method to create an agent with typed parameters.
112
+ """
113
+ CreateParams = type(
114
+ f"{cls.config_cls.__name__}CreateParams",
115
+ (BaseCreateParams, cls.config_cls),
116
+ {"__module__": cls.config_cls.__module__},
117
+ )
118
+ return cls(params=CreateParams(**kwargs))
122
119
 
123
120
  async def initialize(self, task: str | Task | None = None) -> None:
124
121
  """Initialize the agent with task-specific configuration."""
@@ -129,7 +126,6 @@ class MCPAgent(ABC):
129
126
  from hud.clients import MCPClient
130
127
 
131
128
  self.mcp_client = MCPClient(mcp_config=task.mcp_config)
132
- self._auto_created_client = True
133
129
  self.console.debug("Auto-created MCPClient from task.mcp_config")
134
130
 
135
131
  # Ensure we have a client
@@ -148,41 +144,41 @@ class MCPAgent(ABC):
148
144
  try:
149
145
  await self.mcp_client.initialize()
150
146
  except Exception as e:
147
+ self.console.error_log(f"Failed to initialize MCP client: {e}")
151
148
  self._handle_connection_error(e)
152
149
 
153
150
  # If task is provided, apply agent_config and add lifecycle tools
154
151
  if isinstance(task, Task) and task.agent_config:
155
- if task.agent_config.get("system_prompt"):
152
+ agent_cfg = task.agent_config
153
+ if agent_cfg.system_prompt:
156
154
  if self.system_prompt is None:
157
- self.system_prompt = task.agent_config["system_prompt"]
155
+ self.system_prompt = agent_cfg.system_prompt
158
156
  else:
159
- self.system_prompt += "\n\n" + task.agent_config["system_prompt"]
160
- if "append_setup_output" in task.agent_config:
161
- self.append_setup_output = task.agent_config["append_setup_output"]
162
- if "initial_screenshot" in task.agent_config:
163
- self.initial_screenshot = task.agent_config["initial_screenshot"]
164
- if "allowed_tools" in task.agent_config:
157
+ self.system_prompt += "\n\n" + agent_cfg.system_prompt
158
+ if "append_setup_output" in agent_cfg.model_fields_set:
159
+ self.append_setup_output = agent_cfg.append_setup_output
160
+ if "initial_screenshot" in agent_cfg.model_fields_set:
161
+ self.initial_screenshot = agent_cfg.initial_screenshot
162
+ if agent_cfg.allowed_tools is not None:
165
163
  # If allowed_tools has already been set, we take the intersection of the two
166
164
  # If the list had been empty, we were allowing all tools, so we overwrite this
167
165
  if isinstance(self.allowed_tools, list) and len(self.allowed_tools) > 0:
168
166
  # If task allows "*", keep CLI's allowed_tools unchanged
169
- if "*" not in task.agent_config["allowed_tools"]:
167
+ if "*" not in agent_cfg.allowed_tools:
170
168
  self.allowed_tools = [
171
- tool
172
- for tool in self.allowed_tools
173
- if tool in task.agent_config["allowed_tools"]
169
+ tool for tool in self.allowed_tools if tool in agent_cfg.allowed_tools
174
170
  ]
175
171
  # else: task allows all tools, so CLI's allowed_tools takes precedence
176
172
  else: # If allowed_tools is None, we overwrite it
177
- self.allowed_tools = task.agent_config["allowed_tools"]
178
- if "disallowed_tools" in task.agent_config:
173
+ self.allowed_tools = agent_cfg.allowed_tools
174
+ if agent_cfg.disallowed_tools is not None:
179
175
  # If disallowed_tools has already been set, we take the union of the two
180
176
  if isinstance(self.disallowed_tools, list):
181
- self.disallowed_tools.extend(task.agent_config["disallowed_tools"])
177
+ self.disallowed_tools.extend(agent_cfg.disallowed_tools)
182
178
  else: # If disallowed_tools is None, we overwrite it
183
- self.disallowed_tools = task.agent_config["disallowed_tools"]
184
- if "response_tool_name" in task.agent_config:
185
- self.response_tool_name = task.agent_config["response_tool_name"]
179
+ self.disallowed_tools = agent_cfg.disallowed_tools
180
+ if agent_cfg.response_tool_name is not None:
181
+ self.response_tool_name = agent_cfg.response_tool_name
186
182
 
187
183
  all_tools = await self.mcp_client.list_tools()
188
184
  self._available_tools = []
@@ -201,6 +197,15 @@ class MCPAgent(ABC):
201
197
  continue
202
198
  self._available_tools.append(tool)
203
199
 
200
+ # Validate required tools are present
201
+ available_tool_names = {t.name for t in self._available_tools}
202
+ missing_tools = [tool for tool in self.required_tools if tool not in available_tool_names]
203
+ if missing_tools:
204
+ raise ValueError(
205
+ f"Required tools are missing: {missing_tools}. "
206
+ f"Available tools: {sorted(available_tool_names)}"
207
+ )
208
+
204
209
  self.console.info(
205
210
  f"Agent initialized with {len(self.get_available_tools())} tools: {', '.join([t.name for t in self.get_available_tools()])}" # noqa: E501
206
211
  )
@@ -290,6 +295,10 @@ class MCPAgent(ABC):
290
295
  self.console.progress_log(f"Setting up tool phase: {task.setup_tool}")
291
296
  results = await self.call_tools(task.setup_tool)
292
297
  if any(result.isError for result in results):
298
+ for result in results:
299
+ if result.isError:
300
+ self.console.error_log(f"Error in setup tool: {result}")
301
+
293
302
  return Trace(
294
303
  reward=0.0,
295
304
  done=True,
@@ -389,6 +398,8 @@ class MCPAgent(ABC):
389
398
  final_response = None
390
399
  error = None
391
400
 
401
+ messages: list[Any] = []
402
+
392
403
  try:
393
404
  # Start with system messages
394
405
  messages = await self.get_system_messages()
@@ -413,15 +424,16 @@ class MCPAgent(ABC):
413
424
 
414
425
  # Check if we should stop
415
426
  if response.done or not response.tool_calls:
416
- # Optional external ResponseAgent to decide whether to stop
417
- decision = "STOP"
418
- if self.response_agent is not None and response.content:
427
+ # Use auto_respond to decide whether to stop
428
+ decision: Literal["STOP", "CONTINUE"] = "STOP"
429
+ if self.auto_respond and response.content:
419
430
  try:
420
- decision = await self.response_agent.determine_response(
421
- response.content
422
- )
431
+ from hud.agents.misc import ResponseAgent
432
+
433
+ response_agent = ResponseAgent()
434
+ decision = await response_agent.determine_response(response.content)
423
435
  except Exception as e:
424
- self.console.warning_log(f"ResponseAgent failed: {e}")
436
+ self.console.warning_log(f"Auto-respond failed: {e}")
425
437
  if decision == "STOP":
426
438
  # Try to submit response through lifecycle tool
427
439
  await self._maybe_submit_response(response, messages)
@@ -436,11 +448,7 @@ class MCPAgent(ABC):
436
448
 
437
449
  # 2. Execute tools
438
450
  tool_calls = response.tool_calls
439
- for tool_call in tool_calls:
440
- self.console.info_log(f"{tool_call}")
441
451
  tool_results = await self.call_tools(tool_calls)
442
- for tool_result in tool_results:
443
- self.console.info_log(f"{tool_result}")
444
452
 
445
453
  # 3. Format tool results and add to messages
446
454
  tool_messages = await self.format_tool_results(tool_calls, tool_results)
@@ -699,8 +707,8 @@ class MCPAgent(ABC):
699
707
  finally:
700
708
  self._auto_trace_cm = None
701
709
 
702
- # Clean up auto-created client
703
- if self._auto_created_client and self.mcp_client:
710
+ # Always clean up the client
711
+ if self.mcp_client:
704
712
  try:
705
713
  await self.mcp_client.shutdown()
706
714
  self.console.debug("Closed auto-created MCPClient")
@@ -708,7 +716,6 @@ class MCPAgent(ABC):
708
716
  self.console.warning_log(f"Failed to close auto-created client: {e}")
709
717
  finally:
710
718
  self.mcp_client = None
711
- self._auto_created_client = False
712
719
 
713
720
  def _is_connection_error(self, e: Exception) -> bool:
714
721
  """Check if an exception is a connection error."""