hud-python 0.4.33__tar.gz → 0.4.35__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (224)
  1. {hud_python-0.4.33 → hud_python-0.4.35}/.gitignore +3 -1
  2. {hud_python-0.4.33 → hud_python-0.4.35}/PKG-INFO +1 -1
  3. {hud_python-0.4.33 → hud_python-0.4.35}/hud/agents/claude.py +9 -1
  4. {hud_python-0.4.33 → hud_python-0.4.35}/hud/agents/misc/response_agent.py +25 -9
  5. {hud_python-0.4.33 → hud_python-0.4.35}/hud/agents/openai.py +9 -1
  6. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/__init__.py +4 -1
  7. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/build.py +41 -26
  8. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/eval.py +1 -1
  9. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/flows/tasks.py +2 -1
  10. {hud_python-0.4.33 → hud_python-0.4.35}/hud/datasets/runner.py +1 -1
  11. {hud_python-0.4.33 → hud_python-0.4.35}/hud/rl/actor.py +7 -5
  12. {hud_python-0.4.33 → hud_python-0.4.35}/hud/rl/tests/test_learner.py +20 -5
  13. {hud_python-0.4.33 → hud_python-0.4.35}/hud/utils/tests/test_version.py +1 -1
  14. {hud_python-0.4.33 → hud_python-0.4.35}/hud/utils/tool_shorthand.py +7 -4
  15. {hud_python-0.4.33 → hud_python-0.4.35}/hud/version.py +1 -1
  16. {hud_python-0.4.33 → hud_python-0.4.35}/pyproject.toml +1 -1
  17. {hud_python-0.4.33 → hud_python-0.4.35}/LICENSE +0 -0
  18. {hud_python-0.4.33 → hud_python-0.4.35}/README.md +0 -0
  19. {hud_python-0.4.33 → hud_python-0.4.35}/environments/README.md +0 -0
  20. {hud_python-0.4.33 → hud_python-0.4.35}/environments/browser/README.md +0 -0
  21. {hud_python-0.4.33 → hud_python-0.4.35}/environments/browser/apps/2048/README.md +0 -0
  22. {hud_python-0.4.33 → hud_python-0.4.35}/environments/browser/apps/2048/backend/pyproject.toml +0 -0
  23. {hud_python-0.4.33 → hud_python-0.4.35}/environments/browser/apps/README.md +0 -0
  24. {hud_python-0.4.33 → hud_python-0.4.35}/environments/browser/apps/todo/README.md +0 -0
  25. {hud_python-0.4.33 → hud_python-0.4.35}/environments/browser/apps/todo/backend/pyproject.toml +0 -0
  26. {hud_python-0.4.33 → hud_python-0.4.35}/environments/browser/pyproject.toml +0 -0
  27. {hud_python-0.4.33 → hud_python-0.4.35}/environments/remote_browser/README.md +0 -0
  28. {hud_python-0.4.33 → hud_python-0.4.35}/environments/remote_browser/pyproject.toml +0 -0
  29. {hud_python-0.4.33 → hud_python-0.4.35}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
  30. {hud_python-0.4.33 → hud_python-0.4.35}/environments/text_2048/README.md +0 -0
  31. {hud_python-0.4.33 → hud_python-0.4.35}/environments/text_2048/pyproject.toml +0 -0
  32. {hud_python-0.4.33 → hud_python-0.4.35}/examples/README.md +0 -0
  33. {hud_python-0.4.33 → hud_python-0.4.35}/hud/__init__.py +0 -0
  34. {hud_python-0.4.33 → hud_python-0.4.35}/hud/__main__.py +0 -0
  35. {hud_python-0.4.33 → hud_python-0.4.35}/hud/agents/__init__.py +0 -0
  36. {hud_python-0.4.33 → hud_python-0.4.35}/hud/agents/base.py +0 -0
  37. {hud_python-0.4.33 → hud_python-0.4.35}/hud/agents/grounded_openai.py +0 -0
  38. {hud_python-0.4.33 → hud_python-0.4.35}/hud/agents/langchain.py +0 -0
  39. {hud_python-0.4.33 → hud_python-0.4.35}/hud/agents/misc/__init__.py +0 -0
  40. {hud_python-0.4.33 → hud_python-0.4.35}/hud/agents/openai_chat_generic.py +0 -0
  41. {hud_python-0.4.33 → hud_python-0.4.35}/hud/agents/tests/__init__.py +0 -0
  42. {hud_python-0.4.33 → hud_python-0.4.35}/hud/agents/tests/test_base.py +0 -0
  43. {hud_python-0.4.33 → hud_python-0.4.35}/hud/agents/tests/test_claude.py +0 -0
  44. {hud_python-0.4.33 → hud_python-0.4.35}/hud/agents/tests/test_client.py +0 -0
  45. {hud_python-0.4.33 → hud_python-0.4.35}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
  46. {hud_python-0.4.33 → hud_python-0.4.35}/hud/agents/tests/test_openai.py +0 -0
  47. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/__main__.py +0 -0
  48. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/analyze.py +0 -0
  49. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/clone.py +0 -0
  50. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/debug.py +0 -0
  51. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/dev.py +0 -0
  52. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/flows/__init__.py +0 -0
  53. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/get.py +0 -0
  54. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/init.py +0 -0
  55. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/list_func.py +0 -0
  56. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/pull.py +0 -0
  57. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/push.py +0 -0
  58. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/remove.py +0 -0
  59. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/rl/__init__.py +0 -0
  60. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/rl/config.py +0 -0
  61. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/rl/display.py +0 -0
  62. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/rl/gpu.py +0 -0
  63. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/rl/gpu_utils.py +0 -0
  64. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/rl/local_runner.py +0 -0
  65. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/rl/presets.py +0 -0
  66. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/rl/remote_runner.py +0 -0
  67. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/rl/rl_api.py +0 -0
  68. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/rl/vllm.py +0 -0
  69. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/tests/__init__.py +0 -0
  70. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/tests/test_analyze.py +0 -0
  71. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/tests/test_analyze_metadata.py +0 -0
  72. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/tests/test_build.py +0 -0
  73. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/tests/test_cli_init.py +0 -0
  74. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/tests/test_cli_main.py +0 -0
  75. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/tests/test_clone.py +0 -0
  76. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/tests/test_cursor.py +0 -0
  77. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/tests/test_debug.py +0 -0
  78. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/tests/test_list_func.py +0 -0
  79. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/tests/test_main_module.py +0 -0
  80. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/tests/test_mcp_server.py +0 -0
  81. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/tests/test_pull.py +0 -0
  82. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/tests/test_push.py +0 -0
  83. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/tests/test_registry.py +0 -0
  84. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/tests/test_utils.py +0 -0
  85. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/utils/__init__.py +0 -0
  86. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/utils/cursor.py +0 -0
  87. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/utils/docker.py +0 -0
  88. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/utils/environment.py +0 -0
  89. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/utils/interactive.py +0 -0
  90. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/utils/logging.py +0 -0
  91. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/utils/metadata.py +0 -0
  92. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/utils/registry.py +0 -0
  93. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/utils/remote_runner.py +0 -0
  94. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/utils/runner.py +0 -0
  95. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/utils/server.py +0 -0
  96. {hud_python-0.4.33 → hud_python-0.4.35}/hud/cli/utils/tasks.py +0 -0
  97. {hud_python-0.4.33 → hud_python-0.4.35}/hud/clients/README.md +0 -0
  98. {hud_python-0.4.33 → hud_python-0.4.35}/hud/clients/__init__.py +0 -0
  99. {hud_python-0.4.33 → hud_python-0.4.35}/hud/clients/base.py +0 -0
  100. {hud_python-0.4.33 → hud_python-0.4.35}/hud/clients/fastmcp.py +0 -0
  101. {hud_python-0.4.33 → hud_python-0.4.35}/hud/clients/mcp_use.py +0 -0
  102. {hud_python-0.4.33 → hud_python-0.4.35}/hud/clients/tests/__init__.py +0 -0
  103. {hud_python-0.4.33 → hud_python-0.4.35}/hud/clients/tests/test_client_integration.py +0 -0
  104. {hud_python-0.4.33 → hud_python-0.4.35}/hud/clients/tests/test_fastmcp.py +0 -0
  105. {hud_python-0.4.33 → hud_python-0.4.35}/hud/clients/tests/test_mcp_use_retry.py +0 -0
  106. {hud_python-0.4.33 → hud_python-0.4.35}/hud/clients/tests/test_protocol.py +0 -0
  107. {hud_python-0.4.33 → hud_python-0.4.35}/hud/clients/utils/__init__.py +0 -0
  108. {hud_python-0.4.33 → hud_python-0.4.35}/hud/clients/utils/mcp_use_retry.py +0 -0
  109. {hud_python-0.4.33 → hud_python-0.4.35}/hud/clients/utils/retry.py +0 -0
  110. {hud_python-0.4.33 → hud_python-0.4.35}/hud/clients/utils/retry_transport.py +0 -0
  111. {hud_python-0.4.33 → hud_python-0.4.35}/hud/datasets/__init__.py +0 -0
  112. {hud_python-0.4.33 → hud_python-0.4.35}/hud/datasets/parallel.py +0 -0
  113. {hud_python-0.4.33 → hud_python-0.4.35}/hud/datasets/utils.py +0 -0
  114. {hud_python-0.4.33 → hud_python-0.4.35}/hud/misc/__init__.py +0 -0
  115. {hud_python-0.4.33 → hud_python-0.4.35}/hud/misc/claude_plays_pokemon.py +0 -0
  116. {hud_python-0.4.33 → hud_python-0.4.35}/hud/native/__init__.py +0 -0
  117. {hud_python-0.4.33 → hud_python-0.4.35}/hud/native/comparator.py +0 -0
  118. {hud_python-0.4.33 → hud_python-0.4.35}/hud/native/tests/__init__.py +0 -0
  119. {hud_python-0.4.33 → hud_python-0.4.35}/hud/native/tests/test_comparator.py +0 -0
  120. {hud_python-0.4.33 → hud_python-0.4.35}/hud/native/tests/test_native_init.py +0 -0
  121. {hud_python-0.4.33 → hud_python-0.4.35}/hud/otel/__init__.py +0 -0
  122. {hud_python-0.4.33 → hud_python-0.4.35}/hud/otel/collector.py +0 -0
  123. {hud_python-0.4.33 → hud_python-0.4.35}/hud/otel/config.py +0 -0
  124. {hud_python-0.4.33 → hud_python-0.4.35}/hud/otel/context.py +0 -0
  125. {hud_python-0.4.33 → hud_python-0.4.35}/hud/otel/exporters.py +0 -0
  126. {hud_python-0.4.33 → hud_python-0.4.35}/hud/otel/instrumentation.py +0 -0
  127. {hud_python-0.4.33 → hud_python-0.4.35}/hud/otel/processors.py +0 -0
  128. {hud_python-0.4.33 → hud_python-0.4.35}/hud/otel/tests/__init__.py +0 -0
  129. {hud_python-0.4.33 → hud_python-0.4.35}/hud/otel/tests/test_processors.py +0 -0
  130. {hud_python-0.4.33 → hud_python-0.4.35}/hud/py.typed +0 -0
  131. {hud_python-0.4.33 → hud_python-0.4.35}/hud/rl/README.md +0 -0
  132. {hud_python-0.4.33 → hud_python-0.4.35}/hud/rl/__init__.py +0 -0
  133. {hud_python-0.4.33 → hud_python-0.4.35}/hud/rl/buffer.py +0 -0
  134. {hud_python-0.4.33 → hud_python-0.4.35}/hud/rl/chat_template.jinja +0 -0
  135. {hud_python-0.4.33 → hud_python-0.4.35}/hud/rl/config.py +0 -0
  136. {hud_python-0.4.33 → hud_python-0.4.35}/hud/rl/distributed.py +0 -0
  137. {hud_python-0.4.33 → hud_python-0.4.35}/hud/rl/learner.py +0 -0
  138. {hud_python-0.4.33 → hud_python-0.4.35}/hud/rl/tests/__init__.py +0 -0
  139. {hud_python-0.4.33 → hud_python-0.4.35}/hud/rl/train.py +0 -0
  140. {hud_python-0.4.33 → hud_python-0.4.35}/hud/rl/types.py +0 -0
  141. {hud_python-0.4.33 → hud_python-0.4.35}/hud/rl/utils/start_vllm_server.sh +0 -0
  142. {hud_python-0.4.33 → hud_python-0.4.35}/hud/rl/utils.py +0 -0
  143. {hud_python-0.4.33 → hud_python-0.4.35}/hud/rl/vllm_adapter.py +0 -0
  144. {hud_python-0.4.33 → hud_python-0.4.35}/hud/samples/__init__.py +0 -0
  145. {hud_python-0.4.33 → hud_python-0.4.35}/hud/samples/browser.py +0 -0
  146. {hud_python-0.4.33 → hud_python-0.4.35}/hud/server/__init__.py +0 -0
  147. {hud_python-0.4.33 → hud_python-0.4.35}/hud/server/context.py +0 -0
  148. {hud_python-0.4.33 → hud_python-0.4.35}/hud/server/helper/__init__.py +0 -0
  149. {hud_python-0.4.33 → hud_python-0.4.35}/hud/server/low_level.py +0 -0
  150. {hud_python-0.4.33 → hud_python-0.4.35}/hud/server/server.py +0 -0
  151. {hud_python-0.4.33 → hud_python-0.4.35}/hud/server/tests/__init__.py +0 -0
  152. {hud_python-0.4.33 → hud_python-0.4.35}/hud/settings.py +0 -0
  153. {hud_python-0.4.33 → hud_python-0.4.35}/hud/shared/__init__.py +0 -0
  154. {hud_python-0.4.33 → hud_python-0.4.35}/hud/shared/exceptions.py +0 -0
  155. {hud_python-0.4.33 → hud_python-0.4.35}/hud/shared/hints.py +0 -0
  156. {hud_python-0.4.33 → hud_python-0.4.35}/hud/shared/requests.py +0 -0
  157. {hud_python-0.4.33 → hud_python-0.4.35}/hud/shared/tests/__init__.py +0 -0
  158. {hud_python-0.4.33 → hud_python-0.4.35}/hud/shared/tests/test_exceptions.py +0 -0
  159. {hud_python-0.4.33 → hud_python-0.4.35}/hud/shared/tests/test_requests.py +0 -0
  160. {hud_python-0.4.33 → hud_python-0.4.35}/hud/telemetry/__init__.py +0 -0
  161. {hud_python-0.4.33 → hud_python-0.4.35}/hud/telemetry/instrument.py +0 -0
  162. {hud_python-0.4.33 → hud_python-0.4.35}/hud/telemetry/job.py +0 -0
  163. {hud_python-0.4.33 → hud_python-0.4.35}/hud/telemetry/replay.py +0 -0
  164. {hud_python-0.4.33 → hud_python-0.4.35}/hud/telemetry/tests/__init__.py +0 -0
  165. {hud_python-0.4.33 → hud_python-0.4.35}/hud/telemetry/tests/test_replay.py +0 -0
  166. {hud_python-0.4.33 → hud_python-0.4.35}/hud/telemetry/tests/test_trace.py +0 -0
  167. {hud_python-0.4.33 → hud_python-0.4.35}/hud/telemetry/trace.py +0 -0
  168. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/__init__.py +0 -0
  169. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/base.py +0 -0
  170. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/bash.py +0 -0
  171. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/computer/__init__.py +0 -0
  172. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/computer/anthropic.py +0 -0
  173. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/computer/hud.py +0 -0
  174. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/computer/openai.py +0 -0
  175. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/computer/settings.py +0 -0
  176. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/edit.py +0 -0
  177. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/executors/__init__.py +0 -0
  178. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/executors/base.py +0 -0
  179. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/executors/pyautogui.py +0 -0
  180. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/executors/tests/__init__.py +0 -0
  181. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/executors/tests/test_base_executor.py +0 -0
  182. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  183. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/executors/xdo.py +0 -0
  184. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/grounding/__init__.py +0 -0
  185. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/grounding/config.py +0 -0
  186. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/grounding/grounded_tool.py +0 -0
  187. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/grounding/grounder.py +0 -0
  188. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/grounding/tests/__init__.py +0 -0
  189. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
  190. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/playwright.py +0 -0
  191. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/response.py +0 -0
  192. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/submit.py +0 -0
  193. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/tests/__init__.py +0 -0
  194. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/tests/test_base.py +0 -0
  195. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/tests/test_bash.py +0 -0
  196. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/tests/test_bash_extended.py +0 -0
  197. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/tests/test_computer.py +0 -0
  198. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/tests/test_computer_actions.py +0 -0
  199. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/tests/test_edit.py +0 -0
  200. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/tests/test_init.py +0 -0
  201. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/tests/test_playwright_tool.py +0 -0
  202. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/tests/test_response.py +0 -0
  203. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/tests/test_tools.py +0 -0
  204. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/tests/test_tools_init.py +0 -0
  205. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/tests/test_utils.py +0 -0
  206. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/types.py +0 -0
  207. {hud_python-0.4.33 → hud_python-0.4.35}/hud/tools/utils.py +0 -0
  208. {hud_python-0.4.33 → hud_python-0.4.35}/hud/types.py +0 -0
  209. {hud_python-0.4.33 → hud_python-0.4.35}/hud/utils/__init__.py +0 -0
  210. {hud_python-0.4.33 → hud_python-0.4.35}/hud/utils/agent_factories.py +0 -0
  211. {hud_python-0.4.33 → hud_python-0.4.35}/hud/utils/async_utils.py +0 -0
  212. {hud_python-0.4.33 → hud_python-0.4.35}/hud/utils/group_eval.py +0 -0
  213. {hud_python-0.4.33 → hud_python-0.4.35}/hud/utils/hud_console.py +0 -0
  214. {hud_python-0.4.33 → hud_python-0.4.35}/hud/utils/mcp.py +0 -0
  215. {hud_python-0.4.33 → hud_python-0.4.35}/hud/utils/pretty_errors.py +0 -0
  216. {hud_python-0.4.33 → hud_python-0.4.35}/hud/utils/progress.py +0 -0
  217. {hud_python-0.4.33 → hud_python-0.4.35}/hud/utils/tasks.py +0 -0
  218. {hud_python-0.4.33 → hud_python-0.4.35}/hud/utils/telemetry.py +0 -0
  219. {hud_python-0.4.33 → hud_python-0.4.35}/hud/utils/tests/__init__.py +0 -0
  220. {hud_python-0.4.33 → hud_python-0.4.35}/hud/utils/tests/test_async_utils.py +0 -0
  221. {hud_python-0.4.33 → hud_python-0.4.35}/hud/utils/tests/test_init.py +0 -0
  222. {hud_python-0.4.33 → hud_python-0.4.35}/hud/utils/tests/test_mcp.py +0 -0
  223. {hud_python-0.4.33 → hud_python-0.4.35}/hud/utils/tests/test_progress.py +0 -0
  224. {hud_python-0.4.33 → hud_python-0.4.35}/hud/utils/tests/test_telemetry.py +0 -0
@@ -50,4 +50,6 @@ test/
50
50
  /checkpoints/
51
51
  /checkpoints_test/
52
52
  hud/rl/checkpoints/
53
- hud/rl/checkpoints_test/
53
+ hud/rl/checkpoints_test/
54
+
55
+ .ck/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.4.33
3
+ Version: 0.4.35
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -6,7 +6,7 @@ import copy
6
6
  import logging
7
7
  from typing import TYPE_CHECKING, Any, ClassVar, cast
8
8
 
9
- from anthropic import AsyncAnthropic, BadRequestError
9
+ from anthropic import Anthropic, AsyncAnthropic, BadRequestError
10
10
  from anthropic.types.beta import BetaContentBlockParam, BetaImageBlockParam, BetaTextBlockParam
11
11
 
12
12
  import hud
@@ -54,6 +54,7 @@ class ClaudeAgent(MCPAgent):
54
54
  model: str = "claude-sonnet-4-20250514",
55
55
  max_tokens: int = 4096,
56
56
  use_computer_beta: bool = True,
57
+ validate_api_key: bool = True,
57
58
  **kwargs: Any,
58
59
  ) -> None:
59
60
  """
@@ -75,6 +76,13 @@ class ClaudeAgent(MCPAgent):
75
76
  raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
76
77
  model_client = AsyncAnthropic(api_key=api_key)
77
78
 
79
+ # validate api key if requested
80
+ if validate_api_key:
81
+ try:
82
+ Anthropic(api_key=model_client.api_key).models.list()
83
+ except Exception as e:
84
+ raise ValueError(f"Anthropic API key is invalid: {e}") from e
85
+
78
86
  self.anthropic_client = model_client
79
87
  self.model = model
80
88
  self.max_tokens = max_tokens
@@ -16,7 +16,17 @@ class ResponseAgent:
16
16
  based on the agent's final response message.
17
17
  """
18
18
 
19
- def __init__(self, api_key: str | None = None, model: str = "gpt-4o") -> None:
19
+ def __init__(
20
+ self, api_key: str | None = None, model: str = "gpt-4o", system_prompt: str | None = None
21
+ ) -> None:
22
+ """
23
+ Initialize the ResponseAgent.
24
+
25
+ Args:
26
+ api_key: The API key to use for the OpenAI client
27
+ model: The model to use for the OpenAI client (default: "gpt-4o")
28
+ system_prompt: The system prompt to use for the OpenAI client
29
+ """
20
30
  self.api_key = api_key or settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
21
31
  if not self.api_key:
22
32
  raise ValueError(
@@ -26,23 +36,29 @@ class ResponseAgent:
26
36
  self.client = AsyncOpenAI(api_key=self.api_key)
27
37
  self.model = model
28
38
 
29
- self.system_prompt = """
39
+ self.system_prompt = (
40
+ system_prompt
41
+ or """
30
42
  You are an assistant that helps determine the appropriate response to an agent's message.
31
43
 
32
44
  You will receive messages from an agent that is performing tasks for a user.
33
45
  Your job is to analyze these messages and respond with one of the following:
34
46
 
35
- - STOP: If the agent indicates it has successfully completed a task, even if phrased as a question
36
- like "I have entered the right values into this form. Would you like me to do anything else?"
37
- or "Here is the website. Is there any other information you need?" or if the agent has
38
- strongly determined it wants to stop the task.
39
-
47
+ - STOP: If the agent indicates it has successfully completed a task or is stuck,
48
+ struggling or says it cannot complete the task, even if phrased as a question
49
+ like "I have entered the right values into this form. Would you like me to do
50
+ anything else?" or "Here is the website. Is there any other information you
51
+ need?" or if the agent has strongly determined it wants to stop the task like
52
+ "The task is infeasible. Can I help you with something else?"
53
+
40
54
  - CONTINUE: If the agent is asking for clarification before proceeding with a task
41
55
  like "I'm about to clear cookies from this website. Would you like me to proceed?"
42
- or "I've entered the right values into this form. Would you like me to continue with the rest of the task?"
56
+ or "I've entered the right values into this form. Would you like me to continue
57
+ with the rest of the task?"
43
58
 
44
59
  Respond ONLY with one of these two options.
45
- """ # noqa: E501
60
+ """
61
+ )
46
62
 
47
63
  async def determine_response(self, agent_message: str) -> ResponseType:
48
64
  """
@@ -6,7 +6,7 @@ import logging
6
6
  from typing import Any, ClassVar, Literal
7
7
 
8
8
  import mcp.types as types
9
- from openai import AsyncOpenAI
9
+ from openai import AsyncOpenAI, OpenAI
10
10
  from openai.types.responses import (
11
11
  ResponseComputerToolCall,
12
12
  ResponseInputMessageContentListParam,
@@ -45,6 +45,7 @@ class OperatorAgent(MCPAgent):
45
45
  model_client: AsyncOpenAI | None = None,
46
46
  model: str = "computer-use-preview",
47
47
  environment: Literal["windows", "mac", "linux", "browser"] = "linux",
48
+ validate_api_key: bool = True,
48
49
  **kwargs: Any,
49
50
  ) -> None:
50
51
  """
@@ -76,6 +77,13 @@ class OperatorAgent(MCPAgent):
76
77
  self.pending_call_id: str | None = None
77
78
  self.pending_safety_checks: list[Any] = []
78
79
 
80
+ # validate api key if requested
81
+ if validate_api_key:
82
+ try:
83
+ OpenAI(api_key=self.openai_client.api_key).models.list()
84
+ except Exception as e:
85
+ raise ValueError(f"OpenAI API key is invalid: {e}") from e
86
+
79
87
  self.model_name = "openai-" + self.model
80
88
 
81
89
  # Append OpenAI-specific instructions to the base system prompt
@@ -585,6 +585,9 @@ def build(
585
585
  ),
586
586
  no_cache: bool = typer.Option(False, "--no-cache", help="Build without Docker cache"),
587
587
  verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed output"),
588
+ platform: str | None = typer.Option(
589
+ None, "--platform", help="Set Docker target platform (e.g., linux/amd64)"
590
+ ),
588
591
  ) -> None:
589
592
  """🏗️ Build a HUD environment and generate lock file.
590
593
 
@@ -635,7 +638,7 @@ def build(
635
638
  else:
636
639
  i += 1
637
640
 
638
- build_command(directory, tag, no_cache, verbose, env_vars)
641
+ build_command(directory, tag, no_cache, verbose, env_vars, platform)
639
642
 
640
643
 
641
644
  @app.command()
@@ -224,6 +224,7 @@ def build_docker_image(
224
224
  no_cache: bool = False,
225
225
  verbose: bool = False,
226
226
  build_args: dict[str, str] | None = None,
227
+ platform: str | None = None,
227
228
  ) -> bool:
228
229
  """Build a Docker image from a directory."""
229
230
  hud_console = HUDConsole()
@@ -236,7 +237,10 @@ def build_docker_image(
236
237
  return False
237
238
 
238
239
  # Build command
239
- cmd = ["docker", "build", "-t", tag]
240
+ cmd = ["docker", "build"]
241
+ if platform:
242
+ cmd.extend(["--platform", platform])
243
+ cmd.extend(["-t", tag])
240
244
  if no_cache:
241
245
  cmd.append("--no-cache")
242
246
 
@@ -264,6 +268,7 @@ def build_environment(
264
268
  no_cache: bool = False,
265
269
  verbose: bool = False,
266
270
  env_vars: dict[str, str] | None = None,
271
+ platform: str | None = None,
267
272
  ) -> None:
268
273
  """Build a HUD environment and generate lock file."""
269
274
  hud_console = HUDConsole()
@@ -294,9 +299,8 @@ def build_environment(
294
299
  except Exception:
295
300
  default_image = f"{env_dir.name}:dev"
296
301
 
297
- # Use provided tag or default
298
- if not tag:
299
- tag = default_image
302
+ # Determine final image tag to use
303
+ image_tag: str = tag if tag else default_image
300
304
 
301
305
  # Build temporary image first
302
306
  temp_tag = f"hud-build-temp:{int(time.time())}"
@@ -304,7 +308,14 @@ def build_environment(
304
308
  hud_console.progress_message(f"Building Docker image: {temp_tag}")
305
309
 
306
310
  # Build the image (env vars are for runtime, not build time)
307
- if not build_docker_image(env_dir, temp_tag, no_cache, verbose):
311
+ if not build_docker_image(
312
+ env_dir,
313
+ temp_tag,
314
+ no_cache,
315
+ verbose,
316
+ build_args=None,
317
+ platform=platform,
318
+ ):
308
319
  hud_console.error("Docker build failed")
309
320
  raise typer.Exit(1)
310
321
 
@@ -422,21 +433,24 @@ def build_environment(
422
433
 
423
434
  # Build final image with label (uses cache from first build)
424
435
  # Also tag with version
425
- base_name = tag.split(":")[0] if tag and ":" in tag else tag
436
+ base_name = image_tag.split(":")[0] if ":" in image_tag else image_tag
426
437
  version_tag = f"{base_name}:{new_version}"
427
438
 
428
- label_cmd = [
429
- "docker",
430
- "build",
431
- "--label",
432
- f"org.hud.manifest.head={lock_hash}:{lock_size}",
433
- "--label",
434
- f"org.hud.version={new_version}",
435
- "-t",
436
- tag,
437
- "-t",
438
- version_tag,
439
- ]
439
+ label_cmd = ["docker", "build"]
440
+ if platform is not None:
441
+ label_cmd.extend(["--platform", platform])
442
+ label_cmd.extend(
443
+ [
444
+ "--label",
445
+ f"org.hud.manifest.head={lock_hash}:{lock_size}",
446
+ "--label",
447
+ f"org.hud.version={new_version}",
448
+ "-t",
449
+ image_tag,
450
+ "-t",
451
+ version_tag,
452
+ ]
453
+ )
440
454
 
441
455
  label_cmd.append(str(env_dir))
442
456
 
@@ -457,14 +471,14 @@ def build_environment(
457
471
  hud_console.success("Built final image with lock file metadata")
458
472
 
459
473
  # NOW get the image ID after the final build
460
- image_id = get_docker_image_id(tag) # type: ignore
474
+ image_id = get_docker_image_id(image_tag)
461
475
  if image_id:
462
476
  # For local builds, store the image ID
463
477
  # Docker IDs come as sha256:hash, we want tag@sha256:hash
464
478
  if image_id.startswith("sha256:"):
465
- lock_content["image"] = f"{tag}@{image_id}"
479
+ lock_content["image"] = f"{image_tag}@{image_id}"
466
480
  else:
467
- lock_content["image"] = f"{tag}@sha256:{image_id}"
481
+ lock_content["image"] = f"{image_tag}@sha256:{image_id}"
468
482
 
469
483
  # Update the lock file with the new image reference
470
484
  with open(lock_path, "w") as f:
@@ -475,7 +489,7 @@ def build_environment(
475
489
  hud_console.warning("Could not retrieve image ID for lock file")
476
490
 
477
491
  # Remove temp image after we're done
478
- subprocess.run(["docker", "rmi", temp_tag], capture_output=True) # noqa: S603, S607
492
+ subprocess.run(["docker", "rmi", "-f", temp_tag], capture_output=True) # noqa: S603, S607
479
493
 
480
494
  # Add to local registry
481
495
  if image_id:
@@ -487,8 +501,8 @@ def build_environment(
487
501
 
488
502
  # Show the version tag as primary since that's what will be pushed
489
503
  hud_console.status_item("Built image", version_tag, primary=True)
490
- if tag:
491
- hud_console.status_item("Also tagged", tag)
504
+ if image_tag:
505
+ hud_console.status_item("Also tagged", image_tag)
492
506
  hud_console.status_item("Version", new_version)
493
507
  hud_console.status_item("Lock file", "hud.lock.yaml")
494
508
  hud_console.status_item("Tools found", str(analysis["toolCount"]))
@@ -500,7 +514,7 @@ def build_environment(
500
514
  hud_console.section_title("Next Steps")
501
515
  hud_console.info("Test locally:")
502
516
  hud_console.command_example("hud dev", "Hot-reload development")
503
- hud_console.command_example(f"hud run {tag}", "Run the built image")
517
+ hud_console.command_example(f"hud run {image_tag}", "Run the built image")
504
518
  hud_console.info("")
505
519
  hud_console.info("Publish to registry:")
506
520
  hud_console.command_example("hud push", f"Push as {version_tag}")
@@ -517,6 +531,7 @@ def build_command(
517
531
  no_cache: bool = typer.Option(False, "--no-cache", help="Build without Docker cache"),
518
532
  verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed output"),
519
533
  env_vars: dict[str, str] | None = None,
534
+ platform: str | None = None,
520
535
  ) -> None:
521
536
  """Build a HUD environment and generate lock file."""
522
- build_environment(directory, tag, no_cache, verbose, env_vars)
537
+ build_environment(directory, tag, no_cache, verbose, env_vars, platform)
@@ -295,7 +295,7 @@ async def run_full_dataset(
295
295
  agent_type: Literal["claude", "openai", "vllm"] = "claude",
296
296
  model: str | None = None,
297
297
  allowed_tools: list[str] | None = None,
298
- max_concurrent: int = 50,
298
+ max_concurrent: int = 30,
299
299
  max_steps: int = 10,
300
300
  parallel: bool = False,
301
301
  max_workers: int | None = None,
@@ -100,7 +100,8 @@ def _ensure_built(env_dir: Path) -> dict[str, Any]:
100
100
  # Check Docker availability before attempting a build
101
101
  require_docker_running()
102
102
  # Run build (non-interactive). If Docker isn't running, this will raise and stop the flow.
103
- build_environment(str(env_dir))
103
+ # Force linux/amd64 platform to ensure compatibility during RL flows.
104
+ build_environment(str(env_dir), platform="linux/amd64")
104
105
 
105
106
  # Load lock file
106
107
  with open(lock_path) as f:
@@ -22,7 +22,7 @@ async def run_dataset(
22
22
  dataset: str | Dataset | list[dict[str, Any]],
23
23
  agent_class: type[MCPAgent],
24
24
  agent_config: dict[str, Any] | None = None,
25
- max_concurrent: int = 50,
25
+ max_concurrent: int = 30,
26
26
  metadata: dict[str, Any] | None = None,
27
27
  max_steps: int = 10,
28
28
  split: str = "train",
@@ -85,18 +85,19 @@ class Actor:
85
85
  )
86
86
  except TimeoutError:
87
87
  hud_console.warning_log(f"Episode timed out for task {t.id}")
88
- return Trace(isError=True, content="Episode timeout")
88
+ # Attach task so buffer grouping has key
89
+ return Trace(isError=True, content="Episode timeout", task=t)
89
90
 
90
91
  results = await asyncio.gather(
91
92
  *[run_with_timeout(t) for t in batch],
92
93
  return_exceptions=True,
93
94
  )
94
95
 
95
- # Normalize exceptions to error traces
96
- for res in results:
96
+ # Normalize exceptions to error traces and ensure task is attached
97
+ for t, res in zip(batch, results, strict=False):
97
98
  if isinstance(res, Exception):
98
99
  hud_console.warning_log(f"Episode error: {res}")
99
- traces.append(Trace(isError=True, content=str(res)))
100
+ traces.append(Trace(isError=True, content=str(res), task=t))
100
101
  else:
101
102
  traces.append(res)
102
103
 
@@ -113,7 +114,8 @@ class Actor:
113
114
 
114
115
  except Exception:
115
116
  logger.info("GOT EXCEPTION")
116
- return Trace(isError=True)
117
+ # Preserve task on exception for grouping
118
+ return Trace(isError=True, task=task)
117
119
 
118
120
  result.info["tool_spec"] = agent.get_tool_schemas()
119
121
 
@@ -38,15 +38,20 @@ def make_sample(
38
38
  ref_logp_tok: torch.Tensor,
39
39
  advantage: float,
40
40
  ):
41
- # Minimal object with required attributes for compute_loss
42
- # inputs only needed for metrics token count
41
+ # Minimal-but-correct object for GRPOLearner.compute_loss.
42
+ # Needs assistant_mask (T-1) and attention_mask (T) for sanity_check().
43
43
  Tm1 = pol_logp_tok.size(-1)
44
- inputs = {"input_ids": torch.zeros(1, Tm1 + 1, dtype=torch.long)}
44
+ inputs = {
45
+ "input_ids": torch.zeros(1, Tm1 + 1, dtype=torch.long),
46
+ "attention_mask": torch.ones(1, Tm1 + 1, dtype=torch.long),
47
+ "assistant_mask": torch.ones(1, Tm1, dtype=torch.bool),
48
+ }
45
49
  return TrainingSample(
46
50
  inputs=inputs,
47
51
  old_logprobs=old_logp_tok,
48
52
  ref_logprobs=ref_logp_tok,
49
- advantage=torch.tensor(advantage, dtype=torch.float32),
53
+ # advantage must be 1D so .view(-1,1) works in compute_loss
54
+ advantage=torch.tensor([advantage], dtype=torch.float32),
50
55
  )
51
56
 
52
57
 
@@ -155,6 +160,13 @@ def test_skip_update_when_zero_adv(monkeypatch, learner_stub: GRPOLearner):
155
160
 
156
161
  monkeypatch.setattr(GRPOLearner, "prepare_groups", _stub_prepare_groups, raising=True)
157
162
 
163
+ # Return a zero scalar loss that *depends* on params so backward works,
164
+ # but has zero gradients (no update signal).
165
+ def _zero_loss(self, sample) -> torch.Tensor:
166
+ return sum(p.sum() for p in self.policy.parameters()) * 0.0
167
+
168
+ monkeypatch.setattr(GRPOLearner, "compute_loss", _zero_loss, raising=True)
169
+
158
170
  # Count optimizer.step calls
159
171
  steps = {"n": 0}
160
172
  # orig_step = learner_stub.optimizer.step
@@ -168,4 +180,7 @@ def test_skip_update_when_zero_adv(monkeypatch, learner_stub: GRPOLearner):
168
180
  assert any(p.requires_grad for p in learner_stub.policy.parameters())
169
181
 
170
182
  learner_stub.update([])
171
- assert steps["n"] == 0
183
+ # With the current learner implementation we still call optimizer.step()
184
+ # even if the per-minibatch "advantage" is zero (the step is a no-op
185
+ # because the gradients are zero). So we expect exactly one step here.
186
+ assert steps["n"] == 1
@@ -5,4 +5,4 @@ def test_import():
5
5
  """Test that the package can be imported."""
6
6
  import hud
7
7
 
8
- assert hud.__version__ == "0.4.33"
8
+ assert hud.__version__ == "0.4.35"
@@ -10,7 +10,8 @@ def _is_call_like(obj: Any) -> bool:
10
10
  return True
11
11
  if len(obj) == 1:
12
12
  _, v = next(iter(obj.items()))
13
- return isinstance(v, dict)
13
+ if isinstance(v, dict):
14
+ return "name" in v or (len(v) == 1 and isinstance(next(iter(v.values())), dict))
14
15
  return False
15
16
 
16
17
 
@@ -19,9 +20,9 @@ def _to_call_dict(obj: Any) -> Any:
19
20
 
20
21
  Rules:
21
22
  - If obj is a dict with {name, arguments}: return {name, arguments: recurse(arguments)}
22
- - Else if obj is a single-key dict {k: v}: return {name: k, arguments: recurse(v)}
23
+ - Else if obj is a single-key dict {k: v} where v looks call-like: return {name: k, arguments: recurse(v)}
23
24
  - Else: return obj unchanged (leaf arguments/value)
24
- """
25
+ """ # noqa: E501
25
26
  if isinstance(obj, dict):
26
27
  if "name" in obj and "arguments" in obj:
27
28
  args = obj.get("arguments")
@@ -31,8 +32,10 @@ def _to_call_dict(obj: Any) -> Any:
31
32
  return {"name": obj.get("name"), "arguments": args}
32
33
  if len(obj) == 1:
33
34
  k, v = next(iter(obj.items()))
34
- if isinstance(v, dict):
35
+ # Only convert single-key dicts if the value looks like it could be a call
36
+ if isinstance(v, dict) and _is_call_like(v):
35
37
  return {"name": k, "arguments": _to_call_dict(v)}
38
+ # Otherwise, leave it as-is (this is the innermost arguments dict)
36
39
  return obj
37
40
  return obj
38
41
 
@@ -4,4 +4,4 @@ Version information for the HUD SDK.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.4.33"
7
+ __version__ = "0.4.35"
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "hud-python"
3
- version = "0.4.33"
3
+ version = "0.4.35"
4
4
  description = "SDK for the HUD platform."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11, <3.13"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes