hud-python 0.4.53__tar.gz → 0.4.55__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (299) hide show
  1. {hud_python-0.4.53 → hud_python-0.4.55}/PKG-INFO +8 -7
  2. {hud_python-0.4.53 → hud_python-0.4.55}/README.md +6 -6
  3. {hud_python-0.4.53 → hud_python-0.4.55}/environments/README.md +2 -2
  4. {hud_python-0.4.53 → hud_python-0.4.55}/environments/blank/server/pyproject.toml +1 -1
  5. {hud_python-0.4.53 → hud_python-0.4.55}/environments/browser/environment/todo/README.md +2 -2
  6. {hud_python-0.4.53 → hud_python-0.4.55}/environments/browser/server/pyproject.toml +1 -1
  7. {hud_python-0.4.53 → hud_python-0.4.55}/environments/deepresearch/server/pyproject.toml +1 -1
  8. {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/base.py +8 -0
  9. {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/claude.py +4 -3
  10. {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/openai.py +2 -1
  11. {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/openai_chat_generic.py +3 -2
  12. {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/tests/test_claude.py +2 -2
  13. {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/tests/test_openai.py +1 -1
  14. hud_python-0.4.55/hud/agents/utils.py +50 -0
  15. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/__init__.py +65 -9
  16. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/build.py +185 -25
  17. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/dev.py +130 -40
  18. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/eval.py +123 -24
  19. hud_python-0.4.55/hud/cli/flows/dev.py +155 -0
  20. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/flows/tasks.py +29 -9
  21. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_eval.py +6 -6
  22. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/docker.py +6 -3
  23. {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/base.py +2 -2
  24. {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/context.py +42 -1
  25. {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/server.py +29 -3
  26. {hud_python-0.4.53 → hud_python-0.4.55}/hud/settings.py +6 -0
  27. {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/async_context.py +16 -2
  28. {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/trace.py +6 -1
  29. {hud_python-0.4.53 → hud_python-0.4.55}/hud/types.py +10 -0
  30. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/group_eval.py +14 -2
  31. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_agent_factories.py +2 -1
  32. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_version.py +1 -1
  33. {hud_python-0.4.53 → hud_python-0.4.55}/hud/version.py +1 -1
  34. {hud_python-0.4.53 → hud_python-0.4.55}/pyproject.toml +2 -1
  35. {hud_python-0.4.53 → hud_python-0.4.55}/.gitignore +0 -0
  36. {hud_python-0.4.53 → hud_python-0.4.55}/LICENSE +0 -0
  37. {hud_python-0.4.53 → hud_python-0.4.55}/environments/blank/README.md +0 -0
  38. {hud_python-0.4.53 → hud_python-0.4.55}/environments/blank/environment/README.md +0 -0
  39. {hud_python-0.4.53 → hud_python-0.4.55}/environments/blank/environment/pyproject.toml +0 -0
  40. {hud_python-0.4.53 → hud_python-0.4.55}/environments/blank/server/README.md +0 -0
  41. {hud_python-0.4.53 → hud_python-0.4.55}/environments/browser/README.md +0 -0
  42. {hud_python-0.4.53 → hud_python-0.4.55}/environments/browser/environment/2048/README.md +0 -0
  43. {hud_python-0.4.53 → hud_python-0.4.55}/environments/browser/environment/2048/backend/pyproject.toml +0 -0
  44. {hud_python-0.4.53 → hud_python-0.4.55}/environments/browser/environment/README.md +0 -0
  45. {hud_python-0.4.53 → hud_python-0.4.55}/environments/browser/environment/pyproject.toml +0 -0
  46. {hud_python-0.4.53 → hud_python-0.4.55}/environments/browser/environment/todo/backend/pyproject.toml +0 -0
  47. {hud_python-0.4.53 → hud_python-0.4.55}/environments/browser/pyproject.toml +0 -0
  48. {hud_python-0.4.53 → hud_python-0.4.55}/environments/deepresearch/README.md +0 -0
  49. {hud_python-0.4.53 → hud_python-0.4.55}/environments/deepresearch/environment/pyproject.toml +0 -0
  50. {hud_python-0.4.53 → hud_python-0.4.55}/environments/deepresearch/pyproject.toml +0 -0
  51. {hud_python-0.4.53 → hud_python-0.4.55}/environments/remote_browser/README.md +0 -0
  52. {hud_python-0.4.53 → hud_python-0.4.55}/environments/remote_browser/pyproject.toml +0 -0
  53. {hud_python-0.4.53 → hud_python-0.4.55}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
  54. {hud_python-0.4.53 → hud_python-0.4.55}/environments/text_2048/README.md +0 -0
  55. {hud_python-0.4.53 → hud_python-0.4.55}/environments/text_2048/pyproject.toml +0 -0
  56. {hud_python-0.4.53 → hud_python-0.4.55}/examples/README.md +0 -0
  57. {hud_python-0.4.53 → hud_python-0.4.55}/hud/__init__.py +0 -0
  58. {hud_python-0.4.53 → hud_python-0.4.55}/hud/__main__.py +0 -0
  59. {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/__init__.py +0 -0
  60. {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/grounded_openai.py +0 -0
  61. {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/langchain.py +0 -0
  62. {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/lite_llm.py +0 -0
  63. {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/misc/__init__.py +0 -0
  64. {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/misc/integration_test_agent.py +0 -0
  65. {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/misc/response_agent.py +0 -0
  66. {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/tests/__init__.py +0 -0
  67. {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/tests/test_base.py +0 -0
  68. {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/tests/test_base_runtime.py +0 -0
  69. {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/tests/test_client.py +0 -0
  70. {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
  71. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/__main__.py +0 -0
  72. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/analyze.py +0 -0
  73. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/clone.py +0 -0
  74. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/debug.py +0 -0
  75. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/flows/__init__.py +0 -0
  76. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/get.py +0 -0
  77. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/init.py +0 -0
  78. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/list_func.py +0 -0
  79. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/pull.py +0 -0
  80. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/push.py +0 -0
  81. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/remove.py +0 -0
  82. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/__init__.py +0 -0
  83. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/celebrate.py +0 -0
  84. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/config.py +0 -0
  85. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/display.py +0 -0
  86. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/gpu.py +0 -0
  87. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/gpu_utils.py +0 -0
  88. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/local_runner.py +0 -0
  89. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/presets.py +0 -0
  90. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/remote_runner.py +0 -0
  91. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/rl_api.py +0 -0
  92. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/viewer.py +0 -0
  93. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/vllm.py +0 -0
  94. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/wait_utils.py +0 -0
  95. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/__init__.py +0 -0
  96. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_analyze.py +0 -0
  97. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_analyze_metadata.py +0 -0
  98. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_analyze_module.py +0 -0
  99. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_build.py +0 -0
  100. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_build_failure.py +0 -0
  101. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_build_module.py +0 -0
  102. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_cli_init.py +0 -0
  103. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_cli_main.py +0 -0
  104. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
  105. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_cli_root.py +0 -0
  106. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_clone.py +0 -0
  107. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_cursor.py +0 -0
  108. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_debug.py +0 -0
  109. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_list_func.py +0 -0
  110. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_main_module.py +0 -0
  111. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_mcp_server.py +0 -0
  112. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_pull.py +0 -0
  113. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_push.py +0 -0
  114. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_push_happy.py +0 -0
  115. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_push_wrapper.py +0 -0
  116. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_registry.py +0 -0
  117. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_utils.py +0 -0
  118. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/__init__.py +0 -0
  119. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/config.py +0 -0
  120. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/cursor.py +0 -0
  121. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/env_check.py +0 -0
  122. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/environment.py +0 -0
  123. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/interactive.py +0 -0
  124. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/local_runner.py +0 -0
  125. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/logging.py +0 -0
  126. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/metadata.py +0 -0
  127. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/package_runner.py +0 -0
  128. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/registry.py +0 -0
  129. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/remote_runner.py +0 -0
  130. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/runner.py +0 -0
  131. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/server.py +0 -0
  132. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/source_hash.py +0 -0
  133. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tasks.py +0 -0
  134. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/__init__.py +0 -0
  135. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_config.py +0 -0
  136. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_docker.py +0 -0
  137. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_docker_hints.py +0 -0
  138. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_env_check.py +0 -0
  139. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_environment.py +0 -0
  140. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_interactive_module.py +0 -0
  141. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_local_runner.py +0 -0
  142. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_logging_utils.py +0 -0
  143. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_metadata.py +0 -0
  144. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_package_runner.py +0 -0
  145. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_registry_utils.py +0 -0
  146. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_remote_runner.py +0 -0
  147. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_runner_modules.py +0 -0
  148. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_source_hash.py +0 -0
  149. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_tasks.py +0 -0
  150. {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/version_check.py +0 -0
  151. {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/README.md +0 -0
  152. {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/__init__.py +0 -0
  153. {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/fastmcp.py +0 -0
  154. {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/mcp_use.py +0 -0
  155. {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/tests/__init__.py +0 -0
  156. {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/tests/test_client_integration.py +0 -0
  157. {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/tests/test_fastmcp.py +0 -0
  158. {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/tests/test_mcp_use_retry.py +0 -0
  159. {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/tests/test_protocol.py +0 -0
  160. {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/utils/__init__.py +0 -0
  161. {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/utils/mcp_use_retry.py +0 -0
  162. {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/utils/retry.py +0 -0
  163. {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/utils/retry_transport.py +0 -0
  164. {hud_python-0.4.53 → hud_python-0.4.55}/hud/datasets/__init__.py +0 -0
  165. {hud_python-0.4.53 → hud_python-0.4.55}/hud/datasets/parallel.py +0 -0
  166. {hud_python-0.4.53 → hud_python-0.4.55}/hud/datasets/runner.py +0 -0
  167. {hud_python-0.4.53 → hud_python-0.4.55}/hud/datasets/tests/__init__.py +0 -0
  168. {hud_python-0.4.53 → hud_python-0.4.55}/hud/datasets/tests/test_runner.py +0 -0
  169. {hud_python-0.4.53 → hud_python-0.4.55}/hud/datasets/tests/test_utils.py +0 -0
  170. {hud_python-0.4.53 → hud_python-0.4.55}/hud/datasets/utils.py +0 -0
  171. {hud_python-0.4.53 → hud_python-0.4.55}/hud/misc/__init__.py +0 -0
  172. {hud_python-0.4.53 → hud_python-0.4.55}/hud/misc/claude_plays_pokemon.py +0 -0
  173. {hud_python-0.4.53 → hud_python-0.4.55}/hud/native/__init__.py +0 -0
  174. {hud_python-0.4.53 → hud_python-0.4.55}/hud/native/comparator.py +0 -0
  175. {hud_python-0.4.53 → hud_python-0.4.55}/hud/native/tests/__init__.py +0 -0
  176. {hud_python-0.4.53 → hud_python-0.4.55}/hud/native/tests/test_comparator.py +0 -0
  177. {hud_python-0.4.53 → hud_python-0.4.55}/hud/native/tests/test_native_init.py +0 -0
  178. {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/__init__.py +0 -0
  179. {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/collector.py +0 -0
  180. {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/config.py +0 -0
  181. {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/exporters.py +0 -0
  182. {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/instrumentation.py +0 -0
  183. {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/processors.py +0 -0
  184. {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/tests/__init__.py +0 -0
  185. {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/tests/test_instrumentation.py +0 -0
  186. {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/tests/test_processors.py +0 -0
  187. {hud_python-0.4.53 → hud_python-0.4.55}/hud/py.typed +0 -0
  188. {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/README.md +0 -0
  189. {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/__init__.py +0 -0
  190. {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/actor.py +0 -0
  191. {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/buffer.py +0 -0
  192. {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/chat_template.jinja +0 -0
  193. {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/config.py +0 -0
  194. {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/distributed.py +0 -0
  195. {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/learner.py +0 -0
  196. {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/tests/__init__.py +0 -0
  197. {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/tests/test_learner.py +0 -0
  198. {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/train.py +0 -0
  199. {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/types.py +0 -0
  200. {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/utils/start_vllm_server.sh +0 -0
  201. {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/utils.py +0 -0
  202. {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/vllm_adapter.py +0 -0
  203. {hud_python-0.4.53 → hud_python-0.4.55}/hud/samples/__init__.py +0 -0
  204. {hud_python-0.4.53 → hud_python-0.4.55}/hud/samples/browser.py +0 -0
  205. {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/__init__.py +0 -0
  206. {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/context.py +0 -0
  207. {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/helper/__init__.py +0 -0
  208. {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/low_level.py +0 -0
  209. {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/router.py +0 -0
  210. {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/tests/__init__.py +0 -0
  211. {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/tests/test_add_tool.py +0 -0
  212. {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/tests/test_context.py +0 -0
  213. {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/tests/test_mcp_server_handlers.py +0 -0
  214. {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/tests/test_mcp_server_integration.py +0 -0
  215. {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/tests/test_mcp_server_more.py +0 -0
  216. {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/tests/test_run_wrapper.py +0 -0
  217. {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/tests/test_server_extra.py +0 -0
  218. {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/tests/test_sigterm_runner.py +0 -0
  219. {hud_python-0.4.53 → hud_python-0.4.55}/hud/shared/__init__.py +0 -0
  220. {hud_python-0.4.53 → hud_python-0.4.55}/hud/shared/exceptions.py +0 -0
  221. {hud_python-0.4.53 → hud_python-0.4.55}/hud/shared/hints.py +0 -0
  222. {hud_python-0.4.53 → hud_python-0.4.55}/hud/shared/requests.py +0 -0
  223. {hud_python-0.4.53 → hud_python-0.4.55}/hud/shared/tests/__init__.py +0 -0
  224. {hud_python-0.4.53 → hud_python-0.4.55}/hud/shared/tests/test_exceptions.py +0 -0
  225. {hud_python-0.4.53 → hud_python-0.4.55}/hud/shared/tests/test_hints.py +0 -0
  226. {hud_python-0.4.53 → hud_python-0.4.55}/hud/shared/tests/test_requests.py +0 -0
  227. {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/__init__.py +0 -0
  228. {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/instrument.py +0 -0
  229. {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/job.py +0 -0
  230. {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/replay.py +0 -0
  231. {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/tests/__init__.py +0 -0
  232. {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/tests/test_async_context.py +0 -0
  233. {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/tests/test_instrument.py +0 -0
  234. {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/tests/test_job.py +0 -0
  235. {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/tests/test_replay.py +0 -0
  236. {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/tests/test_trace.py +0 -0
  237. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/__init__.py +0 -0
  238. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/base.py +0 -0
  239. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/bash.py +0 -0
  240. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/computer/__init__.py +0 -0
  241. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/computer/anthropic.py +0 -0
  242. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/computer/hud.py +0 -0
  243. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/computer/openai.py +0 -0
  244. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/computer/qwen.py +0 -0
  245. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/computer/settings.py +0 -0
  246. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/edit.py +0 -0
  247. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/executors/__init__.py +0 -0
  248. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/executors/base.py +0 -0
  249. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/executors/pyautogui.py +0 -0
  250. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/executors/tests/__init__.py +0 -0
  251. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/executors/tests/test_base_executor.py +0 -0
  252. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  253. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/executors/xdo.py +0 -0
  254. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/grounding/__init__.py +0 -0
  255. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/grounding/config.py +0 -0
  256. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/grounding/grounded_tool.py +0 -0
  257. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/grounding/grounder.py +0 -0
  258. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/grounding/tests/__init__.py +0 -0
  259. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
  260. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/playwright.py +0 -0
  261. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/response.py +0 -0
  262. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/submit.py +0 -0
  263. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/__init__.py +0 -0
  264. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_base.py +0 -0
  265. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_bash.py +0 -0
  266. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_bash_extended.py +0 -0
  267. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_computer.py +0 -0
  268. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_computer_actions.py +0 -0
  269. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_edit.py +0 -0
  270. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_init.py +0 -0
  271. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_playwright_tool.py +0 -0
  272. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_response.py +0 -0
  273. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_submit.py +0 -0
  274. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_tools.py +0 -0
  275. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_tools_init.py +0 -0
  276. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_types.py +0 -0
  277. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_utils.py +0 -0
  278. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/types.py +0 -0
  279. {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/utils.py +0 -0
  280. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/__init__.py +0 -0
  281. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/agent_factories.py +0 -0
  282. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/async_utils.py +0 -0
  283. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/hud_console.py +0 -0
  284. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/mcp.py +0 -0
  285. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/pretty_errors.py +0 -0
  286. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/progress.py +0 -0
  287. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/task_tracking.py +0 -0
  288. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tasks.py +0 -0
  289. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/telemetry.py +0 -0
  290. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/__init__.py +0 -0
  291. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_async_utils.py +0 -0
  292. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_init.py +0 -0
  293. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_mcp.py +0 -0
  294. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_pretty_errors.py +0 -0
  295. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_progress.py +0 -0
  296. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_tasks.py +0 -0
  297. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_telemetry.py +0 -0
  298. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_tool_shorthand.py +0 -0
  299. {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tool_shorthand.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.4.53
3
+ Version: 0.4.55
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -42,6 +42,7 @@ Requires-Dist: httpx<1,>=0.23.0
42
42
  Requires-Dist: hud-fastmcp-python-sdk>=0.1.2
43
43
  Requires-Dist: hud-mcp-python-sdk>=3.13.2
44
44
  Requires-Dist: hud-mcp-use-python-sdk==2.3.20
45
+ Requires-Dist: langchain==0.3.27
45
46
  Requires-Dist: numpy>=1.24.0
46
47
  Requires-Dist: openai
47
48
  Requires-Dist: opentelemetry-api>=1.34.1
@@ -247,8 +248,8 @@ The above example lets the agent play 2048 ([See replay](https://hud.so/trace/6
247
248
  Train a Qwen2.5-VL model with RL (GRPO) on any hud dataset:
248
249
 
249
250
  ```bash
250
- hud get hud-evals/basic-2048 # from HF
251
- hud rl basic-2048.json
251
+ hud get hud-evals/2048-basic # from HF
252
+ hud rl 2048-basic.json
252
253
  ```
253
254
 
254
255
  > See [agent training docs](https://docs.hud.so/train-agents/quickstart)
@@ -439,14 +440,14 @@ Train with the new interactive `hud rl` flow:
439
440
  uv tool install hud-python
440
441
 
441
442
  # Option A: Run directly from a HuggingFace dataset
442
- hud rl hud-evals/basic-2048
443
+ hud rl hud-evals/2048-basic
443
444
 
444
445
  # Option B: Download first, modify, then train
445
- hud get hud-evals/basic-2048
446
- hud rl basic-2048.json
446
+ hud get hud-evals/2048-basic
447
+ hud rl 2048-basic.json
447
448
 
448
449
  # Optional: baseline evaluation
449
- hud eval basic-2048.json
450
+ hud eval 2048-basic.json
450
451
  ```
451
452
 
452
453
  Supports multi‑turn RL for both:
@@ -109,8 +109,8 @@ The above example lets the agent play 2048 ([See replay](https://hud.so/trace/6
109
109
  Train a Qwen2.5-VL model with RL (GRPO) on any hud dataset:
110
110
 
111
111
  ```bash
112
- hud get hud-evals/basic-2048 # from HF
113
- hud rl basic-2048.json
112
+ hud get hud-evals/2048-basic # from HF
113
+ hud rl 2048-basic.json
114
114
  ```
115
115
 
116
116
  > See [agent training docs](https://docs.hud.so/train-agents/quickstart)
@@ -301,14 +301,14 @@ Train with the new interactive `hud rl` flow:
301
301
  uv tool install hud-python
302
302
 
303
303
  # Option A: Run directly from a HuggingFace dataset
304
- hud rl hud-evals/basic-2048
304
+ hud rl hud-evals/2048-basic
305
305
 
306
306
  # Option B: Download first, modify, then train
307
- hud get hud-evals/basic-2048
308
- hud rl basic-2048.json
307
+ hud get hud-evals/2048-basic
308
+ hud rl 2048-basic.json
309
309
 
310
310
  # Optional: baseline evaluation
311
- hud eval basic-2048.json
311
+ hud eval 2048-basic.json
312
312
  ```
313
313
 
314
314
  Supports multi‑turn RL for both:
@@ -804,9 +804,9 @@ class TodoCompleted:
804
804
  @problem("todo_basic", description="Complete two todo items", difficulty="easy")
805
805
  class TodoBasic:
806
806
  def get_setup(self):
807
- return {"function": "todo_seed", "args": {"num_items": 5}}
807
+ return {"name": "todo_seed", "arguments": {"num_items": 5}}
808
808
  def get_evaluation(self):
809
- return {"function": "todo_completed", "args": {"expected_count": 2}}
809
+ return {"name": "todo_completed", "arguments": {"expected_count": 2}}
810
810
  ```
811
811
 
812
812
  Decorators keep registration *next to the implementation* and avoid manual bookkeeping. The server simply exposes the combined metadata through an MCP **resource**. Follow `environments/browser/src/hud_controller/problems/registry.py` as a template and expose the JSON with `@mcp.resource("problems://registry")`.
@@ -4,7 +4,7 @@ version = "0.1.0"
4
4
  description = "MCP server for blank environment"
5
5
  requires-python = ">=3.11"
6
6
  dependencies = [
7
- "hud-python>=0.4.53",
7
+ "hud-python>=0.4.54",
8
8
  "httpx>=0.28.1",
9
9
  ]
10
10
 
@@ -47,8 +47,8 @@ await setup({"name": "todo_basic_usage"})
47
47
  await evaluate({"name": "todo_basic_usage"})
48
48
 
49
49
  # Direct function calls
50
- await setup({"function": "todo_reset", "args": {}})
51
- await evaluate({"function": "todo_completion_rate", "args": {"min_rate": 0.5}})
50
+ await setup({"name": "todo_reset", "arguments": {}})
51
+ await evaluate({"name": "todo_completion_rate", "arguments": {"min_rate": 0.5}})
52
52
 
53
53
  # MCP resource discovery
54
54
  todo_evaluators = await client.read_resource("evaluators://todo")
@@ -4,7 +4,7 @@ version = "0.1.0"
4
4
  description = "HUD Browser MCP Server"
5
5
  requires-python = ">=3.11,<3.14"
6
6
  dependencies = [
7
- "hud-python>=0.4.53",
7
+ "hud-python>=0.4.54",
8
8
  "httpx",
9
9
  "playwright",
10
10
  "pyautogui",
@@ -4,7 +4,7 @@ version = "0.1.0"
4
4
  description = "MCP server for DeepResearch environment"
5
5
  requires-python = ">=3.11"
6
6
  dependencies = [
7
- "hud-python>=0.4.53",
7
+ "hud-python>=0.4.54",
8
8
  "httpx>=0.24.0",
9
9
  ]
10
10
 
@@ -11,6 +11,7 @@ from typing import TYPE_CHECKING, Any, ClassVar, Literal
11
11
 
12
12
  import mcp.types as types
13
13
 
14
+ from hud.agents.utils import log_agent_metadata_to_status, log_task_config_to_current_trace
14
15
  from hud.types import AgentResponse, MCPToolCall, MCPToolResult, Trace
15
16
  from hud.utils.hud_console import HUDConsole
16
17
  from hud.utils.mcp import MCPConfigPatch, patch_mcp_config, setup_hud_telemetry
@@ -62,6 +63,7 @@ class MCPAgent(ABC):
62
63
  initial_screenshot: bool = True,
63
64
  # Misc
64
65
  model_name: str = "mcp-agent",
66
+ checkpoint_name: str | None = None,
65
67
  response_agent: ResponseAgent | None = None,
66
68
  auto_trace: bool = True,
67
69
  verbose: bool = False,
@@ -92,6 +94,7 @@ class MCPAgent(ABC):
92
94
  self._auto_created_client = False # Track if we created the client
93
95
 
94
96
  self.model_name = model_name
97
+ self.checkpoint_name = checkpoint_name
95
98
  self.console = HUDConsole(logger=logger)
96
99
 
97
100
  # Set verbose mode if requested
@@ -198,6 +201,8 @@ class MCPAgent(ABC):
198
201
  f"Agent initialized with {len(self.get_available_tools())} tools: {', '.join([t.name for t in self.get_available_tools()])}" # noqa: E501
199
202
  )
200
203
 
204
+ await log_agent_metadata_to_status(self.model_name, self.checkpoint_name)
205
+
201
206
  async def run(self, prompt_or_task: str | Task | dict[str, Any], max_steps: int = 10) -> Trace:
202
207
  """
203
208
  Run the agent with the given prompt or task.
@@ -223,6 +228,9 @@ class MCPAgent(ABC):
223
228
 
224
229
  # Handle Task objects with full lifecycle
225
230
  if isinstance(prompt_or_task, Task):
231
+ # Log a compact summary of task config to the current trace (async)
232
+ await log_task_config_to_current_trace(prompt_or_task)
233
+
226
234
  return await self.run_task(prompt_or_task, max_steps)
227
235
 
228
236
  # Handle simple string prompts
@@ -89,7 +89,8 @@ class ClaudeAgent(MCPAgent):
89
89
  self.use_computer_beta = use_computer_beta
90
90
  self.hud_console = HUDConsole(logger=logger)
91
91
 
92
- self.model_name = self.model
92
+ self.model_name = "Claude"
93
+ self.checkpoint_name = self.model
93
94
 
94
95
  # Track mapping from Claude tool names to MCP tool names
95
96
  self._claude_to_mcp_tool_map: dict[str, str] = {}
@@ -98,14 +99,14 @@ class ClaudeAgent(MCPAgent):
98
99
  # Append Claude-specific instructions to the base system prompt
99
100
  claude_instructions = """
100
101
  You are Claude, an AI assistant created by Anthropic. You are helpful, harmless, and honest.
101
-
102
+
102
103
  When working on tasks:
103
104
  1. Be thorough and systematic in your approach
104
105
  2. Complete tasks autonomously without asking for confirmation
105
106
  3. Use available tools efficiently to accomplish your goals
106
107
  4. Verify your actions and ensure task completion
107
108
  5. Be precise and accurate in all operations
108
-
109
+
109
110
  Remember: You are expected to complete tasks autonomously. The user trusts you to accomplish what they asked.
110
111
  """.strip() # noqa: E501
111
112
 
@@ -70,6 +70,7 @@ class OperatorAgent(MCPAgent):
70
70
 
71
71
  self.openai_client = model_client
72
72
  self.model = model
73
+ self.checkpoint_name = self.model
73
74
  self.environment = environment
74
75
 
75
76
  # State tracking for OpenAI's stateful API
@@ -84,7 +85,7 @@ class OperatorAgent(MCPAgent):
84
85
  except Exception as e:
85
86
  raise ValueError(f"OpenAI API key is invalid: {e}") from e
86
87
 
87
- self.model_name = "openai-" + self.model
88
+ self.model_name = "Operator"
88
89
 
89
90
  # Append OpenAI-specific instructions to the base system prompt
90
91
  openai_instructions = """
@@ -62,7 +62,8 @@ class GenericOpenAIChatAgent(MCPAgent):
62
62
  else:
63
63
  raise ValueError("Either openai_client or (api_key and base_url) must be provided")
64
64
 
65
- self.model_name = model_name
65
+ self.model_name = "GenericOpenAI"
66
+ self.checkpoint_name = model_name
66
67
  self.completion_kwargs: dict[str, Any] = completion_kwargs or {}
67
68
  self.mcp_schemas = []
68
69
  self.hud_console = HUDConsole(logger=logger)
@@ -194,7 +195,7 @@ class GenericOpenAIChatAgent(MCPAgent):
194
195
  raise ValueError("openai_client is required for GenericOpenAIChatAgent")
195
196
  # default transport = OpenAI SDK
196
197
  return await self.oai.chat.completions.create(
197
- model=self.model_name,
198
+ model=self.checkpoint_name,
198
199
  messages=messages,
199
200
  tools=tools, # type: ignore ready ChatCompletionToolParam-shaped
200
201
  **extra,
@@ -89,7 +89,7 @@ class TestClaudeAgent:
89
89
  validate_api_key=False, # Skip validation in tests
90
90
  )
91
91
 
92
- assert agent.model_name == "claude-3-opus-20240229"
92
+ assert agent.model_name == "Claude"
93
93
  assert agent.max_tokens == 1000
94
94
  assert agent.anthropic_client == mock_model_client
95
95
 
@@ -103,7 +103,7 @@ class TestClaudeAgent:
103
103
  validate_api_key=False, # Skip validation in tests
104
104
  )
105
105
 
106
- assert agent.model_name == "claude-3-opus-20240229"
106
+ assert agent.model_name == "Claude"
107
107
  assert agent.anthropic_client is not None
108
108
 
109
109
  @pytest.mark.asyncio
@@ -50,7 +50,7 @@ class TestOperatorAgent:
50
50
  validate_api_key=False, # Skip validation in tests
51
51
  )
52
52
 
53
- assert agent.model_name == "openai-gpt-4"
53
+ assert agent.model_name == "Operator"
54
54
  assert agent.model == "gpt-4"
55
55
  assert agent.openai_client == mock_model_client
56
56
 
@@ -0,0 +1,50 @@
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ from typing import TYPE_CHECKING
5
+
6
+ from hud.otel.context import (
7
+ _update_task_status_async,
8
+ get_current_task_run_id,
9
+ )
10
+
11
+ if TYPE_CHECKING:
12
+ from hud.datasets import Task
13
+
14
+
15
async def log_task_config_to_current_trace(task: Task) -> None:
    """Attach the task's dumped configuration to the current trace status.

    Best-effort: any exception raised while looking up the run id, dumping
    the task, or sending the status update is suppressed. Nothing is sent
    when there is no current task run id.
    """
    with contextlib.suppress(Exception):
        run_id = get_current_task_run_id()
        if run_id:
            # Dump the task before building the call so the payload is ready.
            metadata = {"task_config": task.model_dump()}
            await _update_task_status_async(
                run_id,
                "running",
                task_id=task.id,
                extra_metadata=metadata,
            )
29
+
30
+
31
async def log_agent_metadata_to_status(
    model_name: str | None = None, checkpoint_name: str | None = None
) -> None:
    """Attach agent metadata (model/checkpoint) to current trace status metadata.

    Best-effort: every exception raised in the body is suppressed. Nothing is
    sent when there is no current task run id, or when both ``model_name`` and
    ``checkpoint_name`` are falsy.
    """
    with contextlib.suppress(Exception):
        run_id = get_current_task_run_id()
        has_metadata = bool(model_name) or bool(checkpoint_name)
        if run_id and has_metadata:
            # Only ``None`` values are dropped here; an empty string is still
            # forwarded when the other field is set.
            candidates = (
                ("model_name", model_name),
                ("checkpoint_name", checkpoint_name),
            )
            agent_meta = {key: value for key, value in candidates if value is not None}

            await _update_task_status_async(
                run_id,
                "running",
                extra_metadata={"agent": agent_meta},
            )
@@ -12,6 +12,8 @@ from rich.console import Console
12
12
  from rich.panel import Panel
13
13
  from rich.table import Table
14
14
 
15
+ from hud.types import AgentType
16
+
15
17
  from . import list_func as list_module
16
18
  from .analyze import (
17
19
  analyze_environment,
@@ -380,6 +382,11 @@ def dev(
380
382
  "--watch",
381
383
  help="Additional directories to watch for changes (default: current directory)",
382
384
  ),
385
+ new: bool = typer.Option(
386
+ False,
387
+ "--new",
388
+ help="Show Cursor installation link for new server setup",
389
+ ),
383
390
  ) -> None:
384
391
  """🔥 Development mode - run MCP server with hot-reload.
385
392
 
@@ -420,6 +427,7 @@ def dev(
420
427
  watch,
421
428
  docker=docker,
422
429
  docker_args=docker_args,
430
+ new=new,
423
431
  )
424
432
 
425
433
 
@@ -847,7 +855,7 @@ def eval(
847
855
  hud_console = HUDConsole()
848
856
 
849
857
  if integration_test:
850
- agent = "integration_test"
858
+ agent = AgentType.INTEGRATION_TEST
851
859
 
852
860
  # If no source provided, reuse RL helper to find a tasks file interactively
853
861
  if source is None:
@@ -894,17 +902,17 @@ def eval(
894
902
  # Add standard agent choices
895
903
  choices.extend(
896
904
  [
897
- {"name": "Claude 4 Sonnet", "value": "claude"},
898
- {"name": "OpenAI Computer Use", "value": "openai"},
899
- {"name": "vLLM (Local Server)", "value": "vllm"},
900
- {"name": "LiteLLM (Multi-provider)", "value": "litellm"},
905
+ {"name": "Claude 4 Sonnet", "value": AgentType.CLAUDE},
906
+ {"name": "OpenAI Computer Use", "value": AgentType.OPENAI},
907
+ {"name": "vLLM (Local Server)", "value": AgentType.VLLM},
908
+ {"name": "LiteLLM (Multi-provider)", "value": AgentType.LITELLM},
901
909
  ]
902
910
  )
903
911
 
904
912
  agent = hud_console.select("Select an agent to use:", choices=choices, default=0)
905
913
 
906
914
  # Handle HUD model selection
907
- if agent and agent not in ["claude", "openai", "vllm", "litellm", "integration_test"]:
915
+ if agent and agent not in [e.value for e in AgentType]:
908
916
  # Find remote model name
909
917
  model = agent
910
918
  if not vllm_base_url:
@@ -921,20 +929,23 @@ def eval(
921
929
  hud_console.error(f"Model {model} not found")
922
930
  raise typer.Exit(1)
923
931
  model = base_model
924
- agent = "vllm" # Use vLLM backend for HUD models
932
+ agent = AgentType.VLLM # Use vLLM backend for HUD models
925
933
  hud_console.info(f"Using HUD model: {model} (trained on {base_model})")
926
934
 
927
935
  # Validate agent choice
928
- valid_agents = ["claude", "openai", "vllm", "litellm", "integration_test"]
936
+ valid_agents = [e.value for e in AgentType]
929
937
  if agent not in valid_agents:
930
938
  hud_console.error(f"Invalid agent: {agent}. Must be one of: {', '.join(valid_agents)}")
931
939
  raise typer.Exit(1)
932
940
 
941
+ # Type narrowing: agent is now guaranteed to be an AgentType value after validation
942
+ agent = AgentType(agent)
943
+
933
944
  # Run the command
934
945
  eval_command(
935
946
  source=source,
936
947
  full=full,
937
- agent=agent, # type: ignore
948
+ agent=agent,
938
949
  model=model,
939
950
  allowed_tools=allowed_tools,
940
951
  max_concurrent=max_concurrent,
@@ -1074,6 +1085,51 @@ def rl(
1074
1085
  )
1075
1086
 
1076
1087
 
1088
@app.command()
def convert(
    tasks_file: str = typer.Argument(
        ..., help="Path to tasks file (JSON/JSONL) to convert to remote MCP configuration"
    ),
) -> None:
    """Convert local MCP task configs to remote (mcp.hud.so) format.

    This mirrors the implicit conversion flow used by 'hud rl' and writes a new
    remote_<name>.json next to the source file when needed.
    """
    from pathlib import Path

    from hud.utils.hud_console import HUDConsole

    hud_console = HUDConsole()

    try:
        # Imported inside the guard so an import failure is reported through
        # the same error path as a conversion failure.
        from .flows.tasks import convert_tasks_to_remote

        converted_path = convert_tasks_to_remote(tasks_file)

        # If the helper handed back the input file itself, nothing was converted.
        try:
            output_resolved = Path(converted_path).resolve()
            source_resolved = Path(tasks_file).resolve()
            if output_resolved == source_resolved:
                hud_console.success(
                    "Tasks already reference remote MCP URLs. No conversion needed."
                )
                hud_console.hint("You can run them directly with: hud eval <tasks_file> --full")
                return
        except Exception as compare_error:
            # The comparison is only a nicety; fall through to the normal
            # success message if it cannot be performed.
            hud_console.debug(f"Path comparison failed, continuing: {compare_error}")

        hud_console.success(f"Converted tasks written to: {converted_path}")
        hud_console.hint(
            "You can now run remote flows: hud rl <converted_file> or hud eval <converted_file>"
        )
    except typer.Exit:
        # Let deliberate exits pass through without being reported as failures.
        raise
    except Exception as error:
        hud_console.error(f"Failed to convert tasks: {error}")
        raise typer.Exit(1) from error
1131
+
1132
+
1077
1133
  @app.command()
1078
1134
  def set(
1079
1135
  assignments: list[str] = typer.Argument( # type: ignore[arg-type] # noqa: B008