hud-python 0.4.56__tar.gz → 0.4.58__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (306) hide show
  1. {hud_python-0.4.56 → hud_python-0.4.58}/PKG-INFO +1 -1
  2. {hud_python-0.4.56 → hud_python-0.4.58}/environments/README.md +5 -5
  3. hud_python-0.4.58/environments/browser/browser-base/README.md +58 -0
  4. {hud_python-0.4.56 → hud_python-0.4.58}/environments/browser/server/pyproject.toml +1 -1
  5. hud_python-0.4.58/environments/rubrics/README.md +239 -0
  6. {hud_python-0.4.56 → hud_python-0.4.58}/environments/rubrics/environment/pyproject.toml +3 -2
  7. {hud_python-0.4.56 → hud_python-0.4.58}/environments/rubrics/pyproject.toml +1 -1
  8. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/__init__.py +20 -7
  9. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/dev.py +135 -5
  10. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/eval.py +2 -2
  11. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/flows/dev.py +10 -19
  12. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/init.py +14 -18
  13. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/push.py +2 -2
  14. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/rl/__init__.py +1 -1
  15. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/rl/celebrate.py +1 -1
  16. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/rl/remote_runner.py +3 -3
  17. hud_python-0.4.58/hud/cli/tests/test_convert.py +367 -0
  18. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/version_check.py +7 -6
  19. {hud_python-0.4.56 → hud_python-0.4.58}/hud/clients/base.py +29 -3
  20. {hud_python-0.4.56 → hud_python-0.4.58}/hud/clients/fastmcp.py +3 -3
  21. {hud_python-0.4.56 → hud_python-0.4.58}/hud/clients/mcp_use.py +2 -2
  22. {hud_python-0.4.56 → hud_python-0.4.58}/hud/clients/tests/test_protocol.py +9 -3
  23. {hud_python-0.4.56 → hud_python-0.4.58}/hud/otel/config.py +1 -1
  24. {hud_python-0.4.56 → hud_python-0.4.58}/hud/otel/context.py +2 -2
  25. {hud_python-0.4.56 → hud_python-0.4.58}/hud/server/server.py +306 -0
  26. {hud_python-0.4.56 → hud_python-0.4.58}/hud/shared/hints.py +3 -3
  27. {hud_python-0.4.56 → hud_python-0.4.58}/hud/telemetry/job.py +2 -2
  28. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/playwright.py +8 -1
  29. {hud_python-0.4.56 → hud_python-0.4.58}/hud/types.py +1 -1
  30. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/tests/test_version.py +1 -1
  31. {hud_python-0.4.56 → hud_python-0.4.58}/hud/version.py +1 -1
  32. {hud_python-0.4.56 → hud_python-0.4.58}/pyproject.toml +1 -1
  33. hud_python-0.4.56/environments/rubrics/README.md +0 -182
  34. {hud_python-0.4.56 → hud_python-0.4.58}/.gitignore +0 -0
  35. {hud_python-0.4.56 → hud_python-0.4.58}/LICENSE +0 -0
  36. {hud_python-0.4.56 → hud_python-0.4.58}/README.md +0 -0
  37. {hud_python-0.4.56 → hud_python-0.4.58}/environments/blank/README.md +0 -0
  38. {hud_python-0.4.56 → hud_python-0.4.58}/environments/blank/environment/README.md +0 -0
  39. {hud_python-0.4.56 → hud_python-0.4.58}/environments/blank/environment/pyproject.toml +0 -0
  40. {hud_python-0.4.56 → hud_python-0.4.58}/environments/blank/server/README.md +0 -0
  41. {hud_python-0.4.56 → hud_python-0.4.58}/environments/blank/server/pyproject.toml +0 -0
  42. {hud_python-0.4.56 → hud_python-0.4.58}/environments/browser/README.md +0 -0
  43. {hud_python-0.4.56 → hud_python-0.4.58}/environments/browser/environment/2048/README.md +0 -0
  44. {hud_python-0.4.56 → hud_python-0.4.58}/environments/browser/environment/2048/backend/pyproject.toml +0 -0
  45. {hud_python-0.4.56 → hud_python-0.4.58}/environments/browser/environment/README.md +0 -0
  46. {hud_python-0.4.56 → hud_python-0.4.58}/environments/browser/environment/pyproject.toml +0 -0
  47. {hud_python-0.4.56 → hud_python-0.4.58}/environments/browser/environment/todo/README.md +0 -0
  48. {hud_python-0.4.56 → hud_python-0.4.58}/environments/browser/environment/todo/backend/pyproject.toml +0 -0
  49. {hud_python-0.4.56 → hud_python-0.4.58}/environments/browser/pyproject.toml +0 -0
  50. {hud_python-0.4.56 → hud_python-0.4.58}/environments/deepresearch/README.md +0 -0
  51. {hud_python-0.4.56 → hud_python-0.4.58}/environments/deepresearch/environment/pyproject.toml +0 -0
  52. {hud_python-0.4.56 → hud_python-0.4.58}/environments/deepresearch/pyproject.toml +0 -0
  53. {hud_python-0.4.56 → hud_python-0.4.58}/environments/deepresearch/server/pyproject.toml +0 -0
  54. {hud_python-0.4.56 → hud_python-0.4.58}/environments/remote_browser/README.md +0 -0
  55. {hud_python-0.4.56 → hud_python-0.4.58}/environments/remote_browser/pyproject.toml +0 -0
  56. {hud_python-0.4.56 → hud_python-0.4.58}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
  57. {hud_python-0.4.56 → hud_python-0.4.58}/environments/rubrics/server/pyproject.toml +0 -0
  58. {hud_python-0.4.56 → hud_python-0.4.58}/environments/text_2048/README.md +0 -0
  59. {hud_python-0.4.56 → hud_python-0.4.58}/environments/text_2048/pyproject.toml +0 -0
  60. {hud_python-0.4.56 → hud_python-0.4.58}/examples/README.md +0 -0
  61. {hud_python-0.4.56 → hud_python-0.4.58}/hud/__init__.py +0 -0
  62. {hud_python-0.4.56 → hud_python-0.4.58}/hud/__main__.py +0 -0
  63. {hud_python-0.4.56 → hud_python-0.4.58}/hud/agents/__init__.py +0 -0
  64. {hud_python-0.4.56 → hud_python-0.4.58}/hud/agents/base.py +0 -0
  65. {hud_python-0.4.56 → hud_python-0.4.58}/hud/agents/claude.py +0 -0
  66. {hud_python-0.4.56 → hud_python-0.4.58}/hud/agents/grounded_openai.py +0 -0
  67. {hud_python-0.4.56 → hud_python-0.4.58}/hud/agents/langchain.py +0 -0
  68. {hud_python-0.4.56 → hud_python-0.4.58}/hud/agents/lite_llm.py +0 -0
  69. {hud_python-0.4.56 → hud_python-0.4.58}/hud/agents/misc/__init__.py +0 -0
  70. {hud_python-0.4.56 → hud_python-0.4.58}/hud/agents/misc/integration_test_agent.py +0 -0
  71. {hud_python-0.4.56 → hud_python-0.4.58}/hud/agents/misc/response_agent.py +0 -0
  72. {hud_python-0.4.56 → hud_python-0.4.58}/hud/agents/openai.py +0 -0
  73. {hud_python-0.4.56 → hud_python-0.4.58}/hud/agents/openai_chat_generic.py +0 -0
  74. {hud_python-0.4.56 → hud_python-0.4.58}/hud/agents/tests/__init__.py +0 -0
  75. {hud_python-0.4.56 → hud_python-0.4.58}/hud/agents/tests/test_base.py +0 -0
  76. {hud_python-0.4.56 → hud_python-0.4.58}/hud/agents/tests/test_base_runtime.py +0 -0
  77. {hud_python-0.4.56 → hud_python-0.4.58}/hud/agents/tests/test_claude.py +0 -0
  78. {hud_python-0.4.56 → hud_python-0.4.58}/hud/agents/tests/test_client.py +0 -0
  79. {hud_python-0.4.56 → hud_python-0.4.58}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
  80. {hud_python-0.4.56 → hud_python-0.4.58}/hud/agents/tests/test_openai.py +0 -0
  81. {hud_python-0.4.56 → hud_python-0.4.58}/hud/agents/utils.py +0 -0
  82. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/__main__.py +0 -0
  83. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/analyze.py +0 -0
  84. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/build.py +0 -0
  85. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/clone.py +0 -0
  86. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/debug.py +0 -0
  87. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/flows/__init__.py +0 -0
  88. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/flows/tasks.py +0 -0
  89. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/get.py +0 -0
  90. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/list_func.py +0 -0
  91. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/pull.py +0 -0
  92. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/remove.py +0 -0
  93. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/rl/config.py +0 -0
  94. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/rl/display.py +0 -0
  95. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/rl/gpu.py +0 -0
  96. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/rl/gpu_utils.py +0 -0
  97. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/rl/local_runner.py +0 -0
  98. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/rl/presets.py +0 -0
  99. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/rl/rl_api.py +0 -0
  100. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/rl/viewer.py +0 -0
  101. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/rl/vllm.py +0 -0
  102. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/rl/wait_utils.py +0 -0
  103. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/__init__.py +0 -0
  104. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_analyze.py +0 -0
  105. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_analyze_metadata.py +0 -0
  106. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_analyze_module.py +0 -0
  107. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_build.py +0 -0
  108. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_build_failure.py +0 -0
  109. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_build_module.py +0 -0
  110. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_cli_init.py +0 -0
  111. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_cli_main.py +0 -0
  112. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
  113. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_cli_root.py +0 -0
  114. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_clone.py +0 -0
  115. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_cursor.py +0 -0
  116. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_debug.py +0 -0
  117. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_eval.py +0 -0
  118. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_list_func.py +0 -0
  119. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_main_module.py +0 -0
  120. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_mcp_server.py +0 -0
  121. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_pull.py +0 -0
  122. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_push.py +0 -0
  123. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_push_happy.py +0 -0
  124. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_push_wrapper.py +0 -0
  125. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_registry.py +0 -0
  126. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/tests/test_utils.py +0 -0
  127. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/__init__.py +0 -0
  128. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/config.py +0 -0
  129. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/cursor.py +0 -0
  130. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/docker.py +0 -0
  131. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/env_check.py +0 -0
  132. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/environment.py +0 -0
  133. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/interactive.py +0 -0
  134. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/local_runner.py +0 -0
  135. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/logging.py +0 -0
  136. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/metadata.py +0 -0
  137. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/package_runner.py +0 -0
  138. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/registry.py +0 -0
  139. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/remote_runner.py +0 -0
  140. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/runner.py +0 -0
  141. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/server.py +0 -0
  142. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/source_hash.py +0 -0
  143. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/tasks.py +0 -0
  144. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/tests/__init__.py +0 -0
  145. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/tests/test_config.py +0 -0
  146. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/tests/test_docker.py +0 -0
  147. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/tests/test_docker_hints.py +0 -0
  148. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/tests/test_env_check.py +0 -0
  149. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/tests/test_environment.py +0 -0
  150. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/tests/test_interactive_module.py +0 -0
  151. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/tests/test_local_runner.py +0 -0
  152. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/tests/test_logging_utils.py +0 -0
  153. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/tests/test_metadata.py +0 -0
  154. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/tests/test_package_runner.py +0 -0
  155. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/tests/test_registry_utils.py +0 -0
  156. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/tests/test_remote_runner.py +0 -0
  157. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/tests/test_runner_modules.py +0 -0
  158. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/tests/test_source_hash.py +0 -0
  159. {hud_python-0.4.56 → hud_python-0.4.58}/hud/cli/utils/tests/test_tasks.py +0 -0
  160. {hud_python-0.4.56 → hud_python-0.4.58}/hud/clients/README.md +0 -0
  161. {hud_python-0.4.56 → hud_python-0.4.58}/hud/clients/__init__.py +0 -0
  162. {hud_python-0.4.56 → hud_python-0.4.58}/hud/clients/tests/__init__.py +0 -0
  163. {hud_python-0.4.56 → hud_python-0.4.58}/hud/clients/tests/test_client_integration.py +0 -0
  164. {hud_python-0.4.56 → hud_python-0.4.58}/hud/clients/tests/test_fastmcp.py +0 -0
  165. {hud_python-0.4.56 → hud_python-0.4.58}/hud/clients/tests/test_mcp_use_retry.py +0 -0
  166. {hud_python-0.4.56 → hud_python-0.4.58}/hud/clients/utils/__init__.py +0 -0
  167. {hud_python-0.4.56 → hud_python-0.4.58}/hud/clients/utils/mcp_use_retry.py +0 -0
  168. {hud_python-0.4.56 → hud_python-0.4.58}/hud/clients/utils/retry.py +0 -0
  169. {hud_python-0.4.56 → hud_python-0.4.58}/hud/clients/utils/retry_transport.py +0 -0
  170. {hud_python-0.4.56 → hud_python-0.4.58}/hud/datasets/__init__.py +0 -0
  171. {hud_python-0.4.56 → hud_python-0.4.58}/hud/datasets/parallel.py +0 -0
  172. {hud_python-0.4.56 → hud_python-0.4.58}/hud/datasets/runner.py +0 -0
  173. {hud_python-0.4.56 → hud_python-0.4.58}/hud/datasets/tests/__init__.py +0 -0
  174. {hud_python-0.4.56 → hud_python-0.4.58}/hud/datasets/tests/test_runner.py +0 -0
  175. {hud_python-0.4.56 → hud_python-0.4.58}/hud/datasets/tests/test_utils.py +0 -0
  176. {hud_python-0.4.56 → hud_python-0.4.58}/hud/datasets/utils.py +0 -0
  177. {hud_python-0.4.56 → hud_python-0.4.58}/hud/misc/__init__.py +0 -0
  178. {hud_python-0.4.56 → hud_python-0.4.58}/hud/misc/claude_plays_pokemon.py +0 -0
  179. {hud_python-0.4.56 → hud_python-0.4.58}/hud/native/__init__.py +0 -0
  180. {hud_python-0.4.56 → hud_python-0.4.58}/hud/native/comparator.py +0 -0
  181. {hud_python-0.4.56 → hud_python-0.4.58}/hud/native/tests/__init__.py +0 -0
  182. {hud_python-0.4.56 → hud_python-0.4.58}/hud/native/tests/test_comparator.py +0 -0
  183. {hud_python-0.4.56 → hud_python-0.4.58}/hud/native/tests/test_native_init.py +0 -0
  184. {hud_python-0.4.56 → hud_python-0.4.58}/hud/otel/__init__.py +0 -0
  185. {hud_python-0.4.56 → hud_python-0.4.58}/hud/otel/collector.py +0 -0
  186. {hud_python-0.4.56 → hud_python-0.4.58}/hud/otel/exporters.py +0 -0
  187. {hud_python-0.4.56 → hud_python-0.4.58}/hud/otel/instrumentation.py +0 -0
  188. {hud_python-0.4.56 → hud_python-0.4.58}/hud/otel/processors.py +0 -0
  189. {hud_python-0.4.56 → hud_python-0.4.58}/hud/otel/tests/__init__.py +0 -0
  190. {hud_python-0.4.56 → hud_python-0.4.58}/hud/otel/tests/test_instrumentation.py +0 -0
  191. {hud_python-0.4.56 → hud_python-0.4.58}/hud/otel/tests/test_processors.py +0 -0
  192. {hud_python-0.4.56 → hud_python-0.4.58}/hud/py.typed +0 -0
  193. {hud_python-0.4.56 → hud_python-0.4.58}/hud/rl/README.md +0 -0
  194. {hud_python-0.4.56 → hud_python-0.4.58}/hud/rl/__init__.py +0 -0
  195. {hud_python-0.4.56 → hud_python-0.4.58}/hud/rl/actor.py +0 -0
  196. {hud_python-0.4.56 → hud_python-0.4.58}/hud/rl/buffer.py +0 -0
  197. {hud_python-0.4.56 → hud_python-0.4.58}/hud/rl/chat_template.jinja +0 -0
  198. {hud_python-0.4.56 → hud_python-0.4.58}/hud/rl/config.py +0 -0
  199. {hud_python-0.4.56 → hud_python-0.4.58}/hud/rl/distributed.py +0 -0
  200. {hud_python-0.4.56 → hud_python-0.4.58}/hud/rl/learner.py +0 -0
  201. {hud_python-0.4.56 → hud_python-0.4.58}/hud/rl/tests/__init__.py +0 -0
  202. {hud_python-0.4.56 → hud_python-0.4.58}/hud/rl/tests/test_learner.py +0 -0
  203. {hud_python-0.4.56 → hud_python-0.4.58}/hud/rl/train.py +0 -0
  204. {hud_python-0.4.56 → hud_python-0.4.58}/hud/rl/types.py +0 -0
  205. {hud_python-0.4.56 → hud_python-0.4.58}/hud/rl/utils/start_vllm_server.sh +0 -0
  206. {hud_python-0.4.56 → hud_python-0.4.58}/hud/rl/utils.py +0 -0
  207. {hud_python-0.4.56 → hud_python-0.4.58}/hud/rl/vllm_adapter.py +0 -0
  208. {hud_python-0.4.56 → hud_python-0.4.58}/hud/samples/__init__.py +0 -0
  209. {hud_python-0.4.56 → hud_python-0.4.58}/hud/samples/browser.py +0 -0
  210. {hud_python-0.4.56 → hud_python-0.4.58}/hud/server/__init__.py +0 -0
  211. {hud_python-0.4.56 → hud_python-0.4.58}/hud/server/context.py +0 -0
  212. {hud_python-0.4.56 → hud_python-0.4.58}/hud/server/helper/__init__.py +0 -0
  213. {hud_python-0.4.56 → hud_python-0.4.58}/hud/server/low_level.py +0 -0
  214. {hud_python-0.4.56 → hud_python-0.4.58}/hud/server/router.py +0 -0
  215. {hud_python-0.4.56 → hud_python-0.4.58}/hud/server/tests/__init__.py +0 -0
  216. {hud_python-0.4.56 → hud_python-0.4.58}/hud/server/tests/test_add_tool.py +0 -0
  217. {hud_python-0.4.56 → hud_python-0.4.58}/hud/server/tests/test_context.py +0 -0
  218. {hud_python-0.4.56 → hud_python-0.4.58}/hud/server/tests/test_mcp_server_handlers.py +0 -0
  219. {hud_python-0.4.56 → hud_python-0.4.58}/hud/server/tests/test_mcp_server_integration.py +0 -0
  220. {hud_python-0.4.56 → hud_python-0.4.58}/hud/server/tests/test_mcp_server_more.py +0 -0
  221. {hud_python-0.4.56 → hud_python-0.4.58}/hud/server/tests/test_run_wrapper.py +0 -0
  222. {hud_python-0.4.56 → hud_python-0.4.58}/hud/server/tests/test_server_extra.py +0 -0
  223. {hud_python-0.4.56 → hud_python-0.4.58}/hud/server/tests/test_sigterm_runner.py +0 -0
  224. {hud_python-0.4.56 → hud_python-0.4.58}/hud/settings.py +0 -0
  225. {hud_python-0.4.56 → hud_python-0.4.58}/hud/shared/__init__.py +0 -0
  226. {hud_python-0.4.56 → hud_python-0.4.58}/hud/shared/exceptions.py +0 -0
  227. {hud_python-0.4.56 → hud_python-0.4.58}/hud/shared/requests.py +0 -0
  228. {hud_python-0.4.56 → hud_python-0.4.58}/hud/shared/tests/__init__.py +0 -0
  229. {hud_python-0.4.56 → hud_python-0.4.58}/hud/shared/tests/test_exceptions.py +0 -0
  230. {hud_python-0.4.56 → hud_python-0.4.58}/hud/shared/tests/test_hints.py +0 -0
  231. {hud_python-0.4.56 → hud_python-0.4.58}/hud/shared/tests/test_requests.py +0 -0
  232. {hud_python-0.4.56 → hud_python-0.4.58}/hud/telemetry/__init__.py +0 -0
  233. {hud_python-0.4.56 → hud_python-0.4.58}/hud/telemetry/async_context.py +0 -0
  234. {hud_python-0.4.56 → hud_python-0.4.58}/hud/telemetry/instrument.py +0 -0
  235. {hud_python-0.4.56 → hud_python-0.4.58}/hud/telemetry/replay.py +0 -0
  236. {hud_python-0.4.56 → hud_python-0.4.58}/hud/telemetry/tests/__init__.py +0 -0
  237. {hud_python-0.4.56 → hud_python-0.4.58}/hud/telemetry/tests/test_async_context.py +0 -0
  238. {hud_python-0.4.56 → hud_python-0.4.58}/hud/telemetry/tests/test_instrument.py +0 -0
  239. {hud_python-0.4.56 → hud_python-0.4.58}/hud/telemetry/tests/test_job.py +0 -0
  240. {hud_python-0.4.56 → hud_python-0.4.58}/hud/telemetry/tests/test_replay.py +0 -0
  241. {hud_python-0.4.56 → hud_python-0.4.58}/hud/telemetry/tests/test_trace.py +0 -0
  242. {hud_python-0.4.56 → hud_python-0.4.58}/hud/telemetry/trace.py +0 -0
  243. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/__init__.py +0 -0
  244. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/base.py +0 -0
  245. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/bash.py +0 -0
  246. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/computer/__init__.py +0 -0
  247. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/computer/anthropic.py +0 -0
  248. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/computer/hud.py +0 -0
  249. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/computer/openai.py +0 -0
  250. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/computer/qwen.py +0 -0
  251. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/computer/settings.py +0 -0
  252. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/edit.py +0 -0
  253. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/executors/__init__.py +0 -0
  254. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/executors/base.py +0 -0
  255. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/executors/pyautogui.py +0 -0
  256. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/executors/tests/__init__.py +0 -0
  257. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/executors/tests/test_base_executor.py +0 -0
  258. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  259. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/executors/xdo.py +0 -0
  260. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/grounding/__init__.py +0 -0
  261. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/grounding/config.py +0 -0
  262. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/grounding/grounded_tool.py +0 -0
  263. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/grounding/grounder.py +0 -0
  264. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/grounding/tests/__init__.py +0 -0
  265. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
  266. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/response.py +0 -0
  267. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/submit.py +0 -0
  268. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/tests/__init__.py +0 -0
  269. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/tests/test_base.py +0 -0
  270. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/tests/test_bash.py +0 -0
  271. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/tests/test_bash_extended.py +0 -0
  272. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/tests/test_computer.py +0 -0
  273. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/tests/test_computer_actions.py +0 -0
  274. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/tests/test_edit.py +0 -0
  275. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/tests/test_init.py +0 -0
  276. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/tests/test_playwright_tool.py +0 -0
  277. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/tests/test_response.py +0 -0
  278. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/tests/test_submit.py +0 -0
  279. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/tests/test_tools.py +0 -0
  280. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/tests/test_tools_init.py +0 -0
  281. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/tests/test_types.py +0 -0
  282. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/tests/test_utils.py +0 -0
  283. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/types.py +0 -0
  284. {hud_python-0.4.56 → hud_python-0.4.58}/hud/tools/utils.py +0 -0
  285. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/__init__.py +0 -0
  286. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/agent_factories.py +0 -0
  287. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/async_utils.py +0 -0
  288. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/group_eval.py +0 -0
  289. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/hud_console.py +0 -0
  290. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/mcp.py +0 -0
  291. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/pretty_errors.py +0 -0
  292. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/progress.py +0 -0
  293. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/task_tracking.py +0 -0
  294. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/tasks.py +0 -0
  295. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/telemetry.py +0 -0
  296. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/tests/__init__.py +0 -0
  297. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/tests/test_agent_factories.py +0 -0
  298. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/tests/test_async_utils.py +0 -0
  299. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/tests/test_init.py +0 -0
  300. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/tests/test_mcp.py +0 -0
  301. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/tests/test_pretty_errors.py +0 -0
  302. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/tests/test_progress.py +0 -0
  303. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/tests/test_tasks.py +0 -0
  304. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/tests/test_telemetry.py +0 -0
  305. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/tests/test_tool_shorthand.py +0 -0
  306. {hud_python-0.4.56 → hud_python-0.4.58}/hud/utils/tool_shorthand.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.4.56
3
+ Version: 0.4.58
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -495,7 +495,7 @@ from hud.agents import ClaudeAgent
495
495
  from hud.clients import MCPClient
496
496
 
497
497
  async def main():
498
- # `trace` captures *everything* that happens and sends it to hud.so
498
+ # `trace` captures *everything* that happens and sends it to hud.ai
499
499
  with hud.trace("local_test"):
500
500
  task = Task(
501
501
  prompt="Complete the task",
@@ -524,7 +524,7 @@ async def main():
524
524
  asyncio.run(main())
525
525
  ```
526
526
 
527
- The `trace` context manager sends a full timeline of agent actions, tool calls, and rewards to hud.so – perfect for debugging.
527
+ The `trace` context manager sends a full timeline of agent actions, tool calls, and rewards to hud.ai – perfect for debugging.
528
528
 
529
529
  See `examples/01_hello_2048.py` and `examples/task_with_setup_eval.py` for larger end-to-end demos.
530
530
 
@@ -532,7 +532,7 @@ See `examples/01_hello_2048.py` and `examples/task_with_setup_eval.py` for large
532
532
 
533
533
  ## Phase 4 – Remote Deployment & HUD Runner
534
534
 
535
- **Goal →** the exact same image runs in parallel on hundreds of instances, and exposes more telemetry so the hud.so can visualise the whole lifecycle.
535
+ **Goal →** the exact same image runs in parallel on hundreds of instances, and exposes more telemetry so that hud.ai can visualise the whole lifecycle.
536
536
 
537
537
  ### 1. Publish your image
538
538
 
@@ -595,11 +595,11 @@ async def initialize_environment(session=None, progress_token=None):
595
595
  await send(100, "ready")
596
596
  ```
597
597
 
598
- Those messages are displayed live on hud.so alongside resource graphs – perfect feedback while you wait.
598
+ Those messages are displayed live on hud.ai alongside resource graphs – perfect feedback while you wait.
599
599
 
600
600
  ### 4. Live telemetry (`telemetry://live`) (Optional)
601
601
 
602
- Expose a resource named `telemetry://live` exactly like in `environments/browser/src/hud_controller/server.py` to return live url to be displayed on hud.so.
602
+ Expose a resource named `telemetry://live`, exactly as in `environments/browser/src/hud_controller/server.py`, to return a live URL to be displayed on hud.ai.
603
603
 
604
604
  Once all of the above works you can unleash *hundreds* of concurrent agents on your new environment.
605
605
 
@@ -0,0 +1,58 @@
1
+ # Browser Base Image
2
+
3
+ Base Docker image for browser environments with Playwright, Chromium, and VNC support.
4
+
5
+ ## Build
6
+
7
+ ```bash
8
+ docker build -t browser-base:latest .
9
+ ```
10
+
11
+ ## Test with VNC Access
12
+
13
+ ### 1. Start the container
14
+
15
+ ```bash
16
+ docker run -it --rm \
17
+ -p 6080:6080 \
18
+ -p 5900:5900 \
19
+ -e DISPLAY=:1 \
20
+ browser-base:latest \
21
+ bash
22
+ ```
23
+
24
+ ### 2. Inside the container, start display servers
25
+
26
+ ```bash
27
+ Xvfb :1 -screen 0 1920x1080x24 > /dev/null 2>&1 &
28
+ x11vnc -display :1 -nopw -listen 0.0.0.0 -forever > /dev/null 2>&1 &
29
+ /usr/share/novnc/utils/novnc_proxy --vnc localhost:5900 --listen 6080 > /dev/null 2>&1 &
30
+ ```
31
+
32
+ ### 3. Test Playwright
33
+
34
+ ```bash
35
+ python3 -c "
36
+ from playwright.sync_api import sync_playwright
37
+ with sync_playwright() as p:
38
+ browser = p.chromium.launch(headless=False)
39
+ page = browser.new_page()
40
+ page.goto('https://example.com')
41
+ print('Title:', page.title())
42
+ input('Press Enter to close...')
43
+ browser.close()
44
+ "
45
+ ```
46
+
47
+ ### 4. View in browser
48
+
49
+ Open `http://localhost:6080/vnc.html` to see Chromium running.
50
+
51
+ ## What's Included
52
+
53
+ - Ubuntu 24.04
54
+ - Desktop environment (Xvfb, x11vnc, noVNC, xfce4)
55
+ - Node.js & npm
56
+ - Python 3 with uv package manager
57
+ - Playwright with Chromium
58
+ - Development tools (git, curl, wget, etc.)
@@ -4,7 +4,7 @@ version = "0.1.0"
4
4
  description = "HUD Browser MCP Server"
5
5
  requires-python = ">=3.11,<3.14"
6
6
  dependencies = [
7
- "hud-python>=0.4.54",
7
+ "hud-python>=0.4.58",
8
8
  "httpx",
9
9
  "playwright",
10
10
  "pyautogui",
@@ -0,0 +1,239 @@
1
+ # SEC EDGAR Rubrics Environment
2
+
3
+ SEC filing research environment powered by the SEC EDGAR database. It provides access to company filings and financial data, with rubric-based evaluation for structured grading provided by [The LLM Data Company](https://llmdata.com).
4
+
5
+ See [docs](https://docs.hud.so/build-environments) for the complete environment design workflow.
6
+
7
+ ## Architecture
8
+
9
+ **`environment/`** - Manages SEC EDGAR and web search integration
10
+ - Uses the edgartools Python library to access SEC filing data
11
+ - Integrates with Exa API for supplementary web search capabilities
12
+ - Exposes HTTP endpoints for research workflows with exponential backoff for rate limiting
13
+
14
+ **`server/`** - Wraps data in MCP tools
15
+ - Provides research tools for agents to access SEC filings, financial data, and web search
16
+ - Agents and tasks interact only with these tools
17
+
18
+ **Why separate?** Edit tools for the agent or tasks without restarting the environment backend.
19
+
20
+ ## Tools
21
+
22
+ ### SEC EDGAR Tools
23
+ - **`setup()`** - Initialize the environment and reset state.
24
+ - **`search_company(query: str)`** - Search for a company by ticker symbol or name. Returns company information including ticker, name, and CIK.
25
+ - **`get_filings(ticker?: str, form_type?: str, limit?: int, cutoff_date?: str)`** - Get SEC filings. When `ticker` is provided, returns company-specific filings. Otherwise, returns global recent filings. Can filter by form type (e.g., "10-K", "10-Q", "8-K"), limit results, and filter by date (YYYY-MM-DD).
26
+ - **`get_filing_content(filing_url: str)`** - Fetch the full text content of a specific SEC filing from its URL.
27
+ - **`get_financial_data(ticker: str, accession_number: str)`** - Extract financial statements and key metrics from a 10-K or 10-Q filing. Returns income statement, balance sheet, cash flow, and other financial data.
28
+ - **`get_segment_data(ticker: str, accession_number: str)`** - Extract segment-level financial data from a 10-K or 10-Q filing for companies with multiple business segments.
29
+ - **`get_filing_sections(ticker: str, accession_number: str)`** - Extract specific sections from a 10-K or 10-Q filing (e.g., Business, Risk Factors, MD&A).
30
+
31
+ ### Web Search Tools
32
+ - **`web_search(query: str)`** - Search the web using Exa API. Returns titles and URLs of relevant results.
33
+ - **`web_fetch(url: str)`** - Fetch and extract content from a web URL. Returns summary, highlights, and full content.
34
+
35
+ ### Evaluation Tools
36
+ - **`answer(final_answer: str)`** - Submit the final research answer.
37
+ - **`evaluate(rubric: list[dict])`** - Evaluate submitted answer using a structured rubric with weighted requirements.
38
+
39
+ ### Rubric-Based Evaluation
40
+
41
+ The `evaluate` tool uses The LLM Data Company's [rubric](https://github.com/The-LLM-Data-Company/rubric/) package to grade answers against structured criteria with autograders.
42
+
43
+ ## Setup
44
+
45
+ ### Environment Variables
46
+
47
+ The environment requires several API keys and configuration:
48
+
49
+ **Required:**
50
+ - `EDGAR_IDENTITY` - Your identity for SEC EDGAR access (required by SEC regulations)
51
+ - Format: `"Your Name your.email@example.com"`
52
+
53
+ **Optional:**
54
+ - `EXA_API_KEY` - For web search and content fetching capabilities (if using web_search/web_fetch tools)
55
+ - `HUD_API_KEY` - For HUD telemetry and tracing
56
+ - `ANTHROPIC_API_KEY` - For Claude agent (if using Claude)
57
+ - `OPENAI_API_KEY` - For rubric evaluation (if using OpenAI-based autograders)
58
+
59
+ Add these to your `.env` file, or export them in your shell, before running `hud eval`:
60
+ ```bash
61
+ export EDGAR_IDENTITY="Your Name your.email@example.com"
62
+ export EXA_API_KEY="your-exa-key" # optional, for web search
63
+ export ANTHROPIC_API_KEY="your-anthropic-key" # only if using an Anthropic model
64
+ export OPENAI_API_KEY="your-openai-key"
65
+ # Optional
66
+ export HUD_API_KEY="your-hud-key"
67
+ ```
68
+
69
+ ## Development
70
+
71
+ ```bash
72
+ # Terminal 1 - Environment backend
73
+ cd environment
74
+ export EDGAR_IDENTITY="Your Name your.email@example.com"
75
+ export EXA_API_KEY="your-exa-key" # optional, for web search
76
+ uv run uvicorn server:app --reload
77
+
78
+ # Terminal 2 - MCP server
79
+ cd server
80
+ uv run hud dev
81
+ ```
82
+
83
+ The environment includes exponential backoff for rate limiting, so API calls will automatically retry on 429 errors.
84
+
85
+ In general, we recommend starting work on the environment backend first, then developing the MCP server to expose the right things to the agent.
86
+
87
+ For complex environments that require many dependencies, we recommend running `hud dev` in the environment root:
88
+ ```bash
89
+ cd ..
90
+ hud dev
91
+ ```
92
+
93
+ ## Tasks & Evaluation
94
+
95
+ ```bash
96
+ # Build first from the environment root folder containing the Dockerfile (creates rubrics:latest)
97
+ hud build
98
+ ```
99
+
100
+ Your `tasks.json` uses `docker run` to launch the environment:
101
+
102
+ ```json
103
+ {
104
+ "prompt": "Analyze Tesla's FY2024 10-K filing...",
105
+ "mcp_config": {
106
+ "local": {
107
+ "command": "docker",
108
+ "args": ["run", "--rm", "-i", "rubrics:latest"]
109
+ }
110
+ },
111
+ "evaluate_tool": {
112
+ "name": "evaluate",
113
+ "arguments": {
114
+ "rubric": [...]
115
+ }
116
+ }
117
+ }
118
+ ```
119
+
120
+ **Note:** Export environment variables before running. The Docker container will inherit them from your shell.
121
+
122
+ **Commands:**
123
+ ```bash
124
+ # Build first
125
+ hud build
126
+
127
+ # Test task locally
128
+ export EDGAR_IDENTITY="Your Name your.email@example.com"
129
+ export EXA_API_KEY="your-exa-key" # optional, for web search
130
+ export ANTHROPIC_API_KEY="your-anthropic-key"
131
+ export OPENAI_API_KEY="your-openai-key"
132
+ hud eval tasks.json --max-steps 25
133
+
134
+ # Push environment for remote running
135
+ hud push
136
+
137
+ # Production RL training
138
+ hud rl tasks.json # Auto-converts docker→remote, builds & pushes if needed
139
+ ```
140
+
141
+ ## Publishing Your Environment
142
+
143
+ Once your environment is ready, you can share it with the community:
144
+
145
+ ### 1. Push to Registry
146
+ ```bash
147
+ # Build and push your environment (requires docker hub login and hud api key)
148
+ hud build
149
+ hud push
150
+ ```
151
+
152
+ ### 2. Create a Dataset
153
+
154
+ Create a dataset on HuggingFace with your tasks:
155
+
156
+ **Option A: Upload manually**
157
+ 1. Upload your `tasks.json` to HuggingFace
158
+ 2. Make sure it's **public** to appear on leaderboards
159
+
160
+ **Option B: Use the SDK**
161
+ ```python
162
+ from hud.datasets import save_tasks
163
+ import json
164
+
165
+ # Load your tasks
166
+ with open("tasks.json") as f:
167
+ tasks = json.load(f)
168
+
169
+ # Push to HuggingFace
170
+ save_tasks(tasks, repo_id="your-org/your-dataset")
171
+ ```
172
+
173
+ ### 3. Run and Track Performance
174
+
175
+ ```bash
176
+ # Run Claude on your benchmark
177
+ hud eval "your-org/your-dataset" --agent claude
178
+
179
+ # View results at:
180
+ # hud.so/leaderboards/your-org/your-dataset
181
+ ```
182
+
183
+ **Note**: Only public HuggingFace datasets appear as leaderboards!
184
+
185
+ 📚 Learn more: [Creating Benchmarks](https://docs.hud.so/evaluate-agents/create-benchmarks) | [Leaderboards](https://docs.hud.so/evaluate-agents/leaderboards)
186
+
187
+ ## Example Research Workflow
188
+
189
+ ```python
190
+ # Initialize environment
191
+ setup()
192
+
193
+ # Agent searches for a company
194
+ company_info = search_company("TSLA")
195
+ # Returns: [{"ticker": "TSLA", "name": "Tesla Inc", "cik": "1318605"}]
196
+
197
+ # Agent gets recent filings
198
+ filings = get_filings(ticker="TSLA", form_type="10-K", limit=1)
199
+ # Returns: [{"filing_date": "2024-01-01", "form_type": "10-K", "accession_number": "...", "filing_url": "..."}]
200
+
201
+ # Agent extracts financial data
202
+ financial_data = get_financial_data(ticker="TSLA", accession_number=filings[0]["accession_number"])
203
+ # Returns: {"has_financials": True, "financial_data": {...income statement, balance sheet, etc...}}
204
+
205
+ # Agent gets specific sections from the filing
206
+ sections = get_filing_sections(ticker="TSLA", accession_number=filings[0]["accession_number"])
207
+ # Returns: {"sections": {"business": "...", "risk_factors": "...", "mda": "..."}}
208
+
209
+ # Agent uses web search for additional context
210
+ search_results = web_search("Tesla FY2024 revenue analysis")
211
+ # Returns: [{"title": "...", "url": "..."}]
212
+
213
+ # Agent fetches web content
214
+ web_content = web_fetch(search_results[0]["url"])
215
+ # Returns: "=== SUMMARY ===\n...\n=== KEY HIGHLIGHTS ===\n...\n=== FULL CONTENT ===\n..."
216
+
217
+ # Agent submits final answer
218
+ answer("Based on Tesla's FY2024 10-K, revenue was $96.8B...")
219
+
220
+ # Evaluate answer using rubric
221
+ result = evaluate(rubric=[
222
+ {"requirement": "Correctly states FY2024 revenue", "weight": 15},
223
+ {"requirement": "Provides segment breakdown", "weight": 5},
224
+ ])
225
+ # Returns: {"reward": float, "info": {"report": [...]}, "done": True}
226
+ ```
227
+
228
+ ## Dependencies
229
+
230
+ - **edgartools**: Python library for accessing SEC EDGAR data
231
+ - **fastapi**: Web framework for the environment server
232
+ - **httpx**: HTTP client for API calls
233
+ - **rubric**: LLM Data Company's rubric evaluation package
234
+ - **Exa API**: Web search and content extraction (optional, for web_search/web_fetch tools)
235
+
236
+ ## Acknowledgments
237
+
238
+ * [EdgarTools](https://github.com/dgunning/edgartools) - Python library to access SEC EDGAR
239
+ * [SEC EDGAR MCP](https://github.com/stefanoamorelli/sec-edgar-mcp) - Rich OSS SEC MCP server
@@ -1,13 +1,14 @@
1
1
  [project]
2
2
  name = "rubrics-environment"
3
3
  version = "0.1.0"
4
- description = "Backend service for Rubrics environment"
4
+ description = "Backend service for Rubrics environment with SEC EDGAR integration"
5
5
  requires-python = ">=3.11"
6
6
  dependencies = [
7
7
  "fastapi>=0.104.1",
8
8
  "uvicorn[standard]>=0.24.0",
9
9
  "httpx>=0.24.0",
10
- "rubric>=1.1.7",
10
+ "rubric==1.1.8",
11
+ "edgartools>=4.21.3",
11
12
  ]
12
13
 
13
14
  [build-system]
@@ -16,4 +16,4 @@ image = "rubrics:dev"
16
16
  allow-direct-references = true
17
17
 
18
18
  [tool.hatch.build.targets.wheel]
19
- packages = [ "controller", "environment",]
19
+ packages = [ "server", "environment",]
@@ -253,10 +253,23 @@ def debug(
253
253
  else:
254
254
  # Assume it's an image name
255
255
  image = first_param
256
- from .utils.docker import build_run_command
256
+ from .utils.docker import create_docker_run_command
257
+
258
+ # For image mode, check if there's a .env file in current directory
259
+ # and use it if available (similar to hud dev behavior)
260
+ cwd = Path.cwd()
261
+ if (cwd / ".env").exists():
262
+ # Use create_docker_run_command to load .env from current directory
263
+ command = create_docker_run_command(
264
+ image,
265
+ docker_args=docker_args,
266
+ env_dir=cwd, # Load .env from current directory
267
+ )
268
+ else:
269
+ # No .env file, use basic command without env loading
270
+ from .utils.docker import build_run_command
257
271
 
258
- # Image-only mode: do not auto-inject local .env
259
- command = build_run_command(image, docker_args)
272
+ command = build_run_command(image, docker_args)
260
273
  else:
261
274
  console.print(
262
275
  "[red]Error: Must specify a directory, Docker image, --config, or --cursor[/red]"
@@ -741,14 +754,14 @@ def remove(
741
754
 
742
755
  @app.command()
743
756
  def init(
744
- name: str = typer.Argument(None, help="Environment name (default: current directory name)"),
757
+ name: str = typer.Argument(None, help="Environment name (default: chosen preset name)"),
745
758
  preset: str | None = typer.Option(
746
759
  None,
747
760
  "--preset",
748
761
  "-p",
749
762
  help="Preset to use: blank, deep-research, browser, rubrics. If omitted, you'll choose interactively.", # noqa: E501
750
763
  ),
751
- directory: str = typer.Option(".", "--dir", "-d", help="Target directory"),
764
+ directory: str = typer.Option(".", "--dir", "-d", help="Parent directory for the environment"),
752
765
  force: bool = typer.Option(False, "--force", "-f", help="Overwrite existing files"),
753
766
  ) -> None:
754
767
  """🚀 Initialize a new HUD environment with minimal boilerplate.
@@ -760,8 +773,8 @@ def init(
760
773
  - Required setup/evaluate tools
761
774
 
762
775
  Examples:
763
- hud init # Use current directory name
764
- hud init my-env # Create in ./my-env/
776
+ hud init # Choose preset interactively, create ./preset-name/
777
+ hud init my-env # Create new directory ./my-env/
765
778
  hud init my-env --dir /tmp # Create in /tmp/my-env/
766
779
  """
767
780
  create_environment(name, directory, force, preset)
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import asyncio
6
+ import contextlib
6
7
  import importlib
7
8
  import importlib.util
8
9
  import logging
@@ -13,6 +14,8 @@ import threading
13
14
  from pathlib import Path
14
15
  from typing import Any
15
16
 
17
+ import typer
18
+
16
19
  from hud.utils.hud_console import HUDConsole
17
20
 
18
21
  hud_console = HUDConsole()
@@ -26,6 +29,7 @@ def show_dev_server_info(
26
29
  interactive: bool,
27
30
  env_dir: Path | None = None,
28
31
  new: bool = False,
32
+ docker_mode: bool = False,
29
33
  ) -> str:
30
34
  """Show consistent server info for both Python and Docker modes.
31
35
 
@@ -54,7 +58,15 @@ def show_dev_server_info(
54
58
  if transport == "http":
55
59
  hud_console.section_title("Quick Links")
56
60
  hud_console.info(f"{hud_console.sym.ITEM} Docs: http://localhost:{port}/docs")
57
- hud_console.info(f"{hud_console.sym.ITEM} Cursor: {cursor_deeplink}")
61
+ hud_console.info(f"{hud_console.sym.ITEM} Cursor:")
62
+ # Display the Cursor link on its own line to prevent wrapping
63
+ hud_console.link(cursor_deeplink)
64
+
65
+ # Show eval endpoint if in Docker mode
66
+ if docker_mode:
67
+ hud_console.info(
68
+ f"{hud_console.sym.ITEM} Eval API: http://localhost:{port}/eval (POST)"
69
+ )
58
70
 
59
71
  # Check for VNC (browser environment)
60
72
  if env_dir and (env_dir / "environment" / "server.py").exists():
@@ -237,7 +249,7 @@ async def run_mcp_module(
237
249
 
238
250
  from hud.cli.flows.dev import create_dynamic_trace
239
251
 
240
- live_trace_url = await create_dynamic_trace(
252
+ _, live_trace_url = await create_dynamic_trace(
241
253
  mcp_config=local_mcp_config,
242
254
  build_status=False,
243
255
  environment_name=mcp_server.name or "mcp-server",
@@ -510,6 +522,9 @@ def run_docker_dev_server(
510
522
  new: bool = False,
511
523
  ) -> None:
512
524
  """Run MCP server in Docker with volume mounts, expose via local HTTP proxy."""
525
+ import atexit
526
+ import signal
527
+
513
528
  import typer
514
529
  import yaml
515
530
 
@@ -522,6 +537,69 @@ def run_docker_dev_server(
522
537
 
523
538
  cwd = Path.cwd()
524
539
 
540
+ # Container name will be set later and used for cleanup
541
+ container_name: str | None = None
542
+ cleanup_done = False
543
+
544
+ def cleanup_container() -> None:
545
+ """Clean up Docker container on exit."""
546
+ nonlocal cleanup_done
547
+ if cleanup_done or not container_name:
548
+ return
549
+
550
+ cleanup_done = True
551
+ hud_console.debug(f"Cleaning up container: {container_name}")
552
+
553
+ # Check if container is still running
554
+ try:
555
+ result = subprocess.run( # noqa: S603
556
+ ["docker", "ps", "-q", "-f", f"name={container_name}"], # noqa: S607
557
+ stdout=subprocess.PIPE,
558
+ stderr=subprocess.DEVNULL,
559
+ text=True,
560
+ timeout=5,
561
+ )
562
+ if not result.stdout.strip():
563
+ # Container is not running, just try to remove it
564
+ subprocess.run( # noqa: S603
565
+ ["docker", "rm", "-f", container_name], # noqa: S607
566
+ stdout=subprocess.DEVNULL,
567
+ stderr=subprocess.DEVNULL,
568
+ timeout=5,
569
+ )
570
+ return
571
+ except Exception: # noqa: S110
572
+ pass
573
+
574
+ try:
575
+ # First try to stop gracefully
576
+ subprocess.run( # noqa: S603
577
+ ["docker", "stop", container_name], # noqa: S607
578
+ stdout=subprocess.DEVNULL,
579
+ stderr=subprocess.DEVNULL,
580
+ timeout=10,
581
+ )
582
+ hud_console.debug(f"Container {container_name} stopped successfully")
583
+ except subprocess.TimeoutExpired:
584
+ # Force kill if stop times out
585
+ hud_console.debug(f"Container {container_name} stop timeout, forcing kill")
586
+ with contextlib.suppress(Exception):
587
+ subprocess.run( # noqa: S603
588
+ ["docker", "kill", container_name], # noqa: S607
589
+ stdout=subprocess.DEVNULL,
590
+ stderr=subprocess.DEVNULL,
591
+ timeout=5,
592
+ )
593
+
594
+ # Set up signal handlers for cleanup
595
+ def signal_handler(signum: int, frame: Any) -> None:
596
+ cleanup_container()
597
+ sys.exit(0)
598
+
599
+ signal.signal(signal.SIGTERM, signal_handler)
600
+ if sys.platform != "win32":
601
+ signal.signal(signal.SIGHUP, signal_handler)
602
+
525
603
  # Find environment directory (current or parent with hud.lock.yaml)
526
604
  env_dir = cwd
527
605
  lock_path = env_dir / "hud.lock.yaml"
@@ -562,10 +640,14 @@ def run_docker_dev_server(
562
640
  base_name = image_name.replace(":", "-").replace("/", "-")
563
641
  container_name = f"{base_name}-dev-{pid}"
564
642
 
643
+ # Register cleanup function with atexit
644
+ atexit.register(cleanup_container)
645
+
565
646
  # Build docker run command with volume mounts and folder-mode envs
566
647
  from .utils.docker import create_docker_run_command
567
648
 
568
649
  base_args = [
650
+ "--rm", # Automatically remove container when it stops
569
651
  "--name",
570
652
  container_name,
571
653
  "-v",
@@ -608,7 +690,7 @@ def run_docker_dev_server(
608
690
  "headers": {},
609
691
  }
610
692
  }
611
- live_trace_url = _asy.run(
693
+ _, live_trace_url = _asy.run(
612
694
  create_dynamic_trace(
613
695
  mcp_config=local_mcp_config,
614
696
  build_status=True,
@@ -643,6 +725,7 @@ def run_docker_dev_server(
643
725
  interactive=interactive,
644
726
  env_dir=env_dir,
645
727
  new=new,
728
+ docker_mode=True,
646
729
  )
647
730
  hud_console.dim_info(
648
731
  "",
@@ -661,13 +744,38 @@ def run_docker_dev_server(
661
744
  # Create and run proxy with HUD helpers
662
745
  async def run_proxy() -> None:
663
746
  from fastmcp import FastMCP
747
+ from fastmcp.server.proxy import ProxyClient
748
+
749
+ # Create ProxyClient without custom log handler since we capture Docker logs directly
750
+ proxy_client = ProxyClient(mcp_config, name="HUD Docker Dev Proxy")
751
+
752
+ # Extract container name from docker args and store for logs endpoint
753
+ docker_cmd = mcp_config["docker"]["args"]
754
+ container_name = None
755
+ for i, arg in enumerate(docker_cmd):
756
+ if arg == "--name" and i + 1 < len(docker_cmd):
757
+ container_name = docker_cmd[i + 1]
758
+ break
664
759
 
665
- # Create FastMCP proxy to Docker stdio
666
- fastmcp_proxy = FastMCP.as_proxy(mcp_config, name="HUD Docker Dev Proxy")
760
+ if container_name:
761
+ # Store container name for logs endpoint to use
762
+ os.environ["_HUD_DEV_DOCKER_CONTAINER"] = container_name
763
+ hud_console.debug(f"Docker container: {container_name}")
764
+
765
+ # Store the docker mcp_config for the eval endpoint
766
+ import json
767
+
768
+ os.environ["_HUD_DEV_DOCKER_MCP_CONFIG"] = json.dumps(mcp_config)
769
+
770
+ # Create FastMCP proxy using the ProxyClient
771
+ fastmcp_proxy = FastMCP.as_proxy(proxy_client)
667
772
 
668
773
  # Wrap in MCPServer to get /docs and REST wrappers
669
774
  proxy = MCPServer(name="HUD Docker Dev Proxy")
670
775
 
776
+ # Enable logs endpoint on HTTP server
777
+ os.environ["_HUD_DEV_LOGS_PROVIDER"] = "enabled"
778
+
671
779
  # Import all tools from the FastMCP proxy
672
780
  await proxy.import_server(fastmcp_proxy)
673
781
 
@@ -693,7 +801,15 @@ def run_docker_dev_server(
693
801
  asyncio.run(run_proxy())
694
802
  except KeyboardInterrupt:
695
803
  hud_console.info("\n\nStopping...")
804
+ cleanup_container()
696
805
  raise typer.Exit(0) from None
806
+ except Exception:
807
+ # Ensure cleanup happens on any exception
808
+ cleanup_container()
809
+ raise
810
+ finally:
811
+ # Final cleanup attempt
812
+ cleanup_container()
697
813
 
698
814
 
699
815
  def run_mcp_dev_server(
@@ -712,6 +828,20 @@ def run_mcp_dev_server(
712
828
  docker_args = docker_args or []
713
829
  cwd = Path.cwd()
714
830
 
831
+ # Find an available port if not using stdio transport
832
+ if not stdio:
833
+ from hud.cli.utils.logging import find_free_port
834
+
835
+ actual_port = find_free_port(port)
836
+ if actual_port is None:
837
+ hud_console.error(f"No available ports found starting from {port}")
838
+ raise typer.Exit(1)
839
+
840
+ if actual_port != port:
841
+ hud_console.info(f"Port {port} is in use, using port {actual_port} instead")
842
+
843
+ port = actual_port
844
+
715
845
  # Auto-detect Docker mode if Dockerfile present and no module specified
716
846
  if not docker and module is None and should_use_docker_mode(cwd):
717
847
  hud_console.note("Detected Dockerfile - using Docker mode with volume mounts")