hud-python 0.4.57__tar.gz → 0.4.59__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (309) hide show
  1. {hud_python-0.4.57 → hud_python-0.4.59}/PKG-INFO +2 -1
  2. {hud_python-0.4.57 → hud_python-0.4.59}/environments/README.md +5 -5
  3. hud_python-0.4.59/environments/browser/browser-base/README.md +58 -0
  4. {hud_python-0.4.57 → hud_python-0.4.59}/environments/browser/server/pyproject.toml +1 -1
  5. hud_python-0.4.59/environments/rubrics/README.md +239 -0
  6. {hud_python-0.4.57 → hud_python-0.4.59}/environments/rubrics/environment/pyproject.toml +3 -2
  7. {hud_python-0.4.57 → hud_python-0.4.59}/environments/rubrics/pyproject.toml +1 -1
  8. {hud_python-0.4.57 → hud_python-0.4.59}/hud/agents/__init__.py +2 -0
  9. hud_python-0.4.59/hud/agents/gemini.py +492 -0
  10. hud_python-0.4.59/hud/agents/tests/test_gemini.py +372 -0
  11. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/__init__.py +46 -31
  12. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/dev.py +111 -1
  13. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/eval.py +59 -3
  14. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/flows/dev.py +5 -3
  15. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/init.py +14 -18
  16. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/push.py +2 -2
  17. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/rl/__init__.py +1 -1
  18. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/rl/celebrate.py +1 -1
  19. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/rl/remote_runner.py +3 -3
  20. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_eval.py +20 -0
  21. {hud_python-0.4.57 → hud_python-0.4.59}/hud/clients/base.py +1 -1
  22. {hud_python-0.4.57 → hud_python-0.4.59}/hud/clients/fastmcp.py +1 -1
  23. {hud_python-0.4.57 → hud_python-0.4.59}/hud/otel/config.py +1 -1
  24. {hud_python-0.4.57 → hud_python-0.4.59}/hud/otel/context.py +2 -2
  25. {hud_python-0.4.57 → hud_python-0.4.59}/hud/server/server.py +283 -36
  26. {hud_python-0.4.57 → hud_python-0.4.59}/hud/settings.py +6 -0
  27. {hud_python-0.4.57 → hud_python-0.4.59}/hud/shared/hints.py +3 -3
  28. {hud_python-0.4.57 → hud_python-0.4.59}/hud/telemetry/job.py +2 -2
  29. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/__init__.py +13 -2
  30. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/computer/__init__.py +2 -0
  31. hud_python-0.4.59/hud/tools/computer/gemini.py +385 -0
  32. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/computer/settings.py +21 -0
  33. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/playwright.py +17 -2
  34. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/types.py +9 -1
  35. {hud_python-0.4.57 → hud_python-0.4.59}/hud/types.py +2 -1
  36. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/tests/test_version.py +1 -1
  37. {hud_python-0.4.57 → hud_python-0.4.59}/hud/version.py +1 -1
  38. {hud_python-0.4.57 → hud_python-0.4.59}/pyproject.toml +2 -1
  39. hud_python-0.4.57/environments/rubrics/README.md +0 -182
  40. {hud_python-0.4.57 → hud_python-0.4.59}/.gitignore +0 -0
  41. {hud_python-0.4.57 → hud_python-0.4.59}/LICENSE +0 -0
  42. {hud_python-0.4.57 → hud_python-0.4.59}/README.md +0 -0
  43. {hud_python-0.4.57 → hud_python-0.4.59}/environments/blank/README.md +0 -0
  44. {hud_python-0.4.57 → hud_python-0.4.59}/environments/blank/environment/README.md +0 -0
  45. {hud_python-0.4.57 → hud_python-0.4.59}/environments/blank/environment/pyproject.toml +0 -0
  46. {hud_python-0.4.57 → hud_python-0.4.59}/environments/blank/server/README.md +0 -0
  47. {hud_python-0.4.57 → hud_python-0.4.59}/environments/blank/server/pyproject.toml +0 -0
  48. {hud_python-0.4.57 → hud_python-0.4.59}/environments/browser/README.md +0 -0
  49. {hud_python-0.4.57 → hud_python-0.4.59}/environments/browser/environment/2048/README.md +0 -0
  50. {hud_python-0.4.57 → hud_python-0.4.59}/environments/browser/environment/2048/backend/pyproject.toml +0 -0
  51. {hud_python-0.4.57 → hud_python-0.4.59}/environments/browser/environment/README.md +0 -0
  52. {hud_python-0.4.57 → hud_python-0.4.59}/environments/browser/environment/pyproject.toml +0 -0
  53. {hud_python-0.4.57 → hud_python-0.4.59}/environments/browser/environment/todo/README.md +0 -0
  54. {hud_python-0.4.57 → hud_python-0.4.59}/environments/browser/environment/todo/backend/pyproject.toml +0 -0
  55. {hud_python-0.4.57 → hud_python-0.4.59}/environments/browser/pyproject.toml +0 -0
  56. {hud_python-0.4.57 → hud_python-0.4.59}/environments/deepresearch/README.md +0 -0
  57. {hud_python-0.4.57 → hud_python-0.4.59}/environments/deepresearch/environment/pyproject.toml +0 -0
  58. {hud_python-0.4.57 → hud_python-0.4.59}/environments/deepresearch/pyproject.toml +0 -0
  59. {hud_python-0.4.57 → hud_python-0.4.59}/environments/deepresearch/server/pyproject.toml +0 -0
  60. {hud_python-0.4.57 → hud_python-0.4.59}/environments/remote_browser/README.md +0 -0
  61. {hud_python-0.4.57 → hud_python-0.4.59}/environments/remote_browser/pyproject.toml +0 -0
  62. {hud_python-0.4.57 → hud_python-0.4.59}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
  63. {hud_python-0.4.57 → hud_python-0.4.59}/environments/rubrics/server/pyproject.toml +0 -0
  64. {hud_python-0.4.57 → hud_python-0.4.59}/environments/text_2048/README.md +0 -0
  65. {hud_python-0.4.57 → hud_python-0.4.59}/environments/text_2048/pyproject.toml +0 -0
  66. {hud_python-0.4.57 → hud_python-0.4.59}/examples/README.md +0 -0
  67. {hud_python-0.4.57 → hud_python-0.4.59}/hud/__init__.py +0 -0
  68. {hud_python-0.4.57 → hud_python-0.4.59}/hud/__main__.py +0 -0
  69. {hud_python-0.4.57 → hud_python-0.4.59}/hud/agents/base.py +0 -0
  70. {hud_python-0.4.57 → hud_python-0.4.59}/hud/agents/claude.py +0 -0
  71. {hud_python-0.4.57 → hud_python-0.4.59}/hud/agents/grounded_openai.py +0 -0
  72. {hud_python-0.4.57 → hud_python-0.4.59}/hud/agents/langchain.py +0 -0
  73. {hud_python-0.4.57 → hud_python-0.4.59}/hud/agents/lite_llm.py +0 -0
  74. {hud_python-0.4.57 → hud_python-0.4.59}/hud/agents/misc/__init__.py +0 -0
  75. {hud_python-0.4.57 → hud_python-0.4.59}/hud/agents/misc/integration_test_agent.py +0 -0
  76. {hud_python-0.4.57 → hud_python-0.4.59}/hud/agents/misc/response_agent.py +0 -0
  77. {hud_python-0.4.57 → hud_python-0.4.59}/hud/agents/openai.py +0 -0
  78. {hud_python-0.4.57 → hud_python-0.4.59}/hud/agents/openai_chat_generic.py +0 -0
  79. {hud_python-0.4.57 → hud_python-0.4.59}/hud/agents/tests/__init__.py +0 -0
  80. {hud_python-0.4.57 → hud_python-0.4.59}/hud/agents/tests/test_base.py +0 -0
  81. {hud_python-0.4.57 → hud_python-0.4.59}/hud/agents/tests/test_base_runtime.py +0 -0
  82. {hud_python-0.4.57 → hud_python-0.4.59}/hud/agents/tests/test_claude.py +0 -0
  83. {hud_python-0.4.57 → hud_python-0.4.59}/hud/agents/tests/test_client.py +0 -0
  84. {hud_python-0.4.57 → hud_python-0.4.59}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
  85. {hud_python-0.4.57 → hud_python-0.4.59}/hud/agents/tests/test_openai.py +0 -0
  86. {hud_python-0.4.57 → hud_python-0.4.59}/hud/agents/utils.py +0 -0
  87. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/__main__.py +0 -0
  88. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/analyze.py +0 -0
  89. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/build.py +0 -0
  90. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/clone.py +0 -0
  91. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/debug.py +0 -0
  92. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/flows/__init__.py +0 -0
  93. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/flows/tasks.py +0 -0
  94. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/get.py +0 -0
  95. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/list_func.py +0 -0
  96. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/pull.py +0 -0
  97. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/remove.py +0 -0
  98. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/rl/config.py +0 -0
  99. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/rl/display.py +0 -0
  100. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/rl/gpu.py +0 -0
  101. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/rl/gpu_utils.py +0 -0
  102. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/rl/local_runner.py +0 -0
  103. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/rl/presets.py +0 -0
  104. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/rl/rl_api.py +0 -0
  105. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/rl/viewer.py +0 -0
  106. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/rl/vllm.py +0 -0
  107. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/rl/wait_utils.py +0 -0
  108. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/__init__.py +0 -0
  109. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_analyze.py +0 -0
  110. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_analyze_metadata.py +0 -0
  111. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_analyze_module.py +0 -0
  112. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_build.py +0 -0
  113. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_build_failure.py +0 -0
  114. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_build_module.py +0 -0
  115. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_cli_init.py +0 -0
  116. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_cli_main.py +0 -0
  117. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
  118. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_cli_root.py +0 -0
  119. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_clone.py +0 -0
  120. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_convert.py +0 -0
  121. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_cursor.py +0 -0
  122. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_debug.py +0 -0
  123. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_list_func.py +0 -0
  124. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_main_module.py +0 -0
  125. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_mcp_server.py +0 -0
  126. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_pull.py +0 -0
  127. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_push.py +0 -0
  128. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_push_happy.py +0 -0
  129. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_push_wrapper.py +0 -0
  130. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_registry.py +0 -0
  131. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/tests/test_utils.py +0 -0
  132. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/__init__.py +0 -0
  133. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/config.py +0 -0
  134. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/cursor.py +0 -0
  135. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/docker.py +0 -0
  136. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/env_check.py +0 -0
  137. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/environment.py +0 -0
  138. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/interactive.py +0 -0
  139. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/local_runner.py +0 -0
  140. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/logging.py +0 -0
  141. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/metadata.py +0 -0
  142. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/package_runner.py +0 -0
  143. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/registry.py +0 -0
  144. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/remote_runner.py +0 -0
  145. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/runner.py +0 -0
  146. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/server.py +0 -0
  147. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/source_hash.py +0 -0
  148. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/tasks.py +0 -0
  149. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/tests/__init__.py +0 -0
  150. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/tests/test_config.py +0 -0
  151. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/tests/test_docker.py +0 -0
  152. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/tests/test_docker_hints.py +0 -0
  153. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/tests/test_env_check.py +0 -0
  154. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/tests/test_environment.py +0 -0
  155. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/tests/test_interactive_module.py +0 -0
  156. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/tests/test_local_runner.py +0 -0
  157. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/tests/test_logging_utils.py +0 -0
  158. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/tests/test_metadata.py +0 -0
  159. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/tests/test_package_runner.py +0 -0
  160. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/tests/test_registry_utils.py +0 -0
  161. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/tests/test_remote_runner.py +0 -0
  162. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/tests/test_runner_modules.py +0 -0
  163. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/tests/test_source_hash.py +0 -0
  164. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/tests/test_tasks.py +0 -0
  165. {hud_python-0.4.57 → hud_python-0.4.59}/hud/cli/utils/version_check.py +0 -0
  166. {hud_python-0.4.57 → hud_python-0.4.59}/hud/clients/README.md +0 -0
  167. {hud_python-0.4.57 → hud_python-0.4.59}/hud/clients/__init__.py +0 -0
  168. {hud_python-0.4.57 → hud_python-0.4.59}/hud/clients/mcp_use.py +0 -0
  169. {hud_python-0.4.57 → hud_python-0.4.59}/hud/clients/tests/__init__.py +0 -0
  170. {hud_python-0.4.57 → hud_python-0.4.59}/hud/clients/tests/test_client_integration.py +0 -0
  171. {hud_python-0.4.57 → hud_python-0.4.59}/hud/clients/tests/test_fastmcp.py +0 -0
  172. {hud_python-0.4.57 → hud_python-0.4.59}/hud/clients/tests/test_mcp_use_retry.py +0 -0
  173. {hud_python-0.4.57 → hud_python-0.4.59}/hud/clients/tests/test_protocol.py +0 -0
  174. {hud_python-0.4.57 → hud_python-0.4.59}/hud/clients/utils/__init__.py +0 -0
  175. {hud_python-0.4.57 → hud_python-0.4.59}/hud/clients/utils/mcp_use_retry.py +0 -0
  176. {hud_python-0.4.57 → hud_python-0.4.59}/hud/clients/utils/retry.py +0 -0
  177. {hud_python-0.4.57 → hud_python-0.4.59}/hud/clients/utils/retry_transport.py +0 -0
  178. {hud_python-0.4.57 → hud_python-0.4.59}/hud/datasets/__init__.py +0 -0
  179. {hud_python-0.4.57 → hud_python-0.4.59}/hud/datasets/parallel.py +0 -0
  180. {hud_python-0.4.57 → hud_python-0.4.59}/hud/datasets/runner.py +0 -0
  181. {hud_python-0.4.57 → hud_python-0.4.59}/hud/datasets/tests/__init__.py +0 -0
  182. {hud_python-0.4.57 → hud_python-0.4.59}/hud/datasets/tests/test_runner.py +0 -0
  183. {hud_python-0.4.57 → hud_python-0.4.59}/hud/datasets/tests/test_utils.py +0 -0
  184. {hud_python-0.4.57 → hud_python-0.4.59}/hud/datasets/utils.py +0 -0
  185. {hud_python-0.4.57 → hud_python-0.4.59}/hud/misc/__init__.py +0 -0
  186. {hud_python-0.4.57 → hud_python-0.4.59}/hud/misc/claude_plays_pokemon.py +0 -0
  187. {hud_python-0.4.57 → hud_python-0.4.59}/hud/native/__init__.py +0 -0
  188. {hud_python-0.4.57 → hud_python-0.4.59}/hud/native/comparator.py +0 -0
  189. {hud_python-0.4.57 → hud_python-0.4.59}/hud/native/tests/__init__.py +0 -0
  190. {hud_python-0.4.57 → hud_python-0.4.59}/hud/native/tests/test_comparator.py +0 -0
  191. {hud_python-0.4.57 → hud_python-0.4.59}/hud/native/tests/test_native_init.py +0 -0
  192. {hud_python-0.4.57 → hud_python-0.4.59}/hud/otel/__init__.py +0 -0
  193. {hud_python-0.4.57 → hud_python-0.4.59}/hud/otel/collector.py +0 -0
  194. {hud_python-0.4.57 → hud_python-0.4.59}/hud/otel/exporters.py +0 -0
  195. {hud_python-0.4.57 → hud_python-0.4.59}/hud/otel/instrumentation.py +0 -0
  196. {hud_python-0.4.57 → hud_python-0.4.59}/hud/otel/processors.py +0 -0
  197. {hud_python-0.4.57 → hud_python-0.4.59}/hud/otel/tests/__init__.py +0 -0
  198. {hud_python-0.4.57 → hud_python-0.4.59}/hud/otel/tests/test_instrumentation.py +0 -0
  199. {hud_python-0.4.57 → hud_python-0.4.59}/hud/otel/tests/test_processors.py +0 -0
  200. {hud_python-0.4.57 → hud_python-0.4.59}/hud/py.typed +0 -0
  201. {hud_python-0.4.57 → hud_python-0.4.59}/hud/rl/README.md +0 -0
  202. {hud_python-0.4.57 → hud_python-0.4.59}/hud/rl/__init__.py +0 -0
  203. {hud_python-0.4.57 → hud_python-0.4.59}/hud/rl/actor.py +0 -0
  204. {hud_python-0.4.57 → hud_python-0.4.59}/hud/rl/buffer.py +0 -0
  205. {hud_python-0.4.57 → hud_python-0.4.59}/hud/rl/chat_template.jinja +0 -0
  206. {hud_python-0.4.57 → hud_python-0.4.59}/hud/rl/config.py +0 -0
  207. {hud_python-0.4.57 → hud_python-0.4.59}/hud/rl/distributed.py +0 -0
  208. {hud_python-0.4.57 → hud_python-0.4.59}/hud/rl/learner.py +0 -0
  209. {hud_python-0.4.57 → hud_python-0.4.59}/hud/rl/tests/__init__.py +0 -0
  210. {hud_python-0.4.57 → hud_python-0.4.59}/hud/rl/tests/test_learner.py +0 -0
  211. {hud_python-0.4.57 → hud_python-0.4.59}/hud/rl/train.py +0 -0
  212. {hud_python-0.4.57 → hud_python-0.4.59}/hud/rl/types.py +0 -0
  213. {hud_python-0.4.57 → hud_python-0.4.59}/hud/rl/utils/start_vllm_server.sh +0 -0
  214. {hud_python-0.4.57 → hud_python-0.4.59}/hud/rl/utils.py +0 -0
  215. {hud_python-0.4.57 → hud_python-0.4.59}/hud/rl/vllm_adapter.py +0 -0
  216. {hud_python-0.4.57 → hud_python-0.4.59}/hud/samples/__init__.py +0 -0
  217. {hud_python-0.4.57 → hud_python-0.4.59}/hud/samples/browser.py +0 -0
  218. {hud_python-0.4.57 → hud_python-0.4.59}/hud/server/__init__.py +0 -0
  219. {hud_python-0.4.57 → hud_python-0.4.59}/hud/server/context.py +0 -0
  220. {hud_python-0.4.57 → hud_python-0.4.59}/hud/server/helper/__init__.py +0 -0
  221. {hud_python-0.4.57 → hud_python-0.4.59}/hud/server/low_level.py +0 -0
  222. {hud_python-0.4.57 → hud_python-0.4.59}/hud/server/router.py +0 -0
  223. {hud_python-0.4.57 → hud_python-0.4.59}/hud/server/tests/__init__.py +0 -0
  224. {hud_python-0.4.57 → hud_python-0.4.59}/hud/server/tests/test_add_tool.py +0 -0
  225. {hud_python-0.4.57 → hud_python-0.4.59}/hud/server/tests/test_context.py +0 -0
  226. {hud_python-0.4.57 → hud_python-0.4.59}/hud/server/tests/test_mcp_server_handlers.py +0 -0
  227. {hud_python-0.4.57 → hud_python-0.4.59}/hud/server/tests/test_mcp_server_integration.py +0 -0
  228. {hud_python-0.4.57 → hud_python-0.4.59}/hud/server/tests/test_mcp_server_more.py +0 -0
  229. {hud_python-0.4.57 → hud_python-0.4.59}/hud/server/tests/test_run_wrapper.py +0 -0
  230. {hud_python-0.4.57 → hud_python-0.4.59}/hud/server/tests/test_server_extra.py +0 -0
  231. {hud_python-0.4.57 → hud_python-0.4.59}/hud/server/tests/test_sigterm_runner.py +0 -0
  232. {hud_python-0.4.57 → hud_python-0.4.59}/hud/shared/__init__.py +0 -0
  233. {hud_python-0.4.57 → hud_python-0.4.59}/hud/shared/exceptions.py +0 -0
  234. {hud_python-0.4.57 → hud_python-0.4.59}/hud/shared/requests.py +0 -0
  235. {hud_python-0.4.57 → hud_python-0.4.59}/hud/shared/tests/__init__.py +0 -0
  236. {hud_python-0.4.57 → hud_python-0.4.59}/hud/shared/tests/test_exceptions.py +0 -0
  237. {hud_python-0.4.57 → hud_python-0.4.59}/hud/shared/tests/test_hints.py +0 -0
  238. {hud_python-0.4.57 → hud_python-0.4.59}/hud/shared/tests/test_requests.py +0 -0
  239. {hud_python-0.4.57 → hud_python-0.4.59}/hud/telemetry/__init__.py +0 -0
  240. {hud_python-0.4.57 → hud_python-0.4.59}/hud/telemetry/async_context.py +0 -0
  241. {hud_python-0.4.57 → hud_python-0.4.59}/hud/telemetry/instrument.py +0 -0
  242. {hud_python-0.4.57 → hud_python-0.4.59}/hud/telemetry/replay.py +0 -0
  243. {hud_python-0.4.57 → hud_python-0.4.59}/hud/telemetry/tests/__init__.py +0 -0
  244. {hud_python-0.4.57 → hud_python-0.4.59}/hud/telemetry/tests/test_async_context.py +0 -0
  245. {hud_python-0.4.57 → hud_python-0.4.59}/hud/telemetry/tests/test_instrument.py +0 -0
  246. {hud_python-0.4.57 → hud_python-0.4.59}/hud/telemetry/tests/test_job.py +0 -0
  247. {hud_python-0.4.57 → hud_python-0.4.59}/hud/telemetry/tests/test_replay.py +0 -0
  248. {hud_python-0.4.57 → hud_python-0.4.59}/hud/telemetry/tests/test_trace.py +0 -0
  249. {hud_python-0.4.57 → hud_python-0.4.59}/hud/telemetry/trace.py +0 -0
  250. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/base.py +0 -0
  251. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/bash.py +0 -0
  252. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/computer/anthropic.py +0 -0
  253. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/computer/hud.py +0 -0
  254. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/computer/openai.py +0 -0
  255. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/computer/qwen.py +0 -0
  256. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/edit.py +0 -0
  257. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/executors/__init__.py +0 -0
  258. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/executors/base.py +0 -0
  259. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/executors/pyautogui.py +0 -0
  260. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/executors/tests/__init__.py +0 -0
  261. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/executors/tests/test_base_executor.py +0 -0
  262. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  263. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/executors/xdo.py +0 -0
  264. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/grounding/__init__.py +0 -0
  265. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/grounding/config.py +0 -0
  266. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/grounding/grounded_tool.py +0 -0
  267. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/grounding/grounder.py +0 -0
  268. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/grounding/tests/__init__.py +0 -0
  269. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
  270. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/response.py +0 -0
  271. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/submit.py +0 -0
  272. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/tests/__init__.py +0 -0
  273. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/tests/test_base.py +0 -0
  274. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/tests/test_bash.py +0 -0
  275. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/tests/test_bash_extended.py +0 -0
  276. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/tests/test_computer.py +0 -0
  277. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/tests/test_computer_actions.py +0 -0
  278. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/tests/test_edit.py +0 -0
  279. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/tests/test_init.py +0 -0
  280. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/tests/test_playwright_tool.py +0 -0
  281. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/tests/test_response.py +0 -0
  282. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/tests/test_submit.py +0 -0
  283. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/tests/test_tools.py +0 -0
  284. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/tests/test_tools_init.py +0 -0
  285. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/tests/test_types.py +0 -0
  286. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/tests/test_utils.py +0 -0
  287. {hud_python-0.4.57 → hud_python-0.4.59}/hud/tools/utils.py +0 -0
  288. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/__init__.py +0 -0
  289. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/agent_factories.py +0 -0
  290. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/async_utils.py +0 -0
  291. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/group_eval.py +0 -0
  292. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/hud_console.py +0 -0
  293. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/mcp.py +0 -0
  294. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/pretty_errors.py +0 -0
  295. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/progress.py +0 -0
  296. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/task_tracking.py +0 -0
  297. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/tasks.py +0 -0
  298. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/telemetry.py +0 -0
  299. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/tests/__init__.py +0 -0
  300. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/tests/test_agent_factories.py +0 -0
  301. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/tests/test_async_utils.py +0 -0
  302. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/tests/test_init.py +0 -0
  303. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/tests/test_mcp.py +0 -0
  304. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/tests/test_pretty_errors.py +0 -0
  305. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/tests/test_progress.py +0 -0
  306. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/tests/test_tasks.py +0 -0
  307. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/tests/test_telemetry.py +0 -0
  308. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/tests/test_tool_shorthand.py +0 -0
  309. {hud_python-0.4.57 → hud_python-0.4.59}/hud/utils/tool_shorthand.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.4.57
3
+ Version: 0.4.59
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -38,6 +38,7 @@ Requires-Python: <3.13,>=3.11
38
38
  Requires-Dist: anthropic
39
39
  Requires-Dist: blessed>=1.20.0
40
40
  Requires-Dist: datasets>=2.14.0
41
+ Requires-Dist: google-genai
41
42
  Requires-Dist: httpx<1,>=0.23.0
42
43
  Requires-Dist: hud-fastmcp-python-sdk>=0.1.2
43
44
  Requires-Dist: hud-mcp-python-sdk>=3.13.2
@@ -495,7 +495,7 @@ from hud.agents import ClaudeAgent
495
495
  from hud.clients import MCPClient
496
496
 
497
497
  async def main():
498
- # `trace` captures *everything* that happens and sends it to hud.so
498
+ # `trace` captures *everything* that happens and sends it to hud.ai
499
499
  with hud.trace("local_test"):
500
500
  task = Task(
501
501
  prompt="Complete the task",
@@ -524,7 +524,7 @@ async def main():
524
524
  asyncio.run(main())
525
525
  ```
526
526
 
527
- The `trace` context manager sends a full timeline of agent actions, tool calls, and rewards to hud.so – perfect for debugging.
527
+ The `trace` context manager sends a full timeline of agent actions, tool calls, and rewards to hud.ai – perfect for debugging.
528
528
 
529
529
  See `examples/01_hello_2048.py` and `examples/task_with_setup_eval.py` for larger end-to-end demos.
530
530
 
@@ -532,7 +532,7 @@ See `examples/01_hello_2048.py` and `examples/task_with_setup_eval.py` for large
532
532
 
533
533
  ## Phase 4 – Remote Deployment & HUD Runner
534
534
 
535
- **Goal →** the exact same image runs in parallel on hundreds of instances, and exposes more telemetry so the hud.so can visualise the whole lifecycle.
535
+ **Goal →** the exact same image runs in parallel on hundreds of instances, and exposes more telemetry so that hud.ai can visualise the whole lifecycle.
536
536
 
537
537
  ### 1. Publish your image
538
538
 
@@ -595,11 +595,11 @@ async def initialize_environment(session=None, progress_token=None):
595
595
  await send(100, "ready")
596
596
  ```
597
597
 
598
- Those messages are displayed live on hud.so alongside resource graphs – perfect feedback while you wait.
598
+ Those messages are displayed live on hud.ai alongside resource graphs – perfect feedback while you wait.
599
599
 
600
600
  ### 4. Live telemetry (`telemetry://live`) (Optional)
601
601
 
602
- Expose a resource named `telemetry://live` exactly like in `environments/browser/src/hud_controller/server.py` to return live url to be displayed on hud.so.
602
+ Expose a resource named `telemetry://live` exactly like in `environments/browser/src/hud_controller/server.py` to return a live URL to be displayed on hud.ai.
603
603
 
604
604
  Once all of the above works you can unleash *hundreds* of concurrent agents on your new environment.
605
605
 
@@ -0,0 +1,58 @@
1
+ # Browser Base Image
2
+
3
+ Base Docker image for browser environments with Playwright, Chromium, and VNC support.
4
+
5
+ ## Build
6
+
7
+ ```bash
8
+ docker build -t browser-base:latest .
9
+ ```
10
+
11
+ ## Test with VNC Access
12
+
13
+ ### 1. Start the container
14
+
15
+ ```bash
16
+ docker run -it --rm \
17
+ -p 6080:6080 \
18
+ -p 5900:5900 \
19
+ -e DISPLAY=:1 \
20
+ browser-base:latest \
21
+ bash
22
+ ```
23
+
24
+ ### 2. Inside the container, start display servers
25
+
26
+ ```bash
27
+ Xvfb :1 -screen 0 1920x1080x24 > /dev/null 2>&1 &
28
+ x11vnc -display :1 -nopw -listen 0.0.0.0 -forever > /dev/null 2>&1 &
29
+ /usr/share/novnc/utils/novnc_proxy --vnc localhost:5900 --listen 6080 > /dev/null 2>&1 &
30
+ ```
31
+
32
+ ### 3. Test Playwright
33
+
34
+ ```bash
35
+ python3 -c "
36
+ from playwright.sync_api import sync_playwright
37
+ with sync_playwright() as p:
38
+ browser = p.chromium.launch(headless=False)
39
+ page = browser.new_page()
40
+ page.goto('https://example.com')
41
+ print('Title:', page.title())
42
+ input('Press Enter to close...')
43
+ browser.close()
44
+ "
45
+ ```
46
+
47
+ ### 4. View in browser
48
+
49
+ Open `http://localhost:6080/vnc.html` to see Chromium running.
50
+
51
+ ## What's Included
52
+
53
+ - Ubuntu 24.04
54
+ - Desktop environment (Xvfb, x11vnc, noVNC, xfce4)
55
+ - Node.js & npm
56
+ - Python 3 with uv package manager
57
+ - Playwright with Chromium
58
+ - Development tools (git, curl, wget, etc.)
@@ -4,7 +4,7 @@ version = "0.1.0"
4
4
  description = "HUD Browser MCP Server"
5
5
  requires-python = ">=3.11,<3.14"
6
6
  dependencies = [
7
- "hud-python>=0.4.54",
7
+ "hud-python>=0.4.59",
8
8
  "httpx",
9
9
  "playwright",
10
10
  "pyautogui",
@@ -0,0 +1,239 @@
1
+ # SEC EDGAR Rubrics Environment
2
+
3
+ SEC filing research environment powered by the SEC EDGAR database for accessing company filings and financial data, with rubric-based evaluation for structured grading provided by [The LLM Data Company](https://llmdata.com).
4
+
5
+ See [docs](https://docs.hud.so/build-environments) for the complete environment design workflow.
6
+
7
+ ## Architecture
8
+
9
+ **`environment/`** - Manages SEC EDGAR and web search integration
10
+ - Uses the edgartools Python library to access SEC filing data
11
+ - Integrates with Exa API for supplementary web search capabilities
12
+ - Exposes HTTP endpoints for research workflows with exponential backoff for rate limiting
13
+
14
+ **`server/`** - Wraps data in MCP tools
15
+ - Provides research tools for agents to access SEC filings, financial data, and web search
16
+ - Agents and tasks interact only with these tools
17
+
18
+ **Why separate?** Edit tools for the agent or tasks without restarting the environment backend.
19
+
20
+ ## Tools
21
+
22
+ ### SEC EDGAR Tools
23
+ - **`setup()`** - Initialize the environment and reset state.
24
+ - **`search_company(query: str)`** - Search for a company by ticker symbol or name. Returns company information including ticker, name, and CIK.
25
+ - **`get_filings(ticker?: str, form_type?: str, limit?: int, cutoff_date?: str)`** - Get SEC filings. When `ticker` is provided, returns company-specific filings. Otherwise, returns global recent filings. Can filter by form type (e.g., "10-K", "10-Q", "8-K"), limit results, and filter by date (YYYY-MM-DD).
26
+ - **`get_filing_content(filing_url: str)`** - Fetch the full text content of a specific SEC filing from its URL.
27
+ - **`get_financial_data(ticker: str, accession_number: str)`** - Extract financial statements and key metrics from a 10-K or 10-Q filing. Returns income statement, balance sheet, cash flow, and other financial data.
28
+ - **`get_segment_data(ticker: str, accession_number: str)`** - Extract segment-level financial data from a 10-K or 10-Q filing for companies with multiple business segments.
29
+ - **`get_filing_sections(ticker: str, accession_number: str)`** - Extract specific sections from a 10-K or 10-Q filing (e.g., Business, Risk Factors, MD&A).
30
+
31
+ ### Web Search Tools
32
+ - **`web_search(query: str)`** - Search the web using Exa API. Returns titles and URLs of relevant results.
33
+ - **`web_fetch(url: str)`** - Fetch and extract content from a web URL. Returns summary, highlights, and full content.
34
+
35
+ ### Evaluation Tools
36
+ - **`answer(final_answer: str)`** - Submit the final research answer.
37
+ - **`evaluate(rubric: list[dict])`** - Evaluate submitted answer using a structured rubric with weighted requirements.
38
+
39
+ ### Rubric-Based Evaluation
40
+
41
+ The `evaluate` tool uses The LLM Data Company's [rubric](https://github.com/The-LLM-Data-Company/rubric/) package to grade answers against structured criteria with autograders.
42
+
43
+ ## Setup
44
+
45
+ ### Environment Variables
46
+
47
+ The environment requires several API keys and configuration:
48
+
49
+ **Required:**
50
+ - `EDGAR_IDENTITY` - Your identity for SEC EDGAR access (required by SEC regulations)
51
+ - Format: `"Your Name your.email@example.com"`
52
+
53
+ **Optional:**
54
+ - `EXA_API_KEY` - For web search and content fetching capabilities (if using web_search/web_fetch tools)
55
+ - `HUD_API_KEY` - For HUD telemetry and tracing
56
+ - `ANTHROPIC_API_KEY` - For Claude agent (if using Claude)
57
+ - `OPENAI_API_KEY` - For rubric evaluation (if using OpenAI-based autograders)
58
+
59
+ Export these in your shell (or add them to your `.env` file) before running `hud eval`:
60
+ ```bash
61
+ export EDGAR_IDENTITY="Your Name your.email@example.com"
62
+ export EXA_API_KEY="your-exa-key" # optional, for web search
63
+ export ANTHROPIC_API_KEY="your-anthropic-key" # only if using an Anthropic model
64
+ export OPENAI_API_KEY="your-openai-key" # only if using OpenAI-based autograders
65
+ # Optional
66
+ export HUD_API_KEY="your-hud-key"
67
+ ```
68
+
69
+ ## Development
70
+
71
+ ```bash
72
+ # Terminal 1 - Environment backend
73
+ cd environment
74
+ export EDGAR_IDENTITY="Your Name your.email@example.com"
75
+ export EXA_API_KEY="your-exa-key" # optional, for web search
76
+ uv run uvicorn server:app --reload
77
+
78
+ # Terminal 2 - MCP server
79
+ cd server
80
+ uv run hud dev
81
+ ```
82
+
83
+ The environment includes exponential backoff for rate limiting, so API calls will automatically retry on 429 errors.
84
+
85
+ In general, we recommend starting work on the environment backend first, then developing the MCP server to expose the right things to the agent.
86
+
87
+ For complex environments that require many dependencies, we recommend running `hud dev` in the environment root:
88
+ ```bash
89
+ cd ..
90
+ hud dev
91
+ ```
92
+
93
+ ## Tasks & Evaluation
94
+
95
+ ```bash
96
+ # Build first from the environment root folder, where the Dockerfile lives (creates rubrics:latest)
97
+ hud build
98
+ ```
99
+
100
+ Your `tasks.json` uses `docker run` to launch the environment:
101
+
102
+ ```json
103
+ {
104
+ "prompt": "Analyze Tesla's FY2024 10-K filing...",
105
+ "mcp_config": {
106
+ "local": {
107
+ "command": "docker",
108
+ "args": ["run", "--rm", "-i", "rubrics:latest"]
109
+ }
110
+ },
111
+ "evaluate_tool": {
112
+ "name": "evaluate",
113
+ "arguments": {
114
+ "rubric": [...]
115
+ }
116
+ }
117
+ }
118
+ ```
119
+
120
+ **Note:** Export environment variables before running. The Docker container will inherit them from your shell.
121
+
122
+ **Commands:**
123
+ ```bash
124
+ # Build first
125
+ hud build
126
+
127
+ # Test task locally
128
+ export EDGAR_IDENTITY="Your Name your.email@example.com"
129
+ export EXA_API_KEY="your-exa-key" # optional, for web search
130
+ export ANTHROPIC_API_KEY="your-anthropic-key"
131
+ export OPENAI_API_KEY="your-openai-key"
132
+ hud eval tasks.json --max-steps 25
133
+
134
+ # Push environment for remote running
135
+ hud push
136
+
137
+ # Production RL training
138
+ hud rl tasks.json # Auto-converts docker→remote, builds & pushes if needed
139
+ ```
140
+
141
+ ## Publishing Your Environment
142
+
143
+ Once your environment is ready, you can share it with the community:
144
+
145
+ ### 1. Push to Registry
146
+ ```bash
147
+ # Build and push your environment (requires a Docker Hub login and a HUD API key)
148
+ hud build
149
+ hud push
150
+ ```
151
+
152
+ ### 2. Create a Dataset
153
+
154
+ Create a dataset on HuggingFace with your tasks:
155
+
156
+ **Option A: Upload manually**
157
+ 1. Upload your `tasks.json` to HuggingFace
158
+ 2. Make sure it's **public** to appear on leaderboards
159
+
160
+ **Option B: Use the SDK**
161
+ ```python
162
+ from hud.datasets import save_tasks
163
+ import json
164
+
165
+ # Load your tasks
166
+ with open("tasks.json") as f:
167
+ tasks = json.load(f)
168
+
169
+ # Push to HuggingFace
170
+ save_tasks(tasks, repo_id="your-org/your-dataset")
171
+ ```
172
+
173
+ ### 3. Run and Track Performance
174
+
175
+ ```bash
176
+ # Run Claude on your benchmark
177
+ hud eval "your-org/your-dataset" --agent claude
178
+
179
+ # View results at:
180
+ # hud.so/leaderboards/your-org/your-dataset
181
+ ```
182
+
183
+ **Note**: Only public HuggingFace datasets appear as leaderboards!
184
+
185
+ 📚 Learn more: [Creating Benchmarks](https://docs.hud.so/evaluate-agents/create-benchmarks) | [Leaderboards](https://docs.hud.so/evaluate-agents/leaderboards)
186
+
187
+ ## Example Research Workflow
188
+
189
+ ```python
190
+ # Initialize environment
191
+ setup()
192
+
193
+ # Agent searches for a company
194
+ company_info = search_company("TSLA")
195
+ # Returns: [{"ticker": "TSLA", "name": "Tesla Inc", "cik": "1318605"}]
196
+
197
+ # Agent gets recent filings
198
+ filings = get_filings(ticker="TSLA", form_type="10-K", limit=1)
199
+ # Returns: [{"filing_date": "2024-01-01", "form_type": "10-K", "accession_number": "...", "filing_url": "..."}]
200
+
201
+ # Agent extracts financial data
202
+ financial_data = get_financial_data(ticker="TSLA", accession_number=filings[0]["accession_number"])
203
+ # Returns: {"has_financials": True, "financial_data": {...income statement, balance sheet, etc...}}
204
+
205
+ # Agent gets specific sections from the filing
206
+ sections = get_filing_sections(ticker="TSLA", accession_number=filings[0]["accession_number"])
207
+ # Returns: {"sections": {"business": "...", "risk_factors": "...", "mda": "..."}}
208
+
209
+ # Agent uses web search for additional context
210
+ search_results = web_search("Tesla FY2024 revenue analysis")
211
+ # Returns: [{"title": "...", "url": "..."}]
212
+
213
+ # Agent fetches web content
214
+ web_content = web_fetch(search_results[0]["url"])
215
+ # Returns: "=== SUMMARY ===\n...\n=== KEY HIGHLIGHTS ===\n...\n=== FULL CONTENT ===\n..."
216
+
217
+ # Agent submits final answer
218
+ answer("Based on Tesla's FY2024 10-K, revenue was $96.8B...")
219
+
220
+ # Evaluate answer using rubric
221
+ result = evaluate(rubric=[
222
+ {"requirement": "Correctly states FY2024 revenue", "weight": 15},
223
+ {"requirement": "Provides segment breakdown", "weight": 5},
224
+ ])
225
+ # Returns: {"reward": float, "info": {"report": [...]}, "done": True}
226
+ ```
227
+
228
+ ## Dependencies
229
+
230
+ - **edgartools**: Python library for accessing SEC EDGAR data
231
+ - **fastapi**: Web framework for the environment server
232
+ - **httpx**: HTTP client for API calls
233
+ - **rubric**: LLM Data Company's rubric evaluation package
234
+ - **Exa API**: Web search and content extraction (optional, for web_search/web_fetch tools)
235
+
236
+ ## Acknowledgments
237
+
238
+ * [EdgarTools](https://github.com/dgunning/edgartools) - Python library to access SEC EDGAR
239
+ * [SEC EDGAR MCP](https://github.com/stefanoamorelli/sec-edgar-mcp) - Rich OSS SEC MCP server
@@ -1,13 +1,14 @@
1
1
  [project]
2
2
  name = "rubrics-environment"
3
3
  version = "0.1.0"
4
- description = "Backend service for Rubrics environment"
4
+ description = "Backend service for Rubrics environment with SEC EDGAR integration"
5
5
  requires-python = ">=3.11"
6
6
  dependencies = [
7
7
  "fastapi>=0.104.1",
8
8
  "uvicorn[standard]>=0.24.0",
9
9
  "httpx>=0.24.0",
10
- "rubric>=1.1.7",
10
+ "rubric==1.1.8",
11
+ "edgartools>=4.21.3",
11
12
  ]
12
13
 
13
14
  [build-system]
@@ -16,4 +16,4 @@ image = "rubrics:dev"
16
16
  allow-direct-references = true
17
17
 
18
18
  [tool.hatch.build.targets.wheel]
19
- packages = [ "controller", "environment",]
19
+ packages = [ "server", "environment",]
@@ -2,11 +2,13 @@ from __future__ import annotations
2
2
 
3
3
  from .base import MCPAgent
4
4
  from .claude import ClaudeAgent
5
+ from .gemini import GeminiAgent
5
6
  from .openai import OperatorAgent
6
7
  from .openai_chat_generic import GenericOpenAIChatAgent
7
8
 
8
9
  __all__ = [
9
10
  "ClaudeAgent",
11
+ "GeminiAgent",
10
12
  "GenericOpenAIChatAgent",
11
13
  "MCPAgent",
12
14
  "OperatorAgent",