hud-python 0.3.3__tar.gz → 0.3.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (294) hide show
  1. {hud_python-0.3.3 → hud_python-0.3.5}/PKG-INFO +1 -1
  2. {hud_python-0.3.3 → hud_python-0.3.5}/environments/README.md +102 -33
  3. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/.gitignore +3 -1
  4. {hud_python-0.3.3 → hud_python-0.3.5}/environments/docker_debug.py +49 -7
  5. hud_python-0.3.5/environments/remote_browser/.gitignore +1 -0
  6. hud_python-0.3.5/environments/remote_browser/Dockerfile +40 -0
  7. hud_python-0.3.5/environments/remote_browser/README.md +133 -0
  8. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/pyproject.toml +2 -1
  9. hud_python-0.3.5/environments/remote_browser/src/hud_controller/browser_executor.py +265 -0
  10. hud_python-0.3.5/environments/remote_browser/src/hud_controller/evaluators/verify_type_action.py +135 -0
  11. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/problems/__init__.py +3 -0
  12. hud_python-0.3.5/environments/remote_browser/src/hud_controller/problems/element_interaction.py +38 -0
  13. hud_python-0.3.5/environments/remote_browser/src/hud_controller/problems/form_interaction.py +28 -0
  14. hud_python-0.3.5/environments/remote_browser/src/hud_controller/problems/search_interaction.py +19 -0
  15. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/runtime.py +20 -2
  16. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/server.py +101 -41
  17. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/setup/load_html.py +9 -14
  18. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/setup/registry.py +5 -1
  19. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/setup/sheets.py +2 -2
  20. {hud_python-0.3.3 → hud_python-0.3.5}/examples/agents_tools/simple_task_example.py +1 -1
  21. {hud_python-0.3.3 → hud_python-0.3.5}/examples/environments/gmail_local.py +18 -8
  22. {hud_python-0.3.3 → hud_python-0.3.5}/examples/environments/gmail_remote.py +1 -1
  23. {hud_python-0.3.3 → hud_python-0.3.5}/hud/__init__.py +30 -4
  24. {hud_python-0.3.3 → hud_python-0.3.5}/hud/datasets.py +7 -3
  25. {hud_python-0.3.3 → hud_python-0.3.5}/hud/task.py +7 -0
  26. {hud_python-0.3.3 → hud_python-0.3.5}/hud/taskset.py +17 -2
  27. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/computer/anthropic.py +12 -3
  28. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/computer/hud.py +18 -12
  29. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/computer/openai.py +10 -1
  30. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/executors/base.py +202 -1
  31. hud_python-0.3.5/hud/utils/__init__.py +16 -0
  32. hud_python-0.3.5/hud/utils/deprecation.py +115 -0
  33. {hud_python-0.3.3 → hud_python-0.3.5}/hud/utils/tests/test_version.py +1 -1
  34. {hud_python-0.3.3 → hud_python-0.3.5}/hud/version.py +1 -1
  35. {hud_python-0.3.3 → hud_python-0.3.5}/pyproject.toml +1 -1
  36. hud_python-0.3.3/environments/pokemon_controller/Dockerfile +0 -11
  37. hud_python-0.3.3/environments/pokemon_controller/pyproject.toml +0 -19
  38. hud_python-0.3.3/environments/pokemon_controller/src/hud_controller/__init__.py +0 -8
  39. hud_python-0.3.3/environments/pokemon_controller/src/hud_controller/display_adapters.py +0 -113
  40. hud_python-0.3.3/environments/pokemon_controller/src/hud_controller/emulator.py +0 -319
  41. hud_python-0.3.3/environments/pokemon_controller/src/hud_controller/evaluator.py +0 -65
  42. hud_python-0.3.3/environments/pokemon_controller/src/hud_controller/kill.py +0 -61
  43. hud_python-0.3.3/environments/pokemon_controller/src/hud_controller/main.py +0 -137
  44. hud_python-0.3.3/environments/pokemon_controller/src/hud_controller/setup.py +0 -63
  45. hud_python-0.3.3/environments/pokemon_controller/src/hud_controller/step.py +0 -37
  46. hud_python-0.3.3/environments/qa_controller/Dockerfile +0 -20
  47. hud_python-0.3.3/environments/qa_controller/pyproject.toml +0 -16
  48. hud_python-0.3.3/environments/qa_controller/src/hud_controller/__init__.py +0 -3
  49. hud_python-0.3.3/environments/qa_controller/src/hud_controller/evaluate/__init__.py +0 -6
  50. hud_python-0.3.3/environments/qa_controller/src/hud_controller/evaluate/matchers.py +0 -135
  51. hud_python-0.3.3/environments/qa_controller/src/hud_controller/info.py +0 -76
  52. hud_python-0.3.3/environments/qa_controller/src/hud_controller/setup/__init__.py +0 -8
  53. hud_python-0.3.3/environments/qa_controller/src/hud_controller/setup/question.py +0 -43
  54. hud_python-0.3.3/environments/qa_controller/src/hud_controller/step.py +0 -46
  55. hud_python-0.3.3/environments/qa_controller/src/hud_controller/utils/__init__.py +0 -1
  56. hud_python-0.3.3/environments/qa_controller/src/hud_controller/utils/state.py +0 -43
  57. hud_python-0.3.3/environments/remote_browser/Dockerfile +0 -23
  58. hud_python-0.3.3/environments/remote_browser/README.md +0 -62
  59. hud_python-0.3.3/environments/remote_browser/src/hud_controller/browser_computer_tool.py +0 -335
  60. hud_python-0.3.3/environments/remote_browser/src/hud_controller/evaluators/verify_type_action.py +0 -102
  61. hud_python-0.3.3/environments/remote_browser/test_mcp.sh +0 -4
  62. hud_python-0.3.3/environments/simple_browser/docker-compose.yml +0 -13
  63. hud_python-0.3.3/hud/utils/__init__.py +0 -7
  64. {hud_python-0.3.3 → hud_python-0.3.5}/.env.example +0 -0
  65. {hud_python-0.3.3 → hud_python-0.3.5}/.github/workflows/ci.yml +0 -0
  66. {hud_python-0.3.3 → hud_python-0.3.5}/.github/workflows/release.yml +0 -0
  67. {hud_python-0.3.3 → hud_python-0.3.5}/.gitignore +0 -0
  68. {hud_python-0.3.3 → hud_python-0.3.5}/LICENSE +0 -0
  69. {hud_python-0.3.3 → hud_python-0.3.5}/MANIFEST.in +0 -0
  70. {hud_python-0.3.3 → hud_python-0.3.5}/README.md +0 -0
  71. {hud_python-0.3.3 → hud_python-0.3.5}/docs/advanced/cla-details.mdx +0 -0
  72. {hud_python-0.3.3 → hud_python-0.3.5}/docs/advanced/environment-control.mdx +0 -0
  73. {hud_python-0.3.3 → hud_python-0.3.5}/docs/advanced/tracing.mdx +0 -0
  74. {hud_python-0.3.3 → hud_python-0.3.5}/docs/advanced/uploading.mdx +0 -0
  75. {hud_python-0.3.3 → hud_python-0.3.5}/docs/api-reference/adapters.mdx +0 -0
  76. {hud_python-0.3.3 → hud_python-0.3.5}/docs/api-reference/env.mdx +0 -0
  77. {hud_python-0.3.3 → hud_python-0.3.5}/docs/api-reference/gym.mdx +0 -0
  78. {hud_python-0.3.3 → hud_python-0.3.5}/docs/api-reference/job.mdx +0 -0
  79. {hud_python-0.3.3 → hud_python-0.3.5}/docs/api-reference/task.mdx +0 -0
  80. {hud_python-0.3.3 → hud_python-0.3.5}/docs/api-reference/taskset.mdx +0 -0
  81. {hud_python-0.3.3 → hud_python-0.3.5}/docs/api-reference/telemetry.mdx +0 -0
  82. {hud_python-0.3.3 → hud_python-0.3.5}/docs/api-reference/trajectory.mdx +0 -0
  83. {hud_python-0.3.3 → hud_python-0.3.5}/docs/concepts/adapter.mdx +0 -0
  84. {hud_python-0.3.3 → hud_python-0.3.5}/docs/concepts/agent.mdx +0 -0
  85. {hud_python-0.3.3 → hud_python-0.3.5}/docs/concepts/environment.mdx +0 -0
  86. {hud_python-0.3.3 → hud_python-0.3.5}/docs/concepts/job.mdx +0 -0
  87. {hud_python-0.3.3 → hud_python-0.3.5}/docs/concepts/task.mdx +0 -0
  88. {hud_python-0.3.3 → hud_python-0.3.5}/docs/concepts/trajectory.mdx +0 -0
  89. {hud_python-0.3.3 → hud_python-0.3.5}/docs/docs.json +0 -0
  90. {hud_python-0.3.3 → hud_python-0.3.5}/docs/environment-creation.mdx +0 -0
  91. {hud_python-0.3.3 → hud_python-0.3.5}/docs/environments/browser.mdx +0 -0
  92. {hud_python-0.3.3 → hud_python-0.3.5}/docs/environments/custom-environments.mdx +0 -0
  93. {hud_python-0.3.3 → hud_python-0.3.5}/docs/environments/custom.mdx +0 -0
  94. {hud_python-0.3.3 → hud_python-0.3.5}/docs/environments/osworld-ubuntu.mdx +0 -0
  95. {hud_python-0.3.3 → hud_python-0.3.5}/docs/environments/qa.mdx +0 -0
  96. {hud_python-0.3.3 → hud_python-0.3.5}/docs/examples/alignment-evaluation.mdx +0 -0
  97. {hud_python-0.3.3 → hud_python-0.3.5}/docs/examples/benchmarking-agents.mdx +0 -0
  98. {hud_python-0.3.3 → hud_python-0.3.5}/docs/examples/custom-os-env.mdx +0 -0
  99. {hud_python-0.3.3 → hud_python-0.3.5}/docs/examples/mcp-agent-tracing.mdx +0 -0
  100. {hud_python-0.3.3 → hud_python-0.3.5}/docs/examples/web-app-testing.mdx +0 -0
  101. {hud_python-0.3.3 → hud_python-0.3.5}/docs/examples/web-mocks.mdx +0 -0
  102. {hud_python-0.3.3 → hud_python-0.3.5}/docs/favicon.png +0 -0
  103. {hud_python-0.3.3 → hud_python-0.3.5}/docs/logo/hud_logo.svg +0 -0
  104. {hud_python-0.3.3 → hud_python-0.3.5}/docs/logo/hud_logo_dark.svg +0 -0
  105. {hud_python-0.3.3 → hud_python-0.3.5}/docs/quickstart.mdx +0 -0
  106. {hud_python-0.3.3 → hud_python-0.3.5}/docs/running-your-agent.mdx +0 -0
  107. {hud_python-0.3.3 → hud_python-0.3.5}/docs/task-creation.mdx +0 -0
  108. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/.dockerignore +0 -0
  109. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/Dockerfile +0 -0
  110. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/README.md +0 -0
  111. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/apps/README.md +0 -0
  112. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/apps/todo/README.md +0 -0
  113. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/apps/todo/backend/main.py +0 -0
  114. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/apps/todo/backend/pyproject.toml +0 -0
  115. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/apps/todo/frontend/app/globals.css +0 -0
  116. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/apps/todo/frontend/app/layout.tsx +0 -0
  117. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/apps/todo/frontend/app/page.tsx +0 -0
  118. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/apps/todo/frontend/next.config.js +0 -0
  119. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/apps/todo/frontend/package-lock.json +0 -0
  120. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/apps/todo/frontend/package.json +0 -0
  121. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/apps/todo/frontend/postcss.config.js +0 -0
  122. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/apps/todo/frontend/tailwind.config.js +0 -0
  123. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/apps/todo/frontend/tsconfig.json +0 -0
  124. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/apps/todo/launch.py +0 -0
  125. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/pyproject.toml +0 -0
  126. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/src/hud_controller/README.md +0 -0
  127. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/src/hud_controller/__init__.py +0 -0
  128. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/src/hud_controller/__main__.py +0 -0
  129. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/src/hud_controller/evaluators/__init__.py +0 -0
  130. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/src/hud_controller/evaluators/context.py +0 -0
  131. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/src/hud_controller/evaluators/registry.py +0 -0
  132. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/src/hud_controller/evaluators/todo.py +0 -0
  133. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/src/hud_controller/problems/__init__.py +0 -0
  134. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/src/hud_controller/problems/registry.py +0 -0
  135. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/src/hud_controller/problems/todo.py +0 -0
  136. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/src/hud_controller/runtime.py +0 -0
  137. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/src/hud_controller/server.py +0 -0
  138. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/src/hud_controller/services.py +0 -0
  139. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/src/hud_controller/setup/__init__.py +0 -0
  140. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/src/hud_controller/setup/registry.py +0 -0
  141. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/src/hud_controller/setup/todo.py +0 -0
  142. {hud_python-0.3.3/environments/simple_browser → hud_python-0.3.5/environments/browser}/start.sh +0 -0
  143. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/__init__.py +0 -0
  144. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/__main__.py +0 -0
  145. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/evaluators/__init__.py +0 -0
  146. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/evaluators/context.py +0 -0
  147. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/evaluators/cookie_exists.py +0 -0
  148. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/evaluators/cookie_match.py +0 -0
  149. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/evaluators/history_length.py +0 -0
  150. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/evaluators/page_contains.py +0 -0
  151. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/evaluators/raw_last_action_is.py +0 -0
  152. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/evaluators/registry.py +0 -0
  153. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/evaluators/selector_history.py +0 -0
  154. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/evaluators/sheet_contains.py +0 -0
  155. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/evaluators/sheets_cell_values.py +0 -0
  156. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/evaluators/url_match.py +0 -0
  157. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/playwright_with_memory.py +0 -0
  158. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/problems/navigate_and_verify.py +0 -0
  159. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/problems/registry.py +0 -0
  160. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
  161. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/providers/__init__.py +0 -0
  162. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/providers/anchorbrowser.py +0 -0
  163. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/providers/base.py +0 -0
  164. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/providers/browserbase.py +0 -0
  165. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/providers/hyperbrowser.py +0 -0
  166. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/providers/kernel.py +0 -0
  167. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/providers/steel.py +0 -0
  168. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/setup/__init__.py +0 -0
  169. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/setup/cookies.py +0 -0
  170. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/setup/interact.py +0 -0
  171. {hud_python-0.3.3 → hud_python-0.3.5}/environments/remote_browser/src/hud_controller/setup/navigate.py +0 -0
  172. {hud_python-0.3.3 → hud_python-0.3.5}/examples/README.md +0 -0
  173. {hud_python-0.3.3 → hud_python-0.3.5}/examples/agents_tools/browser_use.ipynb +0 -0
  174. {hud_python-0.3.3 → hud_python-0.3.5}/examples/agents_tools/mcp_claude_agent.py +0 -0
  175. {hud_python-0.3.3 → hud_python-0.3.5}/examples/agents_tools/mcp_openai_agent.py +0 -0
  176. {hud_python-0.3.3 → hud_python-0.3.5}/examples/agents_tools/mcp_test.ipynb +0 -0
  177. {hud_python-0.3.3 → hud_python-0.3.5}/examples/agents_tools/mcp_use_agent.py +0 -0
  178. {hud_python-0.3.3 → hud_python-0.3.5}/examples/agents_tools/sensitive_data.ipynb +0 -0
  179. {hud_python-0.3.3 → hud_python-0.3.5}/examples/environments/pokemon_local.ipynb +0 -0
  180. {hud_python-0.3.3 → hud_python-0.3.5}/examples/environments/pokemon_remote.ipynb +0 -0
  181. {hud_python-0.3.3 → hud_python-0.3.5}/examples/environments/remote.ipynb +0 -0
  182. {hud_python-0.3.3 → hud_python-0.3.5}/examples/environments/resources_example.py +0 -0
  183. {hud_python-0.3.3 → hud_python-0.3.5}/examples/environments/simple_browser_example.py +0 -0
  184. {hud_python-0.3.3 → hud_python-0.3.5}/examples/evaluations/eval.py +0 -0
  185. {hud_python-0.3.3 → hud_python-0.3.5}/examples/evaluations/osworld.ipynb +0 -0
  186. {hud_python-0.3.3 → hud_python-0.3.5}/examples/evaluations/sheetbench_direct_example.ipynb +0 -0
  187. {hud_python-0.3.3 → hud_python-0.3.5}/examples/evaluations/tasks.ipynb +0 -0
  188. {hud_python-0.3.3 → hud_python-0.3.5}/examples/evaluations/telemetry_and_datasets.ipynb +0 -0
  189. {hud_python-0.3.3 → hud_python-0.3.5}/examples/evaluations/wordle_example.ipynb +0 -0
  190. {hud_python-0.3.3 → hud_python-0.3.5}/examples/sheets_bench_cua_example.ipynb +0 -0
  191. {hud_python-0.3.3 → hud_python-0.3.5}/hud/adapters/__init__.py +0 -0
  192. {hud_python-0.3.3 → hud_python-0.3.5}/hud/adapters/claude/__init__.py +0 -0
  193. {hud_python-0.3.3 → hud_python-0.3.5}/hud/adapters/claude/adapter.py +0 -0
  194. {hud_python-0.3.3 → hud_python-0.3.5}/hud/adapters/claude/tests/__init__.py +0 -0
  195. {hud_python-0.3.3 → hud_python-0.3.5}/hud/adapters/claude/tests/test_adapter.py +0 -0
  196. {hud_python-0.3.3 → hud_python-0.3.5}/hud/adapters/common/__init__.py +0 -0
  197. {hud_python-0.3.3 → hud_python-0.3.5}/hud/adapters/common/adapter.py +0 -0
  198. {hud_python-0.3.3 → hud_python-0.3.5}/hud/adapters/common/tests/__init__.py +0 -0
  199. {hud_python-0.3.3 → hud_python-0.3.5}/hud/adapters/common/tests/test_adapter.py +0 -0
  200. {hud_python-0.3.3 → hud_python-0.3.5}/hud/adapters/common/types.py +0 -0
  201. {hud_python-0.3.3 → hud_python-0.3.5}/hud/adapters/operator/__init__.py +0 -0
  202. {hud_python-0.3.3 → hud_python-0.3.5}/hud/adapters/operator/adapter.py +0 -0
  203. {hud_python-0.3.3 → hud_python-0.3.5}/hud/adapters/operator/tests/__init__.py +0 -0
  204. {hud_python-0.3.3 → hud_python-0.3.5}/hud/adapters/operator/tests/test_adapter.py +0 -0
  205. {hud_python-0.3.3 → hud_python-0.3.5}/hud/agent/__init__.py +0 -0
  206. {hud_python-0.3.3 → hud_python-0.3.5}/hud/agent/base.py +0 -0
  207. {hud_python-0.3.3 → hud_python-0.3.5}/hud/agent/claude.py +0 -0
  208. {hud_python-0.3.3 → hud_python-0.3.5}/hud/agent/claude_plays_pokemon.py +0 -0
  209. {hud_python-0.3.3 → hud_python-0.3.5}/hud/agent/langchain.py +0 -0
  210. {hud_python-0.3.3 → hud_python-0.3.5}/hud/agent/misc/__init__.py +0 -0
  211. {hud_python-0.3.3 → hud_python-0.3.5}/hud/agent/misc/response_agent.py +0 -0
  212. {hud_python-0.3.3 → hud_python-0.3.5}/hud/agent/operator.py +0 -0
  213. {hud_python-0.3.3 → hud_python-0.3.5}/hud/agent/tests/__init__.py +0 -0
  214. {hud_python-0.3.3 → hud_python-0.3.5}/hud/agent/tests/test_base.py +0 -0
  215. {hud_python-0.3.3 → hud_python-0.3.5}/hud/env/__init__.py +0 -0
  216. {hud_python-0.3.3 → hud_python-0.3.5}/hud/env/client.py +0 -0
  217. {hud_python-0.3.3 → hud_python-0.3.5}/hud/env/docker_client.py +0 -0
  218. {hud_python-0.3.3 → hud_python-0.3.5}/hud/env/environment.py +0 -0
  219. {hud_python-0.3.3 → hud_python-0.3.5}/hud/env/local_docker_client.py +0 -0
  220. {hud_python-0.3.3 → hud_python-0.3.5}/hud/env/remote_client.py +0 -0
  221. {hud_python-0.3.3 → hud_python-0.3.5}/hud/env/remote_docker_client.py +0 -0
  222. {hud_python-0.3.3 → hud_python-0.3.5}/hud/exceptions.py +0 -0
  223. {hud_python-0.3.3 → hud_python-0.3.5}/hud/gym.py +0 -0
  224. {hud_python-0.3.3 → hud_python-0.3.5}/hud/job.py +0 -0
  225. {hud_python-0.3.3 → hud_python-0.3.5}/hud/mcp/__init__.py +0 -0
  226. {hud_python-0.3.3 → hud_python-0.3.5}/hud/mcp/base.py +0 -0
  227. {hud_python-0.3.3 → hud_python-0.3.5}/hud/mcp/claude.py +0 -0
  228. {hud_python-0.3.3 → hud_python-0.3.5}/hud/mcp/client.py +0 -0
  229. {hud_python-0.3.3 → hud_python-0.3.5}/hud/mcp/langchain.py +0 -0
  230. {hud_python-0.3.3 → hud_python-0.3.5}/hud/mcp/openai.py +0 -0
  231. {hud_python-0.3.3 → hud_python-0.3.5}/hud/mcp/tests/__init__.py +0 -0
  232. {hud_python-0.3.3 → hud_python-0.3.5}/hud/mcp/tests/test_base.py +0 -0
  233. {hud_python-0.3.3 → hud_python-0.3.5}/hud/mcp/tests/test_claude.py +0 -0
  234. {hud_python-0.3.3 → hud_python-0.3.5}/hud/mcp/tests/test_client.py +0 -0
  235. {hud_python-0.3.3 → hud_python-0.3.5}/hud/mcp/tests/test_openai.py +0 -0
  236. {hud_python-0.3.3 → hud_python-0.3.5}/hud/py.typed +0 -0
  237. {hud_python-0.3.3 → hud_python-0.3.5}/hud/server/__init__.py +0 -0
  238. {hud_python-0.3.3 → hud_python-0.3.5}/hud/server/requests.py +0 -0
  239. {hud_python-0.3.3 → hud_python-0.3.5}/hud/server/tests/__init__.py +0 -0
  240. {hud_python-0.3.3 → hud_python-0.3.5}/hud/server/tests/test_requests.py +0 -0
  241. {hud_python-0.3.3 → hud_python-0.3.5}/hud/settings.py +0 -0
  242. {hud_python-0.3.3 → hud_python-0.3.5}/hud/telemetry/__init__.py +0 -0
  243. {hud_python-0.3.3 → hud_python-0.3.5}/hud/telemetry/_trace.py +0 -0
  244. {hud_python-0.3.3 → hud_python-0.3.5}/hud/telemetry/context.py +0 -0
  245. {hud_python-0.3.3 → hud_python-0.3.5}/hud/telemetry/exporter.py +0 -0
  246. {hud_python-0.3.3 → hud_python-0.3.5}/hud/telemetry/instrumentation/__init__.py +0 -0
  247. {hud_python-0.3.3 → hud_python-0.3.5}/hud/telemetry/instrumentation/mcp.py +0 -0
  248. {hud_python-0.3.3 → hud_python-0.3.5}/hud/telemetry/instrumentation/registry.py +0 -0
  249. {hud_python-0.3.3 → hud_python-0.3.5}/hud/telemetry/job.py +0 -0
  250. {hud_python-0.3.3 → hud_python-0.3.5}/hud/telemetry/mcp_models.py +0 -0
  251. {hud_python-0.3.3 → hud_python-0.3.5}/hud/telemetry/tests/__init__.py +0 -0
  252. {hud_python-0.3.3 → hud_python-0.3.5}/hud/telemetry/tests/test_context.py +0 -0
  253. {hud_python-0.3.3 → hud_python-0.3.5}/hud/telemetry/tests/test_trace.py +0 -0
  254. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/__init__.py +0 -0
  255. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/base.py +0 -0
  256. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/bash.py +0 -0
  257. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/computer/__init__.py +0 -0
  258. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/edit.py +0 -0
  259. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/executors/__init__.py +0 -0
  260. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/executors/pyautogui.py +0 -0
  261. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/executors/tests/__init__.py +0 -0
  262. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/executors/tests/test_base_executor.py +0 -0
  263. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  264. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/executors/xdo.py +0 -0
  265. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/helper/README.md +0 -0
  266. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/helper/__init__.py +0 -0
  267. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/helper/mcp_server.py +0 -0
  268. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/helper/server_initialization.py +0 -0
  269. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/helper/utils.py +0 -0
  270. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/playwright_tool.py +0 -0
  271. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/tests/__init__.py +0 -0
  272. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/tests/test_bash.py +0 -0
  273. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/tests/test_computer.py +0 -0
  274. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/tests/test_computer_actions.py +0 -0
  275. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/tests/test_edit.py +0 -0
  276. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/tests/test_init.py +0 -0
  277. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/tests/test_playwright_tool.py +0 -0
  278. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/tests/test_tools.py +0 -0
  279. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/tests/test_utils.py +0 -0
  280. {hud_python-0.3.3 → hud_python-0.3.5}/hud/tools/utils.py +0 -0
  281. {hud_python-0.3.3 → hud_python-0.3.5}/hud/trajectory.py +0 -0
  282. {hud_python-0.3.3 → hud_python-0.3.5}/hud/types.py +0 -0
  283. {hud_python-0.3.3 → hud_python-0.3.5}/hud/utils/agent.py +0 -0
  284. {hud_python-0.3.3 → hud_python-0.3.5}/hud/utils/common.py +0 -0
  285. {hud_python-0.3.3 → hud_python-0.3.5}/hud/utils/config.py +0 -0
  286. {hud_python-0.3.3 → hud_python-0.3.5}/hud/utils/misc.py +0 -0
  287. {hud_python-0.3.3 → hud_python-0.3.5}/hud/utils/progress.py +0 -0
  288. {hud_python-0.3.3 → hud_python-0.3.5}/hud/utils/telemetry.py +0 -0
  289. {hud_python-0.3.3 → hud_python-0.3.5}/hud/utils/tests/__init__.py +0 -0
  290. {hud_python-0.3.3 → hud_python-0.3.5}/hud/utils/tests/test_common.py +0 -0
  291. {hud_python-0.3.3 → hud_python-0.3.5}/hud/utils/tests/test_config.py +0 -0
  292. {hud_python-0.3.3 → hud_python-0.3.5}/hud/utils/tests/test_init.py +0 -0
  293. {hud_python-0.3.3 → hud_python-0.3.5}/hud/utils/tests/test_progress.py +0 -0
  294. {hud_python-0.3.3 → hud_python-0.3.5}/hud/utils/tests/test_telemetry.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.3.3
3
+ Version: 0.3.5
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -74,15 +74,38 @@ docker build -t my-environment .
74
74
  docker run --rm -it my-environment # look for the log line on stderr
75
75
  ```
76
76
 
77
+ ### Recommended Environment Structure
78
+
79
+ For Python-based MCP environments, use this standard structure:
80
+
81
+ ```
82
+ my-environment/
83
+ ├── Dockerfile
84
+ ├── pyproject.toml
85
+ ├── README.md
86
+ └── src/
87
+ └── my_module/ # Your Python package
88
+ ├── __init__.py
89
+ ├── server.py # MCP server (Phase 2)
90
+ ├── setup/ # Setup functions (Phase 3)
91
+ ├── evaluators/ # Evaluation logic (Phase 3)
92
+ └── problems/ # Problem definitions (Phase 3)
93
+ ```
94
+
95
+ This structure enables:
96
+ - Clean separation of concerns
97
+ - Easy volume mounting for development (Phase 5)
98
+ - Standard Python packaging with `pip install -e .`
99
+
77
100
  • **One Dockerfile only** – no docker-compose.
78
- • If youre building a GUI environment, start from `hudpython/novnc-base:latest` instead and leave VNC configuration for later phases.
101
+ • If you're building a GUI environment, start from `hudpython/novnc-base:latest` instead and leave VNC configuration for later phases.
79
102
 
80
103
  Checkpoint reached? Congratulations – move on.
81
104
 
82
105
  👉 Quick sanity check: `python environments/docker_debug.py my-environment:latest` (verifies Phase 1 automatically)
83
106
 
84
107
  Need inspiration? Skim the real Dockerfiles used in the example browser environments:
85
- • [`simple_browser/Dockerfile`](./simple_browser/Dockerfile)
108
+ • [`browser/Dockerfile`](./browser/Dockerfile)
86
109
  • [`remote_browser/Dockerfile`](./remote_browser/Dockerfile)
87
110
  They follow the exact same pattern – a single file, logs to stderr, nothing fancy.
88
111
 
@@ -315,28 +338,74 @@ Those messages are displayed live on app.hud.so alongside resource graphs – pe
315
338
 
316
339
  ### 4. Live telemetry (`telemetry://live`) (Optional)
317
340
 
318
- Expose a resource named `telemetry://live` exactly like in `environments/simple_browser/src/hud_controller/server.py` to return live url to be displayed on app.hud.so.
341
+ Expose a resource named `telemetry://live` exactly like in `environments/browser/src/hud_controller/server.py` to return a live URL to be displayed on app.hud.so.
319
342
 
320
343
  Once all of the above works you can unleash *hundreds* of concurrent agents on your new environment.
321
344
 
322
345
  ---
323
346
 
324
- ## Phase 5 – Automated Iteration with *cursor-mcp*
347
+ ## Phase 5 – Takeoff: Automatic environment improvement with Cursor Agent
348
+
349
+ To enable rapid development without constant Docker rebuilds, use the unified Dockerfile's development mode. This lets you edit code locally and see changes immediately in the running MCP server, and lets Cursor Agent automate the iteration loop.
350
+
351
+ ### Setting up Development Mode
352
+
353
+ #### 1. Update Your Dockerfile
354
+
355
+ First, modify your Dockerfile to support a `DEV_MODE` build argument, which makes it easy to switch between the development build and the regular build:
356
+
357
+ ```dockerfile
358
+ # Add this at the top of your Dockerfile
359
+ ARG DEV_MODE=false
360
+
361
+ # ... your existing setup ...
325
362
 
326
- [`cursor-mcp`](https://github.com/hud-evals/cursor-mcp) turns the edit → build → restart → test loop into a single key-press and adds tools to Cursor Agent that can drive the whole workflow for you. The agent reads the MCP spec, your code, and the live server state, then proposes fixes or new tests on its own. It then has access to the MCP tools the environment provides, enabling it to test all functionality, which completes the iteration loop.
363
+ # Conditionally handle source for dev mode -- this should reflect your environment structure
364
+ RUN if [ "$DEV_MODE" = "true" ]; then \
365
+ mkdir -p /app/src/your_module && \
366
+ echo "# Stub for editable install" > /app/src/your_module/__init__.py; \
367
+ fi
327
368
 
328
- 1. Add an entry to `.cursor/mcp.json`:
369
+ # Copy source (will be overridden by volume mount in dev mode but necessary for the build in the Phase 1 recommended setup)
370
+ COPY src/ ./src/
371
+
372
+ # Install in editable mode still works!
373
+ RUN pip install -e .
374
+
375
+ # ... your existing setup ...
376
+ ```
377
+
378
+ The key insight: In dev mode, we create stub files so the package can be installed, but the actual source will come from the volume mount.
379
+
380
+ #### 2. Build the Development Image
381
+
382
+ ```bash
383
+ docker build --build-arg DEV_MODE=true -t my-environment:dev .
384
+ ```
385
+
386
+ #### 3. Configure Cursor Agent for development
387
+
388
+ Add a development configuration to `.cursor/mcp.json` that includes the volume mount:
329
389
 
330
390
  ```jsonc
331
391
  {
332
392
  "mcp_config": {
333
- "env": {
393
+ // If your production config looks like this,
394
+ "my-environment": {
334
395
  "command": "docker",
335
396
  "args": ["run", "--rm", "-i", "my-environment:latest"]
336
397
  },
337
- "cursor-manager": {
338
- "command": "uvx",
339
- "args": ["cursor-mcp"]
398
+ // This is how you make the dev mode config:
399
+ "my-environment-dev": {
400
+ "command": "docker",
401
+ "args": [
402
+ "run", "--rm", "-i",
403
+ "-v", "%cd%/src:/app/src:rw", // Windows
404
+ // "-v", "$(pwd)/src:/app/src:rw", // Linux/Mac
405
+ "-e", "PYTHONPATH=/app/src", // Required for module imports in the Phase 1 like setup
406
+ // Add your environment variables here
407
+ "my-environment:dev" // dev instead of latest!
408
+ ]
340
409
  }
341
410
  }
342
411
  }
@@ -345,36 +414,36 @@ Once all of the above works you can unleash *hundreds* of concurrent agents on y
345
414
  2. Follow the cursor rules below: rebuild, refresh, test, reflect, repeat.
346
415
  3. Keep the agent open for any messages or issues.
347
416
 
348
- ### Cursor rules – paste this once
417
+ ### 4. Cursor rules – paste this once
349
418
 
350
- Inside `.cursor/rules/mcp_environment_iteration.mdc` add (or verify) the following so the agent always knows the expected loop:
419
+ Inside `.cursor/rules/hud_environment_iteration.mdc` add (or verify) the following so the agent always knows the expected iteration loop:
351
420
 
352
421
  ```mdc
353
422
  ---
354
- description: When making an environment that launches and MCP server this is the iteration loop
423
+ description: Improve an MCP environment
355
424
  alwaysApply: false
356
425
  ---
357
- Setting up (also refer to environments/README.md):
358
- 1. Follow each environment's README.md or any other steps to set it up for the MCP server to be able to directly launch it (such as building the dockerfile)
359
- 2. Run local tests to make sure the initialize without immediate errors and stays alive until properly closed. If the server crashes within the first few seconds then the manager will not pick up on it. In this case please go back and either debug the docker run directly, or the mcp server by piping an initialization request.
360
- 3. When the server initialization is stable, use the cursor-manager tool to see the current list of tools and add it if necessary. Take note of the name.
361
- 4. When working, tell the user to send another message to refresh your list of tools.
362
-
363
- After setting up, when iterating (will not require a user message ever):
364
- 1. Look at the environment project and refine/edit/fix files
365
- 2. Follow its README to set it up for the MCP server (such as building the dockerfile)
366
- 3. Use the cursor-manager tool to refresh this server (by name)
367
- 4. See its status using cursor-manager, if it's running then follow with step 5. If it fails, then check the logs using cursor-manager and go back to step 1, but ask the user to reset.
368
- 5. Use the tools from that server (by name) to test the functionality and edge cases, reflect on the success of your TODOs and think of new things to fix. If the tools are unavailable but the status is running, then ask the user to refresh the user message.
369
- 6. Review your TODOs, update with new TODOs
370
- 7. Repeat until reached user's high level goals, or generally extremely happy with the final result
371
-
372
- In general:
373
- 1. Try to avoid running direct docker or mcp commands and use the tools. If you want to run a docker command or python mcp server command then ask permission and only use if otherwise completely impossible.
374
- 2. If at any point the docker build starts breaking on initialize, return to setting up properly
426
+ Setup
427
+ 1. Make sure the user has set up the mcp config for the environment by seeing if you have access to the tools by the given name (i.e. my-environment-dev), and make sure the title is in dev mode. If not, ask the user to make a dev version!
428
+ 2. Make sure you can find the source folder for this environment. Explore its contents and README.
429
+ 3. Clarify the objectives and ask follow up questions on the initial query to determine precise implementation details.
430
+
431
+ Iteration
432
+ 1. Use the tools exposed by the environment to interact with it. This means navigating around with a computer, editing, launching commands, whatever means accessible to you. If there are any exposed resources, try to access them to determine the structure of the calls.
433
+ 2. Based on the objectives, test and verify the functionality of different tools and parts of the environment. If any tool call responds with an error, note it down. If any interaction with the environment is wrong, unexpected, incomplete, or parts of the environment are not developed fully, note it down. If any new problem sets up wrong or evaluation does not match the expected outcome, note it down. All of these inconsistencies you should note down in your TODOs.
434
+ 3. Then, based on the TODOs, view the source folder and find the places where those errors would occur. Think about the system and how to fix it. Then fix it.
435
+ 4. After you've fixed your TODO items, go back to step 2 and test them. Test through all of your available tools, and use feedback (such as screenshots) to determine your progress. If they now work as expected, mark them as complete. If not, continue the loop from step 2. Be extremely careful, scrupulous and attentive to all details. Never assume something is working unless you've tested it fully for all of its edge cases.
436
+ 5. The only time you can exit this iteration loop is if you're adding a *new* tool, a new import package to the environment, need additional environment variables, or if there is no feasible way to create input conditions to test something. In this case, ask the user for help and recap your progress. If you're simply changing tools, changing code, and still have more realistic TODOs, the environment will refresh automatically and you should continue working. In *all* other cases, you must continue this iteration loop until you can come up with no more TODOs. You must not halt.
375
437
  ```
376
438
 
377
- The result: fast, autonomous turnaround times even for complex GUI environments.
439
+ ### 5. Prompt the agent
440
+
441
+ ```txt
442
+ Context: In the my-environment folder, I have a browser app environment. I've built a tool to interact with it called my-environment-dev.
443
+ Interaction: There are multiple tools to setup and evaluate the environment. There are also interaction tools for you to be able to move around it, and a screenshot tool to see the state. Use all of the available tools.
444
+ Objective: Please test if all setup and evaluation functions are working. This means you should come up with new problem definitions to test all functionality on. Be creative in how you pick edge cases to test on.
445
+ Rules: @hud_environment_iteration.mdc
446
+ ```
378
447
 
379
448
  ---
380
449
 
@@ -411,7 +480,7 @@ class TodoBasic:
411
480
  return {"function": "todo_completed", "args": {"expected_count": 2}}
412
481
  ```
413
482
 
414
- Decorators keep registration *next to the implementation* and avoid manual bookkeeping. The server simply exposes the combined metadata through an MCP **resource**. Follow `environments/simple_browser/src/hud_controller/problems/registry.py` as a template and expose the JSON with `@mcp.resource("problems://registry")`.
483
+ Decorators keep registration *next to the implementation* and avoid manual bookkeeping. The server simply exposes the combined metadata through an MCP **resource**. Follow `environments/browser/src/hud_controller/problems/registry.py` as a template and expose the JSON with `@mcp.resource("problems://registry")`.
415
484
 
416
485
  ### Other finishing touches
417
486
 
@@ -97,4 +97,6 @@ logs/
97
97
  *.log
98
98
 
99
99
  # Docker
100
- .dockerignore.local
100
+ .dockerignore.local
101
+
102
+ gcp.json
@@ -233,6 +233,12 @@ async def debug_mcp_docker(image: str) -> None:
233
233
 
234
234
  logger.info(f"\n{Colors.BOLD}🔍 Docker MCP Server Debugger{Colors.ENDC}")
235
235
  logger.info(f"{Colors.GRAY}Image: {image}{Colors.ENDC}")
236
+
237
+ # Show extra docker args if provided
238
+ extra_args = getattr(__builtins__, "_docker_extra_args", [])
239
+ if extra_args:
240
+ logger.info(f"{Colors.GRAY}Extra args: {' '.join(extra_args)}{Colors.ENDC}")
241
+
236
242
  logger.info(f"{Colors.GRAY}Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}{Colors.ENDC}")
237
243
 
238
244
  # Explain color coding
@@ -253,7 +259,9 @@ async def debug_mcp_docker(image: str) -> None:
253
259
  log_phase(1, "Basic Docker Container Test")
254
260
 
255
261
  try:
256
- cmd = ["docker", "run", "--rm", image, "echo", "Container OK"]
262
+ # Get extra docker args if provided
263
+ extra_args = getattr(__builtins__, "_docker_extra_args", [])
264
+ cmd = ["docker", "run", "--rm"] + extra_args + [image, "echo", "Container OK"]
257
265
  log_command(cmd)
258
266
 
259
267
  result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
@@ -293,7 +301,9 @@ async def debug_mcp_docker(image: str) -> None:
293
301
  }
294
302
 
295
303
  try:
296
- cmd = ["docker", "run", "--rm", "-i", image]
304
+ # Get extra docker args if provided
305
+ extra_args = getattr(__builtins__, "_docker_extra_args", [])
306
+ cmd = ["docker", "run", "--rm", "-i"] + extra_args + [image]
297
307
  log_command(cmd)
298
308
 
299
309
  log_stdio(f"Sending: {json.dumps(init_request)}")
@@ -395,7 +405,11 @@ async def debug_mcp_docker(image: str) -> None:
395
405
  try:
396
406
  from hud.mcp import MCPClient
397
407
 
398
- mcp_config = {"test": {"command": "docker", "args": ["run", "--rm", "-i", image]}}
408
+ # Get extra docker args if provided
409
+ extra_args = getattr(__builtins__, "_docker_extra_args", [])
410
+ mcp_config = {
411
+ "test": {"command": "docker", "args": ["run", "--rm", "-i"] + extra_args + [image]}
412
+ }
399
413
 
400
414
  cmd = ["docker"] + mcp_config["test"]["args"]
401
415
  log_command(cmd)
@@ -618,12 +632,18 @@ async def debug_mcp_docker(image: str) -> None:
618
632
 
619
633
  log_info(f"Baseline: Memory={baseline_memory:.1f}MB, CPU={baseline_cpu:.1f}%")
620
634
 
635
+ # Get extra docker args if provided
636
+ extra_args = getattr(__builtins__, "_docker_extra_args", [])
637
+
621
638
  # Create multiple concurrent clients
622
639
  log_info("Creating 3 concurrent MCP clients...")
623
640
 
624
641
  for i in range(3):
625
642
  client_config = {
626
- f"test_concurrent_{i}": {"command": "docker", "args": ["run", "--rm", "-i", image]}
643
+ f"test_concurrent_{i}": {
644
+ "command": "docker",
645
+ "args": ["run", "--rm", "-i"] + extra_args + [image],
646
+ }
627
647
  }
628
648
 
629
649
  concurrent_client = MCPClient(mcp_config=client_config, verbose=False)
@@ -689,11 +709,14 @@ async def debug_mcp_docker(image: str) -> None:
689
709
 
690
710
  # Still test basic concurrent connections
691
711
  try:
712
+ # Get extra docker args if provided
713
+ extra_args = getattr(__builtins__, "_docker_extra_args", [])
714
+
692
715
  for i in range(3):
693
716
  client_config = {
694
717
  f"test_concurrent_{i}": {
695
718
  "command": "docker",
696
- "args": ["run", "--rm", "-i", image],
719
+ "args": ["run", "--rm", "-i"] + extra_args + [image],
697
720
  }
698
721
  }
699
722
 
@@ -734,10 +757,29 @@ async def debug_mcp_docker(image: str) -> None:
734
757
 
735
758
 
736
759
  if __name__ == "__main__":
737
- if len(sys.argv) != 2:
738
- print("Usage: python docker_debug.py <docker-image>")
760
+ import warnings
761
+ import gc
762
+
763
+ if len(sys.argv) < 2:
764
+ print("Usage: python docker_debug.py <docker-image> [docker-args...]")
739
765
  print("Example: python docker_debug.py hudpython/gmail-clone:latest")
766
+ print(
767
+ "Example: python docker_debug.py my-env:latest -e BROWSER_PROVIDER=browserbase -e API_KEY=xxx"
768
+ )
740
769
  sys.exit(1)
741
770
 
742
771
  docker_image = sys.argv[1]
772
+ docker_extra_args = sys.argv[2:] if len(sys.argv) > 2 else []
773
+
774
+ # Suppress cleanup warnings
775
+ warnings.filterwarnings("ignore", category=ResourceWarning)
776
+
777
+ # Store extra args globally so they can be used in docker commands
778
+ import builtins
779
+
780
+ setattr(builtins, "_docker_extra_args", docker_extra_args)
781
+
743
782
  asyncio.run(debug_mcp_docker(docker_image))
783
+
784
+ # Force cleanup to avoid warnings
785
+ gc.collect()
@@ -0,0 +1 @@
1
+ gcp.json
@@ -0,0 +1,40 @@
1
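+ # Build argument to control dev vs production mode (NOTE: an ARG declared before FROM is only in scope for FROM lines; re-declare `ARG DEV_MODE` after FROM for the RUN check below to see it)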
2
+ ARG DEV_MODE=false
3
+
4
+ # Use our HUD base browser image with Playwright and uv pre-installed
5
+ FROM hudpython/base-browser:latest
6
+
7
+ # Create app-specific working directory
8
+ WORKDIR /app
9
+
10
+ # Copy project files
11
+ COPY pyproject.toml ./
12
+
13
+ # In dev mode, create a stub package so the editable install has a target; in production mode just log that the source is copied below
14
+ RUN if [ "$DEV_MODE" = "true" ]; then \
15
+ echo "Dev mode: Creating stub for editable install" && \
16
+ mkdir -p /app/src/hud_controller && \
17
+ echo "# Stub for package installation" > /app/src/hud_controller/__init__.py; \
18
+ else \
19
+ echo "Production mode: Source will be copied"; \
20
+ fi
21
+
22
+ # Copy source unconditionally: required for production builds; in dev mode the copy lands on top of the stub and is shadowed by the /app/src volume mount at runtime
23
+ COPY src/ ./src/
24
+
25
+ # Install the package using the existing venv at /opt/venv
26
+ # The --python flag tells uv to use this specific Python instead of creating a new venv
27
+ RUN uv pip install --python /opt/venv -e .
28
+
29
+ # Create directories for logs and data
30
+ RUN mkdir -p /app/logs /app/data
31
+
32
+ # Environment variables (PATH and VIRTUAL_ENV already set by base image)
33
+ ENV PYTHONUNBUFFERED=1
34
+ ENV LOG_LEVEL=INFO
35
+
36
+ # Google Cloud Platform Credentials for Sheets functionality (here for Windows builds)
37
+ ENV GCP_CREDENTIALS_JSON=""
38
+
39
+ # Run the command directly - it's installed in /opt/venv which is in PATH
40
+ CMD ["hud-remote-browser"]
@@ -0,0 +1,133 @@
1
+ # HUD Remote Browser MCP Server
2
+
3
+ This MCP server provides browser automation capabilities using various remote browser providers.
4
+
5
+ ## Running with Docker
6
+
7
+ The Docker image supports both production and development modes using the same Dockerfile.
8
+
9
+ ### Building the Image
10
+
11
+ ```bash
12
+ # Production build (default)
13
+ docker build -t hud-remote-browser:latest .
14
+
15
+ # Development build (for hot-reload with volume mounts)
16
+ docker build --build-arg DEV_MODE=true -t hud-remote-browser:dev .
17
+ ```
18
+
19
+ ### Running in Production Mode
20
+
21
+ ```bash
22
+ # Using AnchorBrowser
23
+ docker run --rm -i \
24
+ -e BROWSER_PROVIDER=anchorbrowser \
25
+ -e ANCHOR_API_KEY=your-api-key \
26
+ hud-remote-browser:latest
27
+
28
+ # Using BrowserBase
29
+ docker run --rm -i \
30
+ -e BROWSER_PROVIDER=browserbase \
31
+ -e BROWSERBASE_API_KEY=your-api-key \
32
+ -e BROWSERBASE_PROJECT_ID=your-project-id \
33
+ hud-remote-browser:latest
34
+ ```
35
+
36
+ ### Running in Development Mode (Hot Reload)
37
+
38
+ Development mode allows you to edit code locally and see changes immediately without rebuilding:
39
+
40
+ ```bash
41
+ # Windows
42
+ docker run --rm -i ^
43
+ -v "%cd%\src:/app/src:rw" ^
44
+ -e BROWSER_PROVIDER=anchorbrowser ^
45
+ -e ANCHOR_API_KEY=your-api-key ^
46
+ -e PYTHONPATH=/app/src ^
47
+ hud-remote-browser:dev
48
+
49
+ # Linux/Mac
50
+ docker run --rm -i \
51
+ -v "$(pwd)/src:/app/src:rw" \
52
+ -e BROWSER_PROVIDER=anchorbrowser \
53
+ -e ANCHOR_API_KEY=your-api-key \
54
+ -e PYTHONPATH=/app/src \
55
+ hud-remote-browser:dev
56
+ ```
57
+
58
+ The `-v` flag mounts your local `src/` directory into the container, allowing instant code changes.
59
+
60
+ ## Supported Browser Providers
61
+
62
+ - **anchorbrowser** - Requires `ANCHOR_API_KEY`
63
+ - **browserbase** - Requires `BROWSERBASE_API_KEY` and `BROWSERBASE_PROJECT_ID`
64
+ - **hyperbrowser** - Requires `HYPERBROWSER_API_KEY`
65
+ - **steel** - Requires `STEEL_API_KEY`
66
+ - **kernel** - No additional requirements
67
+
68
+ ## Environment Variables
69
+
70
+ ### Core Variables
71
+
72
+ | Variable | Required | Description |
73
+ |----------|----------|-------------|
74
+ | `BROWSER_PROVIDER` | **Yes** | The browser provider to use |
75
+ | `LOG_LEVEL` | No | Logging level (default: INFO) |
76
+
77
+ ### Provider-Specific Variables
78
+
79
+ | Provider | Required Variables |
80
+ |----------|-------------------|
81
+ | anchorbrowser | `ANCHOR_API_KEY` |
82
+ | browserbase | `BROWSERBASE_API_KEY`, `BROWSERBASE_PROJECT_ID` |
83
+ | hyperbrowser | `HYPERBROWSER_API_KEY` |
84
+ | steel | `STEEL_API_KEY` |
85
+
86
+ ### Optional Browser Settings
87
+
88
+ | Variable | Description |
89
+ |----------|-------------|
90
+ | `HEADLESS` | Whether to run browser in headless mode |
91
+ | `DEFAULT_TIMEOUT` | Default timeout for browser operations |
92
+ | `WINDOW_WIDTH` | Browser window width |
93
+ | `WINDOW_HEIGHT` | Browser window height |
94
+ | `PROXY_URL` | HTTP proxy URL |
95
+
96
+ ### Google Cloud Platform (GCP) Credentials
97
+
98
+ For Google Sheets functionality, you can provide GCP credentials in two formats:
99
+
100
+ **Option A: Single JSON String**
101
+ ```bash
102
+ -e GCP_CREDENTIALS_JSON='{"type":"service_account","project_id":"...","private_key":"..."}'
103
+ ```
104
+
105
+ **Option B: Individual Fields**
106
+ ```bash
107
+ -e GCP_TYPE="service_account" \
108
+ -e GCP_PROJECT_ID="your-project-id" \
109
+ -e GCP_PRIVATE_KEY_ID="your-key-id" \
110
+ -e GCP_PRIVATE_KEY="-----BEGIN PRIVATE KEY-----\n..." \
111
+ -e GCP_CLIENT_EMAIL="your-service-account@project.iam.gserviceaccount.com" \
112
+ -e GCP_CLIENT_ID="your-client-id" \
113
+ -e GCP_AUTH_URI="https://accounts.google.com/o/oauth2/auth" \
114
+ -e GCP_TOKEN_URI="https://oauth2.googleapis.com/token" \
115
+ -e GCP_AUTH_PROVIDER_X509_CERT_URL="https://www.googleapis.com/oauth2/v1/certs" \
116
+ -e GCP_CLIENT_X509_CERT_URL="https://www.googleapis.com/robot/v1/metadata/x509/..."
117
+ ```
118
+
119
+ ## MCP Protocol
120
+
121
+ The server communicates via stdio using the MCP protocol. Example initialization:
122
+
123
+ ```bash
124
+ echo '{"jsonrpc": "2.0", "id": 1, "method": "initialize", "params": {...}}' | \
125
+ docker run --rm -i -e BROWSER_PROVIDER=steel -e STEEL_API_KEY=... hud-remote-browser:latest
126
+ ```
127
+
128
+ ## Error Handling
129
+
130
+ If `BROWSER_PROVIDER` is not set, the server will fail with:
131
+ ```
132
+ BROWSER_PROVIDER environment variable is required. Supported providers: anchorbrowser, steel, browserbase, hyperbrowser, kernel
133
+ ```
@@ -4,7 +4,8 @@ version = "0.1.0"
4
4
  description = "HUD Remote Browser Controller with MCP tools for cloud browser providers"
5
5
  requires-python = ">=3.11,<3.13"
6
6
  dependencies = [
7
- "hud-python",
7
+ "hud-python>=0.3.2",
8
+ "pyautogui",
8
9
  "playwright",
9
10
  "httpx",
10
11
  "typer",