hud-python 0.4.12__tar.gz → 0.4.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (172) hide show
  1. {hud_python-0.4.12 → hud_python-0.4.14}/PKG-INFO +6 -7
  2. {hud_python-0.4.12 → hud_python-0.4.14}/environments/README.md +12 -12
  3. hud_python-0.4.14/environments/browser/README.md +213 -0
  4. {hud_python-0.4.12 → hud_python-0.4.14}/environments/remote_browser/pyproject.toml +1 -1
  5. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/__init__.py +8 -1
  6. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/dev.py +41 -13
  7. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/eval.py +36 -31
  8. hud_python-0.4.14/hud/cli/init.py +658 -0
  9. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/list_func.py +1 -1
  10. {hud_python-0.4.12 → hud_python-0.4.14}/hud/clients/fastmcp.py +2 -12
  11. {hud_python-0.4.12 → hud_python-0.4.14}/hud/clients/mcp_use.py +1 -7
  12. {hud_python-0.4.12 → hud_python-0.4.14}/hud/otel/instrumentation.py +5 -1
  13. {hud_python-0.4.12 → hud_python-0.4.14}/hud/server/server.py +1 -1
  14. {hud_python-0.4.12 → hud_python-0.4.14}/hud/utils/tests/test_version.py +1 -1
  15. {hud_python-0.4.12 → hud_python-0.4.14}/hud/version.py +1 -1
  16. {hud_python-0.4.12 → hud_python-0.4.14}/pyproject.toml +4 -5
  17. hud_python-0.4.12/environments/browser/README.md +0 -447
  18. hud_python-0.4.12/environments/browser/src/hud_controller/README.md +0 -117
  19. hud_python-0.4.12/hud/cli/init.py +0 -279
  20. {hud_python-0.4.12 → hud_python-0.4.14}/.gitignore +0 -0
  21. {hud_python-0.4.12 → hud_python-0.4.14}/LICENSE +0 -0
  22. {hud_python-0.4.12 → hud_python-0.4.14}/README.md +0 -0
  23. {hud_python-0.4.12 → hud_python-0.4.14}/environments/browser/apps/2048/README.md +0 -0
  24. {hud_python-0.4.12 → hud_python-0.4.14}/environments/browser/apps/2048/backend/pyproject.toml +0 -0
  25. {hud_python-0.4.12 → hud_python-0.4.14}/environments/browser/apps/README.md +0 -0
  26. {hud_python-0.4.12 → hud_python-0.4.14}/environments/browser/apps/todo/README.md +0 -0
  27. {hud_python-0.4.12 → hud_python-0.4.14}/environments/browser/apps/todo/backend/pyproject.toml +0 -0
  28. {hud_python-0.4.12 → hud_python-0.4.14}/environments/browser/pyproject.toml +0 -0
  29. {hud_python-0.4.12 → hud_python-0.4.14}/environments/remote_browser/README.md +0 -0
  30. {hud_python-0.4.12 → hud_python-0.4.14}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
  31. {hud_python-0.4.12 → hud_python-0.4.14}/environments/text_2048/README.md +0 -0
  32. {hud_python-0.4.12 → hud_python-0.4.14}/environments/text_2048/pyproject.toml +0 -0
  33. {hud_python-0.4.12 → hud_python-0.4.14}/examples/README.md +0 -0
  34. {hud_python-0.4.12 → hud_python-0.4.14}/hud/__init__.py +0 -0
  35. {hud_python-0.4.12 → hud_python-0.4.14}/hud/__main__.py +0 -0
  36. {hud_python-0.4.12 → hud_python-0.4.14}/hud/agents/__init__.py +0 -0
  37. {hud_python-0.4.12 → hud_python-0.4.14}/hud/agents/base.py +0 -0
  38. {hud_python-0.4.12 → hud_python-0.4.14}/hud/agents/claude.py +0 -0
  39. {hud_python-0.4.12 → hud_python-0.4.14}/hud/agents/langchain.py +0 -0
  40. {hud_python-0.4.12 → hud_python-0.4.14}/hud/agents/misc/__init__.py +0 -0
  41. {hud_python-0.4.12 → hud_python-0.4.14}/hud/agents/misc/response_agent.py +0 -0
  42. {hud_python-0.4.12 → hud_python-0.4.14}/hud/agents/openai.py +0 -0
  43. {hud_python-0.4.12 → hud_python-0.4.14}/hud/agents/openai_chat_generic.py +0 -0
  44. {hud_python-0.4.12 → hud_python-0.4.14}/hud/agents/tests/__init__.py +0 -0
  45. {hud_python-0.4.12 → hud_python-0.4.14}/hud/agents/tests/test_base.py +0 -0
  46. {hud_python-0.4.12 → hud_python-0.4.14}/hud/agents/tests/test_claude.py +0 -0
  47. {hud_python-0.4.12 → hud_python-0.4.14}/hud/agents/tests/test_client.py +0 -0
  48. {hud_python-0.4.12 → hud_python-0.4.14}/hud/agents/tests/test_openai.py +0 -0
  49. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/__main__.py +0 -0
  50. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/analyze.py +0 -0
  51. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/build.py +0 -0
  52. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/clone.py +0 -0
  53. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/debug.py +0 -0
  54. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/pull.py +0 -0
  55. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/push.py +0 -0
  56. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/remove.py +0 -0
  57. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/tests/__init__.py +0 -0
  58. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/tests/test_analyze.py +0 -0
  59. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/tests/test_analyze_metadata.py +0 -0
  60. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/tests/test_build.py +0 -0
  61. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/tests/test_cli_init.py +0 -0
  62. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/tests/test_cli_main.py +0 -0
  63. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/tests/test_clone.py +0 -0
  64. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/tests/test_cursor.py +0 -0
  65. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/tests/test_debug.py +0 -0
  66. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/tests/test_list_func.py +0 -0
  67. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/tests/test_main_module.py +0 -0
  68. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/tests/test_mcp_server.py +0 -0
  69. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/tests/test_pull.py +0 -0
  70. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/tests/test_push.py +0 -0
  71. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/tests/test_registry.py +0 -0
  72. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/tests/test_utils.py +0 -0
  73. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/utils/__init__.py +0 -0
  74. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/utils/cursor.py +0 -0
  75. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/utils/docker.py +0 -0
  76. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/utils/environment.py +0 -0
  77. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/utils/interactive.py +0 -0
  78. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/utils/logging.py +0 -0
  79. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/utils/metadata.py +0 -0
  80. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/utils/registry.py +0 -0
  81. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/utils/remote_runner.py +0 -0
  82. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/utils/runner.py +0 -0
  83. {hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/utils/server.py +0 -0
  84. {hud_python-0.4.12 → hud_python-0.4.14}/hud/clients/README.md +0 -0
  85. {hud_python-0.4.12 → hud_python-0.4.14}/hud/clients/__init__.py +0 -0
  86. {hud_python-0.4.12 → hud_python-0.4.14}/hud/clients/base.py +0 -0
  87. {hud_python-0.4.12 → hud_python-0.4.14}/hud/clients/tests/__init__.py +0 -0
  88. {hud_python-0.4.12 → hud_python-0.4.14}/hud/clients/tests/test_client_integration.py +0 -0
  89. {hud_python-0.4.12 → hud_python-0.4.14}/hud/clients/tests/test_fastmcp.py +0 -0
  90. {hud_python-0.4.12 → hud_python-0.4.14}/hud/clients/tests/test_protocol.py +0 -0
  91. {hud_python-0.4.12 → hud_python-0.4.14}/hud/clients/utils/__init__.py +0 -0
  92. {hud_python-0.4.12 → hud_python-0.4.14}/hud/clients/utils/retry_transport.py +0 -0
  93. {hud_python-0.4.12 → hud_python-0.4.14}/hud/datasets.py +0 -0
  94. {hud_python-0.4.12 → hud_python-0.4.14}/hud/misc/__init__.py +0 -0
  95. {hud_python-0.4.12 → hud_python-0.4.14}/hud/misc/claude_plays_pokemon.py +0 -0
  96. {hud_python-0.4.12 → hud_python-0.4.14}/hud/otel/__init__.py +0 -0
  97. {hud_python-0.4.12 → hud_python-0.4.14}/hud/otel/collector.py +0 -0
  98. {hud_python-0.4.12 → hud_python-0.4.14}/hud/otel/config.py +0 -0
  99. {hud_python-0.4.12 → hud_python-0.4.14}/hud/otel/context.py +0 -0
  100. {hud_python-0.4.12 → hud_python-0.4.14}/hud/otel/exporters.py +0 -0
  101. {hud_python-0.4.12 → hud_python-0.4.14}/hud/otel/processors.py +0 -0
  102. {hud_python-0.4.12 → hud_python-0.4.14}/hud/otel/tests/__init__.py +0 -0
  103. {hud_python-0.4.12 → hud_python-0.4.14}/hud/otel/tests/test_processors.py +0 -0
  104. {hud_python-0.4.12 → hud_python-0.4.14}/hud/py.typed +0 -0
  105. {hud_python-0.4.12 → hud_python-0.4.14}/hud/server/__init__.py +0 -0
  106. {hud_python-0.4.12 → hud_python-0.4.14}/hud/server/context.py +0 -0
  107. {hud_python-0.4.12 → hud_python-0.4.14}/hud/server/helper/__init__.py +0 -0
  108. {hud_python-0.4.12 → hud_python-0.4.14}/hud/server/low_level.py +0 -0
  109. {hud_python-0.4.12 → hud_python-0.4.14}/hud/server/tests/__init__.py +0 -0
  110. {hud_python-0.4.12 → hud_python-0.4.14}/hud/settings.py +0 -0
  111. {hud_python-0.4.12 → hud_python-0.4.14}/hud/shared/__init__.py +0 -0
  112. {hud_python-0.4.12 → hud_python-0.4.14}/hud/shared/exceptions.py +0 -0
  113. {hud_python-0.4.12 → hud_python-0.4.14}/hud/shared/requests.py +0 -0
  114. {hud_python-0.4.12 → hud_python-0.4.14}/hud/shared/tests/__init__.py +0 -0
  115. {hud_python-0.4.12 → hud_python-0.4.14}/hud/shared/tests/test_exceptions.py +0 -0
  116. {hud_python-0.4.12 → hud_python-0.4.14}/hud/shared/tests/test_requests.py +0 -0
  117. {hud_python-0.4.12 → hud_python-0.4.14}/hud/telemetry/__init__.py +0 -0
  118. {hud_python-0.4.12 → hud_python-0.4.14}/hud/telemetry/instrument.py +0 -0
  119. {hud_python-0.4.12 → hud_python-0.4.14}/hud/telemetry/job.py +0 -0
  120. {hud_python-0.4.12 → hud_python-0.4.14}/hud/telemetry/replay.py +0 -0
  121. {hud_python-0.4.12 → hud_python-0.4.14}/hud/telemetry/tests/__init__.py +0 -0
  122. {hud_python-0.4.12 → hud_python-0.4.14}/hud/telemetry/tests/test_replay.py +0 -0
  123. {hud_python-0.4.12 → hud_python-0.4.14}/hud/telemetry/tests/test_trace.py +0 -0
  124. {hud_python-0.4.12 → hud_python-0.4.14}/hud/telemetry/trace.py +0 -0
  125. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/__init__.py +0 -0
  126. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/base.py +0 -0
  127. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/bash.py +0 -0
  128. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/computer/__init__.py +0 -0
  129. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/computer/anthropic.py +0 -0
  130. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/computer/hud.py +0 -0
  131. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/computer/openai.py +0 -0
  132. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/computer/settings.py +0 -0
  133. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/edit.py +0 -0
  134. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/executors/__init__.py +0 -0
  135. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/executors/base.py +0 -0
  136. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/executors/pyautogui.py +0 -0
  137. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/executors/tests/__init__.py +0 -0
  138. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/executors/tests/test_base_executor.py +0 -0
  139. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  140. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/executors/xdo.py +0 -0
  141. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/playwright.py +0 -0
  142. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/response.py +0 -0
  143. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/tests/__init__.py +0 -0
  144. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/tests/test_base.py +0 -0
  145. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/tests/test_bash.py +0 -0
  146. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/tests/test_bash_extended.py +0 -0
  147. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/tests/test_computer.py +0 -0
  148. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/tests/test_computer_actions.py +0 -0
  149. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/tests/test_edit.py +0 -0
  150. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/tests/test_init.py +0 -0
  151. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/tests/test_playwright_tool.py +0 -0
  152. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/tests/test_response.py +0 -0
  153. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/tests/test_tools.py +0 -0
  154. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/tests/test_tools_init.py +0 -0
  155. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/tests/test_utils.py +0 -0
  156. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/types.py +0 -0
  157. {hud_python-0.4.12 → hud_python-0.4.14}/hud/tools/utils.py +0 -0
  158. {hud_python-0.4.12 → hud_python-0.4.14}/hud/types.py +0 -0
  159. {hud_python-0.4.12 → hud_python-0.4.14}/hud/utils/__init__.py +0 -0
  160. {hud_python-0.4.12 → hud_python-0.4.14}/hud/utils/async_utils.py +0 -0
  161. {hud_python-0.4.12 → hud_python-0.4.14}/hud/utils/design.py +0 -0
  162. {hud_python-0.4.12 → hud_python-0.4.14}/hud/utils/mcp.py +0 -0
  163. {hud_python-0.4.12 → hud_python-0.4.14}/hud/utils/progress.py +0 -0
  164. {hud_python-0.4.12 → hud_python-0.4.14}/hud/utils/telemetry.py +0 -0
  165. {hud_python-0.4.12 → hud_python-0.4.14}/hud/utils/tests/__init__.py +0 -0
  166. {hud_python-0.4.12 → hud_python-0.4.14}/hud/utils/tests/test_async_utils.py +0 -0
  167. {hud_python-0.4.12 → hud_python-0.4.14}/hud/utils/tests/test_init.py +0 -0
  168. {hud_python-0.4.12 → hud_python-0.4.14}/hud/utils/tests/test_mcp.py +0 -0
  169. {hud_python-0.4.12 → hud_python-0.4.14}/hud/utils/tests/test_progress.py +0 -0
  170. {hud_python-0.4.12 → hud_python-0.4.14}/hud/utils/tests/test_telemetry.py +0 -0
  171. {hud_python-0.4.12 → hud_python-0.4.14}/rl/README.md +0 -0
  172. {hud_python-0.4.12 → hud_python-0.4.14}/rl/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.4.12
3
+ Version: 0.4.14
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -35,10 +35,9 @@ Classifier: Programming Language :: Python :: 3.11
35
35
  Classifier: Programming Language :: Python :: 3.12
36
36
  Classifier: Programming Language :: Python :: 3.13
37
37
  Requires-Python: <3.14,>=3.11
38
- Requires-Dist: fastmcp>=2.11.2
39
38
  Requires-Dist: httpx<1,>=0.23.0
40
- Requires-Dist: hud-mcp-python-sdk>=0.1.0
41
- Requires-Dist: mcp>=1.13.1
39
+ Requires-Dist: hud-fastmcp-python-sdk>=0.1.2
40
+ Requires-Dist: hud-mcp-python-sdk>=3.13.2
42
41
  Requires-Dist: opentelemetry-api>=1.34.1
43
42
  Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.34.1
44
43
  Requires-Dist: opentelemetry-instrumentation-mcp>=0.44.1
@@ -56,6 +55,7 @@ Provides-Extra: agent
56
55
  Requires-Dist: anthropic; extra == 'agent'
57
56
  Requires-Dist: datasets>=2.14.0; extra == 'agent'
58
57
  Requires-Dist: dotenv>=0.9.9; extra == 'agent'
58
+ Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'agent'
59
59
  Requires-Dist: ipykernel; extra == 'agent'
60
60
  Requires-Dist: ipython<9; extra == 'agent'
61
61
  Requires-Dist: jupyter-client; extra == 'agent'
@@ -63,13 +63,13 @@ Requires-Dist: jupyter-core; extra == 'agent'
63
63
  Requires-Dist: langchain; extra == 'agent'
64
64
  Requires-Dist: langchain-anthropic; extra == 'agent'
65
65
  Requires-Dist: langchain-openai; extra == 'agent'
66
- Requires-Dist: mcp-use; extra == 'agent'
67
66
  Requires-Dist: numpy>=1.24.0; extra == 'agent'
68
67
  Requires-Dist: openai; extra == 'agent'
69
68
  Provides-Extra: agents
70
69
  Requires-Dist: anthropic; extra == 'agents'
71
70
  Requires-Dist: datasets>=2.14.0; extra == 'agents'
72
71
  Requires-Dist: dotenv>=0.9.9; extra == 'agents'
72
+ Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'agents'
73
73
  Requires-Dist: ipykernel; extra == 'agents'
74
74
  Requires-Dist: ipython<9; extra == 'agents'
75
75
  Requires-Dist: jupyter-client; extra == 'agents'
@@ -77,7 +77,6 @@ Requires-Dist: jupyter-core; extra == 'agents'
77
77
  Requires-Dist: langchain; extra == 'agents'
78
78
  Requires-Dist: langchain-anthropic; extra == 'agents'
79
79
  Requires-Dist: langchain-openai; extra == 'agents'
80
- Requires-Dist: mcp-use; extra == 'agents'
81
80
  Requires-Dist: numpy>=1.24.0; extra == 'agents'
82
81
  Requires-Dist: openai; extra == 'agents'
83
82
  Provides-Extra: dev
@@ -85,6 +84,7 @@ Requires-Dist: aiodocker>=0.24.0; extra == 'dev'
85
84
  Requires-Dist: anthropic; extra == 'dev'
86
85
  Requires-Dist: datasets>=2.14.0; extra == 'dev'
87
86
  Requires-Dist: dotenv>=0.9.9; extra == 'dev'
87
+ Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'dev'
88
88
  Requires-Dist: inspect-ai>=0.3.80; extra == 'dev'
89
89
  Requires-Dist: ipykernel; extra == 'dev'
90
90
  Requires-Dist: ipython<9; extra == 'dev'
@@ -93,7 +93,6 @@ Requires-Dist: jupyter-core; extra == 'dev'
93
93
  Requires-Dist: langchain; extra == 'dev'
94
94
  Requires-Dist: langchain-anthropic; extra == 'dev'
95
95
  Requires-Dist: langchain-openai; extra == 'dev'
96
- Requires-Dist: mcp-use; extra == 'dev'
97
96
  Requires-Dist: numpy>=1.24.0; extra == 'dev'
98
97
  Requires-Dist: openai; extra == 'dev'
99
98
  Requires-Dist: pillow>=11.1.0; extra == 'dev'
@@ -351,7 +351,7 @@ from . import basic, advanced # This registers all @setup.tool() decorated func
351
351
 
352
352
  # In setup/basic.py
353
353
  from . import setup
354
- from hud.tools.types import SetupResult
354
+ from mcp.types import TextContent
355
355
 
356
356
  @setup.tool()
357
357
  async def reset(**kwargs):
@@ -361,14 +361,14 @@ async def reset(**kwargs):
361
361
  **kwargs: Additional parameters
362
362
 
363
363
  Returns:
364
- SetupResult
364
+ TextContent
365
365
  """
366
366
  # Access environment from the hub
367
367
  env = setup.env
368
368
  await env.reset_state()
369
- return SetupResult(
370
- content="Environment reset to initial state",
371
- info={"status": "success"}
369
+ return TextContent(
370
+ text="Environment reset to initial state",
371
+ type="text"
372
372
  )
373
373
 
374
374
  @setup.tool()
@@ -379,14 +379,14 @@ async def seed_data(num_items: int = 5):
379
379
  num_items: Number of items to create
380
380
 
381
381
  Returns:
382
- SetupResult
382
+ TextContent
383
383
  """
384
384
  # Access environment from the hub
385
385
  env = setup.env
386
386
  items = await env.create_items(num_items)
387
- return SetupResult(
388
- content=f"Created {len(items)} items",
389
- info={"items_created": len(items)}
387
+ return TextContent(
388
+ text=f"Created {len(items)} items",
389
+ type="text"
390
390
  )
391
391
 
392
392
  # In evaluate/__init__.py
@@ -827,13 +827,13 @@ Before making changes:
827
827
  ```python
828
828
  # In setup/my_new_setup.py
829
829
  from . import setup
830
- from hud.tools import BaseSetup, SetupResult
830
+ from hud.tools import BaseSetup, TextContent
831
831
 
832
832
  @setup("my_new_setup", description="Clear description of what this does")
833
833
  class MyNewSetup(BaseSetup):
834
- async def __call__(self, context, param1: str, param2: int = 10) -> SetupResult:
834
+ async def __call__(self, context, param1: str, param2: int = 10) -> TextContent:
835
835
  # Implementation
836
- return {"status": "success", "details": "..."}
836
+ return TextContent(...)
837
837
  ```
838
838
 
839
839
  **Adding New Evaluators**
@@ -0,0 +1,213 @@
1
+ # Browser Environment
2
+
3
+ A browser automation environment for HUD that provides GUI access and web app interaction capabilities. This environment supports hot-reloading during development while maintaining persistent state.
4
+
5
+ ## Architecture Overview
6
+
7
+ The browser environment uses a two-process architecture:
8
+
9
+ 1. **Context Server** (`context.py`): Long-running process that maintains persistent state
10
+ 2. **MCP Server** (`server.py`): Hot-reloadable process that handles tool requests
11
+
12
+ ### Key Components
13
+
14
+ - **BrowserContext**: Stores persistent state (running apps, ports, playwright instance)
15
+ - **ServiceManager**: Manages X11, VNC, and app processes
16
+ - **BaseHub Tools**: Setup and evaluate tools organized by app (2048, todo)
17
+ - **Multiprocessing Proxy**: Enables state sharing between processes
18
+
19
+ ## Context Management and Common Pitfalls
20
+
21
+ ### Understanding the Proxy System
22
+
23
+ The browser environment uses Python's `multiprocessing.Manager` to share state between the context server and MCP server. This introduces important constraints:
24
+
25
+ #### ❌ Common Pitfall: Unpicklable Objects
26
+
27
+ ```python
28
+ # BAD: This will fail with "cannot pickle 'coroutine' object"
29
+ @setup.tool("my_tool")
30
+ async def my_tool():
31
+ env = setup.env
32
+ result = await env.call_app_api("app", "/api/endpoint") # Returns coroutine
33
+ # The coroutine can't be serialized through the proxy!
34
+ ```
35
+
36
+ #### ✅ Solution: Direct HTTP Calls
37
+
38
+ ```python
39
+ # GOOD: Make HTTP calls directly
40
+ @setup.tool("my_tool")
41
+ async def my_tool():
42
+ import httpx
43
+
44
+ # Get the backend port from persistent context
45
+ persistent_ctx = setup.env
46
+ backend_port = persistent_ctx.get_app_backend_port("app")
47
+
48
+ # Make API call directly
49
+ url = f"http://localhost:{backend_port}/api/endpoint"
50
+ async with httpx.AsyncClient() as client:
51
+ response = await client.get(url)
52
+ response.raise_for_status()
53
+ result = response.json()
54
+ ```
55
+
56
+ ### State Synchronization Issues
57
+
58
+ #### ❌ Common Pitfall: Direct List/Dict Manipulation
59
+
60
+ ```python
61
+ # BAD: Regular Python lists don't sync through proxy
62
+ class ServiceManager:
63
+ def __init__(self):
64
+ self._launched_apps = [] # Won't sync!
65
+ ```
66
+
67
+ #### ✅ Solution: Store State in Persistent Context
68
+
69
+ ```python
70
+ # GOOD: Use the persistent context for shared state
71
+ class BrowserContext:
72
+ def __init__(self):
73
+ self._running_apps: List[str] = []
74
+ self._app_ports: Dict[str, Dict[str, int]] = {}
75
+
76
+ def add_running_app(self, app_name: str) -> None:
77
+ """Add app to running list."""
78
+ if app_name not in self._running_apps:
79
+ self._running_apps.append(app_name)
80
+ ```
81
+
82
+ ### Accessing Shared Resources
83
+
84
+ #### ❌ Common Pitfall: Direct Attribute Access
85
+
86
+ ```python
87
+ # BAD: Direct attribute access on proxy objects
88
+ playwright_tool = env.playwright # May not work with proxy
89
+ ```
90
+
91
+ #### ✅ Solution: Use Getter Methods
92
+
93
+ ```python
94
+ # GOOD: Use proxy-friendly getter methods
95
+ playwright_tool = persistent_ctx.get_playwright_tool()
96
+ ```
97
+
98
+ ## Best Practices
99
+
100
+ ### 1. Tool Implementation Pattern
101
+
102
+ All setup and evaluate tools should follow this pattern:
103
+
104
+ ```python
105
+ @setup.tool("tool_name")
106
+ async def tool_name(param1: type, param2: type):
107
+ """Tool description."""
108
+ try:
109
+ # Get persistent context
110
+ persistent_ctx = setup.env # or evaluate.env
111
+
112
+ # Get app ports
113
+ backend_port = persistent_ctx.get_app_backend_port("app_name")
114
+
115
+ # Make HTTP request
116
+ url = f"http://localhost:{backend_port}/api/endpoint"
117
+ async with httpx.AsyncClient() as client:
118
+ response = await client.method(url, json=data)
119
+ response.raise_for_status()
120
+ result = response.json()
121
+
122
+ # Return result
123
+ return TextContent(
124
+ text=f"Success message",
125
+ type="text"
126
+ )
127
+ except Exception as e:
128
+ logger.error(f"tool_name failed: {e}")
129
+ return TextContent(
130
+ text=f"Failed: {str(e)}",
131
+ type="text"
132
+ )
133
+ ```
134
+
135
+ ### 2. App Launch Pattern
136
+
137
+ When launching apps, ensure ports are stored in the persistent context:
138
+
139
+ ```python
140
+ # In launch_app tool
141
+ app_info = await service_manager.launch_app(app_name)
142
+
143
+ # Store ports in persistent context for later access
144
+ try:
145
+ backend_port = service_manager.get_app_port(app_name)
146
+ frontend_port = service_manager.get_app_frontend_port(app_name)
147
+ persistent_ctx.set_app_ports(app_name, frontend_port, backend_port)
148
+ except Exception as e:
149
+ logger.error(f"Failed to store ports: {e}")
150
+
151
+ # Track app in persistent context
152
+ persistent_ctx.add_running_app(app_name)
153
+ ```
154
+
155
+ ### 3. Import Organization
156
+
157
+ Keep imports at module level:
158
+
159
+ ```python
160
+ # At top of file
161
+ import logging
162
+ import httpx
163
+ from mcp.types import TextContent
164
+ from . import setup
165
+
166
+ # Not inside functions
167
+ ```
168
+
169
+ ## Troubleshooting
170
+
171
+ ### "Cannot pickle 'coroutine' object"
172
+
173
+ **Cause**: Calling an async method on a proxied object returns a coroutine, and coroutines cannot be pickled for transfer through the proxy.
174
+
175
+ **Fix**: Don't use async methods on proxied objects. Make direct HTTP calls instead.
176
+
177
+ ### "App not launched" errors
178
+
179
+ **Cause**: State synchronization issue between ServiceManager and persistent context.
180
+
181
+ **Fix**: Ensure that `launch_app` stores app info in the persistent context and that setup/evaluate tools check the persistent context's app list.
182
+
183
+ ### "Object has no attribute" on proxy objects
184
+
185
+ **Cause**: Direct attribute access on multiprocessing proxy objects.
186
+
187
+ **Fix**: Use getter/setter methods instead of direct attribute access.
188
+
189
+ ## Development Workflow
190
+
191
+ 1. **Start the environment**: `hud dev`
192
+ 2. **Make changes**: Edit tools in `src/hud_controller/`
193
+ 3. **Test immediately**: The MCP server hot-reloads automatically
194
+ 4. **Check logs**: Look for serialization or proxy errors
195
+
196
+ ## Adding New Apps
197
+
198
+ 1. Create app directory in `apps/`
199
+ 2. Add setup tools in `src/hud_controller/setup/app_name.py`
200
+ 3. Add evaluate tools in `src/hud_controller/evaluate/app_name.py`
201
+ 4. Follow the direct HTTP call pattern; do not use `call_app_api`
202
+ 5. Store app ports in persistent context when launching
203
+
204
+ ## Key Files
205
+
206
+ - `context.py`: Persistent state management
207
+ - `server.py`: MCP server and tool definitions
208
+ - `services.py`: Process management for X11, VNC, apps
209
+ - `setup/`: Setup tools organized by app
210
+ - `evaluate/`: Evaluation tools organized by app
211
+
212
+ Remember: When in doubt, make direct HTTP calls and store state in the persistent context!
213
+
@@ -3,7 +3,7 @@ name = "hud-remote-browser"
3
3
  version = "0.1.0"
4
4
  description = "HUD Remote Browser Controller with MCP tools for cloud browser providers"
5
5
  requires-python = ">=3.11,<3.13"
6
- dependencies = [ "hud-python==0.4.12", "pyautogui", "playwright", "httpx", "typer", "google-api-python-client", "google-auth",]
6
+ dependencies = [ "hud-python>=0.4.12", "pyautogui", "playwright", "httpx", "typer", "google-api-python-client", "google-auth",]
7
7
 
8
8
  [build-system]
9
9
  requires = [ "hatchling",]
@@ -348,6 +348,11 @@ def dev(
348
348
  ),
349
349
  port: int = typer.Option(8765, "--port", "-p", help="HTTP server port (ignored for stdio)"),
350
350
  no_reload: bool = typer.Option(False, "--no-reload", help="Disable hot-reload"),
351
+ full_reload: bool = typer.Option(
352
+ False,
353
+ "--full-reload",
354
+ help="Restart entire container on file changes (instead of just server process)",
355
+ ),
351
356
  verbose: bool = typer.Option(False, "--verbose", "-v", help="Show server logs"),
352
357
  inspector: bool = typer.Option(
353
358
  False, "--inspector", help="Launch MCP Inspector (HTTP mode only)"
@@ -375,12 +380,13 @@ def dev(
375
380
  hud dev . --inspector # Launch MCP Inspector (HTTP mode only)
376
381
  hud dev . --interactive # Launch interactive testing mode (HTTP mode only)
377
382
  hud dev . --no-logs # Disable Docker log streaming
383
+ hud dev . --full-reload # Restart entire container on file changes (instead of just server)
378
384
 
379
385
  # With Docker arguments (after all options):
380
386
  hud dev . -e BROWSER_PROVIDER=anchorbrowser -e ANCHOR_API_KEY=xxx
381
387
  hud dev . -e API_KEY=secret -v /tmp/data:/data --network host
382
388
  hud dev . --build -e DEBUG=true --memory 2g
383
- """
389
+ """ # noqa: E501
384
390
  # Parse directory and Docker arguments
385
391
  if params:
386
392
  directory = params[0]
@@ -397,6 +403,7 @@ def dev(
397
403
  transport,
398
404
  port,
399
405
  no_reload,
406
+ full_reload,
400
407
  verbose,
401
408
  inspector,
402
409
  no_logs,
@@ -35,6 +35,7 @@ def create_proxy_server(
35
35
  directory: str | Path,
36
36
  image_name: str,
37
37
  no_reload: bool = False,
38
+ full_reload: bool = False,
38
39
  verbose: bool = False,
39
40
  docker_args: list[str] | None = None,
40
41
  interactive: bool = False,
@@ -48,8 +49,12 @@ def create_proxy_server(
48
49
  design.warning(f"Could not extract CMD from {image_name}, using default")
49
50
  original_cmd = ["python", "-m", "hud_controller.server"]
50
51
 
51
- # Generate container name from image
52
- container_name = f"{image_name.replace(':', '-').replace('/', '-')}"
52
+ # Generate unique container name from image to avoid conflicts between multiple instances
53
+ import os
54
+
55
+ pid = str(os.getpid())[-6:] # Last 6 digits of process ID for uniqueness
56
+ base_name = image_name.replace(":", "-").replace("/", "-")
57
+ container_name = f"{base_name}-{pid}"
53
58
 
54
59
  # Build the docker run command
55
60
  docker_cmd = [
@@ -73,14 +78,20 @@ def create_proxy_server(
73
78
  if interactive:
74
79
  no_reload = True
75
80
 
76
- if not no_reload:
77
- # Inject our supervisor into the CMD
81
+ # Validate reload options
82
+ if no_reload and full_reload:
83
+ design.warning("Cannot use --full-reload with --no-reload, ignoring --full-reload")
84
+ full_reload = False
85
+
86
+ if not no_reload and not full_reload:
87
+ # Standard hot-reload: inject supervisor for server restart within container
78
88
  modified_cmd = inject_supervisor(original_cmd)
79
89
  docker_cmd.extend(["--entrypoint", modified_cmd[0]])
80
90
  docker_cmd.append(image_name)
81
91
  docker_cmd.extend(modified_cmd[1:])
82
92
  else:
83
- # No reload - use original CMD
93
+ # No reload or full reload: use original CMD without supervisor
94
+ # Note: Full reload logic (container restart) would be implemented here in the future
84
95
  docker_cmd.append(image_name)
85
96
 
86
97
  # Create configuration following MCPConfig schema
@@ -96,9 +107,14 @@ def create_proxy_server(
96
107
 
97
108
  # Debug output - only if verbose
98
109
  if verbose:
99
- if not no_reload:
110
+ if not no_reload and not full_reload:
111
+ design.info("Mode: Hot-reload (server restart within container)")
100
112
  design.info("Watching: /app/src for changes")
113
+ elif full_reload:
114
+ design.info("Mode: Full reload (container restart on file changes)")
115
+ design.info("Note: Full container restart not yet implemented, using no-reload mode")
101
116
  else:
117
+ design.info("Mode: No reload")
102
118
  design.info("Container will run without hot-reload")
103
119
  design.command_example(f"docker logs -f {container_name}", "View container logs")
104
120
 
@@ -127,6 +143,7 @@ async def start_mcp_proxy(
127
143
  transport: str,
128
144
  port: int,
129
145
  no_reload: bool = False,
146
+ full_reload: bool = False,
130
147
  verbose: bool = False,
131
148
  inspector: bool = False,
132
149
  no_logs: bool = False,
@@ -212,8 +229,12 @@ async def start_mcp_proxy(
212
229
  design.error(f"Source directory not found: {src_path}")
213
230
  raise click.Abort
214
231
 
215
- # Extract container name from the proxy configuration
216
- container_name = f"{image_name.replace(':', '-').replace('/', '-')}"
232
+ # Extract container name from the proxy configuration (must match create_proxy_server naming)
233
+ import os
234
+
235
+ pid = str(os.getpid())[-6:] # Last 6 digits of process ID for uniqueness
236
+ base_name = image_name.replace(":", "-").replace("/", "-")
237
+ container_name = f"{base_name}-{pid}"
217
238
 
218
239
  # Remove any existing container with the same name (silently)
219
240
  # Note: The proxy creates containers on-demand when clients connect
@@ -347,6 +368,7 @@ async def start_mcp_proxy(
347
368
  # Always show waiting message
348
369
  log_design.info("") # Empty line for spacing
349
370
  log_design.progress_message("⏳ Waiting for first client connection to start container...")
371
+ log_design.info(f"📋 Looking for container: {container_name}") # noqa: G004
350
372
 
351
373
  # Keep trying to stream logs - container is created on demand
352
374
  has_shown_started = False
@@ -397,7 +419,8 @@ async def start_mcp_proxy(
397
419
 
398
420
  # Show all logs with gold formatting like hud debug
399
421
  # Format all logs in gold/dim style like hud debug's stderr
400
- log_design.console.print(
422
+ # Use stdout console to avoid stderr redirection when not verbose
423
+ log_design._stdout_console.print(
401
424
  f"[rgb(192,150,12)]■[/rgb(192,150,12)] {decoded_line}", highlight=False
402
425
  )
403
426
 
@@ -408,16 +431,19 @@ async def start_mcp_proxy(
408
431
  await asyncio.sleep(1)
409
432
  continue # Loop back to check if container exists
410
433
 
411
- except Exception:
412
- # Some unexpected error
434
+ except Exception as e:
435
+ # Some unexpected error - show it so we can debug
436
+ log_design.warning(f"Failed to stream Docker logs: {e}") # noqa: G004
413
437
  if verbose:
414
- log_design.warning("Failed to stream logs")
438
+ import traceback
439
+
440
+ log_design.warning(f"Traceback: {traceback.format_exc()}") # noqa: G004
415
441
  await asyncio.sleep(1)
416
442
 
417
443
  # CRITICAL: Create proxy AFTER all logging setup to prevent it from resetting logging config
418
444
  # This is important because FastMCP might initialize loggers during creation
419
445
  proxy = create_proxy_server(
420
- directory, image_name, no_reload, verbose, docker_args or [], interactive
446
+ directory, image_name, no_reload, full_reload, verbose, docker_args or [], interactive
421
447
  )
422
448
 
423
449
  # One more attempt to suppress the FastMCP server log
@@ -548,6 +574,7 @@ def run_mcp_dev_server(
548
574
  transport: str = "http",
549
575
  port: int = 8765,
550
576
  no_reload: bool = False,
577
+ full_reload: bool = False,
551
578
  verbose: bool = False,
552
579
  inspector: bool = False,
553
580
  no_logs: bool = False,
@@ -706,6 +733,7 @@ def run_mcp_dev_server(
706
733
  transport,
707
734
  port,
708
735
  no_reload,
736
+ full_reload,
709
737
  verbose,
710
738
  inspector,
711
739
  no_logs,
@@ -26,15 +26,6 @@ def build_agent(
26
26
  """Create and return the requested agent type."""
27
27
 
28
28
  # Import agents lazily to avoid dependency issues
29
- try:
30
- from hud.agents.misc.response_agent import ResponseAgent
31
- except ImportError as e:
32
- design.error(
33
- "Agent dependencies are not installed. "
34
- "Please install with: pip install 'hud-python[agent]'"
35
- )
36
- raise typer.Exit(1) from e
37
-
38
29
  if agent_type == "openai":
39
30
  try:
40
31
  from hud.agents import OperatorAgent
@@ -45,12 +36,12 @@ def build_agent(
45
36
  )
46
37
  raise typer.Exit(1) from e
47
38
 
48
- allowed_tools = allowed_tools or ["openai_computer"]
49
-
50
- return OperatorAgent(
51
- allowed_tools=allowed_tools,
52
- response_agent=ResponseAgent(),
53
- )
39
+ if allowed_tools:
40
+ return OperatorAgent(
41
+ allowed_tools=allowed_tools,
42
+ )
43
+ else:
44
+ return OperatorAgent()
54
45
 
55
46
  # Fallback Claude agent (Anthropic)
56
47
  try:
@@ -63,13 +54,16 @@ def build_agent(
63
54
  raise typer.Exit(1) from e
64
55
 
65
56
  model = model or "claude-sonnet-4-20250514"
66
- allowed_tools = allowed_tools or ["anthropic_computer"]
67
57
 
68
- return ClaudeAgent(
69
- model=model,
70
- allowed_tools=allowed_tools,
71
- response_agent=ResponseAgent(),
72
- )
58
+ if allowed_tools:
59
+ return ClaudeAgent(
60
+ model=model,
61
+ allowed_tools=allowed_tools,
62
+ )
63
+ else:
64
+ return ClaudeAgent(
65
+ model=model,
66
+ )
73
67
 
74
68
 
75
69
  async def run_single_task(
@@ -100,8 +94,8 @@ async def run_single_task(
100
94
  with open(path) as f: # noqa: ASYNC230
101
95
  json_data = json.load(f)
102
96
 
103
- # Check if JSON contains a list of tasks
104
- if isinstance(json_data, list):
97
+ # Check if JSON contains multiple tasks (list with more than 1 task)
98
+ if isinstance(json_data, list) and len(json_data) > 1:
105
99
  design.info(f"Found {len(json_data)} tasks in JSON file, running as dataset…")
106
100
 
107
101
  # Build agent class and config for run_dataset
@@ -118,8 +112,10 @@ async def run_single_task(
118
112
  raise typer.Exit(1) from e
119
113
 
120
114
  agent_config: dict[str, Any] = {
121
- "allowed_tools": allowed_tools or ["openai_computer"],
122
115
  }
116
+ if allowed_tools:
117
+ agent_config["allowed_tools"] = allowed_tools
118
+
123
119
  else:
124
120
  try:
125
121
  from hud.agents import ClaudeAgent
@@ -134,8 +130,9 @@ async def run_single_task(
134
130
 
135
131
  agent_config = {
136
132
  "model": model or "claude-sonnet-4-20250514",
137
- "allowed_tools": allowed_tools or ["anthropic_computer"],
138
133
  }
134
+ if allowed_tools:
135
+ agent_config["allowed_tools"] = allowed_tools
139
136
 
140
137
  # Run as dataset with single-task concurrency to maintain debug behavior
141
138
  results = await run_dataset(
@@ -146,7 +143,6 @@ async def run_single_task(
146
143
  max_concurrent=1, # Run sequentially for debug mode
147
144
  metadata={"source": str(path)},
148
145
  max_steps=max_steps,
149
- auto_respond=True,
150
146
  )
151
147
 
152
148
  # Display summary
@@ -154,8 +150,15 @@ async def run_single_task(
154
150
  design.success(f"Completed {len(results)} tasks: {successful} successful")
155
151
  return
156
152
 
157
- # Single task JSON
158
- task = Task(**json_data)
153
+ # Single task JSON (either direct object or list with 1 task)
154
+ if isinstance(json_data, list) and len(json_data) == 1:
155
+ design.info("Found 1 task in JSON file, running as single task…")
156
+ task = Task(**json_data[0])
157
+ elif isinstance(json_data, dict):
158
+ task = Task(**json_data)
159
+ else:
160
+ design.error("JSON file must contain a list of tasks when using --full flag")
161
+ raise typer.Exit(1)
159
162
  else:
160
163
  # Load from HuggingFace dataset
161
164
  try:
@@ -238,8 +241,10 @@ async def run_full_dataset(
238
241
  raise typer.Exit(1) from e
239
242
 
240
243
  agent_config: dict[str, Any] = {
241
- "allowed_tools": allowed_tools or ["openai_computer"],
242
244
  }
245
+ if allowed_tools:
246
+ agent_config["allowed_tools"] = allowed_tools
247
+
243
248
  else:
244
249
  try:
245
250
  from hud.agents import ClaudeAgent
@@ -254,8 +259,9 @@ async def run_full_dataset(
254
259
 
255
260
  agent_config = {
256
261
  "model": model or "claude-sonnet-4-20250514",
257
- "allowed_tools": allowed_tools or ["anthropic_computer"],
258
262
  }
263
+ if allowed_tools:
264
+ agent_config["allowed_tools"] = allowed_tools
259
265
 
260
266
  design.info("🚀 Running evaluation…")
261
267
  return await run_dataset(
@@ -266,7 +272,6 @@ async def run_full_dataset(
266
272
  max_concurrent=max_concurrent,
267
273
  metadata={"dataset": source},
268
274
  max_steps=max_steps,
269
- auto_respond=True,
270
275
  )
271
276
 
272
277