hud-python 0.4.59__tar.gz → 0.4.60__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (311) hide show
  1. {hud_python-0.4.59 → hud_python-0.4.60}/PKG-INFO +1 -1
  2. {hud_python-0.4.59 → hud_python-0.4.60}/environments/README.md +1 -1
  3. {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/server/pyproject.toml +1 -1
  4. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/gemini.py +2 -1
  5. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/eval.py +21 -16
  6. {hud_python-0.4.59 → hud_python-0.4.60}/hud/datasets/parallel.py +1 -1
  7. {hud_python-0.4.59 → hud_python-0.4.60}/hud/datasets/runner.py +4 -53
  8. hud_python-0.4.60/hud/datasets/tests/test_runner.py +67 -0
  9. {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/context.py +16 -59
  10. {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/actor.py +1 -1
  11. {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/__init__.py +14 -17
  12. {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/async_context.py +77 -85
  13. {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/job.py +8 -44
  14. hud_python-0.4.60/hud/telemetry/tests/test_async_context.py +515 -0
  15. {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/tests/test_job.py +0 -46
  16. {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/trace.py +5 -7
  17. hud_python-0.4.60/hud/telemetry/utils.py +42 -0
  18. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/group_eval.py +19 -11
  19. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_version.py +1 -1
  20. {hud_python-0.4.59 → hud_python-0.4.60}/hud/version.py +1 -1
  21. {hud_python-0.4.59 → hud_python-0.4.60}/pyproject.toml +1 -1
  22. hud_python-0.4.59/hud/datasets/tests/test_runner.py +0 -106
  23. hud_python-0.4.59/hud/telemetry/tests/test_async_context.py +0 -242
  24. {hud_python-0.4.59 → hud_python-0.4.60}/.gitignore +0 -0
  25. {hud_python-0.4.59 → hud_python-0.4.60}/LICENSE +0 -0
  26. {hud_python-0.4.59 → hud_python-0.4.60}/README.md +0 -0
  27. {hud_python-0.4.59 → hud_python-0.4.60}/environments/blank/README.md +0 -0
  28. {hud_python-0.4.59 → hud_python-0.4.60}/environments/blank/environment/README.md +0 -0
  29. {hud_python-0.4.59 → hud_python-0.4.60}/environments/blank/environment/pyproject.toml +0 -0
  30. {hud_python-0.4.59 → hud_python-0.4.60}/environments/blank/server/README.md +0 -0
  31. {hud_python-0.4.59 → hud_python-0.4.60}/environments/blank/server/pyproject.toml +0 -0
  32. {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/README.md +0 -0
  33. {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/browser-base/README.md +0 -0
  34. {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/environment/2048/README.md +0 -0
  35. {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/environment/2048/backend/pyproject.toml +0 -0
  36. {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/environment/README.md +0 -0
  37. {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/environment/pyproject.toml +0 -0
  38. {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/environment/todo/README.md +0 -0
  39. {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/environment/todo/backend/pyproject.toml +0 -0
  40. {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/pyproject.toml +0 -0
  41. {hud_python-0.4.59 → hud_python-0.4.60}/environments/deepresearch/README.md +0 -0
  42. {hud_python-0.4.59 → hud_python-0.4.60}/environments/deepresearch/environment/pyproject.toml +0 -0
  43. {hud_python-0.4.59 → hud_python-0.4.60}/environments/deepresearch/pyproject.toml +0 -0
  44. {hud_python-0.4.59 → hud_python-0.4.60}/environments/deepresearch/server/pyproject.toml +0 -0
  45. {hud_python-0.4.59 → hud_python-0.4.60}/environments/remote_browser/README.md +0 -0
  46. {hud_python-0.4.59 → hud_python-0.4.60}/environments/remote_browser/pyproject.toml +0 -0
  47. {hud_python-0.4.59 → hud_python-0.4.60}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
  48. {hud_python-0.4.59 → hud_python-0.4.60}/environments/rubrics/README.md +0 -0
  49. {hud_python-0.4.59 → hud_python-0.4.60}/environments/rubrics/environment/pyproject.toml +0 -0
  50. {hud_python-0.4.59 → hud_python-0.4.60}/environments/rubrics/pyproject.toml +0 -0
  51. {hud_python-0.4.59 → hud_python-0.4.60}/environments/rubrics/server/pyproject.toml +0 -0
  52. {hud_python-0.4.59 → hud_python-0.4.60}/environments/text_2048/README.md +0 -0
  53. {hud_python-0.4.59 → hud_python-0.4.60}/environments/text_2048/pyproject.toml +0 -0
  54. {hud_python-0.4.59 → hud_python-0.4.60}/examples/README.md +0 -0
  55. {hud_python-0.4.59 → hud_python-0.4.60}/hud/__init__.py +0 -0
  56. {hud_python-0.4.59 → hud_python-0.4.60}/hud/__main__.py +0 -0
  57. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/__init__.py +0 -0
  58. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/base.py +0 -0
  59. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/claude.py +0 -0
  60. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/grounded_openai.py +0 -0
  61. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/langchain.py +0 -0
  62. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/lite_llm.py +0 -0
  63. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/misc/__init__.py +0 -0
  64. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/misc/integration_test_agent.py +0 -0
  65. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/misc/response_agent.py +0 -0
  66. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/openai.py +0 -0
  67. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/openai_chat_generic.py +0 -0
  68. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/tests/__init__.py +0 -0
  69. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/tests/test_base.py +0 -0
  70. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/tests/test_base_runtime.py +0 -0
  71. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/tests/test_claude.py +0 -0
  72. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/tests/test_client.py +0 -0
  73. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/tests/test_gemini.py +0 -0
  74. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
  75. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/tests/test_openai.py +0 -0
  76. {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/utils.py +0 -0
  77. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/__init__.py +0 -0
  78. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/__main__.py +0 -0
  79. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/analyze.py +0 -0
  80. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/build.py +0 -0
  81. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/clone.py +0 -0
  82. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/debug.py +0 -0
  83. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/dev.py +0 -0
  84. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/flows/__init__.py +0 -0
  85. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/flows/dev.py +0 -0
  86. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/flows/tasks.py +0 -0
  87. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/get.py +0 -0
  88. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/init.py +0 -0
  89. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/list_func.py +0 -0
  90. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/pull.py +0 -0
  91. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/push.py +0 -0
  92. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/remove.py +0 -0
  93. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/__init__.py +0 -0
  94. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/celebrate.py +0 -0
  95. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/config.py +0 -0
  96. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/display.py +0 -0
  97. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/gpu.py +0 -0
  98. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/gpu_utils.py +0 -0
  99. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/local_runner.py +0 -0
  100. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/presets.py +0 -0
  101. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/remote_runner.py +0 -0
  102. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/rl_api.py +0 -0
  103. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/viewer.py +0 -0
  104. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/vllm.py +0 -0
  105. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/wait_utils.py +0 -0
  106. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/__init__.py +0 -0
  107. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_analyze.py +0 -0
  108. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_analyze_metadata.py +0 -0
  109. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_analyze_module.py +0 -0
  110. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_build.py +0 -0
  111. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_build_failure.py +0 -0
  112. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_build_module.py +0 -0
  113. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_cli_init.py +0 -0
  114. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_cli_main.py +0 -0
  115. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
  116. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_cli_root.py +0 -0
  117. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_clone.py +0 -0
  118. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_convert.py +0 -0
  119. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_cursor.py +0 -0
  120. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_debug.py +0 -0
  121. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_eval.py +0 -0
  122. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_list_func.py +0 -0
  123. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_main_module.py +0 -0
  124. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_mcp_server.py +0 -0
  125. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_pull.py +0 -0
  126. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_push.py +0 -0
  127. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_push_happy.py +0 -0
  128. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_push_wrapper.py +0 -0
  129. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_registry.py +0 -0
  130. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_utils.py +0 -0
  131. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/__init__.py +0 -0
  132. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/config.py +0 -0
  133. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/cursor.py +0 -0
  134. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/docker.py +0 -0
  135. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/env_check.py +0 -0
  136. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/environment.py +0 -0
  137. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/interactive.py +0 -0
  138. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/local_runner.py +0 -0
  139. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/logging.py +0 -0
  140. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/metadata.py +0 -0
  141. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/package_runner.py +0 -0
  142. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/registry.py +0 -0
  143. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/remote_runner.py +0 -0
  144. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/runner.py +0 -0
  145. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/server.py +0 -0
  146. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/source_hash.py +0 -0
  147. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tasks.py +0 -0
  148. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/__init__.py +0 -0
  149. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_config.py +0 -0
  150. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_docker.py +0 -0
  151. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_docker_hints.py +0 -0
  152. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_env_check.py +0 -0
  153. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_environment.py +0 -0
  154. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_interactive_module.py +0 -0
  155. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_local_runner.py +0 -0
  156. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_logging_utils.py +0 -0
  157. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_metadata.py +0 -0
  158. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_package_runner.py +0 -0
  159. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_registry_utils.py +0 -0
  160. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_remote_runner.py +0 -0
  161. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_runner_modules.py +0 -0
  162. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_source_hash.py +0 -0
  163. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_tasks.py +0 -0
  164. {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/version_check.py +0 -0
  165. {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/README.md +0 -0
  166. {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/__init__.py +0 -0
  167. {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/base.py +0 -0
  168. {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/fastmcp.py +0 -0
  169. {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/mcp_use.py +0 -0
  170. {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/tests/__init__.py +0 -0
  171. {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/tests/test_client_integration.py +0 -0
  172. {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/tests/test_fastmcp.py +0 -0
  173. {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/tests/test_mcp_use_retry.py +0 -0
  174. {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/tests/test_protocol.py +0 -0
  175. {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/utils/__init__.py +0 -0
  176. {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/utils/mcp_use_retry.py +0 -0
  177. {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/utils/retry.py +0 -0
  178. {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/utils/retry_transport.py +0 -0
  179. {hud_python-0.4.59 → hud_python-0.4.60}/hud/datasets/__init__.py +0 -0
  180. {hud_python-0.4.59 → hud_python-0.4.60}/hud/datasets/tests/__init__.py +0 -0
  181. {hud_python-0.4.59 → hud_python-0.4.60}/hud/datasets/tests/test_utils.py +0 -0
  182. {hud_python-0.4.59 → hud_python-0.4.60}/hud/datasets/utils.py +0 -0
  183. {hud_python-0.4.59 → hud_python-0.4.60}/hud/misc/__init__.py +0 -0
  184. {hud_python-0.4.59 → hud_python-0.4.60}/hud/misc/claude_plays_pokemon.py +0 -0
  185. {hud_python-0.4.59 → hud_python-0.4.60}/hud/native/__init__.py +0 -0
  186. {hud_python-0.4.59 → hud_python-0.4.60}/hud/native/comparator.py +0 -0
  187. {hud_python-0.4.59 → hud_python-0.4.60}/hud/native/tests/__init__.py +0 -0
  188. {hud_python-0.4.59 → hud_python-0.4.60}/hud/native/tests/test_comparator.py +0 -0
  189. {hud_python-0.4.59 → hud_python-0.4.60}/hud/native/tests/test_native_init.py +0 -0
  190. {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/__init__.py +0 -0
  191. {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/collector.py +0 -0
  192. {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/config.py +0 -0
  193. {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/exporters.py +0 -0
  194. {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/instrumentation.py +0 -0
  195. {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/processors.py +0 -0
  196. {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/tests/__init__.py +0 -0
  197. {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/tests/test_instrumentation.py +0 -0
  198. {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/tests/test_processors.py +0 -0
  199. {hud_python-0.4.59 → hud_python-0.4.60}/hud/py.typed +0 -0
  200. {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/README.md +0 -0
  201. {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/__init__.py +0 -0
  202. {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/buffer.py +0 -0
  203. {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/chat_template.jinja +0 -0
  204. {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/config.py +0 -0
  205. {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/distributed.py +0 -0
  206. {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/learner.py +0 -0
  207. {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/tests/__init__.py +0 -0
  208. {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/tests/test_learner.py +0 -0
  209. {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/train.py +0 -0
  210. {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/types.py +0 -0
  211. {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/utils/start_vllm_server.sh +0 -0
  212. {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/utils.py +0 -0
  213. {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/vllm_adapter.py +0 -0
  214. {hud_python-0.4.59 → hud_python-0.4.60}/hud/samples/__init__.py +0 -0
  215. {hud_python-0.4.59 → hud_python-0.4.60}/hud/samples/browser.py +0 -0
  216. {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/__init__.py +0 -0
  217. {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/context.py +0 -0
  218. {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/helper/__init__.py +0 -0
  219. {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/low_level.py +0 -0
  220. {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/router.py +0 -0
  221. {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/server.py +0 -0
  222. {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/tests/__init__.py +0 -0
  223. {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/tests/test_add_tool.py +0 -0
  224. {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/tests/test_context.py +0 -0
  225. {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/tests/test_mcp_server_handlers.py +0 -0
  226. {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/tests/test_mcp_server_integration.py +0 -0
  227. {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/tests/test_mcp_server_more.py +0 -0
  228. {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/tests/test_run_wrapper.py +0 -0
  229. {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/tests/test_server_extra.py +0 -0
  230. {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/tests/test_sigterm_runner.py +0 -0
  231. {hud_python-0.4.59 → hud_python-0.4.60}/hud/settings.py +0 -0
  232. {hud_python-0.4.59 → hud_python-0.4.60}/hud/shared/__init__.py +0 -0
  233. {hud_python-0.4.59 → hud_python-0.4.60}/hud/shared/exceptions.py +0 -0
  234. {hud_python-0.4.59 → hud_python-0.4.60}/hud/shared/hints.py +0 -0
  235. {hud_python-0.4.59 → hud_python-0.4.60}/hud/shared/requests.py +0 -0
  236. {hud_python-0.4.59 → hud_python-0.4.60}/hud/shared/tests/__init__.py +0 -0
  237. {hud_python-0.4.59 → hud_python-0.4.60}/hud/shared/tests/test_exceptions.py +0 -0
  238. {hud_python-0.4.59 → hud_python-0.4.60}/hud/shared/tests/test_hints.py +0 -0
  239. {hud_python-0.4.59 → hud_python-0.4.60}/hud/shared/tests/test_requests.py +0 -0
  240. {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/instrument.py +0 -0
  241. {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/replay.py +0 -0
  242. {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/tests/__init__.py +0 -0
  243. {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/tests/test_instrument.py +0 -0
  244. {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/tests/test_replay.py +0 -0
  245. {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/tests/test_trace.py +0 -0
  246. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/__init__.py +0 -0
  247. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/base.py +0 -0
  248. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/bash.py +0 -0
  249. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/computer/__init__.py +0 -0
  250. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/computer/anthropic.py +0 -0
  251. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/computer/gemini.py +0 -0
  252. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/computer/hud.py +0 -0
  253. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/computer/openai.py +0 -0
  254. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/computer/qwen.py +0 -0
  255. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/computer/settings.py +0 -0
  256. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/edit.py +0 -0
  257. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/executors/__init__.py +0 -0
  258. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/executors/base.py +0 -0
  259. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/executors/pyautogui.py +0 -0
  260. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/executors/tests/__init__.py +0 -0
  261. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/executors/tests/test_base_executor.py +0 -0
  262. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  263. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/executors/xdo.py +0 -0
  264. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/grounding/__init__.py +0 -0
  265. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/grounding/config.py +0 -0
  266. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/grounding/grounded_tool.py +0 -0
  267. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/grounding/grounder.py +0 -0
  268. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/grounding/tests/__init__.py +0 -0
  269. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
  270. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/playwright.py +0 -0
  271. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/response.py +0 -0
  272. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/submit.py +0 -0
  273. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/__init__.py +0 -0
  274. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_base.py +0 -0
  275. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_bash.py +0 -0
  276. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_bash_extended.py +0 -0
  277. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_computer.py +0 -0
  278. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_computer_actions.py +0 -0
  279. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_edit.py +0 -0
  280. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_init.py +0 -0
  281. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_playwright_tool.py +0 -0
  282. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_response.py +0 -0
  283. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_submit.py +0 -0
  284. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_tools.py +0 -0
  285. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_tools_init.py +0 -0
  286. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_types.py +0 -0
  287. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_utils.py +0 -0
  288. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/types.py +0 -0
  289. {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/utils.py +0 -0
  290. {hud_python-0.4.59 → hud_python-0.4.60}/hud/types.py +0 -0
  291. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/__init__.py +0 -0
  292. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/agent_factories.py +0 -0
  293. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/async_utils.py +0 -0
  294. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/hud_console.py +0 -0
  295. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/mcp.py +0 -0
  296. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/pretty_errors.py +0 -0
  297. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/progress.py +0 -0
  298. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/task_tracking.py +0 -0
  299. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tasks.py +0 -0
  300. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/telemetry.py +0 -0
  301. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/__init__.py +0 -0
  302. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_agent_factories.py +0 -0
  303. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_async_utils.py +0 -0
  304. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_init.py +0 -0
  305. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_mcp.py +0 -0
  306. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_pretty_errors.py +0 -0
  307. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_progress.py +0 -0
  308. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_tasks.py +0 -0
  309. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_telemetry.py +0 -0
  310. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_tool_shorthand.py +0 -0
  311. {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tool_shorthand.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.4.59
3
+ Version: 0.4.60
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -496,7 +496,7 @@ from hud.clients import MCPClient
496
496
 
497
497
  async def main():
498
498
  # `trace` captures *everything* that happens and sends it to hud.ai
499
- with hud.trace("local_test"):
499
+ async with hud.async_trace("local_test"):
500
500
  task = Task(
501
501
  prompt="Complete the task",
502
502
  mcp_config={
@@ -4,7 +4,7 @@ version = "0.1.0"
4
4
  description = "HUD Browser MCP Server"
5
5
  requires-python = ">=3.11,<3.14"
6
6
  dependencies = [
7
- "hud-python>=0.4.59",
7
+ "hud-python>=0.4.60",
8
8
  "httpx",
9
9
  "playwright",
10
10
  "pyautogui",
@@ -461,7 +461,8 @@ class GeminiAgent(MCPAgent):
461
461
  def _remove_old_screenshots(self, messages: list[genai_types.Content]) -> None:
462
462
  """
463
463
  Remove screenshots from old turns to manage context length.
464
- Keeps only the last N turns with screenshots (configured via self.max_recent_turn_with_screenshots).
464
+ Keeps only the last N turns with screenshots (configured via
465
+ self.max_recent_turn_with_screenshots).
465
466
  """
466
467
  turn_with_screenshots_found = 0
467
468
 
@@ -260,9 +260,8 @@ async def run_single_task(
260
260
  ) -> None:
261
261
  """Load one task and execute it, or detect if JSON contains a list and run as dataset."""
262
262
 
263
- # Provide early feedback to user
264
263
  hud_console.info("🔧 Initializing evaluation...")
265
- # Import Task and run_dataset lazily
264
+
266
265
  try:
267
266
  from hud.utils.tasks import load_tasks
268
267
  except ImportError as e:
@@ -399,23 +398,31 @@ async def run_single_task(
399
398
 
400
399
  if group_size > 1:
401
400
  hud_console.info(f"🔄 Running task with group_size={group_size}")
402
- # Run with grouping
403
- stats = await run_tasks_grouped(
404
- tasks=[task],
405
- agent_class=agent_class,
406
- agent_config=agent_config,
407
- group_size=group_size,
408
- max_parallel_episodes=48, # Same as RL default
409
- max_steps=max_steps,
410
- verbose=verbose,
411
- )
401
+ async with hud.async_job(
402
+ name=f"Group Eval: {task_prompt[:50]}... (x{group_size})",
403
+ metadata={
404
+ "task_id": getattr(task, "id", None),
405
+ "group_size": group_size,
406
+ "total_episodes": group_size,
407
+ },
408
+ ) as job:
409
+ stats = await run_tasks_grouped(
410
+ tasks=[task],
411
+ agent_class=agent_class,
412
+ agent_config=agent_config,
413
+ group_size=group_size,
414
+ max_parallel_episodes=48,
415
+ max_steps=max_steps,
416
+ verbose=verbose,
417
+ job_id=job.id,
418
+ )
412
419
  display_group_statistics(stats, show_details=True)
413
420
  else:
414
421
  # Enable agent step logging for single task mode
415
422
  logging.getLogger("hud.agents").setLevel(logging.INFO)
416
423
  logging.getLogger("hud.agents.base").setLevel(logging.INFO)
417
424
 
418
- with hud.trace(name=task_prompt):
425
+ async with hud.async_trace(name=task_prompt):
419
426
  agent = build_agent(
420
427
  agent_type,
421
428
  model=model,
@@ -442,10 +449,8 @@ async def run_full_dataset(
442
449
  ) -> list[Any]:
443
450
  """Run evaluation across the entire dataset using asyncio-based concurrency."""
444
451
 
445
- # Provide early feedback to user
446
452
  hud_console.info("🔧 Initializing evaluation...")
447
453
 
448
- # Import run_dataset lazily
449
454
  try:
450
455
  from hud.datasets import run_dataset
451
456
  from hud.utils.tasks import load_tasks
@@ -627,7 +632,7 @@ async def run_full_dataset(
627
632
  hud_console.info(f"🔄 Running dataset with group_size={group_size}")
628
633
 
629
634
  # Run with job tracking
630
- with hud.job(
635
+ async with hud.async_job(
631
636
  name=f"Evaluation {dataset_name} (group_size={group_size})",
632
637
  metadata={
633
638
  "dataset": source,
@@ -371,7 +371,7 @@ async def run_dataset_parallel_manual(
371
371
  logger.warning("Failed to extract dataset verification info")
372
372
 
373
373
  # Create job context
374
- with hud.job(name, metadata=job_metadata, dataset_link=dataset_link) as job_obj:
374
+ async with hud.async_job(name, metadata=job_metadata, dataset_link=dataset_link) as job_obj:
375
375
  # Prepare agent class info for pickling
376
376
  agent_module = agent_class.__module__
377
377
  agent_name = agent_class.__name__
@@ -30,20 +30,14 @@ async def run_dataset(
30
30
  ) -> list[Any]:
31
31
  """Run all tasks in a dataset with automatic job and telemetry tracking.
32
32
 
33
- This function handles concurrent task execution with proper telemetry collection.
34
- All tasks are executed in parallel up to `max_concurrent`, with full telemetry
35
- automatically uploaded to the HUD platform.
36
-
37
33
  Args:
38
34
  name: Name for the job
39
35
  dataset: HuggingFace dataset identifier (e.g. "hud-evals/SheetBench-50"),
40
36
  Dataset object, OR list of Task objects
41
37
  agent_class: Agent class to instantiate (e.g., ClaudeAgent)
42
- agent_config: Configuration/kwargs for agent (model, etc.)
43
- max_concurrent: Maximum parallel task execution. Higher values improve throughput
44
- but may increase memory usage. Recommended: 30-200 depending on
45
- task complexity and available resources.
46
- metadata: Optional metadata for the job
38
+ agent_config: Configuration kwargs for agent initialization
39
+ max_concurrent: Maximum concurrent tasks (recommended: 50-200)
40
+ metadata: Optional job metadata
47
41
  max_steps: Maximum steps per task
48
42
  split: Dataset split to use when loading from string (default: "train")
49
43
  auto_respond: Whether to use auto-response agent
@@ -101,7 +95,6 @@ async def run_dataset(
101
95
  except Exception:
102
96
  logger.warning("Failed to extract dataset verification info")
103
97
 
104
- # Use async job context manager for high-concurrency telemetry
105
98
  async with hud.async_job(name, metadata=job_metadata, dataset_link=dataset_link) as job_obj:
106
99
  # Run tasks with semaphore for concurrency control
107
100
  sem = asyncio.Semaphore(max_concurrent)
@@ -112,12 +105,10 @@ async def run_dataset(
112
105
  try:
113
106
  # Create trace for this task
114
107
  task_name = task_dict.get("prompt") or f"Task {index}"
115
-
116
- # Ensure task_id is a string for baggage propagation
117
108
  raw_task_id = task_dict.get("id")
118
109
  safe_task_id = str(raw_task_id) if raw_task_id is not None else None
110
+
119
111
  async with hud.async_trace(task_name, job_id=job_obj.id, task_id=safe_task_id):
120
- # with hud.trace(task_name, job_id=job_obj.id, task_id=safe_task_id):
121
112
  # Convert dict to Task here, at trace level
122
113
  task = Task(**task_dict)
123
114
 
@@ -141,44 +132,4 @@ async def run_dataset(
141
132
  if isinstance(result, Exception):
142
133
  logger.error("Worker %s failed with exception: %s", i, result, exc_info=result)
143
134
 
144
- # Ensure all telemetry is uploaded before returning
145
- await _flush_telemetry()
146
-
147
135
  return results
148
-
149
-
150
- async def _flush_telemetry() -> None:
151
- """Flush all pending telemetry operations.
152
-
153
- Ensures complete telemetry upload by:
154
- 1. Waiting for all async status updates to complete
155
- 2. Forcing OpenTelemetry span processor to export remaining spans
156
-
157
- This prevents telemetry loss at high concurrency (200+ tasks) by ensuring
158
- all operations complete before process exit.
159
- """
160
- from hud.otel.config import is_telemetry_configured
161
- from hud.utils import hud_console
162
- from hud.utils.task_tracking import wait_all_tasks
163
-
164
- hud_console.info("Uploading telemetry...")
165
-
166
- # Step 1: Wait for async status updates (job/trace status)
167
- completed_tasks = await wait_all_tasks(timeout_seconds=20.0)
168
- if completed_tasks > 0:
169
- hud_console.info(f"Completed {completed_tasks} pending telemetry tasks")
170
-
171
- # Step 2: Flush OpenTelemetry span exports
172
- if is_telemetry_configured():
173
- try:
174
- from opentelemetry import trace
175
- from opentelemetry.sdk.trace import TracerProvider
176
-
177
- provider = trace.get_tracer_provider()
178
- if isinstance(provider, TracerProvider):
179
- provider.force_flush(timeout_millis=20000)
180
- logger.debug("OpenTelemetry spans flushed successfully")
181
- except Exception as e:
182
- logger.warning("Failed to flush OpenTelemetry: %s", e)
183
-
184
- hud_console.info("Telemetry uploaded successfully")
@@ -0,0 +1,67 @@
1
+ from __future__ import annotations
2
+
3
+ from unittest.mock import MagicMock, patch
4
+
5
+ import pytest
6
+
7
+ from hud.telemetry.utils import flush_telemetry
8
+
9
+
10
+ @pytest.mark.asyncio
11
+ async def test_flush_telemetry():
12
+ """Test flush_telemetry function."""
13
+ with (
14
+ patch("hud.otel.config.is_telemetry_configured", return_value=True),
15
+ patch("hud.utils.hud_console.hud_console"),
16
+ patch("opentelemetry.trace.get_tracer_provider") as mock_get_provider,
17
+ ):
18
+ from opentelemetry.sdk.trace import TracerProvider
19
+
20
+ mock_provider = MagicMock(spec=TracerProvider)
21
+ mock_provider.force_flush.return_value = True
22
+ mock_get_provider.return_value = mock_provider
23
+
24
+ await flush_telemetry()
25
+
26
+ mock_provider.force_flush.assert_called_once_with(timeout_millis=5000)
27
+
28
+
29
+ @pytest.mark.asyncio
30
+ async def test_flush_telemetry_not_configured():
31
+ """Test flush_telemetry when telemetry is not configured."""
32
+ with patch("hud.otel.config.is_telemetry_configured", return_value=False):
33
+ await flush_telemetry()
34
+
35
+
36
+ @pytest.mark.asyncio
37
+ async def test_flush_telemetry_exception():
38
+ """Test flush_telemetry handles exceptions gracefully."""
39
+ with (
40
+ patch("hud.otel.config.is_telemetry_configured", return_value=True),
41
+ patch("hud.utils.hud_console.hud_console"),
42
+ patch("opentelemetry.trace.get_tracer_provider") as mock_get_provider,
43
+ ):
44
+ from opentelemetry.sdk.trace import TracerProvider
45
+
46
+ mock_provider = MagicMock(spec=TracerProvider)
47
+ mock_provider.force_flush.side_effect = Exception("Flush failed")
48
+ mock_get_provider.return_value = mock_provider
49
+
50
+ await flush_telemetry()
51
+
52
+
53
+ @pytest.mark.asyncio
54
+ async def test_flush_telemetry_timeout():
55
+ """Test flush_telemetry when force_flush times out."""
56
+ with (
57
+ patch("hud.otel.config.is_telemetry_configured", return_value=True),
58
+ patch("hud.utils.hud_console.hud_console"),
59
+ patch("opentelemetry.trace.get_tracer_provider") as mock_get_provider,
60
+ ):
61
+ from opentelemetry.sdk.trace import TracerProvider
62
+
63
+ mock_provider = MagicMock(spec=TracerProvider)
64
+ mock_provider.force_flush.return_value = False
65
+ mock_get_provider.return_value = mock_provider
66
+
67
+ await flush_telemetry()
@@ -22,7 +22,6 @@ if TYPE_CHECKING:
22
22
 
23
23
  from hud.settings import settings
24
24
  from hud.shared import make_request, make_request_sync
25
- from hud.utils.async_utils import fire_and_forget
26
25
 
27
26
  logger = logging.getLogger(__name__)
28
27
 
@@ -301,32 +300,6 @@ async def _update_task_status_async(
301
300
  logger.warning("Failed to update task status: %s", e)
302
301
 
303
302
 
304
- def _fire_and_forget_status_update(
305
- task_run_id: str,
306
- status: str,
307
- job_id: str | None = None,
308
- error_message: str | None = None,
309
- trace_name: str | None = None,
310
- task_id: str | None = None,
311
- group_id: str | None = None,
312
- extra_metadata: dict[str, Any] | None = None,
313
- ) -> None:
314
- """Fire and forget status update - works in any context including Jupyter."""
315
- fire_and_forget(
316
- _update_task_status_async(
317
- task_run_id,
318
- status,
319
- job_id,
320
- error_message,
321
- trace_name,
322
- task_id,
323
- group_id,
324
- extra_metadata,
325
- ),
326
- f"update task {task_run_id} status to {status}",
327
- )
328
-
329
-
330
303
  def _update_task_status_sync(
331
304
  task_run_id: str,
332
305
  status: str,
@@ -468,7 +441,7 @@ def _print_trace_complete_url(task_run_id: str, error_occurred: bool = False) ->
468
441
  class trace:
469
442
  """Internal OpenTelemetry trace context manager.
470
443
 
471
- This is the implementation class. Users should use hud.trace() instead.
444
+ This is the sync implementation. For async code, use hud.async_trace() instead.
472
445
  """
473
446
 
474
447
  def __init__(
@@ -532,9 +505,9 @@ class trace:
532
505
  )
533
506
  self._span = self._span_manager.__enter__()
534
507
 
535
- # Update task status to running if root (only for HUD backend)
508
+ # Update task status to running (sync call - blocking is expected)
536
509
  if self.is_root and settings.telemetry_enabled and settings.api_key:
537
- _fire_and_forget_status_update(
510
+ _update_task_status_sync(
538
511
  self.task_run_id,
539
512
  "running",
540
513
  job_id=self.job_id,
@@ -542,7 +515,6 @@ class trace:
542
515
  task_id=self.task_id,
543
516
  group_id=self.group_id,
544
517
  )
545
- # Print the nice trace URL box (only if not part of a job)
546
518
  if not self.job_id:
547
519
  _print_trace_url(self.task_run_id)
548
520
 
@@ -556,35 +528,20 @@ class trace:
556
528
  exc_tb: TracebackType | None,
557
529
  ) -> None:
558
530
  """Exit the trace context."""
559
- # Update task status if root (only for HUD backend)
531
+ # Update task status (sync call - blocking is expected for sync context manager)
560
532
  if self.is_root and settings.telemetry_enabled and settings.api_key:
561
- if exc_type is not None:
562
- # Use fire-and-forget to avoid blocking the event loop
563
- _fire_and_forget_status_update(
564
- self.task_run_id,
565
- "error",
566
- job_id=self.job_id,
567
- error_message=str(exc_val),
568
- trace_name=self.span_name,
569
- task_id=self.task_id,
570
- group_id=self.group_id,
571
- )
572
- # Print error completion message (only if not part of a job)
573
- if not self.job_id:
574
- _print_trace_complete_url(self.task_run_id, error_occurred=True)
575
- else:
576
- # Use fire-and-forget to avoid blocking the event loop
577
- _fire_and_forget_status_update(
578
- self.task_run_id,
579
- "completed",
580
- job_id=self.job_id,
581
- trace_name=self.span_name,
582
- task_id=self.task_id,
583
- group_id=self.group_id,
584
- )
585
- # Print success completion message (only if not part of a job)
586
- if not self.job_id:
587
- _print_trace_complete_url(self.task_run_id, error_occurred=False)
533
+ status = "error" if exc_type else "completed"
534
+ _update_task_status_sync(
535
+ self.task_run_id,
536
+ status,
537
+ job_id=self.job_id,
538
+ error_message=str(exc_val) if exc_val else None,
539
+ trace_name=self.span_name,
540
+ task_id=self.task_id,
541
+ group_id=self.group_id,
542
+ )
543
+ if not self.job_id:
544
+ _print_trace_complete_url(self.task_run_id, error_occurred=bool(exc_type))
588
545
 
589
546
  # End the span
590
547
  if self._span and self._span_manager is not None:
@@ -109,7 +109,7 @@ class Actor:
109
109
 
110
110
  # Run the task
111
111
  try:
112
- with hud.trace(f"Training | {task.prompt}", job_id=job_id):
112
+ async with hud.async_trace(f"Training | {task.prompt}", job_id=job_id):
113
113
  result = await agent.run(task, max_steps=self.actor_config.max_steps_per_episode)
114
114
 
115
115
  except Exception:
@@ -2,30 +2,27 @@
2
2
 
3
3
  Provides telemetry APIs for tracking agent execution and experiments.
4
4
 
5
- Standard Usage:
5
+ Async Usage (Recommended):
6
6
  >>> import hud
7
- >>> with hud.trace("My Task"):
8
- ... do_work()
7
+ >>> async with hud.async_trace("Task"):
8
+ ... await agent.run(task)
9
+ >>> async with hud.async_job("Evaluation") as job:
10
+ ... async with hud.async_trace("Task", job_id=job.id):
11
+ ... await agent.run(task)
9
12
 
13
+ Sync Usage:
14
+ >>> import hud
15
+ >>> with hud.trace("Task"):
16
+ ... do_work()
10
17
  >>> with hud.job("My Job") as job:
11
18
  ... with hud.trace("Task", job_id=job.id):
12
19
  ... do_work()
13
20
 
14
- High-Concurrency Usage (200+ parallel tasks):
15
- >>> import hud
16
- >>> async with hud.async_job("Evaluation") as job:
17
- ... async with hud.async_trace("Task", job_id=job.id):
18
- ... await do_async_work()
19
-
20
21
  APIs:
21
- - trace(), job() - Standard context managers (for typical usage)
22
- - async_trace(), async_job() - Async context managers (for high concurrency)
23
- - instrument() - Decorator for instrumenting functions
24
- - get_trace() - Retrieve collected traces for replay
25
-
26
- Note:
27
- Use async_trace/async_job only for high-concurrency scenarios (200+ tasks).
28
- The run_dataset() function uses them automatically.
22
+ - async_trace(), async_job() - Async context managers (recommended)
23
+ - trace(), job() - Sync context managers
24
+ - flush_telemetry() - Manual span flushing (rarely needed)
25
+ - instrument() - Function instrumentation decorator
29
26
  """
30
27
 
31
28
  from __future__ import annotations