hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274)
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +11 -5
  3. hud/agents/base.py +220 -500
  4. hud/agents/claude.py +200 -240
  5. hud/agents/gemini.py +275 -0
  6. hud/agents/gemini_cua.py +335 -0
  7. hud/agents/grounded_openai.py +98 -100
  8. hud/agents/misc/integration_test_agent.py +51 -20
  9. hud/agents/misc/response_agent.py +41 -36
  10. hud/agents/openai.py +291 -292
  11. hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
  12. hud/agents/operator.py +211 -0
  13. hud/agents/tests/conftest.py +133 -0
  14. hud/agents/tests/test_base.py +300 -622
  15. hud/agents/tests/test_base_runtime.py +233 -0
  16. hud/agents/tests/test_claude.py +379 -210
  17. hud/agents/tests/test_client.py +9 -10
  18. hud/agents/tests/test_gemini.py +369 -0
  19. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  20. hud/agents/tests/test_openai.py +376 -140
  21. hud/agents/tests/test_operator.py +362 -0
  22. hud/agents/tests/test_run_eval.py +179 -0
  23. hud/cli/__init__.py +461 -545
  24. hud/cli/analyze.py +43 -5
  25. hud/cli/build.py +664 -110
  26. hud/cli/debug.py +8 -5
  27. hud/cli/dev.py +882 -734
  28. hud/cli/eval.py +782 -668
  29. hud/cli/flows/dev.py +167 -0
  30. hud/cli/flows/init.py +191 -0
  31. hud/cli/flows/tasks.py +153 -56
  32. hud/cli/flows/templates.py +151 -0
  33. hud/cli/flows/tests/__init__.py +1 -0
  34. hud/cli/flows/tests/test_dev.py +126 -0
  35. hud/cli/init.py +60 -58
  36. hud/cli/push.py +29 -11
  37. hud/cli/rft.py +311 -0
  38. hud/cli/rft_status.py +145 -0
  39. hud/cli/tests/test_analyze.py +5 -5
  40. hud/cli/tests/test_analyze_metadata.py +3 -2
  41. hud/cli/tests/test_analyze_module.py +120 -0
  42. hud/cli/tests/test_build.py +108 -6
  43. hud/cli/tests/test_build_failure.py +41 -0
  44. hud/cli/tests/test_build_module.py +50 -0
  45. hud/cli/tests/test_cli_init.py +6 -1
  46. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  47. hud/cli/tests/test_cli_root.py +140 -0
  48. hud/cli/tests/test_convert.py +361 -0
  49. hud/cli/tests/test_debug.py +12 -10
  50. hud/cli/tests/test_dev.py +197 -0
  51. hud/cli/tests/test_eval.py +251 -0
  52. hud/cli/tests/test_eval_bedrock.py +51 -0
  53. hud/cli/tests/test_init.py +124 -0
  54. hud/cli/tests/test_main_module.py +11 -5
  55. hud/cli/tests/test_mcp_server.py +12 -100
  56. hud/cli/tests/test_push_happy.py +74 -0
  57. hud/cli/tests/test_push_wrapper.py +23 -0
  58. hud/cli/tests/test_registry.py +1 -1
  59. hud/cli/tests/test_utils.py +1 -1
  60. hud/cli/{rl → utils}/celebrate.py +14 -12
  61. hud/cli/utils/config.py +18 -1
  62. hud/cli/utils/docker.py +130 -4
  63. hud/cli/utils/env_check.py +9 -9
  64. hud/cli/utils/git.py +136 -0
  65. hud/cli/utils/interactive.py +39 -5
  66. hud/cli/utils/metadata.py +69 -0
  67. hud/cli/utils/runner.py +1 -1
  68. hud/cli/utils/server.py +2 -2
  69. hud/cli/utils/source_hash.py +3 -3
  70. hud/cli/utils/tasks.py +4 -1
  71. hud/cli/utils/tests/__init__.py +0 -0
  72. hud/cli/utils/tests/test_config.py +58 -0
  73. hud/cli/utils/tests/test_docker.py +93 -0
  74. hud/cli/utils/tests/test_docker_hints.py +71 -0
  75. hud/cli/utils/tests/test_env_check.py +74 -0
  76. hud/cli/utils/tests/test_environment.py +42 -0
  77. hud/cli/utils/tests/test_git.py +142 -0
  78. hud/cli/utils/tests/test_interactive_module.py +60 -0
  79. hud/cli/utils/tests/test_local_runner.py +50 -0
  80. hud/cli/utils/tests/test_logging_utils.py +23 -0
  81. hud/cli/utils/tests/test_metadata.py +49 -0
  82. hud/cli/utils/tests/test_package_runner.py +35 -0
  83. hud/cli/utils/tests/test_registry_utils.py +49 -0
  84. hud/cli/utils/tests/test_remote_runner.py +25 -0
  85. hud/cli/utils/tests/test_runner_modules.py +52 -0
  86. hud/cli/utils/tests/test_source_hash.py +36 -0
  87. hud/cli/utils/tests/test_tasks.py +80 -0
  88. hud/cli/utils/version_check.py +258 -0
  89. hud/cli/{rl → utils}/viewer.py +2 -2
  90. hud/clients/README.md +12 -11
  91. hud/clients/__init__.py +4 -3
  92. hud/clients/base.py +166 -26
  93. hud/clients/environment.py +51 -0
  94. hud/clients/fastmcp.py +13 -6
  95. hud/clients/mcp_use.py +40 -15
  96. hud/clients/tests/test_analyze_scenarios.py +206 -0
  97. hud/clients/tests/test_protocol.py +9 -3
  98. hud/datasets/__init__.py +23 -20
  99. hud/datasets/loader.py +327 -0
  100. hud/datasets/runner.py +192 -105
  101. hud/datasets/tests/__init__.py +0 -0
  102. hud/datasets/tests/test_loader.py +221 -0
  103. hud/datasets/tests/test_utils.py +315 -0
  104. hud/datasets/utils.py +270 -90
  105. hud/environment/__init__.py +50 -0
  106. hud/environment/connection.py +206 -0
  107. hud/environment/connectors/__init__.py +33 -0
  108. hud/environment/connectors/base.py +68 -0
  109. hud/environment/connectors/local.py +177 -0
  110. hud/environment/connectors/mcp_config.py +109 -0
  111. hud/environment/connectors/openai.py +101 -0
  112. hud/environment/connectors/remote.py +172 -0
  113. hud/environment/environment.py +694 -0
  114. hud/environment/integrations/__init__.py +45 -0
  115. hud/environment/integrations/adk.py +67 -0
  116. hud/environment/integrations/anthropic.py +196 -0
  117. hud/environment/integrations/gemini.py +92 -0
  118. hud/environment/integrations/langchain.py +82 -0
  119. hud/environment/integrations/llamaindex.py +68 -0
  120. hud/environment/integrations/openai.py +238 -0
  121. hud/environment/mock.py +306 -0
  122. hud/environment/router.py +112 -0
  123. hud/environment/scenarios.py +493 -0
  124. hud/environment/tests/__init__.py +1 -0
  125. hud/environment/tests/test_connection.py +317 -0
  126. hud/environment/tests/test_connectors.py +218 -0
  127. hud/environment/tests/test_environment.py +161 -0
  128. hud/environment/tests/test_integrations.py +257 -0
  129. hud/environment/tests/test_local_connectors.py +201 -0
  130. hud/environment/tests/test_scenarios.py +280 -0
  131. hud/environment/tests/test_tools.py +208 -0
  132. hud/environment/types.py +23 -0
  133. hud/environment/utils/__init__.py +35 -0
  134. hud/environment/utils/formats.py +215 -0
  135. hud/environment/utils/schema.py +171 -0
  136. hud/environment/utils/tool_wrappers.py +113 -0
  137. hud/eval/__init__.py +67 -0
  138. hud/eval/context.py +674 -0
  139. hud/eval/display.py +299 -0
  140. hud/eval/instrument.py +185 -0
  141. hud/eval/manager.py +466 -0
  142. hud/eval/parallel.py +268 -0
  143. hud/eval/task.py +340 -0
  144. hud/eval/tests/__init__.py +1 -0
  145. hud/eval/tests/test_context.py +178 -0
  146. hud/eval/tests/test_eval.py +210 -0
  147. hud/eval/tests/test_manager.py +152 -0
  148. hud/eval/tests/test_parallel.py +168 -0
  149. hud/eval/tests/test_task.py +145 -0
  150. hud/eval/types.py +63 -0
  151. hud/eval/utils.py +183 -0
  152. hud/patches/__init__.py +19 -0
  153. hud/patches/mcp_patches.py +151 -0
  154. hud/patches/warnings.py +54 -0
  155. hud/samples/browser.py +4 -4
  156. hud/server/__init__.py +2 -1
  157. hud/server/low_level.py +2 -1
  158. hud/server/router.py +164 -0
  159. hud/server/server.py +567 -80
  160. hud/server/tests/test_mcp_server_integration.py +11 -11
  161. hud/server/tests/test_mcp_server_more.py +1 -1
  162. hud/server/tests/test_server_extra.py +2 -0
  163. hud/settings.py +45 -3
  164. hud/shared/exceptions.py +36 -10
  165. hud/shared/hints.py +26 -1
  166. hud/shared/requests.py +15 -3
  167. hud/shared/tests/test_exceptions.py +40 -31
  168. hud/shared/tests/test_hints.py +167 -0
  169. hud/telemetry/__init__.py +20 -19
  170. hud/telemetry/exporter.py +201 -0
  171. hud/telemetry/instrument.py +158 -253
  172. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  173. hud/telemetry/tests/test_exporter.py +258 -0
  174. hud/telemetry/tests/test_instrument.py +401 -0
  175. hud/tools/__init__.py +16 -2
  176. hud/tools/apply_patch.py +639 -0
  177. hud/tools/base.py +54 -4
  178. hud/tools/bash.py +2 -2
  179. hud/tools/computer/__init__.py +4 -0
  180. hud/tools/computer/anthropic.py +2 -2
  181. hud/tools/computer/gemini.py +385 -0
  182. hud/tools/computer/hud.py +23 -6
  183. hud/tools/computer/openai.py +20 -21
  184. hud/tools/computer/qwen.py +434 -0
  185. hud/tools/computer/settings.py +37 -0
  186. hud/tools/edit.py +3 -7
  187. hud/tools/executors/base.py +4 -2
  188. hud/tools/executors/pyautogui.py +1 -1
  189. hud/tools/grounding/grounded_tool.py +13 -18
  190. hud/tools/grounding/grounder.py +10 -31
  191. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  192. hud/tools/jupyter.py +330 -0
  193. hud/tools/playwright.py +18 -3
  194. hud/tools/shell.py +308 -0
  195. hud/tools/tests/test_apply_patch.py +718 -0
  196. hud/tools/tests/test_computer.py +4 -9
  197. hud/tools/tests/test_computer_actions.py +24 -2
  198. hud/tools/tests/test_jupyter_tool.py +181 -0
  199. hud/tools/tests/test_shell.py +596 -0
  200. hud/tools/tests/test_submit.py +85 -0
  201. hud/tools/tests/test_types.py +193 -0
  202. hud/tools/types.py +21 -1
  203. hud/types.py +167 -57
  204. hud/utils/__init__.py +2 -0
  205. hud/utils/env.py +67 -0
  206. hud/utils/hud_console.py +61 -3
  207. hud/utils/mcp.py +15 -58
  208. hud/utils/strict_schema.py +162 -0
  209. hud/utils/tests/test_init.py +1 -2
  210. hud/utils/tests/test_mcp.py +1 -28
  211. hud/utils/tests/test_pretty_errors.py +186 -0
  212. hud/utils/tests/test_tool_shorthand.py +154 -0
  213. hud/utils/tests/test_version.py +1 -1
  214. hud/utils/types.py +20 -0
  215. hud/version.py +1 -1
  216. hud_python-0.5.1.dist-info/METADATA +264 -0
  217. hud_python-0.5.1.dist-info/RECORD +299 -0
  218. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
  219. hud/agents/langchain.py +0 -261
  220. hud/agents/lite_llm.py +0 -72
  221. hud/cli/rl/__init__.py +0 -180
  222. hud/cli/rl/config.py +0 -101
  223. hud/cli/rl/display.py +0 -133
  224. hud/cli/rl/gpu.py +0 -63
  225. hud/cli/rl/gpu_utils.py +0 -321
  226. hud/cli/rl/local_runner.py +0 -595
  227. hud/cli/rl/presets.py +0 -96
  228. hud/cli/rl/remote_runner.py +0 -463
  229. hud/cli/rl/rl_api.py +0 -150
  230. hud/cli/rl/vllm.py +0 -177
  231. hud/cli/rl/wait_utils.py +0 -89
  232. hud/datasets/parallel.py +0 -687
  233. hud/misc/__init__.py +0 -1
  234. hud/misc/claude_plays_pokemon.py +0 -292
  235. hud/otel/__init__.py +0 -35
  236. hud/otel/collector.py +0 -142
  237. hud/otel/config.py +0 -181
  238. hud/otel/context.py +0 -570
  239. hud/otel/exporters.py +0 -369
  240. hud/otel/instrumentation.py +0 -135
  241. hud/otel/processors.py +0 -121
  242. hud/otel/tests/__init__.py +0 -1
  243. hud/otel/tests/test_processors.py +0 -197
  244. hud/rl/README.md +0 -30
  245. hud/rl/__init__.py +0 -1
  246. hud/rl/actor.py +0 -176
  247. hud/rl/buffer.py +0 -405
  248. hud/rl/chat_template.jinja +0 -101
  249. hud/rl/config.py +0 -192
  250. hud/rl/distributed.py +0 -132
  251. hud/rl/learner.py +0 -637
  252. hud/rl/tests/__init__.py +0 -1
  253. hud/rl/tests/test_learner.py +0 -186
  254. hud/rl/train.py +0 -382
  255. hud/rl/types.py +0 -101
  256. hud/rl/utils/start_vllm_server.sh +0 -30
  257. hud/rl/utils.py +0 -524
  258. hud/rl/vllm_adapter.py +0 -143
  259. hud/telemetry/job.py +0 -352
  260. hud/telemetry/replay.py +0 -74
  261. hud/telemetry/tests/test_replay.py +0 -40
  262. hud/telemetry/tests/test_trace.py +0 -63
  263. hud/telemetry/trace.py +0 -158
  264. hud/utils/agent_factories.py +0 -86
  265. hud/utils/async_utils.py +0 -65
  266. hud/utils/group_eval.py +0 -223
  267. hud/utils/progress.py +0 -149
  268. hud/utils/tasks.py +0 -127
  269. hud/utils/tests/test_async_utils.py +0 -173
  270. hud/utils/tests/test_progress.py +0 -261
  271. hud_python-0.4.45.dist-info/METADATA +0 -552
  272. hud_python-0.4.45.dist-info/RECORD +0 -228
  273. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
  274. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0
hud/telemetry/__init__.py CHANGED
@@ -1,26 +1,27 @@
1
- """HUD Telemetry - User-facing APIs for tracing and job management.
1
+ """HUD Telemetry - Lightweight telemetry for HUD SDK.
2
2
 
3
- This module provides the main telemetry APIs that users interact with:
4
- - trace: Context manager for tracing code execution
5
- - job: Context manager and utilities for job management
6
- - instrument: Decorator for instrumenting functions
7
- - get_trace: Retrieve collected traces for replay/analysis
8
- """
3
+ This module provides:
4
+ - @instrument decorator for recording function calls
5
+ - High-performance span export to HUD API
6
+
7
+ Usage:
8
+ import hud
9
9
 
10
- from __future__ import annotations
10
+ @hud.instrument
11
+ async def my_function():
12
+ ...
13
+
14
+ # Within an eval context, calls are recorded
15
+ async with hud.eval(task) as ctx:
16
+ result = await my_function()
17
+ """
11
18
 
12
- from .instrument import instrument
13
- from .job import Job, create_job, job
14
- from .replay import clear_trace, get_trace
15
- from .trace import Trace, trace
19
+ from hud.telemetry.exporter import flush, queue_span, shutdown
20
+ from hud.telemetry.instrument import instrument
16
21
 
17
22
  __all__ = [
18
- "Job",
19
- "Trace",
20
- "clear_trace",
21
- "create_job",
22
- "get_trace",
23
+ "flush",
23
24
  "instrument",
24
- "job",
25
- "trace",
25
+ "queue_span",
26
+ "shutdown",
26
27
  ]
hud/telemetry/exporter.py ADDED
@@ -0,0 +1,201 @@
1
+ """High-performance span exporter for HUD telemetry backend.
2
+
3
+ This module provides a lightweight span exporter that sends spans to the HUD
4
+ telemetry API immediately, using a thread pool to avoid blocking async code.
5
+
6
+ No OpenTelemetry dependency required.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import atexit
12
+ import concurrent.futures as cf
13
+ import contextlib
14
+ import logging
15
+ from collections import defaultdict
16
+ from concurrent.futures import ThreadPoolExecutor
17
+ from typing import Any
18
+
19
+ from hud.shared import make_request_sync
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
# Process-wide thread pool used for span uploads; created on first use by
# _get_export_executor().
_export_executor: ThreadPoolExecutor | None = None

# Upload futures that have been scheduled and not yet cleaned up, kept so
# shutdown() can wait on them.
_pending_futures: list[cf.Future[bool]] = []

# Spans buffered per task_run_id until their upload succeeds, so flush() can
# send them again at context exit.
_pending_spans: dict[str, list[dict[str, Any]]] = defaultdict(list)
31
+
32
+
33
def _get_export_executor() -> ThreadPoolExecutor:
    """Return the shared span-export thread pool, creating it on first call.

    The first call builds a 4-worker pool and registers an ``atexit`` hook
    that shuts the pool down (waiting for queued work) at interpreter exit.
    Later calls return the same pool.
    """
    global _export_executor

    # Fast path: the pool already exists.
    if _export_executor is not None:
        return _export_executor

    _export_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="span-export")

    def _shutdown_pool() -> None:
        # Re-read the module global at exit time rather than capturing the pool.
        if _export_executor is not None:
            _export_executor.shutdown(wait=True)

    atexit.register(_shutdown_pool)
    return _export_executor
45
+
46
+
47
def _do_upload(
    task_run_id: str,
    spans: list[dict[str, Any]],
    telemetry_url: str,
    api_key: str,
) -> bool:
    """POST a batch of spans for one task run to the HUD telemetry API.

    Synchronous; intended to be called from the export thread pool.

    Args:
        task_run_id: Task run the spans belong to (part of the endpoint path).
        spans: Span dicts sent under the ``"telemetry"`` key of the JSON body.
        telemetry_url: Base URL of the telemetry API.
        api_key: API key passed through to the request helper.

    Returns:
        True if the request helper returned without raising, False otherwise.
        Failures are logged at debug level and never propagated.
    """
    try:
        endpoint = f"{telemetry_url}/trace/{task_run_id}/telemetry-upload"
        body: dict[str, Any] = {"telemetry": spans}

        logger.debug("Uploading %d spans to %s", len(spans), endpoint)
        make_request_sync(method="POST", url=endpoint, json=body, api_key=api_key)
    except Exception as exc:
        # Telemetry is best-effort: report the failure to the caller as False.
        logger.debug("Failed to upload spans for task %s: %s", task_run_id, exc)
        return False
    else:
        return True
69
+
70
+
71
def _get_api_key() -> str | None:
    """Resolve the API key used for uploads.

    A key returned by ``get_current_api_key()`` takes precedence; when that is
    empty, ``settings.api_key`` is used instead.
    """
    # Imported lazily, as in the rest of this module's functions.
    from hud.eval.context import get_current_api_key
    from hud.settings import settings

    context_key = get_current_api_key()
    if context_key:
        return context_key
    return settings.api_key
77
+
78
+
79
def _discard_pending_span(task_run_id: str, span: dict[str, Any]) -> None:
    """Remove ``span`` from the retry buffer after a successful upload."""
    buffered = _pending_spans.get(task_run_id)
    if buffered is None:
        # flush() already popped this task's spans; nothing left to remove.
        return
    # Best-effort: telemetry bookkeeping must never raise into user code
    # (the span may already be gone, or comparing span payloads may fail).
    with contextlib.suppress(Exception):
        buffered.remove(span)


def queue_span(span: dict[str, Any]) -> None:
    """Queue a span and immediately start uploading it.

    The span is first recorded in the per-task retry buffer. When an asyncio
    event loop is running, the upload is handed to the export thread pool so
    the loop is not blocked; otherwise the upload happens synchronously. The
    span leaves the retry buffer only after an upload that reported success,
    so ``flush()`` can re-send anything that failed.

    Nothing happens when no API key is available, telemetry is disabled, or
    the span carries no ``attributes["task_run_id"]``.

    Args:
        span: Span dict; ``span["attributes"]["task_run_id"]`` selects the
            task run it is uploaded under.
    """
    import asyncio

    from hud.settings import settings

    api_key = _get_api_key()
    if not api_key or not settings.telemetry_enabled:
        return

    # Tolerate a missing or None "attributes" entry.
    task_run_id = (span.get("attributes") or {}).get("task_run_id")
    if not task_run_id:
        return

    # Keep the span until an upload succeeds so flush() can retry it.
    _pending_spans[task_run_id].append(span)

    # Resolve now: the API key in particular may differ once the context changes.
    telemetry_url = settings.hud_telemetry_url

    try:
        asyncio.get_running_loop()  # raises RuntimeError outside async code
        # Executor.submit() yields a concurrent.futures.Future, which is what
        # shutdown() needs for concurrent.futures.wait(); the asyncio future
        # returned by loop.run_in_executor() is not compatible with it.
        future = _get_export_executor().submit(
            _do_upload, task_run_id, [span], telemetry_url, api_key
        )
    except RuntimeError:
        # No running loop, or the pool no longer accepts work: upload inline.
        if _do_upload(task_run_id, [span], telemetry_url, api_key):
            _discard_pending_span(task_run_id, span)
        return

    _pending_futures.append(future)

    def _on_upload_done(done: cf.Future[bool]) -> None:
        # May run on a pool worker thread; only single list operations are used.
        with contextlib.suppress(ValueError):
            _pending_futures.remove(done)
        # _do_upload reports failure by returning False rather than raising, so
        # the result (not merely the absence of an exception) decides success.
        if done.cancelled() or done.exception() is not None or not done.result():
            return  # leave the span buffered for flush()
        _discard_pending_span(task_run_id, span)

    future.add_done_callback(_on_upload_done)
133
+
134
+
135
def flush(task_run_id: str | None = None) -> None:
    """Upload spans still held in the retry buffer (called at context exit).

    Buffered spans are removed from the buffer and sent in one request per
    task run. The outcome of that request is not checked, so this is the last
    attempt for those spans.

    When no API key is available or telemetry is disabled, the affected spans
    are discarded without uploading: only ``task_run_id``'s spans when one is
    given, otherwise everything.

    Args:
        task_run_id: Task run to flush. If None (or empty), flushes all.
    """
    from hud.settings import settings

    api_key = _get_api_key()
    if not api_key or not settings.telemetry_enabled:
        # Nothing can be sent. Drop only what this call was asked to flush so
        # that spans buffered for other task runs are left alone.
        if task_run_id:
            _pending_spans.pop(task_run_id, None)
        else:
            _pending_spans.clear()
        return

    # Snapshot the keys: entries are popped while iterating.
    task_ids = [task_run_id] if task_run_id else list(_pending_spans)
    for tid in task_ids:
        # Pop per task instead of clearing the whole buffer afterwards, so a
        # span queued while an upload is in progress is not thrown away.
        spans = _pending_spans.pop(tid, None)
        if spans:
            _do_upload(tid, spans, settings.hud_telemetry_url, api_key)
161
+
162
+
163
def shutdown(timeout: float = 10.0) -> bool:
    """Wait for in-flight span uploads, then flush what is still buffered.

    The final ``flush()`` always runs, including after a timeout or an error
    while waiting.

    Args:
        timeout: Maximum time to wait for in-flight uploads, in seconds.

    Returns:
        True if every tracked upload had finished and the final flush ran
        without raising; False on timeout or error.
    """
    # Snapshot and reset: done-callbacks remove entries from the shared list
    # while we are waiting on it.
    in_flight = list(_pending_futures)
    _pending_futures.clear()

    # concurrent.futures.wait() only accepts concurrent.futures futures; any
    # other future object found in the list can only be polled, not waited on.
    waitable = [f for f in in_flight if isinstance(f, cf.Future)]
    foreign = [f for f in in_flight if not isinstance(f, cf.Future)]

    all_done = all(f.done() for f in foreign)
    if waitable:
        try:
            _, unfinished = cf.wait(waitable, timeout=timeout)
        except Exception:
            logger.debug("Error while waiting for span exports", exc_info=True)
            all_done = False
        else:
            all_done = all_done and not unfinished

    # Flush any remaining spans synchronously, regardless of how waiting went.
    try:
        flush()
    except Exception:
        logger.debug("Final span flush failed", exc_info=True)
        return False
    return all_done
191
+
192
+
193
def _shutdown_at_exit() -> None:
    """Interpreter-exit hook: drain pending span exports, waiting up to 5 seconds."""
    shutdown(timeout=5.0)


# Register shutdown handler
atexit.register(_shutdown_at_exit)
195
+
196
+
197
+ __all__ = [
198
+ "flush",
199
+ "queue_span",
200
+ "shutdown",
201
+ ]