hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (282)
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +70 -5
  3. hud/agents/base.py +238 -500
  4. hud/agents/claude.py +236 -247
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +264 -0
  7. hud/agents/gemini_cua.py +324 -0
  8. hud/agents/grounded_openai.py +98 -100
  9. hud/agents/misc/integration_test_agent.py +51 -20
  10. hud/agents/misc/response_agent.py +48 -36
  11. hud/agents/openai.py +282 -296
  12. hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
  13. hud/agents/operator.py +199 -0
  14. hud/agents/resolver.py +70 -0
  15. hud/agents/tests/conftest.py +133 -0
  16. hud/agents/tests/test_base.py +300 -622
  17. hud/agents/tests/test_base_runtime.py +233 -0
  18. hud/agents/tests/test_claude.py +381 -214
  19. hud/agents/tests/test_client.py +9 -10
  20. hud/agents/tests/test_gemini.py +369 -0
  21. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  22. hud/agents/tests/test_openai.py +377 -140
  23. hud/agents/tests/test_operator.py +362 -0
  24. hud/agents/tests/test_resolver.py +192 -0
  25. hud/agents/tests/test_run_eval.py +179 -0
  26. hud/agents/types.py +148 -0
  27. hud/cli/__init__.py +493 -546
  28. hud/cli/analyze.py +43 -5
  29. hud/cli/build.py +699 -113
  30. hud/cli/debug.py +8 -5
  31. hud/cli/dev.py +889 -732
  32. hud/cli/eval.py +793 -667
  33. hud/cli/flows/dev.py +167 -0
  34. hud/cli/flows/init.py +191 -0
  35. hud/cli/flows/tasks.py +153 -56
  36. hud/cli/flows/templates.py +151 -0
  37. hud/cli/flows/tests/__init__.py +1 -0
  38. hud/cli/flows/tests/test_dev.py +126 -0
  39. hud/cli/init.py +60 -58
  40. hud/cli/pull.py +1 -1
  41. hud/cli/push.py +38 -13
  42. hud/cli/rft.py +311 -0
  43. hud/cli/rft_status.py +145 -0
  44. hud/cli/tests/test_analyze.py +5 -5
  45. hud/cli/tests/test_analyze_metadata.py +3 -2
  46. hud/cli/tests/test_analyze_module.py +120 -0
  47. hud/cli/tests/test_build.py +110 -8
  48. hud/cli/tests/test_build_failure.py +41 -0
  49. hud/cli/tests/test_build_module.py +50 -0
  50. hud/cli/tests/test_cli_init.py +6 -1
  51. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  52. hud/cli/tests/test_cli_root.py +140 -0
  53. hud/cli/tests/test_convert.py +361 -0
  54. hud/cli/tests/test_debug.py +12 -10
  55. hud/cli/tests/test_dev.py +197 -0
  56. hud/cli/tests/test_eval.py +251 -0
  57. hud/cli/tests/test_eval_bedrock.py +51 -0
  58. hud/cli/tests/test_init.py +124 -0
  59. hud/cli/tests/test_main_module.py +11 -5
  60. hud/cli/tests/test_mcp_server.py +12 -100
  61. hud/cli/tests/test_push.py +1 -1
  62. hud/cli/tests/test_push_happy.py +74 -0
  63. hud/cli/tests/test_push_wrapper.py +23 -0
  64. hud/cli/tests/test_registry.py +1 -1
  65. hud/cli/tests/test_utils.py +1 -1
  66. hud/cli/{rl → utils}/celebrate.py +14 -12
  67. hud/cli/utils/config.py +18 -1
  68. hud/cli/utils/docker.py +130 -4
  69. hud/cli/utils/env_check.py +9 -9
  70. hud/cli/utils/git.py +136 -0
  71. hud/cli/utils/interactive.py +39 -5
  72. hud/cli/utils/metadata.py +70 -1
  73. hud/cli/utils/runner.py +1 -1
  74. hud/cli/utils/server.py +2 -2
  75. hud/cli/utils/source_hash.py +3 -3
  76. hud/cli/utils/tasks.py +4 -1
  77. hud/cli/utils/tests/__init__.py +0 -0
  78. hud/cli/utils/tests/test_config.py +58 -0
  79. hud/cli/utils/tests/test_docker.py +93 -0
  80. hud/cli/utils/tests/test_docker_hints.py +71 -0
  81. hud/cli/utils/tests/test_env_check.py +74 -0
  82. hud/cli/utils/tests/test_environment.py +42 -0
  83. hud/cli/utils/tests/test_git.py +142 -0
  84. hud/cli/utils/tests/test_interactive_module.py +60 -0
  85. hud/cli/utils/tests/test_local_runner.py +50 -0
  86. hud/cli/utils/tests/test_logging_utils.py +23 -0
  87. hud/cli/utils/tests/test_metadata.py +49 -0
  88. hud/cli/utils/tests/test_package_runner.py +35 -0
  89. hud/cli/utils/tests/test_registry_utils.py +49 -0
  90. hud/cli/utils/tests/test_remote_runner.py +25 -0
  91. hud/cli/utils/tests/test_runner_modules.py +52 -0
  92. hud/cli/utils/tests/test_source_hash.py +36 -0
  93. hud/cli/utils/tests/test_tasks.py +80 -0
  94. hud/cli/utils/version_check.py +258 -0
  95. hud/cli/{rl → utils}/viewer.py +2 -2
  96. hud/clients/README.md +12 -11
  97. hud/clients/__init__.py +4 -3
  98. hud/clients/base.py +166 -26
  99. hud/clients/environment.py +51 -0
  100. hud/clients/fastmcp.py +13 -6
  101. hud/clients/mcp_use.py +45 -15
  102. hud/clients/tests/test_analyze_scenarios.py +206 -0
  103. hud/clients/tests/test_protocol.py +9 -3
  104. hud/datasets/__init__.py +23 -20
  105. hud/datasets/loader.py +326 -0
  106. hud/datasets/runner.py +198 -105
  107. hud/datasets/tests/__init__.py +0 -0
  108. hud/datasets/tests/test_loader.py +221 -0
  109. hud/datasets/tests/test_utils.py +315 -0
  110. hud/datasets/utils.py +270 -90
  111. hud/environment/__init__.py +52 -0
  112. hud/environment/connection.py +258 -0
  113. hud/environment/connectors/__init__.py +33 -0
  114. hud/environment/connectors/base.py +68 -0
  115. hud/environment/connectors/local.py +177 -0
  116. hud/environment/connectors/mcp_config.py +137 -0
  117. hud/environment/connectors/openai.py +101 -0
  118. hud/environment/connectors/remote.py +172 -0
  119. hud/environment/environment.py +835 -0
  120. hud/environment/integrations/__init__.py +45 -0
  121. hud/environment/integrations/adk.py +67 -0
  122. hud/environment/integrations/anthropic.py +196 -0
  123. hud/environment/integrations/gemini.py +92 -0
  124. hud/environment/integrations/langchain.py +82 -0
  125. hud/environment/integrations/llamaindex.py +68 -0
  126. hud/environment/integrations/openai.py +238 -0
  127. hud/environment/mock.py +306 -0
  128. hud/environment/router.py +263 -0
  129. hud/environment/scenarios.py +620 -0
  130. hud/environment/tests/__init__.py +1 -0
  131. hud/environment/tests/test_connection.py +317 -0
  132. hud/environment/tests/test_connectors.py +205 -0
  133. hud/environment/tests/test_environment.py +593 -0
  134. hud/environment/tests/test_integrations.py +257 -0
  135. hud/environment/tests/test_local_connectors.py +242 -0
  136. hud/environment/tests/test_scenarios.py +1086 -0
  137. hud/environment/tests/test_tools.py +208 -0
  138. hud/environment/types.py +23 -0
  139. hud/environment/utils/__init__.py +35 -0
  140. hud/environment/utils/formats.py +215 -0
  141. hud/environment/utils/schema.py +171 -0
  142. hud/environment/utils/tool_wrappers.py +113 -0
  143. hud/eval/__init__.py +67 -0
  144. hud/eval/context.py +727 -0
  145. hud/eval/display.py +299 -0
  146. hud/eval/instrument.py +187 -0
  147. hud/eval/manager.py +533 -0
  148. hud/eval/parallel.py +268 -0
  149. hud/eval/task.py +372 -0
  150. hud/eval/tests/__init__.py +1 -0
  151. hud/eval/tests/test_context.py +178 -0
  152. hud/eval/tests/test_eval.py +210 -0
  153. hud/eval/tests/test_manager.py +152 -0
  154. hud/eval/tests/test_parallel.py +168 -0
  155. hud/eval/tests/test_task.py +291 -0
  156. hud/eval/types.py +65 -0
  157. hud/eval/utils.py +194 -0
  158. hud/patches/__init__.py +19 -0
  159. hud/patches/mcp_patches.py +308 -0
  160. hud/patches/warnings.py +54 -0
  161. hud/samples/browser.py +4 -4
  162. hud/server/__init__.py +2 -1
  163. hud/server/low_level.py +2 -1
  164. hud/server/router.py +164 -0
  165. hud/server/server.py +567 -80
  166. hud/server/tests/test_mcp_server_integration.py +11 -11
  167. hud/server/tests/test_mcp_server_more.py +1 -1
  168. hud/server/tests/test_server_extra.py +2 -0
  169. hud/settings.py +45 -3
  170. hud/shared/exceptions.py +36 -10
  171. hud/shared/hints.py +26 -1
  172. hud/shared/requests.py +15 -3
  173. hud/shared/tests/test_exceptions.py +40 -31
  174. hud/shared/tests/test_hints.py +167 -0
  175. hud/telemetry/__init__.py +20 -19
  176. hud/telemetry/exporter.py +201 -0
  177. hud/telemetry/instrument.py +165 -253
  178. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  179. hud/telemetry/tests/test_exporter.py +258 -0
  180. hud/telemetry/tests/test_instrument.py +401 -0
  181. hud/tools/__init__.py +18 -2
  182. hud/tools/agent.py +223 -0
  183. hud/tools/apply_patch.py +639 -0
  184. hud/tools/base.py +54 -4
  185. hud/tools/bash.py +2 -2
  186. hud/tools/computer/__init__.py +36 -3
  187. hud/tools/computer/anthropic.py +2 -2
  188. hud/tools/computer/gemini.py +385 -0
  189. hud/tools/computer/hud.py +23 -6
  190. hud/tools/computer/openai.py +20 -21
  191. hud/tools/computer/qwen.py +434 -0
  192. hud/tools/computer/settings.py +37 -0
  193. hud/tools/edit.py +3 -7
  194. hud/tools/executors/base.py +4 -2
  195. hud/tools/executors/pyautogui.py +1 -1
  196. hud/tools/grounding/grounded_tool.py +13 -18
  197. hud/tools/grounding/grounder.py +10 -31
  198. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  199. hud/tools/jupyter.py +330 -0
  200. hud/tools/playwright.py +18 -3
  201. hud/tools/shell.py +308 -0
  202. hud/tools/tests/test_agent_tool.py +355 -0
  203. hud/tools/tests/test_apply_patch.py +718 -0
  204. hud/tools/tests/test_computer.py +4 -9
  205. hud/tools/tests/test_computer_actions.py +24 -2
  206. hud/tools/tests/test_jupyter_tool.py +181 -0
  207. hud/tools/tests/test_shell.py +596 -0
  208. hud/tools/tests/test_submit.py +85 -0
  209. hud/tools/tests/test_types.py +193 -0
  210. hud/tools/types.py +21 -1
  211. hud/types.py +194 -56
  212. hud/utils/__init__.py +2 -0
  213. hud/utils/env.py +67 -0
  214. hud/utils/hud_console.py +89 -18
  215. hud/utils/mcp.py +15 -58
  216. hud/utils/strict_schema.py +162 -0
  217. hud/utils/tests/test_init.py +1 -2
  218. hud/utils/tests/test_mcp.py +1 -28
  219. hud/utils/tests/test_pretty_errors.py +186 -0
  220. hud/utils/tests/test_tool_shorthand.py +154 -0
  221. hud/utils/tests/test_version.py +1 -1
  222. hud/utils/types.py +20 -0
  223. hud/version.py +1 -1
  224. hud_python-0.5.13.dist-info/METADATA +264 -0
  225. hud_python-0.5.13.dist-info/RECORD +305 -0
  226. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
  227. hud/agents/langchain.py +0 -261
  228. hud/agents/lite_llm.py +0 -72
  229. hud/cli/rl/__init__.py +0 -180
  230. hud/cli/rl/config.py +0 -101
  231. hud/cli/rl/display.py +0 -133
  232. hud/cli/rl/gpu.py +0 -63
  233. hud/cli/rl/gpu_utils.py +0 -321
  234. hud/cli/rl/local_runner.py +0 -595
  235. hud/cli/rl/presets.py +0 -96
  236. hud/cli/rl/remote_runner.py +0 -463
  237. hud/cli/rl/rl_api.py +0 -150
  238. hud/cli/rl/vllm.py +0 -177
  239. hud/cli/rl/wait_utils.py +0 -89
  240. hud/datasets/parallel.py +0 -687
  241. hud/misc/__init__.py +0 -1
  242. hud/misc/claude_plays_pokemon.py +0 -292
  243. hud/otel/__init__.py +0 -35
  244. hud/otel/collector.py +0 -142
  245. hud/otel/config.py +0 -181
  246. hud/otel/context.py +0 -570
  247. hud/otel/exporters.py +0 -369
  248. hud/otel/instrumentation.py +0 -135
  249. hud/otel/processors.py +0 -121
  250. hud/otel/tests/__init__.py +0 -1
  251. hud/otel/tests/test_processors.py +0 -197
  252. hud/rl/README.md +0 -30
  253. hud/rl/__init__.py +0 -1
  254. hud/rl/actor.py +0 -176
  255. hud/rl/buffer.py +0 -405
  256. hud/rl/chat_template.jinja +0 -101
  257. hud/rl/config.py +0 -192
  258. hud/rl/distributed.py +0 -132
  259. hud/rl/learner.py +0 -637
  260. hud/rl/tests/__init__.py +0 -1
  261. hud/rl/tests/test_learner.py +0 -186
  262. hud/rl/train.py +0 -382
  263. hud/rl/types.py +0 -101
  264. hud/rl/utils/start_vllm_server.sh +0 -30
  265. hud/rl/utils.py +0 -524
  266. hud/rl/vllm_adapter.py +0 -143
  267. hud/telemetry/job.py +0 -352
  268. hud/telemetry/replay.py +0 -74
  269. hud/telemetry/tests/test_replay.py +0 -40
  270. hud/telemetry/tests/test_trace.py +0 -63
  271. hud/telemetry/trace.py +0 -158
  272. hud/utils/agent_factories.py +0 -86
  273. hud/utils/async_utils.py +0 -65
  274. hud/utils/group_eval.py +0 -223
  275. hud/utils/progress.py +0 -149
  276. hud/utils/tasks.py +0 -127
  277. hud/utils/tests/test_async_utils.py +0 -173
  278. hud/utils/tests/test_progress.py +0 -261
  279. hud_python-0.4.45.dist-info/METADATA +0 -552
  280. hud_python-0.4.45.dist-info/RECORD +0 -228
  281. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  282. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
hud/telemetry/__init__.py CHANGED
@@ -1,26 +1,27 @@
1
- """HUD Telemetry - User-facing APIs for tracing and job management.
1
+ """HUD Telemetry - Lightweight telemetry for HUD SDK.
2
2
 
3
- This module provides the main telemetry APIs that users interact with:
4
- - trace: Context manager for tracing code execution
5
- - job: Context manager and utilities for job management
6
- - instrument: Decorator for instrumenting functions
7
- - get_trace: Retrieve collected traces for replay/analysis
8
- """
3
+ This module provides:
4
+ - @instrument decorator for recording function calls
5
+ - High-performance span export to HUD API
6
+
7
+ Usage:
8
+ import hud
9
9
 
10
- from __future__ import annotations
10
+ @hud.instrument
11
+ async def my_function():
12
+ ...
13
+
14
+ # Within an eval context, calls are recorded
15
+ async with hud.eval(task) as ctx:
16
+ result = await my_function()
17
+ """
11
18
 
12
- from .instrument import instrument
13
- from .job import Job, create_job, job
14
- from .replay import clear_trace, get_trace
15
- from .trace import Trace, trace
19
+ from hud.telemetry.exporter import flush, queue_span, shutdown
20
+ from hud.telemetry.instrument import instrument
16
21
 
17
22
  __all__ = [
18
- "Job",
19
- "Trace",
20
- "clear_trace",
21
- "create_job",
22
- "get_trace",
23
+ "flush",
23
24
  "instrument",
24
- "job",
25
- "trace",
25
+ "queue_span",
26
+ "shutdown",
26
27
  ]
hud/telemetry/exporter.py ADDED
@@ -0,0 +1,201 @@
1
+ """High-performance span exporter for HUD telemetry backend.
2
+
3
+ This module provides a lightweight span exporter that sends spans to the HUD
4
+ telemetry API immediately, using a thread pool to avoid blocking async code.
5
+
6
+ No OpenTelemetry dependency required.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import atexit
12
+ import concurrent.futures as cf
13
+ import contextlib
14
+ import logging
15
+ from collections import defaultdict
16
+ from concurrent.futures import ThreadPoolExecutor
17
+ from typing import Any
18
+
19
+ from hud.shared import make_request_sync
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ # Global singleton thread pool for span exports
24
+ _export_executor: ThreadPoolExecutor | None = None
25
+
26
+ # Pending futures for shutdown coordination
27
+ _pending_futures: list[cf.Future[bool]] = []
28
+
29
+ # Spans waiting to be flushed at context exit (per task_run_id)
30
+ _pending_spans: dict[str, list[dict[str, Any]]] = defaultdict(list)
31
+
32
+
33
def _get_export_executor() -> ThreadPoolExecutor:
    """Return the shared span-export thread pool, creating it on first use.

    The pool is built lazily with four workers named ``span-export-*``. When
    it is created, an ``atexit`` hook is registered that shuts the pool down
    and waits for its workers to finish.
    """
    global _export_executor

    # Fast path: the pool already exists.
    if _export_executor is not None:
        return _export_executor

    _export_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="span-export")

    def cleanup() -> None:
        # Reads the module global at exit time rather than capturing the pool.
        if _export_executor is not None:
            _export_executor.shutdown(wait=True)

    atexit.register(cleanup)
    return _export_executor
45
+
46
+
47
def _do_upload(
    task_run_id: str,
    spans: list[dict[str, Any]],
    telemetry_url: str,
    api_key: str,
) -> bool:
    """POST a batch of spans to the HUD telemetry endpoint (synchronous).

    Intended to run on a worker thread. Any exception raised while building
    or sending the request is logged at debug level and reported through the
    return value instead of being propagated.

    Args:
        task_run_id: Task run the spans belong to; becomes part of the URL.
        spans: Span payloads, sent under the ``"telemetry"`` key.
        telemetry_url: Base URL of the telemetry API.
        api_key: API key forwarded to ``make_request_sync``.

    Returns:
        True if the request completed without raising, False otherwise.
    """
    try:
        endpoint = f"{telemetry_url}/trace/{task_run_id}/telemetry-upload"
        body: dict[str, Any] = {"telemetry": spans}

        logger.debug("Uploading %d spans to %s", len(spans), endpoint)
        make_request_sync(method="POST", url=endpoint, json=body, api_key=api_key)
    except Exception as exc:
        logger.debug("Failed to upload spans for task %s: %s", task_run_id, exc)
        return False
    else:
        return True
69
+
70
+
71
def _get_api_key() -> str | None:
    """Resolve the API key used for telemetry uploads.

    A key set on the current eval context wins; when none is set (or it is
    empty) the key from global settings is returned instead.
    """
    # Imported lazily, as in the rest of this module's helpers.
    from hud.eval.context import get_current_api_key
    from hud.settings import settings

    context_key = get_current_api_key()
    if context_key:
        return context_key
    return settings.api_key
77
+
78
+
79
def queue_span(span: dict[str, Any]) -> None:
    """Queue a span and upload it right away without blocking the caller.

    The span is recorded in ``_pending_spans`` under its ``task_run_id`` so
    that ``flush()`` can retry it. It is removed from that backlog only after
    an upload has actually succeeded (``_do_upload`` returned True); failed or
    cancelled uploads leave it in place for the retry.

    When an event loop is running, the upload is submitted to the shared
    export thread pool and tracked in ``_pending_futures`` as a
    ``concurrent.futures.Future`` so that ``shutdown()`` can wait on it.
    Otherwise the upload is performed synchronously.

    The span is silently ignored when telemetry is disabled, no API key is
    available, or the span has no ``attributes["task_run_id"]``.

    Args:
        span: Span payload to export.
    """
    import asyncio

    from hud.settings import settings

    api_key = _get_api_key()
    if not api_key or not settings.telemetry_enabled:
        return

    # Tolerate a missing or None "attributes" entry.
    task_run_id = (span.get("attributes") or {}).get("task_run_id")
    if not task_run_id:
        return

    # Store for potential re-flush at context exit.
    _pending_spans[task_run_id].append(span)

    # Capture the destination now; the ambient context may change before the
    # worker thread gets to run.
    telemetry_url = settings.hud_telemetry_url

    def _forget_span() -> None:
        # Best effort: flush() may already have popped this task's backlog.
        # .get() avoids re-creating an empty defaultdict entry in that case.
        with contextlib.suppress(Exception):
            backlog = _pending_spans.get(task_run_id)
            if backlog is not None and span in backlog:
                backlog.remove(span)

    def _on_done(done: cf.Future[bool]) -> None:
        with contextlib.suppress(ValueError):
            _pending_futures.remove(done)
        # Check cancelled() first: exception()/result() raise on a cancelled future.
        if done.cancelled() or done.exception() is not None:
            return
        # _do_upload signals failure by returning False rather than raising,
        # so only a truthy result counts as a successful upload.
        if done.result():
            _forget_span()

    try:
        # Only used to detect whether we are inside a running event loop.
        asyncio.get_running_loop()
        # Submit directly to the pool so the tracked object is a
        # concurrent.futures.Future, which is what cf.wait() in shutdown() needs.
        future = _get_export_executor().submit(
            _do_upload, task_run_id, [span], telemetry_url, api_key
        )
    except RuntimeError:
        # No running loop (or the pool no longer accepts work): upload inline.
        if _do_upload(task_run_id, [span], telemetry_url, api_key):
            _forget_span()
        return

    _pending_futures.append(future)
    # If the upload already finished, the callback runs immediately here;
    # otherwise it runs on the worker thread that completes the future.
    future.add_done_callback(_on_done)
133
+
134
+
135
def flush(task_run_id: str | None = None) -> None:
    """Upload spans still held in the pending backlog (called at context exit).

    Spans remain in the backlog until an upload removes them, so this acts as
    a single retry. Flushed entries are removed from the backlog whether or
    not the retry succeeds. If telemetry is disabled or no API key is
    available, the whole backlog is discarded without uploading.

    Args:
        task_run_id: Task run to flush. When None (or empty), every task's
            backlog is flushed.
    """
    from hud.settings import settings

    api_key = _get_api_key()
    if not (api_key and settings.telemetry_enabled):
        _pending_spans.clear()
        return

    if not task_run_id:
        # Flush everything; iterate over a snapshot of the backlog.
        for run_id, batch in list(_pending_spans.items()):
            if batch:
                _do_upload(run_id, batch, settings.hud_telemetry_url, api_key)
        _pending_spans.clear()
        return

    # Flush a single task run.
    batch = _pending_spans.pop(task_run_id, [])
    if batch:
        _do_upload(task_run_id, batch, settings.hud_telemetry_url, api_key)
161
+
162
+
163
def shutdown(timeout: float = 10.0) -> bool:
    """Wait for in-flight exports, then flush whatever is still pending.

    The final ``flush()`` always runs, even when waiting on the in-flight
    futures fails, so spans that were never uploaded are not silently lost
    at interpreter exit. Errors are logged at debug level and reported via
    the return value rather than raised.

    Args:
        timeout: Maximum time to wait for in-flight exports, in seconds.

    Returns:
        True if every in-flight export finished within ``timeout`` and the
        final flush ran without error; False otherwise.
    """
    completed = True

    # Snapshot: done-callbacks remove entries from the live list concurrently.
    in_flight = list(_pending_futures)
    if in_flight:
        try:
            done, not_done = cf.wait(in_flight, timeout=timeout)
        except Exception:
            # e.g. a tracked object that is not a concurrent.futures.Future.
            logger.debug("Failed while waiting for pending span exports", exc_info=True)
            completed = False
        else:
            for finished in done:
                # Retrieve the outcome so nothing is left unobserved.
                with contextlib.suppress(Exception):
                    _ = finished.exception()
            completed = not not_done
        _pending_futures.clear()

    # Flush any remaining spans synchronously, regardless of how the wait went.
    try:
        flush()
    except Exception:
        logger.debug("Failed to flush pending spans during shutdown", exc_info=True)
        completed = False

    return completed
191
+
192
+
193
+ # Register shutdown handler
194
+ atexit.register(lambda: shutdown(timeout=5.0))
195
+
196
+
197
+ __all__ = [
198
+ "flush",
199
+ "queue_span",
200
+ "shutdown",
201
+ ]