hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (282)
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +70 -5
  3. hud/agents/base.py +238 -500
  4. hud/agents/claude.py +236 -247
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +264 -0
  7. hud/agents/gemini_cua.py +324 -0
  8. hud/agents/grounded_openai.py +98 -100
  9. hud/agents/misc/integration_test_agent.py +51 -20
  10. hud/agents/misc/response_agent.py +48 -36
  11. hud/agents/openai.py +282 -296
  12. hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
  13. hud/agents/operator.py +199 -0
  14. hud/agents/resolver.py +70 -0
  15. hud/agents/tests/conftest.py +133 -0
  16. hud/agents/tests/test_base.py +300 -622
  17. hud/agents/tests/test_base_runtime.py +233 -0
  18. hud/agents/tests/test_claude.py +381 -214
  19. hud/agents/tests/test_client.py +9 -10
  20. hud/agents/tests/test_gemini.py +369 -0
  21. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  22. hud/agents/tests/test_openai.py +377 -140
  23. hud/agents/tests/test_operator.py +362 -0
  24. hud/agents/tests/test_resolver.py +192 -0
  25. hud/agents/tests/test_run_eval.py +179 -0
  26. hud/agents/types.py +148 -0
  27. hud/cli/__init__.py +493 -546
  28. hud/cli/analyze.py +43 -5
  29. hud/cli/build.py +699 -113
  30. hud/cli/debug.py +8 -5
  31. hud/cli/dev.py +889 -732
  32. hud/cli/eval.py +793 -667
  33. hud/cli/flows/dev.py +167 -0
  34. hud/cli/flows/init.py +191 -0
  35. hud/cli/flows/tasks.py +153 -56
  36. hud/cli/flows/templates.py +151 -0
  37. hud/cli/flows/tests/__init__.py +1 -0
  38. hud/cli/flows/tests/test_dev.py +126 -0
  39. hud/cli/init.py +60 -58
  40. hud/cli/pull.py +1 -1
  41. hud/cli/push.py +38 -13
  42. hud/cli/rft.py +311 -0
  43. hud/cli/rft_status.py +145 -0
  44. hud/cli/tests/test_analyze.py +5 -5
  45. hud/cli/tests/test_analyze_metadata.py +3 -2
  46. hud/cli/tests/test_analyze_module.py +120 -0
  47. hud/cli/tests/test_build.py +110 -8
  48. hud/cli/tests/test_build_failure.py +41 -0
  49. hud/cli/tests/test_build_module.py +50 -0
  50. hud/cli/tests/test_cli_init.py +6 -1
  51. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  52. hud/cli/tests/test_cli_root.py +140 -0
  53. hud/cli/tests/test_convert.py +361 -0
  54. hud/cli/tests/test_debug.py +12 -10
  55. hud/cli/tests/test_dev.py +197 -0
  56. hud/cli/tests/test_eval.py +251 -0
  57. hud/cli/tests/test_eval_bedrock.py +51 -0
  58. hud/cli/tests/test_init.py +124 -0
  59. hud/cli/tests/test_main_module.py +11 -5
  60. hud/cli/tests/test_mcp_server.py +12 -100
  61. hud/cli/tests/test_push.py +1 -1
  62. hud/cli/tests/test_push_happy.py +74 -0
  63. hud/cli/tests/test_push_wrapper.py +23 -0
  64. hud/cli/tests/test_registry.py +1 -1
  65. hud/cli/tests/test_utils.py +1 -1
  66. hud/cli/{rl → utils}/celebrate.py +14 -12
  67. hud/cli/utils/config.py +18 -1
  68. hud/cli/utils/docker.py +130 -4
  69. hud/cli/utils/env_check.py +9 -9
  70. hud/cli/utils/git.py +136 -0
  71. hud/cli/utils/interactive.py +39 -5
  72. hud/cli/utils/metadata.py +70 -1
  73. hud/cli/utils/runner.py +1 -1
  74. hud/cli/utils/server.py +2 -2
  75. hud/cli/utils/source_hash.py +3 -3
  76. hud/cli/utils/tasks.py +4 -1
  77. hud/cli/utils/tests/__init__.py +0 -0
  78. hud/cli/utils/tests/test_config.py +58 -0
  79. hud/cli/utils/tests/test_docker.py +93 -0
  80. hud/cli/utils/tests/test_docker_hints.py +71 -0
  81. hud/cli/utils/tests/test_env_check.py +74 -0
  82. hud/cli/utils/tests/test_environment.py +42 -0
  83. hud/cli/utils/tests/test_git.py +142 -0
  84. hud/cli/utils/tests/test_interactive_module.py +60 -0
  85. hud/cli/utils/tests/test_local_runner.py +50 -0
  86. hud/cli/utils/tests/test_logging_utils.py +23 -0
  87. hud/cli/utils/tests/test_metadata.py +49 -0
  88. hud/cli/utils/tests/test_package_runner.py +35 -0
  89. hud/cli/utils/tests/test_registry_utils.py +49 -0
  90. hud/cli/utils/tests/test_remote_runner.py +25 -0
  91. hud/cli/utils/tests/test_runner_modules.py +52 -0
  92. hud/cli/utils/tests/test_source_hash.py +36 -0
  93. hud/cli/utils/tests/test_tasks.py +80 -0
  94. hud/cli/utils/version_check.py +258 -0
  95. hud/cli/{rl → utils}/viewer.py +2 -2
  96. hud/clients/README.md +12 -11
  97. hud/clients/__init__.py +4 -3
  98. hud/clients/base.py +166 -26
  99. hud/clients/environment.py +51 -0
  100. hud/clients/fastmcp.py +13 -6
  101. hud/clients/mcp_use.py +45 -15
  102. hud/clients/tests/test_analyze_scenarios.py +206 -0
  103. hud/clients/tests/test_protocol.py +9 -3
  104. hud/datasets/__init__.py +23 -20
  105. hud/datasets/loader.py +326 -0
  106. hud/datasets/runner.py +198 -105
  107. hud/datasets/tests/__init__.py +0 -0
  108. hud/datasets/tests/test_loader.py +221 -0
  109. hud/datasets/tests/test_utils.py +315 -0
  110. hud/datasets/utils.py +270 -90
  111. hud/environment/__init__.py +52 -0
  112. hud/environment/connection.py +258 -0
  113. hud/environment/connectors/__init__.py +33 -0
  114. hud/environment/connectors/base.py +68 -0
  115. hud/environment/connectors/local.py +177 -0
  116. hud/environment/connectors/mcp_config.py +137 -0
  117. hud/environment/connectors/openai.py +101 -0
  118. hud/environment/connectors/remote.py +172 -0
  119. hud/environment/environment.py +835 -0
  120. hud/environment/integrations/__init__.py +45 -0
  121. hud/environment/integrations/adk.py +67 -0
  122. hud/environment/integrations/anthropic.py +196 -0
  123. hud/environment/integrations/gemini.py +92 -0
  124. hud/environment/integrations/langchain.py +82 -0
  125. hud/environment/integrations/llamaindex.py +68 -0
  126. hud/environment/integrations/openai.py +238 -0
  127. hud/environment/mock.py +306 -0
  128. hud/environment/router.py +263 -0
  129. hud/environment/scenarios.py +620 -0
  130. hud/environment/tests/__init__.py +1 -0
  131. hud/environment/tests/test_connection.py +317 -0
  132. hud/environment/tests/test_connectors.py +205 -0
  133. hud/environment/tests/test_environment.py +593 -0
  134. hud/environment/tests/test_integrations.py +257 -0
  135. hud/environment/tests/test_local_connectors.py +242 -0
  136. hud/environment/tests/test_scenarios.py +1086 -0
  137. hud/environment/tests/test_tools.py +208 -0
  138. hud/environment/types.py +23 -0
  139. hud/environment/utils/__init__.py +35 -0
  140. hud/environment/utils/formats.py +215 -0
  141. hud/environment/utils/schema.py +171 -0
  142. hud/environment/utils/tool_wrappers.py +113 -0
  143. hud/eval/__init__.py +67 -0
  144. hud/eval/context.py +727 -0
  145. hud/eval/display.py +299 -0
  146. hud/eval/instrument.py +187 -0
  147. hud/eval/manager.py +533 -0
  148. hud/eval/parallel.py +268 -0
  149. hud/eval/task.py +372 -0
  150. hud/eval/tests/__init__.py +1 -0
  151. hud/eval/tests/test_context.py +178 -0
  152. hud/eval/tests/test_eval.py +210 -0
  153. hud/eval/tests/test_manager.py +152 -0
  154. hud/eval/tests/test_parallel.py +168 -0
  155. hud/eval/tests/test_task.py +291 -0
  156. hud/eval/types.py +65 -0
  157. hud/eval/utils.py +194 -0
  158. hud/patches/__init__.py +19 -0
  159. hud/patches/mcp_patches.py +308 -0
  160. hud/patches/warnings.py +54 -0
  161. hud/samples/browser.py +4 -4
  162. hud/server/__init__.py +2 -1
  163. hud/server/low_level.py +2 -1
  164. hud/server/router.py +164 -0
  165. hud/server/server.py +567 -80
  166. hud/server/tests/test_mcp_server_integration.py +11 -11
  167. hud/server/tests/test_mcp_server_more.py +1 -1
  168. hud/server/tests/test_server_extra.py +2 -0
  169. hud/settings.py +45 -3
  170. hud/shared/exceptions.py +36 -10
  171. hud/shared/hints.py +26 -1
  172. hud/shared/requests.py +15 -3
  173. hud/shared/tests/test_exceptions.py +40 -31
  174. hud/shared/tests/test_hints.py +167 -0
  175. hud/telemetry/__init__.py +20 -19
  176. hud/telemetry/exporter.py +201 -0
  177. hud/telemetry/instrument.py +165 -253
  178. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  179. hud/telemetry/tests/test_exporter.py +258 -0
  180. hud/telemetry/tests/test_instrument.py +401 -0
  181. hud/tools/__init__.py +18 -2
  182. hud/tools/agent.py +223 -0
  183. hud/tools/apply_patch.py +639 -0
  184. hud/tools/base.py +54 -4
  185. hud/tools/bash.py +2 -2
  186. hud/tools/computer/__init__.py +36 -3
  187. hud/tools/computer/anthropic.py +2 -2
  188. hud/tools/computer/gemini.py +385 -0
  189. hud/tools/computer/hud.py +23 -6
  190. hud/tools/computer/openai.py +20 -21
  191. hud/tools/computer/qwen.py +434 -0
  192. hud/tools/computer/settings.py +37 -0
  193. hud/tools/edit.py +3 -7
  194. hud/tools/executors/base.py +4 -2
  195. hud/tools/executors/pyautogui.py +1 -1
  196. hud/tools/grounding/grounded_tool.py +13 -18
  197. hud/tools/grounding/grounder.py +10 -31
  198. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  199. hud/tools/jupyter.py +330 -0
  200. hud/tools/playwright.py +18 -3
  201. hud/tools/shell.py +308 -0
  202. hud/tools/tests/test_agent_tool.py +355 -0
  203. hud/tools/tests/test_apply_patch.py +718 -0
  204. hud/tools/tests/test_computer.py +4 -9
  205. hud/tools/tests/test_computer_actions.py +24 -2
  206. hud/tools/tests/test_jupyter_tool.py +181 -0
  207. hud/tools/tests/test_shell.py +596 -0
  208. hud/tools/tests/test_submit.py +85 -0
  209. hud/tools/tests/test_types.py +193 -0
  210. hud/tools/types.py +21 -1
  211. hud/types.py +194 -56
  212. hud/utils/__init__.py +2 -0
  213. hud/utils/env.py +67 -0
  214. hud/utils/hud_console.py +89 -18
  215. hud/utils/mcp.py +15 -58
  216. hud/utils/strict_schema.py +162 -0
  217. hud/utils/tests/test_init.py +1 -2
  218. hud/utils/tests/test_mcp.py +1 -28
  219. hud/utils/tests/test_pretty_errors.py +186 -0
  220. hud/utils/tests/test_tool_shorthand.py +154 -0
  221. hud/utils/tests/test_version.py +1 -1
  222. hud/utils/types.py +20 -0
  223. hud/version.py +1 -1
  224. hud_python-0.5.13.dist-info/METADATA +264 -0
  225. hud_python-0.5.13.dist-info/RECORD +305 -0
  226. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
  227. hud/agents/langchain.py +0 -261
  228. hud/agents/lite_llm.py +0 -72
  229. hud/cli/rl/__init__.py +0 -180
  230. hud/cli/rl/config.py +0 -101
  231. hud/cli/rl/display.py +0 -133
  232. hud/cli/rl/gpu.py +0 -63
  233. hud/cli/rl/gpu_utils.py +0 -321
  234. hud/cli/rl/local_runner.py +0 -595
  235. hud/cli/rl/presets.py +0 -96
  236. hud/cli/rl/remote_runner.py +0 -463
  237. hud/cli/rl/rl_api.py +0 -150
  238. hud/cli/rl/vllm.py +0 -177
  239. hud/cli/rl/wait_utils.py +0 -89
  240. hud/datasets/parallel.py +0 -687
  241. hud/misc/__init__.py +0 -1
  242. hud/misc/claude_plays_pokemon.py +0 -292
  243. hud/otel/__init__.py +0 -35
  244. hud/otel/collector.py +0 -142
  245. hud/otel/config.py +0 -181
  246. hud/otel/context.py +0 -570
  247. hud/otel/exporters.py +0 -369
  248. hud/otel/instrumentation.py +0 -135
  249. hud/otel/processors.py +0 -121
  250. hud/otel/tests/__init__.py +0 -1
  251. hud/otel/tests/test_processors.py +0 -197
  252. hud/rl/README.md +0 -30
  253. hud/rl/__init__.py +0 -1
  254. hud/rl/actor.py +0 -176
  255. hud/rl/buffer.py +0 -405
  256. hud/rl/chat_template.jinja +0 -101
  257. hud/rl/config.py +0 -192
  258. hud/rl/distributed.py +0 -132
  259. hud/rl/learner.py +0 -637
  260. hud/rl/tests/__init__.py +0 -1
  261. hud/rl/tests/test_learner.py +0 -186
  262. hud/rl/train.py +0 -382
  263. hud/rl/types.py +0 -101
  264. hud/rl/utils/start_vllm_server.sh +0 -30
  265. hud/rl/utils.py +0 -524
  266. hud/rl/vllm_adapter.py +0 -143
  267. hud/telemetry/job.py +0 -352
  268. hud/telemetry/replay.py +0 -74
  269. hud/telemetry/tests/test_replay.py +0 -40
  270. hud/telemetry/tests/test_trace.py +0 -63
  271. hud/telemetry/trace.py +0 -158
  272. hud/utils/agent_factories.py +0 -86
  273. hud/utils/async_utils.py +0 -65
  274. hud/utils/group_eval.py +0 -223
  275. hud/utils/progress.py +0 -149
  276. hud/utils/tasks.py +0 -127
  277. hud/utils/tests/test_async_utils.py +0 -173
  278. hud/utils/tests/test_progress.py +0 -261
  279. hud_python-0.4.45.dist-info/METADATA +0 -552
  280. hud_python-0.4.45.dist-info/RECORD +0 -228
  281. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  282. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
hud/telemetry/job.py DELETED
@@ -1,352 +0,0 @@
1
- """Job management for HUD SDK.
2
-
3
- This module provides APIs for managing jobs - logical groupings of related tasks.
4
- Jobs can be used to track experiments, batch processing, training runs, etc.
5
- """
6
-
7
- from __future__ import annotations
8
-
9
- import asyncio
10
- import logging
11
- import uuid
12
- from contextlib import contextmanager
13
- from datetime import UTC, datetime
14
- from functools import wraps
15
- from typing import TYPE_CHECKING, Any
16
-
17
- from hud.settings import settings
18
- from hud.shared import make_request, make_request_sync
19
-
20
- if TYPE_CHECKING:
21
- from collections.abc import Callable, Generator
22
-
23
- logger = logging.getLogger(__name__)
24
-
25
-
26
- class Job:
27
- """A job represents a collection of related tasks."""
28
-
29
- def __init__(
30
- self,
31
- job_id: str,
32
- name: str,
33
- metadata: dict[str, Any] | None = None,
34
- dataset_link: str | None = None,
35
- ) -> None:
36
- self.id = job_id
37
- self.name = name
38
- self.metadata = metadata or {}
39
- self.dataset_link = dataset_link
40
- self.status = "created"
41
- self.created_at = datetime.now(UTC)
42
- self.tasks: list[str] = []
43
-
44
- def add_task(self, task_id: str) -> None:
45
- """Associate a task with this job."""
46
- self.tasks.append(task_id)
47
-
48
- async def update_status(self, status: str) -> None:
49
- """Update job status on the server."""
50
- self.status = status
51
- if settings.telemetry_enabled:
52
- try:
53
- payload = {
54
- "name": self.name,
55
- "status": status,
56
- "metadata": self.metadata,
57
- }
58
- if self.dataset_link:
59
- payload["dataset_link"] = self.dataset_link
60
-
61
- await make_request(
62
- method="POST",
63
- url=f"{settings.hud_telemetry_url}/jobs/{self.id}/status",
64
- json=payload,
65
- api_key=settings.api_key,
66
- )
67
- except Exception as e:
68
- logger.warning("Failed to update job status: %s", e)
69
-
70
- def update_status_sync(self, status: str) -> None:
71
- """Synchronously update job status on the server."""
72
- self.status = status
73
- if settings.telemetry_enabled:
74
- try:
75
- payload = {
76
- "name": self.name,
77
- "status": status,
78
- "metadata": self.metadata,
79
- }
80
- if self.dataset_link:
81
- payload["dataset_link"] = self.dataset_link
82
-
83
- make_request_sync(
84
- method="POST",
85
- url=f"{settings.hud_telemetry_url}/jobs/{self.id}/status",
86
- json=payload,
87
- api_key=settings.api_key,
88
- )
89
- except Exception as e:
90
- logger.warning("Failed to update job status: %s", e)
91
-
92
- async def log(self, metrics: dict[str, Any]) -> None:
93
- """Log metrics to the job.
94
-
95
- Args:
96
- metrics: Dictionary of metric name to value pairs
97
-
98
- Example:
99
- await job.log({"loss": 0.5, "accuracy": 0.95, "epoch": 1})
100
- """
101
- if settings.telemetry_enabled:
102
- try:
103
- await make_request(
104
- method="POST",
105
- url=f"{settings.hud_telemetry_url}/jobs/{self.id}/log",
106
- json={"metrics": metrics, "timestamp": datetime.now(UTC).isoformat()},
107
- api_key=settings.api_key,
108
- )
109
- except Exception as e:
110
- logger.warning("Failed to log metrics to job: %s", e)
111
-
112
- def log_sync(self, metrics: dict[str, Any]) -> None:
113
- """Synchronously log metrics to the job.
114
-
115
- Args:
116
- metrics: Dictionary of metric name to value pairs
117
-
118
- Example:
119
- job.log_sync({"loss": 0.5, "accuracy": 0.95, "epoch": 1})
120
- """
121
- if settings.telemetry_enabled:
122
- try:
123
- make_request_sync(
124
- method="POST",
125
- url=f"{settings.hud_telemetry_url}/jobs/{self.id}/log",
126
- json={"metrics": metrics, "timestamp": datetime.now(UTC).isoformat()},
127
- api_key=settings.api_key,
128
- )
129
- except Exception as e:
130
- logger.warning("Failed to log metrics to job: %s", e)
131
-
132
- def __repr__(self) -> str:
133
- return f"Job(id={self.id!r}, name={self.name!r}, status={self.status!r})"
134
-
135
-
136
- # Global job registry for the decorator pattern
137
- _current_job: Job | None = None
138
-
139
-
140
- def _print_job_url(job_id: str, job_name: str) -> None:
141
- """Print the job URL in a colorful box."""
142
- # Only print HUD URL if HUD telemetry is enabled and has API key
143
- if not (settings.telemetry_enabled and settings.api_key):
144
- return
145
-
146
- url = f"https://hud.so/jobs/{job_id}"
147
- header = f"🚀 Job '{job_name}' started:"
148
-
149
- # ANSI color codes
150
- DIM = "\033[90m" # Dim/Gray for border
151
- GOLD = "\033[33m" # Gold/Yellow for URL
152
- RESET = "\033[0m"
153
- BOLD = "\033[1m"
154
-
155
- # Calculate box width based on the longest line
156
- box_width = max(len(url), len(header)) + 6
157
-
158
- # Box drawing characters
159
- top_border = "╔" + "═" * (box_width - 2) + "╗"
160
- bottom_border = "╚" + "═" * (box_width - 2) + "╝"
161
- divider = "╟" + "─" * (box_width - 2) + "╢"
162
-
163
- # Center the content
164
- header_padding = (box_width - len(header) - 2) // 2
165
- url_padding = (box_width - len(url) - 2) // 2
166
-
167
- # Print the box
168
- print(f"\n{DIM}{top_border}{RESET}") # noqa: T201
169
- print( # noqa: T201
170
- f"{DIM}║{RESET}{' ' * header_padding}{header}{' ' * (box_width - len(header) - header_padding - 3)}{DIM}║{RESET}" # noqa: E501
171
- )
172
- print(f"{DIM}{divider}{RESET}") # noqa: T201
173
- print( # noqa: T201
174
- f"{DIM}║{RESET}{' ' * url_padding}{BOLD}{GOLD}{url}{RESET}{' ' * (box_width - len(url) - url_padding - 2)}{DIM}║{RESET}" # noqa: E501
175
- )
176
- print(f"{DIM}{bottom_border}{RESET}\n") # noqa: T201
177
-
178
-
179
- def _print_job_complete_url(job_id: str, job_name: str, error_occurred: bool = False) -> None:
180
- """Print the job completion URL with appropriate messaging."""
181
- # Only print HUD URL if HUD telemetry is enabled and has API key
182
- if not (settings.telemetry_enabled and settings.api_key):
183
- return
184
-
185
- url = f"https://hud.so/jobs/{job_id}"
186
-
187
- # ANSI color codes
188
- GREEN = "\033[92m"
189
- RED = "\033[91m"
190
- GOLD = "\033[33m"
191
- RESET = "\033[0m"
192
- DIM = "\033[2m"
193
- BOLD = "\033[1m"
194
-
195
- if error_occurred:
196
- print( # noqa: T201
197
- f"\n{RED}✗ Job '{job_name}' failed!{RESET} {DIM}View details at:{RESET} {BOLD}{GOLD}{url}{RESET}\n" # noqa: E501
198
- )
199
- else:
200
- print( # noqa: T201
201
- f"\n{GREEN}✓ Job '{job_name}' complete!{RESET} {DIM}View all results at:{RESET} {BOLD}{GOLD}{url}{RESET}\n" # noqa: E501
202
- )
203
-
204
-
205
- def get_current_job() -> Job | None:
206
- """Get the currently active job, if any."""
207
- return _current_job
208
-
209
-
210
- @contextmanager
211
- def job(
212
- name: str,
213
- metadata: dict[str, Any] | None = None,
214
- job_id: str | None = None,
215
- dataset_link: str | None = None,
216
- ) -> Generator[Job, None, None]:
217
- """Context manager for job tracking.
218
-
219
- Groups related tasks together under a single job for tracking and organization.
220
-
221
- Args:
222
- name: Human-readable job name
223
- metadata: Optional metadata dictionary
224
- job_id: Optional job ID (auto-generated if not provided)
225
- dataset_link: Optional HuggingFace dataset identifier (e.g. "hud-evals/SheetBench-50")
226
-
227
- Yields:
228
- Job: The job object
229
-
230
- Example:
231
- with hud.job("training_run", {"model": "gpt-4"}) as job:
232
- for epoch in range(10):
233
- with hud.trace(f"epoch_{epoch}", job_id=job.id):
234
- train_epoch()
235
- """
236
- global _current_job
237
-
238
- if not job_id:
239
- job_id = str(uuid.uuid4())
240
-
241
- job_obj = Job(job_id, name, metadata, dataset_link)
242
-
243
- # Set as current job
244
- old_job = _current_job
245
- _current_job = job_obj
246
-
247
- try:
248
- # Update status to running synchronously to ensure job is registered before tasks start
249
- job_obj.update_status_sync("running")
250
- # Print the nice job URL box
251
- _print_job_url(job_obj.id, job_obj.name)
252
- yield job_obj
253
- # Update status to completed synchronously to ensure it completes before process exit
254
- job_obj.update_status_sync("completed")
255
- # Print job completion message
256
- _print_job_complete_url(job_obj.id, job_obj.name, error_occurred=False)
257
- except Exception:
258
- # Update status to failed synchronously to ensure it completes before process exit
259
- job_obj.update_status_sync("failed")
260
- # Print job failure message
261
- _print_job_complete_url(job_obj.id, job_obj.name, error_occurred=True)
262
- raise
263
- finally:
264
- _current_job = old_job
265
-
266
-
267
- def create_job(
268
- name: str,
269
- metadata: dict[str, Any] | None = None,
270
- dataset_link: str | None = None,
271
- job_id: str | None = None,
272
- ) -> Job:
273
- """Create a job without using context manager.
274
-
275
- Useful when you need explicit control over job lifecycle.
276
-
277
- Args:
278
- name: Human-readable job name
279
- metadata: Optional metadata dictionary
280
- dataset_link: Optional HuggingFace dataset identifier (e.g. "hud-evals/SheetBench-50")
281
- job_id: Optional job ID (auto-generated if not provided)
282
- Returns:
283
- Job: The created job object
284
-
285
- Example:
286
- job = hud.create_job("data_processing")
287
- try:
288
- for item in items:
289
- with hud.trace(f"process_{item.id}", job_id=job.id):
290
- process(item)
291
- finally:
292
- await job.update_status("completed")
293
- """
294
- job_id = job_id or str(uuid.uuid4())
295
- return Job(job_id, name, metadata, dataset_link)
296
-
297
-
298
- def job_decorator(name: str | None = None, **metadata: Any) -> Callable:
299
- """Decorator for functions that should be tracked as jobs.
300
-
301
- Args:
302
- name: Job name (defaults to function name)
303
- **metadata: Additional metadata for the job
304
-
305
- Example:
306
- @hud.job_decorator("model_training", model="gpt-4", dataset="v2")
307
- async def train_model(config):
308
- # This entire function execution is tracked as a job
309
- await model.train(config)
310
- return model.evaluate()
311
- """
312
-
313
- def decorator(func: Callable) -> Callable:
314
- job_name = name or func.__name__
315
-
316
- @wraps(func)
317
- async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
318
- with job(job_name, metadata) as job_obj:
319
- # Store job ID in function for access
320
- func._current_job_id = job_obj.id
321
- try:
322
- return await func(*args, **kwargs)
323
- finally:
324
- delattr(func, "_current_job_id")
325
-
326
- @wraps(func)
327
- def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
328
- with job(job_name, metadata) as job_obj:
329
- # Store job ID in function for access
330
- func._current_job_id = job_obj.id
331
- try:
332
- return func(*args, **kwargs)
333
- finally:
334
- delattr(func, "_current_job_id")
335
-
336
- # Return appropriate wrapper based on function type
337
- if asyncio.iscoroutinefunction(func):
338
- return async_wrapper
339
- else:
340
- return sync_wrapper
341
-
342
- return decorator
343
-
344
-
345
- # Convenience exports
346
- __all__ = [
347
- "Job",
348
- "create_job",
349
- "get_current_job",
350
- "job",
351
- "job_decorator",
352
- ]
hud/telemetry/replay.py DELETED
@@ -1,74 +0,0 @@
1
- """Trace retrieval and replay functionality.
2
-
3
- This module provides APIs to retrieve collected traces for analysis,
4
- debugging, and replay purposes.
5
- """
6
-
7
- from __future__ import annotations
8
-
9
- from typing import TYPE_CHECKING
10
-
11
- from hud.otel.collector import clear_trace as _clear_trace
12
- from hud.otel.collector import get_trace as _get_trace
13
-
14
- if TYPE_CHECKING:
15
- from hud.types import Trace
16
-
17
- __all__ = ["clear_trace", "get_trace"]
18
-
19
-
20
- def get_trace(task_run_id: str) -> Trace | None:
21
- """Retrieve the collected trace for a task run.
22
-
23
- Returns None if trace collection was disabled or the trace doesn't exist.
24
-
25
- Args:
26
- task_run_id: The task run ID to retrieve the trace for
27
-
28
- Returns:
29
- Trace object containing all collected steps, or None if not found
30
-
31
- Usage:
32
- import hud
33
-
34
- # Run agent with tracing
35
- with hud.trace() as task_run_id:
36
- agent = MyAgent()
37
- result = await agent.run("solve task")
38
-
39
- # Get the trace for analysis
40
- trace = hud.get_trace(task_run_id)
41
- if trace:
42
- print(f"Collected {len(trace.trace)} steps")
43
-
44
- # Analyze agent vs MCP steps
45
- agent_steps = [s for s in trace.trace if s.category == "agent"]
46
- mcp_steps = [s for s in trace.trace if s.category == "mcp"]
47
-
48
- print(f"Agent steps: {len(agent_steps)}")
49
- print(f"MCP steps: {len(mcp_steps)}")
50
-
51
- # Replay or analyze individual steps
52
- for step in trace.trace:
53
- if step.category == "agent" and step.result:
54
- print(f"Agent: {step.result.get('content') if isinstance(step.result, dict) else step.result}")
55
- if step.category == "mcp" and step.request:
56
- print(f"MCP: {step.request.method if hasattr(step.request, 'method') else step.request}")
57
- """ # noqa: E501
58
- return _get_trace(task_run_id)
59
-
60
-
61
- def clear_trace(task_run_id: str) -> None:
62
- """Clear the collected trace for a task run ID.
63
-
64
- Useful for cleaning up memory after processing large traces.
65
-
66
- Args:
67
- task_run_id: The task run ID to clear the trace for
68
-
69
- Usage:
70
- trace = hud.get_trace(task_run_id)
71
- # Process trace...
72
- hud.clear_trace(task_run_id) # Free memory
73
- """
74
- _clear_trace(task_run_id)
hud/telemetry/tests/test_replay.py DELETED
@@ -1,40 +0,0 @@
1
- """Tests for telemetry replay functionality."""
2
-
3
- from __future__ import annotations
4
-
5
- from unittest.mock import patch
6
-
7
- from hud.telemetry.replay import clear_trace, get_trace
8
-
9
-
10
- class TestReplayAPI:
11
- """Tests for replay API functions."""
12
-
13
- def test_get_trace_calls_internal(self):
14
- """Test that get_trace calls the internal _get_trace function."""
15
- with patch("hud.telemetry.replay._get_trace") as mock_get:
16
- mock_get.return_value = None
17
-
18
- result = get_trace("test-task-id")
19
-
20
- mock_get.assert_called_once_with("test-task-id")
21
- assert result is None
22
-
23
- def test_clear_trace_calls_internal(self):
24
- """Test that clear_trace calls the internal _clear_trace function."""
25
- with patch("hud.telemetry.replay._clear_trace") as mock_clear:
26
- clear_trace("test-task-id")
27
-
28
- mock_clear.assert_called_once_with("test-task-id")
29
-
30
- def test_get_trace_with_data(self):
31
- """Test get_trace with mock data."""
32
- mock_trace = {"trace": [{"step": 1}], "task_run_id": "test-123"}
33
-
34
- with patch("hud.telemetry.replay._get_trace") as mock_get:
35
- mock_get.return_value = mock_trace
36
-
37
- result = get_trace("test-123")
38
-
39
- assert result == mock_trace
40
- mock_get.assert_called_once_with("test-123")
hud/telemetry/tests/test_trace.py DELETED
@@ -1,63 +0,0 @@
1
- """Tests for telemetry trace functionality."""
2
-
3
- from __future__ import annotations
4
-
5
- from unittest.mock import patch
6
-
7
- from hud.telemetry.trace import trace
8
-
9
-
10
- class TestTraceAPI:
11
- """Tests for trace API function."""
12
-
13
- def test_trace_with_disabled_telemetry_and_no_api_key(self):
14
- """Test trace behavior when telemetry is disabled and no API key."""
15
- # Mock settings to disable telemetry and remove API key
16
- mock_settings = type("Settings", (), {"telemetry_enabled": False, "api_key": None})()
17
-
18
- with (
19
- patch("hud.settings.get_settings", return_value=mock_settings),
20
- patch("hud.telemetry.trace.OtelTrace") as mock_otel_trace,
21
- ):
22
- mock_otel_trace.return_value.__enter__.return_value = "custom-otlp-trace"
23
-
24
- with trace("test-trace") as task_run_id:
25
- # Should use placeholder ID for custom backends
26
- assert task_run_id.id == "custom-otlp-trace"
27
-
28
- def test_trace_with_enabled_telemetry_and_api_key(self):
29
- """Test trace behavior when telemetry is enabled with API key."""
30
- mock_settings = type("Settings", (), {"telemetry_enabled": True, "api_key": "test-key"})()
31
-
32
- with (
33
- patch("hud.settings.get_settings", return_value=mock_settings),
34
- patch("hud.telemetry.trace.OtelTrace") as mock_otel_trace,
35
- patch("hud.telemetry.trace.uuid.uuid4") as mock_uuid,
36
- ):
37
- mock_uuid.return_value = "mock-uuid-123"
38
- mock_otel_trace.return_value.__enter__.return_value = "mock-uuid-123"
39
-
40
- with trace("test-trace") as task_run_id:
41
- # Should use generated UUID
42
- assert task_run_id.id == "mock-uuid-123"
43
-
44
- def test_trace_with_no_api_key(self):
45
- """Test trace behavior with no API key (custom backend scenario)."""
46
- mock_settings = type(
47
- "Settings",
48
- (),
49
- {
50
- "telemetry_enabled": True, # Enabled but no API key
51
- "api_key": None,
52
- },
53
- )()
54
-
55
- with (
56
- patch("hud.settings.get_settings", return_value=mock_settings),
57
- patch("hud.telemetry.trace.OtelTrace") as mock_otel_trace,
58
- ):
59
- mock_otel_trace.return_value.__enter__.return_value = "custom-otlp-trace"
60
-
61
- with trace("test-trace") as task_run_id:
62
- # Should use custom backend placeholder
63
- assert task_run_id.id == "custom-otlp-trace"
hud/telemetry/trace.py DELETED
@@ -1,158 +0,0 @@
1
- """User-facing trace context manager for HUD telemetry.
2
-
3
- This module provides the simple trace() API that users interact with.
4
- The actual OpenTelemetry implementation is in hud.otel.
5
- """
6
-
7
- from __future__ import annotations
8
-
9
- import logging
10
- import uuid
11
- from contextlib import contextmanager
12
- from datetime import UTC, datetime
13
- from typing import TYPE_CHECKING, Any
14
-
15
- from hud.otel import configure_telemetry
16
- from hud.otel import trace as OtelTrace
17
- from hud.settings import settings
18
- from hud.shared import make_request, make_request_sync
19
-
20
- if TYPE_CHECKING:
21
- from collections.abc import Generator
22
-
23
- logger = logging.getLogger(__name__)
24
-
25
- __all__ = ["Trace", "trace"]
26
-
27
-
28
- class Trace:
29
- """A trace represents a single task execution with telemetry."""
30
-
31
- def __init__(
32
- self,
33
- trace_id: str,
34
- name: str,
35
- job_id: str | None = None,
36
- task_id: str | None = None,
37
- ) -> None:
38
- self.id = trace_id
39
- self.name = name
40
- self.job_id = job_id
41
- self.task_id = task_id
42
- self.created_at = datetime.now(UTC)
43
-
44
- async def log(self, metrics: dict[str, Any]) -> None:
45
- """Log metrics to this trace.
46
-
47
- Args:
48
- metrics: Dictionary of metric name to value pairs
49
-
50
- Example:
51
- await trace.log({"step": 1, "loss": 0.5, "accuracy": 0.92})
52
- """
53
- if settings.telemetry_enabled:
54
- try:
55
- await make_request(
56
- method="POST",
57
- url=f"{settings.hud_telemetry_url}/traces/{self.id}/log",
58
- json={"metrics": metrics, "timestamp": datetime.now(UTC).isoformat()},
59
- api_key=settings.api_key,
60
- )
61
- except Exception as e:
62
- logger.warning("Failed to log metrics to trace: %s", e)
63
-
64
- def log_sync(self, metrics: dict[str, Any]) -> None:
65
- """Synchronously log metrics to this trace.
66
-
67
- Args:
68
- metrics: Dictionary of metric name to value pairs
69
-
70
- Example:
71
- trace.log_sync({"step": 1, "loss": 0.5, "accuracy": 0.92})
72
- """
73
- if settings.telemetry_enabled:
74
- try:
75
- make_request_sync(
76
- method="POST",
77
- url=f"{settings.hud_telemetry_url}/traces/{self.id}/log",
78
- json={"metrics": metrics, "timestamp": datetime.now(UTC).isoformat()},
79
- api_key=settings.api_key,
80
- )
81
- except Exception as e:
82
- logger.warning("Failed to log metrics to trace: %s", e)
83
-
84
- def __repr__(self) -> str:
85
- return f"Trace(id={self.id!r}, name={self.name!r})"
86
-
87
-
88
- @contextmanager
89
- def trace(
90
- name: str = "Test task from hud",
91
- *,
92
- root: bool = True,
93
- attrs: dict[str, Any] | None = None,
94
- job_id: str | None = None,
95
- task_id: str | None = None,
96
- ) -> Generator[Trace, None, None]:
97
- """Start a HUD trace context.
98
-
99
- A unique task_run_id is automatically generated for each trace.
100
-
101
- Args:
102
- name: Descriptive name for this trace/task
103
- root: Whether this is a root trace (updates task status)
104
- attrs: Additional attributes to attach to the trace
105
- job_id: Optional job ID to associate with this trace
106
- task_id: Optional task ID (for custom task identifiers)
107
-
108
- Yields:
109
- Trace: The trace object with logging capabilities
110
-
111
- Usage:
112
- import hud
113
-
114
- # Basic usage
115
- with hud.trace("My Task") as trace:
116
- # Your code here
117
- trace.log_sync({"step": 1, "progress": 0.5})
118
-
119
- # Async logging
120
- async with hud.trace("Async Task") as trace:
121
- await trace.log({"loss": 0.23, "accuracy": 0.95})
122
-
123
- # With job association
124
- with hud.job("Training Run") as job:
125
- with hud.trace("Epoch 1", job_id=job.id) as trace:
126
- trace.log_sync({"epoch": 1, "loss": 0.5})
127
- """
128
- # Ensure telemetry is configured
129
- configure_telemetry()
130
-
131
- # Only generate task_run_id if using HUD backend
132
- # For custom OTLP backends, we don't need it
133
- from hud.settings import get_settings
134
-
135
- settings = get_settings()
136
-
137
- if settings.telemetry_enabled and settings.api_key:
138
- task_run_id = str(uuid.uuid4())
139
- else:
140
- # Use a placeholder for custom backends
141
- logger.warning(
142
- "HUD API key is not set, using a placeholder for the task run ID. If this looks wrong, check your API key." # noqa: E501
143
- )
144
- task_run_id = str(uuid.uuid4())
145
-
146
- # Create trace object
147
- trace_obj = Trace(task_run_id, name, job_id, task_id)
148
-
149
- # Delegate to OpenTelemetry implementation
150
- with OtelTrace(
151
- task_run_id,
152
- is_root=root,
153
- span_name=name,
154
- attributes=attrs or {},
155
- job_id=job_id,
156
- task_id=task_id,
157
- ):
158
- yield trace_obj