hud-python 0.4.51__py3-none-any.whl → 0.4.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (88) hide show
  1. hud/__init__.py +13 -1
  2. hud/agents/base.py +14 -3
  3. hud/agents/lite_llm.py +1 -1
  4. hud/agents/openai_chat_generic.py +15 -3
  5. hud/agents/tests/test_base.py +9 -2
  6. hud/agents/tests/test_base_runtime.py +164 -0
  7. hud/cli/__init__.py +18 -25
  8. hud/cli/build.py +35 -27
  9. hud/cli/dev.py +11 -29
  10. hud/cli/eval.py +114 -145
  11. hud/cli/tests/test_analyze_module.py +120 -0
  12. hud/cli/tests/test_build.py +26 -3
  13. hud/cli/tests/test_build_failure.py +41 -0
  14. hud/cli/tests/test_build_module.py +50 -0
  15. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  16. hud/cli/tests/test_cli_root.py +134 -0
  17. hud/cli/tests/test_eval.py +4 -0
  18. hud/cli/tests/test_mcp_server.py +8 -7
  19. hud/cli/tests/test_push_happy.py +74 -0
  20. hud/cli/tests/test_push_wrapper.py +23 -0
  21. hud/cli/utils/docker.py +120 -1
  22. hud/cli/utils/runner.py +1 -1
  23. hud/cli/utils/tasks.py +4 -1
  24. hud/cli/utils/tests/__init__.py +0 -0
  25. hud/cli/utils/tests/test_config.py +58 -0
  26. hud/cli/utils/tests/test_docker.py +93 -0
  27. hud/cli/utils/tests/test_docker_hints.py +71 -0
  28. hud/cli/utils/tests/test_env_check.py +74 -0
  29. hud/cli/utils/tests/test_environment.py +42 -0
  30. hud/cli/utils/tests/test_interactive_module.py +60 -0
  31. hud/cli/utils/tests/test_local_runner.py +50 -0
  32. hud/cli/utils/tests/test_logging_utils.py +23 -0
  33. hud/cli/utils/tests/test_metadata.py +49 -0
  34. hud/cli/utils/tests/test_package_runner.py +35 -0
  35. hud/cli/utils/tests/test_registry_utils.py +49 -0
  36. hud/cli/utils/tests/test_remote_runner.py +25 -0
  37. hud/cli/utils/tests/test_runner_modules.py +52 -0
  38. hud/cli/utils/tests/test_source_hash.py +36 -0
  39. hud/cli/utils/tests/test_tasks.py +80 -0
  40. hud/cli/utils/version_check.py +257 -0
  41. hud/clients/base.py +1 -1
  42. hud/clients/mcp_use.py +3 -1
  43. hud/datasets/parallel.py +2 -2
  44. hud/datasets/runner.py +85 -24
  45. hud/datasets/tests/__init__.py +0 -0
  46. hud/datasets/tests/test_runner.py +106 -0
  47. hud/datasets/tests/test_utils.py +228 -0
  48. hud/otel/config.py +8 -6
  49. hud/otel/context.py +4 -4
  50. hud/otel/exporters.py +231 -57
  51. hud/otel/tests/__init__.py +0 -1
  52. hud/otel/tests/test_instrumentation.py +207 -0
  53. hud/rl/learner.py +1 -1
  54. hud/server/tests/test_server_extra.py +2 -0
  55. hud/shared/exceptions.py +35 -9
  56. hud/shared/hints.py +25 -0
  57. hud/shared/requests.py +15 -3
  58. hud/shared/tests/test_exceptions.py +39 -30
  59. hud/shared/tests/test_hints.py +167 -0
  60. hud/telemetry/__init__.py +30 -6
  61. hud/telemetry/async_context.py +331 -0
  62. hud/telemetry/job.py +51 -12
  63. hud/telemetry/tests/test_async_context.py +242 -0
  64. hud/telemetry/tests/test_instrument.py +414 -0
  65. hud/telemetry/tests/test_job.py +609 -0
  66. hud/telemetry/tests/test_trace.py +184 -6
  67. hud/telemetry/trace.py +16 -17
  68. hud/tools/computer/qwen.py +4 -1
  69. hud/tools/computer/settings.py +2 -2
  70. hud/tools/executors/base.py +4 -2
  71. hud/tools/tests/test_submit.py +85 -0
  72. hud/tools/tests/test_types.py +193 -0
  73. hud/types.py +7 -1
  74. hud/utils/agent_factories.py +1 -3
  75. hud/utils/mcp.py +1 -1
  76. hud/utils/task_tracking.py +223 -0
  77. hud/utils/tests/test_agent_factories.py +60 -0
  78. hud/utils/tests/test_mcp.py +4 -6
  79. hud/utils/tests/test_pretty_errors.py +186 -0
  80. hud/utils/tests/test_tasks.py +187 -0
  81. hud/utils/tests/test_tool_shorthand.py +154 -0
  82. hud/utils/tests/test_version.py +1 -1
  83. hud/version.py +1 -1
  84. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/METADATA +48 -48
  85. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/RECORD +88 -47
  86. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/WHEEL +0 -0
  87. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/entry_points.txt +0 -0
  88. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/licenses/LICENSE +0 -0
hud/telemetry/__init__.py CHANGED
@@ -1,14 +1,36 @@
1
- """HUD Telemetry - User-facing APIs for tracing and job management.
1
+ """HUD Telemetry - Tracing and job management for agent execution.
2
2
 
3
- This module provides the main telemetry APIs that users interact with:
4
- - trace: Context manager for tracing code execution
5
- - job: Context manager and utilities for job management
6
- - instrument: Decorator for instrumenting functions
7
- - get_trace: Retrieve collected traces for replay/analysis
3
+ Provides telemetry APIs for tracking agent execution and experiments.
4
+
5
+ Standard Usage:
6
+ >>> import hud
7
+ >>> with hud.trace("My Task"):
8
+ ... do_work()
9
+
10
+ >>> with hud.job("My Job") as job:
11
+ ... with hud.trace("Task", job_id=job.id):
12
+ ... do_work()
13
+
14
+ High-Concurrency Usage (200+ parallel tasks):
15
+ >>> import hud
16
+ >>> async with hud.async_job("Evaluation") as job:
17
+ ... async with hud.async_trace("Task", job_id=job.id):
18
+ ... await do_async_work()
19
+
20
+ APIs:
21
+ - trace(), job() - Standard context managers (for typical usage)
22
+ - async_trace(), async_job() - Async context managers (for high concurrency)
23
+ - instrument() - Decorator for instrumenting functions
24
+ - get_trace() - Retrieve collected traces for replay
25
+
26
+ Note:
27
+ Use async_trace/async_job only for high-concurrency scenarios (200+ tasks).
28
+ The run_dataset() function uses them automatically.
8
29
  """
9
30
 
10
31
  from __future__ import annotations
11
32
 
33
+ from .async_context import async_job, async_trace
12
34
  from .instrument import instrument
13
35
  from .job import Job, create_job, job
14
36
  from .replay import clear_trace, get_trace
@@ -17,6 +39,8 @@ from .trace import Trace, trace
17
39
  __all__ = [
18
40
  "Job",
19
41
  "Trace",
42
+ "async_job",
43
+ "async_trace",
20
44
  "clear_trace",
21
45
  "create_job",
22
46
  "get_trace",
@@ -0,0 +1,331 @@
1
+ """Async context managers for HUD telemetry.
2
+
3
+ Provides async versions of trace and job context managers for high-concurrency
4
+ async code. These prevent event loop blocking by using async I/O operations.
5
+
6
+ Usage:
7
+ >>> import hud
8
+ >>> async with hud.async_job("My Job") as job:
9
+ ... async with hud.async_trace("Task", job_id=job.id) as trace:
10
+ ... await do_work()
11
+
12
+ When to use:
13
+ - High-concurrency scenarios (200+ parallel tasks)
14
+ - Custom async evaluation loops
15
+ - Async frameworks with HUD telemetry integration
16
+
17
+ When NOT to use:
18
+ - Typical scripts/notebooks → use `hud.trace()` and `hud.job()`
19
+ - Low concurrency (< 30 tasks) → standard context managers are fine
20
+ - Synchronous code → must use `hud.trace()` and `hud.job()`
21
+
22
+ Note:
23
+ The `run_dataset()` function automatically uses these async context managers
24
+ internally, so most users don't need to use them directly.
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import logging
30
+ import uuid
31
+ from typing import TYPE_CHECKING, Any
32
+
33
+ if TYPE_CHECKING:
34
+ from types import TracebackType
35
+
36
+ from hud.otel import configure_telemetry
37
+ from hud.otel.context import (
38
+ _print_trace_complete_url,
39
+ _print_trace_url,
40
+ _update_task_status_async,
41
+ )
42
+ from hud.otel.context import (
43
+ trace as OtelTrace,
44
+ )
45
+ from hud.settings import settings
46
+ from hud.shared import make_request
47
+ from hud.telemetry.job import Job, _print_job_complete_url, _print_job_url
48
+ from hud.telemetry.trace import Trace
49
+ from hud.utils.task_tracking import track_task
50
+
51
+ logger = logging.getLogger(__name__)
52
+
53
+ # Module exports
54
+ __all__ = ["AsyncJob", "AsyncTrace", "async_job", "async_trace"]
55
+
56
+ # Global state for current job
# AsyncJob.__aenter__ stores its Job here and AsyncJob.__aexit__ puts the
# previously stored value back.
# NOTE(review): this is a plain module-level variable, so AsyncJob contexts
# that overlap in concurrent tasks all read and write the same slot — confirm
# that is acceptable, or consider a contextvars.ContextVar.
# NOTE(review): hud/telemetry/job.py's job() also declares a `_current_job`
# global; the variable below is a separate one that lives in this module —
# confirm nothing expects the two to stay in sync.
57
+ _current_job: Job | None = None
58
+
59
+
60
+ class AsyncTrace:
61
+ """Async context manager for HUD trace tracking.
62
+
63
+ This is the async equivalent of `hud.trace()`, designed for use in
64
+ high-concurrency async contexts. It tracks task execution with automatic
65
+ status updates that don't block the event loop.
66
+
67
+ The context manager:
68
+ - Creates a unique task_run_id for telemetry correlation
69
+ - Sends async status updates ("running", "completed", "error")
70
+ - Integrates with OpenTelemetry for span collection
71
+ - Tracks all async operations for proper cleanup
72
+
73
+ Use `async_trace()` helper function instead of instantiating directly.
74
+ """
75
+
# __init__ only records its arguments, generates a fresh UUID4 string as
# task_run_id, and builds the Trace object that __aenter__ later returns.
# No telemetry call is made until the context is entered.
76
+ def __init__(
77
+ self,
78
+ name: str = "Test task from hud",
79
+ *,
80
+ root: bool = True,
81
+ attrs: dict[str, Any] | None = None,
82
+ job_id: str | None = None,
83
+ task_id: str | None = None,
84
+ ) -> None:
85
+ self.name = name
86
+ self.root = root
87
+ self.attrs = attrs or {}
88
+ self.job_id = job_id
89
+ self.task_id = task_id
90
+ self.task_run_id = str(uuid.uuid4())
91
+ self.trace_obj = Trace(self.task_run_id, name, job_id, task_id)
# Stays None until __aenter__ creates the OpenTelemetry trace object.
92
+ self._otel_trace = None
93
+
# Returns the Trace object (self.trace_obj), not the AsyncTrace instance, so
# `async with async_trace(...) as t` binds `t` to a Trace.
94
+ async def __aenter__(self) -> Trace:
95
+ """Enter the async trace context."""
96
+ # Ensure telemetry is configured
97
+ configure_telemetry()
98
+
99
+ # Start the OpenTelemetry span
100
+ self._otel_trace = OtelTrace(
101
+ self.task_run_id,
102
+ is_root=self.root,
103
+ span_name=self.name,
104
+ attributes=self.attrs,
105
+ job_id=self.job_id,
106
+ task_id=self.task_id,
107
+ )
# The OTel trace object is driven as a synchronous context manager by calling
# its __enter__/__exit__ directly from these async methods.
108
+ self._otel_trace.__enter__()
109
+
110
+ # Send async status update if this is a root trace
# The "running" update is only attempted when this is a root trace AND
# telemetry is enabled AND an API key is configured.
# The result of _update_task_status_async(...) is handed to track_task without
# being awaited here.
# NOTE(review): presumably track_task schedules that coroutine on the running
# loop and keeps a reference to it — confirm in hud.utils.task_tracking.
111
+ if self.root and settings.telemetry_enabled and settings.api_key:
112
+ track_task(
113
+ _update_task_status_async(
114
+ self.task_run_id,
115
+ "running",
116
+ job_id=self.job_id,
117
+ trace_name=self.name,
118
+ task_id=self.task_id,
119
+ ),
120
+ name=f"trace-status-{self.task_run_id[:8]}",
121
+ )
122
+
# NOTE(review): indentation is not preserved in this rendering, so it cannot
# be determined here whether the job_id check below is nested under the
# root/telemetry condition above or runs unconditionally — confirm against
# the source file before relying on either reading.
123
+ # Print trace URL if not part of a job
124
+ if not self.job_id:
125
+ _print_trace_url(self.task_run_id)
126
+
127
+ logger.debug("Started trace: %s (%s)", self.name, self.task_run_id)
128
+ return self.trace_obj
129
+
# Declared to return None, so an exception raised inside the `async with`
# body is never suppressed by this context manager.
130
+ async def __aexit__(
131
+ self,
132
+ exc_type: type[BaseException] | None,
133
+ exc_val: BaseException | None,
134
+ exc_tb: TracebackType | None,
135
+ ) -> None:
136
+ """Exit the async trace context."""
137
+ # Send async status update if this is a root trace
138
+ if self.root and settings.telemetry_enabled and settings.api_key:
# Any exception type passed in maps to "error"; otherwise "completed".
# str(exc_val) is forwarded as error_message when an exception value exists.
139
+ status = "error" if exc_type else "completed"
140
+
141
+ track_task(
142
+ _update_task_status_async(
143
+ self.task_run_id,
144
+ status,
145
+ job_id=self.job_id,
146
+ error_message=str(exc_val) if exc_val else None,
147
+ trace_name=self.name,
148
+ task_id=self.task_id,
149
+ ),
150
+ name=f"trace-status-{self.task_run_id[:8]}-{status}",
151
+ )
152
+
# NOTE(review): same nesting ambiguity as in __aenter__ — whether this print
# sits under the root/telemetry condition is not visible in this rendering.
153
+ # Print completion message if not part of a job
154
+ if not self.job_id:
155
+ _print_trace_complete_url(self.task_run_id, error_occurred=bool(exc_type))
156
+
# NOTE(review): the span is closed only after the status update has been
# scheduled and the completion message printed; if any of that raises, the
# __exit__ call below is not reached. Consider wrapping the earlier steps in
# try/finally so the span is always closed.
157
+ # Close the OpenTelemetry span
158
+ if self._otel_trace:
159
+ self._otel_trace.__exit__(exc_type, exc_val, exc_tb)
160
+
161
+ logger.debug("Ended trace: %s (%s)", self.name, self.task_run_id)
162
+
163
+
164
+ class AsyncJob:
165
+ """Async context manager for HUD job tracking.
166
+
167
+ This is the async equivalent of `hud.job()`, designed for grouping
168
+ related tasks in high-concurrency async contexts. It manages job
169
+ status updates without blocking the event loop.
170
+
171
+ The context manager:
172
+ - Creates or uses a provided job_id
173
+ - Sends async status updates ("running", "completed", "failed")
174
+ - Associates all child traces with this job
175
+ - Tracks async operations for proper cleanup
176
+
177
+ Use `async_job()` helper function instead of instantiating directly.
178
+ """
179
+
# __init__ uses the supplied job_id, or generates a UUID4 string when it is
# falsy, and wraps it with name/metadata/dataset_link in a Job. No request is
# sent until the context is entered.
180
+ def __init__(
181
+ self,
182
+ name: str,
183
+ metadata: dict[str, Any] | None = None,
184
+ job_id: str | None = None,
185
+ dataset_link: str | None = None,
186
+ ) -> None:
187
+ self.job_id = job_id or str(uuid.uuid4())
188
+ self.job = Job(self.job_id, name, metadata, dataset_link)
189
+
# Returns the Job object (self.job), not the AsyncJob instance.
190
+ async def __aenter__(self) -> Job:
191
+ """Enter the async job context."""
192
+ global _current_job
193
+
# NOTE(review): _old_job is first assigned here rather than in __init__, so
# calling __aexit__ on an instance that was never entered raises
# AttributeError when it tries to restore the previous job.
194
+ # Save previous job and set this as current
195
+ self._old_job = _current_job
196
+ _current_job = self.job
197
+
198
+ # Send async status update
# NOTE(review): this gate checks only settings.telemetry_enabled, whereas
# AsyncTrace also requires settings.api_key — confirm the difference is
# intentional.
199
+ if settings.telemetry_enabled:
200
+ payload = {
201
+ "name": self.job.name,
202
+ "status": "running",
203
+ "metadata": self.job.metadata,
204
+ }
# dataset_link is added to the payload only when it is truthy.
205
+ if self.job.dataset_link:
206
+ payload["dataset_link"] = self.job.dataset_link
207
+
# make_request(...) is called without await and its result is passed straight
# to track_task.
# NOTE(review): presumably make_request returns a coroutine that track_task
# schedules — confirm in hud.shared and hud.utils.task_tracking.
208
+ track_task(
209
+ make_request(
210
+ method="POST",
211
+ url=f"{settings.hud_telemetry_url}/jobs/{self.job.id}/status",
212
+ json=payload,
213
+ api_key=settings.api_key,
214
+ ),
215
+ name=f"job-status-{self.job.id[:8]}-running",
216
+ )
217
+
# NOTE(review): indentation is not preserved in this rendering, so it cannot
# be determined here whether the URL print below is nested under the
# telemetry check or runs unconditionally — confirm against the source file.
218
+ _print_job_url(self.job.id, self.job.name)
219
+ logger.debug("Started job: %s (%s)", self.job.name, self.job.id)
220
+ return self.job
221
+
# Declared to return None, so an exception raised inside the `async with`
# body is never suppressed by this context manager.
222
+ async def __aexit__(
223
+ self,
224
+ exc_type: type[BaseException] | None,
225
+ exc_val: BaseException | None,
226
+ exc_tb: TracebackType | None,
227
+ ) -> None:
228
+ """Exit the async job context."""
229
+ global _current_job
230
+
231
+ # Send async status update
232
+ if settings.telemetry_enabled:
# Any exception type passed in maps to "failed"; otherwise "completed".
233
+ status = "failed" if exc_type else "completed"
234
+ payload = {
235
+ "name": self.job.name,
236
+ "status": status,
237
+ "metadata": self.job.metadata,
238
+ }
239
+ if self.job.dataset_link:
240
+ payload["dataset_link"] = self.job.dataset_link
241
+
242
+ track_task(
243
+ make_request(
244
+ method="POST",
245
+ url=f"{settings.hud_telemetry_url}/jobs/{self.job.id}/status",
246
+ json=payload,
247
+ api_key=settings.api_key,
248
+ ),
249
+ name=f"job-status-{self.job.id[:8]}-{status}",
250
+ )
251
+
# NOTE(review): same nesting ambiguity as in __aenter__ for the print below.
252
+ _print_job_complete_url(self.job.id, self.job.name, error_occurred=bool(exc_type))
253
+
# NOTE(review): the restore runs after the status update has been built and
# scheduled; if any of that raises, _current_job is left pointing at this
# job. Consider try/finally around the earlier steps.
254
+ # Restore previous job
255
+ _current_job = self._old_job
256
+
257
+ logger.debug("Ended job: %s (%s)", self.job.name, self.job.id)
258
+
259
+
260
def async_trace(
    name: str = "Test task from hud",
    *,
    root: bool = True,
    attrs: dict[str, Any] | None = None,
    job_id: str | None = None,
    task_id: str | None = None,
) -> AsyncTrace:
    """Build an ``AsyncTrace`` context manager for use with ``async with``.

    This is the async counterpart of ``hud.trace()``. Nothing is sent when
    this function is called; the work happens when the returned object is
    entered and exited.

    Args:
        name: Descriptive name for this trace/task.
        root: Whether this is a root trace. Root traces schedule task status
            updates on enter and exit when telemetry is enabled and an API
            key is configured.
        attrs: Additional attributes to attach to the trace.
        job_id: Optional job ID to associate with this trace.
        task_id: Optional task ID for custom task identifiers.

    Returns:
        An ``AsyncTrace``. Entering it yields the underlying ``Trace`` object.

    Example:
        >>> import hud
        >>> async with hud.async_trace("Process Data") as trace:
        ...     result = await process_async()

    Note:
        ``hud.trace()`` is the recommended choice for typical usage; reach for
        this version in high-concurrency scenarios (200+ parallel tasks) or
        when writing custom async evaluation frameworks.
    """
    # Everything except the name is keyword-only on AsyncTrace as well.
    trace_options = {
        "root": root,
        "attrs": attrs,
        "job_id": job_id,
        "task_id": task_id,
    }
    return AsyncTrace(name, **trace_options)
296
+
297
+
298
def async_job(
    name: str,
    metadata: dict[str, Any] | None = None,
    job_id: str | None = None,
    dataset_link: str | None = None,
) -> AsyncJob:
    """Build an ``AsyncJob`` context manager for grouping related traces.

    This is the async counterpart of ``hud.job()``. Nothing is sent when this
    function is called; the job status updates are scheduled when the
    returned object is entered and exited.

    Args:
        name: Human-readable job name.
        metadata: Optional metadata dictionary.
        job_id: Optional job ID; a UUID4 string is generated when omitted.
        dataset_link: Optional HuggingFace dataset identifier.

    Returns:
        An ``AsyncJob``. Entering it yields the underlying ``Job`` object.

    Example:
        >>> import hud
        >>> async with hud.async_job("Batch Processing") as job:
        ...     for item in items:
        ...         async with hud.async_trace(f"Process {item.id}", job_id=job.id):
        ...             await process(item)

    Note:
        ``hud.job()`` is the recommended choice for typical usage; reach for
        this version in high-concurrency scenarios (200+ parallel tasks) or
        when writing custom async evaluation frameworks.
    """
    job_context = AsyncJob(
        name,
        metadata=metadata,
        job_id=job_id,
        dataset_link=dataset_link,
    )
    return job_context
hud/telemetry/job.py CHANGED
@@ -89,6 +89,33 @@ class Job:
89
89
  except Exception as e:
90
90
  logger.warning("Failed to update job status: %s", e)
91
91
 
92
def update_status_fire_and_forget(self, status: str) -> None:
    """Record ``status`` on the job and report it without waiting for the result.

    ``self.status`` is always updated. When telemetry is enabled, a coroutine
    that POSTs the job's name, status, metadata and (if set) dataset link to
    the job status endpoint is handed to ``fire_and_forget``; this method
    returns without awaiting it.

    Args:
        status: New status string to store and report.
    """
    self.status = status
    if not settings.telemetry_enabled:
        return

    # Imported here, as in the original, so the helper is only loaded when a
    # status update is actually going to be sent.
    from hud.utils.async_utils import fire_and_forget

    async def _post_status() -> None:
        # Best-effort: any failure is logged as a warning and otherwise ignored.
        try:
            body = {
                "name": self.name,
                "status": status,
                "metadata": self.metadata,
            }
            if self.dataset_link:
                body["dataset_link"] = self.dataset_link

            await make_request(
                method="POST",
                url=f"{settings.hud_telemetry_url}/jobs/{self.id}/status",
                json=body,
                api_key=settings.api_key,
            )
        except Exception as exc:
            logger.warning("Failed to update job status: %s", exc)

    description = f"update job {self.id} status to {status}"
    fire_and_forget(_post_status(), description)
118
+
92
119
  async def log(self, metrics: dict[str, Any]) -> None:
93
120
  """Log metrics to the job.
94
121
 
@@ -214,9 +241,9 @@ def job(
214
241
  job_id: str | None = None,
215
242
  dataset_link: str | None = None,
216
243
  ) -> Generator[Job, None, None]:
217
- """Context manager for job tracking.
244
+ """Context manager for job tracking and organization.
218
245
 
219
- Groups related tasks together under a single job for tracking and organization.
246
+ Groups related tasks together under a single job for tracking and visualization.
220
247
 
221
248
  Args:
222
249
  name: Human-readable job name
@@ -228,10 +255,22 @@ def job(
228
255
  Job: The job object
229
256
 
230
257
  Example:
231
- with hud.job("training_run", {"model": "gpt-4"}) as job:
232
- for epoch in range(10):
233
- with hud.trace(f"epoch_{epoch}", job_id=job.id):
234
- train_epoch()
258
+ >>> import hud
259
+ >>> # Synchronous code
260
+ >>> with hud.job("training_run", {"model": "gpt-4"}) as job:
261
+ ... for epoch in range(10):
262
+ ... with hud.trace(f"epoch_{epoch}", job_id=job.id):
263
+ ... train_epoch()
264
+ >>> # For async code with HIGH CONCURRENCY (200+ tasks), use async_job
265
+ >>> async with hud.async_job("batch_processing") as job:
266
+ ... for item in items:
267
+ ... async with hud.async_trace(f"process_{item}", job_id=job.id):
268
+ ... await process(item)
269
+
270
+ Note:
271
+ For simple async code (< 30 parallel tasks), this context manager works fine.
272
+ Use `hud.async_job()` only for high-concurrency scenarios (200+ parallel tasks)
273
+ where event loop blocking becomes an issue.
235
274
  """
236
275
  global _current_job
237
276
 
@@ -245,18 +284,18 @@ def job(
245
284
  _current_job = job_obj
246
285
 
247
286
  try:
248
- # Update status to running synchronously to ensure job is registered before tasks start
249
- job_obj.update_status_sync("running")
287
+ # Update status to running (fire-and-forget to avoid blocking)
288
+ job_obj.update_status_fire_and_forget("running")
250
289
  # Print the nice job URL box
251
290
  _print_job_url(job_obj.id, job_obj.name)
252
291
  yield job_obj
253
- # Update status to completed synchronously to ensure it completes before process exit
254
- job_obj.update_status_sync("completed")
292
+ # Update status to completed (fire-and-forget to avoid blocking)
293
+ job_obj.update_status_fire_and_forget("completed")
255
294
  # Print job completion message
256
295
  _print_job_complete_url(job_obj.id, job_obj.name, error_occurred=False)
257
296
  except Exception:
258
- # Update status to failed synchronously to ensure it completes before process exit
259
- job_obj.update_status_sync("failed")
297
+ # Update status to failed (fire-and-forget to avoid blocking)
298
+ job_obj.update_status_fire_and_forget("failed")
260
299
  # Print job failure message
261
300
  _print_job_complete_url(job_obj.id, job_obj.name, error_occurred=True)
262
301
  raise