hud-python 0.4.51__py3-none-any.whl → 0.4.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of hud-python might be problematic.

Files changed (88)
  1. hud/__init__.py +13 -1
  2. hud/agents/base.py +14 -3
  3. hud/agents/lite_llm.py +1 -1
  4. hud/agents/openai_chat_generic.py +15 -3
  5. hud/agents/tests/test_base.py +9 -2
  6. hud/agents/tests/test_base_runtime.py +164 -0
  7. hud/cli/__init__.py +18 -25
  8. hud/cli/build.py +35 -27
  9. hud/cli/dev.py +11 -29
  10. hud/cli/eval.py +114 -145
  11. hud/cli/tests/test_analyze_module.py +120 -0
  12. hud/cli/tests/test_build.py +26 -3
  13. hud/cli/tests/test_build_failure.py +41 -0
  14. hud/cli/tests/test_build_module.py +50 -0
  15. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  16. hud/cli/tests/test_cli_root.py +134 -0
  17. hud/cli/tests/test_eval.py +4 -0
  18. hud/cli/tests/test_mcp_server.py +8 -7
  19. hud/cli/tests/test_push_happy.py +74 -0
  20. hud/cli/tests/test_push_wrapper.py +23 -0
  21. hud/cli/utils/docker.py +120 -1
  22. hud/cli/utils/runner.py +1 -1
  23. hud/cli/utils/tasks.py +4 -1
  24. hud/cli/utils/tests/__init__.py +0 -0
  25. hud/cli/utils/tests/test_config.py +58 -0
  26. hud/cli/utils/tests/test_docker.py +93 -0
  27. hud/cli/utils/tests/test_docker_hints.py +71 -0
  28. hud/cli/utils/tests/test_env_check.py +74 -0
  29. hud/cli/utils/tests/test_environment.py +42 -0
  30. hud/cli/utils/tests/test_interactive_module.py +60 -0
  31. hud/cli/utils/tests/test_local_runner.py +50 -0
  32. hud/cli/utils/tests/test_logging_utils.py +23 -0
  33. hud/cli/utils/tests/test_metadata.py +49 -0
  34. hud/cli/utils/tests/test_package_runner.py +35 -0
  35. hud/cli/utils/tests/test_registry_utils.py +49 -0
  36. hud/cli/utils/tests/test_remote_runner.py +25 -0
  37. hud/cli/utils/tests/test_runner_modules.py +52 -0
  38. hud/cli/utils/tests/test_source_hash.py +36 -0
  39. hud/cli/utils/tests/test_tasks.py +80 -0
  40. hud/cli/utils/version_check.py +257 -0
  41. hud/clients/base.py +1 -1
  42. hud/clients/mcp_use.py +3 -1
  43. hud/datasets/parallel.py +2 -2
  44. hud/datasets/runner.py +85 -24
  45. hud/datasets/tests/__init__.py +0 -0
  46. hud/datasets/tests/test_runner.py +106 -0
  47. hud/datasets/tests/test_utils.py +228 -0
  48. hud/otel/config.py +8 -6
  49. hud/otel/context.py +4 -4
  50. hud/otel/exporters.py +231 -57
  51. hud/otel/tests/__init__.py +0 -1
  52. hud/otel/tests/test_instrumentation.py +207 -0
  53. hud/rl/learner.py +1 -1
  54. hud/server/tests/test_server_extra.py +2 -0
  55. hud/shared/exceptions.py +35 -9
  56. hud/shared/hints.py +25 -0
  57. hud/shared/requests.py +15 -3
  58. hud/shared/tests/test_exceptions.py +39 -30
  59. hud/shared/tests/test_hints.py +167 -0
  60. hud/telemetry/__init__.py +30 -6
  61. hud/telemetry/async_context.py +331 -0
  62. hud/telemetry/job.py +51 -12
  63. hud/telemetry/tests/test_async_context.py +242 -0
  64. hud/telemetry/tests/test_instrument.py +414 -0
  65. hud/telemetry/tests/test_job.py +609 -0
  66. hud/telemetry/tests/test_trace.py +184 -6
  67. hud/telemetry/trace.py +16 -17
  68. hud/tools/computer/qwen.py +4 -1
  69. hud/tools/computer/settings.py +2 -2
  70. hud/tools/executors/base.py +4 -2
  71. hud/tools/tests/test_submit.py +85 -0
  72. hud/tools/tests/test_types.py +193 -0
  73. hud/types.py +7 -1
  74. hud/utils/agent_factories.py +1 -3
  75. hud/utils/mcp.py +1 -1
  76. hud/utils/task_tracking.py +223 -0
  77. hud/utils/tests/test_agent_factories.py +60 -0
  78. hud/utils/tests/test_mcp.py +4 -6
  79. hud/utils/tests/test_pretty_errors.py +186 -0
  80. hud/utils/tests/test_tasks.py +187 -0
  81. hud/utils/tests/test_tool_shorthand.py +154 -0
  82. hud/utils/tests/test_version.py +1 -1
  83. hud/version.py +1 -1
  84. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/METADATA +48 -48
  85. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/RECORD +88 -47
  86. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/WHEEL +0 -0
  87. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/entry_points.txt +0 -0
  88. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/licenses/LICENSE +0 -0
hud/datasets/tests/test_utils.py ADDED
@@ -0,0 +1,228 @@
+ from __future__ import annotations
+
+ from unittest.mock import MagicMock, mock_open, patch
+
+ import pytest
+
+ from hud.datasets.utils import fetch_system_prompt_from_dataset, save_tasks
+ from hud.types import Task
+
+
+ @pytest.mark.asyncio
+ async def test_fetch_system_prompt_success():
+     """Test successful fetch of system prompt."""
+     with patch("huggingface_hub.hf_hub_download") as mock_download:
+         mock_download.return_value = "/tmp/system_prompt.txt"
+         with patch("builtins.open", mock_open(read_data="Test system prompt")):
+             result = await fetch_system_prompt_from_dataset("test/dataset")
+             assert result == "Test system prompt"
+             mock_download.assert_called_once()
+
+
+ @pytest.mark.asyncio
+ async def test_fetch_system_prompt_empty_file():
+     """Test fetch when file is empty."""
+     with patch("huggingface_hub.hf_hub_download") as mock_download:
+         mock_download.return_value = "/tmp/system_prompt.txt"
+         with patch("builtins.open", mock_open(read_data=" \n ")):
+             result = await fetch_system_prompt_from_dataset("test/dataset")
+             assert result is None
+
+
+ @pytest.mark.asyncio
+ async def test_fetch_system_prompt_file_not_found():
+     """Test fetch when file doesn't exist."""
+     with patch("huggingface_hub.hf_hub_download") as mock_download:
+         from huggingface_hub.errors import EntryNotFoundError
+
+         mock_download.side_effect = EntryNotFoundError("File not found")
+         result = await fetch_system_prompt_from_dataset("test/dataset")
+         assert result is None
+
+
+ @pytest.mark.asyncio
+ async def test_fetch_system_prompt_import_error():
+     """Test fetch when huggingface_hub is not installed."""
+     # Mock the import itself to raise ImportError
+     import sys
+
+     with patch.dict(sys.modules, {"huggingface_hub": None}):
+         result = await fetch_system_prompt_from_dataset("test/dataset")
+         assert result is None
+
+
+ @pytest.mark.asyncio
+ async def test_fetch_system_prompt_general_exception():
+     """Test fetch with general exception."""
+     with patch("huggingface_hub.hf_hub_download") as mock_download:
+         mock_download.side_effect = Exception("Network error")
+         result = await fetch_system_prompt_from_dataset("test/dataset")
+         assert result is None
+
+
+ def test_save_tasks_basic():
+     """Test basic save_tasks functionality."""
+     tasks = [
+         {"id": "1", "prompt": "test", "mcp_config": {"key": "value"}},
+         {"id": "2", "prompt": "test2", "mcp_config": {"key2": "value2"}},
+     ]
+
+     with patch("hud.datasets.utils.Dataset") as mock_dataset_class:
+         mock_dataset = MagicMock()
+         mock_dataset_class.from_list.return_value = mock_dataset
+
+         save_tasks(tasks, "test/repo")
+
+         mock_dataset_class.from_list.assert_called_once()
+         call_args = mock_dataset_class.from_list.call_args[0][0]
+         assert len(call_args) == 2
+         # Check that mcp_config was JSON serialized
+         assert isinstance(call_args[0]["mcp_config"], str)
+         mock_dataset.push_to_hub.assert_called_once_with("test/repo")
+
+
+ def test_save_tasks_with_specific_fields():
+     """Test save_tasks with specific fields."""
+     tasks = [
+         {"id": "1", "prompt": "test", "mcp_config": {"key": "value"}, "extra": "data"},
+     ]
+
+     with patch("hud.datasets.utils.Dataset") as mock_dataset_class:
+         mock_dataset = MagicMock()
+         mock_dataset_class.from_list.return_value = mock_dataset
+
+         save_tasks(tasks, "test/repo", fields=["id", "prompt"])
+
+         call_args = mock_dataset_class.from_list.call_args[0][0]
+         assert "id" in call_args[0]
+         assert "prompt" in call_args[0]
+         assert "extra" not in call_args[0]
+
+
+ def test_save_tasks_with_list_field():
+     """Test save_tasks serializes list fields."""
+     tasks = [
+         {"id": "1", "tags": ["tag1", "tag2"], "count": 5},
+     ]
+
+     with patch("hud.datasets.utils.Dataset") as mock_dataset_class:
+         mock_dataset = MagicMock()
+         mock_dataset_class.from_list.return_value = mock_dataset
+
+         save_tasks(tasks, "test/repo")
+
+         call_args = mock_dataset_class.from_list.call_args[0][0]
+         # List should be JSON serialized
+         assert isinstance(call_args[0]["tags"], str)
+         assert '"tag1"' in call_args[0]["tags"]
+
+
+ def test_save_tasks_with_primitive_types():
+     """Test save_tasks handles various primitive types."""
+     tasks = [
+         {
+             "string": "text",
+             "integer": 42,
+             "float": 3.14,
+             "boolean": True,
+             "none": None,
+         },
+     ]
+
+     with patch("hud.datasets.utils.Dataset") as mock_dataset_class:
+         mock_dataset = MagicMock()
+         mock_dataset_class.from_list.return_value = mock_dataset
+
+         save_tasks(tasks, "test/repo")
+
+         call_args = mock_dataset_class.from_list.call_args[0][0]
+         assert call_args[0]["string"] == "text"
+         assert call_args[0]["integer"] == 42
+         assert call_args[0]["float"] == 3.14
+         assert call_args[0]["boolean"] is True
+         assert call_args[0]["none"] == ""  # None becomes empty string
+
+
+ def test_save_tasks_with_other_type():
+     """Test save_tasks converts other types to string."""
+
+     class CustomObj:
+         def __str__(self):
+             return "custom_value"
+
+     tasks = [
+         {"id": "1", "custom": CustomObj()},
+     ]
+
+     with patch("hud.datasets.utils.Dataset") as mock_dataset_class:
+         mock_dataset = MagicMock()
+         mock_dataset_class.from_list.return_value = mock_dataset
+
+         save_tasks(tasks, "test/repo")
+
+         call_args = mock_dataset_class.from_list.call_args[0][0]
+         assert call_args[0]["custom"] == "custom_value"
+
+
+ def test_save_tasks_rejects_task_objects():
+     """Test save_tasks raises error for Task objects."""
+     task = Task(prompt="test", mcp_config={})
+
+     with pytest.raises(ValueError, match="expects dictionaries, not Task objects"):
+         save_tasks([task], "test/repo")  # type: ignore
+
+
+ def test_save_tasks_rejects_task_objects_in_list():
+     """Test save_tasks raises error when Task object is in the list."""
+     tasks = [
+         {"id": "1", "prompt": "test", "mcp_config": {}},
+         Task(prompt="test2", mcp_config={}),  # Task object
+     ]
+
+     with pytest.raises(ValueError, match="Item 1 is a Task object"):
+         save_tasks(tasks, "test/repo")  # type: ignore
+
+
+ def test_save_tasks_with_kwargs():
+     """Test save_tasks passes kwargs to push_to_hub."""
+     tasks = [{"id": "1", "prompt": "test"}]
+
+     with patch("hud.datasets.utils.Dataset") as mock_dataset_class:
+         mock_dataset = MagicMock()
+         mock_dataset_class.from_list.return_value = mock_dataset
+
+         save_tasks(tasks, "test/repo", private=True, commit_message="Test commit")
+
+         mock_dataset.push_to_hub.assert_called_once_with(
+             "test/repo", private=True, commit_message="Test commit"
+         )
+
+
+ def test_save_tasks_field_not_in_dict():
+     """Test save_tasks handles missing fields gracefully."""
+     tasks = [
+         {"id": "1", "prompt": "test"},
+     ]
+
+     with patch("hud.datasets.utils.Dataset") as mock_dataset_class:
+         mock_dataset = MagicMock()
+         mock_dataset_class.from_list.return_value = mock_dataset
+
+         # Request fields that don't exist
+         save_tasks(tasks, "test/repo", fields=["id", "missing_field"])
+
+         call_args = mock_dataset_class.from_list.call_args[0][0]
+         assert "id" in call_args[0]
+         assert "missing_field" not in call_args[0]
+
+
+ def test_save_tasks_empty_list():
+     """Test save_tasks with empty list."""
+     with patch("hud.datasets.utils.Dataset") as mock_dataset_class:
+         mock_dataset = MagicMock()
+         mock_dataset_class.from_list.return_value = mock_dataset
+
+         save_tasks([], "test/repo")
+
+         mock_dataset_class.from_list.assert_called_once_with([])
+         mock_dataset.push_to_hub.assert_called_once()
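
The new test module above pins down the serialization contract of save_tasks: nested dicts and lists are JSON-encoded to strings, None becomes an empty string, primitives pass through unchanged, and anything else is stringified before Dataset.from_list(...).push_to_hub(repo). The sketch below is illustrative only (it is not the hud.datasets.utils implementation, just the rule the tests assert):

# Illustrative sketch only: the per-value serialization behavior the tests above assert.
import json
from typing import Any

def _serialize_value(value: Any) -> Any:
    """Mirror the serialization rules implied by the tests above."""
    if isinstance(value, (dict, list)):
        return json.dumps(value)   # mcp_config / tags become JSON strings
    if value is None:
        return ""                  # None becomes an empty string
    if isinstance(value, (str, int, float, bool)):
        return value               # primitives pass through unchanged
    return str(value)              # anything else is stringified

rows = [
    {k: _serialize_value(v) for k, v in task.items()}
    for task in [{"id": "1", "prompt": "test", "mcp_config": {"key": "value"}, "tags": ["a"], "none": None}]
]
print(rows)  # [{'id': '1', 'prompt': 'test', 'mcp_config': '{"key": "value"}', 'tags': '["a"]', 'none': ''}]
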
hud/otel/config.py CHANGED
@@ -94,16 +94,18 @@ def configure_telemetry(
 
      # HUD exporter (only if enabled and API key is available)
      if settings.telemetry_enabled and settings.api_key:
+         # Use the HudSpanExporter directly (it now handles async context internally)
          exporter = HudSpanExporter(
              telemetry_url=settings.hud_telemetry_url, api_key=settings.api_key
          )
-         # Export more continuously to avoid big end flushes
+
+         # Batch exports for efficiency while maintaining reasonable real-time visibility
          provider.add_span_processor(
              BatchSpanProcessor(
                  exporter,
-                 schedule_delay_millis=1000,
-                 max_queue_size=8192,
-                 max_export_batch_size=256,
+                 schedule_delay_millis=1000,  # Export every 5 seconds (less frequent)
+                 max_queue_size=16384,  # Larger queue for high-volume scenarios
+                 max_export_batch_size=512,  # Larger batches (fewer uploads)
                  export_timeout_millis=30000,
              )
          )
@@ -140,8 +142,8 @@ def configure_telemetry(
              BatchSpanProcessor(
                  otlp_exporter,
                  schedule_delay_millis=1000,
-                 max_queue_size=8192,
-                 max_export_batch_size=256,
+                 max_queue_size=16384,
+                 max_export_batch_size=512,
                  export_timeout_millis=30000,
              )
          )
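
The change doubles the batch processor's queue (8192 to 16384) and batch size (256 to 512) while keeping the 1000 ms flush interval, so the new inline comment about exporting "every 5 seconds" does not match the configured one-second delay. For reference, a minimal standalone sketch of where these BatchSpanProcessor parameters plug in, using the standard OpenTelemetry SDK with a console exporter as a stand-in for HudSpanExporter:

# Minimal sketch, assuming the standard OpenTelemetry SDK; the console exporter is a stand-in.
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter

provider = TracerProvider()
provider.add_span_processor(
    BatchSpanProcessor(
        ConsoleSpanExporter(),        # stand-in for HudSpanExporter or an OTLP exporter
        schedule_delay_millis=1000,   # flush roughly every second
        max_queue_size=16384,         # larger queue tolerates bursty span volume
        max_export_batch_size=512,    # bigger batches mean fewer upload requests
        export_timeout_millis=30000,
    )
)
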
hud/otel/context.py CHANGED
@@ -520,8 +520,8 @@ class trace:
          # Update task status if root (only for HUD backend)
          if self.is_root and settings.telemetry_enabled and settings.api_key:
              if exc_type is not None:
-                 # Use synchronous update to ensure it completes before process exit
-                 _update_task_status_sync(
+                 # Use fire-and-forget to avoid blocking the event loop
+                 _fire_and_forget_status_update(
                      self.task_run_id,
                      "error",
                      job_id=self.job_id,
@@ -533,8 +533,8 @@
                  if not self.job_id:
                      _print_trace_complete_url(self.task_run_id, error_occurred=True)
              else:
-                 # Use synchronous update to ensure it completes before process exit
-                 _update_task_status_sync(
+                 # Use fire-and-forget to avoid blocking the event loop
+                 _fire_and_forget_status_update(
                      self.task_run_id,
                      "completed",
                      job_id=self.job_id,
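
The trace context manager now calls _fire_and_forget_status_update instead of the blocking _update_task_status_sync when it exits. That helper's implementation is not part of this hunk; the sketch below only illustrates the general fire-and-forget pattern, with hypothetical names (fire_and_forget, update_task_status):

# Illustrative sketch only; names below are hypothetical, not the hud implementation.
# Fire-and-forget: do the blocking status update on a background thread so __exit__
# returns immediately instead of blocking the event loop.
import threading

def fire_and_forget(fn, *args, **kwargs) -> None:
    """Run fn(*args, **kwargs) on a daemon thread and return immediately."""
    threading.Thread(target=fn, args=args, kwargs=kwargs, daemon=True).start()

def update_task_status(task_run_id: str, status: str, job_id: str | None = None) -> None:
    ...  # placeholder for the synchronous HTTP call

# Instead of calling the update synchronously in __exit__:
fire_and_forget(update_task_status, "run-123", "completed", job_id=None)
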
hud/otel/exporters.py CHANGED
@@ -1,21 +1,27 @@
- """Custom OpenTelemetry exporter that sends spans to the existing HUD telemetry
- HTTP endpoint (/trace/<id>/telemetry-upload).
+ """Custom OpenTelemetry exporter for HUD telemetry backend.
 
- The exporter groups spans by ``hud.task_run_id`` baggage / attribute so we keep
- exactly the same semantics the old async worker in ``hud.telemetry.exporter``
- implemented.
+ This exporter sends spans to the HUD telemetry HTTP endpoint, grouping them
+ by task_run_id for efficient batch uploads.
 
- This exporter is *synchronous* (derives from :class:`SpanExporter`). We rely on
- ``hud.shared.make_request_sync`` which already contains retry & auth logic.
+ Performance optimizations:
+ - Detects async contexts and runs exports in a thread pool to avoid blocking
+ - Uses persistent HTTP client with connection pooling for reduced overhead
+ - Tracks pending export futures to ensure completion during shutdown
+
+ The exporter derives from SpanExporter (synchronous interface) but handles
+ async contexts intelligently to prevent event loop blocking during high-concurrency
+ workloads.
  """
 
  from __future__ import annotations
 
+ import atexit
+ import concurrent.futures as cf
  import contextlib
  import json
  import logging
- import time
  from collections import defaultdict
+ from concurrent.futures import ThreadPoolExecutor
  from datetime import UTC, datetime
  from typing import TYPE_CHECKING, Any
 
@@ -31,6 +37,34 @@ if TYPE_CHECKING:
 
  logger = logging.getLogger(__name__)
 
+ # Global singleton thread pool for span exports
+ _export_executor: ThreadPoolExecutor | None = None
+
+
+ def get_export_executor() -> ThreadPoolExecutor:
+     """Get or create the global thread pool for span exports.
+
+     Returns a singleton ThreadPoolExecutor used for running span exports
+     in a thread pool when called from async contexts, preventing event
+     loop blocking during high-concurrency workloads.
+
+     The executor is automatically cleaned up on process exit via atexit.
+
+     Returns:
+         ThreadPoolExecutor with 8 workers for high-throughput parallel uploads
+     """
+     global _export_executor
+     if _export_executor is None:
+         # Use 8 workers to handle high-volume parallel uploads efficiently
+         _export_executor = ThreadPoolExecutor(max_workers=8, thread_name_prefix="span-export")
+
+         def cleanup() -> None:
+             if _export_executor is not None:
+                 _export_executor.shutdown(wait=True)
+
+         atexit.register(cleanup)
+     return _export_executor
+
 
  # ---------------------------------------------------------------------------
  # Models
@@ -297,73 +331,213 @@ def _span_to_dict(span: ReadableSpan) -> dict[str, Any]:
 
 
  class HudSpanExporter(SpanExporter):
-     """Exporter that forwards spans to HUD backend using existing endpoint."""
+     """OpenTelemetry span exporter for the HUD backend.
+
+     This exporter groups spans by task_run_id and sends them to the HUD
+     telemetry endpoint. Performance optimizations include:
+
+     - Auto-detects async contexts and runs exports in thread pool (non-blocking)
+     - Tracks pending export futures for proper shutdown coordination
+
+     Handles high-concurrency scenarios (200+ parallel tasks) by offloading
+     synchronous HTTP operations to a thread pool when called from async
+     contexts, preventing event loop blocking.
+     """
 
      def __init__(self, *, telemetry_url: str, api_key: str) -> None:
+         """Initialize the HUD span exporter.
+
+         Args:
+             telemetry_url: Base URL for the HUD telemetry backend
+             api_key: API key for authentication
+         """
          super().__init__()
          self._telemetry_url = telemetry_url.rstrip("/")
          self._api_key = api_key
 
-     # ------------------------------------------------------------------
-     # Core API
-     # ------------------------------------------------------------------
+         # Track pending export futures for shutdown coordination
+         self._pending_futures: list[cf.Future[SpanExportResult]] = []
+
      def export(self, spans: list[ReadableSpan]) -> SpanExportResult:  # type: ignore[override]
+         """Export spans to HUD backend.
+
+         Auto-detects async contexts: if called from an async event loop, runs
+         the export in a thread pool to avoid blocking. Otherwise runs synchronously.
+
+         Args:
+             spans: List of ReadableSpan objects to export
+
+         Returns:
+             SpanExportResult.SUCCESS (returns immediately in async contexts)
+         """
          if not spans:
              return SpanExportResult.SUCCESS
 
-         # Group spans by hud.task_run_id attribute
+         # Group spans by task_run_id for batched uploads
          grouped: dict[str, list[ReadableSpan]] = defaultdict(list)
          for span in spans:
              run_id = span.attributes.get("hud.task_run_id") if span.attributes else None
             if not run_id:
-                 # Skip spans that are outside HUD traces
+                 # Skip spans outside HUD traces
                 continue
             grouped[str(run_id)].append(span)
 
-         # Send each group synchronously (retry inside make_request_sync)
-         for run_id, span_batch in grouped.items():
-             try:
-                 url = f"{self._telemetry_url}/trace/{run_id}/telemetry-upload"
-                 telemetry_spans = [_span_to_dict(s) for s in span_batch]
-                 # Include current step count in metadata
-                 metadata = {}
-                 # Get the HIGHEST step count from the batch (most recent)
-                 step_count = 0
-                 for span in span_batch:
-                     if span.attributes and "hud.step_count" in span.attributes:
-                         current_step = span.attributes["hud.step_count"]
-                         if isinstance(current_step, int) and current_step > step_count:
-                             step_count = current_step
-
-                 payload = {
-                     "metadata": metadata,
-                     "telemetry": telemetry_spans,
-                 }
-
-                 # Only include step_count if we found any steps
-                 if step_count > 0:
-                     payload["step_count"] = step_count
-
-                 logger.debug("HUD exporter sending %d spans to %s", len(span_batch), url)
-                 make_request_sync(
-                     method="POST",
-                     url=url,
-                     json=payload,
-                     api_key=self._api_key,
-                 )
-             except Exception as exc:
-                 logger.exception("HUD exporter failed to send spans for task %s: %s", run_id, exc)
-                 # If *any* group fails we return FAILURE so the OTEL SDK can retry
-                 return SpanExportResult.FAILURE
-
-         return SpanExportResult.SUCCESS
+         # Detect async context to avoid event loop blocking
+         import asyncio
+
+         try:
+             loop = asyncio.get_running_loop()
+             # In async context - offload to thread pool
+             executor = get_export_executor()
+
+             def _sync_export() -> SpanExportResult:
+                 # Send each group synchronously (retry inside make_request_sync)
+                 for run_id, span_batch in grouped.items():
+                     try:
+                         url = f"{self._telemetry_url}/trace/{run_id}/telemetry-upload"
+                         telemetry_spans = [_span_to_dict(s) for s in span_batch]
+                         # Include current step count in metadata
+                         metadata = {}
+                         # Get the HIGHEST step count from the batch (most recent)
+                         step_count = 0
+                         for span in span_batch:
+                             if span.attributes and "hud.step_count" in span.attributes:
+                                 current_step = span.attributes["hud.step_count"]
+                                 if isinstance(current_step, int) and current_step > step_count:
+                                     step_count = current_step
+
+                         payload = {
+                             "metadata": metadata,
+                             "telemetry": telemetry_spans,
+                         }
+
+                         # Only include step_count if we found any steps
+                         if step_count > 0:
+                             payload["step_count"] = step_count
+
+                         logger.debug("HUD exporter sending %d spans to %s", len(span_batch), url)
+                         make_request_sync(
+                             method="POST",
+                             url=url,
+                             json=payload,
+                             api_key=self._api_key,
+                         )
+                     except Exception as exc:
+                         logger.exception(
+                             "HUD exporter failed to send spans for task %s: %s", run_id, exc
+                         )
+                         return SpanExportResult.FAILURE
+                 return SpanExportResult.SUCCESS
+
+             # Run in thread to avoid blocking event loop
+             future = loop.run_in_executor(executor, _sync_export)
+             # Track and cleanup when done
+             self._pending_futures.append(future)  # type: ignore[list-item]
+
+             def _cleanup_done(f: cf.Future[SpanExportResult]) -> None:
+                 with contextlib.suppress(Exception):
+                     # Consume exception to avoid "exception was never retrieved"
+                     _ = f.exception()
+                 # Remove from pending list
+                 with contextlib.suppress(ValueError):
+                     self._pending_futures.remove(f)
+
+             future.add_done_callback(_cleanup_done)  # type: ignore[arg-type]
+             # Don't wait for it - return immediately
+             return SpanExportResult.SUCCESS
+
+         except RuntimeError:
+             # No event loop - run synchronously
+             # Send each group synchronously (retry inside make_request_sync)
+             for run_id, span_batch in grouped.items():
+                 try:
+                     url = f"{self._telemetry_url}/trace/{run_id}/telemetry-upload"
+                     telemetry_spans = [_span_to_dict(s) for s in span_batch]
+                     # Include current step count in metadata
+                     metadata = {}
+                     # Get the HIGHEST step count from the batch (most recent)
+                     step_count = 0
+                     for span in span_batch:
+                         if span.attributes and "hud.step_count" in span.attributes:
+                             current_step = span.attributes["hud.step_count"]
+                             if isinstance(current_step, int) and current_step > step_count:
+                                 step_count = current_step
+
+                     payload = {
+                         "metadata": metadata,
+                         "telemetry": telemetry_spans,
+                     }
+
+                     # Only include step_count if we found any steps
+                     if step_count > 0:
+                         payload["step_count"] = step_count
+
+                     logger.debug("HUD exporter sending %d spans to %s", len(span_batch), url)
+                     make_request_sync(
+                         method="POST",
+                         url=url,
+                         json=payload,
+                         api_key=self._api_key,
+                     )
+                 except Exception as exc:
+                     logger.exception(
+                         "HUD exporter failed to send spans for task %s: %s", run_id, exc
+                     )
+                     # If *any* group fails we return FAILURE so the OTEL SDK can retry
+                     return SpanExportResult.FAILURE
+
+             return SpanExportResult.SUCCESS
 
      def shutdown(self) -> None:  # type: ignore[override]
-         # Nothing to cleanup, httpx handled inside make_request_sync
-         pass
+         """Shutdown the exporter and wait for pending exports.
+
+         Waits up to 10 seconds for any in-flight exports to complete.
+         """
+         try:
+             if self._pending_futures:
+                 with contextlib.suppress(Exception):
+                     cf.wait(self._pending_futures, timeout=10.0)
+         finally:
+             self._pending_futures.clear()
 
      def force_flush(self, timeout_millis: int | None = None) -> bool:  # type: ignore[override]
-         if timeout_millis:
-             time.sleep(timeout_millis / 1000)
-         # Synchronous export, nothing buffered here
-         return True
+         """Force flush all pending span exports.
+
+         Waits for all pending export futures to complete before returning.
+         This is called by the OpenTelemetry SDK during shutdown to ensure
+         all telemetry is uploaded.
+
+         Args:
+             timeout_millis: Maximum time to wait in milliseconds
+
+         Returns:
+             True if all exports completed, False otherwise
+         """
+         try:
+             if not self._pending_futures:
+                 return True
+
+             total_pending = len(self._pending_futures)
+             if total_pending > 10:
+                 # Show progress for large batches
+                 logger.info("Flushing %d pending telemetry uploads...", total_pending)
+
+             timeout = (timeout_millis or 30000) / 1000.0
+             done, not_done = cf.wait(self._pending_futures, timeout=timeout)
+
+             # Consume exceptions to avoid "exception was never retrieved" warnings
+             for f in list(done):
+                 with contextlib.suppress(Exception):
+                     _ = f.exception()
+
+             # Remove completed futures
+             for f in list(done):
+                 with contextlib.suppress(ValueError):
+                     self._pending_futures.remove(f)
+
+             if total_pending > 10:
+                 logger.info("Completed %d/%d telemetry uploads", len(done), total_pending)
+
+             return len(not_done) == 0
+         except Exception:
+             return False
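
The core of the rewritten export() is the async-context detection: if an event loop is running, the blocking upload is handed to a shared ThreadPoolExecutor and the method returns immediately; otherwise it runs inline. A self-contained sketch of that pattern (upload() is a placeholder for the real per-task HTTP POST):

# Minimal sketch of the async-context detection pattern used by export() above.
import asyncio
from concurrent.futures import ThreadPoolExecutor

_executor = ThreadPoolExecutor(max_workers=8, thread_name_prefix="span-export")

def upload() -> None:
    print("uploading spans...")  # placeholder for the blocking HTTP POST

def export() -> None:
    try:
        loop = asyncio.get_running_loop()
        # Inside an event loop: schedule the blocking work on the pool and return at once.
        loop.run_in_executor(_executor, upload)
    except RuntimeError:
        # No running loop: safe to do the blocking work inline.
        upload()

async def main() -> None:
    export()                  # offloaded to the thread pool
    await asyncio.sleep(0.1)  # give the worker a moment to run

asyncio.run(main())
export()                      # no loop here, runs synchronously
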
hud/otel/tests/__init__.py CHANGED
@@ -1 +0,0 @@
- """Tests for OpenTelemetry integration."""