hud-python 0.4.51__py3-none-any.whl → 0.4.53__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +13 -1
- hud/agents/base.py +14 -3
- hud/agents/lite_llm.py +1 -1
- hud/agents/openai_chat_generic.py +15 -3
- hud/agents/tests/test_base.py +9 -2
- hud/agents/tests/test_base_runtime.py +164 -0
- hud/cli/__init__.py +18 -25
- hud/cli/build.py +35 -27
- hud/cli/dev.py +11 -29
- hud/cli/eval.py +114 -145
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +26 -3
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +134 -0
- hud/cli/tests/test_eval.py +4 -0
- hud/cli/tests/test_mcp_server.py +8 -7
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/utils/docker.py +120 -1
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +257 -0
- hud/clients/base.py +1 -1
- hud/clients/mcp_use.py +3 -1
- hud/datasets/parallel.py +2 -2
- hud/datasets/runner.py +85 -24
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_runner.py +106 -0
- hud/datasets/tests/test_utils.py +228 -0
- hud/otel/config.py +8 -6
- hud/otel/context.py +4 -4
- hud/otel/exporters.py +231 -57
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_instrumentation.py +207 -0
- hud/rl/learner.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/shared/exceptions.py +35 -9
- hud/shared/hints.py +25 -0
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +39 -30
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +30 -6
- hud/telemetry/async_context.py +331 -0
- hud/telemetry/job.py +51 -12
- hud/telemetry/tests/test_async_context.py +242 -0
- hud/telemetry/tests/test_instrument.py +414 -0
- hud/telemetry/tests/test_job.py +609 -0
- hud/telemetry/tests/test_trace.py +184 -6
- hud/telemetry/trace.py +16 -17
- hud/tools/computer/qwen.py +4 -1
- hud/tools/computer/settings.py +2 -2
- hud/tools/executors/base.py +4 -2
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/types.py +7 -1
- hud/utils/agent_factories.py +1 -3
- hud/utils/mcp.py +1 -1
- hud/utils/task_tracking.py +223 -0
- hud/utils/tests/test_agent_factories.py +60 -0
- hud/utils/tests/test_mcp.py +4 -6
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tasks.py +187 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/METADATA +48 -48
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/RECORD +88 -47
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/WHEEL +0 -0
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from unittest.mock import MagicMock, mock_open, patch
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from hud.datasets.utils import fetch_system_prompt_from_dataset, save_tasks
|
|
8
|
+
from hud.types import Task
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@pytest.mark.asyncio
async def test_fetch_system_prompt_success():
    """A non-empty prompt file downloaded from the hub is returned as-is."""
    fake_open = mock_open(read_data="Test system prompt")
    download_patch = patch(
        "huggingface_hub.hf_hub_download",
        return_value="/tmp/system_prompt.txt",
    )

    with download_patch as hub_download, patch("builtins.open", fake_open):
        prompt = await fetch_system_prompt_from_dataset("test/dataset")

    assert prompt == "Test system prompt"
    hub_download.assert_called_once()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@pytest.mark.asyncio
async def test_fetch_system_prompt_empty_file():
    """A prompt file containing only whitespace yields ``None``."""
    whitespace_only = mock_open(read_data="  \n  ")
    download_patch = patch(
        "huggingface_hub.hf_hub_download",
        return_value="/tmp/system_prompt.txt",
    )

    with download_patch, patch("builtins.open", whitespace_only):
        prompt = await fetch_system_prompt_from_dataset("test/dataset")

    assert prompt is None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@pytest.mark.asyncio
async def test_fetch_system_prompt_file_not_found():
    """A missing prompt file on the hub yields ``None`` instead of raising."""
    from huggingface_hub.errors import EntryNotFoundError

    missing_entry = EntryNotFoundError("File not found")
    with patch("huggingface_hub.hf_hub_download", side_effect=missing_entry):
        prompt = await fetch_system_prompt_from_dataset("test/dataset")

    assert prompt is None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@pytest.mark.asyncio
async def test_fetch_system_prompt_import_error():
    """When ``huggingface_hub`` cannot be imported the fetch yields ``None``."""
    import sys

    # A ``None`` entry in sys.modules makes ``import huggingface_hub`` raise
    # ImportError, simulating an environment without the package installed.
    blocked_modules = {"huggingface_hub": None}
    with patch.dict(sys.modules, blocked_modules):
        prompt = await fetch_system_prompt_from_dataset("test/dataset")

    assert prompt is None
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@pytest.mark.asyncio
async def test_fetch_system_prompt_general_exception():
    """Any unexpected download error is swallowed and reported as ``None``."""
    network_failure = Exception("Network error")
    with patch("huggingface_hub.hf_hub_download", side_effect=network_failure):
        prompt = await fetch_system_prompt_from_dataset("test/dataset")

    assert prompt is None
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def test_save_tasks_basic():
    """save_tasks builds one dataset row per task and pushes it to the repo."""
    task_dicts = [
        {"id": "1", "prompt": "test", "mcp_config": {"key": "value"}},
        {"id": "2", "prompt": "test2", "mcp_config": {"key2": "value2"}},
    ]
    hub_dataset = MagicMock()

    with patch("hud.datasets.utils.Dataset") as dataset_cls:
        dataset_cls.from_list.return_value = hub_dataset
        save_tasks(task_dicts, "test/repo")

    dataset_cls.from_list.assert_called_once()
    rows = dataset_cls.from_list.call_args.args[0]
    assert len(rows) == 2
    # Dict-valued fields such as mcp_config must arrive JSON-serialized.
    assert isinstance(rows[0]["mcp_config"], str)
    hub_dataset.push_to_hub.assert_called_once_with("test/repo")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def test_save_tasks_with_specific_fields():
    """Only the keys named in ``fields`` are kept in the saved rows."""
    task_dicts = [
        {"id": "1", "prompt": "test", "mcp_config": {"key": "value"}, "extra": "data"},
    ]

    with patch("hud.datasets.utils.Dataset") as dataset_cls:
        dataset_cls.from_list.return_value = MagicMock()
        save_tasks(task_dicts, "test/repo", fields=["id", "prompt"])

    first_row = dataset_cls.from_list.call_args.args[0][0]
    assert "id" in first_row
    assert "prompt" in first_row
    assert "extra" not in first_row
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def test_save_tasks_with_list_field():
    """List-valued fields are stored as JSON strings."""
    task_dicts = [
        {"id": "1", "tags": ["tag1", "tag2"], "count": 5},
    ]

    with patch("hud.datasets.utils.Dataset") as dataset_cls:
        dataset_cls.from_list.return_value = MagicMock()
        save_tasks(task_dicts, "test/repo")

    serialized_tags = dataset_cls.from_list.call_args.args[0][0]["tags"]
    # The list must have been JSON-encoded, so its items appear double-quoted.
    assert isinstance(serialized_tags, str)
    assert '"tag1"' in serialized_tags
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def test_save_tasks_with_primitive_types():
    """Primitive values pass through unchanged; ``None`` becomes ``""``."""
    primitives_task = {
        "string": "text",
        "integer": 42,
        "float": 3.14,
        "boolean": True,
        "none": None,
    }

    with patch("hud.datasets.utils.Dataset") as dataset_cls:
        dataset_cls.from_list.return_value = MagicMock()
        save_tasks([primitives_task], "test/repo")

    saved_row = dataset_cls.from_list.call_args.args[0][0]
    assert saved_row["string"] == "text"
    assert saved_row["integer"] == 42
    assert saved_row["float"] == 3.14
    assert saved_row["boolean"] is True
    assert saved_row["none"] == ""  # None becomes empty string
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def test_save_tasks_with_other_type():
    """Values of arbitrary types are stored via their ``str()`` form."""

    class CustomObj:
        def __str__(self):
            return "custom_value"

    task_dicts = [{"id": "1", "custom": CustomObj()}]

    with patch("hud.datasets.utils.Dataset") as dataset_cls:
        dataset_cls.from_list.return_value = MagicMock()
        save_tasks(task_dicts, "test/repo")

    saved_row = dataset_cls.from_list.call_args.args[0][0]
    assert saved_row["custom"] == "custom_value"
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def test_save_tasks_rejects_task_objects():
    """Passing a ``Task`` instance instead of a dict raises ``ValueError``."""
    task_instance = Task(prompt="test", mcp_config={})
    expected_message = "expects dictionaries, not Task objects"

    with pytest.raises(ValueError, match=expected_message):
        save_tasks([task_instance], "test/repo")  # type: ignore
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def test_save_tasks_rejects_task_objects_in_list():
    """A ``Task`` mixed in among dicts is rejected, with its index reported."""
    valid_dict = {"id": "1", "prompt": "test", "mcp_config": {}}
    stray_task = Task(prompt="test2", mcp_config={})  # Task object at index 1
    mixed_items = [valid_dict, stray_task]

    with pytest.raises(ValueError, match="Item 1 is a Task object"):
        save_tasks(mixed_items, "test/repo")  # type: ignore
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def test_save_tasks_with_kwargs():
    """Extra keyword arguments are forwarded untouched to ``push_to_hub``."""
    task_dicts = [{"id": "1", "prompt": "test"}]
    hub_dataset = MagicMock()

    with patch("hud.datasets.utils.Dataset") as dataset_cls:
        dataset_cls.from_list.return_value = hub_dataset
        save_tasks(task_dicts, "test/repo", private=True, commit_message="Test commit")

    hub_dataset.push_to_hub.assert_called_once_with(
        "test/repo",
        private=True,
        commit_message="Test commit",
    )
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def test_save_tasks_field_not_in_dict():
    """Requested fields that a task lacks are simply omitted from its row."""
    task_dicts = [{"id": "1", "prompt": "test"}]

    with patch("hud.datasets.utils.Dataset") as dataset_cls:
        dataset_cls.from_list.return_value = MagicMock()
        # "missing_field" is not a key of the task above.
        save_tasks(task_dicts, "test/repo", fields=["id", "missing_field"])

    saved_row = dataset_cls.from_list.call_args.args[0][0]
    assert "id" in saved_row
    assert "missing_field" not in saved_row
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def test_save_tasks_empty_list():
    """An empty task list still creates and pushes an (empty) dataset."""
    hub_dataset = MagicMock()

    with patch("hud.datasets.utils.Dataset") as dataset_cls:
        dataset_cls.from_list.return_value = hub_dataset
        save_tasks([], "test/repo")

    dataset_cls.from_list.assert_called_once_with([])
    hub_dataset.push_to_hub.assert_called_once()
|
hud/otel/config.py
CHANGED
|
@@ -94,16 +94,18 @@ def configure_telemetry(
|
|
|
94
94
|
|
|
95
95
|
# HUD exporter (only if enabled and API key is available)
|
|
96
96
|
if settings.telemetry_enabled and settings.api_key:
|
|
97
|
+
# Use the HudSpanExporter directly (it now handles async context internally)
|
|
97
98
|
exporter = HudSpanExporter(
|
|
98
99
|
telemetry_url=settings.hud_telemetry_url, api_key=settings.api_key
|
|
99
100
|
)
|
|
100
|
-
|
|
101
|
+
|
|
102
|
+
# Batch exports for efficiency while maintaining reasonable real-time visibility
|
|
101
103
|
provider.add_span_processor(
|
|
102
104
|
BatchSpanProcessor(
|
|
103
105
|
exporter,
|
|
104
|
-
schedule_delay_millis=1000,
|
|
105
|
-
max_queue_size=
|
|
106
|
-
max_export_batch_size=
|
|
106
|
+
schedule_delay_millis=1000,  # Export every 1 second (near real-time visibility)
|
|
107
|
+
max_queue_size=16384, # Larger queue for high-volume scenarios
|
|
108
|
+
max_export_batch_size=512, # Larger batches (fewer uploads)
|
|
107
109
|
export_timeout_millis=30000,
|
|
108
110
|
)
|
|
109
111
|
)
|
|
@@ -140,8 +142,8 @@ def configure_telemetry(
|
|
|
140
142
|
BatchSpanProcessor(
|
|
141
143
|
otlp_exporter,
|
|
142
144
|
schedule_delay_millis=1000,
|
|
143
|
-
max_queue_size=
|
|
144
|
-
max_export_batch_size=
|
|
145
|
+
max_queue_size=16384,
|
|
146
|
+
max_export_batch_size=512,
|
|
145
147
|
export_timeout_millis=30000,
|
|
146
148
|
)
|
|
147
149
|
)
|
hud/otel/context.py
CHANGED
|
@@ -520,8 +520,8 @@ class trace:
|
|
|
520
520
|
# Update task status if root (only for HUD backend)
|
|
521
521
|
if self.is_root and settings.telemetry_enabled and settings.api_key:
|
|
522
522
|
if exc_type is not None:
|
|
523
|
-
# Use
|
|
524
|
-
|
|
523
|
+
# Use fire-and-forget to avoid blocking the event loop
|
|
524
|
+
_fire_and_forget_status_update(
|
|
525
525
|
self.task_run_id,
|
|
526
526
|
"error",
|
|
527
527
|
job_id=self.job_id,
|
|
@@ -533,8 +533,8 @@ class trace:
|
|
|
533
533
|
if not self.job_id:
|
|
534
534
|
_print_trace_complete_url(self.task_run_id, error_occurred=True)
|
|
535
535
|
else:
|
|
536
|
-
# Use
|
|
537
|
-
|
|
536
|
+
# Use fire-and-forget to avoid blocking the event loop
|
|
537
|
+
_fire_and_forget_status_update(
|
|
538
538
|
self.task_run_id,
|
|
539
539
|
"completed",
|
|
540
540
|
job_id=self.job_id,
|
hud/otel/exporters.py
CHANGED
|
@@ -1,21 +1,27 @@
|
|
|
1
|
-
"""Custom OpenTelemetry exporter
|
|
2
|
-
HTTP endpoint (/trace/<id>/telemetry-upload).
|
|
1
|
+
"""Custom OpenTelemetry exporter for HUD telemetry backend.
|
|
3
2
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
implemented.
|
|
3
|
+
This exporter sends spans to the HUD telemetry HTTP endpoint, grouping them
|
|
4
|
+
by task_run_id for efficient batch uploads.
|
|
7
5
|
|
|
8
|
-
|
|
9
|
-
|
|
6
|
+
Performance optimizations:
|
|
7
|
+
- Detects async contexts and runs exports in a thread pool to avoid blocking
|
|
8
|
+
- Uses persistent HTTP client with connection pooling for reduced overhead
|
|
9
|
+
- Tracks pending export futures to ensure completion during shutdown
|
|
10
|
+
|
|
11
|
+
The exporter derives from SpanExporter (synchronous interface) but handles
|
|
12
|
+
async contexts intelligently to prevent event loop blocking during high-concurrency
|
|
13
|
+
workloads.
|
|
10
14
|
"""
|
|
11
15
|
|
|
12
16
|
from __future__ import annotations
|
|
13
17
|
|
|
18
|
+
import atexit
|
|
19
|
+
import concurrent.futures as cf
|
|
14
20
|
import contextlib
|
|
15
21
|
import json
|
|
16
22
|
import logging
|
|
17
|
-
import time
|
|
18
23
|
from collections import defaultdict
|
|
24
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
19
25
|
from datetime import UTC, datetime
|
|
20
26
|
from typing import TYPE_CHECKING, Any
|
|
21
27
|
|
|
@@ -31,6 +37,34 @@ if TYPE_CHECKING:
|
|
|
31
37
|
|
|
32
38
|
logger = logging.getLogger(__name__)
|
|
33
39
|
|
|
40
|
+
# Global singleton thread pool for span exports
|
|
41
|
+
_export_executor: ThreadPoolExecutor | None = None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def get_export_executor() -> ThreadPoolExecutor:
    """Return the process-wide thread pool used for span exports.

    The pool is created lazily on the first call (8 workers, threads named
    ``span-export*``) and the same instance is returned on every later call.
    When the pool is created, an ``atexit`` hook is registered that shuts it
    down and waits for queued work to finish.

    Returns:
        The shared ThreadPoolExecutor.
    """
    global _export_executor

    if _export_executor is not None:
        return _export_executor

    # First call: build the pool and arrange for it to be drained at exit.
    _export_executor = ThreadPoolExecutor(max_workers=8, thread_name_prefix="span-export")

    def cleanup() -> None:
        # Re-read the global so the hook is a no-op if it was reset to None.
        if _export_executor is not None:
            _export_executor.shutdown(wait=True)

    atexit.register(cleanup)
    return _export_executor
|
|
67
|
+
|
|
34
68
|
|
|
35
69
|
# ---------------------------------------------------------------------------
|
|
36
70
|
# Models
|
|
@@ -297,73 +331,213 @@ def _span_to_dict(span: ReadableSpan) -> dict[str, Any]:
|
|
|
297
331
|
|
|
298
332
|
|
|
299
333
|
class HudSpanExporter(SpanExporter):
    """OpenTelemetry span exporter for the HUD backend.

    Spans are grouped by their ``hud.task_run_id`` attribute and each group is
    POSTed to ``<telemetry_url>/trace/<run_id>/telemetry-upload``.

    When ``export`` is called from a thread that has a running asyncio event
    loop, the HTTP work is submitted to the shared export thread pool so the
    loop is not blocked; otherwise the upload runs inline. Background uploads
    are tracked as ``concurrent.futures.Future`` objects so ``force_flush`` and
    ``shutdown`` can wait on them with ``concurrent.futures.wait``.
    """

    def __init__(self, *, telemetry_url: str, api_key: str) -> None:
        """Initialize the HUD span exporter.

        Args:
            telemetry_url: Base URL for the HUD telemetry backend (a trailing
                slash is stripped).
            api_key: API key passed to every upload request.
        """
        import threading

        super().__init__()
        self._telemetry_url = telemetry_url.rstrip("/")
        self._api_key = api_key

        # Background uploads still in flight. The list is mutated from the
        # calling thread and from pool worker threads (done callbacks), so all
        # access goes through the lock.
        self._pending_futures: list[cf.Future[SpanExportResult]] = []
        self._pending_lock = threading.Lock()

    def export(self, spans: list[ReadableSpan]) -> SpanExportResult:  # type: ignore[override]
        """Export spans to the HUD backend.

        Args:
            spans: List of ReadableSpan objects to export. Spans without a
                ``hud.task_run_id`` attribute are skipped.

        Returns:
            ``SpanExportResult.SUCCESS`` immediately when the upload was handed
            to the thread pool (async context); otherwise the real outcome of
            the inline upload.
        """
        if not spans:
            return SpanExportResult.SUCCESS

        # Group spans by task_run_id for batched uploads
        grouped: dict[str, list[ReadableSpan]] = defaultdict(list)
        for span in spans:
            run_id = span.attributes.get("hud.task_run_id") if span.attributes else None
            if not run_id:
                # Skip spans outside HUD traces
                continue
            grouped[str(run_id)].append(span)

        if not grouped:
            # Nothing belongs to a HUD trace; no request would be sent.
            return SpanExportResult.SUCCESS

        # Detect async context to avoid event loop blocking
        import asyncio

        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No event loop in this thread - run synchronously
            return self._export_grouped(grouped)

        try:
            # Executor.submit (not loop.run_in_executor) so that we get a
            # concurrent.futures.Future, which is what cf.wait() requires.
            future = get_export_executor().submit(self._export_grouped, grouped)
        except RuntimeError:
            # The pool refuses new work once it has been shut down; fall back
            # to an inline upload rather than dropping the spans.
            return self._export_grouped(grouped)

        # Register before attaching the callback: if the upload has already
        # finished, add_done_callback runs the callback right away and it must
        # find the future in the list.
        with self._pending_lock:
            self._pending_futures.append(future)
        future.add_done_callback(self._forget_future)

        # Don't wait for it - return immediately
        return SpanExportResult.SUCCESS

    def _export_grouped(self, grouped: dict[str, list[ReadableSpan]]) -> SpanExportResult:
        """Upload each run's spans with one POST per run; stop at the first failure."""
        for run_id, span_batch in grouped.items():
            try:
                url = f"{self._telemetry_url}/trace/{run_id}/telemetry-upload"
                telemetry_spans = [_span_to_dict(s) for s in span_batch]

                # Highest integer step count seen in this batch (0 if none).
                step_count = max(
                    (
                        span.attributes["hud.step_count"]
                        for span in span_batch
                        if span.attributes
                        and isinstance(span.attributes.get("hud.step_count"), int)
                    ),
                    default=0,
                )

                payload: dict[str, Any] = {
                    "metadata": {},
                    "telemetry": telemetry_spans,
                }

                # Only include step_count if we found any steps
                if step_count > 0:
                    payload["step_count"] = step_count

                logger.debug("HUD exporter sending %d spans to %s", len(span_batch), url)
                make_request_sync(
                    method="POST",
                    url=url,
                    json=payload,
                    api_key=self._api_key,
                )
            except Exception as exc:
                logger.exception("HUD exporter failed to send spans for task %s: %s", run_id, exc)
                # If *any* group fails, report FAILURE for the whole export.
                return SpanExportResult.FAILURE

        return SpanExportResult.SUCCESS

    def _forget_future(self, future: cf.Future[SpanExportResult]) -> None:
        """Done-callback: retrieve the outcome and drop the future from tracking."""
        with contextlib.suppress(Exception):
            # Consume exception to avoid "exception was never retrieved"
            _ = future.exception()
        with self._pending_lock, contextlib.suppress(ValueError):
            self._pending_futures.remove(future)

    def shutdown(self) -> None:  # type: ignore[override]
        """Shutdown the exporter and wait for pending exports.

        Waits up to 10 seconds for any in-flight exports to complete, then
        stops tracking whatever is left.
        """
        with self._pending_lock:
            pending = list(self._pending_futures)
        try:
            if pending:
                with contextlib.suppress(Exception):
                    cf.wait(pending, timeout=10.0)
        finally:
            with self._pending_lock:
                self._pending_futures.clear()

    def force_flush(self, timeout_millis: int | None = None) -> bool:  # type: ignore[override]
        """Wait for all background uploads that are currently pending.

        Args:
            timeout_millis: Maximum time to wait in milliseconds; ``None`` or
                ``0`` means 30000.

        Returns:
            True if every pending upload finished within the timeout (or there
            was nothing pending), False otherwise.
        """
        try:
            with self._pending_lock:
                pending = list(self._pending_futures)
            if not pending:
                return True

            total_pending = len(pending)
            if total_pending > 10:
                # Show progress for large batches
                logger.info("Flushing %d pending telemetry uploads...", total_pending)

            timeout = (timeout_millis or 30000) / 1000.0
            done, not_done = cf.wait(pending, timeout=timeout)

            # Consume exceptions to avoid "exception was never retrieved" warnings
            for finished in done:
                with contextlib.suppress(Exception):
                    _ = finished.exception()

            # Done callbacks normally remove these already; this is a safety net.
            with self._pending_lock:
                for finished in done:
                    with contextlib.suppress(ValueError):
                        self._pending_futures.remove(finished)

            if total_pending > 10:
                logger.info("Completed %d/%d telemetry uploads", len(done), total_pending)

            return len(not_done) == 0
        except Exception:
            return False
|
hud/otel/tests/__init__.py
CHANGED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
"""Tests for OpenTelemetry integration."""
|