hud-python 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. (The original registry page links to more details.)

Files changed (192)
  1. hud/__init__.py +22 -89
  2. hud/agents/__init__.py +17 -0
  3. hud/agents/art.py +101 -0
  4. hud/agents/base.py +599 -0
  5. hud/{mcp → agents}/claude.py +373 -321
  6. hud/{mcp → agents}/langchain.py +250 -250
  7. hud/agents/misc/__init__.py +7 -0
  8. hud/{agent → agents}/misc/response_agent.py +80 -80
  9. hud/{mcp → agents}/openai.py +352 -334
  10. hud/agents/openai_chat_generic.py +154 -0
  11. hud/{mcp → agents}/tests/__init__.py +1 -1
  12. hud/agents/tests/test_base.py +742 -0
  13. hud/agents/tests/test_claude.py +324 -0
  14. hud/{mcp → agents}/tests/test_client.py +363 -324
  15. hud/{mcp → agents}/tests/test_openai.py +237 -238
  16. hud/cli/__init__.py +617 -0
  17. hud/cli/__main__.py +8 -0
  18. hud/cli/analyze.py +371 -0
  19. hud/cli/analyze_metadata.py +230 -0
  20. hud/cli/build.py +427 -0
  21. hud/cli/clone.py +185 -0
  22. hud/cli/cursor.py +92 -0
  23. hud/cli/debug.py +392 -0
  24. hud/cli/docker_utils.py +83 -0
  25. hud/cli/init.py +281 -0
  26. hud/cli/interactive.py +353 -0
  27. hud/cli/mcp_server.py +756 -0
  28. hud/cli/pull.py +336 -0
  29. hud/cli/push.py +379 -0
  30. hud/cli/remote_runner.py +311 -0
  31. hud/cli/runner.py +160 -0
  32. hud/cli/tests/__init__.py +3 -0
  33. hud/cli/tests/test_analyze.py +284 -0
  34. hud/cli/tests/test_cli_init.py +265 -0
  35. hud/cli/tests/test_cli_main.py +27 -0
  36. hud/cli/tests/test_clone.py +142 -0
  37. hud/cli/tests/test_cursor.py +253 -0
  38. hud/cli/tests/test_debug.py +453 -0
  39. hud/cli/tests/test_mcp_server.py +139 -0
  40. hud/cli/tests/test_utils.py +388 -0
  41. hud/cli/utils.py +263 -0
  42. hud/clients/README.md +143 -0
  43. hud/clients/__init__.py +16 -0
  44. hud/clients/base.py +354 -0
  45. hud/clients/fastmcp.py +202 -0
  46. hud/clients/mcp_use.py +278 -0
  47. hud/clients/tests/__init__.py +1 -0
  48. hud/clients/tests/test_client_integration.py +111 -0
  49. hud/clients/tests/test_fastmcp.py +342 -0
  50. hud/clients/tests/test_protocol.py +188 -0
  51. hud/clients/utils/__init__.py +1 -0
  52. hud/clients/utils/retry_transport.py +160 -0
  53. hud/datasets.py +322 -192
  54. hud/misc/__init__.py +1 -0
  55. hud/{agent → misc}/claude_plays_pokemon.py +292 -283
  56. hud/otel/__init__.py +35 -0
  57. hud/otel/collector.py +142 -0
  58. hud/otel/config.py +164 -0
  59. hud/otel/context.py +536 -0
  60. hud/otel/exporters.py +366 -0
  61. hud/otel/instrumentation.py +97 -0
  62. hud/otel/processors.py +118 -0
  63. hud/otel/tests/__init__.py +1 -0
  64. hud/otel/tests/test_processors.py +197 -0
  65. hud/server/__init__.py +5 -5
  66. hud/server/context.py +114 -0
  67. hud/server/helper/__init__.py +5 -0
  68. hud/server/low_level.py +132 -0
  69. hud/server/server.py +166 -0
  70. hud/server/tests/__init__.py +3 -0
  71. hud/settings.py +73 -79
  72. hud/shared/__init__.py +5 -0
  73. hud/{exceptions.py → shared/exceptions.py} +180 -180
  74. hud/{server → shared}/requests.py +264 -264
  75. hud/shared/tests/test_exceptions.py +157 -0
  76. hud/{server → shared}/tests/test_requests.py +275 -275
  77. hud/telemetry/__init__.py +25 -30
  78. hud/telemetry/instrument.py +379 -0
  79. hud/telemetry/job.py +309 -141
  80. hud/telemetry/replay.py +74 -0
  81. hud/telemetry/trace.py +83 -0
  82. hud/tools/__init__.py +33 -34
  83. hud/tools/base.py +365 -65
  84. hud/tools/bash.py +161 -137
  85. hud/tools/computer/__init__.py +15 -13
  86. hud/tools/computer/anthropic.py +437 -420
  87. hud/tools/computer/hud.py +376 -334
  88. hud/tools/computer/openai.py +295 -292
  89. hud/tools/computer/settings.py +82 -0
  90. hud/tools/edit.py +314 -290
  91. hud/tools/executors/__init__.py +30 -30
  92. hud/tools/executors/base.py +539 -532
  93. hud/tools/executors/pyautogui.py +621 -619
  94. hud/tools/executors/tests/__init__.py +1 -1
  95. hud/tools/executors/tests/test_base_executor.py +338 -338
  96. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  97. hud/tools/executors/xdo.py +511 -503
  98. hud/tools/{playwright_tool.py → playwright.py} +412 -379
  99. hud/tools/tests/__init__.py +3 -3
  100. hud/tools/tests/test_base.py +282 -0
  101. hud/tools/tests/test_bash.py +158 -152
  102. hud/tools/tests/test_bash_extended.py +197 -0
  103. hud/tools/tests/test_computer.py +425 -52
  104. hud/tools/tests/test_computer_actions.py +34 -34
  105. hud/tools/tests/test_edit.py +259 -240
  106. hud/tools/tests/test_init.py +27 -27
  107. hud/tools/tests/test_playwright_tool.py +183 -183
  108. hud/tools/tests/test_tools.py +145 -157
  109. hud/tools/tests/test_utils.py +156 -156
  110. hud/tools/types.py +72 -0
  111. hud/tools/utils.py +50 -50
  112. hud/types.py +136 -89
  113. hud/utils/__init__.py +10 -16
  114. hud/utils/async_utils.py +65 -0
  115. hud/utils/design.py +168 -0
  116. hud/utils/mcp.py +55 -0
  117. hud/utils/progress.py +149 -149
  118. hud/utils/telemetry.py +66 -66
  119. hud/utils/tests/test_async_utils.py +173 -0
  120. hud/utils/tests/test_init.py +17 -21
  121. hud/utils/tests/test_progress.py +261 -225
  122. hud/utils/tests/test_telemetry.py +82 -37
  123. hud/utils/tests/test_version.py +8 -8
  124. hud/version.py +7 -7
  125. hud_python-0.4.0.dist-info/METADATA +474 -0
  126. hud_python-0.4.0.dist-info/RECORD +132 -0
  127. hud_python-0.4.0.dist-info/entry_points.txt +3 -0
  128. {hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
  129. hud/adapters/__init__.py +0 -8
  130. hud/adapters/claude/__init__.py +0 -5
  131. hud/adapters/claude/adapter.py +0 -180
  132. hud/adapters/claude/tests/__init__.py +0 -1
  133. hud/adapters/claude/tests/test_adapter.py +0 -519
  134. hud/adapters/common/__init__.py +0 -6
  135. hud/adapters/common/adapter.py +0 -178
  136. hud/adapters/common/tests/test_adapter.py +0 -289
  137. hud/adapters/common/types.py +0 -446
  138. hud/adapters/operator/__init__.py +0 -5
  139. hud/adapters/operator/adapter.py +0 -108
  140. hud/adapters/operator/tests/__init__.py +0 -1
  141. hud/adapters/operator/tests/test_adapter.py +0 -370
  142. hud/agent/__init__.py +0 -19
  143. hud/agent/base.py +0 -126
  144. hud/agent/claude.py +0 -271
  145. hud/agent/langchain.py +0 -215
  146. hud/agent/misc/__init__.py +0 -3
  147. hud/agent/operator.py +0 -268
  148. hud/agent/tests/__init__.py +0 -1
  149. hud/agent/tests/test_base.py +0 -202
  150. hud/env/__init__.py +0 -11
  151. hud/env/client.py +0 -35
  152. hud/env/docker_client.py +0 -349
  153. hud/env/environment.py +0 -446
  154. hud/env/local_docker_client.py +0 -358
  155. hud/env/remote_client.py +0 -212
  156. hud/env/remote_docker_client.py +0 -292
  157. hud/gym.py +0 -130
  158. hud/job.py +0 -773
  159. hud/mcp/__init__.py +0 -17
  160. hud/mcp/base.py +0 -631
  161. hud/mcp/client.py +0 -312
  162. hud/mcp/tests/test_base.py +0 -512
  163. hud/mcp/tests/test_claude.py +0 -294
  164. hud/task.py +0 -149
  165. hud/taskset.py +0 -237
  166. hud/telemetry/_trace.py +0 -347
  167. hud/telemetry/context.py +0 -230
  168. hud/telemetry/exporter.py +0 -575
  169. hud/telemetry/instrumentation/__init__.py +0 -3
  170. hud/telemetry/instrumentation/mcp.py +0 -259
  171. hud/telemetry/instrumentation/registry.py +0 -59
  172. hud/telemetry/mcp_models.py +0 -270
  173. hud/telemetry/tests/__init__.py +0 -1
  174. hud/telemetry/tests/test_context.py +0 -210
  175. hud/telemetry/tests/test_trace.py +0 -312
  176. hud/tools/helper/README.md +0 -56
  177. hud/tools/helper/__init__.py +0 -9
  178. hud/tools/helper/mcp_server.py +0 -78
  179. hud/tools/helper/server_initialization.py +0 -115
  180. hud/tools/helper/utils.py +0 -58
  181. hud/trajectory.py +0 -94
  182. hud/utils/agent.py +0 -37
  183. hud/utils/common.py +0 -256
  184. hud/utils/config.py +0 -120
  185. hud/utils/deprecation.py +0 -115
  186. hud/utils/misc.py +0 -53
  187. hud/utils/tests/test_common.py +0 -277
  188. hud/utils/tests/test_config.py +0 -129
  189. hud_python-0.3.5.dist-info/METADATA +0 -284
  190. hud_python-0.3.5.dist-info/RECORD +0 -120
  191. hud/{adapters/common → shared}/tests/__init__.py +0 -0
  192. {hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0
hud/telemetry/exporter.py DELETED
@@ -1,575 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import asyncio
4
- import concurrent.futures # For run_coroutine_threadsafe return type
5
- import enum
6
- import json
7
- import logging
8
- import threading
9
- import time
10
- from datetime import UTC, datetime # For ISO timestamp conversion
11
- from typing import TYPE_CHECKING, Any
12
-
13
- if TYPE_CHECKING:
14
- from collections.abc import Coroutine
15
-
16
- import httpx
17
- from pydantic import BaseModel
18
-
19
- from hud.settings import settings
20
-
21
- # Import BaseMCPCall and TrajectoryStep for type hinting and transformation
22
- from hud.telemetry.mcp_models import ( # MCPResponseCall for isinstance check
23
- BaseMCPCall,
24
- MCPResponseCall,
25
- TrajectoryStep,
26
- )
27
-
28
- logger = logging.getLogger("hud.telemetry")
29
-
30
-
31
- # --- Task Run Status Models ---
32
- class TaskRunStatus(enum.StrEnum):
33
- INITIALIZING = "initializing"
34
- RUNNING = "running"
35
- EVALUATING = "evaluating"
36
- COMPLETED = "completed"
37
- ERROR = "error"
38
-
39
-
40
- class TaskRunStatusUpdateRequest(BaseModel):
41
- """Request model for updating task run status."""
42
-
43
- status: TaskRunStatus
44
- error_message: str | None = None # Optional error message if status is ERROR
45
- metadata: dict[str, Any] | None = None # Optional metadata for context
46
- job_id: str | None = None # Optional parent job ID
47
-
48
-
49
- # --- Job Status Models ---
50
- class JobStatus(enum.StrEnum):
51
- RUNNING = "running"
52
- COMPLETED = "completed"
53
- ERROR = "error"
54
-
55
-
56
- class JobStatusUpdateRequest(BaseModel):
57
- """Request model for updating job status."""
58
-
59
- status: JobStatus
60
- error_message: str | None = None # Optional error message if status is ERROR
61
- metadata: dict[str, Any] | None = None # Optional metadata for context
62
- taskset_name: str | None = None # Optional dataset/taskset name
63
-
64
-
65
- # --- Worker Thread and Event Loop Management ---
66
- _worker_thread: threading.Thread | None = None
67
- _worker_loop: asyncio.AbstractEventLoop | None = None
68
- _worker_lock = threading.Lock() # For protecting worker thread/loop startup
69
- _worker_loop_ready_event = threading.Event() # Event for sync between threads
70
-
71
- # --- Async Queue and Task (managed by the worker loop) ---
72
- _SENTINEL_FOR_WORKER_SHUTDOWN = object() # Sentinel for queue-based shutdown signaling
73
- _export_queue_async: list[dict[str, Any] | object] = [] # Queue can hold dicts or sentinel
74
- _export_lock_async = asyncio.Lock() # Async lock for the async queue
75
- _export_task_async: asyncio.Task | None = None # Async task for processing the queue
76
-
77
- # --- Constants ---
78
- EXPORT_INTERVAL = 5.0 # seconds - delay between non-incremental exports
79
- MIN_EXPORT_INTERVAL = 0.1 # seconds - minimum delay between any exports to avoid overwhelming
80
- # MAX_BATCH_SIZE removed as we send one trace payload at a time
81
-
82
-
83
- def _run_worker_loop() -> None:
84
- """Target function for the worker thread. Runs its own asyncio event loop."""
85
- global _worker_loop
86
- logger.debug("Telemetry worker thread: Starting event loop.")
87
- _worker_loop = asyncio.new_event_loop()
88
- asyncio.set_event_loop(_worker_loop)
89
-
90
- _worker_loop_ready_event.set() # Signal that loop is created and set for this thread
91
-
92
- try:
93
- logger.debug("Telemetry worker thread: Event loop running.")
94
- _worker_loop.run_forever()
95
- except Exception as e:
96
- logger.exception("Telemetry worker loop encountered an unhandled exception: %s", e)
97
- finally:
98
- logger.debug("Telemetry worker loop: Starting cleanup...")
99
- if _export_task_async and not _export_task_async.done():
100
- logger.debug("Telemetry worker loop: Cancelling active export processing task.")
101
- _export_task_async.cancel()
102
- try:
103
- # Wait for the task to acknowledge cancellation
104
- _worker_loop.run_until_complete(
105
- asyncio.gather(_export_task_async, return_exceptions=True)
106
- )
107
- except asyncio.CancelledError:
108
- logger.debug(
109
- "Telemetry worker loop: Export processing task acknowledged cancellation."
110
- )
111
- except Exception as e_gather:
112
- logger.debug(
113
- "Telemetry worker loop: Exception during export task cleanup: %s", e_gather
114
- )
115
-
116
- logger.debug("Telemetry worker loop: Closing.")
117
- _worker_loop.close()
118
- logger.debug("Telemetry worker thread: Event loop closed.")
119
- # _worker_loop_ready_event.clear() # Should be cleared by starter if thread is to be reused
120
-
121
-
122
- def _start_worker_if_needed() -> None:
123
- """Starts the background worker thread if not already running. Assumes _worker_lock is held."""
124
- global _worker_thread # _worker_loop is set by the thread itself
125
- if _worker_thread is None or not _worker_thread.is_alive():
126
- logger.debug("Telemetry: Worker thread not alive, starting new one.")
127
- # _worker_loop should be None here or will be replaced by the new thread
128
- _worker_loop_ready_event.clear()
129
- _worker_thread = threading.Thread(
130
- target=_run_worker_loop, daemon=True, name="HUDTelemetryWorker"
131
- )
132
- _worker_thread.start()
133
-
134
- logger.debug("Telemetry: Waiting for worker thread event loop to be ready...")
135
- if not _worker_loop_ready_event.wait(timeout=5.0): # Wait up to 5 seconds
136
- logger.error(
137
- "Telemetry: Worker thread failed to signal event loop readiness within timeout."
138
- )
139
- # This is a problem, subsequent submissions might fail.
140
- return
141
-
142
- # Minor delay to ensure loop might have started run_forever if wait was too tight
143
- time.sleep(0.05)
144
- if _worker_loop is None or not _worker_loop.is_running():
145
- logger.error("Telemetry: Worker loop is not ready or not running after event was set.")
146
- else:
147
- logger.debug("Telemetry: Worker thread event loop is ready.")
148
-
149
-
150
- def submit_to_worker_loop(coro: Coroutine[Any, Any, Any]) -> concurrent.futures.Future[Any] | None:
151
- """Submits a coroutine to be run on the worker thread's event loop."""
152
- with _worker_lock: # Protects check-and-start of worker thread/loop
153
- _start_worker_if_needed()
154
-
155
- # Check _worker_loop status AFTER attempting to start and waiting for readiness event
156
- if _worker_loop is None or not _worker_loop.is_running():
157
- logger.error(
158
- "Telemetry: Worker loop not available or not running for submitting coroutine."
159
- )
160
- return None
161
-
162
- try:
163
- future = asyncio.run_coroutine_threadsafe(coro, _worker_loop)
164
- return future
165
- except Exception as e:
166
- # This can happen if the loop is shut down right as we try to submit
167
- logger.exception("Telemetry: Failed to submit coroutine to worker loop: %s", e)
168
- return None
169
-
170
-
171
- # --- Telemetry Export Logic (runs on worker thread's loop) ---
172
-
173
-
174
- async def export_telemetry(
175
- task_run_id: str,
176
- trace_attributes: dict[str, Any],
177
- mcp_calls: list[BaseMCPCall], # Type hint is now list[BaseMCPCall]
178
- ) -> None:
179
- """
180
- Export telemetry data to the HUD telemetry service.
181
-
182
- Args:
183
- task_run_id: The task run ID associated with this telemetry
184
- trace_attributes: Attributes of the trace
185
- mcp_calls: List of MCP call Pydantic models to export
186
- """
187
- trajectory_steps_data: list[dict[str, Any]] = []
188
- for mcp_call_model in mcp_calls:
189
- action_data = mcp_call_model.model_dump()
190
-
191
- start_ts_iso = None
192
- end_ts_iso = None
193
-
194
- # Get start_time if available (e.g. on MCPRequestCall, MCPNotificationCall)
195
- actual_start_time_float = getattr(mcp_call_model, "start_time", None)
196
- if actual_start_time_float:
197
- start_ts_iso = (
198
- datetime.fromtimestamp(actual_start_time_float, UTC)
199
- .isoformat()
200
- .replace("+00:00", "Z")
201
- )
202
-
203
- # Use 'end_time' if available, otherwise fall back to 'timestamp' for the end_timestamp
204
- actual_end_time_float = getattr(mcp_call_model, "end_time", None)
205
- effective_end_timestamp_float = (
206
- actual_end_time_float if actual_end_time_float else mcp_call_model.timestamp
207
- )
208
-
209
- if effective_end_timestamp_float:
210
- end_ts_iso = (
211
- datetime.fromtimestamp(effective_end_timestamp_float, UTC)
212
- .isoformat()
213
- .replace("+00:00", "Z")
214
- )
215
-
216
- # For events that are more like points in time (e.g., a received response that
217
- # doesn't have a separate start_time field) set start_timestamp to be the same as
218
- # end_timestamp if start_timestamp wasn't explicitly set.
219
- if end_ts_iso and not start_ts_iso:
220
- start_ts_iso = end_ts_iso
221
-
222
- step_metadata: dict[str, Any] = {
223
- "mcp_method": mcp_call_model.method,
224
- "mcp_status": mcp_call_model.status.value,
225
- "mcp_call_type_original": mcp_call_model.call_type,
226
- }
227
- if mcp_call_model.direction:
228
- step_metadata["mcp_direction"] = mcp_call_model.direction.value
229
- if mcp_call_model.message_id is not None:
230
- step_metadata["mcp_message_id"] = str(mcp_call_model.message_id) # Ensure string
231
-
232
- # Specific handling for MCPResponseCall fields in metadata
233
- if isinstance(mcp_call_model, MCPResponseCall):
234
- step_metadata["mcp_is_error"] = mcp_call_model.is_error # bool is fine for JSON Any
235
- if mcp_call_model.is_error:
236
- if mcp_call_model.error is not None:
237
- step_metadata["mcp_error_details"] = str(mcp_call_model.error) # Ensure string
238
- if mcp_call_model.error_type is not None:
239
- step_metadata["mcp_error_type"] = str(
240
- mcp_call_model.error_type
241
- ) # Ensure string
242
-
243
- obs_text = None
244
- if isinstance(mcp_call_model, MCPResponseCall) and mcp_call_model.response_data:
245
- result_data = mcp_call_model.response_data.get("result")
246
- if result_data is not None:
247
- try:
248
- obs_text = json.dumps(result_data)
249
- except (TypeError, OverflowError):
250
- obs_text = str(result_data)
251
-
252
- trajectory_step = TrajectoryStep(
253
- type="mcp-step",
254
- actions=[action_data],
255
- start_timestamp=start_ts_iso,
256
- end_timestamp=end_ts_iso,
257
- metadata=step_metadata,
258
- observation_text=obs_text,
259
- )
260
- trajectory_steps_data.append(trajectory_step.model_dump())
261
-
262
- payload_to_queue = {
263
- "task_run_id": task_run_id,
264
- "attributes": trace_attributes,
265
- "mcp_calls": trajectory_steps_data,
266
- "timestamp": time.time(),
267
- }
268
-
269
- await _queue_for_export_async(payload_to_queue)
270
-
271
-
272
- async def _queue_for_export_async(payload: dict[str, Any] | object) -> None:
273
- """Adds a payload or sentinel to the async export queue. Runs on worker loop."""
274
- global _export_task_async, _worker_loop
275
- if not _worker_loop or not _worker_loop.is_running():
276
- logger.error("Cannot queue telemetry, worker loop not running or not set.")
277
- return
278
-
279
- async with _export_lock_async:
280
- _export_queue_async.append(payload)
281
- if _export_task_async is None or _export_task_async.done():
282
- _export_task_async = _worker_loop.create_task(_process_export_queue_async())
283
- logger.debug("Started/Restarted async telemetry export processing task on worker loop.")
284
-
285
-
286
- async def _process_export_queue_async() -> None:
287
- """Processes the async export queue. Runs on worker loop via _export_task_async."""
288
- global _export_task_async
289
- try:
290
- while True:
291
- payload_to_process: dict[str, Any] | object | None = None
292
- async with _export_lock_async:
293
- if not _export_queue_async:
294
- logger.debug("Async export queue empty, processing task will pause.")
295
- _export_task_async = None
296
- return
297
- payload_to_process = _export_queue_async.pop(0)
298
-
299
- if payload_to_process is _SENTINEL_FOR_WORKER_SHUTDOWN:
300
- logger.debug("Shutdown sentinel received by processing task, stopping.")
301
- _export_task_async = None
302
- return
303
-
304
- if isinstance(payload_to_process, dict): # Ensure it's a dict before processing as such
305
- await _export_trace_payload_async(payload_to_process)
306
-
307
- # Apply appropriate delay based on export type
308
- is_incremental = payload_to_process.get("attributes", {}).get("incremental", False)
309
- if is_incremental:
310
- # Small delay for incremental exports to avoid overwhelming the server
311
- await asyncio.sleep(MIN_EXPORT_INTERVAL)
312
- else:
313
- # Longer delay for final exports
314
- await asyncio.sleep(EXPORT_INTERVAL)
315
- else:
316
- # Should not happen if only dicts and sentinel are queued
317
- logger.warning("Unexpected item in telemetry queue: %s", type(payload_to_process))
318
-
319
- except asyncio.CancelledError:
320
- logger.debug("Async telemetry export processing task cancelled.")
321
- _export_task_async = None
322
- raise
323
- except Exception as e:
324
- logger.exception("Error in async telemetry export processing task: %s", e)
325
- _export_task_async = None
326
-
327
-
328
- async def _export_trace_payload_async(payload: dict[str, Any]) -> None:
329
- """Export a single trace payload to the HUD telemetry service."""
330
- if not settings.telemetry_enabled:
331
- logger.debug("Telemetry export skipped - telemetry not enabled")
332
- return
333
-
334
- task_run_id = payload.get("task_run_id")
335
- if not task_run_id:
336
- logger.warning("Payload missing task_run_id, skipping export")
337
- return
338
-
339
- # The payload itself is what we want to send (containing attributes and mcp_calls list)
340
- # The mcp_calls within the payload are already dumped dictionaries.
341
- data_to_send = {
342
- "metadata": payload.get("attributes", {}),
343
- "telemetry": payload.get("mcp_calls", []),
344
- }
345
-
346
- await send_telemetry_to_server(task_run_id, data_to_send)
347
-
348
-
349
- async def send_telemetry_to_server(task_run_id: str, data: dict[str, Any]) -> None:
350
- telemetry_url = f"{settings.base_url}/v2/task_runs/{task_run_id}/telemetry-upload"
351
-
352
- try:
353
- async with httpx.AsyncClient() as client:
354
- headers = {
355
- "Content-Type": "application/json",
356
- "Authorization": f"Bearer {settings.api_key}",
357
- }
358
-
359
- logger.debug(
360
- "Exporting telemetry for task run %s to %s",
361
- task_run_id,
362
- telemetry_url,
363
- )
364
- response = await client.post(
365
- telemetry_url,
366
- json=data, # Send the structured attributes and mcp_calls
367
- headers=headers,
368
- timeout=30.0,
369
- )
370
-
371
- if response.status_code >= 200 and response.status_code < 300:
372
- logger.debug(
373
- "Successfully exported telemetry for task run %s. Status: %s",
374
- task_run_id,
375
- response.status_code,
376
- )
377
- else:
378
- logger.warning(
379
- "Failed to export telemetry for task run %s: HTTP %s - %s",
380
- task_run_id,
381
- response.status_code,
382
- response.text,
383
- )
384
- except Exception as e:
385
- logger.exception("Error exporting telemetry for task run %s: %s", task_run_id, e)
386
-
387
-
388
- async def update_task_run_status(
389
- task_run_id: str,
390
- status: TaskRunStatus,
391
- error_message: str | None = None,
392
- metadata: dict[str, Any] | None = None,
393
- job_id: str | None = None,
394
- ) -> None:
395
- """Update the status of a task run."""
396
- if not settings.telemetry_enabled:
397
- logger.debug("Status update skipped - telemetry not enabled")
398
- return
399
-
400
- status_url = f"{settings.base_url}/v2/task_runs/{task_run_id}/status"
401
-
402
- try:
403
- async with httpx.AsyncClient() as client:
404
- headers = {
405
- "Content-Type": "application/json",
406
- "Authorization": f"Bearer {settings.api_key}",
407
- }
408
-
409
- request_data = TaskRunStatusUpdateRequest(
410
- status=status, error_message=error_message, metadata=metadata, job_id=job_id
411
- )
412
-
413
- logger.debug(
414
- "Updating status for task run %s to %s",
415
- task_run_id,
416
- status,
417
- )
418
-
419
- response = await client.post(
420
- status_url,
421
- json=request_data.model_dump(exclude_none=True),
422
- headers=headers,
423
- timeout=10.0,
424
- )
425
-
426
- if response.status_code >= 200 and response.status_code < 300:
427
- logger.debug(
428
- "Successfully updated status for task run %s to %s",
429
- task_run_id,
430
- status,
431
- )
432
- else:
433
- logger.warning(
434
- "Failed to update status for task run %s: HTTP %s - %s",
435
- task_run_id,
436
- response.status_code,
437
- response.text,
438
- )
439
- except Exception as e:
440
- logger.exception("Error updating status for task run %s: %s", task_run_id, e)
441
-
442
-
443
- async def update_job_status(
444
- job_id: str,
445
- status: JobStatus,
446
- error_message: str | None = None,
447
- metadata: dict[str, Any] | None = None,
448
- taskset_name: str | None = None,
449
- ) -> None:
450
- """Update the status of a job."""
451
- if not settings.telemetry_enabled:
452
- logger.debug("Job status update skipped - telemetry not enabled")
453
- return
454
-
455
- status_url = f"{settings.base_url}/v2/jobs/{job_id}/status"
456
-
457
- try:
458
- async with httpx.AsyncClient() as client:
459
- headers = {
460
- "Content-Type": "application/json",
461
- "Authorization": f"Bearer {settings.api_key}",
462
- }
463
-
464
- request_data = JobStatusUpdateRequest(
465
- status=status,
466
- error_message=error_message,
467
- metadata=metadata,
468
- taskset_name=taskset_name,
469
- )
470
-
471
- logger.debug(
472
- "Updating status for job %s to %s",
473
- job_id,
474
- status,
475
- )
476
-
477
- response = await client.post(
478
- status_url,
479
- json=request_data.model_dump(exclude_none=True),
480
- headers=headers,
481
- timeout=10.0,
482
- )
483
-
484
- if response.status_code >= 200 and response.status_code < 300:
485
- logger.debug(
486
- "Successfully updated status for job %s to %s",
487
- job_id,
488
- status,
489
- )
490
- else:
491
- logger.warning(
492
- "Failed to update status for job %s: HTTP %s - %s",
493
- job_id,
494
- response.status_code,
495
- response.text,
496
- )
497
- except Exception as e:
498
- logger.exception("Error updating status for job %s: %s", job_id, e)
499
-
500
-
501
- # --- Public Shutdown Function ---
502
- def flush(timeout: float = 10.0) -> None:
503
- """Flushes pending telemetry data and stops the worker thread."""
504
- global _worker_thread, _worker_loop, _export_task_async, _export_queue_async
505
- logger.debug("Initiating telemetry flush and shutdown.")
506
-
507
- shutdown_future: concurrent.futures.Future | None = None
508
- if _worker_loop and _worker_loop.is_running():
509
- logger.debug("Submitting shutdown sentinel to telemetry worker's queue.")
510
- coro = _queue_for_export_async(_SENTINEL_FOR_WORKER_SHUTDOWN)
511
- try:
512
- shutdown_future = asyncio.run_coroutine_threadsafe(coro, _worker_loop)
513
- except Exception as e: # Catch errors during submission (e.g. if loop is shutting down)
514
- logger.warning("Exception during submission of shutdown sentinel: %s", e, exc_info=True)
515
- # Proceed to attempt thread join if possible
516
-
517
- if shutdown_future:
518
- try:
519
- shutdown_future.result(timeout / 2 if timeout else None)
520
- logger.debug("Shutdown sentinel successfully queued.")
521
- except concurrent.futures.TimeoutError:
522
- logger.warning("Timeout waiting for shutdown sentinel to be queued.")
523
- except Exception as e:
524
- logger.warning(
525
- "Error waiting for shutdown sentinel to be queued: %s", e, exc_info=True
526
- )
527
-
528
- # Wait for the current _export_task_async to see the sentinel and finish.
529
- # This is tricky because the task lives on another thread's loop.
530
- # The best way is for _process_export_queue_async to clear _export_task_async when it exits.
531
- # We then wait a bit for that to happen.
532
- if _export_task_async is not None: # Check if a task was even known to be running
533
- # This check is racy, but it's the best we can do without more complex inter-thread
534
- # sync for task completion. Give some time for the task to process the sentinel and
535
- # clear itself.
536
- # Max wait for task to clear - should be longer than EXPORT_INTERVAL to ensure
537
- # the task has time to wake from sleep and process the sentinel
538
- attempt_timeout = time.time() + (timeout / 2 if timeout else 2.0) + EXPORT_INTERVAL + 1.0
539
- while _export_task_async is not None and time.time() < attempt_timeout:
540
- time.sleep(0.1)
541
- # _export_task_async is set to None by _process_export_queue_async upon its exit.
542
- if _export_task_async is not None:
543
- # This is often a false positive due to race conditions during shutdown
544
- logger.debug(
545
- "Telemetry processing task did not clear itself after sentinel. "
546
- "This is normal during shutdown."
547
- )
548
- else:
549
- logger.debug("Telemetry processing task appears to have completed after sentinel.")
550
-
551
- if _worker_loop and _worker_loop.is_running():
552
- logger.debug("Requesting telemetry worker event loop to stop.")
553
- # Ask the loop to stop running run_forever
554
- _worker_loop.call_soon_threadsafe(_worker_loop.stop)
555
-
556
- if _worker_thread and _worker_thread.is_alive():
557
- logger.debug(
558
- "Joining telemetry worker thread (up to remaining timeout)...",
559
- )
560
- # Calculate remaining timeout for join
561
- remaining_timeout = timeout - (timeout / 2) if timeout else None # Simplistic split
562
- if remaining_timeout is not None and remaining_timeout < 0:
563
- remaining_timeout = 0
564
-
565
- _worker_thread.join(remaining_timeout)
566
- if _worker_thread.is_alive():
567
- logger.warning("Telemetry worker thread did not shut down cleanly after timeout.")
568
- else:
569
- logger.debug("Telemetry worker thread successfully joined.")
570
-
571
- _worker_thread = None
572
- _worker_loop = None
573
- _export_task_async = None
574
- # _export_queue_async.clear() # Optionally clear the queue
575
- logger.debug("Telemetry flush and shutdown process completed.")
hud/telemetry/instrumentation/__init__.py DELETED
@@ -1,3 +0,0 @@
1
- """MCP instrumentation for telemetry collection."""
2
-
3
- from __future__ import annotations