hud-python 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff shows the contents of publicly available package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.

Potentially problematic release.


This version of hud-python might be problematic.

Files changed (53)
  1. hud/__init__.py +7 -4
  2. hud/adapters/common/adapter.py +14 -3
  3. hud/adapters/common/tests/test_adapter.py +16 -4
  4. hud/datasets.py +188 -0
  5. hud/env/docker_client.py +14 -2
  6. hud/env/local_docker_client.py +28 -6
  7. hud/gym.py +0 -9
  8. hud/{mcp_agent → mcp}/__init__.py +2 -0
  9. hud/mcp/base.py +631 -0
  10. hud/{mcp_agent → mcp}/claude.py +52 -47
  11. hud/mcp/client.py +312 -0
  12. hud/{mcp_agent → mcp}/langchain.py +52 -33
  13. hud/{mcp_agent → mcp}/openai.py +56 -40
  14. hud/{mcp_agent → mcp}/tests/test_base.py +129 -54
  15. hud/mcp/tests/test_claude.py +294 -0
  16. hud/mcp/tests/test_client.py +324 -0
  17. hud/mcp/tests/test_openai.py +238 -0
  18. hud/settings.py +6 -0
  19. hud/task.py +1 -88
  20. hud/taskset.py +2 -23
  21. hud/telemetry/__init__.py +5 -0
  22. hud/telemetry/_trace.py +180 -17
  23. hud/telemetry/context.py +79 -0
  24. hud/telemetry/exporter.py +165 -6
  25. hud/telemetry/job.py +141 -0
  26. hud/telemetry/tests/test_trace.py +36 -25
  27. hud/tools/__init__.py +14 -1
  28. hud/tools/executors/__init__.py +19 -2
  29. hud/tools/executors/pyautogui.py +84 -50
  30. hud/tools/executors/tests/test_pyautogui_executor.py +4 -1
  31. hud/tools/playwright_tool.py +73 -67
  32. hud/tools/tests/test_edit.py +8 -1
  33. hud/tools/tests/test_tools.py +3 -0
  34. hud/trajectory.py +5 -1
  35. hud/utils/tests/test_version.py +1 -1
  36. hud/version.py +1 -1
  37. {hud_python-0.3.0.dist-info → hud_python-0.3.1.dist-info}/METADATA +20 -14
  38. {hud_python-0.3.0.dist-info → hud_python-0.3.1.dist-info}/RECORD +41 -46
  39. hud/evaluators/__init__.py +0 -9
  40. hud/evaluators/base.py +0 -32
  41. hud/evaluators/inspect.py +0 -24
  42. hud/evaluators/judge.py +0 -189
  43. hud/evaluators/match.py +0 -156
  44. hud/evaluators/remote.py +0 -65
  45. hud/evaluators/tests/__init__.py +0 -0
  46. hud/evaluators/tests/test_inspect.py +0 -12
  47. hud/evaluators/tests/test_judge.py +0 -231
  48. hud/evaluators/tests/test_match.py +0 -115
  49. hud/evaluators/tests/test_remote.py +0 -98
  50. hud/mcp_agent/base.py +0 -723
  51. /hud/{mcp_agent → mcp}/tests/__init__.py +0 -0
  52. {hud_python-0.3.0.dist-info → hud_python-0.3.1.dist-info}/WHEEL +0 -0
  53. {hud_python-0.3.0.dist-info → hud_python-0.3.1.dist-info}/licenses/LICENSE +0 -0
hud/telemetry/_trace.py CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+# ruff: noqa: T201
 import asyncio
 import logging
 import time
@@ -13,14 +14,12 @@ from typing import (
     TypeVar,
 )
 
-from hud.telemetry import exporter
 from hud.telemetry.context import (
     flush_buffer,
     get_current_task_run_id,
     is_root_trace,
     set_current_task_run_id,
 )
-from hud.telemetry.exporter import submit_to_worker_loop
 from hud.telemetry.instrumentation.registry import registry
 
 if TYPE_CHECKING:
@@ -54,9 +53,104 @@ def _ensure_telemetry_initialized() -> None:
         init_telemetry()
 
 
+def _detect_agent_model() -> str | None:
+    """
+    Try to auto-detect agent model from parent frames.
+    This is a best-effort approach and may not work in all cases.
+    """
+    import sys
+
+    try:
+        # Try different frame depths (2-3 typically covers most cases)
+        for depth in range(2, 3):
+            try:
+                frame = sys._getframe(depth)
+                # Check local variables for agent objects
+                for var_value in frame.f_locals.values():
+                    # Look for objects with model_name attribute
+                    if hasattr(var_value, "model_name") and hasattr(var_value, "run"):
+                        # Likely an agent object
+                        model_name = getattr(var_value, "model_name", None)
+                        if model_name:
+                            logger.debug(
+                                "Found agent with model_name in frame %d: %s", depth, model_name
+                            )
+                            return str(model_name)
+
+                # Also check self in case we're in a method
+                if "self" in frame.f_locals:
+                    self_obj = frame.f_locals["self"]
+                    if hasattr(self_obj, "model_name"):
+                        model_name = getattr(self_obj, "model_name", None)
+                        if model_name:
+                            logger.debug(
+                                "Found agent model_name in self at frame %d: %s", depth, model_name
+                            )
+                            return str(model_name)
+
+            except (ValueError, AttributeError):
+                # Frame doesn't exist at this depth or other issues
+                continue
+
+    except Exception as e:
+        logger.debug("Agent model detection failed: %s", e)
+
+    return None
+
+
+def _print_trace_url(task_run_id: str) -> None:
+    """Print the trace URL in a colorful box."""
+    url = f"https://app.hud.so/trace/{task_run_id}"
+    header = "🚀 See your agent live at:"
+
+    # ANSI color codes
+    DIM = "\033[90m"  # Dim/Gray for border (visible on both light and dark terminals)
+    GOLD = "\033[33m"  # Gold/Yellow for URL
+    RESET = "\033[0m"
+    BOLD = "\033[1m"
+
+    # Calculate box width based on the longest line
+    box_width = max(len(url), len(header)) + 6
+
+    # Box drawing characters
+    top_border = "╔" + "═" * (box_width - 2) + "╗"
+    bottom_border = "╚" + "═" * (box_width - 2) + "╝"
+    divider = "╟" + "─" * (box_width - 2) + "╢"
+
+    # Center the content
+    header_padding = (box_width - len(header) - 2) // 2
+    url_padding = (box_width - len(url) - 2) // 2
+
+    # Print the box
+    print(f"\n{DIM}{top_border}{RESET}")
+    print(
+        f"{DIM}║{RESET}{' ' * header_padding}{header}{' ' * (box_width - len(header) - header_padding - 3)}{DIM}║{RESET}"  # noqa: E501
+    )
+    print(f"{DIM}{divider}{RESET}")
+    print(
+        f"{DIM}║{RESET}{' ' * url_padding}{BOLD}{GOLD}{url}{RESET}{' ' * (box_width - len(url) - url_padding - 2)}{DIM}║{RESET}"  # noqa: E501
+    )
+    print(f"{DIM}{bottom_border}{RESET}\n")
+
+
+def _print_trace_complete_url(task_run_id: str) -> None:
+    """Print the trace completion URL in a simple colorful format."""
+    url = f"https://app.hud.so/trace/{task_run_id}"
+
+    # ANSI color codes
+    GREEN = "\033[92m"
+    GOLD = "\033[33m"
+    RESET = "\033[0m"
+    DIM = "\033[2m"
+    BOLD = "\033[1m"
+
+    print(f"\n{GREEN}✓ Trace complete!{RESET} {DIM}View at:{RESET} {BOLD}{GOLD}{url}{RESET}\n")
+
+
 @contextmanager
 def trace_open(
     name: str | None = None,
+    agent_model: str | None = None,
     run_id: str | None = None,
     attributes: dict[str, Any] | None = None,
 ) -> Generator[str, None, None]:
@@ -75,12 +169,16 @@ def trace_open(
 
     task_run_id = run_id or str(uuid.uuid4())
 
-    logger.info("See your agent live at https://app.hud.so/trace/%s", task_run_id)
+    _print_trace_url(task_run_id)
 
     local_attributes = attributes.copy() if attributes is not None else {}
     if name is not None:
        local_attributes["trace_name"] = name
 
+    # Auto-detect agent if not explicitly provided
+    if agent_model is None:
+        agent_model = _detect_agent_model()
+
     start_time = time.time()
     logger.debug("Starting trace %s (Name: %s)", task_run_id, name if name else "Unnamed")
 
@@ -91,8 +189,39 @@
     is_root = previous_task_id is None
     is_root_trace.set(is_root)
 
+    # Update status to initializing for root traces
+    if is_root:
+        from hud.telemetry.exporter import (
+            TaskRunStatus,
+            submit_to_worker_loop,
+            update_task_run_status,
+        )
+        from hud.telemetry.job import get_current_job_id
+
+        # Include metadata in the initial status update
+        initial_metadata = local_attributes.copy()
+        initial_metadata["is_root_trace"] = is_root
+        if agent_model:
+            initial_metadata["agent_model"] = agent_model
+
+        # Get job_id if we're in a job context
+        job_id = get_current_job_id()
+
+        coro = update_task_run_status(
+            task_run_id, TaskRunStatus.INITIALIZING, metadata=initial_metadata, job_id=job_id
+        )
+        submit_to_worker_loop(coro)
+        logger.debug("Updated task run %s status to INITIALIZING with metadata", task_run_id)
+
+    error_occurred = False
+    error_message = None
+
     try:
         yield task_run_id
+    except Exception as e:
+        error_occurred = True
+        error_message = str(e)
+        raise
     finally:
         end_time = time.time()
         duration = end_time - start_time
@@ -101,32 +230,65 @@
 
         logger.debug("Finishing trace %s after %.2f seconds", task_run_id, duration)
 
+        # Update status for root traces
+        if is_root:
+            from hud.telemetry.exporter import (
+                TaskRunStatus,
+                submit_to_worker_loop,
+                update_task_run_status,
+            )
+
+            # Include final metadata with duration
+            final_metadata = local_attributes.copy()
+
+            if error_occurred:
+                coro = update_task_run_status(
+                    task_run_id, TaskRunStatus.ERROR, error_message, metadata=final_metadata
+                )
+                logger.debug("Updated task run %s status to ERROR: %s", task_run_id, error_message)
+            else:
+                coro = update_task_run_status(
+                    task_run_id, TaskRunStatus.COMPLETED, metadata=final_metadata
+                )
+                logger.debug("Updated task run %s status to COMPLETED with metadata", task_run_id)
+
+            # Wait for the status update to complete
+            future = submit_to_worker_loop(coro)
+            if future:
+                try:
+                    # Wait up to 5 seconds for the status update
+                    import concurrent.futures
+
+                    future.result(timeout=5.0)
+                    logger.debug("Status update completed successfully")
+                except concurrent.futures.TimeoutError:
+                    logger.warning("Timeout waiting for status update to complete")
+                except Exception as e:
+                    logger.error("Error waiting for status update: %s", e)
+
+        # Export any remaining records before flushing
+        if is_root:
+            from hud.telemetry.context import export_incremental
+
+            export_incremental()
+
         # Always flush the buffer for the current task
         mcp_calls = flush_buffer(export=True)
         logger.debug("Flushed %d MCP calls for trace %s", len(mcp_calls), task_run_id)
 
-        # Submit the telemetry payload to the worker queue
-        if is_root and mcp_calls:
-            coro = exporter.export_telemetry(
-                task_run_id=task_run_id,
-                trace_attributes=local_attributes,
-                mcp_calls=mcp_calls,
-            )
-            submit_to_worker_loop(coro)
-
         # Restore previous context
         set_current_task_run_id(previous_task_id)
         is_root_trace.set(was_root)
 
         # Log at the end
         if is_root:
-            view_url = f"https://app.hud.so/trace/{task_run_id}"
-            logger.info("View trace at %s", view_url)
+            _print_trace_complete_url(task_run_id)
 
 
 @contextmanager
 def trace(
     name: str | None = None,
+    agent_model: str | None = None,
     attributes: dict[str, Any] | None = None,
 ) -> Generator[str, None, None]:
     """
@@ -142,7 +304,7 @@ def trace(
     Returns:
         The generated task run ID (UUID string) used for this trace
     """
-    with trace_open(name=name, attributes=attributes) as task_run_id:
+    with trace_open(name=name, agent_model=agent_model, attributes=attributes) as task_run_id:
         yield task_run_id
 
     # Ensure telemetry is flushed synchronously
@@ -153,6 +315,7 @@
 
 def trace_decorator(
     name: str | None = None,
+    agent_model: str | None = None,
     attributes: dict[str, Any] | None = None,
 ) -> Any:
     """
@@ -167,7 +330,7 @@ def trace_decorator(
         @wraps(func)
         async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
            func_name = name or f"{func.__module__}.{func.__name__}"
-            with trace_open(name=func_name, attributes=attributes):
+            with trace_open(name=func_name, agent_model=agent_model, attributes=attributes):
                return await func(*args, **kwargs)
 
        return async_wrapper
@@ -176,7 +339,7 @@
        @wraps(func)
        def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
            func_name = name or f"{func.__module__}.{func.__name__}"
-            with trace_open(name=func_name, attributes=attributes):
+            with trace_open(name=func_name, agent_model=agent_model, attributes=attributes):
                return func(*args, **kwargs)
 
        return sync_wrapper
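
Usage note: the hunks above thread a new agent_model argument through trace, trace_open, and trace_decorator, falling back to _detect_agent_model() (a best-effort scan of caller frames for an object exposing model_name and run) when it is omitted. A minimal usage sketch, assuming trace is re-exported from hud.telemetry (see hud/telemetry/__init__.py in the file list) and using a hypothetical MyAgent class purely for illustration:

from hud.telemetry import trace  # assumed re-export

class MyAgent:
    # Hypothetical agent; model_name + run are the attributes _detect_agent_model probes for.
    model_name = "example-model"

    def run(self, prompt: str) -> str:
        return f"echo: {prompt}"

agent = MyAgent()

# Passing agent_model explicitly attaches it to the trace's initial status metadata.
with trace(name="demo-task", agent_model=agent.model_name) as task_run_id:
    agent.run("open the settings page")

# Omitting agent_model lets trace_open call _detect_agent_model(), which inspects the
# calling frames and picks up agent.model_name if it can find such an object.
with trace(name="demo-task-autodetect") as task_run_id:
    agent.run("open the settings page")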
hud/telemetry/context.py CHANGED
@@ -22,6 +22,10 @@ current_task_run_id: contextvars.ContextVar[str | None] = contextvars.ContextVar
 )
 # Global dictionary for buffering, keyed by task_run_id
 _GLOBAL_MCP_CALL_BUFFERS: defaultdict[str, list[BaseMCPCall]] = defaultdict(list)
+# Track the last exported index for each task_run_id
+_GLOBAL_EXPORT_INDICES: defaultdict[str, int] = defaultdict(int)
+# Track whether we've seen a non-init request for each task_run_id
+_GLOBAL_HAS_NON_INIT_REQUEST: defaultdict[str, bool] = defaultdict(bool)
 is_root_trace: contextvars.ContextVar[bool] = contextvars.ContextVar("is_root_trace", default=False)
 
 # Maximum buffer size before automatic flush
@@ -67,6 +71,48 @@ def buffer_mcp_call(record: BaseMCPCall | dict[str, Any]) -> None:
         flush_buffer(export=True)
 
 
+def export_incremental() -> list[BaseMCPCall]:
+    """
+    Export only new MCP calls since last export without clearing the buffer.
+
+    Returns:
+        The list of newly exported MCP calls
+    """
+    task_run_id = get_current_task_run_id()
+    if not task_run_id or not is_root_trace.get():
+        return []
+
+    buffer = _GLOBAL_MCP_CALL_BUFFERS.get(task_run_id, [])
+    last_exported_idx = _GLOBAL_EXPORT_INDICES.get(task_run_id, 0)
+
+    # Get only the new records since last export
+    new_records = buffer[last_exported_idx:]
+
+    if new_records:
+        # Update the export index
+        _GLOBAL_EXPORT_INDICES[task_run_id] = len(buffer)
+
+        # Trigger export
+        from hud.telemetry import exporter
+        from hud.telemetry.exporter import submit_to_worker_loop
+
+        # Get current trace attributes if available
+        attributes = {"incremental": True}
+
+        coro = exporter.export_telemetry(
+            task_run_id=task_run_id,
+            trace_attributes=attributes,
+            mcp_calls=new_records.copy(),  # Copy to avoid modification during export
+        )
+        submit_to_worker_loop(coro)
+
+        logger.debug(
+            "Incremental export: %d new MCP calls for trace %s", len(new_records), task_run_id
+        )
+
+    return new_records
+
+
 def flush_buffer(export: bool = False) -> list[BaseMCPCall]:
     """
     Clear the MCP calls buffer and return its contents.
@@ -83,6 +129,10 @@ def flush_buffer(export: bool = False) -> list[BaseMCPCall]:
         return []
 
     buffer_for_task = _GLOBAL_MCP_CALL_BUFFERS.pop(task_run_id, [])
+    # Clean up export index when buffer is flushed
+    _GLOBAL_EXPORT_INDICES.pop(task_run_id, None)
+    # Clean up non-init request tracking
+    _GLOBAL_HAS_NON_INIT_REQUEST.pop(task_run_id, None)
    return buffer_for_task
 
 
@@ -95,6 +145,31 @@ def create_request_record(
         logger.warning("No active task_run_id, request record will not be created")
         raise ValueError("No active task_run_id")
 
+    # Check if this is the first non-init request and update status
+    if is_root_trace.get() and not _GLOBAL_HAS_NON_INIT_REQUEST[task_run_id]:
+        # Common initialization method patterns
+        init_methods = {"initialize", "session/new", "init", "setup", "connect"}
+        method_lower = method.lower()
+
+        # Check if this is NOT an initialization method
+        if not any(init_pattern in method_lower for init_pattern in init_methods):
+            _GLOBAL_HAS_NON_INIT_REQUEST[task_run_id] = True
+
+            # Update status to running
+            from hud.telemetry.exporter import (
+                TaskRunStatus,
+                submit_to_worker_loop,
+                update_task_run_status,
+            )
+
+            coro = update_task_run_status(task_run_id, TaskRunStatus.RUNNING)
+            submit_to_worker_loop(coro)
+            logger.debug(
+                "Updated task run %s status to RUNNING on first non-init request: %s",
+                task_run_id,
+                method,
+            )
+
     record = MCPRequestCall(
         task_run_id=task_run_id,
         method=method,
@@ -128,6 +203,10 @@
     )
 
     buffer_mcp_call(record)
+
+    # Trigger incremental export when we receive a response
+    export_incremental()
+
     return record
 
 
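
Usage note: the incremental-export bookkeeping added here amounts to a per-task high-water mark into the call buffer. export_incremental ships only the slice buffered since the previous export, and flush_buffer discards the mark together with the buffer. A standalone sketch of that pattern with plain lists and a stand-in exporter (not the hud internals):

from collections import defaultdict

_buffers: defaultdict[str, list[dict]] = defaultdict(list)
_export_indices: defaultdict[str, int] = defaultdict(int)

def fake_export(task_run_id: str, records: list[dict]) -> None:
    # Stand-in for exporter.export_telemetry(...) being submitted to the worker loop.
    print(f"exporting {len(records)} record(s) for {task_run_id}")

def export_incremental(task_run_id: str) -> list[dict]:
    buffer = _buffers[task_run_id]
    start = _export_indices[task_run_id]
    new_records = buffer[start:]  # only what arrived since the last export
    if new_records:
        _export_indices[task_run_id] = len(buffer)  # advance the high-water mark
        fake_export(task_run_id, new_records.copy())
    return new_records

_buffers["run-1"].append({"method": "tools/call"})
export_incremental("run-1")  # exports the one buffered record
export_incremental("run-1")  # exports nothing; the mark is already at the end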
hud/telemetry/exporter.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import asyncio
 import concurrent.futures  # For run_coroutine_threadsafe return type
+import enum
 import json
 import logging
 import threading
@@ -13,6 +14,7 @@ if TYPE_CHECKING:
     from collections.abc import Coroutine
 
 import httpx
+from pydantic import BaseModel
 
 from hud.settings import settings
 
@@ -25,6 +27,41 @@ from hud.telemetry.mcp_models import ( # MCPResponseCall for isinstance check
 )
 logger = logging.getLogger("hud.telemetry")
 
+
+# --- Task Run Status Models ---
+class TaskRunStatus(enum.StrEnum):
+    INITIALIZING = "initializing"
+    RUNNING = "running"
+    EVALUATING = "evaluating"
+    COMPLETED = "completed"
+    ERROR = "error"
+
+
+class TaskRunStatusUpdateRequest(BaseModel):
+    """Request model for updating task run status."""
+
+    status: TaskRunStatus
+    error_message: str | None = None  # Optional error message if status is ERROR
+    metadata: dict[str, Any] | None = None  # Optional metadata for context
+    job_id: str | None = None  # Optional parent job ID
+
+
+# --- Job Status Models ---
+class JobStatus(enum.StrEnum):
+    RUNNING = "running"
+    COMPLETED = "completed"
+    ERROR = "error"
+
+
+class JobStatusUpdateRequest(BaseModel):
+    """Request model for updating job status."""
+
+    status: JobStatus
+    error_message: str | None = None  # Optional error message if status is ERROR
+    metadata: dict[str, Any] | None = None  # Optional metadata for context
+    taskset_name: str | None = None  # Optional dataset/taskset name
+
+
 # --- Worker Thread and Event Loop Management ---
 _worker_thread: threading.Thread | None = None
 _worker_loop: asyncio.AbstractEventLoop | None = None
@@ -38,7 +75,8 @@ _export_lock_async = asyncio.Lock() # Async lock for the async queue
 _export_task_async: asyncio.Task | None = None  # Async task for processing the queue
 
 # --- Constants ---
-EXPORT_INTERVAL = 5.0  # seconds
+EXPORT_INTERVAL = 5.0  # seconds - delay between non-incremental exports
+MIN_EXPORT_INTERVAL = 0.1  # seconds - minimum delay between any exports to avoid overwhelming
 # MAX_BATCH_SIZE removed as we send one trace payload at a time
 
 
@@ -265,12 +303,19 @@ async def _process_export_queue_async() -> None:
 
            if isinstance(payload_to_process, dict):  # Ensure it's a dict before processing as such
                await _export_trace_payload_async(payload_to_process)
+
+                # Apply appropriate delay based on export type
+                is_incremental = payload_to_process.get("attributes", {}).get("incremental", False)
+                if is_incremental:
+                    # Small delay for incremental exports to avoid overwhelming the server
+                    await asyncio.sleep(MIN_EXPORT_INTERVAL)
+                else:
+                    # Longer delay for final exports
+                    await asyncio.sleep(EXPORT_INTERVAL)
            else:
                # Should not happen if only dicts and sentinel are queued
                logger.warning("Unexpected item in telemetry queue: %s", type(payload_to_process))
 
-            await asyncio.sleep(EXPORT_INTERVAL)
-
        except asyncio.CancelledError:
            logger.debug("Async telemetry export processing task cancelled.")
            _export_task_async = None
@@ -340,6 +385,119 @@ async def send_telemetry_to_server(task_run_id: str, data: dict[str, Any]) -> None:
         logger.exception("Error exporting telemetry for task run %s: %s", task_run_id, e)
 
 
+async def update_task_run_status(
+    task_run_id: str,
+    status: TaskRunStatus,
+    error_message: str | None = None,
+    metadata: dict[str, Any] | None = None,
+    job_id: str | None = None,
+) -> None:
+    """Update the status of a task run."""
+    if not settings.telemetry_enabled:
+        logger.debug("Status update skipped - telemetry not enabled")
+        return
+
+    status_url = f"{settings.base_url}/v2/task_runs/{task_run_id}/status"
+
+    try:
+        async with httpx.AsyncClient() as client:
+            headers = {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {settings.api_key}",
+            }
+
+            request_data = TaskRunStatusUpdateRequest(
+                status=status, error_message=error_message, metadata=metadata, job_id=job_id
+            )
+
+            logger.debug(
+                "Updating status for task run %s to %s",
+                task_run_id,
+                status,
+            )
+
+            response = await client.post(
+                status_url,
+                json=request_data.model_dump(exclude_none=True),
+                headers=headers,
+                timeout=10.0,
+            )
+
+            if response.status_code >= 200 and response.status_code < 300:
+                logger.debug(
+                    "Successfully updated status for task run %s to %s",
+                    task_run_id,
+                    status,
+                )
+            else:
+                logger.warning(
+                    "Failed to update status for task run %s: HTTP %s - %s",
+                    task_run_id,
+                    response.status_code,
+                    response.text,
+                )
+    except Exception as e:
+        logger.exception("Error updating status for task run %s: %s", task_run_id, e)
+
+
+async def update_job_status(
+    job_id: str,
+    status: JobStatus,
+    error_message: str | None = None,
+    metadata: dict[str, Any] | None = None,
+    taskset_name: str | None = None,
+) -> None:
+    """Update the status of a job."""
+    if not settings.telemetry_enabled:
+        logger.debug("Job status update skipped - telemetry not enabled")
+        return
+
+    status_url = f"{settings.base_url}/v2/jobs/{job_id}/status"
+
+    try:
+        async with httpx.AsyncClient() as client:
+            headers = {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {settings.api_key}",
+            }
+
+            request_data = JobStatusUpdateRequest(
+                status=status,
+                error_message=error_message,
+                metadata=metadata,
+                taskset_name=taskset_name,
+            )
+
+            logger.debug(
+                "Updating status for job %s to %s",
+                job_id,
+                status,
+            )
+
+            response = await client.post(
+                status_url,
+                json=request_data.model_dump(exclude_none=True),
+                headers=headers,
+                timeout=10.0,
+            )
+
+            if response.status_code >= 200 and response.status_code < 300:
+                logger.debug(
+                    "Successfully updated status for job %s to %s",
+                    job_id,
+                    status,
+                )
+            else:
+                logger.warning(
+                    "Failed to update status for job %s: HTTP %s - %s",
+                    job_id,
+                    response.status_code,
+                    response.text,
+                )
+    except Exception as e:
+        logger.exception("Error updating status for job %s: %s", job_id, e)
+
+
 # --- Public Shutdown Function ---
 def flush(timeout: float = 10.0) -> None:
     """Flushes pending telemetry data and stops the worker thread."""
@@ -382,9 +540,10 @@ def flush(timeout: float = 10.0) -> None:
        time.sleep(0.1)
    # _export_task_async is set to None by _process_export_queue_async upon its exit.
    if _export_task_async is not None:
-        logger.warning(
-            "Telemetry processing task did not clear itself after sentinel. May still be "
-            "running or stuck."
+        # This is often a false positive due to race conditions during shutdown
+        logger.debug(
+            "Telemetry processing task did not clear itself after sentinel. "
+            "This is normal during shutdown."
        )
    else:
        logger.debug("Telemetry processing task appears to have completed after sentinel.")
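
Usage note: the new status updates are plain authenticated POSTs to /v2/task_runs/{task_run_id}/status and /v2/jobs/{job_id}/status, built from the pydantic request models above and normally scheduled on the exporter's background worker rather than awaited inline. A rough calling sketch mirroring how trace_open uses these helpers; the task_run_id below is a placeholder, and this assumes telemetry is enabled and the worker loop is available:

from hud.telemetry.exporter import (
    TaskRunStatus,
    submit_to_worker_loop,
    update_task_run_status,
)

task_run_id = "00000000-0000-0000-0000-000000000000"  # placeholder, not a real run

# Build the coroutine, then hand it to the worker loop; per the diff above,
# submit_to_worker_loop returns a future-like object (or a falsy value if unavailable).
coro = update_task_run_status(
    task_run_id,
    TaskRunStatus.RUNNING,
    metadata={"trace_name": "demo-task"},
)
future = submit_to_worker_loop(coro)
if future:
    future.result(timeout=5.0)  # optionally block, as trace_open does for its final update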