hud-python 0.2.10__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (64)
  1. hud/__init__.py +14 -5
  2. hud/env/docker_client.py +1 -1
  3. hud/env/environment.py +10 -7
  4. hud/env/local_docker_client.py +1 -1
  5. hud/env/remote_client.py +1 -1
  6. hud/env/remote_docker_client.py +2 -2
  7. hud/exceptions.py +2 -1
  8. hud/mcp_agent/__init__.py +15 -0
  9. hud/mcp_agent/base.py +723 -0
  10. hud/mcp_agent/claude.py +316 -0
  11. hud/mcp_agent/langchain.py +231 -0
  12. hud/mcp_agent/openai.py +318 -0
  13. hud/mcp_agent/tests/__init__.py +1 -0
  14. hud/mcp_agent/tests/test_base.py +437 -0
  15. hud/settings.py +14 -2
  16. hud/task.py +4 -0
  17. hud/telemetry/__init__.py +11 -7
  18. hud/telemetry/_trace.py +82 -71
  19. hud/telemetry/context.py +9 -27
  20. hud/telemetry/exporter.py +6 -5
  21. hud/telemetry/instrumentation/mcp.py +174 -410
  22. hud/telemetry/mcp_models.py +13 -74
  23. hud/telemetry/tests/test_context.py +9 -6
  24. hud/telemetry/tests/test_trace.py +92 -61
  25. hud/tools/__init__.py +21 -0
  26. hud/tools/base.py +65 -0
  27. hud/tools/bash.py +137 -0
  28. hud/tools/computer/__init__.py +13 -0
  29. hud/tools/computer/anthropic.py +411 -0
  30. hud/tools/computer/hud.py +315 -0
  31. hud/tools/computer/openai.py +283 -0
  32. hud/tools/edit.py +290 -0
  33. hud/tools/executors/__init__.py +13 -0
  34. hud/tools/executors/base.py +331 -0
  35. hud/tools/executors/pyautogui.py +585 -0
  36. hud/tools/executors/tests/__init__.py +1 -0
  37. hud/tools/executors/tests/test_base_executor.py +338 -0
  38. hud/tools/executors/tests/test_pyautogui_executor.py +162 -0
  39. hud/tools/executors/xdo.py +503 -0
  40. hud/tools/helper/README.md +56 -0
  41. hud/tools/helper/__init__.py +9 -0
  42. hud/tools/helper/mcp_server.py +78 -0
  43. hud/tools/helper/server_initialization.py +115 -0
  44. hud/tools/helper/utils.py +58 -0
  45. hud/tools/playwright_tool.py +373 -0
  46. hud/tools/tests/__init__.py +3 -0
  47. hud/tools/tests/test_bash.py +152 -0
  48. hud/tools/tests/test_computer.py +52 -0
  49. hud/tools/tests/test_computer_actions.py +34 -0
  50. hud/tools/tests/test_edit.py +233 -0
  51. hud/tools/tests/test_init.py +27 -0
  52. hud/tools/tests/test_playwright_tool.py +183 -0
  53. hud/tools/tests/test_tools.py +154 -0
  54. hud/tools/tests/test_utils.py +156 -0
  55. hud/tools/utils.py +50 -0
  56. hud/types.py +10 -1
  57. hud/utils/tests/test_init.py +21 -0
  58. hud/utils/tests/test_version.py +1 -1
  59. hud/version.py +1 -1
  60. {hud_python-0.2.10.dist-info → hud_python-0.3.0.dist-info}/METADATA +9 -6
  61. hud_python-0.3.0.dist-info/RECORD +124 -0
  62. hud_python-0.2.10.dist-info/RECORD +0 -85
  63. {hud_python-0.2.10.dist-info → hud_python-0.3.0.dist-info}/WHEEL +0 -0
  64. {hud_python-0.2.10.dist-info → hud_python-0.3.0.dist-info}/licenses/LICENSE +0 -0
hud/telemetry/_trace.py CHANGED
@@ -11,7 +11,6 @@ from typing import (
11
11
  Any,
12
12
  ParamSpec,
13
13
  TypeVar,
14
- overload,
15
14
  )
16
15
 
17
16
  from hud.telemetry import exporter
@@ -25,42 +24,58 @@ from hud.telemetry.exporter import submit_to_worker_loop
25
24
  from hud.telemetry.instrumentation.registry import registry
26
25
 
27
26
  if TYPE_CHECKING:
28
- from collections.abc import (
29
- Callable,
30
- Coroutine,
31
- Generator,
32
- )
27
+ from collections.abc import Generator
33
28
 
34
- from hud.telemetry.mcp_models import BaseMCPCall
35
29
 
36
30
  logger = logging.getLogger("hud.telemetry")
37
31
  T = TypeVar("T")
32
+ P = ParamSpec("P")
33
+
34
+ # Track whether telemetry has been initialized
35
+ _telemetry_initialized = False
38
36
 
39
37
 
40
38
  def init_telemetry() -> None:
41
39
  """Initialize telemetry instrumentors and ensure worker is started if telemetry is active."""
40
+ global _telemetry_initialized
41
+ if _telemetry_initialized:
42
+ return
43
+
42
44
  registry.install_all()
43
45
  logger.info("Telemetry initialized.")
46
+ _telemetry_initialized = True
47
+
48
+
49
+ def _ensure_telemetry_initialized() -> None:
50
+ """Ensure telemetry is initialized - called lazily by trace functions."""
51
+ from hud.settings import settings
52
+
53
+ if settings.telemetry_enabled and not _telemetry_initialized:
54
+ init_telemetry()
44
55
 
45
56
 
46
57
  @contextmanager
47
- def trace(
58
+ def trace_open(
48
59
  name: str | None = None,
60
+ run_id: str | None = None,
49
61
  attributes: dict[str, Any] | None = None,
50
62
  ) -> Generator[str, None, None]:
51
63
  """
52
64
  Context manager for tracing a block of code.
53
- The task_run_id is always generated internally as a UUID.
54
- Telemetry export is handled by a background worker thread.
55
65
 
56
66
  Args:
57
- attributes: Optional dictionary of attributes to associate with this trace
58
67
  name: Optional name for this trace, will be added to attributes.
68
+ attributes: Optional dictionary of attributes to associate with this trace
59
69
 
60
70
  Returns:
61
71
  The generated task run ID (UUID string) used for this trace
62
72
  """
63
- task_run_id = str(uuid.uuid4())
73
+ # Lazy initialization - only initialize telemetry when trace() is actually called
74
+ _ensure_telemetry_initialized()
75
+
76
+ task_run_id = run_id or str(uuid.uuid4())
77
+
78
+ logger.info("See your agent live at https://app.hud.so/trace/%s", task_run_id)
64
79
 
65
80
  local_attributes = attributes.copy() if attributes is not None else {}
66
81
  if name is not None:
@@ -81,91 +96,87 @@ def trace(
81
96
  finally:
82
97
  end_time = time.time()
83
98
  duration = end_time - start_time
99
+ local_attributes["duration_seconds"] = duration
100
+ local_attributes["is_root_trace"] = is_root
84
101
 
85
- mcp_calls: list[BaseMCPCall] = flush_buffer()
102
+ logger.debug("Finishing trace %s after %.2f seconds", task_run_id, duration)
86
103
 
87
- trace_attributes_final = {
88
- **local_attributes,
89
- "start_time": start_time,
90
- "end_time": end_time,
91
- "duration": duration,
92
- "is_root": is_root,
93
- }
104
+ # Always flush the buffer for the current task
105
+ mcp_calls = flush_buffer(export=True)
106
+ logger.debug("Flushed %d MCP calls for trace %s", len(mcp_calls), task_run_id)
94
107
 
108
+ # Submit the telemetry payload to the worker queue
95
109
  if is_root and mcp_calls:
96
- try:
97
- coro_to_submit = exporter.export_telemetry(
98
- task_run_id=task_run_id,
99
- trace_attributes=trace_attributes_final,
100
- mcp_calls=mcp_calls,
101
- )
102
- future = submit_to_worker_loop(coro_to_submit)
103
- if future:
104
- logger.debug(
105
- "Telemetry for trace %s submitted to background worker.", task_run_id
106
- )
107
- else:
108
- logger.warning(
109
- "Failed to submit telemetry for trace %s to"
110
- "background worker (loop not available).",
111
- task_run_id,
112
- )
113
- except Exception as e:
114
- logger.warning("Failed to submit telemetry for trace %s: %s", task_run_id, e)
115
-
110
+ coro = exporter.export_telemetry(
111
+ task_run_id=task_run_id,
112
+ trace_attributes=local_attributes,
113
+ mcp_calls=mcp_calls,
114
+ )
115
+ submit_to_worker_loop(coro)
116
+
117
+ # Restore previous context
116
118
  set_current_task_run_id(previous_task_id)
117
119
  is_root_trace.set(was_root)
118
120
 
119
- logger.debug(
120
- "Ended trace %s (Name: %s) with %d MCP call(s)",
121
- task_run_id,
122
- name if name else "Unnamed",
123
- len(mcp_calls),
124
- )
121
+ # Log at the end
122
+ if is_root:
123
+ view_url = f"https://app.hud.so/trace/{task_run_id}"
124
+ logger.info("View trace at %s", view_url)
125
125
 
126
- logger.info("View trace at https://app.hud.so/jobs/traces/%s", task_run_id)
127
126
 
127
+ @contextmanager
128
+ def trace(
129
+ name: str | None = None,
130
+ attributes: dict[str, Any] | None = None,
131
+ ) -> Generator[str, None, None]:
132
+ """
133
+ Synchronous context manager that traces and blocks until telemetry is sent.
128
134
 
129
- P = ParamSpec("P")
130
- R = TypeVar("R")
135
+ This is the "worry-free" option when you want to ensure telemetry is
136
+ sent immediately before continuing, rather than relying on background workers.
131
137
 
138
+ Args:
139
+ name: Optional name for this trace
140
+ attributes: Optional attributes for the trace
132
141
 
133
- def register_trace(
134
- name: str | None = None, attributes: dict[str, Any] | None = None
135
- ) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
142
+ Returns:
143
+ The generated task run ID (UUID string) used for this trace
136
144
  """
137
- Decorator to wrap a synchronous or asynchronous function call
138
- within a hud._telemetry.trace context.
145
+ with trace_open(name=name, attributes=attributes) as task_run_id:
146
+ yield task_run_id
139
147
 
140
- Args:
141
- name: Optional name for the trace.
142
- attributes: Optional dictionary of attributes for the trace.
143
- """
148
+ # Ensure telemetry is flushed synchronously
149
+ from hud import flush
150
+
151
+ flush()
144
152
 
145
- @overload
146
- def decorator(
147
- func: Callable[P, Coroutine[Any, Any, R]],
148
- ) -> Callable[P, Coroutine[Any, Any, R]]: ...
149
153
 
150
- @overload
151
- def decorator(func: Callable[P, R]) -> Callable[P, R]: ...
154
+ def trace_decorator(
155
+ name: str | None = None,
156
+ attributes: dict[str, Any] | None = None,
157
+ ) -> Any:
158
+ """
159
+ Decorator for tracing functions.
160
+
161
+ Can be used on both sync and async functions.
162
+ """
152
163
 
153
- def decorator(func: Callable[P, Any]) -> Callable[P, Any]:
164
+ def decorator(func: Any) -> Any:
154
165
  if asyncio.iscoroutinefunction(func):
155
166
 
156
167
  @wraps(func)
157
- async def async_wrapper(*args: P.args, **kwargs: P.kwargs) -> Any:
158
- effective_name = name if name else func.__name__
159
- with trace(name=effective_name, attributes=attributes):
168
+ async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
169
+ func_name = name or f"{func.__module__}.{func.__name__}"
170
+ with trace_open(name=func_name, attributes=attributes):
160
171
  return await func(*args, **kwargs)
161
172
 
162
173
  return async_wrapper
163
174
  else:
164
175
 
165
176
  @wraps(func)
166
- def sync_wrapper(*args: P.args, **kwargs: P.kwargs) -> Any:
167
- effective_name = name if name else func.__name__
168
- with trace(name=effective_name, attributes=attributes):
177
+ def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
178
+ func_name = name or f"{func.__module__}.{func.__name__}"
179
+ with trace_open(name=func_name, attributes=attributes):
169
180
  return func(*args, **kwargs)
170
181
 
171
182
  return sync_wrapper
hud/telemetry/context.py CHANGED
@@ -8,7 +8,6 @@ from typing import Any, TypeVar
8
8
 
9
9
  from hud.telemetry.mcp_models import (
10
10
  BaseMCPCall,
11
- MCPManualTestCall,
12
11
  MCPNotificationCall,
13
12
  MCPRequestCall,
14
13
  MCPResponseCall,
@@ -21,7 +20,7 @@ logger = logging.getLogger("hud.telemetry")
21
20
  current_task_run_id: contextvars.ContextVar[str | None] = contextvars.ContextVar(
22
21
  "current_task_run_id", default=None
23
22
  )
24
- # NEW: Global dictionary for buffering, keyed by task_run_id
23
+ # Global dictionary for buffering, keyed by task_run_id
25
24
  _GLOBAL_MCP_CALL_BUFFERS: defaultdict[str, list[BaseMCPCall]] = defaultdict(list)
26
25
  is_root_trace: contextvars.ContextVar[bool] = contextvars.ContextVar("is_root_trace", default=False)
27
26
 
@@ -43,6 +42,7 @@ def set_current_task_run_id(task_run_id: str | None) -> None:
43
42
 
44
43
 
45
44
  def buffer_mcp_call(record: BaseMCPCall | dict[str, Any]) -> None:
45
+ """Buffer an MCP call record for the current trace."""
46
46
  task_run_id = get_current_task_run_id()
47
47
 
48
48
  if not task_run_id:
@@ -51,7 +51,7 @@ def buffer_mcp_call(record: BaseMCPCall | dict[str, Any]) -> None:
51
51
  )
52
52
  return
53
53
 
54
- # Ensure 'record' is a Pydantic model instance from here
54
+ # Ensure 'record' is a Pydantic model instance
55
55
  if isinstance(record, dict):
56
56
  try:
57
57
  record_model = BaseMCPCall.from_dict(record)
@@ -82,11 +82,8 @@ def flush_buffer(export: bool = False) -> list[BaseMCPCall]:
82
82
  logger.warning("FLUSH_BUFFER: No current task_run_id. Cannot flush.")
83
83
  return []
84
84
 
85
- buffer_for_task = _GLOBAL_MCP_CALL_BUFFERS.pop(
86
- task_run_id, []
87
- ) # Get and remove the list for this task
88
-
89
- return buffer_for_task # Return the flushed items
85
+ buffer_for_task = _GLOBAL_MCP_CALL_BUFFERS.pop(task_run_id, [])
86
+ return buffer_for_task
90
87
 
91
88
 
92
89
  def create_request_record(
@@ -118,10 +115,13 @@ def create_response_record(
118
115
  logger.warning("No active task_run_id, response record will not be created")
119
116
  raise ValueError("No active task_run_id")
120
117
 
118
+ # Default to COMPLETED status if not provided
119
+ if "status" not in kwargs:
120
+ kwargs["status"] = StatusType.COMPLETED
121
+
121
122
  record = MCPResponseCall(
122
123
  task_run_id=task_run_id,
123
124
  method=method,
124
- status=StatusType.COMPLETED,
125
125
  related_request_id=related_request_id,
126
126
  is_error=is_error,
127
127
  **kwargs,
@@ -149,21 +149,3 @@ def create_notification_record(
149
149
  )
150
150
  buffer_mcp_call(record)
151
151
  return record
152
-
153
-
154
- def create_manual_test_record(**custom_data: Any) -> MCPManualTestCall | None:
155
- """Create and buffer a manual test record"""
156
- task_run_id = get_current_task_run_id()
157
- if not task_run_id:
158
- logger.warning("No active task_run_id, manual test record will not be created")
159
- return None
160
-
161
- record = MCPManualTestCall.create(task_run_id=task_run_id, **custom_data)
162
- buffer_mcp_call(record)
163
- return record
164
-
165
-
166
- def reset_context() -> None:
167
- """Reset all telemetry context variables. Useful for test isolation."""
168
- set_current_task_run_id(None)
169
- is_root_trace.set(False)
hud/telemetry/exporter.py CHANGED
@@ -6,7 +6,7 @@ import json
6
6
  import logging
7
7
  import threading
8
8
  import time
9
- from datetime import datetime, timezone # For ISO timestamp conversion
9
+ from datetime import UTC, datetime # For ISO timestamp conversion
10
10
  from typing import TYPE_CHECKING, Any
11
11
 
12
12
  if TYPE_CHECKING:
@@ -157,7 +157,7 @@ async def export_telemetry(
157
157
  actual_start_time_float = getattr(mcp_call_model, "start_time", None)
158
158
  if actual_start_time_float:
159
159
  start_ts_iso = (
160
- datetime.fromtimestamp(actual_start_time_float, timezone.utc)
160
+ datetime.fromtimestamp(actual_start_time_float, UTC)
161
161
  .isoformat()
162
162
  .replace("+00:00", "Z")
163
163
  )
@@ -170,7 +170,7 @@ async def export_telemetry(
170
170
 
171
171
  if effective_end_timestamp_float:
172
172
  end_ts_iso = (
173
- datetime.fromtimestamp(effective_end_timestamp_float, timezone.utc)
173
+ datetime.fromtimestamp(effective_end_timestamp_float, UTC)
174
174
  .isoformat()
175
175
  .replace("+00:00", "Z")
176
176
  )
@@ -375,8 +375,9 @@ def flush(timeout: float = 10.0) -> None:
375
375
  # This check is racy, but it's the best we can do without more complex inter-thread
376
376
  # sync for task completion. Give some time for the task to process the sentinel and
377
377
  # clear itself.
378
- # Max wait for task to clear
379
- attempt_timeout = time.time() + (timeout / 2 if timeout else 2.0)
378
+ # Max wait for task to clear - should be longer than EXPORT_INTERVAL to ensure
379
+ # the task has time to wake from sleep and process the sentinel
380
+ attempt_timeout = time.time() + (timeout / 2 if timeout else 2.0) + EXPORT_INTERVAL + 1.0
380
381
  while _export_task_async is not None and time.time() < attempt_timeout:
381
382
  time.sleep(0.1)
382
383
  # _export_task_async is set to None by _process_export_queue_async upon its exit.