hud-python 0.2.10__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +14 -5
- hud/env/docker_client.py +1 -1
- hud/env/environment.py +10 -7
- hud/env/local_docker_client.py +1 -1
- hud/env/remote_client.py +1 -1
- hud/env/remote_docker_client.py +2 -2
- hud/exceptions.py +2 -1
- hud/mcp_agent/__init__.py +15 -0
- hud/mcp_agent/base.py +723 -0
- hud/mcp_agent/claude.py +316 -0
- hud/mcp_agent/langchain.py +231 -0
- hud/mcp_agent/openai.py +318 -0
- hud/mcp_agent/tests/__init__.py +1 -0
- hud/mcp_agent/tests/test_base.py +437 -0
- hud/settings.py +14 -2
- hud/task.py +4 -0
- hud/telemetry/__init__.py +11 -7
- hud/telemetry/_trace.py +82 -71
- hud/telemetry/context.py +9 -27
- hud/telemetry/exporter.py +6 -5
- hud/telemetry/instrumentation/mcp.py +174 -410
- hud/telemetry/mcp_models.py +13 -74
- hud/telemetry/tests/test_context.py +9 -6
- hud/telemetry/tests/test_trace.py +92 -61
- hud/tools/__init__.py +21 -0
- hud/tools/base.py +65 -0
- hud/tools/bash.py +137 -0
- hud/tools/computer/__init__.py +13 -0
- hud/tools/computer/anthropic.py +411 -0
- hud/tools/computer/hud.py +315 -0
- hud/tools/computer/openai.py +283 -0
- hud/tools/edit.py +290 -0
- hud/tools/executors/__init__.py +13 -0
- hud/tools/executors/base.py +331 -0
- hud/tools/executors/pyautogui.py +585 -0
- hud/tools/executors/tests/__init__.py +1 -0
- hud/tools/executors/tests/test_base_executor.py +338 -0
- hud/tools/executors/tests/test_pyautogui_executor.py +162 -0
- hud/tools/executors/xdo.py +503 -0
- hud/tools/helper/README.md +56 -0
- hud/tools/helper/__init__.py +9 -0
- hud/tools/helper/mcp_server.py +78 -0
- hud/tools/helper/server_initialization.py +115 -0
- hud/tools/helper/utils.py +58 -0
- hud/tools/playwright_tool.py +373 -0
- hud/tools/tests/__init__.py +3 -0
- hud/tools/tests/test_bash.py +152 -0
- hud/tools/tests/test_computer.py +52 -0
- hud/tools/tests/test_computer_actions.py +34 -0
- hud/tools/tests/test_edit.py +233 -0
- hud/tools/tests/test_init.py +27 -0
- hud/tools/tests/test_playwright_tool.py +183 -0
- hud/tools/tests/test_tools.py +154 -0
- hud/tools/tests/test_utils.py +156 -0
- hud/tools/utils.py +50 -0
- hud/types.py +10 -1
- hud/utils/tests/test_init.py +21 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.2.10.dist-info → hud_python-0.3.0.dist-info}/METADATA +9 -6
- hud_python-0.3.0.dist-info/RECORD +124 -0
- hud_python-0.2.10.dist-info/RECORD +0 -85
- {hud_python-0.2.10.dist-info → hud_python-0.3.0.dist-info}/WHEEL +0 -0
- {hud_python-0.2.10.dist-info → hud_python-0.3.0.dist-info}/licenses/LICENSE +0 -0
hud/telemetry/_trace.py
CHANGED
|
@@ -11,7 +11,6 @@ from typing import (
|
|
|
11
11
|
Any,
|
|
12
12
|
ParamSpec,
|
|
13
13
|
TypeVar,
|
|
14
|
-
overload,
|
|
15
14
|
)
|
|
16
15
|
|
|
17
16
|
from hud.telemetry import exporter
|
|
@@ -25,42 +24,58 @@ from hud.telemetry.exporter import submit_to_worker_loop
|
|
|
25
24
|
from hud.telemetry.instrumentation.registry import registry
|
|
26
25
|
|
|
27
26
|
if TYPE_CHECKING:
|
|
28
|
-
from collections.abc import
|
|
29
|
-
Callable,
|
|
30
|
-
Coroutine,
|
|
31
|
-
Generator,
|
|
32
|
-
)
|
|
27
|
+
from collections.abc import Generator
|
|
33
28
|
|
|
34
|
-
from hud.telemetry.mcp_models import BaseMCPCall
|
|
35
29
|
|
|
36
30
|
logger = logging.getLogger("hud.telemetry")
|
|
37
31
|
T = TypeVar("T")
|
|
32
|
+
P = ParamSpec("P")
|
|
33
|
+
|
|
34
|
+
# Track whether telemetry has been initialized
|
|
35
|
+
_telemetry_initialized = False
|
|
38
36
|
|
|
39
37
|
|
|
40
38
|
def init_telemetry() -> None:
|
|
41
39
|
"""Initialize telemetry instrumentors and ensure worker is started if telemetry is active."""
|
|
40
|
+
global _telemetry_initialized
|
|
41
|
+
if _telemetry_initialized:
|
|
42
|
+
return
|
|
43
|
+
|
|
42
44
|
registry.install_all()
|
|
43
45
|
logger.info("Telemetry initialized.")
|
|
46
|
+
_telemetry_initialized = True
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _ensure_telemetry_initialized() -> None:
|
|
50
|
+
"""Ensure telemetry is initialized - called lazily by trace functions."""
|
|
51
|
+
from hud.settings import settings
|
|
52
|
+
|
|
53
|
+
if settings.telemetry_enabled and not _telemetry_initialized:
|
|
54
|
+
init_telemetry()
|
|
44
55
|
|
|
45
56
|
|
|
46
57
|
@contextmanager
|
|
47
|
-
def
|
|
58
|
+
def trace_open(
|
|
48
59
|
name: str | None = None,
|
|
60
|
+
run_id: str | None = None,
|
|
49
61
|
attributes: dict[str, Any] | None = None,
|
|
50
62
|
) -> Generator[str, None, None]:
|
|
51
63
|
"""
|
|
52
64
|
Context manager for tracing a block of code.
|
|
53
|
-
The task_run_id is always generated internally as a UUID.
|
|
54
|
-
Telemetry export is handled by a background worker thread.
|
|
55
65
|
|
|
56
66
|
Args:
|
|
57
|
-
attributes: Optional dictionary of attributes to associate with this trace
|
|
58
67
|
name: Optional name for this trace, will be added to attributes.
|
|
68
|
+
attributes: Optional dictionary of attributes to associate with this trace
|
|
59
69
|
|
|
60
70
|
Returns:
|
|
61
71
|
The generated task run ID (UUID string) used for this trace
|
|
62
72
|
"""
|
|
63
|
-
|
|
73
|
+
# Lazy initialization - only initialize telemetry when trace() is actually called
|
|
74
|
+
_ensure_telemetry_initialized()
|
|
75
|
+
|
|
76
|
+
task_run_id = run_id or str(uuid.uuid4())
|
|
77
|
+
|
|
78
|
+
logger.info("See your agent live at https://app.hud.so/trace/%s", task_run_id)
|
|
64
79
|
|
|
65
80
|
local_attributes = attributes.copy() if attributes is not None else {}
|
|
66
81
|
if name is not None:
|
|
@@ -81,91 +96,87 @@ def trace(
|
|
|
81
96
|
finally:
|
|
82
97
|
end_time = time.time()
|
|
83
98
|
duration = end_time - start_time
|
|
99
|
+
local_attributes["duration_seconds"] = duration
|
|
100
|
+
local_attributes["is_root_trace"] = is_root
|
|
84
101
|
|
|
85
|
-
|
|
102
|
+
logger.debug("Finishing trace %s after %.2f seconds", task_run_id, duration)
|
|
86
103
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
"end_time": end_time,
|
|
91
|
-
"duration": duration,
|
|
92
|
-
"is_root": is_root,
|
|
93
|
-
}
|
|
104
|
+
# Always flush the buffer for the current task
|
|
105
|
+
mcp_calls = flush_buffer(export=True)
|
|
106
|
+
logger.debug("Flushed %d MCP calls for trace %s", len(mcp_calls), task_run_id)
|
|
94
107
|
|
|
108
|
+
# Submit the telemetry payload to the worker queue
|
|
95
109
|
if is_root and mcp_calls:
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
logger.debug(
|
|
105
|
-
"Telemetry for trace %s submitted to background worker.", task_run_id
|
|
106
|
-
)
|
|
107
|
-
else:
|
|
108
|
-
logger.warning(
|
|
109
|
-
"Failed to submit telemetry for trace %s to"
|
|
110
|
-
"background worker (loop not available).",
|
|
111
|
-
task_run_id,
|
|
112
|
-
)
|
|
113
|
-
except Exception as e:
|
|
114
|
-
logger.warning("Failed to submit telemetry for trace %s: %s", task_run_id, e)
|
|
115
|
-
|
|
110
|
+
coro = exporter.export_telemetry(
|
|
111
|
+
task_run_id=task_run_id,
|
|
112
|
+
trace_attributes=local_attributes,
|
|
113
|
+
mcp_calls=mcp_calls,
|
|
114
|
+
)
|
|
115
|
+
submit_to_worker_loop(coro)
|
|
116
|
+
|
|
117
|
+
# Restore previous context
|
|
116
118
|
set_current_task_run_id(previous_task_id)
|
|
117
119
|
is_root_trace.set(was_root)
|
|
118
120
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
task_run_id
|
|
122
|
-
|
|
123
|
-
len(mcp_calls),
|
|
124
|
-
)
|
|
121
|
+
# Log at the end
|
|
122
|
+
if is_root:
|
|
123
|
+
view_url = f"https://app.hud.so/trace/{task_run_id}"
|
|
124
|
+
logger.info("View trace at %s", view_url)
|
|
125
125
|
|
|
126
|
-
logger.info("View trace at https://app.hud.so/jobs/traces/%s", task_run_id)
|
|
127
126
|
|
|
127
|
+
@contextmanager
|
|
128
|
+
def trace(
|
|
129
|
+
name: str | None = None,
|
|
130
|
+
attributes: dict[str, Any] | None = None,
|
|
131
|
+
) -> Generator[str, None, None]:
|
|
132
|
+
"""
|
|
133
|
+
Synchronous context manager that traces and blocks until telemetry is sent.
|
|
128
134
|
|
|
129
|
-
|
|
130
|
-
|
|
135
|
+
This is the "worry-free" option when you want to ensure telemetry is
|
|
136
|
+
sent immediately before continuing, rather than relying on background workers.
|
|
131
137
|
|
|
138
|
+
Args:
|
|
139
|
+
name: Optional name for this trace
|
|
140
|
+
attributes: Optional attributes for the trace
|
|
132
141
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
|
|
142
|
+
Returns:
|
|
143
|
+
The generated task run ID (UUID string) used for this trace
|
|
136
144
|
"""
|
|
137
|
-
|
|
138
|
-
|
|
145
|
+
with trace_open(name=name, attributes=attributes) as task_run_id:
|
|
146
|
+
yield task_run_id
|
|
139
147
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
148
|
+
# Ensure telemetry is flushed synchronously
|
|
149
|
+
from hud import flush
|
|
150
|
+
|
|
151
|
+
flush()
|
|
144
152
|
|
|
145
|
-
@overload
|
|
146
|
-
def decorator(
|
|
147
|
-
func: Callable[P, Coroutine[Any, Any, R]],
|
|
148
|
-
) -> Callable[P, Coroutine[Any, Any, R]]: ...
|
|
149
153
|
|
|
150
|
-
|
|
151
|
-
|
|
154
|
+
def trace_decorator(
|
|
155
|
+
name: str | None = None,
|
|
156
|
+
attributes: dict[str, Any] | None = None,
|
|
157
|
+
) -> Any:
|
|
158
|
+
"""
|
|
159
|
+
Decorator for tracing functions.
|
|
160
|
+
|
|
161
|
+
Can be used on both sync and async functions.
|
|
162
|
+
"""
|
|
152
163
|
|
|
153
|
-
def decorator(func:
|
|
164
|
+
def decorator(func: Any) -> Any:
|
|
154
165
|
if asyncio.iscoroutinefunction(func):
|
|
155
166
|
|
|
156
167
|
@wraps(func)
|
|
157
|
-
async def async_wrapper(*args:
|
|
158
|
-
|
|
159
|
-
with
|
|
168
|
+
async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
169
|
+
func_name = name or f"{func.__module__}.{func.__name__}"
|
|
170
|
+
with trace_open(name=func_name, attributes=attributes):
|
|
160
171
|
return await func(*args, **kwargs)
|
|
161
172
|
|
|
162
173
|
return async_wrapper
|
|
163
174
|
else:
|
|
164
175
|
|
|
165
176
|
@wraps(func)
|
|
166
|
-
def sync_wrapper(*args:
|
|
167
|
-
|
|
168
|
-
with
|
|
177
|
+
def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
178
|
+
func_name = name or f"{func.__module__}.{func.__name__}"
|
|
179
|
+
with trace_open(name=func_name, attributes=attributes):
|
|
169
180
|
return func(*args, **kwargs)
|
|
170
181
|
|
|
171
182
|
return sync_wrapper
|
hud/telemetry/context.py
CHANGED
|
@@ -8,7 +8,6 @@ from typing import Any, TypeVar
|
|
|
8
8
|
|
|
9
9
|
from hud.telemetry.mcp_models import (
|
|
10
10
|
BaseMCPCall,
|
|
11
|
-
MCPManualTestCall,
|
|
12
11
|
MCPNotificationCall,
|
|
13
12
|
MCPRequestCall,
|
|
14
13
|
MCPResponseCall,
|
|
@@ -21,7 +20,7 @@ logger = logging.getLogger("hud.telemetry")
|
|
|
21
20
|
current_task_run_id: contextvars.ContextVar[str | None] = contextvars.ContextVar(
|
|
22
21
|
"current_task_run_id", default=None
|
|
23
22
|
)
|
|
24
|
-
#
|
|
23
|
+
# Global dictionary for buffering, keyed by task_run_id
|
|
25
24
|
_GLOBAL_MCP_CALL_BUFFERS: defaultdict[str, list[BaseMCPCall]] = defaultdict(list)
|
|
26
25
|
is_root_trace: contextvars.ContextVar[bool] = contextvars.ContextVar("is_root_trace", default=False)
|
|
27
26
|
|
|
@@ -43,6 +42,7 @@ def set_current_task_run_id(task_run_id: str | None) -> None:
|
|
|
43
42
|
|
|
44
43
|
|
|
45
44
|
def buffer_mcp_call(record: BaseMCPCall | dict[str, Any]) -> None:
|
|
45
|
+
"""Buffer an MCP call record for the current trace."""
|
|
46
46
|
task_run_id = get_current_task_run_id()
|
|
47
47
|
|
|
48
48
|
if not task_run_id:
|
|
@@ -51,7 +51,7 @@ def buffer_mcp_call(record: BaseMCPCall | dict[str, Any]) -> None:
|
|
|
51
51
|
)
|
|
52
52
|
return
|
|
53
53
|
|
|
54
|
-
# Ensure 'record' is a Pydantic model instance
|
|
54
|
+
# Ensure 'record' is a Pydantic model instance
|
|
55
55
|
if isinstance(record, dict):
|
|
56
56
|
try:
|
|
57
57
|
record_model = BaseMCPCall.from_dict(record)
|
|
@@ -82,11 +82,8 @@ def flush_buffer(export: bool = False) -> list[BaseMCPCall]:
|
|
|
82
82
|
logger.warning("FLUSH_BUFFER: No current task_run_id. Cannot flush.")
|
|
83
83
|
return []
|
|
84
84
|
|
|
85
|
-
buffer_for_task = _GLOBAL_MCP_CALL_BUFFERS.pop(
|
|
86
|
-
|
|
87
|
-
) # Get and remove the list for this task
|
|
88
|
-
|
|
89
|
-
return buffer_for_task # Return the flushed items
|
|
85
|
+
buffer_for_task = _GLOBAL_MCP_CALL_BUFFERS.pop(task_run_id, [])
|
|
86
|
+
return buffer_for_task
|
|
90
87
|
|
|
91
88
|
|
|
92
89
|
def create_request_record(
|
|
@@ -118,10 +115,13 @@ def create_response_record(
|
|
|
118
115
|
logger.warning("No active task_run_id, response record will not be created")
|
|
119
116
|
raise ValueError("No active task_run_id")
|
|
120
117
|
|
|
118
|
+
# Default to COMPLETED status if not provided
|
|
119
|
+
if "status" not in kwargs:
|
|
120
|
+
kwargs["status"] = StatusType.COMPLETED
|
|
121
|
+
|
|
121
122
|
record = MCPResponseCall(
|
|
122
123
|
task_run_id=task_run_id,
|
|
123
124
|
method=method,
|
|
124
|
-
status=StatusType.COMPLETED,
|
|
125
125
|
related_request_id=related_request_id,
|
|
126
126
|
is_error=is_error,
|
|
127
127
|
**kwargs,
|
|
@@ -149,21 +149,3 @@ def create_notification_record(
|
|
|
149
149
|
)
|
|
150
150
|
buffer_mcp_call(record)
|
|
151
151
|
return record
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
def create_manual_test_record(**custom_data: Any) -> MCPManualTestCall | None:
|
|
155
|
-
"""Create and buffer a manual test record"""
|
|
156
|
-
task_run_id = get_current_task_run_id()
|
|
157
|
-
if not task_run_id:
|
|
158
|
-
logger.warning("No active task_run_id, manual test record will not be created")
|
|
159
|
-
return None
|
|
160
|
-
|
|
161
|
-
record = MCPManualTestCall.create(task_run_id=task_run_id, **custom_data)
|
|
162
|
-
buffer_mcp_call(record)
|
|
163
|
-
return record
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
def reset_context() -> None:
|
|
167
|
-
"""Reset all telemetry context variables. Useful for test isolation."""
|
|
168
|
-
set_current_task_run_id(None)
|
|
169
|
-
is_root_trace.set(False)
|
hud/telemetry/exporter.py
CHANGED
|
@@ -6,7 +6,7 @@ import json
|
|
|
6
6
|
import logging
|
|
7
7
|
import threading
|
|
8
8
|
import time
|
|
9
|
-
from datetime import
|
|
9
|
+
from datetime import UTC, datetime # For ISO timestamp conversion
|
|
10
10
|
from typing import TYPE_CHECKING, Any
|
|
11
11
|
|
|
12
12
|
if TYPE_CHECKING:
|
|
@@ -157,7 +157,7 @@ async def export_telemetry(
|
|
|
157
157
|
actual_start_time_float = getattr(mcp_call_model, "start_time", None)
|
|
158
158
|
if actual_start_time_float:
|
|
159
159
|
start_ts_iso = (
|
|
160
|
-
datetime.fromtimestamp(actual_start_time_float,
|
|
160
|
+
datetime.fromtimestamp(actual_start_time_float, UTC)
|
|
161
161
|
.isoformat()
|
|
162
162
|
.replace("+00:00", "Z")
|
|
163
163
|
)
|
|
@@ -170,7 +170,7 @@ async def export_telemetry(
|
|
|
170
170
|
|
|
171
171
|
if effective_end_timestamp_float:
|
|
172
172
|
end_ts_iso = (
|
|
173
|
-
datetime.fromtimestamp(effective_end_timestamp_float,
|
|
173
|
+
datetime.fromtimestamp(effective_end_timestamp_float, UTC)
|
|
174
174
|
.isoformat()
|
|
175
175
|
.replace("+00:00", "Z")
|
|
176
176
|
)
|
|
@@ -375,8 +375,9 @@ def flush(timeout: float = 10.0) -> None:
|
|
|
375
375
|
# This check is racy, but it's the best we can do without more complex inter-thread
|
|
376
376
|
# sync for task completion. Give some time for the task to process the sentinel and
|
|
377
377
|
# clear itself.
|
|
378
|
-
# Max wait for task to clear
|
|
379
|
-
|
|
378
|
+
# Max wait for task to clear - should be longer than EXPORT_INTERVAL to ensure
|
|
379
|
+
# the task has time to wake from sleep and process the sentinel
|
|
380
|
+
attempt_timeout = time.time() + (timeout / 2 if timeout else 2.0) + EXPORT_INTERVAL + 1.0
|
|
380
381
|
while _export_task_async is not None and time.time() < attempt_timeout:
|
|
381
382
|
time.sleep(0.1)
|
|
382
383
|
# _export_task_async is set to None by _process_export_queue_async upon its exit.
|