hud-python 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +22 -89
- hud/agents/__init__.py +15 -0
- hud/agents/art.py +101 -0
- hud/agents/base.py +599 -0
- hud/{mcp → agents}/claude.py +373 -321
- hud/{mcp → agents}/langchain.py +250 -250
- hud/agents/misc/__init__.py +7 -0
- hud/{agent → agents}/misc/response_agent.py +80 -80
- hud/{mcp → agents}/openai.py +352 -334
- hud/agents/openai_chat_generic.py +154 -0
- hud/{mcp → agents}/tests/__init__.py +1 -1
- hud/agents/tests/test_base.py +742 -0
- hud/agents/tests/test_claude.py +324 -0
- hud/{mcp → agents}/tests/test_client.py +363 -324
- hud/{mcp → agents}/tests/test_openai.py +237 -238
- hud/cli/__init__.py +617 -0
- hud/cli/__main__.py +8 -0
- hud/cli/analyze.py +371 -0
- hud/cli/analyze_metadata.py +230 -0
- hud/cli/build.py +427 -0
- hud/cli/clone.py +185 -0
- hud/cli/cursor.py +92 -0
- hud/cli/debug.py +392 -0
- hud/cli/docker_utils.py +83 -0
- hud/cli/init.py +281 -0
- hud/cli/interactive.py +353 -0
- hud/cli/mcp_server.py +756 -0
- hud/cli/pull.py +336 -0
- hud/cli/push.py +370 -0
- hud/cli/remote_runner.py +311 -0
- hud/cli/runner.py +160 -0
- hud/cli/tests/__init__.py +3 -0
- hud/cli/tests/test_analyze.py +284 -0
- hud/cli/tests/test_cli_init.py +265 -0
- hud/cli/tests/test_cli_main.py +27 -0
- hud/cli/tests/test_clone.py +142 -0
- hud/cli/tests/test_cursor.py +253 -0
- hud/cli/tests/test_debug.py +453 -0
- hud/cli/tests/test_mcp_server.py +139 -0
- hud/cli/tests/test_utils.py +388 -0
- hud/cli/utils.py +263 -0
- hud/clients/README.md +143 -0
- hud/clients/__init__.py +16 -0
- hud/clients/base.py +379 -0
- hud/clients/fastmcp.py +222 -0
- hud/clients/mcp_use.py +278 -0
- hud/clients/tests/__init__.py +1 -0
- hud/clients/tests/test_client_integration.py +111 -0
- hud/clients/tests/test_fastmcp.py +342 -0
- hud/clients/tests/test_protocol.py +188 -0
- hud/clients/utils/__init__.py +1 -0
- hud/clients/utils/retry_transport.py +160 -0
- hud/datasets.py +322 -192
- hud/misc/__init__.py +1 -0
- hud/{agent → misc}/claude_plays_pokemon.py +292 -283
- hud/otel/__init__.py +35 -0
- hud/otel/collector.py +142 -0
- hud/otel/config.py +164 -0
- hud/otel/context.py +536 -0
- hud/otel/exporters.py +366 -0
- hud/otel/instrumentation.py +97 -0
- hud/otel/processors.py +118 -0
- hud/otel/tests/__init__.py +1 -0
- hud/otel/tests/test_processors.py +197 -0
- hud/server/__init__.py +5 -5
- hud/server/context.py +114 -0
- hud/server/helper/__init__.py +5 -0
- hud/server/low_level.py +132 -0
- hud/server/server.py +166 -0
- hud/server/tests/__init__.py +3 -0
- hud/settings.py +73 -79
- hud/shared/__init__.py +5 -0
- hud/{exceptions.py → shared/exceptions.py} +180 -180
- hud/{server → shared}/requests.py +264 -264
- hud/shared/tests/test_exceptions.py +157 -0
- hud/{server → shared}/tests/test_requests.py +275 -275
- hud/telemetry/__init__.py +25 -30
- hud/telemetry/instrument.py +379 -0
- hud/telemetry/job.py +309 -141
- hud/telemetry/replay.py +74 -0
- hud/telemetry/trace.py +83 -0
- hud/tools/__init__.py +33 -34
- hud/tools/base.py +365 -65
- hud/tools/bash.py +161 -137
- hud/tools/computer/__init__.py +15 -13
- hud/tools/computer/anthropic.py +437 -420
- hud/tools/computer/hud.py +376 -334
- hud/tools/computer/openai.py +295 -292
- hud/tools/computer/settings.py +82 -0
- hud/tools/edit.py +314 -290
- hud/tools/executors/__init__.py +30 -30
- hud/tools/executors/base.py +539 -532
- hud/tools/executors/pyautogui.py +621 -619
- hud/tools/executors/tests/__init__.py +1 -1
- hud/tools/executors/tests/test_base_executor.py +338 -338
- hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
- hud/tools/executors/xdo.py +511 -503
- hud/tools/{playwright_tool.py → playwright.py} +412 -379
- hud/tools/tests/__init__.py +3 -3
- hud/tools/tests/test_base.py +282 -0
- hud/tools/tests/test_bash.py +158 -152
- hud/tools/tests/test_bash_extended.py +197 -0
- hud/tools/tests/test_computer.py +425 -52
- hud/tools/tests/test_computer_actions.py +34 -34
- hud/tools/tests/test_edit.py +259 -240
- hud/tools/tests/test_init.py +27 -27
- hud/tools/tests/test_playwright_tool.py +183 -183
- hud/tools/tests/test_tools.py +145 -157
- hud/tools/tests/test_utils.py +156 -156
- hud/tools/types.py +72 -0
- hud/tools/utils.py +50 -50
- hud/types.py +136 -89
- hud/utils/__init__.py +10 -16
- hud/utils/async_utils.py +65 -0
- hud/utils/design.py +168 -0
- hud/utils/mcp.py +55 -0
- hud/utils/progress.py +149 -149
- hud/utils/telemetry.py +66 -66
- hud/utils/tests/test_async_utils.py +173 -0
- hud/utils/tests/test_init.py +17 -21
- hud/utils/tests/test_progress.py +261 -225
- hud/utils/tests/test_telemetry.py +82 -37
- hud/utils/tests/test_version.py +8 -8
- hud/version.py +7 -7
- hud_python-0.4.1.dist-info/METADATA +476 -0
- hud_python-0.4.1.dist-info/RECORD +132 -0
- hud_python-0.4.1.dist-info/entry_points.txt +3 -0
- {hud_python-0.3.5.dist-info → hud_python-0.4.1.dist-info}/licenses/LICENSE +21 -21
- hud/adapters/__init__.py +0 -8
- hud/adapters/claude/__init__.py +0 -5
- hud/adapters/claude/adapter.py +0 -180
- hud/adapters/claude/tests/__init__.py +0 -1
- hud/adapters/claude/tests/test_adapter.py +0 -519
- hud/adapters/common/__init__.py +0 -6
- hud/adapters/common/adapter.py +0 -178
- hud/adapters/common/tests/test_adapter.py +0 -289
- hud/adapters/common/types.py +0 -446
- hud/adapters/operator/__init__.py +0 -5
- hud/adapters/operator/adapter.py +0 -108
- hud/adapters/operator/tests/__init__.py +0 -1
- hud/adapters/operator/tests/test_adapter.py +0 -370
- hud/agent/__init__.py +0 -19
- hud/agent/base.py +0 -126
- hud/agent/claude.py +0 -271
- hud/agent/langchain.py +0 -215
- hud/agent/misc/__init__.py +0 -3
- hud/agent/operator.py +0 -268
- hud/agent/tests/__init__.py +0 -1
- hud/agent/tests/test_base.py +0 -202
- hud/env/__init__.py +0 -11
- hud/env/client.py +0 -35
- hud/env/docker_client.py +0 -349
- hud/env/environment.py +0 -446
- hud/env/local_docker_client.py +0 -358
- hud/env/remote_client.py +0 -212
- hud/env/remote_docker_client.py +0 -292
- hud/gym.py +0 -130
- hud/job.py +0 -773
- hud/mcp/__init__.py +0 -17
- hud/mcp/base.py +0 -631
- hud/mcp/client.py +0 -312
- hud/mcp/tests/test_base.py +0 -512
- hud/mcp/tests/test_claude.py +0 -294
- hud/task.py +0 -149
- hud/taskset.py +0 -237
- hud/telemetry/_trace.py +0 -347
- hud/telemetry/context.py +0 -230
- hud/telemetry/exporter.py +0 -575
- hud/telemetry/instrumentation/__init__.py +0 -3
- hud/telemetry/instrumentation/mcp.py +0 -259
- hud/telemetry/instrumentation/registry.py +0 -59
- hud/telemetry/mcp_models.py +0 -270
- hud/telemetry/tests/__init__.py +0 -1
- hud/telemetry/tests/test_context.py +0 -210
- hud/telemetry/tests/test_trace.py +0 -312
- hud/tools/helper/README.md +0 -56
- hud/tools/helper/__init__.py +0 -9
- hud/tools/helper/mcp_server.py +0 -78
- hud/tools/helper/server_initialization.py +0 -115
- hud/tools/helper/utils.py +0 -58
- hud/trajectory.py +0 -94
- hud/utils/agent.py +0 -37
- hud/utils/common.py +0 -256
- hud/utils/config.py +0 -120
- hud/utils/deprecation.py +0 -115
- hud/utils/misc.py +0 -53
- hud/utils/tests/test_common.py +0 -277
- hud/utils/tests/test_config.py +0 -129
- hud_python-0.3.5.dist-info/METADATA +0 -284
- hud_python-0.3.5.dist-info/RECORD +0 -120
- /hud/{adapters/common → shared}/tests/__init__.py +0 -0
- {hud_python-0.3.5.dist-info → hud_python-0.4.1.dist-info}/WHEEL +0 -0
hud/tools/types.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from mcp.types import ContentBlock, ImageContent, TextContent
|
|
6
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class EvaluationResult(BaseModel):
    """Standard shape for the outcome of an evaluation.

    Every field has a default, so an empty instance is valid. Keys that are
    not declared below are accepted and kept on the model (``extra="allow"``).
    """

    # Keep undeclared keys instead of rejecting them.
    model_config = ConfigDict(extra="allow")

    reward: float = Field(0.0, description="Usually a value between 0.0 and 1.0")
    done: bool = Field(False, description="Whether the task/episode is complete")
    content: str | None = Field(None, description="Additional information")
    info: dict[str, Any] = Field(default_factory=dict, description="Additional information")
    isError: bool = Field(False, description="Whether the evaluation failed")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ContentResult(BaseModel):
    """Intermediate result of a tool execution.

    Holds up to four optional pieces of content (text output, error text, a
    base64-encoded image and a system message). Two results can be merged
    with ``+`` and a result can be flattened into MCP content blocks.
    """

    output: str | None = Field(None, description="Output text")
    error: str | None = Field(None, description="Error message")
    base64_image: str | None = Field(None, description="Base64-encoded image")
    system: str | None = Field(None, description="System message")

    def __add__(self, other: ContentResult) -> ContentResult:
        """Merge two results field by field into a new ``ContentResult``.

        For ``output``, ``error`` and ``system``: when both sides hold a
        non-empty value the two strings are concatenated (left operand first);
        otherwise whichever side is truthy is used.

        Raises:
            ValueError: If both operands carry a non-empty ``base64_image``;
                two images cannot be joined into one.
        """

        def merge(left: str | None, right: str | None, *, joinable: bool = True) -> str | None:
            # At most one side is populated: pass it through unchanged.
            if not (left and right):
                return left or right
            if not joinable:
                raise ValueError("Cannot combine tool results")
            return left + right

        merged_text = {
            name: merge(getattr(self, name), getattr(other, name))
            for name in ("output", "error", "system")
        }
        merged_image = merge(self.base64_image, other.base64_image, joinable=False)
        return ContentResult(base64_image=merged_image, **merged_text)

    def to_content_blocks(self) -> list[ContentBlock]:
        """Convert this result into a list of content blocks.

        Emits, in order: a text block for ``output``, a text block for
        ``error`` and an image block (declared as ``image/png``) for
        ``base64_image``. Empty or missing fields are skipped; ``system`` is
        not included.

        Returns:
            List of ContentBlock
        """
        blocks: list[ContentBlock] = [
            TextContent(text=text, type="text") for text in (self.output, self.error) if text
        ]
        if self.base64_image:
            image = ImageContent(data=self.base64_image, mimeType="image/png", type="image")
            blocks.append(image)
        return blocks
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class ToolError(Exception):
    """Exception type for errors raised by a tool.

    Adds nothing to ``Exception``; it exists so tool errors can be caught by type.
    """
|
hud/tools/utils.py
CHANGED
|
@@ -1,50 +1,50 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
import subprocess
|
|
5
|
-
|
|
6
|
-
# Default timeout for running commands
|
|
7
|
-
DEFAULT_TIMEOUT = 10.0
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
async def run(
|
|
11
|
-
command: str | list[str],
|
|
12
|
-
input: str | None = None,
|
|
13
|
-
timeout: float | None = DEFAULT_TIMEOUT, # noqa: ASYNC109
|
|
14
|
-
) -> tuple[int, str, str]:
|
|
15
|
-
"""
|
|
16
|
-
Run a command asynchronously and return the result.
|
|
17
|
-
|
|
18
|
-
Args:
|
|
19
|
-
command: Command to run (string or list of strings)
|
|
20
|
-
input: Optional input to send to stdin
|
|
21
|
-
timeout: Timeout in seconds
|
|
22
|
-
|
|
23
|
-
Returns:
|
|
24
|
-
Tuple of (return_code, stdout, stderr)
|
|
25
|
-
"""
|
|
26
|
-
if isinstance(command, str):
|
|
27
|
-
proc = await asyncio.create_subprocess_shell(
|
|
28
|
-
command,
|
|
29
|
-
stdin=subprocess.PIPE if input else None,
|
|
30
|
-
stdout=subprocess.PIPE,
|
|
31
|
-
stderr=subprocess.PIPE,
|
|
32
|
-
)
|
|
33
|
-
else:
|
|
34
|
-
proc = await asyncio.create_subprocess_exec(
|
|
35
|
-
*command,
|
|
36
|
-
stdin=subprocess.PIPE if input else None,
|
|
37
|
-
stdout=subprocess.PIPE,
|
|
38
|
-
stderr=subprocess.PIPE,
|
|
39
|
-
)
|
|
40
|
-
|
|
41
|
-
stdout, stderr = await asyncio.wait_for(
|
|
42
|
-
proc.communicate(input=input.encode() if input else None), timeout=timeout
|
|
43
|
-
)
|
|
44
|
-
|
|
45
|
-
return proc.returncode or 0, stdout.decode(), stderr.decode()
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def maybe_truncate(text: str, max_length: int = 2048 * 10) -> str:
|
|
49
|
-
"""Truncate output if too long."""
|
|
50
|
-
return text if len(text) <= max_length else text[:max_length] + "... (truncated)"
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import subprocess
|
|
5
|
+
|
|
6
|
+
# Default timeout (in seconds) for running commands
DEFAULT_TIMEOUT = 10.0


async def run(
    command: str | list[str],
    input: str | None = None,
    timeout: float | None = DEFAULT_TIMEOUT,  # noqa: ASYNC109
) -> tuple[int, str, str]:
    """
    Run a command asynchronously and return the result.

    A string command is executed through the shell; a list is executed
    directly as an argument vector.

    Args:
        command: Command to run (string or list of strings)
        input: Optional text to send to stdin. An empty string is sent as an
            empty stdin (immediate EOF); ``None`` leaves stdin untouched.
        timeout: Timeout in seconds (``None`` waits indefinitely)

    Returns:
        Tuple of (return_code, stdout, stderr)

    Raises:
        asyncio.TimeoutError: If the command does not finish within ``timeout``.
            The child process is killed before the error propagates.
    """
    # Compare against None rather than truthiness so that input="" still gets
    # a pipe (and an immediate EOF) instead of inheriting the parent's stdin.
    has_input = input is not None
    stdin = subprocess.PIPE if has_input else None

    if isinstance(command, str):
        proc = await asyncio.create_subprocess_shell(
            command,
            stdin=stdin,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    else:
        proc = await asyncio.create_subprocess_exec(
            *command,
            stdin=stdin,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

    try:
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(input=input.encode() if has_input else None), timeout=timeout
        )
    except asyncio.TimeoutError:
        # wait_for only cancels communicate(); the child itself keeps running
        # unless it is killed explicitly.
        try:
            proc.kill()
        except ProcessLookupError:
            # The process exited between the timeout firing and the kill.
            pass
        raise

    return proc.returncode or 0, stdout.decode(), stderr.decode()
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def maybe_truncate(
    text: str, max_length: int = 2048 * 10, suffix: str = "... (truncated)"
) -> str:
    """Truncate output if too long.

    Args:
        text: Text to check.
        max_length: Maximum number of characters of ``text`` to keep.
        suffix: Marker appended after the kept prefix when truncation happens.

    Returns:
        ``text`` unchanged when it has at most ``max_length`` characters;
        otherwise its first ``max_length`` characters followed by ``suffix``
        (so a truncated result is longer than ``max_length``).
    """
    if len(text) <= max_length:
        return text
    return text[:max_length] + suffix
|
hud/types.py
CHANGED
|
@@ -1,89 +1,136 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import
|
|
4
|
-
from
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
from pydantic import BaseModel
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
#
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
#
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
#
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
#
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import uuid
|
|
4
|
+
from typing import Any, Literal
|
|
5
|
+
|
|
6
|
+
from mcp.types import CallToolRequestParams, CallToolResult
|
|
7
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class MCPToolCall(CallToolRequestParams):
    """A tool call: MCP call parameters plus an identifier for reference."""

    # Unique identifier for reference; a fresh UUID4 string unless one is supplied.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))

    def __str__(self) -> str:
        """Render the tool name, followed by the arguments when there are any."""
        lines = [f"Tool: {self.name}"]
        if self.arguments:
            lines.append(f"Arguments: {self.arguments}")
        return "\n".join(lines)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class MCPToolResult(CallToolResult):
    """A tool result."""

    def __str__(self) -> str:
        """Render the content, then structured content and error flag when set."""
        lines = [f"Content: {self.content}"]
        if self.structuredContent:
            lines.append(f"Structured Content: {self.structuredContent}")
        if self.isError:
            lines.append(f"Error: {self.isError}")
        return "\n".join(lines)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class AgentResponse(BaseModel):
    """A model response in the conversation."""

    # --- FUNCTIONAL ---
    tool_calls: list[MCPToolCall] = Field(default_factory=list)
    done: bool = Field(default=False)

    # --- TELEMETRY [app.hud.so] ---
    # Responses
    content: str | None = Field(default=None)
    reasoning: str | None = Field(default=None)
    info: dict[str, Any] = Field(default_factory=dict)
    isError: bool = Field(default=False)
    raw: Any | None = Field(default=None)  # Include raw response for access to Choice objects

    # Timestamps
    start_timestamp: str | None = None
    end_timestamp: str | None = None

    def __str__(self) -> str:
        """Render the populated parts of the response as labelled lines.

        Sections appear in the order reasoning, content, tool calls, raw;
        empty or falsy sections are omitted.
        """
        response = ""
        if self.reasoning:
            response += f"Reasoning: {self.reasoning}\n"
        if self.content:
            response += f"Content: {self.content}\n"
        if self.tool_calls:
            calls = ", ".join(f"{tc.name}: {tc.arguments}" for tc in self.tool_calls)
            response += f"Tool Calls: {calls}"
        if self.raw:
            # The tool-calls section does not end with a newline; add one so
            # "Raw:" starts on its own line instead of being glued to it.
            if response and not response.endswith("\n"):
                response += "\n"
            response += f"Raw: {self.raw}"
        return response
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class TraceStep(BaseModel):
    """Canonical data for a single span (shared with telemetry).

    Fields can be populated by name, and undeclared keys are accepted and
    kept on the model.
    """

    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # HUD identifiers
    task_run_id: str | None = None
    job_id: str | None = None

    # Span category - can be any string, but "mcp" and "agent" are privileged on the platform
    category: Literal["mcp", "agent"] | str = "mcp"  # noqa: PYI051

    # Generic I/O fields - works for any category
    request: Any | None = Field(default=None)
    result: Any | None = Field(default=None)

    # Generic span info
    type: str = "CLIENT"

    # Timestamps (optional, for local tracking)
    start_timestamp: str | None = Field(default=None)
    end_timestamp: str | None = Field(default=None)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class Trace(BaseModel):
    """Unified result from agent execution (task or prompt).

    Fields:
        - done: Whether the run is complete
        - reward: The reward for the run
        - info: Additional metadata for the run
        - content: The final content/response from the agent
        - isError: Whether the execution resulted in an error
        - trace: The steps taken in the run (empty if not tracing)
    """

    done: bool = True
    reward: float = 0.0
    info: dict[str, Any] = Field(default_factory=dict)
    content: str | None = None
    isError: bool = False
    trace: list[TraceStep] = Field(default_factory=list)

    def append(self, step: TraceStep) -> None:
        """Add one step to the end of ``trace``."""
        self.trace.append(step)

    def populate_from_context(self) -> None:
        """Replace ``trace`` with the steps collected for the current task run.

        Looks up the current task run id and, when there is one, the trace
        collected for it. If either lookup comes back empty, ``trace`` is
        left untouched.
        """
        # Imported here rather than at module level, as in the original.
        from hud.otel.context import get_current_task_run_id
        from hud.telemetry.replay import get_trace

        run_id = get_current_task_run_id()
        if not run_id:
            return

        collected = get_trace(run_id)
        if not collected:
            return

        self.trace = collected.trace
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
__all__ = [
|
|
131
|
+
"AgentResponse",
|
|
132
|
+
"MCPToolCall",
|
|
133
|
+
"MCPToolResult",
|
|
134
|
+
"Trace",
|
|
135
|
+
"TraceStep",
|
|
136
|
+
]
|
hud/utils/__init__.py
CHANGED
|
@@ -1,16 +1,10 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from .
|
|
4
|
-
from .
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
"
|
|
10
|
-
|
|
11
|
-
"FunctionConfigs",
|
|
12
|
-
"deprecated",
|
|
13
|
-
"emit_deprecation_warning",
|
|
14
|
-
"expand_config",
|
|
15
|
-
"stream",
|
|
16
|
-
]
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from .design import HUDDesign, design
|
|
4
|
+
from .telemetry import stream
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"HUDDesign",
|
|
8
|
+
"design",
|
|
9
|
+
"stream",
|
|
10
|
+
]
|
hud/utils/async_utils.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Async utilities for HUD SDK.
|
|
2
|
+
|
|
3
|
+
This module provides utilities for running async code in various environments,
|
|
4
|
+
including Jupyter notebooks and synchronous contexts.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import logging
|
|
11
|
+
import threading
|
|
12
|
+
from typing import TYPE_CHECKING, Any
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from collections.abc import Coroutine
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Strong references to scheduled tasks. The event loop only keeps weak
# references, so an otherwise unreferenced task can be garbage-collected
# before it finishes.
_BACKGROUND_TASKS: set[asyncio.Task[Any]] = set()


def fire_and_forget(coro: Coroutine[Any, Any, Any], description: str = "task") -> None:
    """Execute a coroutine in a fire-and-forget manner.

    This function handles running async code in various contexts:
    - When an event loop is already running, the coroutine is scheduled on it
      as a task.
    - When no event loop is running (sync context, some Jupyter setups), the
      coroutine is run to completion on a fresh event loop in a daemon thread.
    - Errors mentioning "interpreter shutdown" are suppressed; other errors
      are logged at debug level and never raised to the caller.

    Args:
        coro: The coroutine to execute
        description: Description of the task for logging (e.g., "update job status")

    Example:
        fire_and_forget(
            some_async_function(),
            description="update status"
        )
    """
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running event loop (e.g., Jupyter without %autoawait, sync context)
        loop = None

    if loop is not None:
        task = loop.create_task(coro)
        _BACKGROUND_TASKS.add(task)

        def _on_done(finished: asyncio.Task[Any]) -> None:
            _BACKGROUND_TASKS.discard(finished)
            if finished.cancelled():
                return
            # Retrieving the exception marks it as handled, which prevents the
            # "Task exception was never retrieved" warning.
            error = finished.exception()
            if error is not None:
                logger.debug("Error in background %s: %s", description, error)

        task.add_done_callback(_on_done)
        return

    def run_in_thread() -> None:
        thread_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(thread_loop)
        try:
            thread_loop.run_until_complete(coro)
        except Exception as e:
            # Suppress warnings about interpreter shutdown
            if "interpreter shutdown" not in str(e):
                logger.debug("Error in threaded %s: %s", description, e)
        finally:
            # Release the loop's resources; it is private to this thread.
            asyncio.set_event_loop(None)
            thread_loop.close()

    try:
        threading.Thread(target=run_in_thread, daemon=True).start()
    except Exception as e:
        # The coroutine will never run; close it so Python does not warn that
        # it was never awaited.
        coro.close()
        # Special case: suppress "cannot schedule new futures after interpreter shutdown"
        if "interpreter shutdown" not in str(e):
            logger.debug("Could not %s - no event loop available: %s", description, e)
|
hud/utils/design.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""HUD Design System - Consistent styling utilities for CLI output.
|
|
2
|
+
|
|
3
|
+
This module provides a unified design system for HUD CLI commands,
|
|
4
|
+
ensuring consistent colors, formatting, and visual hierarchy across
|
|
5
|
+
all commands.
|
|
6
|
+
|
|
7
|
+
Color Palette:
|
|
8
|
+
- Gold (#c0960c): Primary brand color for headers and important elements
|
|
9
|
+
- Black: Standard text and underlined links
|
|
10
|
+
- Red: Errors and failures
|
|
11
|
+
- Green: Success messages
|
|
12
|
+
- Dim/Gray: Secondary information
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from rich.console import Console
|
|
18
|
+
from rich.panel import Panel
|
|
19
|
+
from rich.table import Table
|
|
20
|
+
|
|
21
|
+
# HUD Brand Colors
GOLD = "rgb(192,150,12)"  # #c0960c
RED = "red"
GREEN = "green"
DIM = "dim"


class HUDDesign:
    """Design system for HUD CLI output.

    Every method writes to ``self.console``. Except for ``json_config``,
    text passed in is interpolated into Rich markup, so bracketed tags
    inside it are interpreted by Rich.
    """

    def __init__(self, console: Console | None = None, stderr: bool = False) -> None:
        """Initialize the design system.

        Args:
            console: Rich console instance. Creates new one if not provided.
            stderr: If True, output to stderr instead of stdout. Only used
                when a new console is created.
        """
        self.console = console or Console(stderr=stderr)

    def header(self, title: str, icon: str = "🚀") -> None:
        """Print a header panel with gold border.

        Args:
            title: The title text
            icon: Optional emoji icon
        """
        self.console.print(Panel.fit(f"{icon} [bold]{title}[/bold]", border_style=GOLD))

    def section_title(self, title: str) -> None:
        """Print a section title in gold, preceded by a blank line.

        Args:
            title: The section title
        """
        self.console.print(f"\n[bold {GOLD}]{title}[/bold {GOLD}]")

    def success(self, message: str) -> None:
        """Print a success message.

        Args:
            message: The success message
        """
        self.console.print(f"[{GREEN} not bold]✅ {message}[/{GREEN} not bold]")

    def error(self, message: str) -> None:
        """Print an error message.

        Args:
            message: The error message
        """
        self.console.print(f"[{RED} not bold]❌ {message}[/{RED} not bold]")

    def warning(self, message: str) -> None:
        """Print a warning message.

        Args:
            message: The warning message
        """
        self.console.print(f"[yellow]⚠️ {message}[/yellow]")

    def info(self, message: str) -> None:
        """Print an info message.

        Args:
            message: The info message
        """
        self.console.print(f"[default not bold]{message}[/default not bold]")

    def dim_info(self, label: str, value: str) -> None:
        """Print dimmed info with a label.

        Args:
            label: The label text
            value: The value text
        """
        self.console.print(f"[{DIM}]{label}[/{DIM}] [default]{value}[/default]")

    def link(self, url: str) -> None:
        """Print an underlined link.

        Args:
            url: The URL to display
        """
        self.console.print(f"[default not bold underline]{url}[/default not bold underline]")

    def json_config(self, json_str: str) -> None:
        """Print JSON configuration as plain text.

        Args:
            json_str: JSON string to display
        """
        # Apply the style through the `style` argument and turn markup parsing
        # off: JSON routinely contains bracketed runs such as [true, false]
        # that Rich would otherwise consume as style tags.
        self.console.print(json_str, style="default not bold", markup=False)

    def key_value_table(self, data: dict[str, str], show_header: bool = False) -> None:
        """Print a key-value table.

        Args:
            data: Dictionary of key-value pairs
            show_header: Whether to show table header
        """
        table = Table(show_header=show_header, box=None, padding=(0, 1))
        table.add_column("Key", style=DIM, no_wrap=True)
        table.add_column("Value")

        for key, value in data.items():
            table.add_row(key, value)

        self.console.print(table)

    def progress_message(self, message: str) -> None:
        """Print a progress message.

        Args:
            message: The progress message
        """
        self.console.print(f"[{DIM} not bold]{message}[/{DIM} not bold]")

    def phase(self, phase_num: int, title: str) -> None:
        """Print a phase header (for debug command).

        The title line sits between two 80-character rules of ``=``.

        Args:
            phase_num: Phase number
            title: Phase title
        """
        rule = "=" * 80
        self.console.print(f"\n{rule}", style=GOLD)
        self.console.print(f"[bold {GOLD}]PHASE {phase_num}: {title}[/bold {GOLD}]")
        self.console.print(rule, style=GOLD)

    def command(self, cmd: list[str]) -> None:
        """Print a command being executed.

        Args:
            cmd: Command parts as list
        """
        self.console.print(f"[bold]$ {' '.join(cmd)}[/bold]")

    def hint(self, hint: str) -> None:
        """Print a hint message, preceded by a blank line.

        Args:
            hint: The hint text
        """
        self.console.print(f"\n[yellow]💡 Hint: {hint}[/yellow]")


# Global design instance for convenience
design = HUDDesign()
|