hud-python 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +7 -4
- hud/adapters/common/adapter.py +14 -3
- hud/adapters/common/tests/test_adapter.py +16 -4
- hud/datasets.py +188 -0
- hud/env/docker_client.py +14 -2
- hud/env/local_docker_client.py +28 -6
- hud/gym.py +0 -9
- hud/{mcp_agent → mcp}/__init__.py +2 -0
- hud/mcp/base.py +631 -0
- hud/{mcp_agent → mcp}/claude.py +52 -47
- hud/mcp/client.py +312 -0
- hud/{mcp_agent → mcp}/langchain.py +52 -33
- hud/{mcp_agent → mcp}/openai.py +56 -40
- hud/{mcp_agent → mcp}/tests/test_base.py +129 -54
- hud/mcp/tests/test_claude.py +294 -0
- hud/mcp/tests/test_client.py +324 -0
- hud/mcp/tests/test_openai.py +238 -0
- hud/settings.py +6 -0
- hud/task.py +2 -88
- hud/taskset.py +2 -23
- hud/telemetry/__init__.py +5 -0
- hud/telemetry/_trace.py +180 -17
- hud/telemetry/context.py +79 -0
- hud/telemetry/exporter.py +165 -6
- hud/telemetry/job.py +141 -0
- hud/telemetry/tests/test_trace.py +36 -25
- hud/tools/__init__.py +14 -1
- hud/tools/computer/hud.py +13 -0
- hud/tools/executors/__init__.py +19 -2
- hud/tools/executors/pyautogui.py +84 -50
- hud/tools/executors/tests/test_pyautogui_executor.py +4 -1
- hud/tools/playwright_tool.py +73 -67
- hud/tools/tests/test_edit.py +8 -1
- hud/tools/tests/test_tools.py +3 -0
- hud/trajectory.py +5 -1
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.3.0.dist-info → hud_python-0.3.2.dist-info}/METADATA +20 -14
- {hud_python-0.3.0.dist-info → hud_python-0.3.2.dist-info}/RECORD +42 -47
- hud/evaluators/__init__.py +0 -9
- hud/evaluators/base.py +0 -32
- hud/evaluators/inspect.py +0 -24
- hud/evaluators/judge.py +0 -189
- hud/evaluators/match.py +0 -156
- hud/evaluators/remote.py +0 -65
- hud/evaluators/tests/__init__.py +0 -0
- hud/evaluators/tests/test_inspect.py +0 -12
- hud/evaluators/tests/test_judge.py +0 -231
- hud/evaluators/tests/test_match.py +0 -115
- hud/evaluators/tests/test_remote.py +0 -98
- hud/mcp_agent/base.py +0 -723
- hud/{mcp_agent → mcp}/tests/__init__.py +0 -0
- {hud_python-0.3.0.dist-info → hud_python-0.3.2.dist-info}/WHEEL +0 -0
- {hud_python-0.3.0.dist-info → hud_python-0.3.2.dist-info}/licenses/LICENSE +0 -0
hud/__init__.py
CHANGED
|
@@ -4,13 +4,13 @@ HUD SDK for interacting with the HUD evaluation platform.
|
|
|
4
4
|
|
|
5
5
|
from __future__ import annotations
|
|
6
6
|
|
|
7
|
-
from . import agent, env, gym, settings, task, taskset, types, utils
|
|
7
|
+
from . import agent, datasets, env, gym, settings, task, taskset, types, utils
|
|
8
8
|
from .adapters import ResponseAction as Response
|
|
9
|
+
from .datasets import run_dataset, to_taskconfigs
|
|
9
10
|
from .job import create_job, load_job, run_job
|
|
10
|
-
from .job import job as register_job
|
|
11
11
|
from .task import Task
|
|
12
12
|
from .taskset import load_taskset
|
|
13
|
-
from .telemetry import flush, trace, trace_open
|
|
13
|
+
from .telemetry import flush, job, trace, trace_open # New context-based job
|
|
14
14
|
from .version import __version__
|
|
15
15
|
|
|
16
16
|
|
|
@@ -42,17 +42,20 @@ __all__ = [
|
|
|
42
42
|
"__version__",
|
|
43
43
|
"agent",
|
|
44
44
|
"create_job",
|
|
45
|
+
"datasets",
|
|
45
46
|
"env",
|
|
46
47
|
"flush",
|
|
47
48
|
"gym",
|
|
48
49
|
"init_telemetry",
|
|
50
|
+
"job",
|
|
49
51
|
"load_job",
|
|
50
52
|
"load_taskset",
|
|
51
|
-
"register_job",
|
|
53
|
+
"run_dataset",
|
|
52
54
|
"run_job",
|
|
53
55
|
"settings",
|
|
54
56
|
"task",
|
|
55
57
|
"taskset",
|
|
58
|
+
"to_taskconfigs",
|
|
56
59
|
"trace",
|
|
57
60
|
"trace_open",
|
|
58
61
|
"types",
|
hud/adapters/common/adapter.py
CHANGED
|
@@ -2,16 +2,18 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from typing import TYPE_CHECKING, Any, TypeAlias
|
|
4
4
|
|
|
5
|
-
import numpy as np
|
|
6
|
-
from PIL import Image
|
|
7
5
|
from pydantic import TypeAdapter, ValidationError
|
|
8
6
|
|
|
9
7
|
from .types import CLA
|
|
10
8
|
|
|
11
9
|
if TYPE_CHECKING:
|
|
10
|
+
import numpy as np
|
|
11
|
+
from PIL import Image
|
|
12
12
|
from typing_extensions import TypeIs
|
|
13
13
|
|
|
14
|
-
ImageType: TypeAlias = np.ndarray[Any, Any] | Image.Image | str | None
|
|
14
|
+
ImageType: TypeAlias = np.ndarray[Any, Any] | Image.Image | str | None
|
|
15
|
+
else:
|
|
16
|
+
ImageType: TypeAlias = Any | str | None
|
|
15
17
|
|
|
16
18
|
|
|
17
19
|
def _is_numpy_array(observation: Any) -> TypeIs[np.ndarray]:
|
|
@@ -69,6 +71,15 @@ class Adapter:
|
|
|
69
71
|
if observation is None:
|
|
70
72
|
return None
|
|
71
73
|
|
|
74
|
+
# Import PIL only when needed
|
|
75
|
+
try:
|
|
76
|
+
from PIL import Image
|
|
77
|
+
except ImportError:
|
|
78
|
+
raise ImportError(
|
|
79
|
+
"PIL (Pillow) is required for image processing. "
|
|
80
|
+
"Please install it with 'pip install Pillow'"
|
|
81
|
+
) from None
|
|
82
|
+
|
|
72
83
|
# Handle different input types.
|
|
73
84
|
if _is_numpy_array(observation):
|
|
74
85
|
# Convert numpy array to PIL Image
|
hud/adapters/common/tests/test_adapter.py
CHANGED
|
@@ -4,10 +4,17 @@ import base64
|
|
|
4
4
|
import io
|
|
5
5
|
from unittest.mock import MagicMock, patch
|
|
6
6
|
|
|
7
|
-
import numpy as np
|
|
8
7
|
import pytest
|
|
9
8
|
from PIL import Image
|
|
10
9
|
|
|
10
|
+
try:
|
|
11
|
+
import numpy as np
|
|
12
|
+
|
|
13
|
+
HAS_NUMPY = True
|
|
14
|
+
except ImportError:
|
|
15
|
+
HAS_NUMPY = False
|
|
16
|
+
np = None
|
|
17
|
+
|
|
11
18
|
from hud.adapters.common import Adapter
|
|
12
19
|
from hud.adapters.common.types import ClickAction, Point, TypeAction
|
|
13
20
|
|
|
@@ -25,15 +32,19 @@ def test_image():
|
|
|
25
32
|
img_bytes = io.BytesIO()
|
|
26
33
|
img.save(img_bytes, format="PNG")
|
|
27
34
|
img_base64 = base64.b64encode(img_bytes.getvalue()).decode("utf-8")
|
|
28
|
-
img_array = np.array(img)
|
|
29
35
|
|
|
30
|
-
return {
|
|
36
|
+
result = {
|
|
31
37
|
"pil": img,
|
|
32
38
|
"bytes": img_bytes.getvalue(),
|
|
33
39
|
"base64": img_base64,
|
|
34
|
-
"array": img_array,
|
|
35
40
|
}
|
|
36
41
|
|
|
42
|
+
if HAS_NUMPY:
|
|
43
|
+
img_array = np.array(img) # type: ignore
|
|
44
|
+
result["array"] = img_array
|
|
45
|
+
|
|
46
|
+
return result
|
|
47
|
+
|
|
37
48
|
|
|
38
49
|
def test_init(adapter):
|
|
39
50
|
"""Test adapter initialization."""
|
|
@@ -99,6 +110,7 @@ def test_rescale_pil_image(adapter, test_image):
|
|
|
99
110
|
assert img.size == (adapter.agent_width, adapter.agent_height)
|
|
100
111
|
|
|
101
112
|
|
|
113
|
+
@pytest.mark.skipif(not HAS_NUMPY, reason="numpy not available")
|
|
102
114
|
def test_rescale_numpy_array(adapter, test_image):
|
|
103
115
|
"""Test rescaling numpy array."""
|
|
104
116
|
result = adapter.rescale(test_image["array"])
|
hud/datasets.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""Dataset utilities for working with HuggingFace datasets and TaskConfigs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
from string import Template
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
|
+
|
|
10
|
+
from mcp.types import CallToolRequestParams as MCPToolParams
|
|
11
|
+
from pydantic import BaseModel, Field, field_validator
|
|
12
|
+
|
|
13
|
+
from hud.telemetry.job import job
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from datasets import Dataset
|
|
17
|
+
|
|
18
|
+
from hud.mcp.base import AgentResult, BaseMCPAgent
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger("hud.datasets")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TaskConfig(BaseModel):
|
|
24
|
+
"""
|
|
25
|
+
A task configuration that can be used to create a task.
|
|
26
|
+
|
|
27
|
+
The mcp_config field supports environment variable substitution using
|
|
28
|
+
template placeholders in the format ${VAR_NAME} or ${VAR_NAME:default_value}.
|
|
29
|
+
|
|
30
|
+
Example:
|
|
31
|
+
mcp_config: {
|
|
32
|
+
"hud": {
|
|
33
|
+
"url": "${HUD_MCP_URL:https://mcp.hud.so/v3/mcp}",
|
|
34
|
+
"headers": {
|
|
35
|
+
"Authorization": "Bearer ${HUD_API_KEY}",
|
|
36
|
+
"Run-Id": "${RUN_ID}"
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
id: str | None = None
|
|
43
|
+
prompt: str
|
|
44
|
+
mcp_config: dict[str, Any]
|
|
45
|
+
setup_tool: MCPToolParams | None = None
|
|
46
|
+
evaluate_tool: MCPToolParams | None = None
|
|
47
|
+
metadata: dict[str, Any] = Field(default_factory=dict)
|
|
48
|
+
|
|
49
|
+
@field_validator("mcp_config", mode="before")
|
|
50
|
+
@classmethod
|
|
51
|
+
def resolve_env_vars(cls, v: dict[str, Any]) -> dict[str, Any]:
|
|
52
|
+
"""
|
|
53
|
+
Automatically resolve environment variables in mcp_config using Template.
|
|
54
|
+
|
|
55
|
+
Supports ${VAR_NAME} syntax with variable substitution from:
|
|
56
|
+
1. System environment variables (including HUD_API_KEY, etc.)
|
|
57
|
+
2. Runtime context variables (e.g., RUN_ID from telemetry context)
|
|
58
|
+
|
|
59
|
+
Missing variables resolve to empty strings.
|
|
60
|
+
"""
|
|
61
|
+
import os
|
|
62
|
+
|
|
63
|
+
from hud.telemetry.context import get_current_task_run_id
|
|
64
|
+
|
|
65
|
+
# Start with current environment variables
|
|
66
|
+
mapping = dict(os.environ)
|
|
67
|
+
|
|
68
|
+
# Add runtime context variables if available
|
|
69
|
+
run_id = get_current_task_run_id()
|
|
70
|
+
if run_id:
|
|
71
|
+
mapping["RUN_ID"] = run_id
|
|
72
|
+
|
|
73
|
+
def substitute_in_value(obj: Any) -> Any:
|
|
74
|
+
"""Recursively substitute variables in nested structures."""
|
|
75
|
+
if isinstance(obj, str):
|
|
76
|
+
# Use Template's safe_substitute - missing vars become empty strings
|
|
77
|
+
return Template(obj).safe_substitute(mapping)
|
|
78
|
+
elif isinstance(obj, dict):
|
|
79
|
+
return {k: substitute_in_value(v) for k, v in obj.items()}
|
|
80
|
+
elif isinstance(obj, list):
|
|
81
|
+
return [substitute_in_value(item) for item in obj]
|
|
82
|
+
else:
|
|
83
|
+
return obj
|
|
84
|
+
|
|
85
|
+
return substitute_in_value(v)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def to_taskconfigs(dataset: Dataset) -> Dataset:
|
|
89
|
+
"""
|
|
90
|
+
Convert a HuggingFace dataset to contain TaskConfig objects.
|
|
91
|
+
|
|
92
|
+
Args:
|
|
93
|
+
dataset: HuggingFace dataset with task data
|
|
94
|
+
|
|
95
|
+
Returns:
|
|
96
|
+
Dataset with 'task' column containing TaskConfig objects
|
|
97
|
+
|
|
98
|
+
Example:
|
|
99
|
+
>>> dataset = load_dataset("hud/sheetbench-v1", split="test")
|
|
100
|
+
>>> tasks = to_taskconfigs(dataset)
|
|
101
|
+
>>> tasks[0]["task"] # This is a TaskConfig object
|
|
102
|
+
"""
|
|
103
|
+
|
|
104
|
+
def _convert(example: dict[str, Any]) -> dict[str, TaskConfig]:
|
|
105
|
+
return {"task": TaskConfig(**example)}
|
|
106
|
+
|
|
107
|
+
# Map and keep only the task column
|
|
108
|
+
return dataset.map(_convert, remove_columns=dataset.column_names)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
async def run_dataset(
|
|
112
|
+
name: str,
|
|
113
|
+
dataset: Dataset,
|
|
114
|
+
agent_class: type[BaseMCPAgent],
|
|
115
|
+
agent_config: dict[str, Any] | None = None,
|
|
116
|
+
max_concurrent: int = 5,
|
|
117
|
+
metadata: dict[str, Any] | None = None,
|
|
118
|
+
) -> list[Any]:
|
|
119
|
+
"""
|
|
120
|
+
Run all tasks in a dataset with automatic job tracking.
|
|
121
|
+
|
|
122
|
+
Args:
|
|
123
|
+
name: Name for the job
|
|
124
|
+
dataset: HuggingFace Dataset (raw, not converted)
|
|
125
|
+
agent_class: Agent class to instantiate (e.g., ClaudeMCPAgent)
|
|
126
|
+
agent_config: Configuration for agent (model, etc.)
|
|
127
|
+
max_concurrent: Maximum parallel task execution
|
|
128
|
+
metadata: Optional metadata for the job
|
|
129
|
+
|
|
130
|
+
Returns:
|
|
131
|
+
List of results from agent.run() in dataset order
|
|
132
|
+
|
|
133
|
+
Example:
|
|
134
|
+
>>> from datasets import load_dataset
|
|
135
|
+
>>> from hud.mcp import ClaudeMCPAgent
|
|
136
|
+
>>> dataset = load_dataset("hud/sheetbench-v1", split="test")
|
|
137
|
+
>>> results = await run_dataset(
|
|
138
|
+
... "sheetbench_eval",
|
|
139
|
+
... dataset,
|
|
140
|
+
... ClaudeMCPAgent,
|
|
141
|
+
... {"model": "claude-3-5-sonnet-20241022"},
|
|
142
|
+
... max_concurrent=3,
|
|
143
|
+
... )
|
|
144
|
+
"""
|
|
145
|
+
# Import here to avoid circular imports
|
|
146
|
+
import hud
|
|
147
|
+
from hud.mcp.client import MCPClient
|
|
148
|
+
|
|
149
|
+
# Convert dataset to TaskConfigs internally
|
|
150
|
+
tasks = to_taskconfigs(dataset)
|
|
151
|
+
|
|
152
|
+
# Create job context
|
|
153
|
+
job_metadata = metadata or {}
|
|
154
|
+
job_metadata["agent_class"] = agent_class.__name__
|
|
155
|
+
if agent_config:
|
|
156
|
+
job_metadata["agent_config"] = agent_config
|
|
157
|
+
|
|
158
|
+
with job(name, metadata=job_metadata):
|
|
159
|
+
# Run tasks with semaphore for concurrency control
|
|
160
|
+
sem = asyncio.Semaphore(max_concurrent)
|
|
161
|
+
results: list[AgentResult | None] = [None] * len(tasks)
|
|
162
|
+
|
|
163
|
+
async def _worker(index: int, row: Any) -> None:
|
|
164
|
+
async with sem:
|
|
165
|
+
task = row["task"]
|
|
166
|
+
|
|
167
|
+
# Create trace for this task
|
|
168
|
+
with hud.trace(f"task_{index}"):
|
|
169
|
+
# Create fresh MCP client per task
|
|
170
|
+
if task.mcp_config:
|
|
171
|
+
client = MCPClient(mcp_config=task.mcp_config)
|
|
172
|
+
agent = agent_class(mcp_client=client, **(agent_config or {}))
|
|
173
|
+
|
|
174
|
+
try:
|
|
175
|
+
results[index] = await agent.run(task)
|
|
176
|
+
finally:
|
|
177
|
+
await client.close()
|
|
178
|
+
else:
|
|
179
|
+
logger.warning("Task %d has no mcp_config defined", index)
|
|
180
|
+
results[index] = None
|
|
181
|
+
|
|
182
|
+
# Execute all tasks
|
|
183
|
+
await asyncio.gather(
|
|
184
|
+
*[_worker(i, row) for i, row in enumerate(tasks)],
|
|
185
|
+
return_exceptions=True, # Don't fail entire batch on one error
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
return results
|
hud/env/docker_client.py
CHANGED
|
@@ -8,8 +8,6 @@ import uuid
|
|
|
8
8
|
from pathlib import Path
|
|
9
9
|
from typing import TYPE_CHECKING, Any
|
|
10
10
|
|
|
11
|
-
import toml
|
|
12
|
-
|
|
13
11
|
from hud.env.client import Client
|
|
14
12
|
from hud.types import EnvironmentStatus
|
|
15
13
|
from hud.utils.common import _compile_pathspec, directory_to_tar_bytes
|
|
@@ -97,6 +95,13 @@ class DockerClient(Client):
|
|
|
97
95
|
raise FileNotFoundError(f"pyproject.toml not found in {source_path}")
|
|
98
96
|
|
|
99
97
|
# validate package name
|
|
98
|
+
try:
|
|
99
|
+
import toml
|
|
100
|
+
except ImportError:
|
|
101
|
+
raise ImportError(
|
|
102
|
+
"toml is required for parsing pyproject.toml files. "
|
|
103
|
+
"Please install it with 'pip install toml'"
|
|
104
|
+
) from None
|
|
100
105
|
pyproject_data = toml.load(pyproject_path)
|
|
101
106
|
package_name = pyproject_data.get("project", {}).get("name")
|
|
102
107
|
if not package_name:
|
|
@@ -241,6 +246,13 @@ class DockerClient(Client):
|
|
|
241
246
|
or self._last_pyproject_toml_str != current_pyproject_content
|
|
242
247
|
):
|
|
243
248
|
# Update package name if pyproject.toml changed
|
|
249
|
+
try:
|
|
250
|
+
import toml
|
|
251
|
+
except ImportError:
|
|
252
|
+
raise ImportError(
|
|
253
|
+
"toml is required for parsing pyproject.toml files. "
|
|
254
|
+
"Please install it with 'pip install toml'"
|
|
255
|
+
) from None
|
|
244
256
|
pyproject_data = toml.loads(current_pyproject_content)
|
|
245
257
|
self._package_name = pyproject_data.get("project", {}).get("name")
|
|
246
258
|
if not self._package_name:
|
hud/env/local_docker_client.py
CHANGED
|
@@ -9,8 +9,15 @@ import time
|
|
|
9
9
|
import uuid
|
|
10
10
|
from typing import TYPE_CHECKING, Any
|
|
11
11
|
|
|
12
|
-
import aiodocker
|
|
13
|
-
from aiohttp import ClientTimeout
|
|
12
|
+
try:
|
|
13
|
+
import aiodocker
|
|
14
|
+
from aiohttp import ClientTimeout
|
|
15
|
+
|
|
16
|
+
AIODOCKER_AVAILABLE = True
|
|
17
|
+
except ImportError:
|
|
18
|
+
AIODOCKER_AVAILABLE = False
|
|
19
|
+
aiodocker = None # type: ignore
|
|
20
|
+
ClientTimeout = None # type: ignore
|
|
14
21
|
|
|
15
22
|
from hud.env.docker_client import DockerClient, EnvironmentStatus
|
|
16
23
|
from hud.utils import ExecuteResult
|
|
@@ -40,7 +47,12 @@ class LocalDockerClient(DockerClient):
|
|
|
40
47
|
image_tag = f"hud-env-{uuid.uuid4().hex[:8]}"
|
|
41
48
|
|
|
42
49
|
# Initialize Docker client
|
|
43
|
-
docker_client = aiodocker.Docker()
|
|
50
|
+
if not AIODOCKER_AVAILABLE:
|
|
51
|
+
raise ImportError(
|
|
52
|
+
"aiodocker is required for LocalDockerClient. "
|
|
53
|
+
"Please install it with 'pip install aiodocker'"
|
|
54
|
+
)
|
|
55
|
+
docker_client = aiodocker.Docker() # type: ignore
|
|
44
56
|
|
|
45
57
|
# Create a tar file from the path
|
|
46
58
|
tar_bytes = directory_to_tar_bytes(build_context)
|
|
@@ -82,7 +94,12 @@ class LocalDockerClient(DockerClient):
|
|
|
82
94
|
"""
|
|
83
95
|
|
|
84
96
|
# Initialize Docker client
|
|
85
|
-
docker_client = aiodocker.Docker()
|
|
97
|
+
if not AIODOCKER_AVAILABLE:
|
|
98
|
+
raise ImportError(
|
|
99
|
+
"aiodocker is required for LocalDockerClient. "
|
|
100
|
+
"Please install it with 'pip install aiodocker'"
|
|
101
|
+
)
|
|
102
|
+
docker_client = aiodocker.Docker() # type: ignore
|
|
86
103
|
|
|
87
104
|
# Default host config
|
|
88
105
|
if host_config is None:
|
|
@@ -156,7 +173,7 @@ class LocalDockerClient(DockerClient):
|
|
|
156
173
|
client._log_task = log_task # type: ignore[attr-defined]
|
|
157
174
|
return client
|
|
158
175
|
|
|
159
|
-
def __init__(self, docker_conn: aiodocker.Docker, container_id: str) -> None:
|
|
176
|
+
def __init__(self, docker_conn: aiodocker.Docker, container_id: str) -> None: # type: ignore
|
|
160
177
|
"""
|
|
161
178
|
Initialize the DockerClient.
|
|
162
179
|
|
|
@@ -164,6 +181,11 @@ class LocalDockerClient(DockerClient):
|
|
|
164
181
|
docker_conn: Docker client connection
|
|
165
182
|
container_id: ID of the Docker container to control
|
|
166
183
|
"""
|
|
184
|
+
if not AIODOCKER_AVAILABLE:
|
|
185
|
+
raise ImportError(
|
|
186
|
+
"aiodocker is required for LocalDockerClient. "
|
|
187
|
+
"Please install it with 'pip install aiodocker'"
|
|
188
|
+
)
|
|
167
189
|
super().__init__()
|
|
168
190
|
|
|
169
191
|
# Store container ID instead of container object
|
|
@@ -239,7 +261,7 @@ class LocalDockerClient(DockerClient):
|
|
|
239
261
|
exec_result = await container.exec(
|
|
240
262
|
cmd=command,
|
|
241
263
|
)
|
|
242
|
-
output: Stream = exec_result.start(timeout=ClientTimeout(timeout), detach=False)
|
|
264
|
+
output: Stream = exec_result.start(timeout=ClientTimeout(timeout), detach=False) # type: ignore
|
|
243
265
|
|
|
244
266
|
stdout_data = bytearray()
|
|
245
267
|
stderr_data = bytearray()
|
hud/gym.py
CHANGED
|
@@ -50,15 +50,6 @@ async def make(
|
|
|
50
50
|
effective_job_id = job.id
|
|
51
51
|
elif job_id is not None:
|
|
52
52
|
effective_job_id = job_id
|
|
53
|
-
else:
|
|
54
|
-
try:
|
|
55
|
-
import hud.job
|
|
56
|
-
|
|
57
|
-
active_job = hud.job.get_active_job()
|
|
58
|
-
if active_job:
|
|
59
|
-
effective_job_id = active_job.id
|
|
60
|
-
except ImportError:
|
|
61
|
-
pass
|
|
62
53
|
|
|
63
54
|
build_data = {}
|
|
64
55
|
try:
|
hud/{mcp_agent → mcp}/__init__.py
CHANGED
|
@@ -4,6 +4,7 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
from .base import BaseMCPAgent
|
|
6
6
|
from .claude import ClaudeMCPAgent
|
|
7
|
+
from .client import MCPClient
|
|
7
8
|
from .langchain import LangChainMCPAgent
|
|
8
9
|
from .openai import OpenAIMCPAgent
|
|
9
10
|
|
|
@@ -11,5 +12,6 @@ __all__ = [
|
|
|
11
12
|
"BaseMCPAgent",
|
|
12
13
|
"ClaudeMCPAgent",
|
|
13
14
|
"LangChainMCPAgent",
|
|
15
|
+
"MCPClient",
|
|
14
16
|
"OpenAIMCPAgent",
|
|
15
17
|
]
|