hud-python 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- hud/__init__.py +22 -2
- hud/adapters/claude/adapter.py +9 -2
- hud/adapters/claude/tests/__init__.py +1 -0
- hud/adapters/claude/tests/test_adapter.py +519 -0
- hud/adapters/common/types.py +5 -1
- hud/adapters/operator/adapter.py +4 -0
- hud/adapters/operator/tests/__init__.py +1 -0
- hud/adapters/operator/tests/test_adapter.py +370 -0
- hud/agent/__init__.py +4 -0
- hud/agent/base.py +18 -2
- hud/agent/claude.py +20 -17
- hud/agent/claude_plays_pokemon.py +283 -0
- hud/agent/langchain.py +12 -7
- hud/agent/misc/__init__.py +3 -0
- hud/agent/misc/response_agent.py +80 -0
- hud/agent/operator.py +27 -19
- hud/agent/tests/__init__.py +1 -0
- hud/agent/tests/test_base.py +202 -0
- hud/env/docker_client.py +28 -18
- hud/env/environment.py +32 -16
- hud/env/local_docker_client.py +83 -42
- hud/env/remote_client.py +1 -3
- hud/env/remote_docker_client.py +71 -14
- hud/exceptions.py +12 -0
- hud/gym.py +71 -53
- hud/job.py +59 -14
- hud/server/requests.py +26 -4
- hud/settings.py +7 -1
- hud/task.py +45 -33
- hud/taskset.py +56 -4
- hud/telemetry/__init__.py +21 -0
- hud/telemetry/_trace.py +173 -0
- hud/telemetry/context.py +169 -0
- hud/telemetry/exporter.py +417 -0
- hud/telemetry/instrumentation/__init__.py +3 -0
- hud/telemetry/instrumentation/mcp.py +495 -0
- hud/telemetry/instrumentation/registry.py +59 -0
- hud/telemetry/mcp_models.py +331 -0
- hud/telemetry/tests/__init__.py +1 -0
- hud/telemetry/tests/test_context.py +207 -0
- hud/telemetry/tests/test_trace.py +270 -0
- hud/types.py +11 -27
- hud/utils/common.py +22 -2
- hud/utils/misc.py +53 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +7 -0
- {hud_python-0.2.4.dist-info → hud_python-0.2.6.dist-info}/METADATA +98 -30
- hud_python-0.2.6.dist-info/RECORD +84 -0
- hud_python-0.2.4.dist-info/RECORD +0 -62
- {hud_python-0.2.4.dist-info → hud_python-0.2.6.dist-info}/WHEEL +0 -0
- {hud_python-0.2.4.dist-info → hud_python-0.2.6.dist-info}/licenses/LICENSE +0 -0
hud/env/remote_docker_client.py
CHANGED

@@ -2,7 +2,9 @@ from __future__ import annotations
 
 import logging
 from base64 import b64decode, b64encode
-from typing import Any
+from typing import TYPE_CHECKING, Any
+
+import httpx
 
 from hud.env.docker_client import DockerClient
 from hud.exceptions import HudResponseError
@@ -10,11 +12,27 @@ from hud.server import make_request
 from hud.settings import settings
 from hud.types import EnvironmentStatus
 from hud.utils import ExecuteResult
-from hud.utils.common import get_gym_id
+from hud.utils.common import directory_to_zip_bytes, get_gym_id
+
+if TYPE_CHECKING:
+    from pathlib import Path
 
 logger = logging.getLogger("hud.env.remote_env_client")
 
 
+async def upload_bytes_to_presigned_url(presigned_url: str, data_bytes: bytes) -> None:
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.put(presigned_url, content=data_bytes)
+            response.raise_for_status()
+    except httpx.HTTPStatusError as e:
+        logger.exception("Failed to upload to presigned URL")
+        raise HudResponseError(message=f"Failed to upload to presigned URL: {e}") from e
+    except httpx.RequestError as e:
+        logger.exception("Network error uploading to presigned URL")
+        raise HudResponseError(message=f"Network error uploading to presigned URL: {e}") from e
+
+
 class RemoteDockerClient(DockerClient):
     """
     Remote environment client implementation.
@@ -22,21 +40,64 @@ class RemoteDockerClient(DockerClient):
     Uses the HUD API to manage a remote environment.
     """
 
+    @classmethod
+    async def build_image(cls, build_context: Path) -> tuple[str, dict[str, Any]]:
+        """
+        Build an image from a build context.
+        """
+        # create the presigned url by making a POST request to /v2/builds
+        logger.info("Creating build")
+        response = await make_request(
+            method="POST",
+            url=f"{settings.base_url}/v2/builds",
+            api_key=settings.api_key,
+        )
+        logger.info("Build created")
+        presigned_url = response["presigned_url"]
+
+        # List files in the build context
+        files = list(build_context.glob("**/*"))
+        logger.info("Found %d files in build context %s", len(files), build_context)
+
+        if len(files) == 0:
+            raise HudResponseError(message="Build context is empty")
+
+        # zip the build context
+        logger.info("Zipping build context")
+        zip_bytes = directory_to_zip_bytes(build_context)
+        logger.info("Created zip archive of size %d kb", len(zip_bytes) // 1024)
+        # upload the zip bytes to the presigned url
+        logger.info("Uploading build context")
+        await upload_bytes_to_presigned_url(presigned_url, zip_bytes)
+        logger.info("Build context uploaded")
+
+        # start the build and return uri and logs
+        logger.info("Starting build")
+        response = await make_request(
+            method="POST",
+            url=f"{settings.base_url}/v2/builds/{response['id']}/start",
+            api_key=settings.api_key,
+        )
+        logger.info("Build completed")
+
+        return response["uri"], {"logs": response["logs"]}
+
     @classmethod
     async def create(
         cls,
-
+        image_uri: str,
         *,
         job_id: str | None = None,
         task_id: str | None = None,
         metadata: dict[str, Any] | None = None,
-    ) ->
+    ) -> RemoteDockerClient:
         """
-        Creates a remote environment client from
+        Creates a remote environment client from an image.
 
         Args:
-
-
+            image_uri: The image uri to create the environment from
+            job_id: The job_id of the environment to create
+            task_id: The task_id of the environment to create
            metadata: Metadata to associate with the environment
 
        Returns:
@@ -52,13 +113,14 @@ class RemoteDockerClient(DockerClient):
 
         logger.info("Creating remote environment")
 
+        # true_gym_id = await get_gym_id("local-docker")
         true_gym_id = await get_gym_id("docker")
 
         # augment metadata with dockerfile
         if "environment_config" not in metadata:
             metadata["environment_config"] = {}
 
-        metadata["environment_config"]["
+        metadata["environment_config"]["image_uri"] = image_uri
 
         # Create a new environment via the HUD API
         response = await make_request(
@@ -85,12 +147,7 @@ class RemoteDockerClient(DockerClient):
             response_json=response,
         )
 
-
-        controller = cls(env_id)
-
-        build_metadata = response.get("metadata", {})
-
-        return controller, build_metadata
+        return cls(env_id)
 
     def __init__(self, env_id: str) -> None:
         """
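The new remote build path can be exercised roughly as follows. This is a hedged sketch based only on the diff above: the `./env-build` directory, the configured HUD API key, and the printed fields are illustrative assumptions, not documented usage.

```python
# Hypothetical usage of the 0.2.6 remote build API; assumes hud-python 0.2.6 is
# installed, a HUD API key is configured, and ./env-build holds a build context.
import asyncio
from pathlib import Path

from hud.env.remote_docker_client import RemoteDockerClient


async def main() -> None:
    # build_image zips the build context, uploads it to a presigned URL,
    # starts the remote build, and returns (image_uri, {"logs": ...}).
    image_uri, build_data = await RemoteDockerClient.build_image(Path("./env-build"))
    print("Built image:", image_uri)
    print("Build data keys:", list(build_data))

    # create() now takes the image URI directly and returns just the client;
    # 0.2.4 returned a (controller, build_metadata) tuple instead.
    client = await RemoteDockerClient.create(image_uri=image_uri, metadata={})
    print("Created remote environment client:", client)


if __name__ == "__main__":
    asyncio.run(main())
```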
hud/exceptions.py
CHANGED

@@ -165,3 +165,15 @@ class HudNetworkError(HudException):
     This exception is raised when there are issues with the network
     connection, DNS resolution, or other network-related problems.
     """
+
+
+class GymMakeException(HudException):
+    """Raised when environment creation or setup fails, includes context data."""
+
+    def __init__(self, message: str, data: dict[str, Any]) -> None:
+        super().__init__(message)
+        self.data = data
+
+    def __str__(self) -> str:
+        base = super().__str__()
+        return f"{base} | Data: {self.data}"
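A minimal sketch of handling the new exception is shown below; the gym identifier and the `env.close()` call are assumptions for illustration, not taken from the package docs.

```python
# Hedged example: catching GymMakeException and inspecting its context data.
import asyncio

import hud.gym
from hud.exceptions import GymMakeException


async def main() -> None:
    try:
        env = await hud.gym.make("example-gym")  # "example-gym" is a placeholder id
    except GymMakeException as e:
        # e.data carries the build_data gathered before the failure, plus an
        # "exception" key holding the original error string (see hud/gym.py).
        print("Environment creation failed:", e)
        print("Context:", e.data)
    else:
        await env.close()  # assumed cleanup call


asyncio.run(main())
```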
hud/gym.py
CHANGED

@@ -8,6 +8,8 @@ from hud.env.environment import Environment
 from hud.env.local_docker_client import LocalDockerClient
 from hud.env.remote_client import RemoteClient
 from hud.env.remote_docker_client import RemoteDockerClient
+from hud.exceptions import GymMakeException
+from hud.telemetry.context import get_current_task_run_id
 from hud.types import CustomGym, Gym
 from hud.utils.common import get_gym_id
 
@@ -34,17 +36,19 @@ async def make(
         job_id: ID of job to associate with this environment (deprecated, use job instead)
         metadata: Additional metadata for the environment
     """
-
-
+    task = None
+    if isinstance(env_src, str | CustomGym):
+        gym = env_src
+    else:
+        gym = env_src.gym
+        task = env_src
 
-    # Handle job parameter
     effective_job_id = None
     if job is not None:
         effective_job_id = job.id
     elif job_id is not None:
         effective_job_id = job_id
     else:
-        # Try to get an active job from the decorator context
         try:
             import hud.job
 
@@ -52,59 +56,73 @@ async def make(
             if active_job:
                 effective_job_id = active_job.id
         except ImportError:
-            pass
-
-
-
-
-
-
-
-
+            pass
+
+    build_data = {}
+    try:
+        metadata_copy = {} if metadata is None else metadata.copy()
+
+        current_task_run_id = get_current_task_run_id()
+        if current_task_run_id:
+            metadata_copy["task_run_id"] = current_task_run_id
+            logger.debug(
+                "Passing task_run_id %s from hud.telemetry context to environment metadata.",
+                current_task_run_id,
+            )
 
-
-
-
-
-
-
-
-
-
-
-
+        if isinstance(gym, CustomGym):
+            if isinstance(gym.image_or_build_context, str):
+                uri = gym.image_or_build_context
+            elif isinstance(gym.image_or_build_context, Path):
+                if gym.location == "local":
+                    uri, build_data = await LocalDockerClient.build_image(
+                        gym.image_or_build_context
+                    )
+                elif gym.location == "remote":
+                    uri, build_data = await RemoteDockerClient.build_image(
+                        gym.image_or_build_context
+                    )
+                else:
+                    raise ValueError(f"Invalid environment location: {gym.location}")
+            else:
+                raise ValueError(f"Invalid image or build context: {gym.image_or_build_context}")
+
+            if gym.location == "local":
+                logger.info("Creating local environment")
+                client = await LocalDockerClient.create(uri)
+            elif gym.location == "remote":
+                logger.info("Creating remote environment")
+                client = await RemoteDockerClient.create(
+                    image_uri=uri,
+                    job_id=effective_job_id,
+                    task_id=task.id if task else None,
+                    metadata=metadata_copy,
+                )
+            else:
+                raise ValueError(f"Invalid environment location: {gym.location}")
+
+            if isinstance(gym.image_or_build_context, Path):
+                logger.info("Setting source path %s", gym.image_or_build_context)
+                client.set_source_path(gym.image_or_build_context)
+        elif isinstance(gym, str):
+            logger.info("Creating private environment")
+            true_gym_id = await get_gym_id(gym)
+            client, build_data = await RemoteClient.create(
+                gym_id=true_gym_id,
                 job_id=effective_job_id,
                 task_id=task.id if task else None,
-                metadata=
+                metadata=metadata_copy,
             )
         else:
-            raise ValueError(f"Invalid
-
-        # Set up the environment with a source path
-        if gym.controller_source_dir:
-            logger.info("Setting source path")
-            client.set_source_path(Path(gym.controller_source_dir))
-    elif isinstance(gym, str):
-        logger.info("Creating private environment")
-        # Note: the gym_name_or_id is a unique identifier, but it is not a true
-        # gym_id for the purposes of building the environment
-        # we therefore fetch the gym_id from the HUD API here
-        true_gym_id = await get_gym_id(gym)
-
-        # Create the environment
-        client, build_data = await RemoteClient.create(
-            gym_id=true_gym_id,
-            job_id=effective_job_id,
-            task_id=task.id if task else None,
-            metadata=metadata,
-        )
-    else:
-        raise ValueError(f"Invalid gym source: {gym}")
+            raise ValueError(f"Invalid gym source: {gym}")
 
-
-
-
-    if task:
-        await environment._setup()
+        environment = Environment(
+            client=client, metadata=metadata_copy, task=task, build_data=build_data
+        )
 
-
+        if task:
+            await environment._setup()
+        return environment
+    except Exception as e:
+        build_data["exception"] = str(e)
+        raise GymMakeException("Failed to create environment", build_data) from e
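The reworked `make()` now accepts a `CustomGym` whose build context is a `Path`, builds the image (locally or remotely), and wraps the whole flow in a `GymMakeException` on failure. The sketch below illustrates that flow under stated assumptions: the `CustomGym` field values and `env.close()` are placeholders, not documented examples.

```python
# Hedged sketch of the 0.2.6 gym.make flow with a remote build context.
import asyncio
from pathlib import Path

from hud import gym
from hud.exceptions import GymMakeException
from hud.types import CustomGym


async def main() -> None:
    # A Path build context is built first and the resulting image URI is used to
    # create the environment; a plain string would be treated as an image URI.
    custom = CustomGym(location="remote", image_or_build_context=Path("./my-env"))
    try:
        env = await gym.make(custom)
    except GymMakeException as e:
        # build_data (including any build logs) travels with the exception.
        print("make() failed with context:", e.data)
        return
    await env.close()  # assumed cleanup call


asyncio.run(main())
```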
hud/job.py
CHANGED

@@ -1,22 +1,24 @@
 from __future__ import annotations
 
 import asyncio
-import datetime
 import functools
 import inspect
 import logging
 import sys
 from collections.abc import Callable, Coroutine
+from datetime import datetime
 from typing import TYPE_CHECKING, Any, TypeVar, cast
 
 from pydantic import BaseModel, PrivateAttr, TypeAdapter
 
 import hud.server
-from hud import gym
+from hud import Response, gym
+from hud.agent import ResponseAgent
 from hud.settings import settings
 from hud.task import Task
 from hud.taskset import TaskSet
 from hud.trajectory import Trajectory
+from hud.utils.common import Observation
 from hud.utils.progress import StepProgressTracker
 
 if TYPE_CHECKING:
@@ -42,7 +44,7 @@ class Job(BaseModel):
     id: str
     name: str
     metadata: dict[str, Any] | None = None
-    created_at: datetime
+    created_at: datetime
     status: str
 
     # Internal cache for trajectories
@@ -162,13 +164,15 @@ async def create_job(
     # If not, we might need to make a subsequent GET request
     job_data = data  # Adjust if the API response structure is different
 
-
+    created_at = datetime.fromisoformat(job_data["created_at"].replace("Z", "+00:00"))
+
+    logger.info("View job at https://app.hud.so/jobs/%s.", job_data["id"])
 
     return Job(
         id=job_data["id"],
         name=job_data["name"],
         metadata=job_data.get("metadata", {}),  # Ensure metadata is dict
-        created_at=
+        created_at=created_at,  # Parse datetime
         status=job_data["status"],
     )
 
@@ -259,6 +263,27 @@ def get_active_job() -> Job | None:
     return None
 
 
+async def _maybe_resample_action(
+    obs: Observation, action: Any, response_agent: ResponseAgent
+) -> tuple[Observation, bool]:
+    if isinstance(action, Response):
+        action = action.model_dump()
+    if isinstance(action, dict) and action.get("type") == "response":
+        response_text = action.get("text", "")
+        if response_agent and response_text:
+            try:
+                decision = await response_agent.determine_response(response_text)
+                if decision == "CONTINUE":
+                    logger.info("ResponseAgent indicated CONTINUE. Retrying...")
+                    obs = Observation(text="Please continue.")
+                    return obs, False
+                elif decision == "CONTINUE":
+                    logger.warning("Max continue retries reached. Stopping despite CONTINUE.")
+            except Exception as e:
+                logger.warning("Error using ResponseAgent: %s", e)
+    return obs, True
+
+
 async def _execute_task(
     agent_cls: type[Agent],
     adapter_cls: type[Adapter] | None,
@@ -270,6 +295,7 @@ async def _execute_task(
     max_steps_per_task: int,
     job: Job,
     tracker: StepProgressTracker | None = None,
+    auto_reply_question: bool = False,
     # Use semaphores instead of rate limiter
     env_creation_semaphore: asyncio.Semaphore | None = None,
     agent_predict_semaphore: asyncio.Semaphore | None = None,
@@ -283,10 +309,15 @@ async def _execute_task(
     status = "error"
     error_msg = "Initialization failed"
     try:
+        response_agent = ResponseAgent() if auto_reply_question else None
+
         adapter_instance = None
         if adapter_cls:
             adapter_instance = adapter_cls(**(adapter_kwargs or {}))
-        agent_instance = agent_cls(
+        agent_instance = agent_cls(
+            adapter=adapter_instance,
+            **(agent_kwargs or {}),
+        )
         if agent_instance is None:
             raise RuntimeError("Agent could not be instantiated")
 
@@ -303,6 +334,7 @@ async def _execute_task(
         obs, _ = obs_tuple
 
         step_error = None
+
         for step in range(max_steps_per_task):
             action, done = (None, False)
             try:
@@ -319,6 +351,11 @@ async def _execute_task(
                 if action is None and not done:
                     done = True
 
+                if done and response_agent:
+                    obs, finish = await _maybe_resample_action(obs, action[-1], response_agent)
+                    if not finish:
+                        continue
+
                 step_result = await env.step(action)
                 if step_result is None:
                     terminated = True
@@ -344,10 +381,10 @@ async def _execute_task(
                         "type": "step_error",
                         "step": step + 1,
                        "error": str(agent_step_err),
-                        "timestamp": datetime.
+                        "timestamp": datetime.now().isoformat(),
                     }
                 )
-
+                continue
         else:
             logger.warning("[Job: %s/%s, Task: %s] Max steps reached.", job.name, job.id, task_id)
 
@@ -361,6 +398,7 @@ async def _execute_task(
            evaluation_result = await env.evaluate()
            status = "completed"
            error_msg = None
+            # logger.info("Evaluation result: %s", evaluation_result)
        except Exception as eval_err:
            logger.exception(
                "[Job: %s/%s, Task: %s] Evaluation Error: %s",
@@ -377,7 +415,7 @@ async def _execute_task(
                    "task_id": task_id,
                    "type": "evaluation_error",
                    "error": str(eval_err),
-                    "timestamp": datetime.
+                    "timestamp": datetime.now().isoformat(),
                }
            )
 
@@ -391,7 +429,7 @@ async def _execute_task(
                "task_id": task_id,
                "type": "setup_error",
                "error": str(e),
-                "timestamp": datetime.
+                "timestamp": datetime.now().isoformat(),
            }
        )
 
@@ -411,7 +449,7 @@ async def _execute_task(
                "task_id": task_id,
                "type": "env_close_error",
                "error": str(close_err),
-                "timestamp": datetime.
+                "timestamp": datetime.now().isoformat(),
            }
        )
 
@@ -453,6 +491,7 @@ async def run_job(
     agent_cls: type[Agent],
     task_or_taskset: Task | TaskSet,
     job_name: str,
+    auto_reply_question: bool = False,
     adapter_cls: type[Adapter] | None = None,
     agent_kwargs: dict[str, Any] | None = None,
     adapter_kwargs: dict[str, Any] | None = None,
@@ -461,8 +500,8 @@ async def run_job(
     job_metadata: dict[str, Any] | None = None,
     show_progress: bool = True,
     # Concurrency control with semaphores
-    max_concurrent_env_creations: int | None = 30,  # Limits
-    max_concurrent_agent_predictions: int | None =
+    max_concurrent_env_creations: int | None = 30,  # Limits gym.make calls
+    max_concurrent_agent_predictions: int | None = None,  # No limit on LLM calls
     max_concurrent_tasks: int | None = 30,  # Limits overall task concurrency
 ) -> Job:
     """
@@ -495,12 +534,14 @@ async def run_job(
     Returns:
         The created Job object with errors stored in job.errors.
     """
+
     tasks_to_run: list[Task] = []
     created_job: Job | None = None
 
     evalset_id = None
     if isinstance(task_or_taskset, TaskSet):
         evalset_id = task_or_taskset.id
+        await task_or_taskset.fit(agent_cls)
 
     gym_id = None
     if isinstance(task_or_taskset, Task):
@@ -519,7 +560,7 @@ async def run_job(
            evalset_id=evalset_id,
            gym_id=gym_id,
        )
-        logger.info("Created job with ID: %s", created_job.id)
+        # logger.info("Created job with ID: %s", created_job.id)
    except Exception as e:
        logger.exception("Failed to create job '%s': %s", job_name, e)
        raise
@@ -555,6 +596,8 @@ async def run_job(
        logger.info(
            "Limiting concurrent agent predictions to %d.", max_concurrent_agent_predictions
        )
+    else:
+        logger.info("No limit on concurrent agent predictions.")
 
     task_execution_sema = None
     effective_concurrency = num_tasks  # Default to running all if parallel
@@ -606,6 +649,7 @@ async def run_job(
                tracker=tracker,
                env_creation_semaphore=env_creation_sema,
                agent_predict_semaphore=agent_predict_sema,
+                auto_reply_question=auto_reply_question,
            )
            for task, task_id in zip(tasks_to_run, task_ids, strict=True)
        ]
@@ -641,6 +685,7 @@ async def run_job(
                tracker=tracker,
                env_creation_semaphore=env_creation_sema,
                agent_predict_semaphore=agent_predict_sema,
+                auto_reply_question=auto_reply_question,
            )
 
    finally:
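The main user-facing change here is the `auto_reply_question` flag on `run_job`: when the agent emits a terminal "response" action, a `ResponseAgent` may decide to answer "Please continue." and keep the episode going. The sketch below shows how a caller might opt in; the agent class name and the `Task` fields are assumptions for illustration only.

```python
# Hedged sketch of run_job with the new 0.2.6 options; ClaudeAgent and the Task
# fields shown are assumed names, not confirmed by this diff.
import asyncio

from hud.agent import ClaudeAgent  # assumed agent class name
from hud.job import run_job
from hud.task import Task


async def main() -> None:
    # Minimal illustrative task; only `prompt` and a gym id are shown here.
    task = Task(prompt="Open the settings page and report the app version.", gym="example-gym")

    job = await run_job(
        agent_cls=ClaudeAgent,
        task_or_taskset=task,
        job_name="auto-reply-demo",
        auto_reply_question=True,  # new in 0.2.6: ResponseAgent may reply "CONTINUE"
        max_concurrent_agent_predictions=None,  # new default: no LLM concurrency cap
    )
    print("Job finished:", job.id)


asyncio.run(main())
```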
hud/server/requests.py
CHANGED

@@ -6,6 +6,7 @@ from __future__ import annotations
 
 import asyncio
 import logging
+import ssl
 import time
 from typing import Any
 
@@ -20,7 +21,7 @@ from hud.exceptions import (
 
 # Set up logger
 logger = logging.getLogger("hud.http")
-logger.setLevel(logging.
+logger.setLevel(logging.INFO)
 
 
 # Long running requests can take up to 10 minutes.
@@ -37,7 +38,7 @@ async def _handle_retry(
 ) -> None:
     """Helper function to handle retry logic and logging."""
     retry_time = retry_delay * (2 ** (attempt - 1))  # Exponential backoff
-    logger.
+    logger.debug(
         "%s from %s, retrying in %.2f seconds (attempt %d/%d)",
         error_msg,
         url,
@@ -140,6 +141,12 @@ async def make_request(
                continue
            else:
                raise HudNetworkError(f"Network error: {e!s}") from None
+        except ssl.SSLError as e:
+            if attempt <= max_retries:
+                await _handle_retry(attempt, max_retries, retry_delay, url, f"SSL error: {e}")
+                continue
+            else:
+                raise HudNetworkError(f"SSL error: {e!s}") from None
        except Exception as e:
            raise HudRequestError(f"Unexpected error: {e!s}") from None
    raise HudRequestError(f"Request failed after {max_retries} retries with unknown error")
@@ -201,7 +208,7 @@ def make_request_sync(
            # Check if we got a retriable status code
            if response.status_code in retry_status_codes and attempt <= max_retries:
                retry_time = retry_delay * (2 ** (attempt - 1))  # Exponential backoff
-                logger.
+                logger.debug(
                    "Received status %d from %s, retrying in %.2f seconds (attempt %d/%d)",
                    response.status_code,
                    url,
@@ -222,7 +229,7 @@ def make_request_sync(
        except httpx.RequestError as e:
            if attempt <= max_retries:
                retry_time = retry_delay * (2 ** (attempt - 1))
-                logger.
+                logger.debug(
                    "Network error %s from %s, retrying in %.2f seconds (attempt %d/%d)",
                    str(e),
                    url,
@@ -234,6 +241,21 @@ def make_request_sync(
                continue
            else:
                raise HudNetworkError(f"Network error: {e!s}") from None
+        except ssl.SSLError as e:
+            if attempt <= max_retries:
+                retry_time = retry_delay * (2 ** (attempt - 1))  # Exponential backoff
+                logger.debug(
+                    "SSL error %s from %s, retrying in %.2f seconds (attempt %d/%d)",
+                    str(e),
+                    url,
+                    retry_time,
+                    attempt,
+                    max_retries,
+                )
+                time.sleep(retry_time)
+                continue
+            else:
+                raise HudNetworkError(f"SSL error: {e!s}") from None
        except Exception as e:
            raise HudRequestError(f"Unexpected error: {e!s}") from None
    raise HudRequestError(f"Request failed after {max_retries} retries with unknown error")
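Both the async and sync request paths now treat `ssl.SSLError` as retriable and reuse the same exponential backoff. The standalone sketch below only prints the wait schedule that formula produces, assuming `retry_delay=1.0` seconds and four retries (illustrative values, not the package defaults).

```python
# Exponential backoff schedule, as used by _handle_retry and make_request_sync.
retry_delay = 1.0  # assumed base delay in seconds
max_retries = 4    # assumed retry count

for attempt in range(1, max_retries + 1):
    retry_time = retry_delay * (2 ** (attempt - 1))  # same formula as in the diff
    print(f"attempt {attempt}: wait {retry_time:.2f}s before retrying")
# Output: 1.00s, 2.00s, 4.00s, 8.00s
```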
hud/settings.py
CHANGED

@@ -15,7 +15,7 @@ class Settings(BaseSettings):
     model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="allow")
 
     base_url: str = Field(
-        default="https://
+        default="https://orchestration.hud.so/hud-gym/api",
         description="Base URL for the HUD API",
         validation_alias="base_url",
     )
@@ -38,6 +38,12 @@ class Settings(BaseSettings):
         validation_alias="OPENAI_API_KEY",
     )
 
+    telemetry_enabled: bool = Field(
+        default=True,
+        description="Enable telemetry for the HUD SDK",
+        validation_alias="TELEMETRY_ENABLED",
+    )
+
 
 # Create a singleton instance
 settings = Settings()
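The new `telemetry_enabled` flag defaults to on and reads the `TELEMETRY_ENABLED` environment variable (or a `.env` file, per `model_config`). A hedged sketch of overriding it before importing the SDK; setting the variable in-process like this is an assumption about typical pydantic-settings usage, not documented behavior.

```python
# Disable SDK telemetry via the validation_alias shown in the diff above.
import os

os.environ["TELEMETRY_ENABLED"] = "false"  # must be set before hud.settings is imported

from hud.settings import settings  # noqa: E402  (import after the env var is set)

print(settings.telemetry_enabled)  # expected: False
print(settings.base_url)           # default: https://orchestration.hud.so/hud-gym/api
```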