hud-python 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (46) hide show
  1. hud/__init__.py +16 -12
  2. hud/adapters/__init__.py +4 -2
  3. hud/adapters/claude/adapter.py +0 -1
  4. hud/adapters/common/adapter.py +11 -10
  5. hud/adapters/common/types.py +27 -13
  6. hud/adapters/operator/__init__.py +5 -0
  7. hud/adapters/operator/adapter.py +93 -0
  8. hud/agent/__init__.py +7 -0
  9. hud/agent/base.py +109 -0
  10. hud/agent/claude.py +187 -0
  11. hud/agent/operator.py +190 -0
  12. hud/env/__init__.py +11 -0
  13. hud/env/client.py +35 -0
  14. hud/env/docker_client.py +306 -0
  15. hud/env/environment.py +181 -0
  16. hud/env/local_docker_client.py +249 -0
  17. hud/env/remote_client.py +185 -0
  18. hud/env/remote_docker_client.py +221 -0
  19. hud/evaluators/__init__.py +10 -0
  20. hud/evaluators/base.py +31 -0
  21. hud/evaluators/inspect.py +29 -0
  22. hud/evaluators/judge.py +213 -0
  23. hud/evaluators/match.py +163 -0
  24. hud/evaluators/remote.py +78 -0
  25. hud/gym.py +101 -15
  26. hud/job.py +185 -0
  27. hud/server/__init__.py +2 -2
  28. hud/server/requests.py +87 -0
  29. hud/settings.py +13 -2
  30. hud/task.py +133 -0
  31. hud/taskset.py +95 -0
  32. hud/trajectory.py +90 -0
  33. hud/types.py +65 -0
  34. hud/utils/__init__.py +4 -2
  35. hud/utils/common.py +69 -0
  36. hud/utils/config.py +182 -4
  37. hud/utils/telemetry.py +67 -0
  38. hud_python-0.2.0.dist-info/METADATA +188 -0
  39. hud_python-0.2.0.dist-info/RECORD +44 -0
  40. {hud_python-0.1.5.dist-info → hud_python-0.2.0.dist-info}/licenses/LICENSE +1 -1
  41. hud/client.py +0 -200
  42. hud/environment.py +0 -318
  43. hud/run.py +0 -208
  44. hud_python-0.1.5.dist-info/METADATA +0 -125
  45. hud_python-0.1.5.dist-info/RECORD +0 -21
  46. {hud_python-0.1.5.dist-info → hud_python-0.2.0.dist-info}/WHEEL +0 -0
hud/env/environment.py ADDED
@@ -0,0 +1,181 @@
1
+ """Base classes for environment implementations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from typing import TYPE_CHECKING, Any
7
+
8
+ from pydantic import BaseModel
9
+
10
+ from hud.env.client import Client
11
+ from hud.env.remote_client import RemoteClient
12
+ from hud.task import Task
13
+ from hud.utils import HudStyleConfigs, expand_config
14
+ from hud.utils.config import REMOTE_EVALUATE, REMOTE_SETUP, HudStyleConfig, create_remote_config
15
+
16
+ if TYPE_CHECKING:
17
+ from hud.adapters.common import CLA
18
+
19
+ logger = logging.getLogger("hud.environment")
20
+
21
+
22
class Observation(BaseModel):
    """
    A single observation returned by an environment.

    Both fields are optional and default to ``None``.

    Attributes:
        screenshot: Screen capture as a base64-encoded PNG string
        text: Textual observation, when one is available
    """

    # Base64-encoded PNG of the screen
    screenshot: str | None = None
    # Free-form text accompanying (or replacing) the screenshot
    text: str | None = None
33
+
34
+
35
class Environment(BaseModel):
    """
    Environment base class that provides common functionality for all environment implementations.

    Every operation is implemented on top of the primitives exposed by ``client``
    (``invoke`` and ``close``).
    """

    metadata: dict[str, Any]
    client: Client
    url: str | None = None
    live_url: str | None = None
    # Task bound to this environment; supplies the default config and the prompt
    task: Task | None = None
    build_data: dict[str, Any]

    async def _invoke_all(self, configs: HudStyleConfigs) -> list[Any]:
        """
        Invoke one or more configs on the client and collect their results.

        Each config is expanded with ``expand_config`` and every expanded config is
        invoked sequentially. Any stdout/stderr produced by a call is logged.

        Args:
            configs: A single config or a list of configs

        Returns:
            list[Any]: One result per expanded config, in invocation order
        """
        configs_all = configs if isinstance(configs, list) else [configs]
        results = []
        for config in configs_all:
            for expanded_config in expand_config(config):
                result, stdout, stderr = await self.client.invoke(expanded_config)
                results.append(result)
                # errors="replace": output that is not valid UTF-8 must not abort
                # the invocation just because it is being logged.
                if stdout:
                    logger.info(
                        "%s produced stdout:\n%s",
                        expanded_config.function,
                        stdout.decode(errors="replace"),
                    )
                if stderr:
                    logger.warning(
                        "%s produced stderr:\n%s",
                        expanded_config.function,
                        stderr.decode(errors="replace"),
                    )
        return results

    async def _run_configs(self, config: HudStyleConfigs | None, remote_function: Any) -> list[Any]:
        """
        Resolve which configs to run for setup/evaluate and invoke them.

        For a ``RemoteClient`` the configs are built by ``create_remote_config``.
        Otherwise the explicit ``config`` wins, then ``self.task.config``.

        Args:
            config: Explicit configuration, or None to fall back to the task
            remote_function: Third argument for ``create_remote_config``
                (``REMOTE_SETUP`` or ``REMOTE_EVALUATE``)

        Returns:
            list[Any]: Results of all invoked configs

        Raises:
            ValueError: If the client is not remote and neither ``config`` nor a
                task config is available
        """
        if isinstance(self.client, RemoteClient):
            return await self._invoke_all(
                create_remote_config(self.task, config, remote_function)
            )
        if config is not None:
            return await self._invoke_all(config)
        if self.task and self.task.config is not None:
            return await self._invoke_all(self.task.config)
        raise ValueError("No config or task provided for local environment")

    async def _setup(self, config: HudStyleConfigs | None = None) -> None:
        """
        Setup the environment.

        Args:
            config: The configuration to use for the setup

        Raises:
            ValueError: If the client is not remote and no config or task config exists
        """
        await self._run_configs(config, REMOTE_SETUP)

    async def evaluate(self, config: HudStyleConfigs | None = None) -> Any:
        """
        Evaluate the environment.

        Args:
            config: The configuration to use for the evaluation

        Returns:
            Any: The single result when exactly one call was invoked, otherwise the
                list of all results

        Raises:
            ValueError: If the client is not remote and no config or task config exists
        """
        results = await self._run_configs(config, REMOTE_EVALUATE)
        if len(results) == 1:
            return results[0]
        return results

    async def reset(
        self, configs: HudStyleConfigs | None = None
    ) -> tuple[Observation, dict[str, Any]]:
        """
        Reset the environment.

        Takes an empty step to obtain the first observation and, when the task has
        a prompt, places that prompt in the observation's ``text``.

        Args:
            configs: The configuration to use for the reset

        Returns:
            Observation: The first observation from the environment
            info: Dictionary of information about the environment
        """
        # NOTE(review): the `_setup(configs)` call is disabled in this release, so
        # `configs` is currently unused here -- confirm setup is run elsewhere.
        obs, _, _, info = await self.step()
        if self.task and self.task.prompt:
            obs.text = self.task.prompt
        return obs, info

    async def step(
        self, actions: list[CLA] | None = None
    ) -> tuple[Observation, float, bool, dict[str, Any]]:
        """Execute a step in the environment.

        Args:
            actions: The actions to execute; None or an empty list sends no actions

        Returns:
            tuple: ``(observation, reward, done, info)``. Only the observation comes
                from the environment; reward, done and info are the constants
                ``0.0``, ``False`` and ``{}``.
        """
        if not actions:
            actions = []

        result, stdout, stderr = await self.client.invoke(
            HudStyleConfig(function="step", args=[[action.model_dump() for action in actions]])
        )
        # errors="replace": never fail a step because its output is not valid UTF-8
        if stdout:
            logger.info("Step produced stdout: %s", stdout.decode(errors="replace"))
        if stderr:
            logger.warning("Step produced stderr: %s", stderr.decode(errors="replace"))

        observation = Observation.model_validate(result["observation"], strict=True)

        return observation, 0.0, False, {}

    async def get_urls(self) -> dict[str, Any]:
        """Get URLs for the environment.

        Also stores the returned values on ``self.url`` and ``self.live_url``.

        Returns:
            dict: Dictionary with the keys ``url`` and ``live_url``
        """
        data, _, _ = await self.client.invoke(HudStyleConfig(function="get_urls", args=[]))

        self.url = data.get("url")
        self.live_url = data.get("live_url")

        return {
            "url": self.url,
            "live_url": self.live_url,
        }

    async def close(self) -> None:
        """Close the environment.

        Delegates to the client's ``close``, which should release any resources and
        clean up the environment.
        """
        await self.client.close()
@@ -0,0 +1,249 @@
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import logging
5
+ import tarfile
6
+ import tempfile
7
+ import uuid
8
+ from typing import TYPE_CHECKING, Any
9
+
10
+ import aiodocker
11
+ from aiohttp import ClientTimeout
12
+
13
+ from hud.env.docker_client import DockerClient, EnvironmentStatus
14
+ from hud.utils import ExecuteResult
15
+
16
+ if TYPE_CHECKING:
17
+ from aiodocker.containers import DockerContainer
18
+ from aiodocker.stream import Stream
19
+
20
+ logger = logging.getLogger("hud.env.docker_env_client")
21
+
22
class LocalDockerClient(DockerClient):
    """
    Docker-based environment client implementation.

    Controls a single container, identified by its ID, through an aiodocker
    connection.
    """

    @classmethod
    async def create(
        cls, dockerfile: str, ports: list[int] | None = None
    ) -> tuple[LocalDockerClient, dict[str, Any]]:
        """
        Creates a Docker environment client from a dockerfile.

        Builds an image from the dockerfile, then creates and starts a container
        from it with all exposed ports published.

        Args:
            dockerfile: The dockerfile content to build the Docker image
            ports: TCP ports to expose on the container (none when omitted)

        Returns:
            tuple: The client controlling the new container, and a dict holding the
                collected build log under ``build_output``
        """
        # Create a unique image tag
        image_tag = f"hud-env-{uuid.uuid4().hex[:8]}"

        if ports is None:
            ports = []

        dockerfile_bytes = dockerfile.encode("utf-8")

        # Initialize Docker client
        docker_client = aiodocker.Docker()
        try:
            # The build API takes its context as a (gzipped) tar archive, so wrap
            # the single Dockerfile in one.
            with tempfile.NamedTemporaryFile() as f:
                with tarfile.open(mode="w:gz", fileobj=f) as t:
                    dfinfo = tarfile.TarInfo("Dockerfile")
                    dfinfo.size = len(dockerfile_bytes)
                    t.addfile(dfinfo, io.BytesIO(dockerfile_bytes))

                # Reset the file pointer to the beginning of the file
                f.seek(0)

                # Build the image
                build_stream = await docker_client.images.build(
                    fileobj=f,
                    encoding="gzip",
                    tag=image_tag,
                    rm=True,
                    pull=True,
                    forcerm=True,
                )

            # Log the build output and keep it for the caller
            output_parts: list[str] = []
            for chunk in build_stream:
                if "stream" in chunk:
                    logger.info(chunk["stream"])
                    output_parts.append(chunk["stream"])
                elif "error" in chunk:
                    # Surface build failures instead of dropping them silently
                    logger.error("Docker build error: %s", chunk["error"])

            # Create and start the container
            container_config = {
                "Image": image_tag,
                "Tty": True,
                "OpenStdin": True,
                "Cmd": None,
                "HostConfig": {
                    "PublishAllPorts": True,
                },
                "ExposedPorts": {f"{port}/tcp": {} for port in ports},
            }

            container = await docker_client.containers.create(config=container_config)
            await container.start()
        except BaseException:
            # Nobody else owns the connection yet; close it so a failed build or
            # start does not leak it, then let the original error propagate.
            await docker_client.close()
            raise

        # Return the controller instance
        return cls(docker_client, container.id), {"build_output": "".join(output_parts)}

    def __init__(self, docker_conn: aiodocker.Docker, container_id: str) -> None:
        """
        Initialize the DockerClient.

        Args:
            docker_conn: Docker client connection
            container_id: ID of the Docker container to control
        """
        super().__init__()

        # Store container ID instead of container object
        self._container_id = container_id

        # Connection used for every container operation; closed in `close`
        self._docker = docker_conn

    @property
    def container_id(self) -> str:
        """Get the container ID."""
        return self._container_id

    @container_id.setter
    def container_id(self, value: str) -> None:
        """Set the container ID."""
        self._container_id = value

    async def _get_container(self) -> DockerContainer:
        """Get the container object from aiodocker."""
        return await self._docker.containers.get(self.container_id)

    async def get_status(self) -> EnvironmentStatus:
        """
        Get the current status of the Docker environment.

        Returns:
            EnvironmentStatus: The current status of the environment; ``ERROR`` for
                an unrecognised container state or when the lookup itself fails
        """
        try:
            container = await self._get_container()
            container_data = await container.show()

            # Check the container state
            state = container_data.get("State", {})
            status = state.get("Status", "").lower()

            if status == "running":
                return EnvironmentStatus.RUNNING
            if status in ("created", "starting"):
                return EnvironmentStatus.INITIALIZING
            if status in ("exited", "dead", "removing", "paused"):
                return EnvironmentStatus.COMPLETED
            # Any other state is considered an error
            return EnvironmentStatus.ERROR

        except Exception:
            # If we can't connect to the container or there's any other error
            return EnvironmentStatus.ERROR

    async def execute(
        self,
        command: list[str],
        *,
        timeout: int | None = None,
    ) -> ExecuteResult:
        """
        Execute a command in the container.

        Args:
            command: Command to execute
            timeout: Total timeout passed to ``ClientTimeout``; None for no limit

        Returns:
            ExecuteResult: Captured stdout and stderr plus the command's exit code
                (0 if Docker reports none)
        """
        container = await self._get_container()

        exec_result = await container.exec(
            cmd=command,
        )

        stdout_data = bytearray()
        stderr_data = bytearray()

        # `async with` closes the stream even if reading fails part-way
        async with exec_result.start(timeout=ClientTimeout(timeout), detach=False) as output:
            while True:
                message = await output.read_out()
                if message is None:
                    break
                if message.stream == 1:  # stdout
                    stdout_data.extend(message.data)
                elif message.stream == 2:  # stderr
                    stderr_data.extend(message.data)

        # The stream has ended, so the exec instance can report its exit code
        exit_code = (await exec_result.inspect()).get("ExitCode")

        return ExecuteResult(
            stdout=bytes(stdout_data),
            stderr=bytes(stderr_data),
            exit_code=0 if exit_code is None else exit_code,
        )

    async def get_archive(self, path: str) -> bytes:
        """
        Get an archive of a path from the container.

        Args:
            path: Path in the container to archive

        Returns:
            bytes: Tar archive containing the path contents

        Raises:
            TypeError: If the returned archive is not backed by a ``BytesIO``
        """
        container = await self._get_container()

        # Named `archive` so the `tarfile` module is not shadowed
        archive = await container.get_archive(path)
        # The archive is expected to be backed by an in-memory BytesIO;
        # read it out as a bytes object
        fileobj = archive.fileobj
        if not isinstance(fileobj, io.BytesIO):
            raise TypeError("fileobj is not a BytesIO object")
        return fileobj.getvalue()

    async def put_archive(self, path: str, data: bytes) -> None:
        """
        Put an archive of data at a path in the container.

        Args:
            path: Path in the container to extract the archive to
            data: Bytes of the tar archive to extract
        """
        container = await self._get_container()

        # Convert bytes to a file-like object for aiodocker
        file_obj = io.BytesIO(data)
        await container.put_archive(path=path, data=file_obj)

    async def close(self) -> None:
        """
        Close the Docker environment by stopping and removing the container.

        Cleanup errors are logged rather than raised; the Docker connection is
        closed in every case.
        """
        try:
            container = await self._get_container()
            await container.stop()
            await container.delete()
        except Exception as e:
            # Log the error but don't raise it since this is cleanup
            logger.warning("Error during Docker container cleanup: %s", e)
        finally:
            await self._docker.close()
@@ -0,0 +1,185 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from base64 import b64decode
5
+ from typing import TYPE_CHECKING, Any
6
+
7
+ from hud.env.client import Client
8
+ from hud.server import make_request
9
+ from hud.settings import settings
10
+ from hud.types import EnvironmentStatus
11
+ from hud.utils import ExecuteResult
12
+
13
+ if TYPE_CHECKING:
14
+ from hud.utils.config import HudStyleConfig
15
+
16
+ logger = logging.getLogger("hud.env.remote_env_client")
17
+
18
class RemoteClient(Client):
    """
    Client for an environment hosted behind the HUD API.

    Every operation is an HTTP request against ``settings.base_url``,
    authenticated with ``settings.api_key``.
    """

    @classmethod
    async def create(
        cls,
        *,
        gym_id: str | None = None,
        job_id: str | None = None,
        task_id: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> tuple[RemoteClient, dict[str, Any]]:
        """
        Request a new remote environment and wrap it in a client.

        Args:
            gym_id: The gym_id of the environment to create
            job_id: Job to attach the environment to (sent as ``run_id``)
            task_id: Task to associate with the environment
            metadata: Metadata to associate with the environment

        Returns:
            tuple: The client for the new environment, and the ``metadata`` entry
                of the API response (empty dict when absent)

        Raises:
            ValueError: If the API response carries no environment ID
        """
        payload = {
            # still named run_id for backwards compatibility
            "run_id": job_id,
            "metadata": {} if metadata is None else metadata,
            "gym_id": gym_id,
            "task_id": task_id,
        }

        # Ask the HUD API for a fresh environment
        response = await make_request(
            method="POST",
            url=f"{settings.base_url}/v2/create_environment",
            json=payload,
            api_key=settings.api_key,
        )

        # A missing or empty ID means creation did not succeed
        env_id = response.get("id")
        if not env_id:
            raise ValueError("Failed to create remote environment: No ID returned")

        client = cls(env_id)
        return client, response.get("metadata", {})

    def __init__(self, env_id: str) -> None:
        """
        Initialize the RemoteClient.

        Args:
            env_id: ID of the remote environment to control
        """
        super().__init__()
        self._env_id = env_id

    @property
    def env_id(self) -> str:
        """The ID of the remote environment."""
        return self._env_id

    async def get_status(self) -> EnvironmentStatus:
        """
        Query the API for the state of the remote environment.

        Returns:
            EnvironmentStatus: The mapped state; ``ERROR`` for an unrecognised
                state or when the request itself fails
        """
        try:
            response = await make_request(
                method="GET",
                url=f"{settings.base_url}/v2/environments/{self.env_id}/state",
                api_key=settings.api_key,
            )
            logger.debug("Environment status response: %s", response)

            state = response.get("state", "").lower()

            if state == "running":
                return EnvironmentStatus.RUNNING
            if state in ("initializing", "pending"):
                return EnvironmentStatus.INITIALIZING
            if state in ("completed", "terminated"):
                return EnvironmentStatus.COMPLETED

            # Anything else is treated as an error state
            logger.warning("Abnormal environment status response: %s", response)
            return EnvironmentStatus.ERROR

        except Exception:
            # Unreachable API or any other failure while checking
            logger.info("(potentially transient) Error getting environment status")
            return EnvironmentStatus.ERROR

    async def execute(
        self,
        command: list[str],
        *,
        workdir: str | None = None,
        timeout: float | None = None,
    ) -> ExecuteResult:
        """
        Run a command in the remote environment.
        No-op in some environments (like browser use).

        Args:
            command: Command to execute
            workdir: Working directory for the command (ignored for remote environments)
            timeout: Timeout value forwarded to the API

        Returns:
            ExecuteResult: Base64-decoded stdout and stderr plus the exit code
        """
        endpoint = f"{settings.base_url}/v2/environments/{self.env_id}/execute"
        body = {
            "command": command,
            "workdir": workdir,
            "timeout": timeout,
        }
        data = await make_request(
            method="POST",
            url=endpoint,
            json=body,
            api_key=settings.api_key,
        )

        # The API returns both streams base64-encoded
        stdout = b64decode(data["stdout"])
        stderr = b64decode(data["stderr"])
        return ExecuteResult(stdout=stdout, stderr=stderr, exit_code=data["exit_code"])

    async def invoke(self, config: HudStyleConfig) -> tuple[Any, bytes, bytes]:
        """
        Invoke a function in the environment.

        Args:
            config: The function call to perform, sent as ``config.model_dump()``

        Returns:
            tuple: The call's result, then base64-decoded stdout and stderr
        """
        endpoint = f"{settings.base_url}/v2/environments/{self.env_id}/invoke"
        data = await make_request(
            method="POST",
            url=endpoint,
            json=config.model_dump(),
            api_key=settings.api_key,
        )

        result = data["result"]
        stdout = b64decode(data["stdout"])
        stderr = b64decode(data["stderr"])
        return result, stdout, stderr

    async def close(self) -> None:
        """
        Close the remote environment by making a request to the server.
        """
        endpoint = f"{settings.base_url}/v2/environments/{self.env_id}/close"
        await make_request(
            method="POST",
            url=endpoint,
            api_key=settings.api_key,
        )