hud-python: hud_python-0.2.0-py3-none-any.whl → hud_python-0.2.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +3 -2
- hud/adapters/__init__.py +2 -1
- hud/adapters/claude/adapter.py +15 -2
- hud/adapters/common/types.py +7 -3
- hud/adapters/operator/adapter.py +10 -6
- hud/agent/__init__.py +2 -1
- hud/agent/claude.py +22 -2
- hud/agent/langchain.py +198 -0
- hud/agent/operator.py +35 -17
- hud/env/docker_client.py +1 -1
- hud/env/environment.py +182 -9
- hud/env/local_docker_client.py +3 -1
- hud/env/remote_client.py +4 -0
- hud/gym.py +3 -3
- hud/job.py +420 -12
- hud/task.py +41 -30
- hud/taskset.py +8 -0
- hud/types.py +5 -3
- hud/utils/common.py +31 -1
- hud/utils/config.py +2 -93
- hud/utils/progress.py +136 -0
- {hud_python-0.2.0.dist-info → hud_python-0.2.2.dist-info}/METADATA +52 -39
- hud_python-0.2.2.dist-info/RECORD +46 -0
- hud_python-0.2.0.dist-info/RECORD +0 -44
- {hud_python-0.2.0.dist-info → hud_python-0.2.2.dist-info}/WHEEL +0 -0
- {hud_python-0.2.0.dist-info → hud_python-0.2.2.dist-info}/licenses/LICENSE +0 -0
hud/env/environment.py
CHANGED
|
@@ -10,14 +10,13 @@ from pydantic import BaseModel
|
|
|
10
10
|
from hud.env.client import Client
|
|
11
11
|
from hud.env.remote_client import RemoteClient
|
|
12
12
|
from hud.task import Task
|
|
13
|
-
from hud.utils import
|
|
14
|
-
from hud.utils.config import REMOTE_EVALUATE,
|
|
15
|
-
|
|
16
|
-
if TYPE_CHECKING:
|
|
17
|
-
from hud.adapters.common import CLA
|
|
13
|
+
from hud.utils.common import HudStyleConfig, HudStyleConfigs
|
|
14
|
+
from hud.utils.config import REMOTE_EVALUATE, REMOTE_FUNCTION_PREFIX, REMOTE_SETUP, expand_config
|
|
18
15
|
|
|
19
16
|
logger = logging.getLogger("hud.environment")
|
|
20
17
|
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from hud.adapters.common import CLA
|
|
21
20
|
|
|
22
21
|
class Observation(BaseModel):
|
|
23
22
|
"""
|
|
@@ -46,6 +45,9 @@ class Environment(BaseModel):
|
|
|
46
45
|
task: Task | None = None
|
|
47
46
|
build_data: dict[str, Any]
|
|
48
47
|
|
|
48
|
+
# final response
|
|
49
|
+
final_response: str | None = None
|
|
50
|
+
|
|
49
51
|
async def _invoke_all(self, configs: HudStyleConfigs) -> list[Any]:
|
|
50
52
|
# Execute each config and collect results
|
|
51
53
|
configs_all = [configs] if not isinstance(configs, list) else configs
|
|
@@ -76,7 +78,7 @@ class Environment(BaseModel):
|
|
|
76
78
|
config: The configuration to use for the setup
|
|
77
79
|
"""
|
|
78
80
|
if isinstance(self.client, RemoteClient):
|
|
79
|
-
await self._invoke_all(create_remote_config(self
|
|
81
|
+
await self._invoke_all(create_remote_config(self, config, REMOTE_SETUP))
|
|
80
82
|
else:
|
|
81
83
|
if config is not None:
|
|
82
84
|
await self._invoke_all(config)
|
|
@@ -97,7 +99,7 @@ class Environment(BaseModel):
|
|
|
97
99
|
"""
|
|
98
100
|
if isinstance(self.client, RemoteClient):
|
|
99
101
|
results = await self._invoke_all(
|
|
100
|
-
create_remote_config(self
|
|
102
|
+
create_remote_config(self, config, REMOTE_EVALUATE))
|
|
101
103
|
else:
|
|
102
104
|
if config is not None:
|
|
103
105
|
results = await self._invoke_all(config)
|
|
@@ -143,9 +145,14 @@ class Environment(BaseModel):
|
|
|
143
145
|
"""
|
|
144
146
|
if actions is None or len(actions) == 0:
|
|
145
147
|
actions = []
|
|
146
|
-
|
|
148
|
+
args = [[action.model_dump() for action in actions]]
|
|
149
|
+
|
|
150
|
+
# TODO: Move this into the server side
|
|
151
|
+
if self._maybe_store_response(actions):
|
|
152
|
+
return Observation(text=self.final_response), 0, False, {}
|
|
153
|
+
|
|
147
154
|
result, stdout, stderr = await self.client.invoke(
|
|
148
|
-
HudStyleConfig(function="step", args=
|
|
155
|
+
HudStyleConfig(function="step", args=args)
|
|
149
156
|
)
|
|
150
157
|
if stdout:
|
|
151
158
|
logger.info("Step produced stdout: %s", stdout.decode())
|
|
@@ -156,6 +163,21 @@ class Environment(BaseModel):
|
|
|
156
163
|
observation = Observation.model_validate(result["observation"], strict=True)
|
|
157
164
|
|
|
158
165
|
return observation, 0, False, {}
|
|
166
|
+
|
|
167
|
+
def _maybe_store_response(self, actions: list[CLA]) -> bool:
|
|
168
|
+
"""Store the final response into the environment.
|
|
169
|
+
|
|
170
|
+
Args:
|
|
171
|
+
actions: The action(s) to check
|
|
172
|
+
|
|
173
|
+
Returns:
|
|
174
|
+
bool: True if the response was submitted, False otherwise
|
|
175
|
+
"""
|
|
176
|
+
if len(actions) > 0 and actions[-1].type == "response":
|
|
177
|
+
self.final_response = actions[-1].text
|
|
178
|
+
return True
|
|
179
|
+
return False
|
|
180
|
+
|
|
159
181
|
|
|
160
182
|
async def get_urls(self) -> dict[str, Any]:
|
|
161
183
|
"""Get URLs for the environment.
|
|
@@ -179,3 +201,154 @@ class Environment(BaseModel):
|
|
|
179
201
|
This should release any resources and clean up the environment.
|
|
180
202
|
"""
|
|
181
203
|
await self.client.close()
|
|
204
|
+
|
|
205
|
+
def create_remote_config(
|
|
206
|
+
env: Environment | None = None,
|
|
207
|
+
config: HudStyleConfigs | None = None,
|
|
208
|
+
function: str | None = None,
|
|
209
|
+
) -> list[HudStyleConfig]:
|
|
210
|
+
"""
|
|
211
|
+
Create a remote configuration for setup or evaluate, determining the final
|
|
212
|
+
function call structure based on the provided task or explicit config.
|
|
213
|
+
|
|
214
|
+
This function orchestrates how setup and evaluate steps defined in a Task
|
|
215
|
+
or passed directly are prepared for remote execution via `env._invoke_all`.
|
|
216
|
+
|
|
217
|
+
Args:
|
|
218
|
+
env: Environment object, potentially containing a task definition.
|
|
219
|
+
Used to access `env.task` and `env.final_response`.
|
|
220
|
+
config: Direct configuration override (e.g., passed to `env.evaluate(config=...)`).
|
|
221
|
+
Can be in various HudStyleConfigs formats.
|
|
222
|
+
function: The top-level function context, typically "setup" or "evaluate".
|
|
223
|
+
|
|
224
|
+
Returns:
|
|
225
|
+
list[HudStyleConfig]: A list containing a single HudStyleConfig object
|
|
226
|
+
ready for remote invocation via `client.invoke`.
|
|
227
|
+
The specific function/arguments are chosen based on this priority:
|
|
228
|
+
1. Explicit `config` parameter (if provided).
|
|
229
|
+
2. Specific `task` attribute (e.g., `task.evaluate`).
|
|
230
|
+
3. General `task.config` dictionary.
|
|
231
|
+
4. Default private function using `task.id`
|
|
232
|
+
(e.g., `private_evaluate(task.id)`).
|
|
233
|
+
5. Base `function` name with minimal/default arguments.
|
|
234
|
+
|
|
235
|
+
Logic & Examples (Assuming `function="evaluate"` for examples):
|
|
236
|
+
|
|
237
|
+
1) Explicit `config` provided: The `config` is expanded and becomes the `args`
|
|
238
|
+
for the top-level `function` call. If the environment has a final_response,
|
|
239
|
+
it's appended to these args.
|
|
240
|
+
- Example Input:
|
|
241
|
+
`env` (with `final_response="Paris"`)
|
|
242
|
+
`config=("contains_text", "Paris")`
|
|
243
|
+
`function="evaluate"`
|
|
244
|
+
- Example Output:
|
|
245
|
+
`[HudStyleConfig(function='evaluate', args=[
|
|
246
|
+
HudStyleConfig(function='contains_text', args=['Paris', 'Paris'])
|
|
247
|
+
])]`
|
|
248
|
+
|
|
249
|
+
2) No explicit `config`, Task has the attribute (e.g., `task.evaluate`):
|
|
250
|
+
The Task's attribute value (e.g., `task.evaluate`) is expanded and becomes the `args`
|
|
251
|
+
for the top-level `function` call. Task ID is added if present. `final_response` is
|
|
252
|
+
appended if present.
|
|
253
|
+
- Example Input:
|
|
254
|
+
`env` (`task=Task(id="t1", evaluate=("check_answer",), ...)`, `final_response="42"`)
|
|
255
|
+
`config=None`
|
|
256
|
+
`function="evaluate"`
|
|
257
|
+
- Example Output:
|
|
258
|
+
`[HudStyleConfig(function='evaluate', args=[HudStyleConfig(function='check_answer',
|
|
259
|
+
args=['42'], id='t1')])]`
|
|
260
|
+
|
|
261
|
+
3) No explicit `config`, no specific Task attribute, Task has `task.config`:
|
|
262
|
+
The `task.config` dictionary becomes the single argument for the top-level
|
|
263
|
+
`function` call. Task ID is added to the config dict if present. `final_response` is
|
|
264
|
+
appended if present.
|
|
265
|
+
- Example Input:
|
|
266
|
+
`env` (with `task=Task(id="t2", config={"expected": "val"}, ...)`)
|
|
267
|
+
`config=None`
|
|
268
|
+
`function="evaluate"`
|
|
269
|
+
- Example Output:
|
|
270
|
+
`[HudStyleConfig(function='evaluate', args=[{"expected": "val", "id": "t2"}])]`
|
|
271
|
+
|
|
272
|
+
4) No explicit `config`, no specific Task attribute, no `task.config`, Task has `task.id`:
|
|
273
|
+
Calls a private function (`private_<function>`) on the remote end, passing
|
|
274
|
+
the `task.id` as the first argument (followed by `final_response` if present).
|
|
275
|
+
- Example Input:
|
|
276
|
+
`env` (with `task=Task(id="t3", ...)`)
|
|
277
|
+
`config=None`
|
|
278
|
+
`function="evaluate"`
|
|
279
|
+
- Example Output:
|
|
280
|
+
`[HudStyleConfig(function='private_evaluate', args=['t3'])]`
|
|
281
|
+
|
|
282
|
+
5) No explicit `config` and no relevant Task info:
|
|
283
|
+
Calls the top-level `function` with empty args (or with only `final_response` if present).
|
|
284
|
+
- Example Input:
|
|
285
|
+
`env` (with `task=Task(...)`)
|
|
286
|
+
`config=None`
|
|
287
|
+
`function="evaluate"`
|
|
288
|
+
- Example Output:
|
|
289
|
+
`[HudStyleConfig(function='evaluate', args=[])]`
|
|
290
|
+
"""
|
|
291
|
+
# If no function provided, just expand the config and return it directly
|
|
292
|
+
if function is None:
|
|
293
|
+
if config:
|
|
294
|
+
return expand_config(config)
|
|
295
|
+
raise ValueError("Either function or config must be provided")
|
|
296
|
+
|
|
297
|
+
# Case 1: Explicit config provided
|
|
298
|
+
if config:
|
|
299
|
+
expanded_configs = expand_config(config)
|
|
300
|
+
if env and env.final_response:
|
|
301
|
+
# Ensure args is a list before appending
|
|
302
|
+
if not isinstance(expanded_configs[0].args, list):
|
|
303
|
+
expanded_configs[0].args = [expanded_configs[0].args]
|
|
304
|
+
expanded_configs[0].args.append(env.final_response) # for remote responses
|
|
305
|
+
return [HudStyleConfig(function=function, args=expanded_configs)]
|
|
306
|
+
|
|
307
|
+
# Otherwise, use the environment's task
|
|
308
|
+
task = env.task if env else None
|
|
309
|
+
|
|
310
|
+
# Must have a task for the remaining cases
|
|
311
|
+
if task is None:
|
|
312
|
+
raise ValueError("Either task or config must be provided")
|
|
313
|
+
|
|
314
|
+
# Case 2: Task has the specified function attribute
|
|
315
|
+
task_config = getattr(task, function, None)
|
|
316
|
+
if task_config:
|
|
317
|
+
expanded_configs = expand_config(task_config)
|
|
318
|
+
if task.id:
|
|
319
|
+
expanded_configs[0].id = task.id # for remote IDs
|
|
320
|
+
elif env and env.final_response:
|
|
321
|
+
# Ensure args is a list before appending
|
|
322
|
+
if not isinstance(expanded_configs[0].args, list):
|
|
323
|
+
expanded_configs[0].args = [expanded_configs[0].args]
|
|
324
|
+
expanded_configs[0].args.append(env.final_response) # for remote responses
|
|
325
|
+
return [HudStyleConfig(function=function, args=expanded_configs)]
|
|
326
|
+
|
|
327
|
+
# Case 3: Check for task.config
|
|
328
|
+
if hasattr(task, "config") and task.config:
|
|
329
|
+
# Ensure task.config is a dictionary before adding id
|
|
330
|
+
final_args = task.config.copy() if isinstance(task.config, dict) else {}
|
|
331
|
+
if task.id:
|
|
332
|
+
final_args["id"] = task.id # for remote IDs
|
|
333
|
+
if env and env.final_response:
|
|
334
|
+
# Append response, ensuring args exists and is a list
|
|
335
|
+
if "args" not in final_args:
|
|
336
|
+
final_args["args"] = []
|
|
337
|
+
if not isinstance(final_args["args"], list):
|
|
338
|
+
final_args["args"] = [final_args["args"]]
|
|
339
|
+
final_args["args"].append(env.final_response)
|
|
340
|
+
return [HudStyleConfig(function=function, args=[final_args])]
|
|
341
|
+
|
|
342
|
+
# Case 4: Use task.id
|
|
343
|
+
if task.id:
|
|
344
|
+
args_list = [task.id]
|
|
345
|
+
if env and env.final_response:
|
|
346
|
+
args_list.append(env.final_response) # Append final response
|
|
347
|
+
return [HudStyleConfig(function=f"{REMOTE_FUNCTION_PREFIX}{function}", args=args_list)]
|
|
348
|
+
|
|
349
|
+
# Case 5: No valid configuration found
|
|
350
|
+
args_list = []
|
|
351
|
+
if env and env.final_response:
|
|
352
|
+
args_list.append(env.final_response)
|
|
353
|
+
return [HudStyleConfig(function=function, args=args_list)]
|
|
354
|
+
|
hud/env/local_docker_client.py
CHANGED
|
@@ -25,7 +25,9 @@ class LocalDockerClient(DockerClient):
|
|
|
25
25
|
"""
|
|
26
26
|
|
|
27
27
|
@classmethod
|
|
28
|
-
async def create(cls, dockerfile: str, ports: list[int] | None = None) -> tuple[
|
|
28
|
+
async def create(cls, dockerfile: str, ports: list[int] | None = None) -> tuple[
|
|
29
|
+
LocalDockerClient, dict[str, Any]
|
|
30
|
+
]:
|
|
29
31
|
"""
|
|
30
32
|
Creates a Docker environment client from a dockerfile.
|
|
31
33
|
|
hud/env/remote_client.py
CHANGED
|
@@ -74,6 +74,10 @@ class RemoteClient(Client):
|
|
|
74
74
|
|
|
75
75
|
build_data = response.get("metadata", {})
|
|
76
76
|
|
|
77
|
+
if response.get("readme"):
|
|
78
|
+
logger.info("[HUD] %s gym created, see how to use it at %s", gym_id,
|
|
79
|
+
response.get("readme"))
|
|
80
|
+
|
|
77
81
|
return controller, build_data
|
|
78
82
|
|
|
79
83
|
def __init__(self, env_id: str) -> None:
|
hud/gym.py
CHANGED
|
@@ -8,12 +8,12 @@ from hud.env.environment import Environment
|
|
|
8
8
|
from hud.env.local_docker_client import LocalDockerClient
|
|
9
9
|
from hud.env.remote_client import RemoteClient
|
|
10
10
|
from hud.env.remote_docker_client import RemoteDockerClient
|
|
11
|
-
from hud.task import Task
|
|
12
11
|
from hud.types import CustomGym, Gym
|
|
13
12
|
from hud.utils.common import get_gym_id
|
|
14
13
|
|
|
15
14
|
if TYPE_CHECKING:
|
|
16
15
|
from hud.job import Job
|
|
16
|
+
from hud.task import Task
|
|
17
17
|
|
|
18
18
|
logger = logging.getLogger("hud.gym")
|
|
19
19
|
|
|
@@ -54,9 +54,9 @@ async def make(
|
|
|
54
54
|
|
|
55
55
|
gym = None
|
|
56
56
|
task = None
|
|
57
|
-
if isinstance(env_src,
|
|
57
|
+
if isinstance(env_src, str | CustomGym):
|
|
58
58
|
gym = env_src
|
|
59
|
-
|
|
59
|
+
else:
|
|
60
60
|
gym = env_src.gym
|
|
61
61
|
task = env_src
|
|
62
62
|
|