hud-python 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (58)
  1. hud/__init__.py +4 -3
  2. hud/adapters/claude/adapter.py +5 -14
  3. hud/adapters/common/adapter.py +3 -3
  4. hud/adapters/common/tests/__init__.py +0 -0
  5. hud/adapters/common/tests/test_adapter.py +277 -0
  6. hud/adapters/common/types.py +3 -3
  7. hud/adapters/operator/adapter.py +16 -23
  8. hud/agent/__init__.py +8 -1
  9. hud/agent/base.py +28 -28
  10. hud/agent/claude.py +69 -60
  11. hud/agent/langchain.py +32 -26
  12. hud/agent/operator.py +75 -67
  13. hud/env/__init__.py +5 -5
  14. hud/env/client.py +2 -2
  15. hud/env/docker_client.py +37 -39
  16. hud/env/environment.py +91 -66
  17. hud/env/local_docker_client.py +5 -7
  18. hud/env/remote_client.py +39 -32
  19. hud/env/remote_docker_client.py +13 -3
  20. hud/evaluators/__init__.py +2 -3
  21. hud/evaluators/base.py +4 -3
  22. hud/evaluators/inspect.py +3 -8
  23. hud/evaluators/judge.py +34 -58
  24. hud/evaluators/match.py +42 -49
  25. hud/evaluators/remote.py +13 -26
  26. hud/evaluators/tests/__init__.py +0 -0
  27. hud/evaluators/tests/test_inspect.py +12 -0
  28. hud/evaluators/tests/test_judge.py +231 -0
  29. hud/evaluators/tests/test_match.py +115 -0
  30. hud/evaluators/tests/test_remote.py +98 -0
  31. hud/exceptions.py +167 -0
  32. hud/gym.py +9 -7
  33. hud/job.py +179 -109
  34. hud/server/__init__.py +2 -2
  35. hud/server/requests.py +148 -186
  36. hud/server/tests/__init__.py +0 -0
  37. hud/server/tests/test_requests.py +275 -0
  38. hud/settings.py +3 -2
  39. hud/task.py +9 -19
  40. hud/taskset.py +44 -11
  41. hud/trajectory.py +6 -9
  42. hud/types.py +12 -9
  43. hud/utils/__init__.py +2 -2
  44. hud/utils/common.py +36 -15
  45. hud/utils/config.py +45 -30
  46. hud/utils/progress.py +34 -21
  47. hud/utils/telemetry.py +10 -11
  48. hud/utils/tests/__init__.py +0 -0
  49. hud/utils/tests/test_common.py +52 -0
  50. hud/utils/tests/test_config.py +129 -0
  51. hud/utils/tests/test_progress.py +225 -0
  52. hud/utils/tests/test_telemetry.py +37 -0
  53. hud/utils/tests/test_version.py +8 -0
  54. {hud_python-0.2.2.dist-info → hud_python-0.2.4.dist-info}/METADATA +9 -6
  55. hud_python-0.2.4.dist-info/RECORD +62 -0
  56. hud_python-0.2.2.dist-info/RECORD +0 -46
  57. {hud_python-0.2.2.dist-info → hud_python-0.2.4.dist-info}/WHEEL +0 -0
  58. {hud_python-0.2.2.dist-info → hud_python-0.2.4.dist-info}/licenses/LICENSE +0 -0
hud/env/__init__.py CHANGED
@@ -3,9 +3,9 @@ from __future__ import annotations
3
3
  from . import docker_client, environment, local_docker_client, remote_client, remote_docker_client
4
4
 
5
5
  __all__ = [
6
- "docker_client",
7
- "environment",
8
- "local_docker_client",
9
- "remote_client",
10
- "remote_docker_client",
6
+ "docker_client",
7
+ "environment",
8
+ "local_docker_client",
9
+ "remote_client",
10
+ "remote_docker_client",
11
11
  ]
hud/env/client.py CHANGED
@@ -7,7 +7,7 @@ from pydantic import BaseModel
7
7
 
8
8
  if TYPE_CHECKING:
9
9
  from hud.types import EnvironmentStatus
10
- from hud.utils.config import HudStyleConfig
10
+ from hud.utils.config import FunctionConfig
11
11
 
12
12
 
13
13
  class Client(BaseModel, ABC):
@@ -16,7 +16,7 @@ class Client(BaseModel, ABC):
16
16
  """
17
17
 
18
18
  @abstractmethod
19
- async def invoke(self, config: HudStyleConfig) -> Any:
19
+ async def invoke(self, config: FunctionConfig) -> Any:
20
20
  """
21
21
  Invoke the environment with the given config.
22
22
  """
hud/env/docker_client.py CHANGED
@@ -16,7 +16,7 @@ from hud.utils.common import directory_to_tar_bytes
16
16
 
17
17
  if TYPE_CHECKING:
18
18
  from hud.utils import ExecuteResult
19
- from hud.utils.config import HudStyleConfig
19
+ from hud.utils.config import FunctionConfig
20
20
 
21
21
  logger = logging.getLogger("hud.env.docker_client")
22
22
 
@@ -33,7 +33,7 @@ class InvokeError(Exception):
33
33
  """
34
34
 
35
35
 
36
- def invoke_template(config: HudStyleConfig, package_name: str, divider: str) -> str:
36
+ def invoke_template(config: FunctionConfig, package_name: str, divider: str) -> str:
37
37
  """
38
38
  Return a python script to run the given config.
39
39
  """
@@ -51,16 +51,17 @@ print("{divider}")
51
51
  print(result_str)
52
52
  """
53
53
 
54
+
54
55
  class DockerClient(Client):
55
56
  """
56
57
  Base class for environment clients.
57
-
58
+
58
59
  Handles updating the environment when local files change.
59
60
  """
60
-
61
+
61
62
  _last_pyproject_toml_str: str | None = None
62
63
  _last_update_time: int = 0
63
- _last_file_mtimes: dict[str, float] = {} # noqa: RUF012
64
+ _last_file_mtimes: dict[str, float] = {} # noqa: RUF012 - Not recognized as Pydantic model
64
65
  _source_path: Path | None = None
65
66
  _package_name: str | None = None
66
67
 
@@ -68,47 +69,46 @@ class DockerClient(Client):
68
69
  def source_path(self) -> Path | None:
69
70
  """Get the source path."""
70
71
  return self._source_path
71
-
72
+
72
73
  @property
73
74
  def package_name(self) -> str:
74
75
  """Get the package name."""
75
76
  if not self._package_name:
76
77
  raise ValueError("Package name not set")
77
78
  return self._package_name
78
-
79
79
 
80
80
  def set_source_path(self, source_path: Path) -> None:
81
81
  """
82
82
  Set the source path for this environment controller.
83
83
  Can only be set once, and cannot be set if source_path is already set.
84
-
84
+
85
85
  Args:
86
86
  source_path: Path to the source code to use in the environment
87
-
87
+
88
88
  Raises:
89
89
  ValueError: If source_path has already been set
90
90
  """
91
91
  if self._source_path:
92
92
  raise ValueError("Source path has already been set")
93
-
93
+
94
94
  # Validate source path
95
95
  if not source_path.exists():
96
96
  raise FileNotFoundError(f"Source path {source_path} does not exist")
97
97
  if not source_path.is_dir():
98
98
  raise NotADirectoryError(f"Source path {source_path} is not a directory")
99
-
99
+
100
100
  # Parse pyproject.toml to get package name
101
101
  pyproject_path = source_path / "pyproject.toml"
102
102
  if not pyproject_path.exists():
103
103
  raise FileNotFoundError(f"pyproject.toml not found in {source_path}")
104
-
104
+
105
105
  pyproject_data = toml.load(pyproject_path)
106
106
  self._package_name = pyproject_data.get("project", {}).get("name")
107
107
  if not self._package_name:
108
108
  raise ValueError("Could not find package name in pyproject.toml")
109
-
109
+
110
110
  self._source_path = source_path
111
-
111
+
112
112
  @classmethod
113
113
  @abc.abstractmethod
114
114
  async def create(cls, dockerfile: str) -> DockerClient:
@@ -121,26 +121,26 @@ class DockerClient(Client):
121
121
  Returns:
122
122
  EnvClient: An instance of the environment client
123
123
  """
124
-
124
+
125
125
  @abc.abstractmethod
126
126
  async def get_status(self) -> EnvironmentStatus:
127
127
  """
128
128
  Get the current status of the environment.
129
-
129
+
130
130
  Returns:
131
131
  EnvironmentStatus: A status enum indicating the current state of the environment
132
132
  """
133
-
133
+
134
134
  def _get_all_file_mtimes(self) -> dict[str, float]:
135
135
  """
136
136
  Get modification times for all files in the source path.
137
-
137
+
138
138
  Returns:
139
139
  Dict[str, float]: Dictionary mapping file paths to modification times
140
140
  """
141
141
  if not self._source_path:
142
142
  return {}
143
-
143
+
144
144
  file_mtimes = {}
145
145
  for root, _, files in os.walk(self._source_path):
146
146
  for file in files:
@@ -151,12 +151,12 @@ class DockerClient(Client):
151
151
  # Skip files that can't be accessed
152
152
  continue
153
153
  return file_mtimes
154
-
154
+
155
155
  async def needs_update(self) -> bool:
156
156
  """
157
157
  Check if the environment needs an update by:
158
158
  1. Checking if any file has been modified since the last update
159
-
159
+
160
160
  Returns:
161
161
  bool: True if the environment needs an update, False otherwise.
162
162
  """
@@ -166,18 +166,18 @@ class DockerClient(Client):
166
166
 
167
167
  # Check if any file has been modified since the last update
168
168
  current_mtimes = self._get_all_file_mtimes()
169
-
169
+
170
170
  # If we don't have previous modification times, we need an update
171
171
  if not self._last_file_mtimes:
172
172
  return True
173
-
173
+
174
174
  # Check for new or modified files
175
175
  for file_path, mtime in current_mtimes.items():
176
176
  if file_path not in self._last_file_mtimes or mtime > self._last_file_mtimes[file_path]:
177
177
  return True
178
-
178
+
179
179
  return False
180
-
180
+
181
181
  async def update(self) -> None:
182
182
  """
183
183
  Base update method for environment controllers.
@@ -186,22 +186,22 @@ class DockerClient(Client):
186
186
  # If no source path, nothing to update
187
187
  if not self._source_path:
188
188
  return
189
-
189
+
190
190
  logger.info("Updating environment")
191
191
 
192
192
  # Save current file modification times
193
193
  self._last_file_mtimes = self._get_all_file_mtimes()
194
-
194
+
195
195
  # Create tar archive of the source code and send it to the container
196
196
  tar_bytes = directory_to_tar_bytes(self._source_path)
197
197
  await self.execute(["mkdir", "-p", "/root/controller"], timeout=5)
198
198
  await self.put_archive("/root/controller", tar_bytes)
199
-
199
+
200
200
  # Check if pyproject.toml exists and parse it
201
201
  pyproject_path = self._source_path / "pyproject.toml"
202
202
  if not pyproject_path.exists():
203
203
  raise FileNotFoundError(f"pyproject.toml not found in {self._source_path}")
204
-
204
+
205
205
  # Read and parse the current content of pyproject.toml
206
206
  current_pyproject_content = pyproject_path.read_text()
207
207
  if (
@@ -224,8 +224,7 @@ class DockerClient(Client):
224
224
  logger.warning("STDERR:\n%s", result["stderr"])
225
225
  # Save current pyproject.toml content
226
226
  self._last_pyproject_toml_str = current_pyproject_content
227
-
228
-
227
+
229
228
  @abc.abstractmethod
230
229
  async def execute(
231
230
  self,
@@ -235,20 +234,20 @@ class DockerClient(Client):
235
234
  ) -> ExecuteResult:
236
235
  """
237
236
  Execute a command in the environment. May not be supported by all environments.
238
-
237
+
239
238
  Args:
240
239
  command: The command to execute
241
240
  workdir: The working directory to execute the command in
242
241
  timeout: The timeout for the command
243
-
242
+
244
243
  Returns:
245
244
  ExecuteResult: The result of the command
246
245
  """
247
-
248
- async def invoke(self, config: HudStyleConfig) -> tuple[Any, bytes, bytes]:
246
+
247
+ async def invoke(self, config: FunctionConfig) -> tuple[Any, bytes, bytes]:
249
248
  """
250
249
  Invoke a function in the environment. Supported by all environments.
251
-
250
+
252
251
  Args:
253
252
  config: The configuration to invoke
254
253
 
@@ -289,11 +288,11 @@ class DockerClient(Client):
289
288
  May not be supported by all environments. (notably browser environments)
290
289
  Args:
291
290
  path: The path to get the archive of
292
-
291
+
293
292
  Returns:
294
293
  bytes: The archive of the path
295
294
  """
296
-
295
+
297
296
  @abc.abstractmethod
298
297
  async def put_archive(self, path: str, data: bytes) -> bool:
299
298
  """
@@ -303,4 +302,3 @@ class DockerClient(Client):
303
302
  path: The path to put the archive at
304
303
  data: The data to put in the archive
305
304
  """
306
-
hud/env/environment.py CHANGED
@@ -10,25 +10,21 @@ from pydantic import BaseModel
10
10
  from hud.env.client import Client
11
11
  from hud.env.remote_client import RemoteClient
12
12
  from hud.task import Task
13
- from hud.utils.common import HudStyleConfig, HudStyleConfigs
14
- from hud.utils.config import REMOTE_EVALUATE, REMOTE_FUNCTION_PREFIX, REMOTE_SETUP, expand_config
13
+ from hud.utils.common import FunctionConfig, FunctionConfigs, Observation
14
+ from hud.utils.config import (
15
+ LOCAL_EVALUATORS,
16
+ REMOTE_EVALUATE,
17
+ REMOTE_FUNCTION_PREFIX,
18
+ REMOTE_SETUP,
19
+ expand_config,
20
+ )
21
+ from hud.utils.telemetry import stream
15
22
 
16
23
  logger = logging.getLogger("hud.environment")
17
24
 
18
25
  if TYPE_CHECKING:
19
26
  from hud.adapters.common import CLA
20
-
21
- class Observation(BaseModel):
22
- """
23
- Observation from the environment.
24
-
25
- Attributes:
26
- screenshot: Base64 encoded PNG string of the screen
27
- text: Text observation, if available
28
- """
29
-
30
- screenshot: str | None = None # base64 string png
31
- text: str | None = None
27
+ from hud.agent import Agent
32
28
 
33
29
 
34
30
  class Environment(BaseModel):
@@ -48,7 +44,7 @@ class Environment(BaseModel):
48
44
  # final response
49
45
  final_response: str | None = None
50
46
 
51
- async def _invoke_all(self, configs: HudStyleConfigs) -> list[Any]:
47
+ async def _invoke_all(self, configs: FunctionConfigs) -> list[Any]:
52
48
  # Execute each config and collect results
53
49
  configs_all = [configs] if not isinstance(configs, list) else configs
54
50
  results = []
@@ -69,8 +65,8 @@ class Environment(BaseModel):
69
65
  stderr.decode(),
70
66
  )
71
67
  return results
72
-
73
- async def _setup(self, config: HudStyleConfigs | None = None) -> None:
68
+
69
+ async def _setup(self, config: FunctionConfigs | None = None) -> None:
74
70
  """
75
71
  Setup the environment.
76
72
 
@@ -87,7 +83,7 @@ class Environment(BaseModel):
87
83
  else:
88
84
  raise ValueError("No config or task provided for local environment")
89
85
 
90
- async def evaluate(self, config: HudStyleConfigs | None = None) -> Any:
86
+ async def evaluate(self, config: FunctionConfigs | None = None) -> Any:
91
87
  """
92
88
  Evaluate the environment.
93
89
 
@@ -98,8 +94,7 @@ class Environment(BaseModel):
98
94
  Any: Result of the evaluation
99
95
  """
100
96
  if isinstance(self.client, RemoteClient):
101
- results = await self._invoke_all(
102
- create_remote_config(self, config, REMOTE_EVALUATE))
97
+ results = await self._invoke_all(create_remote_config(self, config, REMOTE_EVALUATE))
103
98
  else:
104
99
  if config is not None:
105
100
  results = await self._invoke_all(config)
@@ -111,11 +106,10 @@ class Environment(BaseModel):
111
106
  return results[0]
112
107
  else:
113
108
  return results
114
-
115
109
 
116
- async def reset(self, configs: HudStyleConfigs | None = None) -> tuple[
117
- Observation, dict[str, Any]
118
- ]:
110
+ async def reset(
111
+ self, configs: FunctionConfigs | None = None
112
+ ) -> tuple[Observation, dict[str, Any]]:
119
113
  """
120
114
  Reset the environment.
121
115
 
@@ -126,15 +120,15 @@ class Environment(BaseModel):
126
120
  Observation: The first observation from the environment
127
121
  info: Dictionary of information about the environment
128
122
  """
129
- #await self._setup(configs)
123
+ # await self._setup(configs)
130
124
  obs, _, _, info = await self.step()
131
125
  if self.task and self.task.prompt:
132
126
  obs.text = self.task.prompt
133
127
  return obs, info
134
128
 
135
- async def step(self, actions: list[CLA] | None = None) -> tuple[
136
- Observation, float, bool, dict[str, Any]
137
- ]:
129
+ async def step(
130
+ self, actions: CLA | list[CLA] | None = None
131
+ ) -> tuple[Observation, float, bool, dict[str, Any]]:
138
132
  """Execute a step in the environment.
139
133
 
140
134
  Args:
@@ -143,6 +137,8 @@ class Environment(BaseModel):
143
137
  Returns:
144
138
  Any: Result of the step execution
145
139
  """
140
+ if not isinstance(actions, list) and actions is not None:
141
+ actions = [actions]
146
142
  if actions is None or len(actions) == 0:
147
143
  actions = []
148
144
  args = [[action.model_dump() for action in actions]]
@@ -150,20 +146,19 @@ class Environment(BaseModel):
150
146
  # TODO: Move this into the server side
151
147
  if self._maybe_store_response(actions):
152
148
  return Observation(text=self.final_response), 0, False, {}
153
-
149
+
154
150
  result, stdout, stderr = await self.client.invoke(
155
- HudStyleConfig(function="step", args=args)
151
+ FunctionConfig(function="step", args=args)
156
152
  )
157
153
  if stdout:
158
154
  logger.info("Step produced stdout: %s", stdout.decode())
159
155
  if stderr:
160
156
  logger.warning("Step produced stderr: %s", stderr.decode())
161
157
 
162
-
163
158
  observation = Observation.model_validate(result["observation"], strict=True)
164
159
 
165
160
  return observation, 0, False, {}
166
-
161
+
167
162
  def _maybe_store_response(self, actions: list[CLA]) -> bool:
168
163
  """Store the final response into the environment.
169
164
 
@@ -178,14 +173,13 @@ class Environment(BaseModel):
178
173
  return True
179
174
  return False
180
175
 
181
-
182
176
  async def get_urls(self) -> dict[str, Any]:
183
177
  """Get URLs for the environment.
184
178
 
185
179
  Returns:
186
180
  dict: Dictionary of URLs for accessing the environment
187
181
  """
188
- data, _, _ = await self.client.invoke(HudStyleConfig(function="get_urls", args=[]))
182
+ data, _, _ = await self.client.invoke(FunctionConfig(function="get_urls", args=[]))
189
183
 
190
184
  self.url = data.get("url")
191
185
  self.live_url = data.get("live_url")
@@ -202,11 +196,43 @@ class Environment(BaseModel):
202
196
  """
203
197
  await self.client.close()
204
198
 
199
+ async def stream(self) -> str | None:
200
+ urls = await self.get_urls()
201
+ if urls["live_url"] is None:
202
+ logger.warning("No live URL found")
203
+ return None
204
+ # Stream the live view
205
+ return stream(urls["live_url"])
206
+
207
+ async def run(self, agent: Agent, max_steps: int = 27, verbose: bool = True) -> Any:
208
+ """Run an agent in the environment.
209
+
210
+ Args:
211
+ agent: The agent to run
212
+ """
213
+ if verbose:
214
+ logger.info("[HUD] Running agent in environment...")
215
+ obs, _ = await self.reset()
216
+ for i in range(max_steps):
217
+ action, done = await agent.predict(obs)
218
+ if verbose:
219
+ logger.info("[HUD] Step %d: Action: %s", i, action)
220
+ obs, reward, terminated, info = await self.step(action)
221
+ if verbose:
222
+ logger.info("[HUD] Step %d: Observation: %s", i, obs)
223
+ if done or terminated:
224
+ break
225
+ result = await self.evaluate()
226
+ if verbose:
227
+ logger.info("[HUD] Evaluation result: %s", result)
228
+ return result
229
+
230
+
205
231
  def create_remote_config(
206
232
  env: Environment | None = None,
207
- config: HudStyleConfigs | None = None,
233
+ config: FunctionConfigs | None = None,
208
234
  function: str | None = None,
209
- ) -> list[HudStyleConfig]:
235
+ ) -> list[FunctionConfig]:
210
236
  """
211
237
  Create a remote configuration for setup or evaluate, determining the final
212
238
  function call structure based on the provided task or explicit config.
@@ -218,11 +244,11 @@ def create_remote_config(
218
244
  env: Environment object, potentially containing a task definition.
219
245
  Used to access `env.task` and `env.final_response`.
220
246
  config: Direct configuration override (e.g., passed to `env.evaluate(config=...)`).
221
- Can be in various HudStyleConfigs formats.
247
+ Can be in various FunctionConfigs formats.
222
248
  function: The top-level function context, typically "setup" or "evaluate".
223
249
 
224
250
  Returns:
225
- list[HudStyleConfig]: A list containing a single HudStyleConfig object
251
+ list[FunctionConfig]: A list containing a single FunctionConfig object
226
252
  ready for remote invocation via `client.invoke`.
227
253
  The specific function/arguments are chosen based on this priority:
228
254
  1. Explicit `config` parameter (if provided).
@@ -242,8 +268,8 @@ def create_remote_config(
242
268
  `config=("contains_text", "Paris")`
243
269
  `function="evaluate"`
244
270
  - Example Output:
245
- `[HudStyleConfig(function='evaluate', args=[
246
- HudStyleConfig(function='contains_text', args=['Paris', 'Paris'])
271
+ `[FunctionConfig(function='evaluate', args=[
272
+ FunctionConfig(function='contains_text', args=['Paris', 'Paris'])
247
273
  ])]`
248
274
 
249
275
  2) No explicit `config`, Task has the attribute (e.g., `task.evaluate`):
@@ -255,7 +281,7 @@ def create_remote_config(
255
281
  `config=None`
256
282
  `function="evaluate"`
257
283
  - Example Output:
258
- `[HudStyleConfig(function='evaluate', args=[HudStyleConfig(function='check_answer',
284
+ `[FunctionConfig(function='evaluate', args=[FunctionConfig(function='check_answer',
259
285
  args=['42'], id='t1')])]`
260
286
 
261
287
  3) No explicit `config`, no specific Task attribute, Task has `task.config`:
@@ -267,7 +293,7 @@ def create_remote_config(
267
293
  `config=None`
268
294
  `function="evaluate"`
269
295
  - Example Output:
270
- `[HudStyleConfig(function='evaluate', args=[{"expected": "val", "id": "t2"}])]`
296
+ `[FunctionConfig(function='evaluate', args=[{"expected": "val", "id": "t2"}])]`
271
297
 
272
298
  4) No explicit `config`, no specific Task attribute, no `task.config`, Task has `task.id`:
273
299
  Calls a private function (`private_<function>`) on the remote end, passing
@@ -277,7 +303,7 @@ def create_remote_config(
277
303
  `config=None`
278
304
  `function="evaluate"`
279
305
  - Example Output:
280
- `[HudStyleConfig(function='private_evaluate', args=['t3'])]`
306
+ `[FunctionConfig(function='private_evaluate', args=['t3'])]`
281
307
 
282
308
  5) No explicit `config` and no relevant Task info:
283
309
  Calls the top-level `function` with empty args.
@@ -286,50 +312,50 @@ def create_remote_config(
286
312
  `config=None`
287
313
  `function="evaluate"`
288
314
  - Example Output:
289
- `[HudStyleConfig(function='evaluate', args=[])]`
315
+ `[FunctionConfig(function='evaluate', args=[])]`
290
316
  """
291
317
  # If no function provided, just expand the config and return it directly
292
318
  if function is None:
293
319
  if config:
294
320
  return expand_config(config)
295
321
  raise ValueError("Either function or config must be provided")
296
-
322
+
297
323
  # Case 1: Explicit config provided
298
324
  if config:
299
325
  expanded_configs = expand_config(config)
300
- if env and env.final_response:
326
+ if env and env.final_response and expanded_configs[0].args[0] in LOCAL_EVALUATORS:
301
327
  # Ensure args is a list before appending
302
328
  if not isinstance(expanded_configs[0].args, list):
303
- expanded_configs[0].args = [expanded_configs[0].args]
304
- expanded_configs[0].args.append(env.final_response) # for remote responses
305
- return [HudStyleConfig(function=function, args=expanded_configs)]
306
-
329
+ expanded_configs[0].args = [expanded_configs[0].args]
330
+ expanded_configs[0].args.append(env.final_response) # for remote responses
331
+ return [FunctionConfig(function=function, args=expanded_configs)]
332
+
307
333
  # Otherwise, use the environment's task
308
334
  task = env.task if env else None
309
-
335
+
310
336
  # Must have a task for the remaining cases
311
337
  if task is None:
312
338
  raise ValueError("Either task or config must be provided")
313
-
339
+
314
340
  # Case 2: Task has the specified function attribute
315
341
  task_config = getattr(task, function, None)
316
342
  if task_config:
317
343
  expanded_configs = expand_config(task_config)
318
344
  if task.id:
319
- expanded_configs[0].id = task.id # for remote IDs
320
- elif env and env.final_response:
345
+ expanded_configs[0].id = task.id # for remote IDs
346
+ if env and env.final_response and expanded_configs[0].function in LOCAL_EVALUATORS:
321
347
  # Ensure args is a list before appending
322
348
  if not isinstance(expanded_configs[0].args, list):
323
- expanded_configs[0].args = [expanded_configs[0].args]
324
- expanded_configs[0].args.append(env.final_response) # for remote responses
325
- return [HudStyleConfig(function=function, args=expanded_configs)]
326
-
349
+ expanded_configs[0].args = [expanded_configs[0].args]
350
+ expanded_configs[0].args.append(env.final_response) # for remote responses
351
+ return [FunctionConfig(function=function, args=expanded_configs)]
352
+
327
353
  # Case 3: Check for task.config
328
354
  if hasattr(task, "config") and task.config:
329
355
  # Ensure task.config is a dictionary before adding id
330
356
  final_args = task.config.copy() if isinstance(task.config, dict) else {}
331
357
  if task.id:
332
- final_args["id"] = task.id # for remote IDs
358
+ final_args["id"] = task.id # for remote IDs
333
359
  if env and env.final_response:
334
360
  # Append response, ensuring args exists and is a list
335
361
  if "args" not in final_args:
@@ -337,18 +363,17 @@ def create_remote_config(
337
363
  if not isinstance(final_args["args"], list):
338
364
  final_args["args"] = [final_args["args"]]
339
365
  final_args["args"].append(env.final_response)
340
- return [HudStyleConfig(function=function, args=[final_args])]
341
-
366
+ return [FunctionConfig(function=function, args=[final_args])]
367
+
342
368
  # Case 4: Use task.id
343
369
  if task.id:
344
370
  args_list = [task.id]
345
371
  if env and env.final_response:
346
- args_list.append(env.final_response) # Append final response
347
- return [HudStyleConfig(function=f"{REMOTE_FUNCTION_PREFIX}{function}", args=args_list)]
348
-
372
+ args_list.append(env.final_response) # Append final response
373
+ return [FunctionConfig(function=f"{REMOTE_FUNCTION_PREFIX}{function}", args=args_list)]
374
+
349
375
  # Case 5: No valid configuration found
350
376
  args_list = []
351
377
  if env and env.final_response:
352
378
  args_list.append(env.final_response)
353
- return [HudStyleConfig(function=function, args=args_list)]
354
-
379
+ return [FunctionConfig(function=function, args=args_list)]
hud/env/local_docker_client.py CHANGED
@@ -19,15 +19,16 @@ if TYPE_CHECKING:
19
19
 
20
20
  logger = logging.getLogger("hud.env.docker_env_client")
21
21
 
22
+
22
23
  class LocalDockerClient(DockerClient):
23
24
  """
24
25
  Docker-based environment client implementation.
25
26
  """
26
27
 
27
28
  @classmethod
28
- async def create(cls, dockerfile: str, ports: list[int] | None = None) -> tuple[
29
- LocalDockerClient, dict[str, Any]
30
- ]:
29
+ async def create(
30
+ cls, dockerfile: str, ports: list[int] | None = None
31
+ ) -> tuple[LocalDockerClient, dict[str, Any]]:
31
32
  """
32
33
  Creates a Docker environment client from a dockerfile.
33
34
 
@@ -86,9 +87,7 @@ class LocalDockerClient(DockerClient):
86
87
  "HostConfig": {
87
88
  "PublishAllPorts": True,
88
89
  },
89
- "ExposedPorts": {
90
- f"{port}/tcp": {} for port in ports
91
- },
90
+ "ExposedPorts": {f"{port}/tcp": {} for port in ports},
92
91
  }
93
92
 
94
93
  container = await docker_client.containers.create(config=container_config)
@@ -198,7 +197,6 @@ class LocalDockerClient(DockerClient):
198
197
  exit_code=0,
199
198
  )
200
199
 
201
-
202
200
  async def get_archive(self, path: str) -> bytes:
203
201
  """
204
202
  Get an archive of a path from the container.