hud-python 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of hud-python might be problematic.

@@ -11,6 +11,7 @@ from anthropic.types.beta import (
     BetaImageBlockParam,
 )
 
+from hud.adapters.common.types import CLA, LogType
 from hud.agent import Agent
 from hud.adapters import Adapter
 from hud.settings import settings
@@ -128,7 +129,7 @@ def extract_json_from_response(response: str) -> str:
     return response.strip()
 
 
-class ClaudePlaysPokemon(Agent[AsyncAnthropic, None]):
+class ClaudePlaysPokemon(Agent[AsyncAnthropic, CLA]):
     """AI agent that plays Pokémon games using Claude."""
 
     def __init__(
@@ -191,7 +192,7 @@ class ClaudePlaysPokemon(Agent[AsyncAnthropic, None]):
             observation: The current game observation
 
         Returns:
-            tuple[list[dict[str, Any]], bool]: List of actions and whether the game is done
+            tuple[list[dict[str, Any]], bool, list[LogType] | None]: List of actions, whether the game is done, and a list of strings or dictionaries of logs.
 
         Raises:
             ValueError: If client is not initialized
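
Note: the hunks above widen `fetch_response` from a two-tuple to a three-tuple that also carries logs, each entry typed as `LogType` (a string or a dictionary). A minimal sketch of how a caller could unpack the new shape; the `consume_step` function and its arguments are hypothetical stand-ins, not part of the package:

from typing import Any

async def consume_step(agent: Any, observation: Any) -> list[dict[str, Any]]:
    # Unpack the widened (actions, done, logs) tuple from fetch_response.
    actions, done, logs = await agent.fetch_response(observation)

    # logs is typed as list[LogType] | None, so guard before iterating.
    for entry in logs or []:
        print("log:", entry)

    if done:
        print("agent reports the task is complete")
    return actions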
hud/agent/langchain.py CHANGED
@@ -24,6 +24,7 @@ from hud.adapters.common.types import (
     WaitAction,
     ResponseAction,
     CustomAction,
+    LogType,
     # Exclude ScreenshotFetch, PositionFetch as they are internal
 )
 
@@ -74,6 +75,7 @@ class LangchainAgent(Agent[LangchainModelOrRunnable, Any], Generic[LangchainMode
         langchain_model: LangchainModelOrRunnable,
         adapter: Optional[Adapter] = None,
         system_prompt: str | None = None,
+        name: str | None = None,
     ):
         """
         Initialize the LangchainAgent.
@@ -88,7 +90,9 @@ class LangchainAgent(Agent[LangchainModelOrRunnable, Any], Generic[LangchainMode
             system_prompt: An optional system prompt to guide the Langchain model.
                 If None, a default prompt encouraging single CLA output is used.
         """
-        super().__init__(client=langchain_model, adapter=adapter)  # Store model as 'client'
+        super().__init__(
+            client=langchain_model, adapter=adapter, name=name
+        )  # Store model as 'client'
         self.langchain_model = langchain_model  # Also store with specific name
 
         self.system_prompt_str = system_prompt or self._get_default_system_prompt()
@@ -137,7 +141,7 @@ class LangchainAgent(Agent[LangchainModelOrRunnable, Any], Generic[LangchainMode
         if not human_content:
             logger.warning("LangchainAgent received an observation with no text or screenshot.")
             # Decide how to handle empty observation - perhaps return no action?
-            return [], False  # Or raise an error?
+            return [], False
 
         current_human_message = HumanMessage(content=human_content)
 
@@ -202,7 +206,9 @@ class LangchainAgent(Agent[LangchainModelOrRunnable, Any], Generic[LangchainMode
         # TODO: Consider history truncation/summarization if it grows too long
 
         if actual_action:
+            actual_action = actual_action.model_dump()
             # Return the single action dictionary within a list
+            actual_action["logs"] = ai_message_content_for_history
             return [actual_action], is_done
         else:
             # Should ideally not happen if structure validation worked, but as a fallback
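
Note: the last hunk serializes the validated action with pydantic's `model_dump()` and then attaches the raw model output under a `"logs"` key, so the action travels as a plain dict. A minimal sketch of that pattern, assuming pydantic v2 and with a hypothetical `ClickAction` standing in for a real CLA type:

from pydantic import BaseModel

class ClickAction(BaseModel):  # hypothetical stand-in for a CLA action type
    type: str = "click"
    x: int = 0
    y: int = 0

action = ClickAction(x=10, y=20)

# Dump the validated model to a plain dict, then attach the raw
# LLM output for tracing, mirroring the hunk above.
action_dict = action.model_dump()
action_dict["logs"] = "raw AI message content"  # hypothetical content

print(action_dict)  # {'type': 'click', 'x': 10, 'y': 20, 'logs': 'raw AI message content'}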
hud/agent/operator.py CHANGED
@@ -19,6 +19,7 @@ from hud.adapters.operator import OperatorAdapter
 from hud.types import Gym
 from hud.utils.common import Observation
 from hud.settings import settings
+from hud.adapters.common.types import LogType
 
 logger = logging.getLogger(__name__)
 
@@ -37,9 +38,10 @@ class OperatorAgent(Agent[AsyncOpenAI, dict[str, Any]]):
         self,
         client: AsyncOpenAI | None = None,
         model: str = "computer-use-preview",
-        environment: Literal["windows", "mac", "linux", "browser"] = "linux",
+        environment: Literal["windows", "mac", "linux", "browser"] = "browser",
         adapter: Adapter | None = None,
         max_iterations: int = 8,
+        name: str | None = None,
     ):
         """
         Initialize the OperatorAgent.
@@ -50,6 +52,7 @@ class OperatorAgent(Agent[AsyncOpenAI, dict[str, Any]]):
             environment: The environment type (windows, mac, linux, browser)
             adapter: The adapter to use for preprocessing and postprocessing
             max_iterations: Maximum number of iterations for the agent
+            name: The name of the agent
         """
         # Initialize client if not provided
         if client is None:
@@ -65,7 +68,10 @@ class OperatorAgent(Agent[AsyncOpenAI, dict[str, Any]]):
 
         adapter = adapter or OperatorAdapter()
 
-        super().__init__(client=client, adapter=adapter)
+        if name is None:
+            name = f"openai-{model}"
+
+        super().__init__(client=client, adapter=adapter, name=name)
 
         self.model = model
         self.environment = environment
@@ -86,6 +92,8 @@ class OperatorAgent(Agent[AsyncOpenAI, dict[str, Any]]):
         self.initial_prompt = None
         self.pending_safety_checks = []
 
+        self.task_run_id = None
+
     async def fetch_response(self, observation: Observation) -> tuple[list[dict[str, Any]], bool]:
         """
         Fetch a response from the model based on the observation.
@@ -94,8 +102,8 @@ class OperatorAgent(Agent[AsyncOpenAI, dict[str, Any]]):
             observation: The preprocessed observation
 
         Returns:
-            tuple[list[dict[str, Any]], bool]: A tuple containing the list of raw actions and a
-                boolean indicating if the agent believes the task is complete
+            tuple[list[dict[str, Any]], bool, list[LogType] | None]: A tuple containing the list of raw actions,
+                boolean indicating if the agent believes the task is complete.
         """
         if not self.client:
             raise ValueError("Client is required")
@@ -112,7 +120,7 @@ class OperatorAgent(Agent[AsyncOpenAI, dict[str, Any]]):
         )
 
         # Process the observation based on whether it's the first one or a response to an action
-        if self.pending_call_id is None and self.last_response_id is None:
+        if self.pending_call_id is None:  # and self.last_response_id is None:
             # This is the first observation, store and send the prompt
             self.initial_prompt = observation.text
 
@@ -133,13 +141,15 @@ class OperatorAgent(Agent[AsyncOpenAI, dict[str, Any]]):
             # Structure the input correctly for the API using cast
             input_param = cast(ResponseInputParam, [{"role": "user", "content": input_content}])
 
-            # Call OpenAI API for the initial prompt (asynchronous call)
             response = await self.client.responses.create(
-                model=self.model, tools=[computer_tool], input=input_param, truncation="auto"
+                model=self.model,
+                tools=[computer_tool],
+                input=input_param,
+                truncation="auto",
+                reasoning={"summary": "auto"},
             )
 
         else:
-            # This is a response to a previous action
             if not observation.screenshot:
                 logger.warning("No screenshot provided for response to action")
                 return [], True
@@ -164,7 +174,6 @@ class OperatorAgent(Agent[AsyncOpenAI, dict[str, Any]]):
             )
             self.pending_safety_checks = []
 
-            # Call OpenAI API for follow-up (asynchronous call)
             response = await self.client.responses.create(
                 model=self.model,
                 previous_response_id=self.last_response_id,
@@ -181,6 +190,8 @@ class OperatorAgent(Agent[AsyncOpenAI, dict[str, Any]]):
         done = True  # Assume done unless a computer call is found
         final_text_response = ""
 
+        self.pending_call_id = None
+
         # Check for computer calls first
         computer_calls = [
             item
@@ -217,8 +228,22 @@ class OperatorAgent(Agent[AsyncOpenAI, dict[str, Any]]):
             # No ResponseAgent logic here anymore - just return the response
             actions = [{"type": "response", "text": final_text_response}]
             done = True
-        # else:
-        #     logger.info("No computer calls and no final text message found.")
+        else:
+            logger.info("No computer calls and no final text message found.")
             # Keep done = True, actions remains empty
 
+        reasoning = ""
+        for item in response.output:
+            if item.type == "reasoning" and item.summary:
+                reasoning += f"Thinking: {item.summary[0].text}\n"
+            elif item.type == "message":
+                for content in item.content:
+                    if isinstance(content, ResponseOutputText):
+                        reasoning += f"{content.text}\n"
+
+        # add reasoning to the actions
+        for action in actions:
+            action["reasoning"] = reasoning
+            action["logs"] = response.model_dump()  # type: ignore[assignment]
+
         return actions, done
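
Note: the final hunk walks `response.output` and concatenates reasoning summaries and message text into a single `reasoning` string that gets attached to every action. A minimal sketch of that loop, using dataclass stand-ins for the OpenAI Responses API output items (the real types live in the `openai` package):

from dataclasses import dataclass, field

@dataclass
class SummaryPart:  # stand-in for a reasoning summary part
    text: str

@dataclass
class TextPart:  # stand-in for ResponseOutputText
    text: str

@dataclass
class OutputItem:  # stand-in for a Responses API output item
    type: str
    summary: list[SummaryPart] = field(default_factory=list)
    content: list[TextPart] = field(default_factory=list)

def collect_reasoning(output: list[OutputItem]) -> str:
    # Mirror the hunk: reasoning summaries first, then message text.
    reasoning = ""
    for item in output:
        if item.type == "reasoning" and item.summary:
            reasoning += f"Thinking: {item.summary[0].text}\n"
        elif item.type == "message":
            for part in item.content:
                reasoning += f"{part.text}\n"
    return reasoning

items = [
    OutputItem(type="reasoning", summary=[SummaryPart("inspect the page")]),
    OutputItem(type="message", content=[TextPart("clicking the button")]),
]
print(collect_reasoning(items))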
@@ -22,9 +22,9 @@ class ConcreteAgent(Agent[Any, dict[str, Any]]):
     async def fetch_response(self, observation: Observation) -> tuple[list[dict[str, Any]], bool]:
         """Mock implementation that returns predefined responses."""
         if self.call_count < len(self.mock_responses):
-            response = self.mock_responses[self.call_count]
+            actions, done = self.mock_responses[self.call_count]
             self.call_count += 1
-            return response
+            return actions, done
         return [], True
 
 
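Note: this test hunk only unpacks each mock response into its `(actions, done)` parts before returning, keeping the tuple shape explicit. A short, hypothetical illustration of the same unpacking:

from typing import Any

# Hypothetical predefined responses, each an (actions, done) tuple.
mock_responses: list[tuple[list[dict[str, Any]], bool]] = [
    ([{"type": "click", "x": 1, "y": 2}], False),
    ([], True),
]

for actions, done in mock_responses:
    print(actions, done)  # the explicit unpacking the hunk introduces
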
hud/env/docker_client.py CHANGED
@@ -12,7 +12,7 @@ import toml
 
 from hud.env.client import Client
 from hud.types import EnvironmentStatus
-from hud.utils.common import directory_to_tar_bytes
+from hud.utils.common import _compile_pathspec, directory_to_tar_bytes
 
 if TYPE_CHECKING:
     from hud.utils import ExecuteResult
@@ -151,15 +151,32 @@ class DockerClient(Client):
         if not self._source_path:
             return {}
 
-        file_mtimes = {}
+        # Build ignore spec (currently we only care about .hudignore but reuse
+        # the common helper for consistency).
+        spec = _compile_pathspec(
+            self._source_path,
+            respect_gitignore=False,
+            respect_dockerignore=False,
+            respect_hudignore=True,
+        )
+
+        file_mtimes: dict[str, float] = {}
+
         for root, _, files in os.walk(self._source_path):
             for file in files:
                 file_path = Path(root) / file
+                rel_path = file_path.relative_to(self._source_path).as_posix()
+
+                # Skip ignored files
+                if spec and spec.match_file(rel_path):
+                    continue
+
                 try:
                     file_mtimes[str(file_path)] = file_path.stat().st_mtime
                 except (FileNotFoundError, PermissionError):
                     # Skip files that can't be accessed
                     continue
+
         return file_mtimes
 
     async def needs_update(self) -> bool:
@@ -181,6 +198,11 @@ class DockerClient(Client):
         if not self._last_file_mtimes:
             return True
 
+        # Check for removed files
+        for file_path in self._last_file_mtimes:
+            if file_path not in current_mtimes:
+                return True
+
         # Check for new or modified files
         for file_path, mtime in current_mtimes.items():
             if file_path not in self._last_file_mtimes or mtime > self._last_file_mtimes[file_path]:
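
Note: `_get_file_mtimes` now filters the walk through a `.hudignore`-style pathspec, and `needs_update` treats removed files as changes. A minimal standalone sketch of both behaviors, assuming the `pathspec` library (which the internal `_compile_pathspec` helper appears to wrap); `snapshot_mtimes` and `needs_update` here are illustrative names:

import os
from pathlib import Path

import pathspec  # assumption: pip install pathspec

def snapshot_mtimes(source: Path, ignore_lines: list[str]) -> dict[str, float]:
    # Walk the tree and skip ignored paths, as in _get_file_mtimes above.
    spec = pathspec.PathSpec.from_lines("gitwildmatch", ignore_lines)
    mtimes: dict[str, float] = {}
    for root, _, files in os.walk(source):
        for name in files:
            path = Path(root) / name
            rel = path.relative_to(source).as_posix()
            if spec.match_file(rel):
                continue
            try:
                mtimes[str(path)] = path.stat().st_mtime
            except (FileNotFoundError, PermissionError):
                continue  # skip files that can't be accessed
    return mtimes

def needs_update(old: dict[str, float], new: dict[str, float]) -> bool:
    # A removed file now triggers an update...
    if any(path not in new for path in old):
        return True
    # ...as does any new or modified file.
    return any(path not in old or mtime > old[path] for path, mtime in new.items())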
hud/env/environment.py CHANGED
@@ -8,14 +8,14 @@ from typing import TYPE_CHECKING, Any
 from pydantic import BaseModel
 
 from hud.env.client import Client
-from hud.env.remote_client import RemoteClient
+from hud.env.remote_client import RemoteClient, SetupRequest
 from hud.task import Task
+from hud.utils.agent import format_agent_prompt
 from hud.utils.common import FunctionConfig, FunctionConfigs, Observation
 from hud.utils.config import (
     LOCAL_EVALUATORS,
     REMOTE_EVALUATE,
     REMOTE_FUNCTION_PREFIX,
-    REMOTE_SETUP,
     expand_config,
 )
 from hud.utils.telemetry import stream
@@ -41,9 +41,15 @@ class Environment(BaseModel):
     task: Task | None = None
     build_data: dict[str, Any]
 
+    # The task run id
+    task_run_id: str | None = None
+
     # final response
     final_response: str | None = None
 
+    # environment prompt information
+    environment_prompt: str | None = None
+
     async def _invoke_all(self, configs: FunctionConfigs) -> list[Any]:
         # Execute each config and collect results
         configs_all = [configs] if not isinstance(configs, list) else configs
@@ -69,24 +75,45 @@ class Environment(BaseModel):
     async def _setup(self, config: FunctionConfigs | None = None) -> None:
         """
         Setup the environment.
+        No-op if no config or task is provided.
 
         Args:
             config: The configuration to use for the setup
         """
         if isinstance(self.client, RemoteClient):
             await self.get_urls()
-            await self._invoke_all(create_remote_config(self, config, REMOTE_SETUP))
+
+            setup_request = SetupRequest()
+
+            if self.task:
+                setup_request.task_id = self.task.id
+                setup_request.config = self.task.config
+                setup_request.metadata = _format_task_metadata(self.task)
+                if self.task.setup:
+                    setup_request.setup = expand_config(self.task.setup)[0]
+            elif config:
+                setup_request.setup = expand_config(config)[0]
+            else:
+                raise ValueError("No task or config provided for remote environment")
+
+            result = await self.client.setup(setup_request)
+
+            if result and result.get("id"):
+                self.task_run_id = result.get("id")
+                logger.info("View the live trace at https://app.hud.so/trace/%s", self.task_run_id)
+            else:
+                logger.warning("No task run id found in the result")
         else:
             if config is not None:
                 await self._invoke_all(config)
             elif self.task and self.task.setup is not None:
                 await self._invoke_all(self.task.setup)
-            else:
-                raise ValueError(
-                    "No config, task or task setup function provided for local environment"
-                )
 
-    async def evaluate(self, config: FunctionConfigs | None = None) -> Any:
+    async def evaluate(
+        self,
+        config: FunctionConfigs | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> Any:
         """
         Evaluate the environment.
 
@@ -97,7 +124,9 @@ class Environment(BaseModel):
             Any: Result of the evaluation
         """
         if isinstance(self.client, RemoteClient):
-            results = await self._invoke_all(create_remote_config(self, config, REMOTE_EVALUATE))
+            results = await self._invoke_all(
+                create_remote_config(self, config, REMOTE_EVALUATE, metadata)
+            )
         else:
             if config is not None:
                 results = await self._invoke_all(config)
@@ -110,27 +139,32 @@ class Environment(BaseModel):
         else:
             return results
 
-    async def reset(
-        self, configs: FunctionConfigs | None = None
-    ) -> tuple[Observation, dict[str, Any]]:
+    async def reset(self) -> tuple[Observation, dict[str, Any]]:
         """
-        Reset the environment.
+        Reset the environment and return the first observation with the agent prompt.
 
         Args:
-            configs: The configuration to use for the reset
+            None
 
         Returns:
-            Observation: The first observation from the environment
+            Observation: The first observation from the environment with the agent prompt
             info: Dictionary of information about the environment
         """
         # await self._setup(configs)
         obs, _, _, info = await self.step()
-        if self.task and self.task.prompt:
-            obs.text = self.task.prompt
+
+        if self.build_data.get("environment_prompt"):
+            self.environment_prompt = self.build_data["environment_prompt"]
+
+        # Format the agent prompt with the environment prompt and the task prompt
+        obs.text = format_agent_prompt(self.environment_prompt, self.task)
+
         return obs, info
 
     async def step(
-        self, actions: CLA | list[CLA] | None = None
+        self,
+        actions: CLA | list[CLA] | None = None,
+        verbose: bool = False,
     ) -> tuple[Observation, float, bool, dict[str, Any]]:
         """Execute a step in the environment.
 
@@ -152,10 +186,11 @@ class Environment(BaseModel):
             result, stdout, stderr = await self.client.invoke(
                 FunctionConfig(function="step", args=args)
             )
-            if stdout:
-                logger.info("Step produced stdout: %s", stdout.decode())
-            if stderr:
-                logger.warning("Step produced stderr: %s", stderr.decode())
+            if verbose:
+                if stdout:
+                    logger.info("Step produced stdout: %s", stdout.decode())
+                if stderr:
+                    logger.warning("Step produced stderr: %s", stderr.decode())
 
             observation = Observation.model_validate(result["observation"], strict=True)
 
@@ -199,12 +234,12 @@ class Environment(BaseModel):
             await self.client.close()
 
     async def stream(self) -> str | None:
-        urls = await self.get_urls()
-        if urls["live_url"] is None:
+        if not self.live_url:
+            await self.get_urls()
+        if self.live_url is None:
             logger.warning("No live URL found")
             return None
-        # Stream the live view
-        return stream(urls["live_url"])
+        return stream(self.live_url)
 
     async def run(self, agent: Agent, max_steps: int = 27, verbose: bool = True) -> Any:
         """Run an agent in the environment.
@@ -218,7 +253,11 @@ class Environment(BaseModel):
         for i in range(max_steps):
            action, done = await agent.predict(obs, verbose=verbose)
            if verbose:
-                logger.info("Step %d: Action: %s", i, action)
+                logger.info(
+                    "Step %d: Action: %s",
+                    i,
+                    [str(a) for a in action] if len(action) > 1 else str(action[0]),
+                )
            obs, reward, terminated, info = await self.step(action)
            if verbose:
                logger.info("Step %d: Observation: %s", i, obs)
@@ -230,10 +269,21 @@ class Environment(BaseModel):
         return result
 
 
+def _format_task_metadata(task: Task) -> dict[str, Any]:
+    metadata = {}
+    if task.metadata:
+        for key, value in task.metadata.items():
+            metadata[str(key)] = value
+    if task.sensitive_data:
+        metadata["sensitive_data"] = task.sensitive_data
+    return metadata
+
+
 def create_remote_config(
     env: Environment | None = None,
     config: FunctionConfigs | None = None,
     function: str | None = None,
+    metadata: dict[str, Any] | None = None,
 ) -> list[FunctionConfig]:
     """
     Create a remote configuration for setup or evaluate, determining the final
@@ -317,6 +367,8 @@ def create_remote_config(
         `[FunctionConfig(function='evaluate', args=[])]`
     """
     # If no function provided, just expand the config and return it directly
+    if metadata is None:
+        metadata = {}
     if function is None:
         if config:
             return expand_config(config)
@@ -330,7 +382,7 @@ def create_remote_config(
         if not isinstance(expanded_configs[0].args, list):
             expanded_configs[0].args = [expanded_configs[0].args]
         expanded_configs[0].args.append(env.final_response)  # for remote responses
-        return [FunctionConfig(function=function, args=expanded_configs)]
+        return [FunctionConfig(function=function, args=expanded_configs, metadata=metadata)]
 
     # Otherwise, use the environment's task
     task = env.task if env else None
@@ -339,6 +391,8 @@ def create_remote_config(
     if task is None:
         raise ValueError("Either task or config must be provided")
 
+    metadata = _format_task_metadata(task)
+
     # Case 2: Task has the specified function attribute
     task_config = getattr(task, function, None)
     if task_config:
@@ -350,11 +404,7 @@ def create_remote_config(
         if not isinstance(expanded_configs[0].args, list):
             expanded_configs[0].args = [expanded_configs[0].args]
         expanded_configs[0].args.append(env.final_response)  # for remote responses
-        return [
-            FunctionConfig(
-                function=function, args=expanded_configs, metadata={"task": task.model_dump()}
-            )
-        ]
+        return [FunctionConfig(function=function, args=expanded_configs, metadata=metadata)]
 
     # Case 3: Check for task.config
     if hasattr(task, "config") and task.config:
@@ -369,11 +419,7 @@ def create_remote_config(
             if not isinstance(final_args["args"], list):
                 final_args["args"] = [final_args["args"]]
             final_args["args"].append(env.final_response)
-        return [
-            FunctionConfig(
-                function=function, args=[final_args], metadata={"task": task.model_dump()}
-            )
-        ]
+        return [FunctionConfig(function=function, args=[final_args], metadata=metadata)]
 
     # Case 4: Use task.id
     if task.id:
@@ -384,7 +430,7 @@ def create_remote_config(
             FunctionConfig(
                 function=f"{REMOTE_FUNCTION_PREFIX}{function}",
                 args=args_list,
-                metadata={"task": task.model_dump()},
+                metadata=metadata,
             )
         ]
 
@@ -392,4 +438,4 @@ def create_remote_config(
     args_list = []
     if env and env.final_response:
         args_list.append(env.final_response)
-    return [FunctionConfig(function=function, args=args_list, metadata={"task": task.model_dump()})]
+    return [FunctionConfig(function=function, args=args_list, metadata=metadata)]
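
Note: the new `_format_task_metadata` helper coerces metadata keys to strings and folds `sensitive_data` into the same dict, which then replaces the old `{"task": task.model_dump()}` payload throughout `create_remote_config`. A runnable sketch of that helper, with a dataclass standing in for the real `Task` model (which is a pydantic model in hud.task):

from dataclasses import dataclass, field
from typing import Any

@dataclass
class FakeTask:  # hypothetical stand-in for hud.task.Task
    metadata: dict[Any, Any] = field(default_factory=dict)
    sensitive_data: dict[str, Any] | None = None

def format_task_metadata(task: FakeTask) -> dict[str, Any]:
    metadata: dict[str, Any] = {}
    if task.metadata:
        for key, value in task.metadata.items():
            metadata[str(key)] = value  # keys coerced to strings
    if task.sensitive_data:
        metadata["sensitive_data"] = task.sensitive_data
    return metadata

task = FakeTask(metadata={1: "one"}, sensitive_data={"password": "..."})
print(format_task_metadata(task))
# {'1': 'one', 'sensitive_data': {'password': '...'}}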
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import asyncio
+import contextlib
 import io
 import logging
 import textwrap
@@ -34,6 +35,7 @@ class LocalDockerClient(DockerClient):
         """
         Build an image from a build context.
         """
+        logger.info("Building image from %s", build_context)
         # Create a unique image tag
         image_tag = f"hud-env-{uuid.uuid4().hex[:8]}"
 
@@ -67,6 +69,7 @@ class LocalDockerClient(DockerClient):
     async def create(
         cls,
         image: str,
+        host_config: dict[str, Any] | None = None,
     ) -> LocalDockerClient:
         """
         Creates a Docker environment client from a image.
@@ -81,20 +84,42 @@ class LocalDockerClient(DockerClient):
         # Initialize Docker client
         docker_client = aiodocker.Docker()
 
+        # Default host config
+        if host_config is None:
+            host_config = {
+                "PublishAllPorts": True,
+            }
+
         # Create and start the container
         container_config = {
             "Image": image,
             "Tty": True,
             "OpenStdin": True,
             "Cmd": None,
-            "HostConfig": {
-                "PublishAllPorts": True,
-            },
+            "HostConfig": host_config,
         }
 
         container = await docker_client.containers.create(config=container_config)
         await container.start()
 
+        # --------------------------------------------------
+        # Stream container logs while we wait for readiness
+        # --------------------------------------------------
+        async def _stream_logs() -> None:
+            try:
+                # .log() with follow=True -> async iterator of bytes/str
+                async for raw in container.log(stdout=True, stderr=True, follow=True):
+                    if isinstance(raw, bytes):
+                        raw = raw.decode(errors="replace")
+                    logger.info("container %s | %s", container.id[:12], raw.rstrip())
+            except asyncio.CancelledError:
+                # task cancelled during cleanup - silently exit
+                return
+            except Exception:
+                logger.exception("error while streaming logs from %s", container.id[:12])
+
+        log_task: asyncio.Task | None = asyncio.create_task(_stream_logs())
+
         inspection = await container.show()
         if health_check_config := inspection["Config"].get("Healthcheck"):
             # Using the interval as spinup deadline is a bit implicit - could
@@ -115,9 +140,21 @@ class LocalDockerClient(DockerClient):
                     raise TimeoutError(f"{container.id} not healthy after {window_secs}s")
                 await asyncio.sleep(1)
             logger.debug("Container %s is healthy", container.id)
+        else:
+            logger.debug("Container %s has no healthcheck, assuming ready", container.id)
+
+        # Stop the log stream now that the container is ready
+        if log_task is not None:
+            log_task.cancel()
+            with contextlib.suppress(Exception):
+                await log_task
+            log_task = None
 
         # Return the controller instance
-        return cls(docker_client, container.id)
+        client = cls(docker_client, container.id)
+        # store the task so close() can cancel if it is still running
+        client._log_task = log_task  # type: ignore[attr-defined]
+        return client
 
     def __init__(self, docker_conn: aiodocker.Docker, container_id: str) -> None:
         """
@@ -135,6 +172,9 @@ class LocalDockerClient(DockerClient):
         # Docker client will be initialized when needed
         self._docker = docker_conn
 
+        # Background task for streaming logs (may be None)
+        self._log_task: asyncio.Task | None = None
+
     @property
     def container_id(self) -> str:
         """Get the container ID."""
@@ -288,3 +328,9 @@ class LocalDockerClient(DockerClient):
             logger.warning("Error during Docker container cleanup: %s", e)
         finally:
             await self._docker.close()
+
+        # Cancel background log forwarding first (if still active)
+        if self._log_task is not None:
+            self._log_task.cancel()
+            with contextlib.suppress(Exception):
+                await self._log_task
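
Note: the log-streaming changes follow a common asyncio pattern: spawn a background follower task, then cancel and await it once the container is ready (and again in `close()` as a backstop), swallowing cleanup errors with `contextlib.suppress`. A self-contained sketch of the pattern, using a queue in place of the Docker log stream:

import asyncio
import contextlib

async def follow_logs(lines: asyncio.Queue) -> None:
    # Background follower: runs until cancelled.
    try:
        while True:
            line = await lines.get()
            print("container |", line.rstrip())
    except asyncio.CancelledError:
        return  # cancelled during cleanup - exit quietly

async def main() -> None:
    lines: asyncio.Queue = asyncio.Queue()
    await lines.put("booting...\n")
    task = asyncio.create_task(follow_logs(lines))
    await asyncio.sleep(0.1)  # stand-in for the health-check wait
    # Cancel and await, suppressing cleanup errors as the diff does.
    task.cancel()
    with contextlib.suppress(Exception, asyncio.CancelledError):
        await task

asyncio.run(main())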