eval-protocol 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- development/__init__.py +1 -0
- development/normalize_sandbox_fusion.py +628 -0
- development/utils/__init__.py +1 -0
- development/utils/generate_api_key.py +31 -0
- development/utils/subprocess_manager.py +481 -0
- eval_protocol/__init__.py +86 -0
- eval_protocol/__main__.py +10 -0
- eval_protocol/_version.py +21 -0
- eval_protocol/adapters/__init__.py +1 -0
- eval_protocol/adapters/braintrust.py +8 -0
- eval_protocol/adapters/trl.py +8 -0
- eval_protocol/agent/__init__.py +29 -0
- eval_protocol/agent/models.py +69 -0
- eval_protocol/agent/orchestrator.py +893 -0
- eval_protocol/agent/resource_abc.py +89 -0
- eval_protocol/agent/resource_pool.py +184 -0
- eval_protocol/agent/resources/__init__.py +44 -0
- eval_protocol/agent/resources/bfcl_envs/__init__.py +1 -0
- eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +342 -0
- eval_protocol/agent/resources/bfcl_envs/math_api.py +40 -0
- eval_protocol/agent/resources/bfcl_envs/posting_api.py +157 -0
- eval_protocol/agent/resources/bfcl_sim_api_resource.py +314 -0
- eval_protocol/agent/resources/docker_resource.py +479 -0
- eval_protocol/agent/resources/filesystem_resource.py +371 -0
- eval_protocol/agent/resources/http_rollout_protocol.py +85 -0
- eval_protocol/agent/resources/http_rollout_resource.py +325 -0
- eval_protocol/agent/resources/python_state_resource.py +170 -0
- eval_protocol/agent/resources/sql_resource.py +271 -0
- eval_protocol/agent/task_manager.py +1064 -0
- eval_protocol/agent/tool_registry.py +111 -0
- eval_protocol/auth.py +156 -0
- eval_protocol/cli.py +425 -0
- eval_protocol/cli_commands/__init__.py +1 -0
- eval_protocol/cli_commands/agent_eval_cmd.py +264 -0
- eval_protocol/cli_commands/common.py +242 -0
- eval_protocol/cli_commands/deploy.py +486 -0
- eval_protocol/cli_commands/deploy_mcp.py +287 -0
- eval_protocol/cli_commands/preview.py +186 -0
- eval_protocol/cli_commands/run_eval_cmd.py +202 -0
- eval_protocol/common_utils.py +36 -0
- eval_protocol/config.py +180 -0
- eval_protocol/datasets/__init__.py +1 -0
- eval_protocol/datasets/loader.py +521 -0
- eval_protocol/evaluation.py +1045 -0
- eval_protocol/execution/__init__.py +1 -0
- eval_protocol/execution/pipeline.py +920 -0
- eval_protocol/gcp_tools.py +484 -0
- eval_protocol/generation/cache.py +141 -0
- eval_protocol/generation/clients/base.py +67 -0
- eval_protocol/generation/clients.py +248 -0
- eval_protocol/generic_server.py +165 -0
- eval_protocol/integrations/__init__.py +12 -0
- eval_protocol/integrations/braintrust.py +51 -0
- eval_protocol/integrations/deepeval.py +106 -0
- eval_protocol/integrations/openeval.py +40 -0
- eval_protocol/integrations/trl.py +187 -0
- eval_protocol/mcp/__init__.py +48 -0
- eval_protocol/mcp/adapter.py +131 -0
- eval_protocol/mcp/client/__init__.py +12 -0
- eval_protocol/mcp/client/connection.py +499 -0
- eval_protocol/mcp/clients.py +195 -0
- eval_protocol/mcp/execution/__init__.py +23 -0
- eval_protocol/mcp/execution/base_policy.py +227 -0
- eval_protocol/mcp/execution/fireworks_policy.py +209 -0
- eval_protocol/mcp/execution/manager.py +506 -0
- eval_protocol/mcp/execution/policy.py +421 -0
- eval_protocol/mcp/grid_renderer.py +54 -0
- eval_protocol/mcp/mcpgym.py +637 -0
- eval_protocol/mcp/process_manager.py +177 -0
- eval_protocol/mcp/session/__init__.py +11 -0
- eval_protocol/mcp/session/manager.py +228 -0
- eval_protocol/mcp/simple_process_manager.py +291 -0
- eval_protocol/mcp/simulation_server.py +458 -0
- eval_protocol/mcp/types.py +80 -0
- eval_protocol/mcp_agent/__init__.py +1 -0
- eval_protocol/mcp_agent/config.py +147 -0
- eval_protocol/mcp_agent/intermediary_server.py +542 -0
- eval_protocol/mcp_agent/main.py +210 -0
- eval_protocol/mcp_agent/orchestration/__init__.py +1 -0
- eval_protocol/mcp_agent/orchestration/base_client.py +132 -0
- eval_protocol/mcp_agent/orchestration/local_docker_client.py +702 -0
- eval_protocol/mcp_agent/orchestration/remote_http_client.py +304 -0
- eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +3 -0
- eval_protocol/mcp_agent/session.py +79 -0
- eval_protocol/mcp_env.py +304 -0
- eval_protocol/models.py +366 -0
- eval_protocol/packaging.py +219 -0
- eval_protocol/platform_api.py +360 -0
- eval_protocol/playback_policy.py +396 -0
- eval_protocol/resources.py +128 -0
- eval_protocol/reward_function.py +410 -0
- eval_protocol/rewards/__init__.py +94 -0
- eval_protocol/rewards/accuracy.py +454 -0
- eval_protocol/rewards/accuracy_length.py +173 -0
- eval_protocol/rewards/apps_coding_reward.py +331 -0
- eval_protocol/rewards/apps_execution_utils.py +149 -0
- eval_protocol/rewards/apps_testing_util.py +559 -0
- eval_protocol/rewards/bfcl_reward.py +313 -0
- eval_protocol/rewards/code_execution.py +1620 -0
- eval_protocol/rewards/code_execution_utils.py +72 -0
- eval_protocol/rewards/cpp_code.py +861 -0
- eval_protocol/rewards/deepcoder_reward.py +161 -0
- eval_protocol/rewards/format.py +129 -0
- eval_protocol/rewards/function_calling.py +541 -0
- eval_protocol/rewards/json_schema.py +422 -0
- eval_protocol/rewards/language_consistency.py +700 -0
- eval_protocol/rewards/lean_prover.py +479 -0
- eval_protocol/rewards/length.py +375 -0
- eval_protocol/rewards/list_comparison_math_reward.py +221 -0
- eval_protocol/rewards/math.py +762 -0
- eval_protocol/rewards/multiple_choice_math_reward.py +232 -0
- eval_protocol/rewards/reasoning_steps.py +249 -0
- eval_protocol/rewards/repetition.py +342 -0
- eval_protocol/rewards/tag_count.py +162 -0
- eval_protocol/rl_processing.py +82 -0
- eval_protocol/server.py +271 -0
- eval_protocol/typed_interface.py +260 -0
- eval_protocol/utils/__init__.py +8 -0
- eval_protocol/utils/batch_evaluation.py +217 -0
- eval_protocol/utils/batch_transformation.py +205 -0
- eval_protocol/utils/dataset_helpers.py +112 -0
- eval_protocol/utils/module_loader.py +56 -0
- eval_protocol/utils/packaging_utils.py +108 -0
- eval_protocol/utils/static_policy.py +305 -0
- eval_protocol-0.0.3.dist-info/METADATA +635 -0
- eval_protocol-0.0.3.dist-info/RECORD +130 -0
- eval_protocol-0.0.3.dist-info/WHEEL +5 -0
- eval_protocol-0.0.3.dist-info/entry_points.txt +4 -0
- eval_protocol-0.0.3.dist-info/licenses/LICENSE +201 -0
- eval_protocol-0.0.3.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,479 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DockerResource: A ForkableResource for managing Docker container states.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import io
|
|
6
|
+
import uuid
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
|
|
9
|
+
|
|
10
|
+
from ..resource_abc import ForkableResource
|
|
11
|
+
|
|
12
|
+
# Attempt to import the Docker SDK. When it is missing, inert stand-ins with the
# same names are defined so the rest of this module can still be imported.
try:
    import docker
    from docker.errors import APIError, DockerException, NotFound
    from docker.models.containers import Container
except ImportError:
    DOCKER_SDK_AVAILABLE = False
    DOCKER_DAEMON_AVAILABLE = False

    # Dummy classes/exceptions, only defined when the import above fails.
    # They mirror the exception hierarchy and the Container methods used below.
    class DockerException(Exception):  # type: ignore[no-redef]
        pass

    class NotFound(DockerException):  # type: ignore[no-redef]
        pass

    class APIError(DockerException):  # type: ignore[no-redef]
        pass

    class Container:  # type: ignore[no-redef]
        id: str = ""
        name: str = ""
        image: Any = None
        status: str = ""
        ports: Dict[str, Any] = {}

        def remove(self, force: bool = False, v: bool = False) -> None:
            pass

        def commit(self, **kwargs: Any) -> Any:
            return None

        def reload(self) -> None:
            pass

        def start(self) -> None:
            pass

        def exec_run(self, **kwargs: Any) -> Tuple[int, bytes]:
            return (0, b"")

        def logs(self, **kwargs: Any) -> bytes:
            return b""

else:
    DOCKER_SDK_AVAILABLE = True

    # Probe the daemon once at import time. The client is pre-bound to None so
    # the cleanup below never touches an unbound name when from_env() fails.
    _daemon_check_client = None
    try:
        _daemon_check_client = docker.from_env()
        _daemon_check_client.ping()
        DOCKER_DAEMON_AVAILABLE = True
    except Exception:
        # Any failure here (bad environment, socket missing, daemon down) just
        # means the daemon cannot be used; importing this module must not fail.
        DOCKER_DAEMON_AVAILABLE = False
    finally:
        if _daemon_check_client is not None:
            try:
                _daemon_check_client.close()
            except Exception:
                # Best effort: a failed close of the probe client is harmless.
                pass
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class DockerResource(ForkableResource):
    """
    A ForkableResource for managing Docker container states.

    Allows initializing a container from an image, forking (by committing the
    current container and starting a new one from the committed image),
    checkpointing (committing to an image), and restoring from a checkpoint image.
    Commands can be executed within the container.

    Requires the Docker SDK (`docker` pip package) to be installed and Docker daemon running.

    NOTE(review): the async methods call the Docker client synchronously, so
    they appear to block the caller until the daemon responds - confirm this is
    acceptable for the event loop they run on.
    """

    def __init__(self) -> None:
        """
        Create an unconfigured resource with its own Docker client.

        Raises:
            ImportError: If the Docker SDK could not be imported.
            RuntimeError: If the import-time daemon probe failed.
        """
        if not DOCKER_SDK_AVAILABLE:
            raise ImportError("Docker SDK not found. Please install 'docker' package to use DockerResource.")
        if not DOCKER_DAEMON_AVAILABLE:
            raise RuntimeError("Docker daemon not running or not accessible")
        self._client = docker.from_env()
        self._config: Dict[str, Any] = {}
        self._container: Optional[Container] = None
        # ID (or name) of the image the current container was started from.
        self._image_id_for_fork_or_checkpoint: Optional[str] = None
        # Guards against operations on a closed resource.
        self._is_closed = False

    # ------------------------------------------------------------------ helpers

    def _generate_name(self, prefix: str) -> str:
        """Return a unique name of the form ``rk_<prefix>_<uuid hex>``."""
        return f"rk_{prefix}_{uuid.uuid4().hex}"

    def _cleanup_container(self, container: Optional[Container]) -> None:
        """Force-remove ``container`` and its volumes; a missing container is ignored."""
        if container:
            try:
                container.remove(force=True, v=True)  # v=True to remove volumes
            except NotFound:
                pass  # Already removed
            except APIError as e:
                print(f"DockerResource: Error removing container {(container.id or '')[:12]}: {e}")

    def _cleanup_image(self, image_id: Optional[str]) -> None:
        """Force-remove the image ``image_id``; a missing image is ignored."""
        if image_id:
            try:
                self._client.images.remove(image=image_id, force=True)
            except NotFound:
                pass  # Already removed
            except APIError as e:
                # Often "image is being used by stopped container" if cleanup order is tricky
                print(f"DockerResource: Error removing image {image_id[:12]}: {e}")

    def _run_container(self, image: str, name: str) -> Container:
        """
        Start a detached container named ``name`` from ``image`` with this
        resource's client and configured ``docker_run_options``.

        A missing or ``None`` ``docker_run_options`` entry is treated as empty.
        ``detach`` and ``name`` always override whatever the options contain.

        Raises:
            APIError: Propagated unchanged so each caller can attach its own message.
        """
        run_options = dict(self._config.get("docker_run_options") or {})
        run_options["detach"] = True  # Must be detached for this model
        run_options["name"] = name
        container = self._client.containers.run(image, **run_options)
        if container:
            container.reload()  # Ensure state is up-to-date
        return container

    def _is_base_image(self, image_id: str) -> bool:
        """
        Return True when ``image_id`` refers to the configured ``image_name``.

        The comparison is done by literal equality first and then against the
        ID the configured name resolves to. If no base image is configured, or
        the configured name can no longer be found, the answer is False.
        """
        base_image_name = self._config.get("image_name")
        if base_image_name is None:
            return False
        if image_id == base_image_name:
            return True
        try:
            return self._client.images.get(base_image_name).id == image_id
        except NotFound:
            return False

    def _close_client(self) -> None:
        """Best-effort release of this instance's Docker client connections."""
        try:
            self._client.close()
        except Exception as e:
            # Shutdown must not raise; report and move on.
            print(f"DockerResource: Error closing Docker client: {e}")

    def _ensure_running(self, container: Container) -> None:
        """
        Refresh ``container`` and start it if it is not running.

        Raises:
            DockerException: If the start call fails or the container is still
                not running afterwards.
        """
        container.reload()
        if container.status == "running":
            return
        try:  # Attempt to start if stopped
            container.start()
            container.reload()
        except APIError as e:
            raise DockerException(f"Failed to start container {(container.id or '')[:12]} for step: {e}") from e
        if container.status != "running":
            raise DockerException(
                f"Container {(container.id or '')[:12]} is not running (status: {container.status}). Cannot execute step."
            )

    def _exec_command(self, container: Container, action_params: Dict[str, Any]) -> Dict[str, Any]:
        """Run ``action_params['command']`` in ``container``; see ``step`` for the contract."""
        command = action_params.get("command")
        if not command:
            raise ValueError("Missing 'command' in action_params for 'exec_command'.")

        exec_options = {
            "cmd": command,
            "stdout": True,
            "stderr": True,
            "workdir": action_params.get("workdir"),
            "user": action_params.get("user"),
            "demux": False,  # Get stdout and stderr interleaved as a single stream
        }
        # Filter out None values for docker SDK
        exec_options = {k: v for k, v in exec_options.items() if v is not None}

        try:
            exit_code, output_stream = container.exec_run(**exec_options)
        except APIError as e:
            raise DockerException(f"Failed to execute command in container {(container.id or '')[:12]}: {e}") from e

        output_bytes = output_stream if output_stream else b""
        return {
            "exit_code": exit_code,
            "output": output_bytes.decode("utf-8", errors="replace"),
        }

    def _get_logs(self, container: Container, action_params: Dict[str, Any]) -> str:
        """Fetch and decode logs of ``container``; see ``step`` for the contract."""
        log_options = {
            "stdout": action_params.get("stdout", True),
            "stderr": action_params.get("stderr", True),
            "timestamps": action_params.get("timestamps", False),
            "tail": action_params.get("tail", "all"),
        }
        try:
            logs_bytes = container.logs(**log_options)
        except APIError as e:
            raise DockerException(f"Failed to get logs for container {(container.id or '')[:12]}: {e}") from e
        return logs_bytes.decode("utf-8", errors="replace")

    # --------------------------------------------------------------- public API

    async def setup(self, config: Dict[str, Any]) -> None:
        """
        Initializes and starts a Docker container based on the provided configuration.

        Args:
            config: Configuration dictionary. Expected keys:
                - 'image_name' (str): Name of the Docker image to use (e.g., 'ubuntu:latest').
                - 'container_name' (Optional[str]): Name for the container. Defaults to a
                  generated unique name when missing or empty.
                - 'docker_run_options' (Optional[Dict[str, Any]]): Options for docker.client.containers.run()
                  e.g., {'detach': True, 'ports': {'80/tcp': 8080}, 'environment': ["VAR=value"]}
                  'detach' will always be True.

        Raises:
            RuntimeError: If the resource is already closed.
            ValueError: If 'image_name' is missing.
            DockerException: If the image cannot be pulled or the container cannot be started.
        """
        if self._is_closed:
            raise RuntimeError("Cannot setup a closed DockerResource.")
        self._config = config.copy()

        image_name = self._config.get("image_name")
        if not image_name:
            raise ValueError("Missing 'image_name' in DockerResource config.")

        # Pull the image if not present locally (optional, could be pre-pulled)
        try:
            self._client.images.get(image_name)
        except NotFound:
            print(f"DockerResource: Image '{image_name}' not found locally. Pulling...")
            try:
                self._client.images.pull(image_name)
            except APIError as e:
                raise DockerException(f"Failed to pull image '{image_name}': {e}") from e

        self._image_id_for_fork_or_checkpoint = image_name  # Base image for the first container

        # `or` (rather than a .get default) also covers an explicit None/empty name.
        container_name = self._config.get("container_name") or self._generate_name("container")

        # Clean up any existing container with the same name (e.g. from a failed previous run)
        try:
            existing_container = self._client.containers.get(container_name)
        except NotFound:
            pass
        else:
            self._cleanup_container(existing_container)

        try:
            self._container = self._run_container(image_name, container_name)
        except APIError as e:
            raise DockerException(
                f"Failed to start container '{container_name}' from image '{image_name}': {e}"
            ) from e

    async def fork(self) -> "DockerResource":
        """
        Creates a new DockerResource by committing the current container's state
        to a new image and starting a new container from that image.

        The returned resource uses its own Docker client, so it stays usable
        after this resource has been closed.

        Raises:
            RuntimeError: If the resource is closed or not set up.
            DockerException: If the commit or the start of the forked container fails.
        """
        if self._is_closed or not self._container:
            raise RuntimeError("Cannot fork: resource is closed or not set up.")

        # 1. Commit current container to a new image
        fork_image_tag = self._generate_name("fork_img")
        try:
            committed_image = self._container.commit(repository=fork_image_tag)
        except APIError as e:
            raise DockerException(f"Failed to commit container {(self._container.id or '')[:12]} for fork: {e}") from e

        # 2. Create new DockerResource instance that inherits the original config
        #    but gets a fresh container name and runs from the committed image.
        forked_resource = DockerResource()
        forked_resource._config = self._config.copy()
        forked_container_name = self._generate_name("fork_container")
        forked_resource._config["container_name"] = forked_container_name
        forked_resource._image_id_for_fork_or_checkpoint = committed_image.id

        # 3. Start the container through the child's own client.
        try:
            forked_resource._container = forked_resource._run_container(committed_image.id, forked_container_name)
        except APIError as e:
            self._cleanup_image(committed_image.id)  # Cleanup committed image if run fails
            forked_resource._close_client()  # The half-built child is discarded
            raise DockerException(f"Failed to start forked container from image {committed_image.id[:12]}: {e}") from e

        return forked_resource

    async def checkpoint(self) -> Dict[str, Any]:
        """
        Checkpoints the container by committing its current state to a new image.

        Returns:
            ``{"type": "docker_image_id", "image_id": <committed image id>}``.

        Raises:
            RuntimeError: If the resource is closed or not set up.
            DockerException: If the commit fails.
        """
        if self._is_closed or not self._container:
            raise RuntimeError("Cannot checkpoint: resource is closed or not set up.")

        checkpoint_image_tag = self._generate_name("checkpoint_img")
        try:
            committed_image = self._container.commit(repository=checkpoint_image_tag)
        except APIError as e:
            raise DockerException(
                f"Failed to commit container {(self._container.id or '')[:12]} for checkpoint: {e}"
            ) from e
        return {"type": "docker_image_id", "image_id": committed_image.id}

    async def restore(self, state_data: Dict[str, Any]) -> None:
        """
        Restores the resource by starting a new container from a checkpointed image ID.
        The existing container (if any) is stopped and removed.

        Args:
            state_data: A dict as returned by ``checkpoint``.

        Raises:
            RuntimeError: If the resource is closed.
            ValueError: If ``state_data`` does not have the expected shape.
            DockerException: If the image is missing or the container cannot be started.
        """
        if self._is_closed:
            raise RuntimeError("Cannot restore a closed DockerResource.")

        image_id = state_data.get("image_id")
        if state_data.get("type") != "docker_image_id" or not image_id:
            raise ValueError(
                "Invalid state_data for DockerResource restore. Expected {'type': 'docker_image_id', 'image_id': '...'}"
            )

        # Ensure the checkpointed image exists
        try:
            self._client.images.get(image_id)
        except NotFound:
            raise DockerException(f"Checkpoint image ID '{image_id}' not found.") from None

        # Cleanup existing container before restoring
        if self._container:
            self._cleanup_container(self._container)
            self._container = None

        # Update current image ID to the one we are restoring from. The image
        # that was current before is deliberately left alone: it may be a
        # checkpoint that will be restored again later.
        self._image_id_for_fork_or_checkpoint = image_id

        # Reuse the configured name when there is one, and record the name
        # actually used so the config stays consistent.
        restored_container_name = self._config.get("container_name") or self._generate_name("restored_container")
        self._config["container_name"] = restored_container_name

        try:
            self._container = self._run_container(image_id, restored_container_name)
        except APIError as e:
            raise DockerException(f"Failed to start container from checkpoint image {image_id[:12]}: {e}") from e

    async def step(self, action_name: str, action_params: Dict[str, Any]) -> Any:
        """
        Executes a command inside the Docker container or performs other Docker actions.

        A container that is not running is started first.

        Supported actions:
        - 'exec_command': Executes a command inside the container.
            Params: {'command': str | List[str], 'workdir': Optional[str], 'user': Optional[str]}
            Returns: {'exit_code': int, 'output': str (stdout + stderr, decoded as
                     UTF-8 with undecodable bytes replaced)}
        - 'get_logs': Retrieves container logs.
            Params: {'stdout': bool, 'stderr': bool, 'timestamps': bool, 'tail': int | 'all'}
            Returns: str (logs)

        Raises:
            RuntimeError: If the resource is closed or not set up.
            ValueError: If 'exec_command' is called without a command.
            DockerException: If the container cannot be started or the action fails.
            NotImplementedError: For any other ``action_name``.
        """
        if self._is_closed or not self._container:
            raise RuntimeError("Cannot execute step: resource is closed or not set up.")

        container = self._container
        self._ensure_running(container)

        if action_name == "exec_command":
            return self._exec_command(container, action_params)
        if action_name == "get_logs":
            return self._get_logs(container, action_params)
        raise NotImplementedError(f"Action '{action_name}' not supported by DockerResource.")

    async def get_observation(self) -> Dict[str, Any]:
        """
        Returns information about the current container.

        Returns:
            ``{"status": "closed or not_initialized"}`` when there is no usable
            container, otherwise a dict with the keys 'type', 'container_id',
            'container_name', 'image_id', 'status' and 'ports'.
        """
        if self._is_closed or not self._container:
            return {"status": "closed or not_initialized"}

        self._container.reload()

        # Read `image` exactly once. NOTE(review): in the Docker SDK this
        # attribute appears to be a property that queries the daemon, so it can
        # fail (e.g. image already removed); fall back to the tracked ID then.
        try:
            image = getattr(self._container, "image", None)
        except APIError:
            image = None

        return {
            "type": "docker",
            "container_id": self._container.id,
            "container_name": self._container.name,
            "image_id": image.id if image else self._image_id_for_fork_or_checkpoint,
            "status": self._container.status,
            "ports": self._container.ports,
        }

    async def get_tools_spec(self) -> List[Dict[str, Any]]:
        """
        Returns tool specifications for interacting with the Docker container.

        The two entries describe the 'exec_command' and 'get_logs' actions
        accepted by ``step``.
        """
        return [
            {
                "type": "function",
                "function": {
                    "name": "exec_command",
                    "description": "Executes a command inside the Docker container.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "command": {
                                "oneOf": [
                                    {"type": "string"},
                                    {"type": "array", "items": {"type": "string"}},
                                ],
                                "description": "The command to execute (string or list of strings).",
                            },
                            "workdir": {
                                "type": "string",
                                "description": "Working directory inside the container (optional).",
                            },
                            "user": {
                                "type": "string",
                                "description": "User to run command as (optional).",
                            },
                        },
                        "required": ["command"],
                    },
                },
            },
            {
                "type": "function",
                "function": {
                    "name": "get_logs",
                    "description": "Retrieves logs from the Docker container.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "stdout": {
                                "type": "boolean",
                                "default": True,
                                "description": "Include stdout.",
                            },
                            "stderr": {
                                "type": "boolean",
                                "default": True,
                                "description": "Include stderr.",
                            },
                            "tail": {
                                "oneOf": [
                                    {"type": "integer"},
                                    {"type": "string", "enum": ["all"]},
                                ],
                                "default": "all",
                                "description": "Number of lines from end of logs or 'all'.",
                            },
                        },
                    },
                },
            },
        ]

    async def close(self) -> None:
        """
        Stops and removes the managed Docker container, removes the image the
        container was started from if it is not the original base image, and
        closes this instance's Docker client.

        Calling close() on an already closed resource does nothing.
        """
        if self._is_closed:
            return

        self._cleanup_container(self._container)
        self._container = None

        # Only remove images this resource did not get from the user: the
        # tracked image is kept when it is (or resolves to the same ID as) the
        # configured 'image_name', and removed otherwise.
        image_id = self._image_id_for_fork_or_checkpoint
        if image_id and not self._is_base_image(image_id):
            self._cleanup_image(image_id)

        self._image_id_for_fork_or_checkpoint = None
        self._is_closed = True

        # Release the client's connections last; nothing above needs it anymore.
        self._close_client()
|