eval-protocol 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. development/__init__.py +1 -0
  2. development/normalize_sandbox_fusion.py +628 -0
  3. development/utils/__init__.py +1 -0
  4. development/utils/generate_api_key.py +31 -0
  5. development/utils/subprocess_manager.py +481 -0
  6. eval_protocol/__init__.py +86 -0
  7. eval_protocol/__main__.py +10 -0
  8. eval_protocol/_version.py +21 -0
  9. eval_protocol/adapters/__init__.py +1 -0
  10. eval_protocol/adapters/braintrust.py +8 -0
  11. eval_protocol/adapters/trl.py +8 -0
  12. eval_protocol/agent/__init__.py +29 -0
  13. eval_protocol/agent/models.py +69 -0
  14. eval_protocol/agent/orchestrator.py +893 -0
  15. eval_protocol/agent/resource_abc.py +89 -0
  16. eval_protocol/agent/resource_pool.py +184 -0
  17. eval_protocol/agent/resources/__init__.py +44 -0
  18. eval_protocol/agent/resources/bfcl_envs/__init__.py +1 -0
  19. eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +342 -0
  20. eval_protocol/agent/resources/bfcl_envs/math_api.py +40 -0
  21. eval_protocol/agent/resources/bfcl_envs/posting_api.py +157 -0
  22. eval_protocol/agent/resources/bfcl_sim_api_resource.py +314 -0
  23. eval_protocol/agent/resources/docker_resource.py +479 -0
  24. eval_protocol/agent/resources/filesystem_resource.py +371 -0
  25. eval_protocol/agent/resources/http_rollout_protocol.py +85 -0
  26. eval_protocol/agent/resources/http_rollout_resource.py +325 -0
  27. eval_protocol/agent/resources/python_state_resource.py +170 -0
  28. eval_protocol/agent/resources/sql_resource.py +271 -0
  29. eval_protocol/agent/task_manager.py +1064 -0
  30. eval_protocol/agent/tool_registry.py +111 -0
  31. eval_protocol/auth.py +156 -0
  32. eval_protocol/cli.py +425 -0
  33. eval_protocol/cli_commands/__init__.py +1 -0
  34. eval_protocol/cli_commands/agent_eval_cmd.py +264 -0
  35. eval_protocol/cli_commands/common.py +242 -0
  36. eval_protocol/cli_commands/deploy.py +486 -0
  37. eval_protocol/cli_commands/deploy_mcp.py +287 -0
  38. eval_protocol/cli_commands/preview.py +186 -0
  39. eval_protocol/cli_commands/run_eval_cmd.py +202 -0
  40. eval_protocol/common_utils.py +36 -0
  41. eval_protocol/config.py +180 -0
  42. eval_protocol/datasets/__init__.py +1 -0
  43. eval_protocol/datasets/loader.py +521 -0
  44. eval_protocol/evaluation.py +1045 -0
  45. eval_protocol/execution/__init__.py +1 -0
  46. eval_protocol/execution/pipeline.py +920 -0
  47. eval_protocol/gcp_tools.py +484 -0
  48. eval_protocol/generation/cache.py +141 -0
  49. eval_protocol/generation/clients/base.py +67 -0
  50. eval_protocol/generation/clients.py +248 -0
  51. eval_protocol/generic_server.py +165 -0
  52. eval_protocol/integrations/__init__.py +12 -0
  53. eval_protocol/integrations/braintrust.py +51 -0
  54. eval_protocol/integrations/deepeval.py +106 -0
  55. eval_protocol/integrations/openeval.py +40 -0
  56. eval_protocol/integrations/trl.py +187 -0
  57. eval_protocol/mcp/__init__.py +48 -0
  58. eval_protocol/mcp/adapter.py +131 -0
  59. eval_protocol/mcp/client/__init__.py +12 -0
  60. eval_protocol/mcp/client/connection.py +499 -0
  61. eval_protocol/mcp/clients.py +195 -0
  62. eval_protocol/mcp/execution/__init__.py +23 -0
  63. eval_protocol/mcp/execution/base_policy.py +227 -0
  64. eval_protocol/mcp/execution/fireworks_policy.py +209 -0
  65. eval_protocol/mcp/execution/manager.py +506 -0
  66. eval_protocol/mcp/execution/policy.py +421 -0
  67. eval_protocol/mcp/grid_renderer.py +54 -0
  68. eval_protocol/mcp/mcpgym.py +637 -0
  69. eval_protocol/mcp/process_manager.py +177 -0
  70. eval_protocol/mcp/session/__init__.py +11 -0
  71. eval_protocol/mcp/session/manager.py +228 -0
  72. eval_protocol/mcp/simple_process_manager.py +291 -0
  73. eval_protocol/mcp/simulation_server.py +458 -0
  74. eval_protocol/mcp/types.py +80 -0
  75. eval_protocol/mcp_agent/__init__.py +1 -0
  76. eval_protocol/mcp_agent/config.py +147 -0
  77. eval_protocol/mcp_agent/intermediary_server.py +542 -0
  78. eval_protocol/mcp_agent/main.py +210 -0
  79. eval_protocol/mcp_agent/orchestration/__init__.py +1 -0
  80. eval_protocol/mcp_agent/orchestration/base_client.py +132 -0
  81. eval_protocol/mcp_agent/orchestration/local_docker_client.py +702 -0
  82. eval_protocol/mcp_agent/orchestration/remote_http_client.py +304 -0
  83. eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +3 -0
  84. eval_protocol/mcp_agent/session.py +79 -0
  85. eval_protocol/mcp_env.py +304 -0
  86. eval_protocol/models.py +366 -0
  87. eval_protocol/packaging.py +219 -0
  88. eval_protocol/platform_api.py +360 -0
  89. eval_protocol/playback_policy.py +396 -0
  90. eval_protocol/resources.py +128 -0
  91. eval_protocol/reward_function.py +410 -0
  92. eval_protocol/rewards/__init__.py +94 -0
  93. eval_protocol/rewards/accuracy.py +454 -0
  94. eval_protocol/rewards/accuracy_length.py +173 -0
  95. eval_protocol/rewards/apps_coding_reward.py +331 -0
  96. eval_protocol/rewards/apps_execution_utils.py +149 -0
  97. eval_protocol/rewards/apps_testing_util.py +559 -0
  98. eval_protocol/rewards/bfcl_reward.py +313 -0
  99. eval_protocol/rewards/code_execution.py +1620 -0
  100. eval_protocol/rewards/code_execution_utils.py +72 -0
  101. eval_protocol/rewards/cpp_code.py +861 -0
  102. eval_protocol/rewards/deepcoder_reward.py +161 -0
  103. eval_protocol/rewards/format.py +129 -0
  104. eval_protocol/rewards/function_calling.py +541 -0
  105. eval_protocol/rewards/json_schema.py +422 -0
  106. eval_protocol/rewards/language_consistency.py +700 -0
  107. eval_protocol/rewards/lean_prover.py +479 -0
  108. eval_protocol/rewards/length.py +375 -0
  109. eval_protocol/rewards/list_comparison_math_reward.py +221 -0
  110. eval_protocol/rewards/math.py +762 -0
  111. eval_protocol/rewards/multiple_choice_math_reward.py +232 -0
  112. eval_protocol/rewards/reasoning_steps.py +249 -0
  113. eval_protocol/rewards/repetition.py +342 -0
  114. eval_protocol/rewards/tag_count.py +162 -0
  115. eval_protocol/rl_processing.py +82 -0
  116. eval_protocol/server.py +271 -0
  117. eval_protocol/typed_interface.py +260 -0
  118. eval_protocol/utils/__init__.py +8 -0
  119. eval_protocol/utils/batch_evaluation.py +217 -0
  120. eval_protocol/utils/batch_transformation.py +205 -0
  121. eval_protocol/utils/dataset_helpers.py +112 -0
  122. eval_protocol/utils/module_loader.py +56 -0
  123. eval_protocol/utils/packaging_utils.py +108 -0
  124. eval_protocol/utils/static_policy.py +305 -0
  125. eval_protocol-0.0.3.dist-info/METADATA +635 -0
  126. eval_protocol-0.0.3.dist-info/RECORD +130 -0
  127. eval_protocol-0.0.3.dist-info/WHEEL +5 -0
  128. eval_protocol-0.0.3.dist-info/entry_points.txt +4 -0
  129. eval_protocol-0.0.3.dist-info/licenses/LICENSE +201 -0
  130. eval_protocol-0.0.3.dist-info/top_level.txt +2 -0
@@ -0,0 +1,479 @@
1
+ """
2
+ DockerResource: A ForkableResource for managing Docker container states.
3
+ """
4
+
5
+ import io
6
+ import uuid
7
+ from pathlib import Path
8
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
9
+
10
+ from ..resource_abc import ForkableResource
11
+
12
# Import the Docker SDK if it is installed; otherwise define inert stand-ins so
# that this module can still be imported (DockerResource() will then refuse to
# construct). Two module-level flags record what was found:
#   DOCKER_SDK_AVAILABLE    - the `docker` package could be imported
#   DOCKER_DAEMON_AVAILABLE - a daemon answered ping() at import time
try:
    import docker
    from docker.errors import APIError, DockerException, NotFound
    from docker.models.containers import Container

    DOCKER_SDK_AVAILABLE = True

    # Probe the daemon once. The client name is bound to None up front so the
    # cleanup below never references an unbound name when from_env() itself fails.
    _daemon_check_client = None
    try:
        _daemon_check_client = docker.from_env()
        _daemon_check_client.ping()
        DOCKER_DAEMON_AVAILABLE = True
    except Exception:
        # Any failure while creating the client or pinging means "not usable".
        DOCKER_DAEMON_AVAILABLE = False
    finally:
        if _daemon_check_client is not None:
            try:
                _daemon_check_client.close()
            except Exception:
                pass  # Best-effort: the probe client is throwaway.

except ImportError:
    DOCKER_SDK_AVAILABLE = False
    DOCKER_DAEMON_AVAILABLE = False

    # Dummy classes/exceptions, defined only when the SDK import fails, so that
    # annotations and `except` clauses in this module still resolve.
    class DockerException(Exception):  # type: ignore[no-redef]
        pass

    class NotFound(DockerException):  # type: ignore[no-redef]
        pass

    class APIError(DockerException):  # type: ignore[no-redef]
        pass

    class Container:  # type: ignore[no-redef]
        # Placeholder attributes mirroring the ones this module reads.
        id: str = ""
        name: str = ""
        image: Any = None
        status: str = ""
        ports: Dict[str, Any] = {}

        def remove(self, force: bool = False, v: bool = False) -> None:
            pass

        def commit(self, **kwargs: Any) -> Any:
            return None

        def reload(self) -> None:
            pass

        def start(self) -> None:
            pass

        def exec_run(self, **kwargs: Any) -> Tuple[int, bytes]:
            return (0, b"")

        def logs(self, **kwargs: Any) -> bytes:
            return b""
81
+
82
+
83
+ class DockerResource(ForkableResource):
84
+ """
85
+ A ForkableResource for managing Docker container states.
86
+
87
+ Allows initializing a container from an image, forking (by committing the
88
+ current container and starting a new one from the committed image),
89
+ checkpointing (committing to an image), and restoring from a checkpoint image.
90
+ Commands can be executed within the container.
91
+
92
+ Requires the Docker SDK (`docker` pip package) to be installed and Docker daemon running.
93
+ """
94
+
95
def __init__(self) -> None:
    """
    Create an unconfigured resource backed by a new Docker client.

    Raises:
        ImportError: If the Docker SDK could not be imported.
        RuntimeError: If the import-time daemon check did not succeed.
    """
    # Both flags are computed once when this module is imported.
    if not DOCKER_SDK_AVAILABLE:
        raise ImportError("Docker SDK not found. Please install 'docker' package to use DockerResource.")
    if not DOCKER_DAEMON_AVAILABLE:
        raise RuntimeError("Docker daemon not running or not accessible")

    self._client = docker.from_env()

    # Populated by setup() / fork() / restore().
    self._config: Dict[str, Any] = {}
    self._container: Optional[Container] = None
    # Image the current container was started from (base name or committed image ID).
    self._image_id_for_fork_or_checkpoint: Optional[str] = None

    # Set by close(); guards against operating on a closed resource.
    self._is_closed = False
107
+
108
+ def _generate_name(self, prefix: str) -> str:
109
+ return f"rk_{prefix}_{uuid.uuid4().hex}"
110
+
111
+ def _cleanup_container(self, container: Optional[Container]) -> None:
112
+ if container:
113
+ try:
114
+ container.remove(force=True, v=True) # v=True to remove volumes
115
+ except NotFound:
116
+ pass # Already removed
117
+ except APIError as e:
118
+ print(f"DockerResource: Error removing container {(container.id or '')[:12]}: {e}")
119
+
120
+ def _cleanup_image(self, image_id: Optional[str]) -> None:
121
+ if image_id:
122
+ try:
123
+ self._client.images.remove(image=image_id, force=True)
124
+ except NotFound:
125
+ pass # Already removed
126
+ except APIError as e:
127
+ # Often "image is being used by stopped container" if cleanup order is tricky
128
+ print(f"DockerResource: Error removing image {image_id[:12]}: {e}")
129
+
130
+ async def setup(self, config: Dict[str, Any]) -> None:
131
+ """
132
+ Initializes and starts a Docker container based on the provided configuration.
133
+
134
+ Args:
135
+ config: Configuration dictionary. Expected keys:
136
+ - 'image_name' (str): Name of the Docker image to use (e.g., 'ubuntu:latest').
137
+ - 'container_name' (Optional[str]): Name for the container. Defaults to a UUID.
138
+ - 'docker_run_options' (Optional[Dict[str, Any]]): Options for docker.client.containers.run()
139
+ e.g., {'detach': True, 'ports': {'80/tcp': 8080}, 'environment': ["VAR=value"]}
140
+ 'detach' will always be True.
141
+ """
142
+ if self._is_closed:
143
+ raise RuntimeError("Cannot setup a closed DockerResource.")
144
+ self._config = config.copy()
145
+
146
+ image_name = self._config.get("image_name")
147
+ if not image_name:
148
+ raise ValueError("Missing 'image_name' in DockerResource config.")
149
+
150
+ # Pull the image if not present locally (optional, could be pre-pulled)
151
+ try:
152
+ self._client.images.get(image_name)
153
+ except NotFound:
154
+ print(f"DockerResource: Image '{image_name}' not found locally. Pulling...")
155
+ try:
156
+ self._client.images.pull(image_name)
157
+ except APIError as e:
158
+ raise DockerException(f"Failed to pull image '{image_name}': {e}") from e
159
+
160
+ self._image_id_for_fork_or_checkpoint = image_name # Base image for the first container
161
+
162
+ container_name = self._config.get("container_name", self._generate_name("container"))
163
+ run_options = self._config.get("docker_run_options", {}).copy()
164
+ run_options["detach"] = True # Must be detached for this model
165
+ run_options["name"] = container_name
166
+
167
+ # Clean up any existing container with the same name (e.g. from a failed previous run)
168
+ try:
169
+ existing_container = self._client.containers.get(container_name)
170
+ self._cleanup_container(existing_container)
171
+ except NotFound:
172
+ pass
173
+
174
+ try:
175
+ self._container = self._client.containers.run(image_name, **run_options)
176
+ if self._container:
177
+ self._container.reload() # Ensure state is up-to-date
178
+ except APIError as e:
179
+ raise DockerException(
180
+ f"Failed to start container '{container_name}' from image '{image_name}': {e}"
181
+ ) from e
182
+
183
async def fork(self) -> "DockerResource":
    """
    Creates a new DockerResource by committing the current container's state
    to a new image and starting a new container from that image.

    The fork inherits a shallow copy of this resource's config with a freshly
    generated 'container_name', and starts its container through its own
    Docker client so that it does not depend on this resource's client.

    Returns:
        The new DockerResource, whose container runs from the committed image.

    Raises:
        RuntimeError: If this resource is closed or has no container.
        DockerException: If committing the container or starting the fork fails.
    """
    if self._is_closed or not self._container:
        raise RuntimeError("Cannot fork: resource is closed or not set up.")

    # 1. Commit current container to a new image
    fork_image_tag = self._generate_name("fork_img")
    try:
        committed_image = self._container.commit(repository=fork_image_tag)
    except APIError as e:
        raise DockerException(f"Failed to commit container {(self._container.id or '')[:12]} for fork: {e}") from e

    # 2. Create new DockerResource instance
    try:
        forked_resource = DockerResource()
    except Exception:
        # Nobody will ever own the committed image if the fork cannot be built.
        self._cleanup_image(committed_image.id)
        raise
    forked_resource._config = self._config.copy()  # Inherit original config

    # Modify config for the new container if needed (e.g., new name)
    forked_container_name = self._generate_name("fork_container")
    forked_resource._config["container_name"] = forked_container_name

    # The new container will run from the committed image
    forked_resource._image_id_for_fork_or_checkpoint = committed_image.id

    # `or {}` tolerates an explicit None for the optional run options.
    run_options = dict(self._config.get("docker_run_options") or {})
    run_options["detach"] = True
    run_options["name"] = forked_container_name

    try:
        # Use the fork's own client so the fork stays usable independently of this resource.
        forked_resource._container = forked_resource._client.containers.run(committed_image.id, **run_options)
        if forked_resource._container:
            forked_resource._container.reload()
    except APIError as e:
        # Undo everything created for the fork: a possibly half-started container
        # first (the image may otherwise still be in use), then the committed
        # image, then the fork's client.
        self._cleanup_container(forked_resource._container)
        forked_resource._container = None
        self._cleanup_image(committed_image.id)
        forked_resource._image_id_for_fork_or_checkpoint = None
        forked_resource._is_closed = True
        try:
            forked_resource._client.close()
        except Exception:
            pass  # Best-effort; the start failure below is the error worth reporting.
        raise DockerException(f"Failed to start forked container from image {committed_image.id[:12]}: {e}") from e

    return forked_resource
222
+
223
async def checkpoint(self) -> Dict[str, Any]:
    """
    Commit the container's current state to a new, uniquely tagged image.

    Returns:
        ``{"type": "docker_image_id", "image_id": <committed image ID>}``,
        the shape that restore() accepts.

    Raises:
        RuntimeError: If the resource is closed or has no container.
        DockerException: If the commit fails.
    """
    container = self._container
    if self._is_closed or not container:
        raise RuntimeError("Cannot checkpoint: resource is closed or not set up.")

    image_tag = self._generate_name("checkpoint_img")
    try:
        snapshot = container.commit(repository=image_tag)
        state = {"type": "docker_image_id", "image_id": snapshot.id}
    except APIError as err:
        raise DockerException(
            f"Failed to commit container {(container.id or '')[:12]} for checkpoint: {err}"
        ) from err
    return state
239
+
240
async def restore(self, state_data: Dict[str, Any]) -> None:
    """
    Restores the resource by starting a new container from a checkpointed image ID.
    The existing container (if any) is removed first.

    Args:
        state_data: Must be {'type': 'docker_image_id', 'image_id': '<id>'},
            i.e. the dictionary returned by checkpoint().

    Raises:
        RuntimeError: If the resource has been closed.
        ValueError: If state_data does not have the expected shape.
        DockerException: If the image does not exist or the container cannot be started.
    """
    if self._is_closed:
        raise RuntimeError("Cannot restore a closed DockerResource.")

    image_id = state_data.get("image_id")
    if state_data.get("type") != "docker_image_id" or not image_id:
        raise ValueError(
            "Invalid state_data for DockerResource restore. Expected {'type': 'docker_image_id', 'image_id': '...'}"
        )

    # Ensure the checkpointed image exists
    try:
        self._client.images.get(image_id)
    except NotFound:
        raise DockerException(f"Checkpoint image ID '{image_id}' not found.") from None

    # Cleanup existing container before restoring
    if self._container:
        self._cleanup_container(self._container)
        self._container = None

    # Update current image ID to the one we are restoring from
    self._image_id_for_fork_or_checkpoint = image_id

    # Reuse the configured name when there is one. `or` (rather than a .get
    # default) so that an explicit None also falls back to a generated name.
    restored_container_name = self._config.get("container_name") or self._generate_name("restored_container")
    self._config["container_name"] = restored_container_name  # Update config for consistency

    # `or {}` tolerates an explicit None for the optional run options.
    run_options = dict(self._config.get("docker_run_options") or {})
    run_options["detach"] = True
    run_options["name"] = restored_container_name

    try:
        self._container = self._client.containers.run(image_id, **run_options)
        if self._container:
            self._container.reload()
    except APIError as e:
        raise DockerException(f"Failed to start container from checkpoint image {image_id[:12]}: {e}") from e
282
+
283
async def step(self, action_name: str, action_params: Dict[str, Any]) -> Any:
    """
    Run one action against the managed container.

    The container state is refreshed first; if it is not running, one attempt
    is made to start it before the action is dispatched.

    Supported actions:
    - 'exec_command': Executes a command inside the container.
        Params: {'command': str | List[str], 'workdir': Optional[str], 'user': Optional[str]}
        Returns: {'exit_code': int, 'output': str} where 'output' is the combined
        stdout/stderr decoded as UTF-8 with undecodable bytes replaced.
    - 'get_logs': Retrieves container logs.
        Params: {'stdout': bool, 'stderr': bool, 'timestamps': bool, 'tail': int | 'all'}
        Returns: str (logs, decoded as UTF-8 with undecodable bytes replaced)

    Raises:
        RuntimeError: If the resource is closed or has no container.
        ValueError: If 'exec_command' is requested without a 'command'.
        NotImplementedError: If action_name is not one of the supported actions.
        DockerException: If the container cannot be started or the action fails.
    """
    container = self._container
    if self._is_closed or not container:
        raise RuntimeError("Cannot execute step: resource is closed or not set up.")

    # Refresh state; try to start the container if it has stopped.
    container.reload()
    if container.status != "running":
        try:
            container.start()
            container.reload()
            if container.status != "running":
                raise DockerException(
                    f"Container {(container.id or '')[:12]} is not running (status: {container.status}). Cannot execute step."
                )
        except APIError as err:
            raise DockerException(
                f"Failed to start container {(container.id or '')[:12]} for step: {err}"
            ) from err

    if action_name not in ("exec_command", "get_logs"):
        raise NotImplementedError(f"Action '{action_name}' not supported by DockerResource.")

    if action_name == "get_logs":
        log_kwargs = {
            "stdout": action_params.get("stdout", True),
            "stderr": action_params.get("stderr", True),
            "timestamps": action_params.get("timestamps", False),
            "tail": action_params.get("tail", "all"),
        }
        try:
            raw_logs = container.logs(**log_kwargs)
            return raw_logs.decode("utf-8", errors="replace")
        except APIError as err:
            raise DockerException(
                f"Failed to get logs for container {(container.id or '')[:12]}: {err}"
            ) from err

    # action_name == "exec_command"
    command = action_params.get("command")
    if not command:
        raise ValueError("Missing 'command' in action_params for 'exec_command'.")

    exec_kwargs: Dict[str, Any] = {"cmd": command, "stdout": True, "stderr": True}
    # Optional settings are only forwarded when the caller supplied a value.
    for optional_key in ("workdir", "user"):
        optional_value = action_params.get(optional_key)
        if optional_value is not None:
            exec_kwargs[optional_key] = optional_value
    exec_kwargs["demux"] = False  # Get stdout and stderr interleaved as a single stream

    try:
        exit_code, raw_output = container.exec_run(**exec_kwargs)
        return {
            "exit_code": exit_code,
            "output": (raw_output or b"").decode("utf-8", errors="replace"),
        }
    except APIError as err:
        raise DockerException(
            f"Failed to execute command in container {(container.id or '')[:12]}: {err}"
        ) from err
356
+
357
async def get_observation(self) -> Dict[str, Any]:
    """
    Returns information about the current container.

    Returns:
        {"status": "closed or not_initialized"} when the resource is closed or
        has no container. Otherwise a dictionary with the keys 'type'
        (always "docker"), 'container_id', 'container_name', 'image_id',
        'status' and 'ports', read after refreshing the container state.
        'image_id' falls back to the image this resource recorded for the
        container when the container reports no image.
    """
    if self._is_closed or not self._container:
        return {"status": "closed or not_initialized"}

    container = self._container
    container.reload()

    # Read `image` exactly once: in the Docker SDK it is a property resolved
    # through the client, so every access can cost a daemon round-trip.
    image = getattr(container, "image", None)
    image_id = image.id if image else self._image_id_for_fork_or_checkpoint

    return {
        "type": "docker",
        "container_id": container.id,
        "container_name": container.name,
        "image_id": image_id,
        "status": container.status,
        "ports": container.ports,
    }
377
+
378
async def get_tools_spec(self) -> List[Dict[str, Any]]:
    """
    Describe the actions accepted by step() as function-style tool specs.

    Returns:
        A newly built list with one entry per tool ('exec_command' and
        'get_logs'), each of the form
        {"type": "function", "function": {"name", "description", "parameters"}}.
    """
    exec_parameters: Dict[str, Any] = {
        "type": "object",
        "properties": {
            "command": {
                "oneOf": [
                    {"type": "string"},
                    {"type": "array", "items": {"type": "string"}},
                ],
                "description": "The command to execute (string or list of strings).",
            },
            "workdir": {
                "type": "string",
                "description": "Working directory inside the container (optional).",
            },
            "user": {
                "type": "string",
                "description": "User to run command as (optional).",
            },
        },
        "required": ["command"],
    }

    logs_parameters: Dict[str, Any] = {
        "type": "object",
        "properties": {
            "stdout": {
                "type": "boolean",
                "default": True,
                "description": "Include stdout.",
            },
            "stderr": {
                "type": "boolean",
                "default": True,
                "description": "Include stderr.",
            },
            "tail": {
                "oneOf": [
                    {"type": "integer"},
                    {"type": "string", "enum": ["all"]},
                ],
                "default": "all",
                "description": "Number of lines from end of logs or 'all'.",
            },
        },
    }

    # (name, description, parameter schema) for every exposed tool.
    tool_table = (
        ("exec_command", "Executes a command inside the Docker container.", exec_parameters),
        ("get_logs", "Retrieves logs from the Docker container.", logs_parameters),
    )
    return [
        {
            "type": "function",
            "function": {
                "name": tool_name,
                "description": tool_description,
                "parameters": tool_parameters,
            },
        }
        for tool_name, tool_description, tool_parameters in tool_table
    ]
442
+
443
async def close(self) -> None:
    """
    Removes the managed Docker container, removes the image the container was
    started from when that image is not the original base image from the
    config, and closes this resource's Docker client.

    Calling close() on an already closed resource does nothing.
    """
    if self._is_closed:
        return

    self._cleanup_container(self._container)
    self._container = None

    # self._image_id_for_fork_or_checkpoint is the image the *current* container
    # was made from. It is only removed when it differs from the base image in
    # the config, both by name and by resolved image ID.
    current_image = self._image_id_for_fork_or_checkpoint
    original_base_image_name = self._config.get("image_name")
    if current_image and current_image != original_base_image_name:
        remove_image = True
        if original_base_image_name is not None:
            try:
                base_image = self._client.images.get(original_base_image_name)
                remove_image = base_image.id != current_image
            except NotFound:
                # The base image cannot be resolved (e.g. it was removed), so
                # the current image cannot be the base image.
                remove_image = True
            except APIError as e:
                # Without the lookup we cannot tell the images apart; keep the
                # image rather than risk deleting the base, and finish closing.
                print(f"DockerResource: Error resolving base image '{original_base_image_name}': {e}")
                remove_image = False
        if remove_image:
            self._cleanup_image(current_image)

    self._image_id_for_fork_or_checkpoint = None
    self._is_closed = True

    # Release the client created in __init__; a failure here must not undo the close.
    try:
        self._client.close()
    except Exception as e:
        print(f"DockerResource: Error closing Docker client: {e}")