eval-protocol 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. development/__init__.py +1 -0
  2. development/normalize_sandbox_fusion.py +628 -0
  3. development/utils/__init__.py +1 -0
  4. development/utils/generate_api_key.py +31 -0
  5. development/utils/subprocess_manager.py +481 -0
  6. eval_protocol/__init__.py +86 -0
  7. eval_protocol/__main__.py +10 -0
  8. eval_protocol/_version.py +21 -0
  9. eval_protocol/adapters/__init__.py +1 -0
  10. eval_protocol/adapters/braintrust.py +8 -0
  11. eval_protocol/adapters/trl.py +8 -0
  12. eval_protocol/agent/__init__.py +29 -0
  13. eval_protocol/agent/models.py +69 -0
  14. eval_protocol/agent/orchestrator.py +893 -0
  15. eval_protocol/agent/resource_abc.py +89 -0
  16. eval_protocol/agent/resource_pool.py +184 -0
  17. eval_protocol/agent/resources/__init__.py +44 -0
  18. eval_protocol/agent/resources/bfcl_envs/__init__.py +1 -0
  19. eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +342 -0
  20. eval_protocol/agent/resources/bfcl_envs/math_api.py +40 -0
  21. eval_protocol/agent/resources/bfcl_envs/posting_api.py +157 -0
  22. eval_protocol/agent/resources/bfcl_sim_api_resource.py +314 -0
  23. eval_protocol/agent/resources/docker_resource.py +479 -0
  24. eval_protocol/agent/resources/filesystem_resource.py +371 -0
  25. eval_protocol/agent/resources/http_rollout_protocol.py +85 -0
  26. eval_protocol/agent/resources/http_rollout_resource.py +325 -0
  27. eval_protocol/agent/resources/python_state_resource.py +170 -0
  28. eval_protocol/agent/resources/sql_resource.py +271 -0
  29. eval_protocol/agent/task_manager.py +1064 -0
  30. eval_protocol/agent/tool_registry.py +111 -0
  31. eval_protocol/auth.py +156 -0
  32. eval_protocol/cli.py +425 -0
  33. eval_protocol/cli_commands/__init__.py +1 -0
  34. eval_protocol/cli_commands/agent_eval_cmd.py +264 -0
  35. eval_protocol/cli_commands/common.py +242 -0
  36. eval_protocol/cli_commands/deploy.py +486 -0
  37. eval_protocol/cli_commands/deploy_mcp.py +287 -0
  38. eval_protocol/cli_commands/preview.py +186 -0
  39. eval_protocol/cli_commands/run_eval_cmd.py +202 -0
  40. eval_protocol/common_utils.py +36 -0
  41. eval_protocol/config.py +180 -0
  42. eval_protocol/datasets/__init__.py +1 -0
  43. eval_protocol/datasets/loader.py +521 -0
  44. eval_protocol/evaluation.py +1045 -0
  45. eval_protocol/execution/__init__.py +1 -0
  46. eval_protocol/execution/pipeline.py +920 -0
  47. eval_protocol/gcp_tools.py +484 -0
  48. eval_protocol/generation/cache.py +141 -0
  49. eval_protocol/generation/clients/base.py +67 -0
  50. eval_protocol/generation/clients.py +248 -0
  51. eval_protocol/generic_server.py +165 -0
  52. eval_protocol/integrations/__init__.py +12 -0
  53. eval_protocol/integrations/braintrust.py +51 -0
  54. eval_protocol/integrations/deepeval.py +106 -0
  55. eval_protocol/integrations/openeval.py +40 -0
  56. eval_protocol/integrations/trl.py +187 -0
  57. eval_protocol/mcp/__init__.py +48 -0
  58. eval_protocol/mcp/adapter.py +131 -0
  59. eval_protocol/mcp/client/__init__.py +12 -0
  60. eval_protocol/mcp/client/connection.py +499 -0
  61. eval_protocol/mcp/clients.py +195 -0
  62. eval_protocol/mcp/execution/__init__.py +23 -0
  63. eval_protocol/mcp/execution/base_policy.py +227 -0
  64. eval_protocol/mcp/execution/fireworks_policy.py +209 -0
  65. eval_protocol/mcp/execution/manager.py +506 -0
  66. eval_protocol/mcp/execution/policy.py +421 -0
  67. eval_protocol/mcp/grid_renderer.py +54 -0
  68. eval_protocol/mcp/mcpgym.py +637 -0
  69. eval_protocol/mcp/process_manager.py +177 -0
  70. eval_protocol/mcp/session/__init__.py +11 -0
  71. eval_protocol/mcp/session/manager.py +228 -0
  72. eval_protocol/mcp/simple_process_manager.py +291 -0
  73. eval_protocol/mcp/simulation_server.py +458 -0
  74. eval_protocol/mcp/types.py +80 -0
  75. eval_protocol/mcp_agent/__init__.py +1 -0
  76. eval_protocol/mcp_agent/config.py +147 -0
  77. eval_protocol/mcp_agent/intermediary_server.py +542 -0
  78. eval_protocol/mcp_agent/main.py +210 -0
  79. eval_protocol/mcp_agent/orchestration/__init__.py +1 -0
  80. eval_protocol/mcp_agent/orchestration/base_client.py +132 -0
  81. eval_protocol/mcp_agent/orchestration/local_docker_client.py +702 -0
  82. eval_protocol/mcp_agent/orchestration/remote_http_client.py +304 -0
  83. eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +3 -0
  84. eval_protocol/mcp_agent/session.py +79 -0
  85. eval_protocol/mcp_env.py +304 -0
  86. eval_protocol/models.py +366 -0
  87. eval_protocol/packaging.py +219 -0
  88. eval_protocol/platform_api.py +360 -0
  89. eval_protocol/playback_policy.py +396 -0
  90. eval_protocol/resources.py +128 -0
  91. eval_protocol/reward_function.py +410 -0
  92. eval_protocol/rewards/__init__.py +94 -0
  93. eval_protocol/rewards/accuracy.py +454 -0
  94. eval_protocol/rewards/accuracy_length.py +173 -0
  95. eval_protocol/rewards/apps_coding_reward.py +331 -0
  96. eval_protocol/rewards/apps_execution_utils.py +149 -0
  97. eval_protocol/rewards/apps_testing_util.py +559 -0
  98. eval_protocol/rewards/bfcl_reward.py +313 -0
  99. eval_protocol/rewards/code_execution.py +1620 -0
  100. eval_protocol/rewards/code_execution_utils.py +72 -0
  101. eval_protocol/rewards/cpp_code.py +861 -0
  102. eval_protocol/rewards/deepcoder_reward.py +161 -0
  103. eval_protocol/rewards/format.py +129 -0
  104. eval_protocol/rewards/function_calling.py +541 -0
  105. eval_protocol/rewards/json_schema.py +422 -0
  106. eval_protocol/rewards/language_consistency.py +700 -0
  107. eval_protocol/rewards/lean_prover.py +479 -0
  108. eval_protocol/rewards/length.py +375 -0
  109. eval_protocol/rewards/list_comparison_math_reward.py +221 -0
  110. eval_protocol/rewards/math.py +762 -0
  111. eval_protocol/rewards/multiple_choice_math_reward.py +232 -0
  112. eval_protocol/rewards/reasoning_steps.py +249 -0
  113. eval_protocol/rewards/repetition.py +342 -0
  114. eval_protocol/rewards/tag_count.py +162 -0
  115. eval_protocol/rl_processing.py +82 -0
  116. eval_protocol/server.py +271 -0
  117. eval_protocol/typed_interface.py +260 -0
  118. eval_protocol/utils/__init__.py +8 -0
  119. eval_protocol/utils/batch_evaluation.py +217 -0
  120. eval_protocol/utils/batch_transformation.py +205 -0
  121. eval_protocol/utils/dataset_helpers.py +112 -0
  122. eval_protocol/utils/module_loader.py +56 -0
  123. eval_protocol/utils/packaging_utils.py +108 -0
  124. eval_protocol/utils/static_policy.py +305 -0
  125. eval_protocol-0.0.3.dist-info/METADATA +635 -0
  126. eval_protocol-0.0.3.dist-info/RECORD +130 -0
  127. eval_protocol-0.0.3.dist-info/WHEEL +5 -0
  128. eval_protocol-0.0.3.dist-info/entry_points.txt +4 -0
  129. eval_protocol-0.0.3.dist-info/licenses/LICENSE +201 -0
  130. eval_protocol-0.0.3.dist-info/top_level.txt +2 -0
@@ -0,0 +1,702 @@
1
+ import asyncio
2
+ import logging
3
+ import shutil # Added for directory copying
4
+ import uuid
5
+ from pathlib import Path
6
+ from typing import Any, AsyncIterator, Dict, List, Optional, Set, Tuple
7
+
8
+ import docker
9
+ import docker.errors
10
+ import docker.models.containers
11
+ import httpx
12
+ import mcp.types as types
13
+ from anyio.abc import ObjectReceiveStream, ObjectSendStream
14
+
15
+ # ListToolsResult is not in mcp.client.session, likely in mcp.types or mcp.shared.message
16
+ from mcp.client.session import DEFAULT_CLIENT_INFO, ClientSession, SessionMessage
17
+
18
+ # Assuming ListToolsResult is in mcp.types, which is imported as types
19
+ # If not, this will need further correction. For now, we'll use types.ListToolsResult
20
+ from mcp.client.stdio import StdioServerParameters, stdio_client
21
+ from mcp.client.streamable_http import ( # Added for HTTP transport tool listing
22
+ streamablehttp_client,
23
+ )
24
+
25
+ from eval_protocol.mcp_agent.config import AppConfig, BackendServerConfig
26
+ from eval_protocol.mcp_agent.orchestration.base_client import (
27
+ AbstractOrchestrationClient,
28
+ ManagedInstanceInfo,
29
+ )
30
+
31
+ logger = logging.getLogger(__name__)
32
+ ENCODING = "utf-8"
33
+ DEFAULT_INSTANCE_DATA_BASE_PATH = Path("/tmp/rk_mcp_instance_data")
34
+
35
+
36
+ class LocalDockerOrchestrationClient(AbstractOrchestrationClient):
37
def __init__(self, app_config: AppConfig):
    """Store the application config and create empty bookkeeping state.

    No Docker or HTTP connection is opened here; both client attributes
    start out as ``None``.

    Args:
        app_config: Application configuration kept on the instance as
            ``self.app_config``.
    """
    # Per-instance stdio bookkeeping, each keyed by instance UUID.
    self._stdio_shutdown_events: Dict[str, asyncio.Event] = {}
    self._stdio_client_sessions: Dict[str, ClientSession] = {}
    self._stdio_instance_tasks: Dict[str, asyncio.Task] = {}

    # Host-side resources handed out by this client.
    self._temporary_images: Set[str] = set()
    self._used_host_ports: Set[int] = set()
    self.instance_data_base_path = DEFAULT_INSTANCE_DATA_BASE_PATH

    # Clients start unset.
    self.http_client: Optional[httpx.AsyncClient] = None
    self.docker_client: Optional[docker.DockerClient] = None

    self.app_config = app_config
48
+
49
async def startup(self) -> None:
    """Create the instance data directory, connect to Docker, build the HTTP client.

    The Docker connection is first attempted with ``docker.from_env()``; if
    that raises a ``DockerException`` an explicit Unix-socket base URL is
    tried instead.

    Raises:
        ConnectionError: If a ping returns a falsy value, or if the
            explicit-URL fallback itself raises a ``DockerException``.
    """
    data_root = self.instance_data_base_path
    data_root.mkdir(parents=True, exist_ok=True)
    logger.info(f"Instance data base path for host-copied templates: {data_root.resolve()}")

    try:
        self.docker_client = docker.from_env()
        reachable = self.docker_client.ping()  # type: ignore
        if not reachable:
            raise ConnectionError("Failed to connect to Docker daemon using docker.from_env().")
        logger.info("Successfully connected to Docker daemon.")
    except docker.errors.DockerException as env_error:
        logger.warning(f"docker.from_env() failed: {env_error}. Trying explicit base_url.")
        try:
            self.docker_client = docker.DockerClient(base_url="unix://var/run/docker.sock")
            reachable = self.docker_client.ping()  # type: ignore
            if not reachable:
                raise ConnectionError("Failed to connect to Docker daemon with explicit base_url.")
            logger.info("Successfully connected to Docker daemon with explicit base_url.")
        except docker.errors.DockerException as e_explicit:
            raise ConnectionError(f"Docker client initialization failed: {e_explicit}") from e_explicit

    # Only a dict-shaped defaults object is consulted for the HTTP timeout.
    remote_defaults = self.app_config.global_remote_api_defaults
    if not isinstance(remote_defaults, dict):
        remote_defaults = {}
    request_timeout = remote_defaults.get("timeout", 30.0)
    self.http_client = httpx.AsyncClient(timeout=request_timeout)
    logger.info("LocalDockerOrchestrationClient started.")
74
+
75
async def _manage_stdio_instance_lifecycle(
    self,
    instance_uuid: str,
    container_name: str,
    server_params: StdioServerParameters,
    initialization_complete_event: asyncio.Event,
    shutdown_event: asyncio.Event,
) -> None:
    """Own one stdio MCP session from transport setup until shutdown is signalled.

    The stdio transport and the ``ClientSession`` are opened and closed
    inside this coroutine. Once the session is initialized it is published
    in ``self._stdio_client_sessions`` and ``initialization_complete_event``
    is set; the coroutine then waits on ``shutdown_event``.

    Args:
        instance_uuid: Key used for the session and shutdown-event dicts.
        container_name: Used only as a prefix in log messages here.
        server_params: Parameters passed to ``stdio_client``.
        initialization_complete_event: Set on success *and* on failure, so
            a waiter must check ``self._stdio_client_sessions`` to tell the
            two apart.
        shutdown_event: Awaited after initialization; setting it lets the
            context managers exit.
    """
    client_session_stdio: Optional[ClientSession] = None
    try:
        logger.info(f"[{container_name}] Lifecycle task started.")
        async with stdio_client(server_params) as (read_stream, write_stream):
            logger.info(f"[{container_name}] Stdio transport established via stdio_client.")

            client_session_stdio = ClientSession(
                read_stream=read_stream,
                write_stream=write_stream,
                client_info=DEFAULT_CLIENT_INFO,
            )

            async with client_session_stdio:
                logger.info(f"[{container_name}] Attempting to initialize ClientSession...")
                await asyncio.wait_for(client_session_stdio.initialize(), timeout=15.0)
                logger.info(f"[{container_name}] ClientSession initialized successfully.")

                # Tool listing is informational only: any failure is logged
                # and must not abort initialization. The name is bound up
                # front so the AttributeError handler can reference it even
                # when the error was raised by list_tools() itself.
                list_tools_response: Optional[types.ListToolsResult] = None
                try:
                    list_tools_response = await asyncio.wait_for(
                        client_session_stdio.list_tools(), timeout=5.0
                    )
                    if hasattr(list_tools_response, "tools") and list_tools_response.tools is not None:
                        reported_tools = [tool.name for tool in list_tools_response.tools]
                        logger.info(f"[{container_name}] Backend server reported tools: {reported_tools}")
                    else:
                        logger.warning(
                            f"[{container_name}] Backend server list_tools response did not contain 'tools' attribute or it was None. Response: {list_tools_response}"
                        )
                except AttributeError as e_attr:
                    logger.warning(
                        f"[{container_name}] AttributeError accessing tools from list_tools response: {e_attr}. Response: {getattr(list_tools_response, '__dict__', list_tools_response)}"
                    )
                except Exception as e_list_tools:
                    logger.warning(
                        f"[{container_name}] Error calling/processing list_tools on backend server: {e_list_tools}"
                    )

                # Publish the live session before waking the waiter.
                self._stdio_client_sessions[instance_uuid] = client_session_stdio
                initialization_complete_event.set()

                await shutdown_event.wait()
                logger.info(f"[{container_name}] Shutdown event received.")

        logger.info(f"[{container_name}] stdio_client context exited cleanly.")

    except asyncio.TimeoutError:
        logger.error(f"[{container_name}] Timeout during ClientSession initialization.")
        # Wake the waiter; it sees no registered session and reports failure.
        initialization_complete_event.set()
    except Exception as e:
        logger.error(
            f"[{container_name}] Error in stdio instance lifecycle: {e}",
            exc_info=True,
        )
        initialization_complete_event.set()
    finally:
        logger.debug(f"[{container_name}] In _manage_stdio_instance_lifecycle finally block.")
        if client_session_stdio is None:
            logger.info(f"[{container_name}] ClientSession was not created or assigned in lifecycle task.")

        # The session is no longer usable once the contexts have exited.
        self._stdio_client_sessions.pop(instance_uuid, None)
        self._stdio_shutdown_events.pop(instance_uuid, None)
        logger.info(f"[{container_name}] Lifecycle task finished.")
148
+
149
async def shutdown(self) -> None:
    """Close the HTTP client, stop all stdio lifecycle tasks, remove temp images.

    Every registered stdio shutdown event is set, then the lifecycle tasks
    are given a bounded time to finish. Tasks still running after that are
    cancelled so a stuck task cannot block shutdown indefinitely. Temporary
    images recorded in ``self._temporary_images`` are removed on a
    best-effort basis and the Docker client connection is closed.
    """
    # Same grace period deprovision_instances gives a single stdio task.
    task_grace_period_s = 10.0

    if self.http_client:
        await self.http_client.aclose()
        # Drop the closed client so later calls hit the "not initialized"
        # checks instead of using a closed httpx client.
        self.http_client = None

    logger.info(
        f"Shutting down LocalDockerOrchestrationClient. Cleaning up {len(self._stdio_instance_tasks)} stdio instance tasks."
    )
    # Iterate over a snapshot: lifecycle tasks pop their own event on exit.
    for instance_uuid, event in list(self._stdio_shutdown_events.items()):
        logger.info(f"Signaling shutdown for stdio instance task {instance_uuid}.")
        event.set()

    tasks_to_wait_for = list(self._stdio_instance_tasks.values())
    if tasks_to_wait_for:
        _, still_running = await asyncio.wait(tasks_to_wait_for, timeout=task_grace_period_s)
        for task in still_running:
            logger.error(
                f"Timeout waiting for stdio lifecycle task {task.get_name()} during shutdown. Cancelling."
            )
            task.cancel()

        # Collect outcomes (and let cancelled tasks unwind) without raising.
        results = await asyncio.gather(*tasks_to_wait_for, return_exceptions=True)
        for task, result in zip(tasks_to_wait_for, results):
            if isinstance(result, Exception):
                logger.error(
                    f"Stdio lifecycle task {task.get_name()} raised an exception during shutdown: {result}",
                    exc_info=result,
                )
        logger.info("All stdio instance tasks awaited.")
    self._stdio_instance_tasks.clear()

    if self.docker_client:
        for image_tag in list(self._temporary_images):
            try:
                self.docker_client.images.remove(image=image_tag, force=False)  # type: ignore
                self._temporary_images.discard(image_tag)
            except Exception as e:
                # Best effort: an image that cannot be removed stays tracked.
                logger.warning(f"Failed to remove temp image {image_tag}: {e}")
        if hasattr(self.docker_client, "api") and hasattr(self.docker_client.api, "close"):
            self.docker_client.api.close()  # type: ignore
        elif hasattr(self.docker_client, "close"):
            self.docker_client.close()  # type: ignore
    logger.info("LocalDockerOrchestrationClient shut down.")
187
+
188
async def _perform_startup_check(self, url: str, check: Dict[str, Any]) -> bool:
    """POST a tool call to ``url`` until it succeeds, up to five attempts.

    Args:
        url: Endpoint that receives the JSON payload.
        check: Mapping read for ``"tool_name"`` and optional ``"arguments"``.

    Returns:
        ``False`` when no HTTP client is set or all five attempts fail;
        ``True`` when ``check`` names no tool or an attempt returns a
        non-error status.
    """
    client = self.http_client
    if not client:
        return False

    tool = check.get("tool_name")
    tool_arguments = check.get("arguments", {})
    if not tool:
        # Nothing to verify counts as success.
        return True

    total_attempts = 5
    for attempt_number in range(1, total_attempts + 1):
        try:
            response = await client.post(url, json={"tool_name": tool, "arguments": tool_arguments})
            response.raise_for_status()
        except Exception as e:
            logger.warning(f"Startup check fail {attempt_number}/5: {e}")
            # Pause between attempts, but not after the final one.
            if attempt_number < total_attempts:
                await asyncio.sleep(2)
        else:
            return True
    return False
205
+
206
async def provision_instances(
    self,
    backend_config: BackendServerConfig,
    num_instances: int,
    session_id: str,
    template_details: Optional[Any] = None,  # template_details is generic, could be path for fs
) -> List[ManagedInstanceInfo]:
    """Start ``num_instances`` containers for one backend and return their handles.

    Template data is applied in one of two ways:

    * host copy: for ``backend_type == "filesystem"`` with an existing
      ``template_data_path_host`` directory, the template is copied to a
      per-instance host directory that is mounted at ``/data``;
    * image commit: otherwise, for session-scoped backends with template
      data and a ``container_template_data_path``, a throwaway container is
      started with the template mounted, committed to a temporary image,
      and instances are run from that image.

    ``"http"`` instances run detached with a dynamically assigned host
    port; ``"stdio"`` instances are launched through ``docker run -i`` by
    a dedicated lifecycle task.

    Args:
        backend_config: Backend definition (image, transport, volumes, ...).
        num_instances: Number of instances to create.
        session_id: Used in container names, labels and host data paths.
        template_details: Optional host path that takes precedence over
            ``backend_config.template_data_path_host`` for the image commit.

    Returns:
        One ``ManagedInstanceInfo`` per provisioned instance.

    Raises:
        RuntimeError: Docker client missing, no host port assigned, startup
            check failed, or the stdio session was not established.
        ValueError: Missing ``container_port`` for http, or an unsupported
            ``mcp_transport``.

    On any failure the failing instance is cleaned up, instances already
    provisioned by this call are deprovisioned (the caller never receives
    them), and the original exception is re-raised.
    """
    # Function-scope import: the stdio branch needs os.environ and the
    # module does not import os at file level.
    import os

    if not self.docker_client:
        raise RuntimeError("Docker client not initialized.")

    def _read_logs(target: Any) -> str:
        """Return the container's combined output, or "N/A" if unavailable."""
        try:
            return target.logs(stdout=True, stderr=True).decode(ENCODING, "replace")
        except Exception:
            return "N/A"

    image_to_run_from = backend_config.docker_image
    committed_img_tag: Optional[str] = None
    managed_instances: List[ManagedInstanceInfo] = []

    # template_commit_delay_s is an orchestration setting, not a
    # containers.run() keyword; it is split out so it is never forwarded.
    docker_run_options: Dict[str, Any] = dict(self.app_config.global_docker_options or {})
    template_commit_delay_s = docker_run_options.pop("template_commit_delay_s", 5)

    # Determine if we are using host copy for filesystem template
    use_host_copy_template = (
        backend_config.backend_type == "filesystem"
        and backend_config.template_data_path_host
        and Path(backend_config.template_data_path_host).is_dir()
    )

    # Image templating via docker commit. The host-copy template takes
    # precedence for filesystem backends when it applies.
    if (
        not use_host_copy_template
        and backend_config.instance_scoping == "session"
        and (template_details or backend_config.template_data_path_host)
        and backend_config.container_template_data_path
    ):
        host_path_for_commit = template_details or backend_config.template_data_path_host
        if not host_path_for_commit or not backend_config.container_template_data_path:
            raise ValueError(
                "template_data_path_host and container_template_data_path required for stateful session with image template."
            )

        temp_cont_name = f"rk-mcp-template-{session_id}-{backend_config.backend_name_ref}-{uuid.uuid4().hex[:4]}"
        temp_c = None
        try:
            logger.info(
                f"Creating template container for commit: {temp_cont_name} from {backend_config.docker_image}"
            )
            temp_c = self.docker_client.containers.run(  # type: ignore
                image=backend_config.docker_image,
                name=temp_cont_name,
                volumes={
                    str(Path(host_path_for_commit).resolve()): {
                        "bind": backend_config.container_template_data_path,
                        "mode": "rw",
                    }
                },
                detach=True,
            )
            # Allow time for potential init scripts in the container to
            # modify state from the template before committing.
            await asyncio.sleep(template_commit_delay_s)

            committed_img_tag = (
                f"rk-mcp-templateimg-{session_id}-{backend_config.backend_name_ref}:{uuid.uuid4().hex[:6]}"
            )
            logger.info(f"Committing {temp_c.id} to {committed_img_tag}")  # type: ignore
            repository, tag = committed_img_tag.split(":")
            temp_c.commit(repository=repository, tag=tag)  # type: ignore
            image_to_run_from = committed_img_tag
            # Tracked so shutdown() can remove it.
            self._temporary_images.add(committed_img_tag)
        finally:
            if temp_c is not None:
                try:
                    temp_c.stop(timeout=5)
                    temp_c.remove()  # type: ignore
                except Exception as e:
                    logger.warning(f"Could not cleanup template container for commit: {e}")

    for _ in range(num_instances):
        instance_uuid = uuid.uuid4().hex[:8]
        container_name = f"rk-mcp-inst-{session_id}-{backend_config.backend_name_ref}-{instance_uuid}"
        mcp_endpoint_url: Optional[str] = None
        host_port: Optional[int] = None
        container: Any = None  # HTTP container handle, kept for failure cleanup
        instance_internal_details: Dict[str, Any] = {
            "container_name": container_name,
            "instance_uuid": instance_uuid,
        }
        current_container_volumes = dict(backend_config.container_volumes or {})  # Start with configured volumes

        try:
            if use_host_copy_template and backend_config.template_data_path_host:
                instance_host_data_path = (
                    self.instance_data_base_path / session_id / backend_config.backend_name_ref / instance_uuid
                )
                instance_host_data_path.mkdir(parents=True, exist_ok=True)

                logger.info(
                    f"Copying template from {backend_config.template_data_path_host} to {instance_host_data_path} for instance {container_name}"
                )
                shutil.copytree(
                    backend_config.template_data_path_host,
                    instance_host_data_path,
                    dirs_exist_ok=True,
                )

                instance_internal_details["instance_host_data_path"] = str(instance_host_data_path.resolve())
                # The copied template replaces any configured volumes and is
                # mounted at /data (hard-coded target for filesystem backends).
                container_data_path_target = "/data"
                current_container_volumes = {
                    str(instance_host_data_path.resolve()): {
                        "bind": container_data_path_target,
                        "mode": "rw",
                    }
                }
                logger.info(f"Using dynamic volume for {container_name}: {current_container_volumes}")

            logger.info(
                f"Provisioning instance {container_name} (transport: {backend_config.mcp_transport}) from image {image_to_run_from}"
            )
            if backend_config.mcp_transport == "http":
                if not self.docker_client:
                    raise RuntimeError("Docker client not initialized for HTTP provisioning.")
                if not backend_config.container_port:
                    raise ValueError("container_port required for http.")
                # Host port 0 asks Docker to pick a free port.
                port_bindings = {f"{backend_config.container_port}/tcp": 0}
                run_kwargs: Dict[str, Any] = {
                    "image": image_to_run_from,
                    "name": container_name,
                    "detach": True,
                    "command": backend_config.container_command,
                    "volumes": current_container_volumes,
                    "labels": {
                        "rewardkit-mcp-session-id": session_id,
                        "rewardkit-mcp-backend-name": backend_config.backend_name_ref,
                        "rewardkit-mcp-instance-id": instance_uuid,
                        "rewardkit-mcp-managed": "true",
                    },
                    "ports": port_bindings,
                    # Global options come last and may override the above.
                    **docker_run_options,
                }
                container = self.docker_client.containers.run(**run_kwargs)
                container.reload()
                bindings = (
                    container.attrs.get("NetworkSettings", {})
                    .get("Ports", {})
                    .get(f"{backend_config.container_port}/tcp")
                )
                if not (bindings and bindings[0].get("HostPort")):
                    logger.error(f"Failed to get host port for {container_name}. Logs:\n{_read_logs(container)}")
                    # Container removal happens in the except handler below.
                    raise RuntimeError(f"Failed to get host port for {container_name}")
                host_port = int(bindings[0]["HostPort"])
                self._used_host_ports.add(host_port)
                mcp_endpoint_url = f"http://localhost:{host_port}/mcp"  # Assuming /mcp path
                instance_internal_details["container_id"] = container.id
                if backend_config.startup_check_mcp_tool and not await self._perform_startup_check(
                    mcp_endpoint_url, backend_config.startup_check_mcp_tool
                ):
                    logger.error(f"HTTP Startup check failed for {container_name}. Logs:\n{_read_logs(container)}")
                    raise RuntimeError(f"Startup check failed for {container_name}")
                logger.info(f"HTTP Instance {container_name} (ID: {container.id}) on port {host_port}")
                instance_internal_details.update({"host_port": host_port})

            elif backend_config.mcp_transport == "stdio":
                docker_run_args = ["run", "--rm", "-i", "--name", container_name]
                for h_path, c_path_dict in current_container_volumes.items():
                    bind_path, mode = c_path_dict.get("bind"), c_path_dict.get("mode", "rw")
                    if bind_path:
                        docker_run_args.extend(["-v", f"{h_path}:{bind_path}:{mode}"])

                docker_run_args.append(image_to_run_from)
                if backend_config.container_command:  # Command for the MCP server inside the container
                    docker_run_args.extend(backend_config.container_command)

                # The container itself is the stdio server process: the
                # command is "docker" and the args are the docker run line.
                server_params = StdioServerParameters(command="docker", args=docker_run_args, env=dict(os.environ))  # type: ignore
                logger.info(
                    f"Preparing to launch stdio container {container_name} via dedicated task with command: docker {' '.join(docker_run_args)}"
                )

                initialization_complete_event = asyncio.Event()
                shutdown_event = asyncio.Event()
                self._stdio_shutdown_events[instance_uuid] = shutdown_event

                lifecycle_task = asyncio.create_task(
                    self._manage_stdio_instance_lifecycle(
                        instance_uuid,
                        container_name,
                        server_params,
                        initialization_complete_event,
                        shutdown_event,
                    )
                )
                self._stdio_instance_tasks[instance_uuid] = lifecycle_task

                logger.info(f"Waiting for stdio instance {container_name} (task) to complete initialization...")
                await asyncio.wait_for(initialization_complete_event.wait(), timeout=30.0)

                # The event is also set on failure; a registered session is
                # the actual success signal.
                client_session_stdio = self._stdio_client_sessions.get(instance_uuid)
                if not client_session_stdio:
                    if lifecycle_task.done() and lifecycle_task.exception():
                        raise RuntimeError(
                            f"Stdio instance task for {container_name} failed during initialization."
                        ) from lifecycle_task.exception()
                    raise RuntimeError(f"ClientSession not established by lifecycle task for {container_name}.")

                logger.info(
                    f"Stdio instance {container_name} (task) initialization complete. ClientSession ready."
                )

                if backend_config.startup_check_mcp_tool:
                    logger.info(f"Performing startup check for stdio instance {container_name}...")
                    startup_tool_name = backend_config.startup_check_mcp_tool.get("tool_name", "ping")
                    startup_tool_args = backend_config.startup_check_mcp_tool.get("arguments", {})
                    # The session is already active in the lifecycle task, do not re-enter context manager
                    await asyncio.wait_for(
                        client_session_stdio.call_tool(startup_tool_name, startup_tool_args),
                        timeout=10.0,
                    )
                    logger.info(f"Stdio startup check for {container_name} successful.")
            else:
                raise ValueError(f"Unsupported mcp_transport: {backend_config.mcp_transport}")

            managed_instances.append(
                ManagedInstanceInfo(
                    instance_id=instance_uuid,
                    backend_name_ref=backend_config.backend_name_ref,
                    orchestration_mode="local_docker",
                    mcp_transport=backend_config.mcp_transport,
                    mcp_endpoint_url=mcp_endpoint_url,
                    internal_instance_details=instance_internal_details,
                    committed_image_tag=committed_img_tag,
                )
            )
        except Exception as e:
            logger.error(f"Failed to provision instance {container_name}: {e}", exc_info=True)

            # Remove a started HTTP container whatever step failed after it.
            if container is not None:
                try:
                    container.stop(timeout=5)
                    container.remove()
                except Exception as cleanup_e:
                    logger.warning(
                        f"Could not cleanup container {container_name} after provisioning error: {cleanup_e}"
                    )
            if host_port is not None:
                self._used_host_ports.discard(host_port)

            if backend_config.mcp_transport == "stdio":
                if instance_uuid in self._stdio_shutdown_events:
                    self._stdio_shutdown_events[instance_uuid].set()
                task_to_clean = self._stdio_instance_tasks.pop(instance_uuid, None)
                if task_to_clean and not task_to_clean.done():
                    try:
                        await asyncio.wait_for(task_to_clean, timeout=5.0)
                    except asyncio.TimeoutError:
                        logger.warning(
                            f"Timeout waiting for stdio task {instance_uuid} to clean up after provisioning error."
                        )
                        task_to_clean.cancel()
                    except Exception as task_e:
                        logger.error(f"Exception during stdio task cleanup for {instance_uuid}: {task_e}")

            # Cleanup copied host directory if provisioning failed mid-way
            if "instance_host_data_path" in instance_internal_details:
                shutil.rmtree(
                    instance_internal_details["instance_host_data_path"],
                    ignore_errors=True,
                )
                logger.info(
                    f"Cleaned up instance data directory {instance_internal_details['instance_host_data_path']} due to provisioning error."
                )

            # Instances created earlier in this call would otherwise leak:
            # the caller never receives them because we re-raise.
            if managed_instances:
                try:
                    await self.deprovision_instances(managed_instances)
                except Exception as rollback_e:
                    logger.error(
                        f"Failed to roll back {len(managed_instances)} previously provisioned instances: {rollback_e}"
                    )
            raise
    return managed_instances
496
+
497
async def deprovision_instances(self, instances: List[ManagedInstanceInfo]) -> None:
    """Stop and clean up the given ``local_docker`` instances.

    For ``"http"`` instances the container is stopped and removed. For
    ``"stdio"`` instances the lifecycle task is signalled, awaited for up
    to 10 seconds and cancelled if it does not finish. For either
    transport, a per-instance host data directory recorded in the instance
    details is deleted afterwards.

    Instances with a different ``orchestration_mode`` are ignored. Errors
    are logged rather than raised, so one failing instance does not stop
    the others from being processed.

    Args:
        instances: Instance handles as returned by ``provision_instances``.
    """
    if not self.docker_client:
        # stdio instances and host directories can still be cleaned up.
        logger.warning("Docker client not init for deprovision.")

    for instance in instances:
        if instance.orchestration_mode != "local_docker":
            continue

        details = instance.internal_instance_details
        instance_uuid = details.get("instance_uuid", instance.instance_id)

        if instance.mcp_transport == "http":
            container_id = details.get("container_id")
            # No `continue` here: the host data directory below must be
            # cleaned up even when the container cannot be addressed.
            if container_id and self.docker_client:
                try:
                    container = self.docker_client.containers.get(container_id)
                    container.stop(timeout=10)
                    container.remove()
                    logger.info(f"HTTP Container {container_id} deprovisioned.")
                    if details.get("host_port"):
                        self._used_host_ports.discard(details["host_port"])
                except Exception as e:
                    logger.error(f"Error deprovisioning HTTP container {container_id}: {e}")

        elif instance.mcp_transport == "stdio":
            logger.info(f"Deprovisioning stdio instance {instance_uuid} ({details.get('container_name')})...")

            shutdown_event = self._stdio_shutdown_events.pop(instance_uuid, None)
            if shutdown_event:
                logger.info(f"Signaling shutdown for stdio instance task {instance_uuid}.")
                shutdown_event.set()
            else:
                logger.warning(f"No shutdown event found for stdio instance {instance_uuid}.")

            task = self._stdio_instance_tasks.pop(instance_uuid, None)
            if task:
                logger.info(f"Waiting for stdio instance task {instance_uuid} to complete...")
                try:
                    await asyncio.wait_for(task, timeout=10.0)
                    logger.info(f"Stdio instance task {instance_uuid} completed.")
                except asyncio.TimeoutError:
                    logger.error(
                        f"Timeout waiting for stdio instance task {instance_uuid} to complete. Cancelling."
                    )
                    task.cancel()
                    try:
                        await task
                    except asyncio.CancelledError:
                        logger.info(f"Stdio instance task {instance_uuid} cancelled.")
                    except Exception as e_task_cancel:
                        logger.error(
                            f"Exception during cancellation of stdio task {instance_uuid}: {e_task_cancel}"
                        )
                except Exception as e_task_wait:
                    logger.error(
                        f"Exception waiting for stdio instance task {instance_uuid}: {e_task_wait}",
                        exc_info=True,
                    )
            else:
                logger.warning(f"No lifecycle task found for stdio instance {instance_uuid} during deprovision.")

            # The lifecycle task normally removes its own session entry.
            if instance_uuid in self._stdio_client_sessions:
                logger.warning(
                    f"ClientSession for {instance_uuid} still in _stdio_client_sessions after task handling. Popping."
                )
                self._stdio_client_sessions.pop(instance_uuid, None)
            logger.info(f"Stdio instance {instance_uuid} deprovisioning process complete.")

        # Cleanup copied host directory if it exists
        instance_host_data_path_str = details.get("instance_host_data_path")
        if instance_host_data_path_str:
            logger.info(f"Cleaning up instance data directory: {instance_host_data_path_str}")
            shutil.rmtree(instance_host_data_path_str, ignore_errors=True)
571
+
572
async def call_tool_on_instance(
    self, instance: ManagedInstanceInfo, tool_name: str, tool_args: Dict[str, Any]
) -> Dict[str, Any]:
    """Invoke an MCP tool on a locally orchestrated Docker instance.

    ``http`` instances are called by POSTing ``{"tool_name", "arguments"}``
    to ``instance.mcp_endpoint_url`` through ``self.http_client``.
    ``stdio`` instances are called through the ``ClientSession`` stored in
    ``self._stdio_client_sessions`` under the instance's UUID.

    Args:
        instance: The managed instance to call; must have
            ``orchestration_mode == "local_docker"``.
        tool_name: Name of the tool to invoke.
        tool_args: Arguments forwarded to the tool.

    Returns:
        For HTTP, the decoded JSON body of the response. For stdio, the
        tool result as a dict (via ``model_dump(exclude_none=True)`` when
        available), or an ``{"error", "details"}`` dict when the result
        cannot be represented as a dict.

    Raises:
        ValueError: Wrong orchestration mode, missing HTTP endpoint URL, or
            an unsupported transport.
        RuntimeError: HTTP client missing, no valid stdio session, or the
            underlying tool call failed (original exception is chained).
    """
    if instance.orchestration_mode != "local_docker":
        raise ValueError("Only handles local_docker instances.")

    transport = instance.mcp_transport

    if transport == "http":
        if not self.http_client:
            raise RuntimeError("HTTP client not initialized.")
        endpoint = instance.mcp_endpoint_url
        if not endpoint:
            raise ValueError(f"mcp_endpoint_url required for HTTP {instance.instance_id}")
        try:
            response = await self.http_client.post(
                endpoint,
                json={"tool_name": tool_name, "arguments": tool_args},
            )
            response.raise_for_status()
            return response.json()
        except Exception as http_err:
            raise RuntimeError(f"MCP HTTP call failed: {http_err}") from http_err

    if transport != "stdio":
        raise ValueError(f"Unsupported mcp_transport: {transport}")

    # stdio transport: reuse the session kept alive by the instance's lifecycle task.
    session_key = instance.internal_instance_details.get("instance_uuid", instance.instance_id)
    session = self._stdio_client_sessions.get(session_key)
    if not (session and isinstance(session, ClientSession)):
        raise RuntimeError(f"Valid ClientSession not found for stdio instance {session_key}.")

    try:
        logger.debug(
            f"Calling tool {tool_name} via stdio ClientSession for {session_key} (session already active in lifecycle task)"
        )
        outcome = await session.call_tool(tool_name, tool_args)

        # Results without model_dump are returned as-is only when already a dict.
        if not hasattr(outcome, "model_dump"):
            if isinstance(outcome, dict):
                return outcome
            return {
                "error": "Tool result unexpected format",
                "details": str(outcome),
            }

        serialized = outcome.model_dump(exclude_none=True)
        if isinstance(serialized, dict):
            return serialized
        return {
            "error": "Tool result model_dump was not a dict or not a Pydantic model",
            "details": str(serialized),
        }
    except Exception as stdio_err:
        logger.error(
            f"MCP stdio tool call for {tool_name} on instance {session_key} failed: {stdio_err}",
            exc_info=True,
        )
        raise RuntimeError(f"MCP stdio tool call for {tool_name} failed: {stdio_err}") from stdio_err
628
+
629
async def list_tools_on_instance(self, instance: ManagedInstanceInfo) -> types.ListToolsResult:
    """List the MCP tools exposed by a locally orchestrated Docker instance.

    For ``http`` instances a short-lived MCP session is opened against
    ``instance.mcp_endpoint_url`` (trailing ``/`` stripped), initialized,
    queried, and closed again. For ``stdio`` instances the ``ClientSession``
    already stored in ``self._stdio_client_sessions`` is reused.

    Args:
        instance: The managed instance to query; must have
            ``orchestration_mode == "local_docker"``.

    Returns:
        The result of ``ClientSession.list_tools()`` for the instance.

    Raises:
        ValueError: Wrong orchestration mode, missing HTTP endpoint URL, or
            an unsupported transport.
        RuntimeError: No valid stdio session is registered, or listing the
            tools failed (original exception is chained).
    """
    if instance.orchestration_mode != "local_docker":
        raise ValueError("LocalDockerOrchestrationClient can only list tools for 'local_docker' instances.")

    logger.info(
        f"Listing tools for local Docker instance {instance.instance_id} ({instance.backend_name_ref}) using {instance.mcp_transport} transport."
    )

    if instance.mcp_transport == "http":
        if not instance.mcp_endpoint_url:
            raise ValueError(
                f"Instance {instance.instance_id} ({instance.backend_name_ref}) is HTTP but mcp_endpoint_url is missing."
            )
        target_base_url = instance.mcp_endpoint_url.rstrip("/")
        try:
            # The URL is passed positionally so the call works whether the
            # helper names its first parameter `url` (as the upstream MCP SDK
            # does) or `base_url`.
            async with streamablehttp_client(target_base_url) as (
                read_s,
                write_s,
                _,  # get_session_id_func is not needed for a single call
            ):
                # The session must be entered as an async context manager:
                # that is what starts its background receive loop. Without
                # it, initialize()/list_tools() would wait for responses
                # that are never dispatched.
                async with ClientSession(
                    read_stream=read_s,
                    write_stream=write_s,
                    client_info=DEFAULT_CLIENT_INFO,
                ) as mcp_session_for_list_tools:
                    # MCP handshake, then the actual query.
                    await mcp_session_for_list_tools.initialize()
                    list_tools_result = await mcp_session_for_list_tools.list_tools()
                logger.info(
                    f"Successfully listed {len(list_tools_result.tools)} tools from {target_base_url} for HTTP instance {instance.instance_id}"
                )
                return list_tools_result
        except Exception as e:
            logger.error(
                f"Error listing tools from HTTP instance {instance.instance_id} at {target_base_url}: {e}",
                exc_info=True,
            )
            raise RuntimeError(f"Failed to list tools from HTTP Docker instance {instance.instance_id}") from e

    elif instance.mcp_transport == "stdio":
        instance_uuid = instance.internal_instance_details.get("instance_uuid", instance.instance_id)
        cs = self._stdio_client_sessions.get(instance_uuid)

        if not cs or not isinstance(cs, ClientSession):
            # This could happen if the instance is still initializing or failed to initialize.
            # For simplicity, we raise. A more robust solution might wait or check task status.
            logger.error(
                f"ClientSession not found or invalid for stdio instance {instance_uuid}. It might be initializing or failed."
            )
            raise RuntimeError(
                f"Valid ClientSession not found for stdio instance {instance_uuid}. Cannot list tools."
            )

        try:
            logger.debug(f"Listing tools via existing stdio ClientSession for {instance_uuid}")
            list_tools_result = await cs.list_tools()
            logger.info(
                f"Successfully listed {len(list_tools_result.tools)} tools for stdio instance {instance_uuid}"
            )
            return list_tools_result
        except Exception as e:
            logger.error(
                f"Error listing tools from stdio instance {instance_uuid}: {e}",
                exc_info=True,
            )
            raise RuntimeError(f"Failed to list tools from stdio Docker instance {instance_uuid}") from e
    else:
        raise ValueError(f"Unsupported mcp_transport for local_docker: {instance.mcp_transport}")
700
+
701
+
702
+ import os