decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374)
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
@@ -0,0 +1,930 @@
1
+ import os
2
+ import sys
3
+ import json
4
+ import asyncio
5
+ import uuid
6
+ import subprocess
7
+ import shutil
8
+ import time
9
+ from functools import partial
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+ from typing import Dict, Any, Optional, List, Union
13
+ import aiohttp
14
+
15
+ # Add parent directory to path to import types
16
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../..'))
17
+
18
+ from dt_arena.src.types.agent import Agent, AgentConfig, RuntimeConfig, MCPServerConfig, AgentResult
19
+ from dt_arena.src.types.trajectory import Trajectory
20
+
21
+ from .mcp_proxy import MCPProxyManager
22
+ from .utils import OpenClawTrajectoryConverter
23
+ from .plugin_generator import StaticPluginGenerator, MCPServerTools, MCPTool
24
+ from .helpers.auth_helpers import populate_openclaw_profile_auth
25
+
26
+ from utils.skill_helpers import (
27
+ create_injected_skills_directory,
28
+ cleanup_temp_directory,
29
+ )
30
+
31
+
32
# Default timeout, in seconds, for OpenClaw operations. Can be overridden via
# the OPENCLAW_TIMEOUT_SECONDS environment variable (the default was increased
# from 120s for complex multi-tool tasks).
OPENCLAW_TIMEOUT_SECONDS: float = float(
    os.environ.get("OPENCLAW_TIMEOUT_SECONDS", "1000")
)

# Timeout, in seconds, for trajectory conversion. Can be overridden via the
# OPENCLAW_TRAJECTORY_CONVERSION_TIMEOUT_SECONDS environment variable.
OPENCLAW_TRAJECTORY_CONVERSION_TIMEOUT_SECONDS: float = float(
    os.environ.get("OPENCLAW_TRAJECTORY_CONVERSION_TIMEOUT_SECONDS", "15")
)

# Path of the base OpenClaw configuration file in the user's home directory
OPENCLAW_CONFIG_PATH = Path.home().joinpath(".openclaw", "openclaw.json")
40
+
41
+
42
+ class OpenClawAgent(Agent):
43
+ """
44
+ This agent integrates with the OpenClaw CLI to execute instructions and interact with MCP servers.
45
+ """
46
+
47
+ # Valid thinking levels for OpenClaw CLI
48
+ VALID_THINKING_LEVELS = ("off", "minimal", "low", "medium", "high")
49
+
50
def __init__(
    self,
    agent_config: AgentConfig,
    runtime_config: Optional[RuntimeConfig] = None,
):
    """
    Set up a new OpenClaw agent instance.

    Creates the output directory layout, builds the trajectory converter and
    initializes the per-instance session, profile and execution state.

    Args:
        agent_config: Agent configuration (system_prompt and mcp_servers)
        runtime_config: Runtime configuration (model, temperature, max_turns, output_dir)

    Raises:
        ValueError: If agent_kwargs carries an unsupported thinking_level
    """
    super().__init__(agent_config, runtime_config)

    # Output layout: traces go under <output_dir>/traces, trajectories are
    # written to the output directory itself.
    self.output_dir = self.runtime_config.output_dir or os.path.join(os.getcwd(), "results")
    self.traces_dir = os.path.join(self.output_dir, "traces")
    self._runtime_trace_dir = os.path.join(self.traces_dir, "openclaw_runtime")
    self.trajectories_dir = self.output_dir
    for directory in (self.traces_dir, self._runtime_trace_dir, self.trajectories_dir):
        os.makedirs(directory, exist_ok=True)

    # Timestamp shared with the trajectory converter
    self.timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    self.trajectory_converter = OpenClawTrajectoryConverter(
        self.trajectories_dir,
        self.timestamp
    )

    # Session management
    session_suffix = uuid.uuid4().hex[:8]
    self._session_id: str = f"dt-arena-{session_suffix}"
    self._conversation_history: List[Dict[str, Any]] = []

    # Unique profile name for isolation (prevents plugin conflicts in parallel execution)
    profile_suffix = uuid.uuid4().hex[:8]
    self._profile_name: str = f"task-{profile_suffix}"
    self._profile_dir: str = os.path.expanduser(f"~/.openclaw-{self._profile_name}")
    self._profile_extensions_dir: str = os.path.join(self._profile_dir, "extensions")

    # Execution state
    self._turn_count: int = 0
    self._trace_metadata: Optional[Dict[str, Any]] = None
    self._current_trajectory: Optional[Trajectory] = None

    # MCP proxy manager and the proxy URLs it exposes, keyed by server name
    self._proxy_manager: Optional[MCPProxyManager] = None
    self._proxy_urls: Dict[str, str] = {}

    # Temporary config file path (cleaned up on exit)
    self._openclaw_config_path: Optional[str] = None

    # Skill injection temp directory
    self._skill_temp_dir: Optional[str] = None

    # Static plugin generator for MCP tools
    self._plugin_generator: Optional[StaticPluginGenerator] = None
    self._generated_plugin_id: Optional[str] = None

    # Discovered MCP tool lists per server (for trajectory "List MCP Tools" steps)
    self._mcp_tool_lists: Dict[str, List[Dict[str, Any]]] = {}

    # Debug mode is taken from the runtime_config argument; off when none was given
    if runtime_config:
        self._debug = runtime_config.debug
    else:
        self._debug = False

    # Thinking level (validated against VALID_THINKING_LEVELS)
    self._thinking = self._resolve_thinking_level()
119
+
120
+ def _get_thinking_level(self) -> str:
121
+ """Get valid thinking level for OpenClaw CLI."""
122
+ return self._thinking
123
+
124
+ def _resolve_thinking_level(self) -> str:
125
+ """
126
+ Resolve thinking level from agent_kwargs.
127
+
128
+ Agent kwargs can specify:
129
+ thinking_level: "off" | "minimal" | "low" | "medium" | "high"
130
+
131
+ Returns:
132
+ Valid thinking level string, defaults to "medium" if not specified
133
+ """
134
+ agent_kwargs = self.runtime_config.agent_kwargs or {}
135
+ thinking_level = agent_kwargs.get("thinking_level", "medium")
136
+
137
+ if thinking_level not in self.VALID_THINKING_LEVELS:
138
+ raise ValueError(
139
+ f"Invalid thinking_level '{thinking_level}'. "
140
+ f"Must be one of: {', '.join(self.VALID_THINKING_LEVELS)}"
141
+ )
142
+ return thinking_level
143
+
144
+ def _load_openclaw_config(self) -> Dict[str, Any]:
145
+ """Load current OpenClaw configuration."""
146
+ if OPENCLAW_CONFIG_PATH.exists():
147
+ try:
148
+ return json.loads(OPENCLAW_CONFIG_PATH.read_text())
149
+ except (json.JSONDecodeError, IOError):
150
+ pass
151
+ return {}
152
+
153
+ def _build_openclaw_env(self) -> Dict[str, str]:
154
+ """Build the environment used for all OpenClaw subprocesses."""
155
+ env = os.environ.copy()
156
+ node_bin = os.getenv("OPENCLAW_NODE_BIN")
157
+ if node_bin:
158
+ node_dir = str(Path(node_bin).parent)
159
+ current_path = env.get("PATH", "")
160
+ if current_path:
161
+ path_parts = current_path.split(os.pathsep)
162
+ if path_parts[0] != node_dir:
163
+ env["PATH"] = os.pathsep.join([node_dir, *path_parts])
164
+ else:
165
+ env["PATH"] = node_dir
166
+ return env
167
+
168
+ def _build_openclaw_command(self, *args: str) -> List[str]:
169
+ """Build a consistent OpenClaw CLI command for init and execution."""
170
+ cli_override = os.getenv("OPENCLAW_CLI_BIN")
171
+ if cli_override:
172
+ if cli_override.endswith(".mjs"):
173
+ node_bin = os.getenv("OPENCLAW_NODE_BIN") or shutil.which("node")
174
+ if not node_bin:
175
+ raise RuntimeError("Compatible Node.js runtime not found for OpenClaw CLI")
176
+ return [node_bin, cli_override, *args]
177
+ return [cli_override, *args]
178
+
179
+ cli_bin = shutil.which("openclaw")
180
+ if cli_bin:
181
+ return [cli_bin, *args]
182
+ raise FileNotFoundError("openclaw")
183
+
184
+ def _configure_openclaw_with_proxies(self) -> None:
185
+ """
186
+ Configure OpenClaw with proxy-backed MCP servers for embedded runs.
187
+
188
+ Creates a temporary OpenClaw config file with:
189
+ 1. Model from runtime_config
190
+ 2. Proxy-backed MCP servers under mcp.servers for bundle-mcp
191
+ 3. Generated static plugin enabled for non-embedded/plugin tooling
192
+ 4. Skill directories if configured
193
+
194
+ Environment variables are set when running OpenClaw CLI:
195
+ - OPENCLAW_CONFIG_PATH: path to temp openclaw config
196
+ """
197
+ # Load base OpenClaw config as template
198
+ base_config = self._load_openclaw_config()
199
+
200
+ # Build OpenClaw config with model from runtime_config
201
+ openclaw_config = json.loads(json.dumps(base_config)) # Deep copy
202
+
203
+ # Set model from runtime_config
204
+ model = self.runtime_config.model
205
+ if "agents" not in openclaw_config:
206
+ openclaw_config["agents"] = {}
207
+ if "defaults" not in openclaw_config["agents"]:
208
+ openclaw_config["agents"]["defaults"] = {}
209
+ if "model" not in openclaw_config["agents"]["defaults"]:
210
+ openclaw_config["agents"]["defaults"]["model"] = {}
211
+
212
+ openclaw_config["agents"]["defaults"]["model"]["primary"] = model
213
+ # litellm support
214
+ if model.startswith("litellm/"):
215
+ openclaw_config["models"] = {
216
+ "providers": {
217
+ "litellm": {
218
+ "baseUrl": "${LITELLM_BASE_URL}",
219
+ "apiKey": "${LITELLM_API_KEY}",
220
+ "api": "anthropic-messages",
221
+ "models": [
222
+ {
223
+ "id": "claude-opus-4-6",
224
+ "name": "Claude Opus 4.6",
225
+ "reasoning": True,
226
+ "input": ["text", "image"],
227
+ "contextWindow": 200000,
228
+ "maxTokens": 64000,
229
+ }
230
+ ],
231
+ }
232
+ }
233
+ }
234
+ elif model.startswith("openai/"):
235
+ # OpenClaw supports the normal OpenAI API-key route directly from the
236
+ # process environment. Avoid generating a partial
237
+ # models.providers.openai block here, because explicit provider config
238
+ # is validated against the full provider schema.
239
+ pass
240
+ elif model.startswith("openrouter/"):
241
+ if "auth" not in openclaw_config:
242
+ openclaw_config["auth"] = {}
243
+ if "profiles" not in openclaw_config["auth"]:
244
+ openclaw_config["auth"]["profiles"] = {}
245
+ openclaw_config["auth"]["profiles"].setdefault(
246
+ "openrouter:default",
247
+ {
248
+ "provider": "openrouter",
249
+ "mode": "api_key",
250
+ },
251
+ )
252
+
253
+ # Create workspace directory and write system prompt to AGENTS.md
254
+ # OpenClaw injects bootstrap files (AGENTS.md, SOUL.md, etc.) into the system prompt
255
+ workspace_dir = os.path.join(self._profile_dir, "workspace")
256
+ os.makedirs(workspace_dir, exist_ok=True)
257
+
258
+ if self.config and self.config.system_prompt:
259
+ agents_md_path = os.path.join(workspace_dir, "AGENTS.md")
260
+ try:
261
+ with open(agents_md_path, 'w') as f:
262
+ f.write(self.config.system_prompt)
263
+ if self._debug:
264
+ print(f"[OpenClaw] Wrote system prompt to: {agents_md_path}")
265
+ except Exception as e:
266
+ print(f"[OpenClaw] Warning: Failed to write AGENTS.md: {e}")
267
+
268
+ # Set the workspace directory in config
269
+ openclaw_config["agents"]["defaults"]["workspace"] = workspace_dir
270
+
271
+ # Embedded `agent --local` materializes MCP tools from mcp.servers via
272
+ # the built-in bundle-mcp runtime; custom plugin tools are not merged
273
+ # into the embedded attempt tool list.
274
+ if self._proxy_urls:
275
+ if "mcp" not in openclaw_config:
276
+ openclaw_config["mcp"] = {}
277
+
278
+ configured_servers = dict(openclaw_config["mcp"].get("servers", {}))
279
+ for server_name, proxy_url in self._proxy_urls.items():
280
+ configured_servers[server_name] = {
281
+ "transport": "streamable-http",
282
+ "url": proxy_url,
283
+ }
284
+ openclaw_config["mcp"]["servers"] = configured_servers
285
+
286
+ if self._debug:
287
+ print(
288
+ f"[OpenClaw] Configured {len(self._proxy_urls)} proxy-backed MCP servers for bundle-mcp"
289
+ )
290
+
291
+ # Configure the generated static plugin
292
+ if "plugins" not in openclaw_config:
293
+ openclaw_config["plugins"] = {}
294
+ if "entries" not in openclaw_config["plugins"]:
295
+ openclaw_config["plugins"]["entries"] = {}
296
+
297
+ # Enable the generated static plugin if we have one
298
+ if self._generated_plugin_id:
299
+ openclaw_config["plugins"]["entries"][self._generated_plugin_id] = {
300
+ "enabled": True
301
+ }
302
+
303
+ # Add to plugins.allow list (required for plugin to load)
304
+ if "allow" not in openclaw_config["plugins"]:
305
+ openclaw_config["plugins"]["allow"] = []
306
+ if self._generated_plugin_id not in openclaw_config["plugins"]["allow"]:
307
+ openclaw_config["plugins"]["allow"].append(self._generated_plugin_id)
308
+
309
+ # Add installs metadata
310
+ if "installs" not in openclaw_config["plugins"]:
311
+ openclaw_config["plugins"]["installs"] = {}
312
+
313
+ # Use profile-specific extensions directory for isolation
314
+ plugin_path = os.path.join(self._profile_extensions_dir, self._generated_plugin_id)
315
+ openclaw_config["plugins"]["installs"][self._generated_plugin_id] = {
316
+ "source": "path",
317
+ "sourcePath": plugin_path,
318
+ "installPath": plugin_path,
319
+ "version": "1.0.0",
320
+ "installedAt": datetime.now().isoformat()
321
+ }
322
+
323
+ if self._debug:
324
+ print(f"[OpenClaw] Enabled static plugin: {self._generated_plugin_id}")
325
+
326
+ if self._proxy_urls or self._generated_plugin_id:
327
+ if "agents" not in openclaw_config:
328
+ openclaw_config["agents"] = {}
329
+ if "list" not in openclaw_config["agents"]:
330
+ openclaw_config["agents"]["list"] = []
331
+
332
+ main_agent = None
333
+ for agent in openclaw_config["agents"]["list"]:
334
+ if agent.get("id") == "main":
335
+ main_agent = agent
336
+ break
337
+
338
+ if main_agent is None:
339
+ main_agent = {"id": "main"}
340
+ openclaw_config["agents"]["list"].append(main_agent)
341
+
342
+ if "tools" not in main_agent:
343
+ main_agent["tools"] = {}
344
+ if "allow" not in main_agent["tools"]:
345
+ main_agent["tools"]["allow"] = []
346
+
347
+ allowlist = main_agent["tools"]["allow"]
348
+ for tool_name in [
349
+ "group:plugins",
350
+ "bundle-mcp",
351
+ "memory_search",
352
+ "memory_get",
353
+ ]:
354
+ if tool_name not in allowlist:
355
+ allowlist.append(tool_name)
356
+ if self._generated_plugin_id and self._generated_plugin_id not in allowlist:
357
+ allowlist.append(self._generated_plugin_id)
358
+
359
+ if "tools" not in openclaw_config:
360
+ openclaw_config["tools"] = {}
361
+ if "web" not in openclaw_config["tools"]:
362
+ openclaw_config["tools"]["web"] = {}
363
+ if "search" not in openclaw_config["tools"]["web"]:
364
+ openclaw_config["tools"]["web"]["search"] = {}
365
+ if "fetch" not in openclaw_config["tools"]["web"]:
366
+ openclaw_config["tools"]["web"]["fetch"] = {}
367
+ openclaw_config["tools"]["web"]["search"]["enabled"] = False
368
+ openclaw_config["tools"]["web"]["fetch"]["enabled"] = False
369
+
370
+ if "browser" not in openclaw_config:
371
+ openclaw_config["browser"] = {}
372
+ openclaw_config["browser"]["enabled"] = False
373
+
374
+ # Add skill directory if skills are configured
375
+ if self._skill_temp_dir:
376
+ if "skills" not in openclaw_config:
377
+ openclaw_config["skills"] = {}
378
+ if "load" not in openclaw_config["skills"]:
379
+ openclaw_config["skills"]["load"] = {}
380
+ if "extraDirs" not in openclaw_config["skills"]["load"]:
381
+ openclaw_config["skills"]["load"]["extraDirs"] = []
382
+ openclaw_config["skills"]["load"]["extraDirs"].append(self._skill_temp_dir)
383
+ if self._debug:
384
+ print(f"[OpenClaw] Added skill directory to config: {self._skill_temp_dir}")
385
+
386
+ # Apply tools.deny from agent_kwargs (disable native tools)
387
+ agent_kwargs = self.runtime_config.agent_kwargs or {}
388
+ denied_tools = agent_kwargs.get("disallowed_tools", [])
389
+ if denied_tools:
390
+ if "tools" not in openclaw_config:
391
+ openclaw_config["tools"] = {}
392
+ existing_deny = openclaw_config["tools"].get("deny", [])
393
+ openclaw_config["tools"]["deny"] = list(set(existing_deny + denied_tools))
394
+ if self._debug:
395
+ print(f"[OpenClaw] Denied tools: {openclaw_config['tools']['deny']}")
396
+
397
+ # Write config to profile directory (--profile uses profile's config, not OPENCLAW_CONFIG_PATH)
398
+ profile_config_path = os.path.join(self._profile_dir, "openclaw.json")
399
+ try:
400
+ with open(profile_config_path, 'w') as f:
401
+ json.dump(openclaw_config, f, indent=2)
402
+ self._openclaw_config_path = profile_config_path
403
+ except Exception as e:
404
+ raise RuntimeError(f"Failed to create OpenClaw config: {e}")
405
+
406
+ if self._debug:
407
+ print(f"[OpenClaw] Wrote config to profile: {profile_config_path}")
408
+ print(f"[OpenClaw] Model: {model}")
409
+ print(f"[OpenClaw] Workspace: {workspace_dir}")
410
+ if self.config and self.config.system_prompt:
411
+ prompt_preview = self.config.system_prompt[:100] + "..." if len(self.config.system_prompt) > 100 else self.config.system_prompt
412
+ print(f"[OpenClaw] System prompt (AGENTS.md): {prompt_preview}")
413
+
414
+ populate_openclaw_profile_auth(self._profile_dir, debug=self._debug)
415
+
416
def _cleanup_temp_resources(self) -> None:
    """Remove temporary artifacts that live outside the profile directory.

    The only such artifact handled here is the skill temp directory, which
    is created under ``output_dir`` and is therefore not removed together
    with the profile directory.
    """
    skill_dir = self._skill_temp_dir
    if not skill_dir:
        # Nothing was created (or it was already cleaned up).
        return

    cleanup_temp_directory(skill_dir)
    self._skill_temp_dir = None
422
+
423
async def _discover_and_generate_plugin(self) -> None:
    """
    Ask every proxy for its tools and build a static OpenClaw plugin from them.

    Each proxy URL receives a JSON-RPC ``tools/list`` request. A server whose
    reply contains an ``error`` member, or whose request raises, is reported
    with a warning and skipped. If at least one server answered, a plugin
    with a fresh ``mcp-tools-<8 hex chars>`` id is generated into the
    profile-specific extensions directory.

    Side effects:
        Fills ``self._mcp_tool_lists`` per server, and sets
        ``self._generated_plugin_id`` and ``self._plugin_generator``.

    Raises:
        Exception: Whatever ``generate_plugin`` raised. The generated plugin
            id is reset to ``None`` before the exception is re-raised.
    """
    if not self._proxy_urls:
        return

    # The request is identical for every server, so build it once.
    list_tools_request = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "tools/list",
        "params": {}
    }
    discovered: List[MCPServerTools] = []

    async with aiohttp.ClientSession() as http:
        for server_name, proxy_url in self._proxy_urls.items():
            try:
                async with http.post(proxy_url, json=list_tools_request) as reply:
                    payload = await reply.json()

                if "error" in payload:
                    print(f"[OpenClaw] Warning: Failed to list tools from {server_name}: {payload['error']}")
                    continue

                raw_tools = payload.get("result", {}).get("tools", [])

                tools = []
                for entry in raw_tools:
                    tools.append(
                        MCPTool(
                            name=entry["name"],
                            description=entry.get("description", f"Tool from {server_name}"),
                            input_schema=entry.get("inputSchema", {"type": "object", "properties": {}})
                        )
                    )

                discovered.append(MCPServerTools(
                    name=server_name,
                    url=proxy_url,
                    tools=tools
                ))

                # Keep a lightweight name/description record per server.
                tool_records = []
                for entry in raw_tools:
                    tool_records.append(
                        {"name": entry["name"], "description": entry.get("description", "")}
                    )
                self._mcp_tool_lists[server_name] = tool_records

                if self._debug:
                    print(f"[OpenClaw] Discovered {len(tools)} tools from {server_name}")

            except Exception as e:
                # Discovery is best effort per server; one bad server must not
                # prevent the others from being used.
                print(f"[OpenClaw] Warning: Failed to discover tools from {server_name}: {e}")

    if not discovered:
        print("[OpenClaw] Warning: No tools discovered from any server")
        return

    # A unique id per task keeps generated plugins apart.
    self._generated_plugin_id = f"mcp-tools-{uuid.uuid4().hex[:8]}"

    # The generator targets the profile-specific extensions directory so that
    # parallel tasks do not load each other's plugins.
    self._plugin_generator = StaticPluginGenerator(
        extensions_dir=self._profile_extensions_dir,
        debug=self._debug
    )

    try:
        self._plugin_generator.generate_plugin(
            servers=discovered,
            plugin_id=self._generated_plugin_id
        )

        total_tools = sum(len(server.tools) for server in discovered)
        print(f"[OpenClaw] Generated static plugin '{self._generated_plugin_id}' with {total_tools} tools")

    except Exception as e:
        print(f"[OpenClaw] Error generating plugin: {e}")
        self._generated_plugin_id = None
        raise
506
+
507
async def _setup_skills(self) -> None:
    """Prepare the skill directory handed to OpenClaw, applying skill injections.

    Does nothing unless the agent config lists skill directories or the
    runtime config contains at least one injection whose mode is ``"create"``.
    Otherwise the resulting directory path is stored in
    ``self._skill_temp_dir``.
    """
    skill_injection = self.runtime_config.skill_injection
    skill_directories = self.config.skill_directories if self.config else []

    # An injection in "create" mode needs a skill directory even when no
    # source directories are configured.
    has_create_mode = skill_injection and any(
        inj.mode == "create"
        for injs in skill_injection.values()
        for inj in injs
    )

    if not (skill_directories or has_create_mode):
        return

    # OpenClaw expects skills in <dir>/<skill_name>/SKILL.md, so skills are
    # placed at the root of the directory (empty subpath).
    self._skill_temp_dir = create_injected_skills_directory(
        source_skill_dirs=skill_directories,
        skill_injection=skill_injection,
        skill_subpath="",
        temp_prefix="openclaw_skills_",
        base_dir=self.output_dir,
    )

    if not self._skill_temp_dir:
        return
    print(f"[OpenClaw] Created skill temp directory: {self._skill_temp_dir}")
533
+
534
def _create_mcp_server(self, server_config: MCPServerConfig) -> Any:
    """
    Return a plain-dict placeholder describing an MCP server.

    For OpenClaw the actual MCP traffic goes through proxy servers, so this
    only copies the descriptive fields of ``server_config``.

    Args:
        server_config: Object exposing ``name``, ``transport``, ``url``,
            ``command``, ``args`` and ``env`` attributes.

    Returns:
        Dict with exactly those six keys, in that order.
    """
    copied_fields = ("name", "transport", "url", "command", "args", "env")
    return {field: getattr(server_config, field) for field in copied_fields}
548
+
549
async def initialize(self) -> None:
    """
    Initialize the agent.

    Steps, in order:
    1. Check that the OpenClaw CLI is available (``--version``)
    2. Set up an isolated OpenClaw profile for this task (best effort)
    3. Set up skills
    4. Create MCP proxy servers with tool injections
    5. Discover proxy tools, generate the static plugin and write the config
    6. Load MCP server configs (for reference)

    The CLI probes run in a worker thread so that the blocking
    ``subprocess.run`` calls do not stall the event loop.

    Raises:
        ValueError: If no agent config is set.
        RuntimeError: If the CLI is missing, does not answer ``--version``
            in time, exits non-zero, or the MCP proxies cannot be created.
            The underlying exception is attached as ``__cause__``.
    """
    if not self.config:
        raise ValueError("Agent config is required")

    # Check if openclaw CLI is available
    try:
        cli_env = self._build_openclaw_env()
        result = await asyncio.to_thread(
            subprocess.run,
            self._build_openclaw_command("--version"),
            capture_output=True,
            text=True,
            timeout=10,
            env=cli_env,
        )
    except FileNotFoundError as e:
        raise RuntimeError(
            "openclaw CLI not found. Install with: npm install -g openclaw"
        ) from e
    except subprocess.TimeoutExpired as e:
        # A hung CLI is reported the same way as any other broken CLI rather
        # than leaking a raw TimeoutExpired to the caller.
        raise RuntimeError(
            "openclaw CLI not working properly: '--version' timed out"
        ) from e

    if result.returncode != 0:
        raise RuntimeError("openclaw CLI not working properly")
    if self._debug:
        version_line = result.stdout.strip().split('\n')[0] if result.stdout else "unknown"
        print(f"[OpenClaw] CLI version: {version_line}")

    # Initialize isolated profile for this task (prevents plugin conflicts in
    # parallel execution). Failures here are deliberately only warnings.
    try:
        setup_result = await asyncio.to_thread(
            subprocess.run,
            self._build_openclaw_command("--profile", self._profile_name, "setup"),
            capture_output=True,
            text=True,
            timeout=30,
            env=cli_env,
        )
        if setup_result.returncode != 0:
            print(f"[OpenClaw] Warning: Profile setup returned non-zero: {setup_result.stderr}")
        elif self._debug:
            print(f"[OpenClaw] Initialized isolated profile: {self._profile_name}")
    except Exception as e:
        print(f"[OpenClaw] Warning: Could not initialize profile: {e}")

    # Setup skills if configured
    await self._setup_skills()

    # Create MCP proxy servers
    self._proxy_manager = MCPProxyManager(debug=self._debug, hook_manager=self.hook_manager)

    # Get tool injections from runtime config
    tool_injections = self.runtime_config.mcp_injection or {}

    try:
        self._proxy_urls = await self._proxy_manager.create_proxies(
            mcp_configs=self.config.mcp_servers,
            tool_injections=tool_injections,
        )
        print(f"[OpenClaw] Created {len(self._proxy_urls)} MCP proxy servers")
    except Exception as e:
        raise RuntimeError(f"Failed to create MCP proxy servers: {e}") from e

    # Discover tools from proxies and generate static plugin
    await self._discover_and_generate_plugin()

    # Configure OpenClaw to use the generated static plugin
    self._configure_openclaw_with_proxies()

    # Load MCP server configs (for reference)
    await self.load_mcp_servers()

    print(f"[OpenClaw] Initialized with {len(self.mcp_servers)} MCP servers via proxy")
623
+
624
async def _run_openclaw_cli(self, message: str) -> Dict[str, Any]:
    """
    Run one OpenClaw CLI turn in embedded (``--local``) mode.

    Args:
        message: User message, passed to the CLI via ``--message``.

    Returns:
        Dict with 'success', 'content', 'error' keys. A non-zero exit code,
        a timeout and any other exception while running the process are
        reported through this dict instead of being raised.
    """
    cmd = self._build_openclaw_command(
        "--profile", self._profile_name,  # Isolate from other parallel tasks
        "agent",
        "--local",  # Run in embedded mode (no gateway required)
        "--message", message,
        "--thinking", self._get_thinking_level(),
        "--session-id", self._session_id,
    )

    if self._debug:
        print(f"[OpenClaw CLI] Running: {' '.join(cmd[:6])}...")

    # Use inherited environment (--profile handles config isolation)
    env = self._build_openclaw_env()
    env["OPENCLAW_TRAJECTORY"] = "1"
    env["OPENCLAW_TRAJECTORY_DIR"] = self._runtime_trace_dir
    if self._debug and self._openclaw_config_path:
        print(f"[OpenClaw CLI] Using openclaw config: {self._openclaw_config_path}")
        print(f"[OpenClaw CLI] Runtime trace dir: {self._runtime_trace_dir}")

    proc = None
    try:
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=env,
        )
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(),
            timeout=OPENCLAW_TIMEOUT_SECONDS,
        )

        # Decode leniently: a single invalid byte in the CLI output must not
        # turn an otherwise successful run into a reported failure.
        output = stdout.decode("utf-8", errors="replace").strip()
        error_output = stderr.decode("utf-8", errors="replace").strip() if stderr else ""

        if self._debug:
            print(f"[OpenClaw CLI] Exit code: {proc.returncode}")
            if output:
                # Truncate for display
                display_output = output[:500] + "..." if len(output) > 500 else output
                print(f"[OpenClaw CLI] Output: {display_output}")
            if error_output:
                print("[OpenClaw CLI] Stderr:")
                print(error_output)

        if proc.returncode != 0:
            return {
                "success": False,
                "content": "",
                "error": error_output or f"Exit code {proc.returncode}",
            }

        # Try to parse JSON if output looks like JSON
        if output.startswith('{'):
            try:
                result = json.loads(output)
                return {
                    "success": True,
                    "content": result.get("content", result.get("message", output)),
                    "error": None,
                }
            except json.JSONDecodeError:
                # Not valid JSON after all; fall through to the raw output.
                pass

        return {
            "success": True,
            "content": output,
            "error": None,
        }

    except asyncio.TimeoutError:
        return {
            "success": False,
            "content": "",
            "error": f"Command timed out after {OPENCLAW_TIMEOUT_SECONDS}s",
        }
    except Exception as e:
        return {
            "success": False,
            "content": "",
            "error": str(e),
        }
    finally:
        # Reap the child on every exit path where it is still running
        # (timeout, task cancellation, unexpected error) so it cannot outlive
        # this call.
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
                await proc.wait()
            except ProcessLookupError:
                pass
723
+
724
async def run(
    self,
    user_input: Union[str, List[str]],
    metadata: Optional[Dict[str, Any]] = None
) -> AgentResult:
    """
    Run the agent with given input. Supports multi-turn conversations.

    Args:
        user_input: User instruction/query. Can be:
            - str: Single query (backward compatible)
            - List[str]: Multiple queries processed sequentially; an empty
              list runs no turns
        metadata: Optional metadata (task_id, domain, category, instruction).
            It is copied, so the caller's dict is never modified.

    Returns:
        AgentResult as produced by ``get_result()`` after all queries ran
        and trajectory generation was attempted.
    """
    # Normalize input to list
    if isinstance(user_input, str):
        user_inputs = [user_input]
    else:
        user_inputs = list(user_input)

    # Initialize trace metadata. Copy it so that adding "instruction" below
    # does not leak into the caller's dict.
    if self._trace_metadata is None:
        self._trace_metadata = dict(metadata) if metadata else {}

    # Set instruction in metadata (joined queries, capped at 500 characters).
    # Joining also covers the empty-list case, which previously raised
    # IndexError.
    if "instruction" not in self._trace_metadata:
        instr = " | ".join(query or "" for query in user_inputs)
        self._trace_metadata["instruction"] = instr[:500]

    start_time = datetime.now()

    for query in user_inputs:
        # Add to conversation history
        self._conversation_history.append({
            "role": "user",
            "content": query,
            "timestamp": datetime.now().isoformat(),
        })

        # Run OpenClaw CLI
        response = await self._run_openclaw_cli(query)

        if response["success"]:
            self._conversation_history.append({
                "role": "assistant",
                "content": response["content"],
                "timestamp": datetime.now().isoformat(),
            })
        else:
            # A failed turn adds no assistant message; surface the error
            # instead of dropping it silently.
            print(f"[OpenClaw] Warning: Turn failed: {response.get('error', 'Unknown error')}")

        self._turn_count += 1

    # Generate trajectory from session JSONL
    duration = (datetime.now() - start_time).total_seconds()
    await self._generate_trajectory(duration)

    return self.get_result()
791
+
792
async def _get_session_log_path(
    self,
    wait_seconds: float = 0.0,
    warn: bool = True,
) -> Optional[str]:
    """Locate the session trace file written by the OpenClaw CLI.

    On every polling round the candidates are checked in this order:
    1. ``<session_id>.jsonl`` / ``<session_id>.trajectory.jsonl`` in the
       runtime trace directory
    2. the same two names under ``<profile>/agents/main/sessions``
    3. the ``runtimeFile`` entry of
       ``<session_id>.trajectory-path.json`` in that sessions directory,
       if the file it names exists

    Args:
        wait_seconds: How long to keep polling (every 0.25s) before giving
            up. Negative values are treated as 0; at least one round runs.
        warn: Whether to print a warning when nothing was found.

    Returns:
        Path of the first match as a string, or ``None``.
    """
    candidate_names = (
        f"{self._session_id}.jsonl",
        f"{self._session_id}.trajectory.jsonl",
    )
    sessions_root = Path(self._profile_dir) / "agents" / "main" / "sessions"
    trace_root = Path(self._runtime_trace_dir)
    give_up_at = time.monotonic() + max(wait_seconds, 0.0)

    def _first_existing(directory: Path) -> Optional[str]:
        # First candidate file name that exists in the given directory.
        for name in candidate_names:
            candidate = directory / name
            if candidate.exists():
                return str(candidate)
        return None

    def _follow_pointer() -> Optional[str]:
        # Resolve the pointer file; an unreadable or invalid pointer counts
        # as "not found".
        pointer_path = sessions_root / f"{self._session_id}.trajectory-path.json"
        if not pointer_path.exists():
            return None
        try:
            pointer = json.loads(pointer_path.read_text(encoding="utf-8"))
        except (OSError, json.JSONDecodeError):
            return None
        target = pointer.get("runtimeFile")
        if target and os.path.exists(target):
            return target
        return None

    while True:
        located = (
            _first_existing(trace_root)
            or _first_existing(sessions_root)
            or _follow_pointer()
        )
        if located:
            return located

        if time.monotonic() >= give_up_at:
            break
        await asyncio.sleep(0.25)

    if warn:
        print(
            f"[WARNING] OpenClaw session log not found for session_id="
            f"{self._session_id} under {self._profile_dir}"
        )
    return None
845
+
846
async def _generate_trajectory(self, duration: float = 0.0) -> None:
    """Convert the OpenClaw session log into a trajectory.

    Waits up to 5 seconds for the session log to appear, then runs the
    trajectory converter in a worker thread under a timeout. The result is
    stored in ``self._current_trajectory``; it is ``None`` when the log is
    missing, the conversion times out, fails, or yields nothing. Problems
    are printed, never raised.

    Args:
        duration: Run duration in seconds, forwarded to the converter.
    """
    session_path = await self._get_session_log_path(wait_seconds=5.0)
    if not session_path:
        self._current_trajectory = None
        print(
            "[ERROR] OpenClaw runtime trace missing; no trajectory generated "
            f"for session_id={self._session_id}"
        )
        return

    outcome = None
    try:
        # The conversion is blocking, so it runs in a thread; the timeout
        # bounds how long this coroutine waits for it.
        converted = await asyncio.wait_for(
            asyncio.to_thread(
                self.trajectory_converter.convert_session_log,
                session_log_path=session_path,
                metadata=self._trace_metadata,
                task_id=(self._trace_metadata or {}).get("task_id"),
                duration=duration,
                mcp_tool_lists=self._mcp_tool_lists or None,
            ),
            timeout=OPENCLAW_TRAJECTORY_CONVERSION_TIMEOUT_SECONDS,
        )
        if converted:
            outcome = converted
        else:
            print(
                "[ERROR] OpenClaw runtime trace conversion produced no "
                f"trajectory for session_id={self._session_id}"
            )
    except asyncio.TimeoutError:
        print(
            "[WARNING] Timed out converting OpenClaw runtime trace; "
            f"session_id={self._session_id}"
        )
    except Exception as e:
        print(f"[WARNING] Failed to generate trajectory: {e}")

    self._current_trajectory = outcome
886
+
887
def get_result(self) -> AgentResult:
    """Build an AgentResult from the current conversation state.

    ``final_output`` is the content of the most recent assistant message in
    the conversation history, or ``None`` if there is none.
    """
    last_reply = next(
        (
            entry.get("content")
            for entry in reversed(self._conversation_history)
            if entry.get("role") == "assistant"
        ),
        None,
    )

    return AgentResult(
        final_output=last_reply,
        turn_count=self._turn_count,
        trajectory=self._current_trajectory,
        trace_id=self._session_id,
    )
901
+
902
def reset_conversation(self) -> None:
    """Drop all per-session state and switch to a fresh session id.

    Clears the conversation history, turn counter, trace metadata and
    current trajectory, and assigns a new ``dt-arena-<8 hex chars>`` id.
    """
    self._conversation_history, self._turn_count = [], 0
    self._session_id = "dt-arena-" + uuid.uuid4().hex[:8]
    self._trace_metadata = self._current_trajectory = None
909
+
910
async def cleanup(self) -> None:
    """Clean up resources: stop proxies, remove temp and profile directories.

    The steps are chained with ``try``/``finally`` so that a failure while
    stopping the proxies (or removing temp resources) no longer skips the
    remaining cleanup and leaves directories behind. Such an exception
    still propagates to the caller once the later steps have run. Failing
    to remove the profile directory only prints a warning.
    """
    self.reset_conversation()

    try:
        # Stop all proxy servers
        if self._proxy_manager:
            await self._proxy_manager.stop_all()
            self._proxy_manager = None
            self._proxy_urls = {}
    finally:
        try:
            # Clean up temp resources outside profile dir
            self._cleanup_temp_resources()
        finally:
            # Clean up profile directory (isolated extensions for this task).
            # getattr covers an instance where the attribute was never set
            # or is None.
            profile_dir = getattr(self, "_profile_dir", None)
            if profile_dir and os.path.exists(profile_dir):
                try:
                    shutil.rmtree(profile_dir)
                    if self._debug:
                        print(f"[OpenClaw] Cleaned up profile directory: {profile_dir}")
                except Exception as e:
                    print(f"[OpenClaw] Warning: Failed to clean up profile directory: {e}")