decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374) hide show
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,55 @@
1
+ import json
2
+ import os
3
+ import shutil
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+
8
# Location of the user's OpenClaw auth store, used as the copy source when no
# OpenRouter key is supplied.
DEFAULT_OPENCLAW_AUTH_STORE_PATH = (
    Path.home() / ".openclaw" / "agents" / "main" / "agent" / "auth-profiles.json"
)


def populate_openclaw_profile_auth(
    profile_dir: str,
    *,
    debug: bool = False,
    openrouter_key: Optional[str] = None,
    auth_store_path: Path = DEFAULT_OPENCLAW_AUTH_STORE_PATH,
) -> None:
    """Populate provider auth into an isolated OpenClaw profile.

    The target file is ``<profile_dir>/agents/main/agent/auth-profiles.json``;
    its parent directories are created when missing.

    If an OpenRouter key is available (the ``openrouter_key`` argument, or the
    ``OPENROUTER_API_KEY`` environment variable when the argument is ``None``),
    a minimal auth store containing only that key is written. Otherwise the
    file at ``auth_store_path`` is copied into the profile, if it exists.

    Because the file holds credentials it is restricted to owner read/write
    (mode 0600) in both cases.

    Args:
        profile_dir: Root directory of the isolated profile.
        debug: Print a confirmation message on success.
        openrouter_key: Explicit OpenRouter API key; ``None`` falls back to the
            environment variable. Surrounding whitespace is stripped.
        auth_store_path: Existing auth store to copy when no key is available.

    Failures while writing or copying are reported with a warning on stdout and
    never raised (best effort).
    """
    profile_auth_dir = Path(profile_dir) / "agents" / "main" / "agent"
    profile_auth_dir.mkdir(parents=True, exist_ok=True)
    profile_auth_store = profile_auth_dir / "auth-profiles.json"

    resolved_openrouter_key = (
        openrouter_key if openrouter_key is not None else os.getenv("OPENROUTER_API_KEY", "")
    ).strip()
    if resolved_openrouter_key:
        auth_data = {
            "version": 1,
            "profiles": {
                "openrouter:default": {
                    "type": "api_key",
                    "provider": "openrouter",
                    "key": resolved_openrouter_key,
                }
            },
        }
        try:
            # Create the file owner-only so the key is never exposed through
            # default umask permissions.
            fd = os.open(
                profile_auth_store,
                os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
                0o600,
            )
            with os.fdopen(fd, "w", encoding="utf-8") as handle:
                # A pre-existing file keeps its old mode on open; tighten it
                # before the secret is written.
                os.chmod(profile_auth_store, 0o600)
                handle.write(json.dumps(auth_data, indent=2))
            if debug:
                print(f"[OpenClaw] Wrote isolated OpenRouter auth-profiles: {profile_auth_store}")
        except Exception as e:
            print(f"[OpenClaw] Warning: Failed to write OpenRouter auth-profiles: {e}")
        return

    if not auth_store_path.exists():
        return

    try:
        shutil.copy2(auth_store_path, profile_auth_store)
        # copy2 carries over the source's mode bits; make sure the copy of the
        # credentials is owner-only regardless.
        os.chmod(profile_auth_store, 0o600)
        if debug:
            print(f"[OpenClaw] Copied auth store to profile: {profile_auth_store}")
    except Exception as e:
        print(f"[OpenClaw] Warning: Failed to copy auth store: {e}")
@@ -0,0 +1,564 @@
1
+ """
2
+ MCP Proxy Server for OpenClaw
3
+
4
+ This module provides an HTTP MCP proxy server that:
5
+ 1. Connects to real MCP servers
6
+ 2. Applies tool description injections
7
+ 3. Exposes modified tools via HTTP for OpenClaw to connect to
8
+
9
+ The proxy runs as an actual HTTP server that OpenClaw can connect to,
10
+ enabling tool injection support for the OpenClaw agent.
11
+ """
12
+
13
+ import asyncio
14
+ import json
15
+ import os
16
+ import signal
17
+ from typing import Any, Dict, List, Optional
18
+ from dataclasses import dataclass
19
+ import threading
20
+
21
+ from mcp import ClientSession, Tool
22
+ from mcp.client.sse import sse_client
23
+ from mcp.client.stdio import StdioServerParameters, stdio_client
24
+ from mcp.client.streamable_http import streamablehttp_client
25
+
26
+ from dt_arena.src.types.hooks import HookManager, ToolCallContext
27
+
28
+ # For running HTTP server
29
+ try:
30
+ from aiohttp import web
31
+ AIOHTTP_AVAILABLE = True
32
+ except ImportError:
33
+ AIOHTTP_AVAILABLE = False
34
+
35
+
36
@dataclass
class ProxyTool:
    """Tool entry served by the proxy, whose description may have been altered."""

    # Tool name as reported by the real MCP server.
    name: str
    # Description advertised to clients (after any injection was applied).
    description: str
    # JSON schema describing the tool's arguments.
    input_schema: Dict[str, Any]
    # Description as received from the real server, before injection.
    original_description: str
43
+
44
+
45
+ class MCPProxyServer:
46
+ """
47
+ HTTP MCP Proxy Server that connects to a real MCP server,
48
+ applies tool description injections, and exposes the modified
49
+ tools via HTTP for OpenClaw to connect to.
50
+
51
+ Usage:
52
+ proxy = MCPProxyServer(
53
+ name="salesforce",
54
+ transport="http",
55
+ url="http://127.0.0.1:8000/mcp",
56
+ tool_injections={"search_leads": ToolInjection(type="suffix", content="...")}
57
+ )
58
+ await proxy.start(port=9000)
59
+ # OpenClaw connects to http://127.0.0.1:9000/mcp
60
+ await proxy.stop()
61
+ """
62
+
63
def __init__(
    self,
    name: str,
    transport: str,
    url: Optional[str] = None,
    command: Optional[str] = None,
    args: Optional[List[str]] = None,
    env: Optional[Dict[str, str]] = None,
    tool_injections: Optional[Dict[str, Any]] = None,
    debug: bool = False,
    hook_manager: Optional[HookManager] = None,
):
    """
    Initialize the MCP Proxy Server.

    Only stores configuration and creates empty state; no connection is
    made and no HTTP server is started here.

    Args:
        name: Unique name for this server
        transport: Transport type ("http", "sse", or "stdio")
        url: URL for http/sse transports (to the real MCP server)
        command: Command for stdio transport
        args: Arguments for stdio transport
        env: Environment variables for stdio transport
        tool_injections: Dict mapping tool_name -> ToolInjection
        debug: Enable debug logging
        hook_manager: Hook manager stored for use when forwarding tool
            calls; may be None

    Raises:
        ImportError: If aiohttp is not installed.
    """
    if not AIOHTTP_AVAILABLE:
        raise ImportError("aiohttp is required for MCP proxy. Install with: pip install aiohttp")

    self.name = name
    self.transport = transport
    self.url = url
    self.command = command
    self.args = args or []
    self.env = env or {}
    self._tool_injections = tool_injections or {}
    self._debug = debug
    self._hook_manager = hook_manager

    # Cached tools list (fetched once during connect)
    self._connected = False
    self._tools: List[Tool] = []
    self._proxy_tools: List[ProxyTool] = []

    # Shutdown signal for SSE connections
    self._shutdown_event = asyncio.Event()

    # HTTP server state; all unset until the proxy is started
    self._app: Optional[web.Application] = None
    self._runner: Optional[web.AppRunner] = None
    self._site: Optional[web.TCPSite] = None
    self._port: int = 0
114
+
115
+ async def connect_to_real_server(self) -> None:
116
+ """
117
+ Connect to the real MCP server, fetch tools list, then disconnect.
118
+
119
+ Uses a single request to list available tools. The connection is closed
120
+ after listing tools - we use per-request connections for actual tool calls.
121
+ """
122
+ if self._connected:
123
+ return
124
+
125
+ # Validate parameters
126
+ if self.transport in ("http", "sse") and not self.url:
127
+ raise ValueError(f"URL is required for {self.transport} transport")
128
+ if self.transport == "stdio" and not self.command:
129
+ raise ValueError("Command is required for stdio transport")
130
+
131
+ # Fetch tools list with a single request (no persistent connection)
132
+ try:
133
+ await asyncio.wait_for(self._fetch_tools_list(), timeout=30.0)
134
+ except asyncio.TimeoutError:
135
+ raise TimeoutError(f"Timeout connecting to MCP server '{self.name}'")
136
+ except Exception as e:
137
+ raise ConnectionError(f"Failed to connect to MCP server '{self.name}': {e}")
138
+
139
+ if not self._tools:
140
+ raise ConnectionError(f"No tools discovered from MCP server '{self.name}'")
141
+
142
+ self._connected = True
143
+
144
+ # Create proxy tools with injected descriptions
145
+ self._create_proxy_tools()
146
+
147
+ if self._debug:
148
+ print(f"[Proxy:{self.name}] Connected, {len(self._proxy_tools)} tools available")
149
+
150
+ async def _fetch_tools_list(self) -> None:
151
+ """
152
+ Fetch tools list from the MCP server using a single request.
153
+
154
+ Uses per-request connection pattern - connects, fetches tools, disconnects.
155
+ This avoids stale connections and resource exhaustion.
156
+ """
157
+ if self.transport == "http":
158
+ async with streamablehttp_client(self.url) as (read, write, _):
159
+ session = ClientSession(read, write)
160
+ async with session:
161
+ await session.initialize()
162
+ response = await session.list_tools()
163
+ self._tools = response.tools
164
+ elif self.transport == "sse":
165
+ async with sse_client(self.url) as (read, write):
166
+ session = ClientSession(read, write)
167
+ async with session:
168
+ await session.initialize()
169
+ response = await session.list_tools()
170
+ self._tools = response.tools
171
+ elif self.transport == "stdio":
172
+ env = {"PATH": os.environ.get("PATH", ""), **self.env}
173
+ server_params = StdioServerParameters(
174
+ command=self.command,
175
+ args=self.args,
176
+ env=env,
177
+ )
178
+ async with stdio_client(server_params) as (read, write):
179
+ session = ClientSession(read, write)
180
+ async with session:
181
+ await session.initialize()
182
+ response = await session.list_tools()
183
+ self._tools = response.tools
184
+ else:
185
+ raise ValueError(f"Unsupported transport type: {self.transport}")
186
+
187
def _create_proxy_tools(self) -> None:
    """Rebuild ``self._proxy_tools`` from ``self._tools``, applying any configured injection."""
    self._proxy_tools = []
    bucket = self._proxy_tools
    for upstream in self._tools:
        # A missing/None description is treated as an empty string.
        before = upstream.description or ""
        after = self._apply_injection(upstream.name, before)

        entry = ProxyTool(
            name=upstream.name,
            description=after,
            input_schema=getattr(upstream, 'inputSchema', {}),
            original_description=before,
        )
        bucket.append(entry)

        if self._debug and after != before:
            print(f"[Proxy:{self.name}] Injected description for tool '{upstream.name}'")
203
+
204
+ def _apply_injection(self, tool_name: str, original_description: str) -> str:
205
+ """Apply description injection to a tool."""
206
+ injection = self._tool_injections.get(tool_name)
207
+ if not injection:
208
+ return original_description
209
+
210
+ # Handle both ToolInjection objects and dicts
211
+ if hasattr(injection, 'type'):
212
+ injection_type = injection.type
213
+ injection_content = injection.content
214
+ elif isinstance(injection, dict):
215
+ injection_type = injection.get('type', 'suffix')
216
+ injection_content = injection.get('content', '')
217
+ else:
218
+ return original_description
219
+
220
+ if injection_type == "suffix":
221
+ return f"{original_description}\n{injection_content}"
222
+ elif injection_type == "override":
223
+ return injection_content
224
+ else:
225
+ return original_description
226
+
227
+ async def _call_tool(self, tool_name: str, arguments: Dict[str, Any]) -> Dict[str, Any]:
228
+ """Forward a tool call to the real MCP server.
229
+
230
+ Uses per-request connections for reliability - each call creates a fresh
231
+ connection to the real MCP server, avoiding stale session issues.
232
+ """
233
+ async def _dispatch(args: Dict[str, Any]) -> Dict[str, Any]:
234
+ if self.transport == "http":
235
+ async with streamablehttp_client(self.url) as (read, write, _):
236
+ session = ClientSession(read, write)
237
+ async with session:
238
+ await session.initialize()
239
+ result = await asyncio.wait_for(
240
+ session.call_tool(tool_name, arguments=args),
241
+ timeout=60.0
242
+ )
243
+ return self._format_tool_result(result)
244
+ elif self.transport == "sse":
245
+ async with sse_client(self.url) as (read, write):
246
+ session = ClientSession(read, write)
247
+ async with session:
248
+ await session.initialize()
249
+ result = await asyncio.wait_for(
250
+ session.call_tool(tool_name, arguments=args),
251
+ timeout=60.0
252
+ )
253
+ return self._format_tool_result(result)
254
+ elif self.transport == "stdio":
255
+ env = {"PATH": os.environ.get("PATH", ""), **self.env}
256
+ params = StdioServerParameters(command=self.command, args=self.args, env=env)
257
+ async with stdio_client(params) as (read, write):
258
+ session = ClientSession(read, write)
259
+ async with session:
260
+ await session.initialize()
261
+ result = await asyncio.wait_for(
262
+ session.call_tool(tool_name, arguments=args),
263
+ timeout=60.0
264
+ )
265
+ return self._format_tool_result(result)
266
+ else:
267
+ raise ValueError(f"Unsupported transport: {self.transport}")
268
+
269
+ ctx = ToolCallContext(
270
+ framework="openclaw",
271
+ server=self.name,
272
+ tool_name=tool_name,
273
+ arguments=arguments,
274
+ )
275
+
276
+ try:
277
+ return await self._hook_manager.wrap(ctx, _dispatch)
278
+ except asyncio.TimeoutError:
279
+ return {
280
+ "content": [{"type": "text", "text": f"Error: Tool call timed out after 60s"}],
281
+ "isError": True,
282
+ }
283
+ except Exception as e:
284
+ return {
285
+ "content": [{"type": "text", "text": f"Error: {e}"}],
286
+ "isError": True,
287
+ }
288
+
289
+ def _format_tool_result(self, result: Any) -> Dict[str, Any]:
290
+ """Convert MCP result to standard format."""
291
+ content = []
292
+ if hasattr(result, 'content'):
293
+ for item in result.content:
294
+ if hasattr(item, 'text'):
295
+ content.append({"type": "text", "text": item.text})
296
+ else:
297
+ content.append({"type": "text", "text": str(item)})
298
+ return {
299
+ "content": content,
300
+ "isError": getattr(result, 'isError', False),
301
+ }
302
+
303
def _create_http_app(self) -> web.Application:
    """Create the aiohttp web application for the proxy.

    Routes:
        POST /mcp    -- JSON-RPC endpoint handling ``initialize``,
                        ``tools/list`` and ``tools/call``.
        GET  /sse    -- event stream that sends one ``initialized`` message
                        and then a keepalive comment every 30 seconds.
        GET  /health -- returns ``{"status": "ok"}``.

    Returns:
        The configured ``web.Application``.
    """
    app = web.Application()

    def rpc_error(req_id: Any, code: int, message: str) -> Dict[str, Any]:
        """Build a JSON-RPC 2.0 error envelope."""
        return {
            "jsonrpc": "2.0",
            "id": req_id,
            "error": {"code": code, "message": message},
        }

    async def handle_mcp(request: web.Request) -> web.Response:
        """Handle MCP protocol requests."""
        # Initialised up front so the catch-all below can always report an
        # id, even when the failure happens before the body is inspected.
        req_id = None
        try:
            try:
                body = await request.json()
            except json.JSONDecodeError:
                return web.json_response({"error": "Invalid JSON"}, status=400)

            # Valid JSON that is not an object (arrays, strings, numbers)
            # has no method/id to read; batch requests are not supported.
            if not isinstance(body, dict):
                return web.json_response(
                    rpc_error(None, -32600, "Invalid Request"), status=400
                )

            method = body.get("method")
            params = body.get("params")
            if params is None:
                # Treat an explicit ``"params": null`` like a missing key.
                params = {}
            req_id = body.get("id")

            if self._debug:
                print(f"[Proxy:{self.name}] Request: {method}")

            if method == "initialize":
                response = {
                    "jsonrpc": "2.0",
                    "id": req_id,
                    "result": {
                        "protocolVersion": "2024-11-05",
                        "capabilities": {"tools": {}},
                        "serverInfo": {"name": f"proxy-{self.name}", "version": "1.0.0"},
                    }
                }
            elif method == "tools/list":
                tools_list = [
                    {
                        "name": tool.name,
                        "description": tool.description,
                        "inputSchema": tool.input_schema,
                    }
                    for tool in self._proxy_tools
                ]
                response = {
                    "jsonrpc": "2.0",
                    "id": req_id,
                    "result": {"tools": tools_list}
                }
            elif method == "tools/call":
                if not isinstance(params, dict):
                    # name/arguments are looked up by key, so positional
                    # (array) params cannot be interpreted here.
                    response = rpc_error(req_id, -32602, "Invalid params")
                else:
                    tool_name = params.get("name")
                    arguments = params.get("arguments", {})
                    try:
                        result = await self._call_tool(tool_name, arguments)
                    except Exception as e:
                        result = {
                            "content": [{"type": "text", "text": f"Error calling tool: {e}"}],
                            "isError": True,
                        }
                    response = {
                        "jsonrpc": "2.0",
                        "id": req_id,
                        "result": result
                    }
            else:
                response = rpc_error(req_id, -32601, f"Method not found: {method}")

            return web.json_response(response)

        except Exception as e:
            # Catch-all to ensure we always return a valid response
            print(f"[Proxy:{self.name}] Handler error: {e}")
            return web.json_response(
                rpc_error(req_id, -32603, f"Internal error: {e}"), status=500
            )

    async def handle_sse(request: web.Request) -> web.StreamResponse:
        """Handle SSE connections for MCP."""
        response = web.StreamResponse(
            status=200,
            reason='OK',
            headers={
                'Content-Type': 'text/event-stream',
                'Cache-Control': 'no-cache',
                'Connection': 'keep-alive',
            }
        )
        await response.prepare(request)

        # Send initial capabilities
        init_data = {
            "jsonrpc": "2.0",
            "method": "initialized",
            "params": {
                "protocolVersion": "2024-11-05",
                "capabilities": {"tools": {}},
                "serverInfo": {"name": f"proxy-{self.name}", "version": "1.0.0"},
            }
        }
        await response.write(f"data: {json.dumps(init_data)}\n\n".encode())

        # Keep connection alive until shutdown is requested or the client
        # goes away (a write to a closed connection raises
        # ConnectionResetError).
        try:
            while not self._shutdown_event.is_set():
                await asyncio.sleep(30)
                await response.write(b": keepalive\n\n")
        except (asyncio.CancelledError, ConnectionResetError):
            pass

        return response

    async def handle_health(request: web.Request) -> web.Response:
        """Report that the proxy process is up."""
        # A coroutine handler: aiohttp deprecates plain-function handlers.
        return web.json_response({"status": "ok"})

    # Register routes
    app.router.add_post('/mcp', handle_mcp)
    app.router.add_get('/sse', handle_sse)
    app.router.add_get('/health', handle_health)

    return app
419
+
420
async def start(self, port: int = 0) -> int:
    """
    Start the HTTP proxy server.

    Connects to the real MCP server first, then serves the proxy app on
    127.0.0.1.

    Args:
        port: Port to listen on (0 for auto-assign)

    Returns:
        The actual port the server is listening on

    Raises:
        Exception: whatever ``connect_to_real_server`` or the aiohttp
            runner/site raise; if binding the site fails, the runner is
            cleaned up before the error propagates.
    """
    # First connect to real server
    await self.connect_to_real_server()

    # Create and start HTTP server
    self._app = self._create_http_app()
    self._runner = web.AppRunner(self._app)
    await self._runner.setup()

    try:
        self._site = web.TCPSite(self._runner, '127.0.0.1', port)
        await self._site.start()

        # Get actual port via the runner's public list of bound socket
        # addresses rather than the site's private ``_server`` attribute.
        self._port = self._runner.addresses[0][1]
    except Exception:
        # The runner is already set up; release it so a failed bind does
        # not leave it allocated.
        await self._runner.cleanup()
        self._site = None
        self._runner = None
        raise

    if self._debug:
        print(f"[Proxy:{self.name}] HTTP server started on port {self._port}")

    return self._port
448
+
449
async def stop(self) -> None:
    """Stop the proxy server and clean up.

    Safe to call more than once: the site and runner references are
    dropped before they are shut down, so a repeated call finds nothing
    left to stop. The runner is cleaned up even if stopping the site
    raises, and the connection/tool state is reset in every case.
    """
    self._shutdown_event.set()

    # Detach the handles first so a second stop() cannot re-stop them.
    site, runner = self._site, self._runner
    self._site = None
    self._runner = None

    try:
        # Stop HTTP server
        try:
            if site:
                await site.stop()
        finally:
            if runner:
                await runner.cleanup()
    finally:
        self._connected = False
        self._tools = []
        self._proxy_tools = []
        self._shutdown_event.clear()

    if self._debug:
        print(f"[Proxy:{self.name}] Stopped")
466
+
467
@property
def port(self) -> int:
    """Port number recorded for the proxy's HTTP listener."""
    listening_port = self._port
    return listening_port
471
+
472
@property
def proxy_url(self) -> str:
    """URL of the proxy's ``/mcp`` endpoint, for OpenClaw to connect to."""
    bound_port = self._port
    return f"http://127.0.0.1:{bound_port}/mcp"
476
+
477
@property
def is_running(self) -> bool:
    """Whether a port has been assigned and the proxy is marked connected."""
    if not self._port > 0:
        return False
    return self._connected
481
+
482
+
483
class MCPProxyManager:
    """
    Manager for multiple MCP proxy servers.

    Creates and manages proxy servers for all configured MCP servers,
    applying tool injections as needed.
    """

    def __init__(self, debug: bool = False, hook_manager: Optional["HookManager"] = None):
        """
        Args:
            debug: When true, print a line for each proxy that is created.
            hook_manager: Passed through unchanged to every proxy created.
        """
        # server_name -> running proxy
        self._proxies: Dict[str, "MCPProxyServer"] = {}
        self._debug = debug
        self._hook_manager = hook_manager
        # Use port=0 to let OS auto-assign free ports (avoids conflicts in parallel execution)

    async def create_proxies(
        self,
        mcp_configs: List[Any],  # List of MCPServerConfig
        tool_injections: Optional[Dict[str, Dict[str, Any]]] = None,
    ) -> Dict[str, str]:
        """
        Create proxy servers for all MCP configurations.

        Configurations whose ``enabled`` flag is false are skipped.

        Args:
            mcp_configs: List of MCPServerConfig objects
            tool_injections: Dict mapping server_name -> Dict[tool_name -> injection]

        Returns:
            Dict mapping server_name -> proxy_url

        Raises:
            ConnectionError: If any proxy fails to start. The failing proxy
                and every proxy already started are stopped first; the
                original error is attached as ``__cause__``.
        """
        tool_injections = tool_injections or {}
        proxy_urls = {}

        for config in mcp_configs:
            if not config.enabled:
                continue

            server_name = config.name
            injections = tool_injections.get(server_name, {})

            proxy = MCPProxyServer(
                name=server_name,
                transport=config.transport,
                url=config.url,
                command=config.command,
                args=config.args,
                env=config.env,
                tool_injections=injections,
                debug=self._debug,
                hook_manager=self._hook_manager,
            )

            try:
                await proxy.start(port=0)  # Auto-assign free port
            except Exception as e:
                # The failing proxy was never registered, so stop_all()
                # would not reach it; stop it explicitly (best effort).
                try:
                    await proxy.stop()
                except Exception as stop_error:
                    print(f"[ProxyManager] Error stopping proxy: {stop_error}")
                # Clean up the proxies that did start
                await self.stop_all()
                raise ConnectionError(
                    f"Failed to create proxy for '{server_name}': {e}"
                ) from e

            self._proxies[server_name] = proxy
            proxy_urls[server_name] = proxy.proxy_url

            if self._debug:
                print(f"[ProxyManager] Created proxy for '{server_name}' at {proxy.proxy_url}")

        return proxy_urls

    async def stop_all(self) -> None:
        """Stop all proxy servers.

        Errors from individual proxies are printed and do not prevent the
        remaining proxies from being stopped; the registry is emptied
        afterwards.
        """
        for proxy in list(self._proxies.values()):
            try:
                await proxy.stop()
            except Exception as e:
                print(f"[ProxyManager] Error stopping proxy: {e}")
        self._proxies.clear()

    def get_proxy(self, server_name: str) -> Optional["MCPProxyServer"]:
        """Get a specific proxy by name, or ``None`` if it is not registered."""
        return self._proxies.get(server_name)

    @property
    def proxies(self) -> Dict[str, "MCPProxyServer"]:
        """Get all proxies (the live server_name -> proxy mapping)."""
        return self._proxies