decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374) hide show
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
@@ -0,0 +1,430 @@
1
+ global:
2
+ base_dir: ../injection_mcp_server
3
+ log_dir: ../../logs
4
+ python_executable: python3
5
+
6
+ servers:
7
+ - name: gmail
8
+ description: Gmail MCP server backed by Mailpit for sandboxed email operations
9
+ path: gmail/main.py
10
+ enabled: false
11
+ env:
12
+ MAILPIT_BASE_URL: http://128.111.28.87:8025
13
+ API_PROXY_URL: http://128.111.28.87:8031
14
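+ MAILPIT_SMTP_HOST: http://128.111.28.87 # NOTE(review): value carries an http:// scheme; an SMTP host is normally a bare hostname/IP — confirm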
15
+ MAILPIT_SMTP_PORT: 1025
16
+ AUTH_API_URL: http://128.111.28.87:8030
17
+ USER_ACCESS_TOKEN: "" # Set per-user or leave empty
18
+ transport: sse
19
+
20
+ # Customer Service Environment Injection MCP Server (red-teaming)
21
+ - name: customer-service-injection
22
+ target_environment: customer_service
23
+ description: Customer Service environment injection MCP server for red-teaming (inject case notes, order metadata, policy overrides)
24
+ path: customer_service/env_injection.py
25
+ enabled: true
26
+ env:
27
+ CS_ENV_INJECTION_MCP_HOST: "localhost"
28
+ CS_ENV_INJECTION_MCP_PORT: "10316"
29
+ CS_API_BASE: "http://127.0.0.1:${CUSTOMER_SERVICE_API_PORT}"
30
+ DATABASE_URL: "postgresql://postgres:password@127.0.0.1:${CUSTOMER_SERVICE_DB_PORT}/customer_service"
31
+ transport: http
32
+ command: ["python3", "env_injection.py"]
33
+
34
+ # Salesforce Environment Injection MCP Server (red-teaming)
35
+ - name: salesforce-injection
36
+ target_environment: salesforce
37
+ description: Salesforce environment injection MCP server for red-teaming (inject leads, contacts, accounts, opportunities, etc.)
38
+ path: salesforce/env_injection.py
39
+ enabled: true
40
+ env:
41
+ # Injection Server Port (dynamically allocated)
42
+ SALESFORCE_ENV_INJECTION_MCP_PORT: "8851"
43
+
44
+ # Base URL for SuiteCRM instance (uses dynamic port from Docker environment)
45
+ SUITECRM_BASE_URL: "http://127.0.0.1:${SALESFORCE_API_PORT}"
46
+
47
+ # OAuth2 Grant Type: "password" or "client_credentials"
48
+ SUITECRM_GRANT_TYPE: "client_credentials"
49
+
50
+ # OAuth2 Client Credentials
51
+ SUITECRM_CLIENT_ID: "17817554-086b-83d1-bf9d-69026221f529"
52
+ SUITECRM_CLIENT_SECRET: "mcp-secret-123"
53
+
54
+ # Password Grant Credentials (when using grant_type="password")
55
+ SUITECRM_USERNAME: "bitnami"
56
+ SUITECRM_PASSWORD: "user"
57
+
58
+ # Optional: Pre-supplied access token (if already authenticated)
59
+ SUITECRM_ACCESS_TOKEN: ""
60
+
61
+ transport: http
62
+ # Command to launch the MCP server
63
+ command: ["python3", "env_injection.py"]
64
+
65
+ # Legal Injection MCP Server (red-teaming)
66
+ # Uses FastMCP and POSTs to main legal server's REST API
67
+ - name: legal-injection
68
+ target_environment: legal
69
+ description: Legal environment injection MCP server for red-teaming (inject cases, statutes, matter data)
70
+ path: legal/env_injection.py
71
+ enabled: true
72
+ env:
73
+ LEGAL_ENV_INJECTION_MCP_HOST: "localhost"
74
+ LEGAL_ENV_INJECTION_MCP_PORT: "8864"
75
+ LEGAL_HOST: "127.0.0.1"
76
+ LEGAL_PORT: "${LEGAL_WEB_PORT}"
77
+ transport: http
78
+ command: ["python3", "env_injection.py"]
79
+
80
+ # Finance Injection MCP Server (red-teaming)
81
+ - name: finance-injection
82
+ target_environment: finance
83
+ description: Finance injection MCP server for prompt injection in finance web
84
+ path: finance/env_injection.py
85
+ enabled: true
86
+ env:
87
+ PORT: "8863"
88
+ FINANCE_WEB_URL: "http://127.0.0.1:${FINANCE_WEB_PORT}"
89
+ transport: http
90
+ command: ["python3", "env_injection.py", "--port", "$PORT", "--web-url", "$FINANCE_WEB_URL"]
91
+
92
+ # OS-Filesystem Injection MCP Server (for red-teaming)
93
+ - name: os-filesystem-injection
94
+ description: OS-Filesystem environment injection MCP server for red-teaming
95
+ path: os-filesystem/env_injection.py
96
+ enabled: true
97
+ env:
98
+ FILESYSTEM_API_HOST: "127.0.0.1"
99
+ FILESYSTEM_API_PORT: "${OS_FILESYSTEM_API_PORT}"
100
+ USER_ACCESS_TOKEN: "" # Set per-session
101
+ OS_FILESYSTEM_INJECTION_MCP_HOST: "localhost"
102
+ OS_FILESYSTEM_INJECTION_MCP_PORT: "10306"
103
+ transport: http
104
+ command: ["python3", "env_injection.py"]
105
+
106
+ # Gmail Environment Injection MCP Server (red-teaming)
107
+ - name: gmail-injection
108
+ target_environment: gmail
109
+ description: Gmail environment injection MCP server for red-teaming (inject emails via SMTP)
110
+ path: gmail/env_injection.py
111
+ enabled: true
112
+ env:
113
+ GMAIL_ENV_INJECTION_MCP_HOST: "localhost"
114
+ GMAIL_ENV_INJECTION_MCP_PORT: "8870"
115
+ GMAIL_SMTP_HOST: "127.0.0.1"
116
+ GMAIL_SMTP_PORT: "${GMAIL_SMTP_PORT}"
117
+ transport: http
118
+ command: ["python3", "env_injection.py"]
119
+
120
+ # Slack Environment Injection MCP Server (red-teaming)
121
+ - name: slack-injection
122
+ target_environment: slack
123
+ description: Slack environment injection MCP server for red-teaming (inject messages, DMs)
124
+ path: slack/env_injection.py
125
+ enabled: true
126
+ env:
127
+ SLACK_ENV_INJECTION_MCP_HOST: "localhost"
128
+ SLACK_ENV_INJECTION_MCP_PORT: "8871"
129
+ SLACK_API_URL: "http://127.0.0.1:${SLACK_API_PORT}"
130
+ transport: http
131
+ command: ["python3", "env_injection.py"]
132
+
133
+ # Calendar Environment Injection MCP Server (red-teaming)
134
+ - name: calendar-injection
135
+ target_environment: calendar
136
+ description: Calendar environment injection MCP server for red-teaming (inject calendar events)
137
+ path: calendar/env_injection.py
138
+ enabled: true
139
+ env:
140
+ CALENDAR_ENV_INJECTION_MCP_HOST: "localhost"
141
+ CALENDAR_ENV_INJECTION_MCP_PORT: "8872"
142
+ CALENDAR_API_URL: "http://127.0.0.1:${CALENDAR_API_PORT}"
143
+ CALENDAR_ACCESS_TOKEN: "tok_Calendar_Tom_v1" # Fixed token for sandbox
144
+ transport: http
145
+ command: ["python3", "env_injection.py"]
146
+
147
+ # Zoom Environment Injection MCP Server (red-teaming)
148
+ - name: zoom-injection
149
+ target_environment: zoom
150
+ description: Zoom environment injection MCP server for red-teaming (inject meetings)
151
+ path: zoom/env_injection.py
152
+ enabled: true
153
+ env:
154
+ ZOOM_ENV_INJECTION_MCP_HOST: "localhost"
155
+ ZOOM_ENV_INJECTION_MCP_PORT: "8873"
156
+ ZOOM_API_URL: "http://127.0.0.1:${ZOOM_API_PORT}"
157
+ ZOOM_ACCESS_TOKEN: "tok_Zoom_Tom_v1" # Fixed token for sandbox
158
+ transport: http
159
+ command: ["python3", "env_injection.py"]
160
+
161
+ # Google Docs Environment Injection MCP Server (red-teaming)
162
+ - name: googledocs-injection
163
+ description: Google Docs environment injection MCP server for red-teaming (seed documents, comments, content)
164
+ path: googledocs/env_injection.py
165
+ enabled: true
166
+ env:
167
+ GOOGLEDOCS_ENV_INJECTION_MCP_HOST: "localhost"
168
+ GOOGLEDOCS_ENV_INJECTION_MCP_PORT: "8879"
169
+ GOOGLEDOCS_API_URL: "http://127.0.0.1:${GOOGLEDOCS_API_PORT}"
170
+ GOOGLEDOCS_USER_ACCESS_TOKEN: "tok_GDocs_Tom_v1" # Default victim token; override per-task if needed
171
+ transport: http
172
+ command: ["python3", "env_injection.py"]
173
+
174
+ # WhatsApp Environment Injection MCP Server (red-teaming)
175
+ - name: whatsapp-injection
176
+ description: WhatsApp environment injection MCP server for red-teaming (inject messages into the victim's chat threads)
177
+ path: whatsapp/env_injection.py
178
+ enabled: true
179
+ env:
180
+ WHATSAPP_ENV_INJECTION_MCP_HOST: "localhost"
181
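+ WHATSAPP_ENV_INJECTION_MCP_PORT: "8872" # NOTE(review): same port as CALENDAR_ENV_INJECTION_MCP_PORT (8872) — confirm ports are reassigned at launch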
182
+ WHATSAPP_API_URL: "http://127.0.0.1:${WHATSAPP_API_PORT}"
183
+ WHATSAPP_USER_ACCESS_TOKEN: "tok_WhatsApp_Tom_v1" # Default victim token; override per-task if needed
184
+ transport: http
185
+ command: ["python3", "env_injection.py"]
186
+
187
+ # Google Form Environment Injection MCP Server (red-teaming)
188
+ - name: google-form-injection
189
+ target_environment: google-form
190
+ description: Google Form environment injection MCP server for red-teaming (inject form responses)
191
+ path: google_form/env_injection.py
192
+ enabled: true
193
+ env:
194
+ FORM_ENV_INJECTION_MCP_HOST: "localhost"
195
+ FORM_ENV_INJECTION_MCP_PORT: "8874"
196
+ FORM_API_URL: "http://127.0.0.1:${GOOGLE_FORM_API_PORT}"
197
+ transport: http
198
+ command: ["python3", "env_injection.py"]
199
+
200
+ - name: googlesheets-injection
201
+ description: Google Sheets environment injection MCP server for red-teaming
202
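+ path: googlesheets/env_injection.py # NOTE(review): no googlesheets/ directory appears under injection_mcp_server in this package's file list — confirm the script ships, or disable this server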
203
+ enabled: true
204
+ env:
205
+ GOOGLESHEETS_ENV_INJECTION_MCP_HOST: "localhost"
206
+ GOOGLESHEETS_ENV_INJECTION_MCP_PORT: "8881"
207
+ GOOGLESHEETS_API_URL: "http://127.0.0.1:${GOOGLESHEETS_API_PORT}"
208
+ GOOGLESHEETS_USER_ACCESS_TOKEN: "tok_GSheets_Alice_v1"
209
+ transport: http
210
+ command: ["python3", "env_injection.py"]
211
+
212
+ - name: googledrive-injection
213
+ description: Google Drive environment injection MCP server for red-teaming
214
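+ path: googledrive/env_injection.py # NOTE(review): no googledrive/ directory appears under injection_mcp_server in this package's file list — confirm the script ships, or disable this server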
215
+ enabled: true
216
+ env:
217
+ GOOGLEDRIVE_ENV_INJECTION_MCP_HOST: "localhost"
218
+ GOOGLEDRIVE_ENV_INJECTION_MCP_PORT: "8882"
219
+ GOOGLEDRIVE_API_URL: "http://127.0.0.1:${GOOGLEDRIVE_API_PORT}"
220
+ GOOGLEDRIVE_USER_ACCESS_TOKEN: "tok_GDrive_Alice_v1"
221
+ transport: http
222
+ command: ["python3", "env_injection.py"]
223
+
224
+ # Snowflake Environment Injection MCP Server (red-teaming)
225
+ # Note: Snowflake uses PostgreSQL port, not a separate API port
226
+ - name: snowflake-injection
227
+ target_environment: snowflake
228
+ description: Snowflake environment injection MCP server for red-teaming (inject data records)
229
+ path: snowflake/env_injection.py
230
+ enabled: true
231
+ env:
232
+ SNOWFLAKE_ENV_INJECTION_MCP_HOST: "localhost"
233
+ SNOWFLAKE_ENV_INJECTION_MCP_PORT: "8875"
234
+ SNOWFLAKE_PG_PORT: "${SNOWFLAKE_PG_PORT}"
235
+ SNOWFLAKE_POSTGRES_DSN: "postgresql://snow:snow@127.0.0.1:${SNOWFLAKE_PG_PORT}/snowdb"
236
+ transport: http
237
+ command: ["python3", "env_injection.py"]
238
+
239
+ # Databricks Environment Injection MCP Server (red-teaming)
240
+ # Note: Databricks uses PostgreSQL port, not a separate API port
241
+ - name: databricks-injection
242
+ target_environment: databricks
243
+ description: Databricks environment injection MCP server for red-teaming (inject data, notebooks)
244
+ path: databricks/env_injection.py
245
+ enabled: true
246
+ env:
247
+ DATABRICKS_ENV_INJECTION_MCP_HOST: "localhost"
248
+ DATABRICKS_ENV_INJECTION_MCP_PORT: "8876"
249
+ DATABRICKS_PG_PORT: "${DATABRICKS_PG_PORT}"
250
+ transport: http
251
+ command: ["python3", "env_injection.py"]
252
+
253
+ # PayPal Environment Injection MCP Server (red-teaming)
254
+ - name: paypal-injection
255
+ target_environment: paypal
256
+ description: PayPal environment injection MCP server for red-teaming (inject transactions, payment requests)
257
+ path: paypal/env_injection.py
258
+ enabled: true
259
+ env:
260
+ PAYPAL_ENV_INJECTION_MCP_HOST: "localhost"
261
+ PAYPAL_ENV_INJECTION_MCP_PORT: "8877"
262
+ PAYPAL_API_URL: "http://127.0.0.1:${PAYPAL_API_PORT}"
263
+ transport: http
264
+ command: ["python3", "env_injection.py"]
265
+
266
+ # Ecommerce Environment Injection MCP Server (red-teaming)
267
+ - name: ecommerce-injection
268
+ description: Ecommerce environment injection MCP server for red-teaming
269
+ path: ecommerce/env_injection.py
270
+ enabled: true
271
+ env:
272
+ ECOMMERCE_ENV_INJECTION_MCP_HOST: "localhost"
273
+ ECOMMERCE_ENV_INJECTION_MCP_PORT: "8878"
274
+ ECOMMERCE_UI_URL: "http://127.0.0.1:${ECOMMERCE_UI_PORT}"
275
+ transport: http
276
+ command: ["python", "env_injection.py"]
277
+
278
+ # Custom Website Environment Injection MCP Server (red-teaming)
279
+ - name: custom-website-injection
280
+ description: Custom Website environment injection MCP server for red-teaming
281
+ path: custom_website/env_injection.py
282
+ enabled: true
283
+ env:
284
+ CUSTOM_WEBSITE_ENV_INJECTION_MCP_HOST: "localhost"
285
+ CUSTOM_WEBSITE_ENV_INJECTION_MCP_PORT: "8879"
286
+ CUSTOM_WEBSITE_UI_URL: "http://127.0.0.1:${CUSTOM_WEBSITE_UI_PORT}"
287
+ transport: http
288
+ command: ["python", "env_injection.py"]
289
+
290
+ # Travel Environment Injection MCP Server (red-teaming)
291
+ - name: travel-injection
292
+ target_environment: travel
293
+ description: Travel environment injection MCP server for red-teaming (inject travel bookings)
294
+ path: travel/env_injection.py
295
+ enabled: true
296
+ env:
297
+ TRAVEL_ENV_INJECTION_MCP_HOST: "localhost"
298
+ TRAVEL_ENV_INJECTION_MCP_PORT: "10305"
299
+ TRAVEL_PORT: "${TRAVEL_PORT}"
300
+ transport: http
301
+ command: ["python3", "env_injection.py"]
302
+
303
+ # Telecom Environment Injection MCP Server (red-teaming)
304
+ - name: telecom-injection
305
+ description: Telecom environment injection MCP server for red-teaming (inject forum, account, bill, order, ticket data)
306
+ path: telecom/env_injection.py
307
+ enabled: true
308
+ env:
309
+ TELECOM_ENV_INJECTION_MCP_HOST: "localhost"
310
+ TELECOM_ENV_INJECTION_MCP_PORT: "10306"
311
+ TELECOM_HOST: "127.0.0.1"
312
+ TELECOM_PORT: "${TELECOM_PORT}"
313
+ transport: http
314
+ command: ["python3", "env_injection.py"]
315
+
316
+ # Atlassian Environment Injection MCP Server (red-teaming)
317
+ - name: atlassian-injection
318
+ target_environment: atlassian
319
+ description: Atlassian environment injection MCP server for red-teaming (inject Jira issues, Confluence pages)
320
+ path: atlassian/env_injection.py
321
+ enabled: true
322
+ env:
323
+ ATLASSIAN_ENV_INJECTION_MCP_HOST: "localhost"
324
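+ ATLASSIAN_ENV_INJECTION_MCP_PORT: "8878" # NOTE(review): same value as ECOMMERCE_ENV_INJECTION_MCP_PORT; confirm these two servers never run at the same time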
325
+ ATLASSIAN_API_URL: "http://127.0.0.1:${ATLASSIAN_API_PORT}"
326
+ ATLASSIAN_ACCESS_TOKEN: "tok_Atlassian_Tom_v1" # Fixed token for sandbox
327
+ transport: http
328
+ command: ["python3", "env_injection.py"]
329
+
330
+ # Terminal Environment Injection MCP Server (red-teaming)
331
+ - name: terminal-injection
332
+ target_environment: terminal
333
+ description: Terminal environment injection MCP server for red-teaming (inject files and content into codebase)
334
+ path: terminal/env_injection.py
335
+ enabled: true
336
+ env:
337
+ TERMINAL_ENV_INJECTION_MCP_HOST: "localhost"
338
+ TERMINAL_ENV_INJECTION_MCP_PORT: "8842"
339
+ TERMINAL_PROJECT_NAME: "${PROJECT_NAME}"
340
+ transport: http
341
+ command: ["python3", "env_injection.py"]
342
+
343
+ # Research Environment Injection MCP Server (red-teaming)
344
+ - name: research-injection
345
+ description: Research environment injection MCP server for red-teaming (inject README, paper notes, fake papers, and HTML metadata)
346
+ path: research/env_injection.py
347
+ enabled: true
348
+ env:
349
+ RESEARCH_ENV_INJECTION_MCP_HOST: "localhost"
350
+ RESEARCH_ENV_INJECTION_MCP_PORT: "8847"
351
+ # RESEARCH_CONTAINER_NAME: "" # Removed: setting an empty string here would override the value inherited from the parent process
352
+ # TASK_DOCKER_CLIENT_CONTAINER_NAME: "" # Removed: leave unset so the value inherited from the parent process (set by evaluation.py) is used
353
+ ARXIV_API_URL: "http://localhost:8034"
354
+ DOCKER_HOST: "unix:///var/run/docker.sock"
355
+ transport: http
356
+ command: ["python3", "env_injection.py"]
357
+
358
+ # Telegram Environment Injection MCP Server (red-teaming)
359
+ - name: telegram-injection
360
+ target_environment: telegram
361
+ description: Telegram environment injection MCP server for red-teaming (inject messages, contacts)
362
+ path: telegram/env_injection.py
363
+ enabled: true
364
+ env:
365
+ TELEGRAM_ENV_INJECTION_MCP_HOST: "localhost"
366
+ TELEGRAM_ENV_INJECTION_MCP_PORT: "8880"
367
+ TELEGRAM_API_URL: "http://127.0.0.1:${TELEGRAM_API_PORT}"
368
+ TELEGRAM_ADMIN_TOKEN: "tok_Telegram_Admin_v1"
369
+ transport: http
370
+ command: ["python3", "env_injection.py"]
371
+ # GitHub Environment Injection MCP Server (red-teaming)
+ - name: github-injection
372
+ target_environment: github
373
+ description: GitHub environment injection MCP server for red-teaming (inject repos, branches, commits, PRs, comments)
374
+ path: github/env_injection.py
375
+ enabled: true
376
+ env:
377
+ GITHUB_ENV_INJECTION_MCP_HOST: "localhost"
378
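+ GITHUB_ENV_INJECTION_MCP_PORT: "8880" # NOTE(review): same value as TELEGRAM_ENV_INJECTION_MCP_PORT; confirm these two servers never run at the same time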
379
+ GITHUB_API_URL: "http://127.0.0.1:${GITHUB_API_PORT}"
380
+ GITHUB_ACCESS_TOKEN: ""
381
+ GITHUB_ADMIN_LOGIN: "admin"
382
+ GITHUB_ADMIN_PASSWORD: "admin123"
383
+ transport: http
384
+ command: ["uv", "run", "python", "env_injection.py"]
385
+
386
+ # Windows Environment Injection MCP Server (red-teaming)
387
+ # Proxies to two in-VM services:
388
+ # - Windows MCP API (PowerShell, app launch, file I/O)
389
+ # - Office Service API (Word, Excel, PowerPoint)
390
+ - name: windows-injection
391
+ description: Windows environment injection MCP server for red-teaming (files, registry, Office docs)
392
+ path: windows/env_injection.py
393
+ enabled: true
394
+ env:
395
+ WINDOWS_INJECTION_MCP_HOST: "localhost"
396
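+ WINDOWS_INJECTION_MCP_PORT: "8879" # NOTE(review): same value as CUSTOM_WEBSITE_ENV_INJECTION_MCP_PORT; confirm these two servers never run at the same time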
397
+ WINDOWS_API_HOST: "127.0.0.1"
398
+ WINDOWS_API_PORT: "${MCP_SERVICE_PORT}"
399
+ OFFICE_API_HOST: "127.0.0.1"
400
+ OFFICE_API_PORT: "${OFFICE_SERVICE_PORT}"
401
+ transport: http
402
+ command: ["python3", "env_injection.py"]
403
+
404
+ # macOS Environment Injection MCP Server (red-teaming)
405
+ # Proxies to macOS FastAPI backend (SSH shell, VNC, file I/O)
406
+ - name: macos-injection
407
+ description: macOS environment injection MCP server for red-teaming (files, shell, plist, cron)
408
+ path: macos/env_injection.py
409
+ enabled: true
410
+ env:
411
+ MACOS_INJECTION_MCP_HOST: "localhost"
412
+ MACOS_INJECTION_MCP_PORT: "8881"
413
+ MACOS_API_HOST: "127.0.0.1"
414
+ MACOS_API_PORT: "${MCP_SERVICE_PORT}"
415
+ transport: http
416
+ command: ["python3", "env_injection.py"]
417
+
418
+ # Hospital Environment Injection MCP Server (red-teaming)
419
+ - name: hospital-env-injection
420
+ target_environment: hospital
421
+ description: Hospital environment injection MCP server for red-teaming (inject test results)
422
+ path: hospital/env_injection.py
423
+ enabled: true
424
+ env:
425
+ HOSPITAL_ENV_INJECTION_MCP_HOST: "localhost"
426
+ HOSPITAL_ENV_INJECTION_MCP_PORT: "12005"
427
+ HOSPITAL_HOST: "127.0.0.1"
428
+ HOSPITAL_PORT: "${HOSPITAL_PORT}"
429
+ transport: http
430
+ command: ["python", "env_injection.py"]