decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374)
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
@@ -0,0 +1,476 @@
1
+ import os
2
+ import tempfile
3
+ import atexit
4
+ import shutil
5
+ from typing import Dict, Any, Optional
6
+ from pathlib import Path
7
+
8
+
9
def load_env_file(env_file_path: str) -> Dict[str, str]:
    """
    Load environment variables from a .env file.

    Blank lines and lines starting with ``#`` are skipped. Every other line is
    split on the first ``=``; lines without ``=`` or with an empty key are
    ignored. A leading ``export `` is dropped so shell-style files work, and
    one pair of matching surrounding quotes is removed from the value.

    The load is best-effort: a missing file yields an empty dict, and a read
    or decode error is reported with a warning while whatever was parsed
    before the error is still returned.

    Args:
        env_file_path: Path to the .env file

    Returns:
        Dictionary of environment variables
    """
    env_vars: Dict[str, str] = {}

    if not os.path.exists(env_file_path):
        return env_vars

    try:
        # utf-8-sig reads plain UTF-8 and also swallows a leading BOM, which
        # would otherwise become part of the first key.
        with open(env_file_path, 'r', encoding='utf-8-sig') as f:
            for line in f:
                line = line.strip()
                # Skip empty lines and comments
                if not line or line.startswith('#'):
                    continue

                # Accept shell-style "export KEY=VALUE" lines
                if line.startswith('export '):
                    line = line[len('export '):].lstrip()

                # Parse KEY=VALUE (only the first '=' separates key and value)
                key, separator, value = line.partition('=')
                if not separator:
                    continue
                key = key.strip()
                if not key:
                    # "=value" has no usable variable name
                    continue
                value = value.strip()

                # Remove one pair of matching quotes. The length check keeps
                # a value that is a single quote character from being
                # treated as both the opening and the closing quote.
                if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                    value = value[1:-1]

                env_vars[key] = value

        print(f"[DOCKER] Loaded {len(env_vars)} environment variables from {env_file_path}")
    except (OSError, UnicodeError) as e:
        print(f"[DOCKER] Warning: Failed to load .env file: {e}")

    return env_vars
51
+
52
+
53
class DockerExecutor:
    """
    Docker executor for running attack algorithm scripts in a persistent container.

    Can either create a new container or attach to an existing one (for sharing
    across parallel processes).

    The container is obtained lazily, on the first call that needs it. An
    executor that created its container owns it and removes it, together with
    its temporary workspace directory, in cleanup(). An executor constructed
    with ``container_id`` only attaches and never stops or removes the
    container.
    """

    def __init__(
        self,
        image: str = "red-teaming-sandbox:latest",
        skills_dir: str = "dt_arms/attack_skills/attack_algorithms",
        timeout: int = 30,
        memory_limit: str = "2g",
        cpu_quota: int = 100000,
        env_file: str = "dt_arms/docker/.env",
        enable_gpu: bool = True,
        container_id: Optional[str] = None,  # Attach to existing container
    ):
        """
        Initialize Docker executor with persistent container.

        Args:
            image: Docker image name
            skills_dir: Path to attack_algorithms directory (will be mounted read-only)
            timeout: Default timeout in seconds for command execution
            memory_limit: Memory limit (e.g., "2g", "512m")
            cpu_quota: CPU quota (100000 = 1 CPU)
            env_file: Path to .env file with environment variables (default: "dt_arms/docker/.env")
            enable_gpu: Whether to enable GPU access in container (default: True)
            container_id: If provided, attach to existing container instead of creating new one
        """
        self.image = image
        self.skills_dir = os.path.abspath(skills_dir)
        self.timeout = timeout
        self.memory_limit = memory_limit
        self.cpu_quota = cpu_quota
        self.enable_gpu = enable_gpu

        # Load environment variables from .env file
        self.env_vars = load_env_file(env_file)

        # Docker client (lazy initialization)
        self._docker_client = None

        # Persistent container (created on first execute or attached)
        self._container = None

        # If container_id provided, we're attaching to an existing shared container
        self._shared_container_id = container_id
        self._owns_container = container_id is None  # Only cleanup if we created it

        # Workspace directory for temporary files
        self._workspace_dir = None

        # Track if cleanup has been called. Set before the atexit hook is
        # registered so cleanup()/__del__ never see a half-built instance.
        self._cleaned_up = False

        # Register cleanup on program exit (Ctrl-C, normal exit, etc.)
        # Only register if we own the container
        if self._owns_container:
            atexit.register(self.cleanup)

    @property
    def docker_client(self):
        """
        Lazy-load Docker client.

        Raises:
            RuntimeError: If the docker package cannot be imported or the
                client cannot be created from the environment.
        """
        if self._docker_client is None:
            try:
                import docker
                self._docker_client = docker.from_env()
            except Exception as e:
                raise RuntimeError(
                    f"Failed to connect to Docker daemon: {e}\n"
                    "Make sure Docker is running."
                ) from e
        return self._docker_client

    @property
    def container_id(self) -> Optional[str]:
        """Get the container ID for sharing with other processes."""
        if self._container is not None:
            return self._container.id
        return self._shared_container_id

    def start_shared_container(self) -> str:
        """
        Start the container and return its ID for sharing with subprocesses.

        This is useful for the orchestrator to create a container once,
        then pass the ID to worker subprocesses.

        Returns:
            Container ID string
        """
        self._ensure_container()
        return self._container.id

    def __enter__(self):
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit - automatic cleanup."""
        self.cleanup()

    @staticmethod
    def _result(
        stdout: str = "",
        stderr: str = "",
        exit_code: int = -1,
        error: Optional[str] = None,
        timed_out: bool = False,
    ) -> Dict[str, Any]:
        """Build the result dictionary shared by execute() and the subprocess fallback."""
        return {
            "stdout": stdout,
            "stderr": stderr,
            "exit_code": exit_code,
            "error": error,
            "timeout": timed_out,
        }

    def _reactivate(self):
        """Re-arm cleanup when the executor is used again after cleanup() ran."""
        if self._cleaned_up:
            self._cleaned_up = False
            if self._owns_container:
                atexit.register(self.cleanup)

    def _ensure_container(self):
        """
        Ensure the persistent container is running.
        Creates and starts it if it doesn't exist, or attaches to existing if container_id provided.

        Raises:
            RuntimeError: If a shared container is unavailable or not running,
                the skills directory is missing, or the container cannot be
                created.
        """
        # A container obtained after cleanup() must be cleaned up again.
        self._reactivate()

        if self._container is not None:
            # Check if container is still running
            try:
                self._container.reload()
                if self._container.status == "running":
                    return  # Container is running
                # Container stopped, restart it (only if we own it)
                if self._owns_container:
                    print("[DOCKER] Container stopped, restarting...")
                    self._container.start()
                    return
            except Exception as e:
                # Container doesn't exist or error
                if not self._owns_container:
                    raise RuntimeError(
                        f"Shared container {self._shared_container_id} not available: {e}"
                    ) from e
                print(f"[DOCKER] Container error: {e}, recreating...")
                self._container = None

        if self._shared_container_id:
            self._attach_shared_container()
        else:
            self._create_container()

    def _attach_shared_container(self):
        """Look up the shared container by ID and require that it is running."""
        try:
            shared = self.docker_client.containers.get(self._shared_container_id)
            shared.reload()
            if shared.status != "running":
                raise RuntimeError(f"Shared container {self._shared_container_id} is not running")
        except Exception as e:
            raise RuntimeError(
                f"Failed to attach to shared container {self._shared_container_id}: {e}"
            ) from e
        self._container = shared
        print(f"[DOCKER] Attached to shared container: {self._container.short_id}")

    def _create_container(self):
        """Create and start the owned container, retrying without GPU if GPU setup fails."""
        # Verify skills directory exists
        if not os.path.exists(self.skills_dir):
            raise RuntimeError(f"Skills directory not found: {self.skills_dir}")

        # Create workspace directory if needed
        if self._workspace_dir is None:
            self._workspace_dir = tempfile.mkdtemp(prefix="arms_workspace_")

        # Create and start persistent container
        print("[DOCKER] Creating persistent container...")
        if self.env_vars:
            print(f"[DOCKER] Passing {len(self.env_vars)} environment variables to container")
        if self.enable_gpu:
            print("[DOCKER] GPU access enabled")

        # Prepare container configuration
        container_config = {
            "image": self.image,
            "command": ["tail", "-f", "/dev/null"],  # Keep container running
            "volumes": {
                self.skills_dir: {
                    "bind": "/skills",
                    "mode": "ro",  # Read-only
                },
                self._workspace_dir: {
                    "bind": "/workspace",
                    "mode": "rw",  # Read-write
                },
            },
            "working_dir": "/workspace",
            "network_mode": "host",
            "mem_limit": self.memory_limit,
            "cpu_quota": self.cpu_quota,
            "environment": self.env_vars,  # Pass environment variables from .env file
            "detach": True,
            "remove": False,
            "stdout": True,
            "stderr": True,
        }

        # Add GPU support if enabled
        if self.enable_gpu:
            container_config["device_requests"] = [
                {
                    "driver": "nvidia",
                    "count": -1,  # -1 means all GPUs
                    "capabilities": [["gpu", "utility", "compute"]],
                }
            ]

        try:
            self._container = self.docker_client.containers.run(**container_config)
            print(f"[DOCKER] Container created: {self._container.short_id}")
            return
        except Exception as e:
            error_msg = str(e)
            lowered = error_msg.lower()
            gpu_failure = self.enable_gpu and (
                "could not select device driver" in lowered or "nvidia" in lowered
            )
            if not gpu_failure:
                raise RuntimeError(f"Failed to create Docker container: {e}") from e
            print("[DOCKER] Warning: GPU access failed, falling back to CPU-only mode")
            print(f"[DOCKER] GPU Error: {error_msg}")

        # Retry without GPU
        container_config.pop("device_requests", None)
        try:
            self._container = self.docker_client.containers.run(**container_config)
        except Exception as e:
            raise RuntimeError(f"Failed to create Docker container: {e}") from e
        print(f"[DOCKER] Container created (CPU-only): {self._container.short_id}")

    def execute(
        self,
        skill_name: str,
        command: str,
        timeout: Optional[int] = None,
    ) -> Dict[str, Any]:
        """
        Execute a command in the persistent Docker container within the skill's directory.

        Args:
            skill_name: Name of the skill (e.g., "emoji-attack")
            command: Command to execute (e.g., 'python run_attack.py "text"')
            timeout: Timeout in seconds (overrides default if provided)

        Returns:
            Dictionary with:
            - stdout: Command output
            - stderr: Error output
            - exit_code: Exit code (0 = success)
            - error: Error message if execution failed
            - timeout: Whether execution timed out
        """
        import shlex

        timeout = timeout or self.timeout

        # Verify skills directory exists
        if not os.path.exists(self.skills_dir):
            message = f"Skills directory not found: {self.skills_dir}"
            return self._result(stderr=message, error=message)

        # Skill directory path
        skill_dir = Path(self.skills_dir) / skill_name
        if not skill_dir.exists():
            message = f"Skill directory not found: {skill_dir}"
            return self._result(stderr=message, error=message)

        try:
            # Ensure persistent container is running
            self._ensure_container()

            # Use bash to cd into the skill directory and run the command.
            # The directory is quoted so a skill name containing spaces or
            # shell metacharacters stays a single path argument.
            container_dir = f"/skills/{skill_name}"
            exec_command = f"cd {shlex.quote(container_dir)} && {command}"

            # NOTE(review): no timeout is passed to exec_run, so `timeout` is
            # only used in the messages below and by the subprocess fallback.
            # Use demux=True to separate stdout and stderr
            exit_code, output = self._container.exec_run(
                cmd=["bash", "-c", exec_command],
                stdout=True,
                stderr=True,
                demux=True,
                workdir=container_dir,
            )

            # output is a tuple of (stdout_bytes, stderr_bytes) when demux=True
            stdout_bytes, stderr_bytes = output if output else (b"", b"")
            # errors="replace": output that is not valid UTF-8 must not raise
            # here, because the handler below would then treat a command that
            # already ran as a Docker failure and run it again on the host.
            stdout = stdout_bytes.decode('utf-8', errors='replace') if stdout_bytes else ""
            stderr = stderr_bytes.decode('utf-8', errors='replace') if stderr_bytes else ""

            return self._result(
                stdout=stdout,
                stderr=stderr,
                exit_code=exit_code,
                error=None if exit_code == 0 else f"Command failed with exit code {exit_code}",
            )

        except Exception as e:
            error_type = type(e).__name__
            error_msg = str(e)
            lowered = error_msg.lower()

            # Check if timeout
            if "timeout" in lowered or error_type in ("ReadTimeout", "TimeoutError"):
                message = f"Command execution timeout ({timeout}s)"
                return self._result(stderr=message, error=message, timed_out=True)

            # Check if Docker image not found
            if "not found" in lowered and "image" in lowered:
                return self._result(
                    stderr=f"Docker image '{self.image}' not found. Please build it first.",
                    error=f"Docker image '{self.image}' not found",
                )

            # General error - fall back to subprocess
            # NOTE(review): the fallback runs `command` directly on the host,
            # outside the container; confirm that is acceptable for every caller.
            print(f"[DOCKER] Warning: Docker execution failed ({error_type}: {error_msg}), falling back to subprocess")
            return self._execute_subprocess(skill_name, command, timeout)

    def _execute_subprocess(
        self,
        skill_name: str,
        command: str,
        timeout: Optional[int] = None,
    ) -> Dict[str, Any]:
        """
        Fallback to subprocess execution (for development without Docker).

        Runs ``command`` through the shell with the skill directory as the
        working directory and the current process environment. A non-zero
        exit code is reported in ``error`` the same way execute() reports it.

        Args:
            skill_name: Name of the skill directory under skills_dir
            command: Shell command to run
            timeout: Timeout in seconds (overrides default if provided)

        Returns:
            Result dictionary with the same keys as execute()
        """
        import subprocess

        timeout = timeout or self.timeout
        skill_dir = Path(self.skills_dir) / skill_name

        if not skill_dir.exists():
            message = f"Skill directory not found: {skill_dir}"
            return self._result(stderr=message, error=message)

        try:
            result = subprocess.run(
                command,
                shell=True,
                cwd=str(skill_dir),
                capture_output=True,
                text=True,
                timeout=timeout,
                env={**os.environ},
            )
        except subprocess.TimeoutExpired:
            message = f"Command execution timeout ({timeout}s)"
            return self._result(stderr=message, error=message, timed_out=True)
        except Exception as e:
            return self._result(stderr=str(e), error=f"Execution failed: {e}")

        failure = None
        if result.returncode != 0:
            failure = f"Command failed with exit code {result.returncode}"
        return self._result(
            stdout=result.stdout,
            stderr=result.stderr,
            exit_code=result.returncode,
            error=failure,
        )

    def cleanup(self):
        """
        Clean up Docker resources.

        This is called automatically:
        - When program exits (via atexit)
        - When using context manager (via __exit__)
        - Can also be called manually

        Safe to call multiple times (idempotent).
        Only cleans up container if we own it (not attached to shared container).
        """
        # Prevent duplicate cleanup
        if self._cleaned_up:
            return

        self._cleaned_up = True

        # Stop and remove persistent container
        if self._container is not None and self._owns_container:
            try:
                print(f"[DOCKER] Stopping container {self._container.short_id}...")
                self._container.stop(timeout=5)
            except Exception as e:
                print(f"[DOCKER] Warning: Failed to stop container: {e}")
            # Remove in its own step so a failed stop does not leave the
            # container behind; force=True also removes one still running.
            try:
                self._container.remove(force=True)
                print("[DOCKER] Container removed")
            except Exception as e:
                print(f"[DOCKER] Warning: Failed to remove container: {e}")
            self._container = None

        # Clean up workspace directory
        if self._workspace_dir and os.path.exists(self._workspace_dir) and self._owns_container:
            try:
                shutil.rmtree(self._workspace_dir)
                print(f"[DOCKER] Cleaned up workspace: {self._workspace_dir}")
            except Exception as e:
                print(f"[DOCKER] Warning: Failed to clean up workspace: {e}")
        self._workspace_dir = None

        # Close Docker client
        if self._docker_client:
            try:
                self._docker_client.close()
            except Exception as e:
                print(f"[DOCKER] Warning: Failed to close Docker client: {e}")
        self._docker_client = None

        # Drop the exit hook last so atexit no longer keeps this executor
        # alive after its resources are released.
        atexit.unregister(self.cleanup)

    def __del__(self):
        """Destructor - ensure cleanup."""
        # Note: atexit is more reliable than __del__
        # This is just a backup
        # getattr: __init__ may have failed before _cleaned_up was assigned.
        if not getattr(self, "_cleaned_up", True):
            self.cleanup()