decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374) hide show
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
utils/skill_helpers.py ADDED
@@ -0,0 +1,447 @@
1
+ import os
2
+ import yaml
3
+ import shutil
4
+ import tempfile
5
+ import fnmatch
6
+ from pathlib import Path
7
+ from typing import Dict, List, Optional, Any
8
+
9
+ from dt_arena.src.types.agent import SkillInjection
10
+
11
def load_skill_metadata(skill_path: str) -> Dict[str, str]:
    """
    Read the frontmatter of a SKILL.md file into a flat dictionary.

    The frontmatter is the text between the leading --- marker and the next
    --- marker. Every line that contains a colon is split at the first colon
    into a stripped key and a stripped value; lines without a colon are
    ignored. No YAML parsing happens here, so values are kept as plain text.

    Args:
        skill_path: Path to SKILL.md file

    Returns:
        Dictionary with the parsed fields plus 'raw_metadata_text' (the
        stripped frontmatter text). Empty dict when the file does not start
        with --- or has no closing --- marker.
    """
    with open(skill_path, 'r', encoding='utf-8') as handle:
        text = handle.read()

    # No opening marker: the file carries no metadata.
    if not text.startswith('---'):
        return {}

    # Splitting at most twice yields ['', frontmatter, body] when a closing
    # marker exists.
    pieces = text.split('---', 2)
    if len(pieces) < 3:
        return {}

    frontmatter = pieces[1].strip()
    fields = {}
    for entry in frontmatter.split('\n'):
        name, separator, remainder = entry.partition(':')
        if separator:
            fields[name.strip()] = remainder.strip()

    # Keep the unparsed frontmatter so it can be displayed verbatim.
    fields['raw_metadata_text'] = frontmatter
    return fields
41
+
42
def load_skill_full_content(skill_path: str) -> str:
    """
    Return the entire text of a SKILL.md file, decoded as UTF-8.

    Args:
        skill_path: Path to SKILL.md file

    Returns:
        Full content of the skill file
    """
    return Path(skill_path).read_text(encoding='utf-8')
54
+
55
def scan_available_skills(
    skills_dir: str,
    allowed_skills: List[str] = None,
    skill_types: List[str] = None,
) -> List[Dict[str, Any]]:
    """
    Collect metadata for every SKILL.md found below a skills directory.

    The directory is searched recursively. Files whose metadata comes back
    empty from load_skill_metadata() are skipped.

    Args:
        skills_dir: Path to attack_skills directory
        allowed_skills: Optional list of skill names to include. If None, all skills are included.
                        Supports glob patterns (e.g., "gmail-*" matches all gmail skills).
        skill_types: Optional list of skill types to include.
                     If None, all types are included.

    Returns:
        List of skill metadata dictionaries with added 'path' field
    """
    found = []
    root = Path(skills_dir)

    if not root.exists():
        return found

    for skill_md in root.rglob('SKILL.md'):
        info = load_skill_metadata(str(skill_md))
        if not info:
            continue

        info['path'] = str(skill_md)

        # Type filter: the 'type' field must be one of the requested types.
        if skill_types is not None and info.get('type', '') not in skill_types:
            continue

        # Name filter: the 'name' field must match at least one glob pattern
        # (e.g., "gmail-*", "*-exfiltration").
        if allowed_skills is not None:
            name = info.get('name', '')
            if not any(fnmatch.fnmatch(name, pattern) for pattern in allowed_skills):
                continue

        found.append(info)

    return found
107
+
108
def format_skills_for_prompt(skills: List[Dict[str, Any]]) -> str:
    """
    Render skill metadata as a text block suitable for a prompt.

    A skill that carries 'raw_metadata_text' is shown as its frontmatter
    wrapped in --- markers; otherwise a short summary is built from the
    name, description, type and injection_types fields. Each skill is
    followed by an empty line.

    Args:
        skills: List of skill metadata dictionaries

    Returns:
        Formatted string describing all available skills with full metadata
    """
    if not skills:
        return "No attack skills available."

    out = []
    for entry in skills:
        frontmatter = entry.get("raw_metadata_text", "")
        if frontmatter:
            # Show the complete frontmatter exactly as it was written.
            out.extend(["---", frontmatter, "---"])
        else:
            # No raw text available: summarise from the individual fields.
            skill_name = entry.get("name", "unknown")
            skill_description = entry.get("description", "No description")
            skill_type = entry.get("type", "unknown")
            points = entry.get("injection_types", "N/A")
            out.extend([
                f"Skill Name: {skill_name}",
                f" - Description: {skill_description}",
                f" - Type: {skill_type}",
                f" - Injection Points: {points}",
            ])
        out.append("")

    return "\n".join(out)
142
+
143
def apply_injection_to_content(
    original_content: str,
    injections: List[SkillInjection]
) -> str:
    """
    Apply insert/append-mode injections to skill content.

    Injections are sorted into two groups:
    - mode "append", or any non-create injection with row == -1: added at the
      end of the content, in the order given, each preceded by an empty line
    - mode "insert" with row >= 1: placed so the injected text starts at that
      1-based row; a row past the end lands at the end of the content

    Create-mode injections are skipped, as is any other injection that fits
    neither group (for example an insert with row 0).

    Args:
        original_content: Original skill file content
        injections: List of SkillInjection objects

    Returns:
        Modified content with injections applied
    """
    buffer = original_content.split('\n')

    positional = []
    trailing = []
    for item in injections:
        if item.mode == "create":
            continue
        if item.mode == "append" or item.row == -1:
            trailing.append(item)
        elif item.mode == "insert" and item.row >= 1:
            positional.append(item)

    # Highest row first, so an insertion never shifts the target row of one
    # that is still pending.
    positional.sort(key=lambda entry: entry.row, reverse=True)
    for item in positional:
        at = min(item.row - 1, len(buffer))
        buffer[at:at] = item.content.split('\n')

    for item in trailing:
        buffer.append('')
        buffer.extend(item.content.split('\n'))

    return '\n'.join(buffer)
189
+
190
def create_injected_skills_directory(
    source_skill_dirs: List[str],
    skill_injection: Optional[Dict[str, List[SkillInjection]]] = None,
    skill_subpath: str = "",
    temp_prefix: str = "skill_injection_",
    base_dir: Optional[str] = None,
) -> Optional[str]:
    """
    Create a temporary directory with injected skills.

    This function:
    1. Validates every injection mode (before anything is written to disk)
    2. Creates a temp directory with optional subpath structure
    3. Copies the SKILL.md of each skill found in the source directories
    4. Applies insert/append-mode modifications to existing skills
    5. Creates new skills for create-mode injections

    Only the SKILL.md file of each skill is copied. Files are read and
    written as UTF-8, matching the other readers in this module. If any step
    after the temp directory was created fails, the temp directory is removed
    before the error is re-raised.

    Args:
        source_skill_dirs: List of source skill directory paths (None is
            treated as an empty list)
        skill_injection: Dict mapping skill_name -> list of SkillInjection objects
        skill_subpath: Subdirectory path within temp dir for skills
        temp_prefix: Prefix for temp directory name
        base_dir: Base directory for creating temp dir (system temp dir when
            not provided)

    Returns:
        Path to the temp root directory, or None if no skills to process
        For Claude SDK, set cwd to this path
        For OpenClaw, add this path to skills.load.extraDirs

    Raises:
        ValueError: If an injection has an empty or unknown mode
    """
    if not source_skill_dirs and not skill_injection:
        return None

    # Validate first: a bad config must not leave a stray temp directory.
    valid_modes = ("insert", "append", "create")
    injection_configs: Dict[str, List[SkillInjection]] = {}
    for skill_name, injections in (skill_injection or {}).items():
        for inj in injections:
            if not inj.mode:
                raise ValueError(f"Skill injection for '{skill_name}' missing required 'mode' field")
            if inj.mode not in valid_modes:
                raise ValueError(f"Invalid skill injection mode '{inj.mode}' for '{skill_name}'. Must be one of: {valid_modes}")
        injection_configs[skill_name] = list(injections)

    # Ensure base_dir exists if provided
    if base_dir:
        os.makedirs(base_dir, exist_ok=True)

    # Create temp directory under base_dir (or system temp if not provided)
    temp_dir = tempfile.mkdtemp(prefix=temp_prefix, dir=base_dir)

    try:
        # Create skill subdirectory if specified
        if skill_subpath:
            skills_dir = os.path.join(temp_dir, skill_subpath)
            os.makedirs(skills_dir, exist_ok=True)
        else:
            skills_dir = temp_dir

        # Names of skills that were written into the temp directory
        processed_skills: set = set()

        # Copy and modify existing skills from source directories
        for source_dir in source_skill_dirs or []:
            if not os.path.isdir(source_dir):
                continue

            for item in os.listdir(source_dir):
                skill_subdir = os.path.join(source_dir, item)
                if not os.path.isdir(skill_subdir):
                    continue

                skill_file = os.path.join(skill_subdir, 'SKILL.md')
                if not os.path.exists(skill_file):
                    continue

                skill_name = item
                dest_skill_dir = os.path.join(skills_dir, skill_name)
                dest_skill_file = os.path.join(dest_skill_dir, 'SKILL.md')

                os.makedirs(dest_skill_dir, exist_ok=True)

                # Explicit UTF-8: the locale default would corrupt or reject
                # non-ASCII skill text on some platforms.
                with open(skill_file, 'r', encoding='utf-8') as f:
                    content = f.read()

                # Apply injections if any
                insert_configs = [
                    c for c in injection_configs.get(skill_name, [])
                    if c.mode in ("insert", "append")
                ]
                if insert_configs:
                    content = apply_injection_to_content(content, insert_configs)
                    print(f"[INFO] Applied {len(insert_configs)} insert/append injection(s) to skill '{skill_name}'")

                with open(dest_skill_file, 'w', encoding='utf-8') as f:
                    f.write(content)

                processed_skills.add(skill_name)

        # Handle create-mode injections (new skills). A create-mode injection
        # for a skill that was already copied from a source dir is ignored.
        for skill_name, configs in injection_configs.items():
            create_configs = [c for c in configs if c.mode == "create"]
            if not create_configs or skill_name in processed_skills:
                continue

            dest_skill_dir = os.path.join(skills_dir, skill_name)
            dest_skill_file = os.path.join(dest_skill_dir, 'SKILL.md')

            os.makedirs(dest_skill_dir, exist_ok=True)

            # Use the first create-mode injection content
            content = create_configs[0].content

            # Apply any insert/append-mode injections as well
            insert_configs = [c for c in configs if c.mode in ("insert", "append")]
            if insert_configs:
                content = apply_injection_to_content(content, insert_configs)

            with open(dest_skill_file, 'w', encoding='utf-8') as f:
                f.write(content)

            print(f"[INFO] Created new skill '{skill_name}' via create-mode injection")
            processed_skills.add(skill_name)
    except Exception:
        # Do not leak a half-populated temp directory on failure.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    # Check if we have any skills
    if not processed_skills:
        # Clean up empty temp dir
        shutil.rmtree(temp_dir)
        return None

    print(f"[INFO] Created skill temp directory: {temp_dir}")
    return temp_dir
323
+
324
def load_skills_as_text(
    skills_temp_dir: str,
    skill_subpath: str = "",
) -> str:
    """
    Concatenate every SKILL.md under a skills directory into prompt text.
    Used by SDKs without native skill mechanisms (e.g., Google ADK).

    Each immediate subdirectory that contains a SKILL.md contributes one
    "### Skill: <directory name>" section; sections appear in sorted
    directory-name order below a fixed "Available Skills" header.

    Args:
        skills_temp_dir: Path to the temp directory containing skills
        skill_subpath: Optional subdirectory path within temp dir for skills

    Returns:
        Formatted string with all skill contents, or empty string if no skills found
    """
    root = os.path.join(skills_temp_dir, skill_subpath) if skill_subpath else skills_temp_dir
    if not os.path.isdir(root):
        return ""

    sections = []
    for name in sorted(os.listdir(root)):
        folder = os.path.join(root, name)
        candidate = os.path.join(folder, "SKILL.md")
        # Only directories that actually hold a SKILL.md count as skills.
        if not (os.path.isdir(folder) and os.path.exists(candidate)):
            continue
        with open(candidate, "r", encoding="utf-8") as handle:
            body = handle.read()
        sections.append(f"### Skill: {name}\n\n{body}")

    if not sections:
        return ""

    preamble = (
        "\n\n---\n\n"
        "## Available Skills\n\n"
        "Below are skill instructions that guide your behavior for this task. "
        "Follow them carefully.\n\n"
    )
    return preamble + "\n\n".join(sections)
369
+
370
def parse_skill_metadata(skill_file: str) -> Optional[str]:
    """Parse the metadata section from a SKILL.md file, if it exists.

    The text between the leading --- and the next --- is parsed as YAML and
    dumped back to text (keys keep their original order). This is best-effort:
    any failure is reported with a printed warning and yields None instead of
    raising.

    Args:
        skill_file: Path to a SKILL.md file

    Returns:
        The metadata string, or None if the file has no frontmatter, the
        frontmatter is empty, or reading/parsing fails
    """
    try:
        with open(skill_file, "r", encoding="utf-8") as f:
            content = f.read()
        if not content.startswith("---"):
            return None
        end = content.find("---", 3)
        if end == -1:
            return None
        try:
            frontmatter = yaml.safe_load(content[3:end])
        except yaml.YAMLError:
            print(f"[WARNING] Failed to parse YAML frontmatter in {skill_file}")
            return None
        if frontmatter is None:
            # Empty frontmatter parses to None; dumping that would return the
            # literal text "null\n...\n" rather than "no metadata".
            return None
        text_frontmatter = yaml.dump(frontmatter, sort_keys=False)
        return text_frontmatter
    except Exception:
        # Deliberate catch-all so a bad skill file never aborts the caller.
        print(f"[WARNING] Failed to read or parse skill file {skill_file}")
        return None
397
+
398
def load_skills_as_toolset(
    skills_temp_dir: str,
    skill_subpath: str = "",
):
    """
    Build a Google ADK SkillToolset from the skills in a directory.

    Counterpart to load_skills_as_text() for SDKs with native skill support.
    Each immediate subdirectory that contains a SKILL.md is passed to
    load_skill_from_dir(), in sorted directory-name order. The Google ADK
    modules are imported on every call, before the directory is inspected.

    Args:
        skills_temp_dir: Path to the temp directory containing skills
        skill_subpath: Optional subdirectory path within temp dir for skills

    Returns:
        SkillToolset instance, or None if no skills found
    """
    import pathlib
    from google.adk.skills import load_skill_from_dir
    from google.adk.tools.skill_toolset import SkillToolset

    root = os.path.join(skills_temp_dir, skill_subpath) if skill_subpath else skills_temp_dir
    if not os.path.isdir(root):
        return None

    loaded = []
    for name in sorted(os.listdir(root)):
        folder = os.path.join(root, name)
        if not os.path.isdir(folder):
            continue
        if not os.path.exists(os.path.join(folder, "SKILL.md")):
            continue
        loaded.append(load_skill_from_dir(pathlib.Path(folder)))

    if not loaded:
        return None
    return SkillToolset(skills=loaded)
433
+
434
+
435
def cleanup_temp_directory(temp_dir: str) -> None:
    """
    Remove a temporary skill directory and everything inside it.

    Does nothing when the path is empty or does not exist. A failed removal
    is reported with a printed warning instead of raising.

    Args:
        temp_dir: Path to the temp directory to remove
    """
    if not temp_dir or not os.path.exists(temp_dir):
        return

    try:
        shutil.rmtree(temp_dir)
        print(f"[INFO] Cleaned up temp skill directory: {temp_dir}")
    except Exception as err:
        print(f"[WARNING] Failed to clean up temp directory {temp_dir}: {err}")