decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374) hide show
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
@@ -0,0 +1,260 @@
1
+ """
2
+ Task and Attack configuration types for DecodingTrust Agent Arena
3
+ """
4
+
5
+ import os
6
+ import yaml
7
+ from typing import Any, Dict, Optional, List, Union
8
+ from dataclasses import dataclass, field
9
+
10
+
11
@dataclass
class TaskConfig:
    """Task configuration defining the user instruction.

    Instances are normally built from the ``Task`` section of a task YAML
    file via :meth:`from_yaml`, or from an already-parsed mapping via
    :meth:`from_dict`.
    """
    # Single instruction string, or a list of strings for multi-turn tasks
    original_instruction: Union[str, List[str]]
    task_id: Optional[str] = None
    domain: Optional[str] = None

    @classmethod
    def from_dict(cls, task_data: Optional[Dict[str, Any]]) -> 'TaskConfig':
        """Create a TaskConfig from the parsed ``Task`` section.

        Args:
            task_data: Mapping holding ``task_instruction`` (or the
                alternative key ``original_instruction``), ``task_id`` and
                ``domain``. ``None`` or an empty mapping yields a config
                with an empty instruction.

        Returns:
            The populated TaskConfig.
        """
        task_data = task_data or {}
        # 'task_instruction' takes precedence; the value may be str or List[str]
        instruction = task_data.get(
            'task_instruction',
            task_data.get('original_instruction', ''),
        )
        return cls(
            original_instruction=instruction,
            task_id=task_data.get('task_id'),
            domain=task_data.get('domain'),
        )

    @classmethod
    def from_yaml(cls, config_path: str) -> 'TaskConfig':
        """Load the task configuration from a YAML file.

        Args:
            config_path: Path to the YAML config file.

        Returns:
            TaskConfig built from the file's ``Task`` section.

        Raises:
            FileNotFoundError: If ``config_path`` does not exist.
        """
        if not os.path.exists(config_path):
            raise FileNotFoundError(f"Config file not found: {config_path}")

        with open(config_path, 'r', encoding='utf-8') as f:
            # safe_load returns None for an empty document; treat it as {}
            data = yaml.safe_load(f) or {}

        # A missing or null 'Task' section is handled by from_dict
        return cls.from_dict(data.get('Task'))

    @property
    def is_multi_turn(self) -> bool:
        """Check if this is a multi-turn task (instruction given as a list)."""
        return isinstance(self.original_instruction, list)

    @property
    def instruction_count(self) -> int:
        """Get number of instructions (list length, or 1 for a single string)."""
        if isinstance(self.original_instruction, list):
            return len(self.original_instruction)
        return 1
48
+
49
+
50
@dataclass
class AttackStep:
    """Single step in an attack sequence (legacy format)"""
    function: str  # e.g., "inject_prompt", "salesforce_mcp:inject_lead"
    parameters: Dict[str, Any] = field(default_factory=dict)
    description: Optional[str] = None

    @staticmethod
    def _split_params(params_str: str) -> List[str]:
        """Split a parameter list on commas that are not inside a quoted value.

        A quote opens only directly after ``=`` (ignoring spaces) and closes
        only when followed by a comma or the end of the string, so
        apostrophes inside a value do not end it. If a quote is never
        closed, the trailing piece is split on every comma.
        """
        pieces: List[str] = []
        current: List[str] = []
        quote: Optional[str] = None

        for idx, ch in enumerate(params_str):
            if quote is not None:
                # Only treat the quote as closing at a value boundary
                if ch == quote and params_str[idx + 1:].lstrip()[:1] in ('', ','):
                    quote = None
                current.append(ch)
            elif ch in ('"', "'") and ''.join(current).rstrip().endswith('='):
                quote = ch
                current.append(ch)
            elif ch == ',':
                pieces.append(''.join(current))
                current = []
            else:
                current.append(ch)

        tail = ''.join(current)
        if quote is not None:
            # Unterminated quote: fall back to naive comma splitting
            pieces.extend(tail.split(','))
        else:
            pieces.append(tail)
        return pieces

    @staticmethod
    def _unquote(value: str) -> str:
        """Strip whitespace and one pair of matching enclosing quotes.

        If the value is not wrapped in a matching pair, any leading or
        trailing quote characters are stripped instead.
        """
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
            return value[1:-1]
        return value.strip('"\'')

    @classmethod
    def from_string(cls, step_str: str) -> 'AttackStep':
        """
        Parse attack step from string format: "function_name(param1=value1, param2=value2)"

        Parameter values are always returned as strings. Quoted values may
        contain commas.

        Example:
            "inject_prompt(target='system', content='malicious prompt')"
            "salesforce_mcp:inject_lead(name='John Doe', email='john@example.com')"

        Raises:
            ValueError: If an opening '(' has no matching ')' after it.
        """
        open_idx = step_str.find('(')
        if open_idx == -1:
            # Bare function name without a parameter list
            return cls(function=step_str.strip(), parameters={})

        close_idx = step_str.rfind(')')
        if close_idx < open_idx:
            raise ValueError(
                f"Malformed attack step (missing closing ')'): {step_str!r}"
            )

        func_name = step_str[:open_idx].strip()
        params_str = step_str[open_idx + 1:close_idx].strip()

        params: Dict[str, Any] = {}
        for param in cls._split_params(params_str):
            if '=' not in param:
                # Fragments without a key (including empty ones) are ignored
                continue
            key, _, value = param.partition('=')
            params[key.strip()] = cls._unquote(value)

        return cls(function=func_name, parameters=params)

    def to_string(self) -> str:
        """Convert attack step to string representation"""
        if not self.parameters:
            return f"{self.function}()"

        param_strs = [f"{k}='{v}'" for k, v in self.parameters.items()]
        return f"{self.function}({', '.join(param_strs)})"
92
+
93
+
94
@dataclass
class AttackStepConfig:
    """
    Single attack step within an attack turn.

    Supports four types:
    - "tool": Inject into tool description (suffix or override)
    - "prompt": Inject into user instruction (suffix or override)
    - "environment": Inject via MCP tool call
    - "skill": Inject into skill SKILL.md file or create new skill
    """
    type: str  # "tool", "prompt", "environment", or "skill"
    mode: Optional[str] = None  # "suffix" or "override" (tool/prompt), "insert", "append", or "create" (skill)
    content: Optional[str] = None
    # For tool injection
    injected_tool: Optional[str] = None  # "server_name:tool_name"
    # For environment injection
    injection_mcp_tool: Optional[str] = None
    kwargs: Dict[str, Any] = field(default_factory=dict)
    # For skill injection
    skill_name: Optional[str] = None
    row: Optional[int] = None  # Line number for insert mode, or -1 for append/create

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'AttackStepConfig':
        """Create AttackStepConfig from dictionary.

        Missing keys fall back to the field defaults; a missing ``type``
        becomes the empty string. A ``kwargs`` key that is present but
        null is normalised to an empty dict so the field is always a dict.
        """
        return cls(
            type=data.get('type', ''),
            mode=data.get('mode'),
            content=data.get('content'),
            injected_tool=data.get('injected_tool'),
            injection_mcp_tool=data.get('injection_mcp_tool'),
            # `kwargs:` with no value parses as None in YAML
            kwargs=data.get('kwargs') or {},
            skill_name=data.get('skill_name'),
            row=data.get('row'),
        )

    def get_tool_server_and_name(self) -> tuple:
        """
        Parse injected_tool to get server name and tool name.

        Only the first ':' separates server from tool; any further ':'
        stays in the tool name.

        Returns:
            (server_name, tool_name) tuple, or (None, None) if not a tool injection
        """
        if self.type != "tool" or not self.injected_tool:
            return (None, None)

        server, separator, tool = self.injected_tool.partition(':')
        if separator:
            return (server, tool)
        # No server specified, return None for server
        return (None, self.injected_tool)
147
+
148
+
149
@dataclass
class AttackTurn:
    """
    Single turn in a multi-turn attack sequence.

    Each turn can have multiple attack steps that are applied together.
    """
    turn_id: int
    attack_steps: List['AttackStepConfig'] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'AttackTurn':
        """Create AttackTurn from dictionary.

        ``turn_id`` defaults to 1 when absent. A missing or null
        ``attack_steps`` entry yields a turn with no steps.
        """
        # `attack_steps:` with no value parses as None in YAML
        raw_steps = data.get('attack_steps') or []
        return cls(
            turn_id=data.get('turn_id', 1),
            attack_steps=[AttackStepConfig.from_dict(step) for step in raw_steps],
        )

    def _steps_of_type(self, step_type: str) -> List['AttackStepConfig']:
        """Return the steps whose ``type`` equals ``step_type``, in order."""
        return [step for step in self.attack_steps if step.type == step_type]

    def get_tool_injections(self) -> List['AttackStepConfig']:
        """Get all tool attack steps"""
        return self._steps_of_type("tool")

    def get_prompt_injections(self) -> List['AttackStepConfig']:
        """Get all prompt attack steps"""
        return self._steps_of_type("prompt")

    def get_environment_injections(self) -> List['AttackStepConfig']:
        """Get all environment attack steps"""
        return self._steps_of_type("environment")

    def get_skill_injections(self) -> List['AttackStepConfig']:
        """Get all skill attack steps"""
        return self._steps_of_type("skill")
186
+
187
+
188
@dataclass
class AttackConfig:
    """Attack configuration defining step-by-step attack reproduction"""
    risk_category: Optional[str] = None  # e.g., "social-scoring", "privacy-violation"
    threat_model: Optional[str] = None  # e.g., "direct", "indirect"
    malicious_goal: Optional[str] = None  # The attacker's goal/intent
    additional_information: Optional[str] = None  # Per-task hints (e.g., victim email, attacker accounts)
    attack_turns: List['AttackTurn'] = field(default_factory=list)  # Attack turns with attack steps
    # Legacy fields for backward compatibility
    attack_name: Optional[str] = None
    attack_type: Optional[str] = None  # e.g., "prompt_injection", "data_exfiltration"
    steps: List['AttackStep'] = field(default_factory=list)
    description: Optional[str] = None

    @classmethod
    def from_dict(cls, attack_data: Dict[str, Any]) -> 'AttackConfig':
        """Create AttackConfig from the parsed ``Attack`` section.

        Args:
            attack_data: Mapping with the attack fields. ``steps`` (legacy)
                may hold strings in ``func(k='v')`` form or dicts with
                ``function`` / ``parameters`` / ``description``.
                ``attack_turns`` holds dicts; non-dict entries are skipped.
                Null or missing lists are treated as empty.

        Returns:
            The populated AttackConfig.

        Raises:
            KeyError: If a dict-form legacy step lacks ``function``.
        """
        # Parse legacy 'steps' field
        steps = []
        for step_data in attack_data.get('steps') or []:
            if isinstance(step_data, str):
                # Parse from string format
                steps.append(AttackStep.from_string(step_data))
            elif isinstance(step_data, dict):
                # Parse from dict format
                steps.append(AttackStep(
                    function=step_data['function'],
                    parameters=step_data.get('parameters') or {},
                    description=step_data.get('description'),
                ))

        # Parse attack_turns format with attack_steps
        attack_turns = [
            AttackTurn.from_dict(turn_data)
            for turn_data in attack_data.get('attack_turns') or []
            if isinstance(turn_data, dict)
        ]

        return cls(
            risk_category=attack_data.get('risk_category'),
            threat_model=attack_data.get('threat_model'),
            malicious_goal=attack_data.get('malicious_goal'),
            additional_information=attack_data.get('additional_information'),
            attack_turns=attack_turns,
            attack_name=attack_data.get('attack_name'),
            attack_type=attack_data.get('attack_type'),
            steps=steps,
            description=attack_data.get('description'),
        )

    @classmethod
    def from_yaml(cls, config_path: str) -> Optional['AttackConfig']:
        """Load attack configuration from YAML file.

        Returns:
            The AttackConfig, or None when the file has no (or an empty)
            ``Attack`` section.

        Raises:
            FileNotFoundError: If ``config_path`` does not exist.
        """
        if not os.path.exists(config_path):
            raise FileNotFoundError(f"Config file not found: {config_path}")

        with open(config_path, 'r', encoding='utf-8') as f:
            # safe_load returns None for an empty document; treat it as {}
            data = yaml.safe_load(f) or {}

        # Attack section is optional
        attack_data = data.get('Attack')
        if not attack_data:
            return None

        return cls.from_dict(attack_data)

    def get_step_sequence(self) -> List[str]:
        """Get attack steps as a list of string representations"""
        return [step.to_string() for step in self.steps]

    def get_turn(self, turn_id: int) -> Optional['AttackTurn']:
        """Get the first attack turn whose turn_id matches, or None."""
        for turn in self.attack_turns:
            if turn.turn_id == turn_id:
                return turn
        return None
@@ -0,0 +1,315 @@
1
+ import os
2
+ import json
3
+ from abc import ABC, abstractmethod
4
+ from datetime import datetime
5
+ from typing import Optional, Any, List, Union, Dict
6
+
7
+
8
class Trajectory:
    """
    A class for building and manipulating trajectory data dynamically.

    This class provides methods to:
    - Load existing trajectories from JSON files
    - Create new trajectories with task metadata
    - Append different types of steps (user, agent, tool)
    - Update trajectory statistics
    - Save trajectories to JSON files

    All state lives in ``self.data``, a JSON-serializable dict with three
    top-level keys: ``task_info``, ``traj_info`` and ``trajectory``.
    """

    def __init__(
        self,
        task_id: Optional[str] = None,
        original_instruction: Optional[str] = None,
        malicious_instruction: Optional[str] = None,
        domain: Optional[str] = None,
        risk_category: Optional[str] = None
    ):
        """
        Initialize a new trajectory.

        Args:
            task_id: Unique task identifier.
            original_instruction: Benign / intended task instruction text.
            malicious_instruction: Malicious instruction text (if any).
            domain: Task domain (e.g., "crm", "workflow").
            risk_category: Risk category (e.g., "social-scoring").
        """
        self.data = {
            "task_info": {
                "task_id": task_id or "unknown",
                "original_instruction": original_instruction or "",
                "malicious_instruction": malicious_instruction or "",
                "domain": domain,
                "risk_category": risk_category
            },
            "traj_info": {
                "step_count": 0,
                "actions_count": 0,
                "tool_count": 0,
                "user_turn": 0,
                "duration": 0.0,
                "timestamp": datetime.now().isoformat(),
                "agent_final_response": None  # Final response from the agent
            },
            "trajectory": []
        }
        self._next_step_id = 0
        self._start_time: Optional[float] = None

    def __len__(self) -> int:
        """Return the number of steps in the trajectory."""
        return len(self.data.get("trajectory", []))

    def __getitem__(self, index: int) -> Dict[str, Any]:
        """Get a step from the trajectory by index."""
        return self.data.get("trajectory", [])[index]

    def __bool__(self) -> bool:
        """Return True if the trajectory has any steps."""
        return len(self.data.get("trajectory", [])) > 0

    def __iter__(self):
        """Iterate over the trajectory steps."""
        return iter(self.data.get("trajectory", []))

    def __repr__(self) -> str:
        return f"Trajectory(task_id={self.data['task_info']['task_id']}, steps={len(self.data['trajectory'])})"

    @classmethod
    def load(cls, filepath: str) -> 'Trajectory':
        """
        Load trajectory from a JSON file

        Sections or counters missing from the file are filled with the same
        defaults a fresh instance would have, so the append methods and
        ``repr`` keep working on partial or legacy files.

        Args:
            filepath: Path to trajectory JSON file

        Returns:
            Trajectory instance with loaded data
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Create instance with task info (support both legacy and new keys)
        task_info = data.get("task_info") or {}
        instance = cls(
            task_id=task_info.get("task_id"),
            original_instruction=task_info.get("original_instruction")
            or task_info.get("instruction"),
            malicious_instruction=task_info.get("malicious_instruction"),
            domain=task_info.get("domain"),
            risk_category=task_info.get("risk_category"),
        )

        # Loaded values win; defaults only fill keys the file did not provide.
        defaults = instance.data
        for section in ("task_info", "traj_info"):
            data[section] = {**defaults[section], **(data.get(section) or {})}
        steps = data.get("trajectory") or []
        data["trajectory"] = steps
        instance.data = data

        # Continue numbering after the highest stored step_id. A step without
        # a step_id falls back to its list position instead of raising.
        if steps:
            highest = max(
                step.get("step_id", position)
                for position, step in enumerate(steps)
            )
            instance._next_step_id = highest + 1

        return instance

    def _append_step(self, step: Dict[str, Any], counter_key: str) -> int:
        """Assign the next step_id to ``step``, store it and bump ``counter_key``."""
        step["step_id"] = self._next_step_id
        self.data["trajectory"].append(step)
        self.data["traj_info"][counter_key] += 1
        self._update_counts()
        self._next_step_id += 1
        return step["step_id"]

    def append_user_step(self, message: str, metadata: Optional[Dict[str, Any]] = None) -> int:
        """
        Append a user message step to the trajectory

        Args:
            message: User message content
            metadata: Optional metadata

        Returns:
            Step ID of the appended step
        """
        step = {
            "role": "user",
            "state": message,
            "metadata": metadata or {},
        }
        return self._append_step(step, "user_turn")

    def append_agent_step(
        self,
        action: str,
        tool_name: Optional[str] = None,
        tool_params: Optional[Dict[str, Any]] = None,
        server: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None
    ) -> int:
        """
        Append an agent action step to the trajectory

        Args:
            action: Action description (e.g., "list_records(module_name='Accounts')")
            tool_name: Name of the tool being called
            tool_params: Parameters passed to the tool
            server: MCP server name
            metadata: Additional metadata

        Returns:
            Step ID of the appended step
        """
        # Work on a copy so the caller's metadata dict is never modified.
        step_metadata = dict(metadata) if metadata else {}
        if tool_name:
            step_metadata["tool_name"] = tool_name
        if tool_params:
            step_metadata["tool_params"] = tool_params
        if server:
            step_metadata["server"] = server

        step = {
            "role": "agent",
            "action": action,
            "metadata": step_metadata,
        }
        return self._append_step(step, "actions_count")

    def append_tool_return(
        self,
        result: Union[str, Dict, List],
        tool_name: Optional[str] = None,
        server: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None
    ) -> int:
        """
        Append a tool return/response step to the trajectory

        Args:
            result: Tool execution result (can be string, dict, or list)
            tool_name: Name of the tool that returned this result
            server: MCP server name
            metadata: Additional metadata

        Returns:
            Step ID of the appended step
        """
        # Work on a copy so the caller's metadata dict is never modified.
        step_metadata = dict(metadata) if metadata else {}
        if tool_name:
            step_metadata["tool_name"] = tool_name
        if server:
            step_metadata["server"] = server

        step = {
            "role": "tool",
            "state": result,
            "metadata": step_metadata,
        }
        return self._append_step(step, "tool_count")

    def append_env_return(
        self,
        state: Union[str, Dict, List],
        metadata: Optional[Dict[str, Any]] = None
    ) -> int:
        """
        Append an environment state return (alias for tool return with generic metadata)

        The step is stored with role "tool" and counted in ``tool_count``.

        Args:
            state: Environment state data
            metadata: Additional metadata

        Returns:
            Step ID of the appended step
        """
        return self.append_tool_return(state, metadata=metadata)

    def set_final_response(self, response: str):
        """Set the agent's final response"""
        self.data["traj_info"]["agent_final_response"] = response

    @property
    def final_response(self) -> Optional[str]:
        """
        Get the agent's final response

        Returns the explicitly stored response when it is non-empty.
        Otherwise returns the ``message`` metadata of the most recent agent
        step whose action is "send_message_to_user", or None if there is
        no such step.
        """
        # First check if explicitly set
        stored = self.data["traj_info"].get("agent_final_response")
        if stored:
            return stored

        # Fallback: extract from last agent send_message_to_user action
        for step in reversed(self.data["trajectory"]):
            if step.get("role") == "agent" and step.get("action") == "send_message_to_user":
                return step.get("metadata", {}).get("message")
        return None

    def _update_counts(self):
        """Update step_count in traj_info"""
        self.data["traj_info"]["step_count"] = len(self.data["trajectory"])

    def save(self, filepath: str) -> str:
        """
        Save trajectory to a JSON file

        Args:
            filepath: Output file path

        Returns:
            Path to saved file
        """
        # Ensure directory exists; a bare filename means the current directory.
        parent = os.path.dirname(filepath)
        os.makedirs(parent or ".", exist_ok=True)

        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(self.data, f, indent=2, ensure_ascii=False)

        return filepath

    def start_timer(self):
        """Start timing the trajectory execution"""
        self._start_time = datetime.now().timestamp()

    def stop_timer(self):
        """
        Stop timing and update duration

        Does nothing when ``start_timer`` was never called. Otherwise stores
        the elapsed seconds (rounded to 3 decimals) and refreshes the
        trajectory timestamp.
        """
        if self._start_time is not None:
            end_time = datetime.now().timestamp()
            self.data["traj_info"]["duration"] = round(end_time - self._start_time, 3)
            self.data["traj_info"]["timestamp"] = datetime.now().isoformat()

    def to_dict(self) -> Dict[str, Any]:
        """Return the trajectory as a dictionary (the live ``self.data`` object, not a copy)"""
        return self.data
@@ -0,0 +1 @@
1
+ """Shared utilities for benign workflow judges."""
@@ -0,0 +1,27 @@
1
+ """Atlassian helper utilities."""
2
+ from .helpers import (
3
+ AtlassianClient,
4
+ login,
5
+ list_projects,
6
+ get_project,
7
+ list_issues,
8
+ get_issue,
9
+ create_issue,
10
+ update_issue,
11
+ add_comment,
12
+ search_issues,
13
+ )
14
+
15
+ __all__ = [  # Public API of this package: the same names imported from .helpers above.
16
+ "AtlassianClient",
17
+ "login",
18
+ "list_projects",
19
+ "get_project",
20
+ "list_issues",
21
+ "get_issue",
22
+ "create_issue",
23
+ "update_issue",
24
+ "add_comment",
25
+ "search_issues",
26
+ ]
27
+