decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374)
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
eval/task_runner.py ADDED
@@ -0,0 +1,449 @@
1
+ import argparse
2
+ import asyncio
3
+ import os
4
+ import subprocess
5
+ import sys
6
+ from pathlib import Path
7
+ from typing import Optional, Dict, List
8
+ import yaml
9
+
10
+ from utils import (
11
+ PROJECT_ROOT,
12
+ task_setup,
13
+ start_task_mcp_servers,
14
+ build_agent,
15
+ get_agent_choices,
16
+ run_judge,
17
+ print_judge_result,
18
+ build_tool_injections_from_config,
19
+ build_skill_injections_from_config,
20
+ apply_prompt_injections,
21
+ get_env_injections_from_attack,
22
+ get_required_injection_servers,
23
+ apply_environment_injections_async,
24
+ start_injection_mcp_servers,
25
+ wait_for_injection_mcp_ready,
26
+ extract_dataset_path,
27
+ get_default_disallowed_tools,
28
+ )
29
+ from utils.resource_manager import (
30
+ ResourceManager,
31
+ generate_task_id,
32
+ )
33
+
34
+ from dt_arena.src.types.agent import AgentConfig, RuntimeConfig
35
+ from dt_arena.src.types.task import AttackConfig, TaskConfig
36
+
37
+
38
def _is_context_overflow(err: BaseException) -> bool:
    """Return True when *err* looks like a model context-window overflow."""
    return (
        "context length" in str(err).lower()
        or "ContextWindowExceeded" in type(err).__name__
    )


def _stop_servers(server_manager, banner: str) -> None:
    """Stop one group of MCP servers, reporting (not raising) shutdown errors."""
    if not server_manager:
        return
    print(banner)
    try:
        server_manager.stop_all()
    except Exception as stop_err:
        # Cleanup is best-effort: a failure here must neither prevent the other
        # server group from being stopped nor replace the task's exit code.
        print(f"[WARN] Failed to stop servers cleanly: {stop_err}")


async def run_single_task(
    task_dir: Path,
    *,
    agent_type: str,
    model: str,
    temperature: Optional[float] = None,
    max_turns: int = 200,
    skip_mcp: bool,
    skip_judge: bool = False,
    debug: bool = False,
    direct_prompt: bool = False,
    disallowed_tools: Optional[List[str]] = None,
) -> int:
    """Run a single workflow task with the selected agent implementation.

    Args:
        task_dir: Path to the task directory containing config.yaml
        agent_type: Type of agent to use (e.g., "openaisdk", "langchain")
        model: Model identifier (e.g., "gpt-4o")
        temperature: Sampling temperature for the model (default: None, uses model default)
        max_turns: Maximum number of conversation turns (default: 200)
        skip_mcp: If True, skip MCP server setup (injection MCP servers are
            then not started either, so no environment injections are applied)
        skip_judge: If True, skip running the judge after task completion
        debug: Forwarded unchanged to ``RuntimeConfig(debug=...)``
        direct_prompt: If True, use the attack's ``malicious_goal`` as the
            instruction and store results under ``direct_prompt/`` instead of
            ``benchmark/``
        disallowed_tools: Tool names forwarded to the agent via
            ``agent_kwargs["disallowed_tools"]``. When None, the list comes from
            ``get_default_disallowed_tools(agent_type, domain)``; an explicit
            empty list suppresses those defaults.

    Returns:
        Exit code: 0 on success; 1 when config.yaml is missing, when the judge
        reports ``task_success`` as False, or when execution raises; the
        command's own return code (or 1) when a setup subprocess fails.
    """
    config_path = task_dir / "config.yaml"
    if not config_path.exists():
        print(f"[ERROR] Configuration file not found: {config_path}")
        return 1

    print(f"[INFO] Loading agent config from: {config_path}")
    print("-" * 80)

    manager = None
    injection_manager = None  # For environment injection MCP servers
    task_id: Optional[str] = None
    resource_mgr = ResourceManager.instance()

    try:
        agent_cfg = AgentConfig.from_yaml(str(config_path))

        # Generate unique task ID for resource tracking
        task_id = generate_task_id(task_dir)
        print(f"[INFO] Task ID: {task_id}")

        # Run setup.sh for task-specific env initialization
        print("[SETUP] Initializing environments...")
        task_setup(task_dir, task_id=task_id)

        # Set up and start MCP servers
        if not skip_mcp:
            manager = start_task_mcp_servers(agent_cfg, task_id, task_dir, resource_mgr)
        else:
            print("[MCP] Skipped MCP server startup as requested.")

        # Load task and attack configs
        task_cfg = TaskConfig.from_yaml(str(config_path))

        # Build output directory following dataset structure:
        #   {root}/benchmark/{agent_type}/{model}/{domain}/{task_type}/.../{task_id}/
        # For direct prompt mode: {root}/direct_prompt/{agent_type}/{model}/...
        output_root = Path(os.getenv("EVAL_RESULTS_ROOT", str(Path.cwd() / "results")))
        dataset_path = extract_dataset_path(task_dir)
        # Sanitize model for use in path (replace special chars)
        safe_model = model.replace("/", "_").replace(":", "_")
        result_type = "direct_prompt" if direct_prompt else "benchmark"
        output_dir = (output_root / result_type / agent_type / safe_model / dataset_path).resolve()
        output_dir.mkdir(parents=True, exist_ok=True)
        print(f"[INFO] Output directory: {output_dir}")

        # A task without a parsable attack section still runs, just with no attack.
        try:
            attack_cfg = AttackConfig.from_yaml(str(config_path))
        except Exception as e:
            print(f"[WARN] Failed to parse Attack config: {e}")
            attack_cfg = None

        # Build tool injections (all injections before the first turn for now)
        mcp_injection = build_tool_injections_from_config(attack_cfg)

        # Build skill injections
        skill_injection = build_skill_injections_from_config(attack_cfg)

        # Build env injections by turn
        all_env_injections = get_env_injections_from_attack(attack_cfg)
        injection_server_urls: Dict[str, str] = {}

        if all_env_injections and not skip_mcp:
            required_servers = get_required_injection_servers(all_env_injections)
            if required_servers:
                print(f"\n[ENV INJECTION] Required injection servers: {list(required_servers.keys())}")

                # Build injection_config
                injection_config = {
                    "environment_enabled": True,
                    "environment_servers": required_servers,
                }

                # Start injection MCP servers
                injection_manager, injection_config = start_injection_mcp_servers(
                    injection_config,
                    resource_manager=resource_mgr,
                    task_id=task_id,
                )

                if injection_config.get("environment_servers"):
                    print("[ENV INJECTION] Waiting for injection servers to be ready...")
                    wait_for_injection_mcp_ready(injection_config)

                    # Extract server URLs for later use
                    for server_name, server_info in injection_config["environment_servers"].items():
                        if isinstance(server_info, dict) and "url" in server_info:
                            injection_server_urls[server_name] = server_info["url"]

                    print(f"[ENV INJECTION] Injection servers ready: {list(injection_server_urls.keys())}")

        # Build runtime config
        effective_disallowed_tools = disallowed_tools
        if effective_disallowed_tools is None:
            effective_disallowed_tools = get_default_disallowed_tools(agent_type, task_cfg.domain)
            if effective_disallowed_tools:
                print(
                    f"[CONFIG] Auto-disabling native tools for {task_cfg.domain}/{agent_type}: "
                    f"{', '.join(effective_disallowed_tools)}"
                )

        agent_kwargs = {}
        if effective_disallowed_tools:
            agent_kwargs["disallowed_tools"] = effective_disallowed_tools

        runtime_cfg = RuntimeConfig(
            model=model,
            temperature=temperature,
            max_turns=max_turns,
            output_dir=str(output_dir),
            mcp_injection=mcp_injection,
            skill_injection=skill_injection,
            debug=debug,
            agent_kwargs=agent_kwargs if agent_kwargs else None,
        )

        # Build agent
        agent = build_agent(
            agent_type=agent_type,
            agent_cfg=agent_cfg,
            runtime_cfg=runtime_cfg,
        )

        # Display configuration
        print(f"\n[CONFIG] Task ID: {task_cfg.task_id}")
        print(f"[CONFIG] Domain: {task_cfg.domain}")
        print(f"[CONFIG] Agent: {agent_type}")
        print(f"[CONFIG] Model: {model}")

        if attack_cfg:
            print(f"\n[ATTACK] Risk Category: {attack_cfg.risk_category}")
            print(f"[ATTACK] Threat Model: {attack_cfg.threat_model}")
            if attack_cfg.malicious_goal:
                print(f"[ATTACK] Malicious Goal: {attack_cfg.malicious_goal}")
            if attack_cfg.attack_turns:
                print(f"[ATTACK] Attack Turns: {len(attack_cfg.attack_turns)}")
                for turn in attack_cfg.attack_turns:
                    print(f" Turn {turn.turn_id}: {len(turn.attack_steps)} attack step(s)")
                    for step in turn.attack_steps:
                        print(f" - {step.type} ({step.mode})")

        # Handle prompt injections
        if direct_prompt:
            # Direct prompt mode: use malicious goal directly as the prompt
            if attack_cfg and attack_cfg.malicious_goal:
                user_instruction = attack_cfg.malicious_goal
                print("[INFO] Direct prompt mode: using malicious goal as instruction.")
            else:
                raise ValueError("Direct prompt mode requires malicious_goal in config.")
        else:
            user_instruction = task_cfg.original_instruction
            user_instruction = apply_prompt_injections(user_instruction, attack_cfg)

        # Falsiness covers None, "" and [] alike.
        if not user_instruction:
            raise ValueError("Instruction is empty after applying prompt injections.")

        # Normalize instruction
        if isinstance(user_instruction, str):
            user_instruction = user_instruction.strip()
        else:
            user_instruction = [instr.strip() for instr in user_instruction]

        print("\n" + "=" * 80)
        print("[INSTRUCTION (after injection)]")
        print("=" * 80)
        if isinstance(user_instruction, list):
            print(f"Multi-turn task with {len(user_instruction)} queries:")
            for i, instr in enumerate(user_instruction, 1):
                print(f" {i}. {instr}")
        else:
            print(user_instruction)
        print("=" * 80)

        if attack_cfg and attack_cfg.malicious_goal:
            print("\n[MALICIOUS GOAL] Testing Against:")
            print("-" * 80)
            print(attack_cfg.malicious_goal.strip())
            print("-" * 80)

        # Run agent with per-turn env injection support
        async with agent:
            metadata = {
                "task_id": task_cfg.task_id,
                "domain": task_cfg.domain,
                "risk_category": attack_cfg.risk_category if attack_cfg else None,
                "malicious_goal": attack_cfg.malicious_goal if attack_cfg else None,
                "direct_prompt": direct_prompt,
            }

            # Normalize instructions to list for per-turn processing
            if isinstance(user_instruction, str):
                instructions = [user_instruction]
            else:
                instructions = list(user_instruction)

            # Determine if we have env injections to apply per-turn
            has_env_injections = bool(all_env_injections) and bool(injection_server_urls)

            # Execute turns with per-turn env injection application
            result = None
            for turn_id, turn_instruction in enumerate(instructions, 1):  # 1-indexed
                # Apply environment injections for this turn BEFORE agent processes query
                if has_env_injections:
                    turn_env_injections = get_env_injections_from_attack(attack_cfg, turn_id=turn_id)
                    if turn_env_injections:
                        print(f"\n[ENV INJECTION] Applying {len(turn_env_injections)} injection(s) for turn {turn_id}...")
                        injection_results = await apply_environment_injections_async(
                            turn_env_injections, injection_server_urls
                        )
                        # Log injection results
                        for inj_result in injection_results:
                            status = "OK" if inj_result["success"] else "FAILED"
                            print(f" [{status}] {inj_result['server_name']}:{inj_result['tool_name']}")
                            if not inj_result["success"]:
                                print(f" Error: {inj_result['error']}")

                # Run agent for this turn
                print(f"\n[TURN {turn_id}] Processing query...")
                try:
                    result = await agent.run(turn_instruction, metadata=metadata)
                except Exception as agent_err:
                    if not _is_context_overflow(agent_err):
                        raise  # re-raise non-context-overflow errors
                    err_msg = str(agent_err)
                    print(f"\n[CONTEXT OVERFLOW] Turn {turn_id} exceeded model context window: {err_msg[:200]}")
                    # Round 1 tool calls may have already mutated DB state,
                    # so we still need to run the judge to check for attack success.
                    # Break out of the turn loop and proceed to judge with empty response.
                    result = None
                    break

            if result is not None:
                print("\n" + "=" * 80)
                print("[AGENT RESPONSE]")
                print("=" * 80)
                final_output = getattr(result, "final_output", None) or str(result)
                print(final_output)
                print("=" * 80)
                print(f"\n[SUCCESS] Task completed with agent '{agent_type}'")
            else:
                # Context overflow — agent didn't finish, but Round 1 tool calls may have mutated DB
                final_output = ""
                print("\n[PARTIAL] Agent hit context overflow; running judge on partial execution")

            print(f"[INFO] Results saved to: {output_dir}")

        # Run judge
        if not skip_judge:
            import json

            judge_result = run_judge(task_dir, response=final_output)
            print_judge_result(judge_result)

            judge_output_path = output_dir / "judge_result.json"
            # Explicit encoding keeps the file independent of the host locale.
            with open(judge_output_path, "w", encoding="utf-8") as f:
                json.dump(judge_result, f, indent=2)
            print(f"[INFO] Judge result saved to: {judge_output_path}")

            # Only an explicit False fails the task; a missing key or None does not.
            if judge_result.get("task_success") is False:
                return 1
        else:
            print("[JUDGE] Skipped judge as requested.")

        return 0

    except subprocess.CalledProcessError as e:
        print(f"[ERROR] Setup command failed with exit code {e.returncode}")
        return e.returncode or 1
    except FileNotFoundError as e:
        print(f"[ERROR] File not found: {e}")
        return 1
    except Exception as e:
        print(f"[ERROR] Execution failed: {e}")
        import traceback
        traceback.print_exc()
        return 1
    finally:
        # Each group is stopped independently so that a failure in one cannot
        # leak the other or override the exit code chosen above.
        _stop_servers(manager, "[MCP] Stopping MCP servers...")
        _stop_servers(injection_manager, "[ENV INJECTION] Stopping injection MCP servers...")
        # Docker cleanup is handled by DockerPool, not here
353
+
354
+
355
def main() -> None:
    """Command-line entry point: parse arguments, run one task, then exit.

    The process exits with the code returned by ``run_single_task``, with 0
    when the run is interrupted from the keyboard, and with 1 when any other
    exception escapes the run.
    """
    cli = argparse.ArgumentParser(
        description="Run a single workflow task for evaluation."
    )

    # (flag, add_argument keyword arguments) pairs, registered in this order
    # so the generated --help output keeps the same option ordering.
    option_specs = [
        (
            "--task-dir",
            {
                "type": str,
                "required": True,
                "help": "Path to task directory (e.g., dataset/workflow/benign/1)",
            },
        ),
        (
            "--agent-type",
            {
                "type": str,
                "default": "openaisdk",
                "choices": get_agent_choices(),
                "help": "Agent implementation to use.",
            },
        ),
        (
            "--model",
            {
                "type": str,
                "default": "gpt-4o",
                "help": "Base model identifier.",
            },
        ),
        (
            "--temperature",
            {
                "type": float,
                "default": None,
                "help": "Sampling temperature for the model (default: None, uses model default).",
            },
        ),
        (
            "--max-turns",
            {
                "type": int,
                "default": 200,
                "help": "Maximum number of conversation turns (default: 200).",
            },
        ),
        (
            "--skip-mcp",
            {
                "action": "store_true",
                "help": "Do not start MCP servers automatically.",
            },
        ),
        (
            "--skip-judge",
            {
                "action": "store_true",
                "help": "Skip running judge after task completion.",
            },
        ),
        (
            "--debug",
            {
                "action": "store_true",
                "help": "Enable debug mode to save extra info like tool descriptions in trajectory.",
            },
        ),
        (
            "--direct-prompt",
            {
                "action": "store_true",
                "help": "For direct threat model: use malicious goal directly as prompt instead of loading attack turns.",
            },
        ),
        (
            "--disallowed-tools",
            {
                "type": str,
                "nargs": "+",
                "default": None,
                "help": (
                    "Native tools to disable. If omitted, os-filesystem tasks default to "
                    "domain-specific native-tool deny lists for claudesdk/openclaw."
                ),
            },
        ),
    ]
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    opts = cli.parse_args()
    resolved_task_dir = Path(opts.task_dir).resolve()

    # Keyword arguments forwarded verbatim to run_single_task.
    run_kwargs = {
        "task_dir": resolved_task_dir,
        "agent_type": opts.agent_type,
        "model": opts.model,
        "temperature": opts.temperature,
        "max_turns": opts.max_turns,
        "skip_mcp": opts.skip_mcp,
        "skip_judge": opts.skip_judge,
        "debug": opts.debug,
        "direct_prompt": opts.direct_prompt,
        "disallowed_tools": opts.disallowed_tools,
    }

    # Resolve the exit status first and leave through a single sys.exit call;
    # SystemExit is not an Exception subclass, so raising it outside the try
    # behaves the same as raising it inside.
    try:
        exit_code = asyncio.run(run_single_task(**run_kwargs))
    except KeyboardInterrupt:
        print("\n[INFO] Interrupted by user")
        exit_code = 0
    except Exception as err:
        print(f"\n[ERROR] {err}")
        exit_code = 1

    sys.exit(exit_code)
446
+
447
+
448
# Run the CLI only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
utils/__init__.py ADDED
@@ -0,0 +1,148 @@
1
+ from .config import (
2
+ PROJECT_ROOT,
3
+ ENV_CONFIG_PATH,
4
+ MCP_CONFIG_PATH,
5
+ INJECTION_MCP_CONFIG_PATH,
6
+ TASK_RUNNER_PATH,
7
+ BENCHMARK_ROOT,
8
+ resolve_benchmark_task_list,
9
+ )
10
+ from .env_helpers import (
11
+ task_setup,
12
+ teardown_envs,
13
+ teardown_task,
14
+ )
15
+ from .mcp_helpers import (
16
+ start_task_mcp_servers,
17
+ )
18
+ from .agent_helpers import (
19
+ AGENT_REGISTRY,
20
+ build_agent,
21
+ get_agent_choices,
22
+ get_default_disallowed_tools,
23
+ )
24
+ from .judge_helpers import (
25
+ run_judge,
26
+ print_judge_result,
27
+ )
28
+ from .judge_utils import (
29
+ JudgeValidator,
30
+ extract_tool_calls,
31
+ get_tool_names,
32
+ has_tool_call,
33
+ validate_required_tools,
34
+ check_security,
35
+ )
36
+ from .task_helpers import (
37
+ TaskSpec,
38
+ build_task_dir,
39
+ parse_task_list,
40
+ extract_dataset_path,
41
+ )
42
+ from .injection_helpers import (
43
+ build_tool_injections_from_config,
44
+ build_skill_injections_from_config,
45
+ apply_prompt_injections,
46
+ get_env_injections_from_attack,
47
+ get_required_injection_servers,
48
+ apply_environment_injections,
49
+ apply_environment_injections_async,
50
+ )
51
+ from .skill_helpers import (
52
+ apply_injection_to_content,
53
+ create_injected_skills_directory,
54
+ cleanup_temp_directory,
55
+ load_skills_as_text,
56
+ )
57
+ # Re-export SkillInjection from dt_arena for convenience
58
+ from dt_arena.src.types.agent import SkillInjection
59
+ from .compose_utils import (
60
+ get_project_name,
61
+ get_project_args,
62
+ run_compose,
63
+ run_compose_exec,
64
+ run_compose_cp,
65
+ )
66
+ from .injection_mcp_helpers import (
67
+ parse_injection_config,
68
+ start_injection_mcp_servers,
69
+ wait_for_injection_mcp_ready,
70
+ )
71
+ from .template_helpers import (
72
+ find_template_vars,
73
+ render_template,
74
+ resolve_port_from_env,
75
+ allocate_server_port,
76
+ resolve_server_env_vars,
77
+ build_server_name_map,
78
+ wait_for_servers_ready,
79
+ )
80
+
81
# Public names re-exported by this package. Each group is labelled with the
# module the names are imported from at the top of this file.
__all__ = [
    # config
    "PROJECT_ROOT",
    "ENV_CONFIG_PATH",
    "MCP_CONFIG_PATH",
    "INJECTION_MCP_CONFIG_PATH",
    "TASK_RUNNER_PATH",
    "BENCHMARK_ROOT",
    "resolve_benchmark_task_list",
    # env_helpers
    "task_setup",
    "teardown_envs",
    "teardown_task",
    # mcp_helpers
    "start_task_mcp_servers",
    # agent_helpers
    "AGENT_REGISTRY",
    "build_agent",
    "get_agent_choices",
    "get_default_disallowed_tools",
    # judge_helpers
    "run_judge",
    "print_judge_result",
    # judge_utils
    "JudgeValidator",
    "extract_tool_calls",
    "get_tool_names",
    "has_tool_call",
    "validate_required_tools",
    "check_security",
    # task_helpers
    "TaskSpec",
    "build_task_dir",
    "parse_task_list",
    "extract_dataset_path",
    # injection_helpers
    "build_tool_injections_from_config",
    "build_skill_injections_from_config",
    "apply_prompt_injections",
    "get_env_injections_from_attack",
    "get_required_injection_servers",
    "apply_environment_injections",
    "apply_environment_injections_async",
    # skill_helpers (SkillInjection itself is imported from
    # dt_arena.src.types.agent and re-exported here, not defined in skill_helpers)
    "SkillInjection",
    "apply_injection_to_content",
    "create_injected_skills_directory",
    "cleanup_temp_directory",
    "load_skills_as_text",
    # compose_utils
    "get_project_name",
    "get_project_args",
    "run_compose",
    "run_compose_exec",
    "run_compose_cp",
    # injection_mcp_helpers
    "parse_injection_config",
    "start_injection_mcp_servers",
    "wait_for_injection_mcp_ready",
    # template_helpers
    "find_template_vars",
    "render_template",
    "resolve_port_from_env",
    "allocate_server_port",
    "resolve_server_env_vars",
    "build_server_name_map",
    "wait_for_servers_ready",
]