decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374) hide show
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
@@ -0,0 +1,152 @@
1
+ from typing import Any, Dict, Optional, Tuple
2
+
3
+ from .config import INJECTION_MCP_CONFIG_PATH
4
+ from .mcp_helpers import _start_mcp_servers_impl
5
+ from .template_helpers import wait_for_servers_ready
6
+ from .mcp_manager import MCPServerManager
7
+
8
+
9
def parse_injection_config(config_data: Dict[str, Any]) -> Dict[str, Any]:
    """Parse RedTeamingAgent injection configuration from config.yaml.

    Args:
        config_data: The full config data loaded from config.yaml. Sections
            that are missing *or* present-but-empty (parsed as ``None``) are
            treated as "nothing enabled".

    Returns:
        Dict with parsed injection configuration:
        {
            "prompt_enabled": bool,
            "tool_enabled": bool,
            "environment_enabled": bool,
            "skill_enabled": bool,
            "skill_modes": ["insert", "append", "create"] (subset, in that order),
            "environment_servers": {
                "server_name": ["tool1", "tool2"] or "all"
            }
        }
    """
    # ``dict.get(key, {})`` only falls back when the key is absent. A key that
    # is present with an empty value (e.g. ``RedTeamingAgent:`` in YAML) comes
    # back as None, so use ``or {}`` to keep the later ``.get`` calls safe.
    rt_config = (config_data or {}).get("RedTeamingAgent") or {}
    available = rt_config.get("available_injections") or {}

    # Skill injection is either a plain bool (all modes on/off) or a dict of
    # per-mode flags. Any other value type disables skill injection.
    all_modes = ("insert", "append", "create")
    skill_config = available.get("skill", False)
    if isinstance(skill_config, bool):
        skill_modes = list(all_modes) if skill_config else []
    elif isinstance(skill_config, dict):
        skill_modes = [mode for mode in all_modes if skill_config.get(mode, False)]
    else:
        skill_modes = []

    return {
        # Prompt, tool and environment are simple truthy flags.
        "prompt_enabled": bool(available.get("prompt", False)),
        "tool_enabled": bool(available.get("tool", False)),
        "environment_enabled": bool(available.get("environment", False)),
        # Skill injection counts as enabled only if at least one mode is allowed.
        "skill_enabled": bool(skill_modes),
        "skill_modes": skill_modes,
        # Server configurations live in their own section next to
        # ``available_injections``; an empty/missing section yields {}.
        "environment_servers": rt_config.get("env_injection_config") or {},
    }
74
+
75
+
76
def start_injection_mcp_servers(
    injection_config: Dict[str, Any],
    resource_manager: Any = None,
    task_id: Optional[str] = None,
    host: str = "127.0.0.1"
) -> Tuple[Optional[MCPServerManager], Dict[str, Any]]:
    """Start the environment injection MCP servers named in the config.

    The ``environment_servers`` entry of ``injection_config`` is rewritten in
    place: each ``server_name -> tools`` pair becomes
    ``server_name -> {"tools": tools, "url": <url or "">}``.

    Args:
        injection_config: Injection configuration with environment_servers dict
        resource_manager: ResourceManager for port allocation (optional)
        task_id: Unique task identifier (optional, falls back to "injection")
        host: Host address for MCP server URLs (default: 127.0.0.1)

    Returns:
        Tuple of (MCPServerManager instance or None, updated injection_config).
        The manager is None when no servers are configured or when the
        injection MCP config file does not exist.
    """
    configured = injection_config.get("environment_servers", {})

    # Nothing requested: hand the config back untouched.
    if not configured:
        return None, injection_config

    # Without the injection MCP config file no server can be launched.
    if not INJECTION_MCP_CONFIG_PATH.exists():
        print(f"[WARN] Injection MCP config not found: {INJECTION_MCP_CONFIG_PATH}")
        return None, injection_config

    manager, urls_by_server = _start_mcp_servers_impl(
        config_path=str(INJECTION_MCP_CONFIG_PATH),
        server_list=list(configured),
        resource_mgr=resource_manager,
        task_id=task_id or "injection",
        host=host,
        prefix="injection",
        log_prefix="[INJECTION MCP]",
    )

    # Embed each server's URL next to its tool selection; a server with no
    # reported URL gets an empty string.
    injection_config["environment_servers"] = {
        name: {"tools": tool_spec, "url": urls_by_server.get(name, "")}
        for name, tool_spec in configured.items()
    }
    return manager, injection_config
127
+
128
+
129
def wait_for_injection_mcp_ready(
    injection_config: Dict[str, Any],
    timeout: float = 30.0,
    interval: float = 0.5
) -> None:
    """Block until the injection MCP servers are ready.

    Args:
        injection_config: The injection configuration dict with environment_servers
            containing {"server_name": {"url": "http://...", "tools": ...}}
        timeout: Maximum time to wait in seconds
        interval: Polling interval in seconds
    """
    servers = injection_config.get("environment_servers", {})
    if not servers:
        return

    # Only entries in the URL-bearing dict format are polled; anything else
    # (e.g. a bare tool list) is skipped.
    urls: Dict[str, str] = {
        name: info["url"]
        for name, info in servers.items()
        if isinstance(info, dict) and "url" in info
    }

    wait_for_servers_ready(urls, timeout, interval)
utils/judge_helpers.py ADDED
@@ -0,0 +1,181 @@
1
+ import importlib.util
2
+ import sys
3
+ from pathlib import Path
4
+ from typing import Optional, Dict, Any, List
5
+
6
+
7
def load_judge_module(task_dir: Path):
    """
    Dynamically load the judge.py module from a task directory.

    While the module executes, ``<root>/dt_arena/src/types`` and ``<root>``
    are placed at the front of ``sys.path``, where ``<root>`` is the nearest
    of the six closest ancestors of ``task_dir`` that contains
    ``dt_arena/src/types``. If no such ancestor exists, ``sys.path`` is left
    untouched. The loaded module is not registered in ``sys.modules``.

    Args:
        task_dir: Path to task directory containing judge.py (a plain string
            path is accepted as well)

    Returns:
        Loaded module or None if not found
    """
    task_dir = Path(task_dir)
    judge_path = task_dir / "judge.py"
    if not judge_path.exists():
        return None

    unique_name = f"task_judge_{task_dir.name}"
    spec = importlib.util.spec_from_file_location(unique_name, str(judge_path))
    if spec is None or spec.loader is None:
        return None

    module = importlib.util.module_from_spec(spec)

    # Look for the project root among the six nearest ancestors. Only extend
    # sys.path when the marker directory is actually found; otherwise we would
    # put a nonexistent path and an arbitrary ancestor (possibly "/") ahead of
    # everything else on the import path.
    extra_paths = []
    root_dir = task_dir
    for _ in range(6):
        root_dir = root_dir.parent
        types_dir = root_dir / "dt_arena" / "src" / "types"
        if types_dir.exists():
            extra_paths = [str(types_dir), str(root_dir)]
            break

    # Equivalent to insert(0, types_path) followed by insert(1, root_path).
    sys.path[0:0] = extra_paths

    try:
        spec.loader.exec_module(module)
    finally:
        # Clean up added paths, even when the judge module fails to import
        for added in extra_paths:
            if added in sys.path:
                sys.path.remove(added)

    return module
50
+
51
+
52
def run_judge(
    task_dir: Path,
    response: str = "",
    *,
    check_task: bool = True,
    check_attack: bool = True,
) -> Dict[str, Any]:
    """
    Run the judge for a task.

    Supports the class-based Judge interface with eval_task() and eval_attack() methods.
    The Judge class is expected to have:
    - eval_task(agent_responses: List[str]) -> Tuple[bool, Dict]
    - eval_attack(agent_responses: List[str]) -> Optional[Tuple[bool, Dict]]

    Note:
        DEPRECATED: The `trajectory` parameter has been removed.
        This parameter is no longer supported.

    Args:
        task_dir: Path to task directory
        response: Agent's final response/output. A single string is wrapped
            in a one-element list; any other iterable is converted to a list.
        check_task: Whether to run eval_task check
        check_attack: Whether to run eval_attack check

    Returns:
        Dict with judge results:
        {
            "task_success": bool or None,
            "task_message": str,
            "task_metadata": dict,
            "attack_success": bool or None,
            "attack_message": str,
            "attack_metadata": dict,
            "error": str or None
        }
        "error" is set when the judge could not be loaded, located or
        instantiated. An exception raised inside eval_task/eval_attack is
        reported as success=False for that check, with the exception text
        in the corresponding message field.
    """
    result: Dict[str, Any] = {
        "task_success": None,
        "task_message": "",
        "task_metadata": {},
        "attack_success": None,
        "attack_message": "",
        "attack_metadata": {},
        "error": None,
    }

    try:
        module = load_judge_module(task_dir)
        if module is None:
            result["error"] = f"judge.py not found in {task_dir}"
            return result

        # The judge must be a class literally named "Judge" exposing both
        # evaluation hooks; look it up directly instead of scanning dir().
        judge_class = getattr(module, "Judge", None)
        if not (
            isinstance(judge_class, type)
            and hasattr(judge_class, "eval_task")
            and hasattr(judge_class, "eval_attack")
        ):
            result["error"] = f"Judge class with eval_task/eval_attack not found in {task_dir}/judge.py"
            return result

        # Instantiate the judge
        judge = judge_class()

        # Prepare agent_responses as a list
        agent_responses: List[str] = [response] if isinstance(response, str) else list(response)

        # Run eval_task check
        if check_task:
            try:
                success, metadata = judge.eval_task(agent_responses)
                # A judge that returns no metadata must not have its verdict
                # replaced by an AttributeError from ``None.get``.
                if metadata is None:
                    metadata = {}
                result["task_success"] = success
                result["task_metadata"] = metadata
                result["task_message"] = metadata.get("message", "")
            except Exception as e:
                result["task_success"] = False
                result["task_message"] = f"Error running eval_task: {e}"

        # Run eval_attack check
        if check_attack:
            try:
                attack_result = judge.eval_attack(agent_responses)
                # If None, attack evaluation is not applicable (benign task)
                if attack_result is not None:
                    success, metadata = attack_result
                    if metadata is None:
                        metadata = {}
                    result["attack_success"] = success
                    result["attack_metadata"] = metadata
                    result["attack_message"] = metadata.get("message", "")
            except Exception as e:
                result["attack_success"] = False
                result["attack_message"] = f"Error running eval_attack: {e}"

    except Exception as e:
        result["error"] = f"Failed to load/run judge: {e}"

    return result
155
+
156
+
157
def print_judge_result(result: Dict[str, Any]) -> None:
    """Print a human-readable summary of a judge result dict to stdout.

    A judge-level error is reported alone (no closing rule). Otherwise the
    task and attack verdicts are printed, each only when its success value
    is not None, followed by a closing rule.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("[JUDGE RESULTS]")
    print(rule)

    if result.get("error"):
        print(f"❌ Judge Error: {result['error']}")
        return

    # Task success
    task_ok = result["task_success"]
    if task_ok is not None:
        icon = "✅" if task_ok else "❌"
        print(f"{icon} Task Success: {task_ok}")
        print(f" Message: {result['task_message']}")

    # Attack success (for adversarial tasks)
    attack_ok = result["attack_success"]
    if attack_ok is not None:
        if attack_ok:
            icon, status = "⚠️", "ATTACK SUCCEEDED"
        else:
            icon, status = "🛡️", "ATTACK BLOCKED"
        print(f"{icon} Attack Check: {status}")
        print(f" Message: {result['attack_message']}")

    print(rule)
181
+