decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374) hide show
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
@@ -0,0 +1,467 @@
1
+ import yaml
2
+ from typing import Dict, Any, List, Optional
3
+ from pocketflow import Node
4
+ from utils.llm import call_llm
5
+ from agent.pocketflow.prompts.react_agent import REACT_AGENT_SYSPROMPT, ACTION_SPACE_INSTRUCTION
6
+ from datetime import date, datetime
7
+ from collections.abc import Mapping
8
+
9
+ from dt_arena.src.types.hooks import ToolCallContext
10
+
11
+
12
+ class DecideActionNode(Node):
13
+ """
14
+ Node that decides the next action based on current context.
15
+
16
+ This node implements the "Think" and "Act" parts of ReAct:
17
+ - Analyzes the current context (user query, previous actions, observations)
18
+ - Decides whether to use a tool or provide a final answer
19
+ - If using a tool, specifies which tool and what parameters
20
+ """
21
+
22
+ def prep(self, shared: Dict[str, Any]) -> Dict[str, Any]:
23
+ """Prepare context for decision making."""
24
+ return {
25
+ "system_prompt": shared.get("system_prompt", ""),
26
+ "user_query": shared.get("user_query", ""),
27
+ "available_tools": shared.get("available_tools", []),
28
+ "max_turns": shared["max_turns"],
29
+ "turn_count": shared["turn_count"],
30
+ "model": shared["model"],
31
+ "message_history": shared.get("message_history", []),
32
+ }
33
+
34
+ def exec(self, prep_res: Dict[str, Any]) -> Dict[str, Any]:
35
+ """Use LLM to decide next action."""
36
+ user_given_system_prompt = prep_res["system_prompt"]
37
+ user_query = prep_res["user_query"]
38
+ available_tools = prep_res["available_tools"]
39
+ max_turns = prep_res["max_turns"]
40
+ turn_count = prep_res["turn_count"]
41
+ llm_model = prep_res["model"]
42
+ message_history = prep_res["message_history"]
43
+
44
+ if turn_count >= max_turns:
45
+ return {
46
+ "action": "answer",
47
+ "reasoning": "Maximum iterations reached, providing final answer",
48
+ "tool_name": None,
49
+ "tool_arguments": None,
50
+ }
51
+
52
+ # Initialize message history on first call, or append new user query for multi-turn
53
+ if not message_history:
54
+ # First call: build system message and add user query
55
+ tools_text = self._format_tools(available_tools)
56
+
57
+ # Build system message with instructions
58
+ system_content = REACT_AGENT_SYSPROMPT.format(
59
+ user_given_system_prompt=user_given_system_prompt,
60
+ tools_text=tools_text,
61
+ action_space_instruction=ACTION_SPACE_INSTRUCTION,
62
+ )
63
+
64
+ message_history.append({"role": "system", "content": system_content})
65
+ message_history.append({"role": "user", "content": user_query})
66
+ else:
67
+ # Multi-turn: check if we need to add a new user query
68
+ # Only add if the last message is not already this user query
69
+ last_msg = message_history[-1] if message_history else None
70
+ if not (last_msg and last_msg.get("role") == "user" and last_msg.get("content") == user_query):
71
+ message_history.append({"role": "user", "content": user_query})
72
+
73
+ # Call LLM with message history
74
+ response = call_llm(messages=message_history, model=llm_model)
75
+ print('@'*40)
76
+ print('[DEBUG] LLM Raw Response:', response)
77
+ print('@'*40)
78
+
79
+ # Parse the response
80
+ try:
81
+ # Extract YAML block
82
+ if "```yaml" in response:
83
+ yaml_str = response.split("```yaml")[1].split("```")[0].strip()
84
+ elif "```" in response:
85
+ yaml_str = response.split("```")[1].split("```")[0].strip()
86
+ else:
87
+ yaml_str = response.strip()
88
+
89
+ result = yaml.safe_load(yaml_str)
90
+ result = self.convert_dt_to_str(result)
91
+
92
+ # Validate the result
93
+ if not isinstance(result, dict):
94
+ raise ValueError("Response is not a valid dictionary")
95
+
96
+ if "final_answer" in result and result["final_answer"] is not None:
97
+ result["action"] = "answer"
98
+
99
+ if "action" not in result:
100
+ raise ValueError("Response missing 'action' field")
101
+
102
+ if result["action"] not in ["use_tool", "answer"]:
103
+ raise ValueError(
104
+ f"Invalid action: {result['action']}. Must be 'use_tool' or 'answer'"
105
+ )
106
+
107
+ # Validate based on action type
108
+ if result["action"] == "use_tool":
109
+ if "tool_name" not in result or result["tool_name"] is None:
110
+ raise ValueError(
111
+ "Action is 'use_tool' but 'tool_name' is missing or null"
112
+ )
113
+ if "tool_arguments" not in result:
114
+ result["tool_arguments"] = {}
115
+ elif result["action"] == "answer":
116
+ if "final_answer" not in result or result["final_answer"] is None:
117
+ raise ValueError(
118
+ "Action is 'answer' but 'final_answer' is missing or null"
119
+ )
120
+
121
+ return result
122
+
123
+ except Exception as e:
124
+ # Parse error: return special marker to trigger retry with action space reminder
125
+ print(f"[PocketFlow] Warning: Failed to parse LLM response: {e}")
126
+ print(f"[PocketFlow] Raw response: {response}")
127
+ print("[PocketFlow] Message history:")
128
+ for msg in message_history:
129
+ role = msg.get("role", "unknown")
130
+ content = msg.get("content", "")[:100].replace("\n", " ")
131
+ print(f"[PocketFlow] [{role}] {content}...")
132
+ return {
133
+ "action": "parse_error",
134
+ "error_message": f"Failed to parse your response: {e}\n\nRaw response:\n{response}",
135
+ "reasoning": None,
136
+ "tool_name": None,
137
+ "tool_arguments": None,
138
+ "final_answer": None,
139
+ }
140
+
141
+ def post(
142
+ self, shared: Dict[str, Any], prep_res: Dict[str, Any], exec_res: Dict[str, Any]
143
+ ) -> str:
144
+ """Store the decision and determine next action."""
145
+ # Handle parse errors: provide feedback and retry
146
+ if exec_res.get("action") == "parse_error":
147
+ error_msg = exec_res.get("error_message", "Unknown parsing error")
148
+
149
+ # Create observation with error and action space reminder
150
+ observation = f"""ERROR: Your previous response could not be parsed.
151
+
152
+ {error_msg}
153
+
154
+ Please review the action space and response format requirements below, and try again:
155
+
156
+ {ACTION_SPACE_INSTRUCTION}
157
+
158
+ Please provide a valid response following the exact format specified above."""
159
+
160
+ # Add observation to message history (as user feedback)
161
+ shared["message_history"].append({
162
+ "role": "user",
163
+ "content": observation
164
+ })
165
+
166
+ print(f"[DEBUG] Parse error detected, retrying with action space reminder")
167
+
168
+ # Return "retry" to loop back to DecideActionNode
169
+ return "retry"
170
+
171
+ # Normal flow: store decision and continue
172
+ shared["current_decision"] = exec_res
173
+
174
+ # Add reasoning to trajectory (for logging/debugging)
175
+ trajectory_entry = {
176
+ "type": "thought",
177
+ "content": exec_res.get("reasoning", "No reasoning provided"),
178
+ }
179
+ shared["trajectory"].append(trajectory_entry)
180
+
181
+ # Add LLM response to message history as assistant message
182
+ # Format the response in YAML for consistency
183
+ import yaml
184
+ response_yaml = yaml.dump(exec_res, default_flow_style=False, allow_unicode=True, sort_keys=False)
185
+ shared["message_history"].append({
186
+ "role": "assistant",
187
+ "content": f"```yaml\n{response_yaml}```"
188
+ })
189
+
190
+ print(f"[DEBUG] Decision made: {exec_res}")
191
+
192
+ # Return the action to take
193
+ return exec_res["action"]
194
+
195
+ def convert_dt_to_str(self, obj):
196
+ if isinstance(obj, (datetime, date)):
197
+ return obj.isoformat()
198
+
199
+ if isinstance(obj, Mapping):
200
+ new_dict = {}
201
+ for k, v in obj.items():
202
+ new_k = self.convert_dt_to_str(k)
203
+ new_v = self.convert_dt_to_str(v)
204
+ new_dict[new_k] = new_v
205
+ return new_dict
206
+
207
+ return obj
208
+
209
+ def _format_tools(self, tools: List[Dict[str, Any]]) -> str:
210
+ """Format available tools for the prompt."""
211
+ if not tools:
212
+ return "No tools available."
213
+
214
+ formatted = []
215
+ for tool in tools:
216
+ name = tool.get("name", "unknown")
217
+ description = tool.get("description", "No description")
218
+ input_schema = tool.get("inputSchema", {})
219
+
220
+ formatted.append(f"## {name}")
221
+ formatted.append(f"Description: {description}")
222
+
223
+ # Format parameters
224
+ properties = input_schema.get("properties", {})
225
+ required = input_schema.get("required", [])
226
+
227
+ if properties:
228
+ formatted.append("Parameters:")
229
+ for param_name, param_info in properties.items():
230
+ param_type = param_info.get("type", "any")
231
+ param_desc = param_info.get("description", "")
232
+ is_required = " (required)" if param_name in required else ""
233
+ formatted.append(
234
+ f" - {param_name} ({param_type}){is_required}: {param_desc}"
235
+ )
236
+ formatted.append("")
237
+
238
+ return "\n".join(formatted)
239
+
240
+
241
+ class ExecuteToolNode(Node):
242
+ """
243
+ Node that executes a tool call via MCP.
244
+
245
+ This node implements the "Observe" part of ReAct:
246
+ - Looks up the server for the tool from tool_to_server mapping
247
+ - Calls the specified tool through the appropriate MCP client
248
+ - Records the result as an observation with server info
249
+ """
250
+
251
+ def prep(self, shared: Dict[str, Any]) -> Dict[str, Any]:
252
+ """Prepare tool execution parameters."""
253
+ decision = shared.get("current_decision", {})
254
+ shared["turn_count"] += 1
255
+
256
+ tool_name = decision.get("tool_name", "")
257
+ tool_to_server = shared.get("tool_to_server", {})
258
+
259
+ # Look up server for this tool
260
+ server_name = tool_to_server.get(tool_name)
261
+
262
+ return {
263
+ "tool_name": tool_name,
264
+ "server_name": server_name,
265
+ "tool_arguments": decision.get("tool_arguments", {}),
266
+ "mcp_servers": shared.get("mcp_servers", {}),
267
+ "async_helper": shared.get("async_helper"),
268
+ "hook_manager": shared.get("hook_manager"),
269
+ }
270
+
271
+ def exec(self, prep_res: Dict[str, Any]) -> Dict[str, Any]:
272
+ """
273
+ Execute the tool via MCP client.
274
+
275
+ Uses AsyncHelper to run the async MCP call in a sync context.
276
+ Creates a fresh connection for each call to avoid cross-thread issues.
277
+
278
+ Args:
279
+ prep_res: Prepared tool parameters including mcp_servers and async_helper
280
+
281
+ Returns:
282
+ Dict with result and server info
283
+ """
284
+ tool_name = prep_res["tool_name"]
285
+ server_name = prep_res["server_name"]
286
+ tool_arguments = prep_res["tool_arguments"]
287
+ mcp_servers = prep_res["mcp_servers"]
288
+ async_helper = prep_res["async_helper"]
289
+ hook_manager = prep_res["hook_manager"]
290
+
291
+ if not mcp_servers:
292
+ return {"result": "Error: No MCP servers available", "server": None}
293
+
294
+ if not tool_name:
295
+ return {"result": "Error: No tool name specified", "server": None}
296
+
297
+ if not async_helper:
298
+ return {"result": "Error: Async helper not available", "server": None}
299
+
300
+ if not server_name or server_name not in mcp_servers:
301
+ return {
302
+ "result": f"Error: Server not found for tool '{tool_name}'",
303
+ "server": server_name
304
+ }
305
+
306
+ # Get the server URL for creating a fresh connection
307
+ server_info = mcp_servers[server_name]
308
+ server_url = server_info.url
309
+
310
+ if not server_url:
311
+ return {
312
+ "result": f"Error: No URL configured for server '{server_name}'",
313
+ "server": server_name
314
+ }
315
+
316
+ try:
317
+ # Call the tool asynchronously using the helper with a fresh connection
318
+ result = async_helper.run_async(
319
+ self._call_tool_async(
320
+ server_url, tool_name, tool_arguments, server_name, hook_manager
321
+ )
322
+ )
323
+ return {"result": result, "server": server_name}
324
+ except Exception as e:
325
+ return {
326
+ "result": f"Error executing tool '{tool_name}' on server '{server_name}': {str(e)}",
327
+ "server": server_name
328
+ }
329
+
330
+ async def _call_tool_async(
331
+ self,
332
+ server_url: str,
333
+ tool_name: str,
334
+ tool_arguments: Dict[str, Any],
335
+ server_name: str,
336
+ hook_manager,
337
+ ) -> str:
338
+ """
339
+ Call a tool through a fresh MCP client connection.
340
+
341
+ We create a new connection for each tool call to avoid cross-thread
342
+ event loop issues when using AsyncHelper.
343
+
344
+ Args:
345
+ server_url: The MCP server URL to connect to
346
+ tool_name: Name of the tool to call
347
+ tool_arguments: Arguments to pass to the tool
348
+
349
+ Returns:
350
+ The tool's result as a string
351
+ """
352
+ from fastmcp import Client
353
+
354
+ try:
355
+ # Create a fresh connection for this call
356
+ async with Client(server_url) as client:
357
+ ctx = ToolCallContext(
358
+ framework="pocketflow",
359
+ server=server_name,
360
+ tool_name=tool_name,
361
+ arguments=tool_arguments,
362
+ )
363
+ result = await hook_manager.wrap(
364
+ ctx,
365
+ lambda args: client.call_tool(tool_name, args),
366
+ )
367
+
368
+ if hasattr(result, "content"):
369
+ content_parts = []
370
+ for item in result.content:
371
+ if hasattr(item, "text"):
372
+ content_parts.append(item.text)
373
+ elif hasattr(item, "data"):
374
+ content_parts.append(str(item.data))
375
+ else:
376
+ content_parts.append(str(item))
377
+
378
+ return "\n".join(content_parts) if content_parts else str(result)
379
+ else:
380
+ return str(result)
381
+
382
+ except Exception as e:
383
+ raise RuntimeError(f"MCP call failed: {str(e)}")
384
+
385
+ def post(
386
+ self, shared: Dict[str, Any], prep_res: Dict[str, Any], exec_res: Dict[str, Any]
387
+ ) -> str:
388
+ """Store the observation and continue the loop."""
389
+ # Extract result and server from exec_res
390
+ result_text = exec_res.get("result", "")
391
+ server_name = exec_res.get("server")
392
+
393
+ # Add action to trajectory (for logging/debugging) - with server info
394
+ action_entry = {
395
+ "type": "action",
396
+ "tool_name": prep_res["tool_name"],
397
+ "tool_arguments": prep_res["tool_arguments"],
398
+ "server": server_name,
399
+ }
400
+ shared["trajectory"].append(action_entry)
401
+
402
+ # Add observation to trajectory (for logging/debugging) - with server info
403
+ observation_entry = {
404
+ "type": "observation",
405
+ "content": result_text,
406
+ "tool_name": prep_res["tool_name"],
407
+ "server": server_name,
408
+ }
409
+ shared["trajectory"].append(observation_entry)
410
+
411
+ # Add observation to message history as user message
412
+ observation_message = f"Tool execution result:\n{result_text}"
413
+ shared["message_history"].append({
414
+ "role": "user",
415
+ "content": observation_message
416
+ })
417
+
418
+ print(f"[DEBUG] Observation added from server '{server_name}': {result_text[:200]}...")
419
+
420
+ # Return to decision node for next iteration
421
+ return "decide"
422
+
423
+
424
+ class FinalAnswerNode(Node):
425
+ """
426
+ Node that handles the final answer.
427
+
428
+ This node is reached when the agent decides to provide a final answer.
429
+ """
430
+
431
+ def prep(self, shared: Dict[str, Any]) -> str:
432
+ """
433
+ Extract the final answer from the decision.
434
+
435
+ Args:
436
+ shared: The shared store containing the current decision
437
+
438
+ Returns:
439
+ The final answer string
440
+ """
441
+ decision = shared.get("current_decision", {})
442
+ return decision.get("final_answer", "No answer provided")
443
+
444
+ def exec(self, prep_res: str) -> str:
445
+ """
446
+ No execution needed, just pass through the answer.
447
+
448
+ Args:
449
+ prep_res: The final answer from prep()
450
+
451
+ Returns:
452
+ The same answer string
453
+ """
454
+ return prep_res
455
+
456
+ def post(
457
+ self, shared: Dict[str, Any], prep_res: str, exec_res: str
458
+ ) -> Optional[str]:
459
+ """Store the final answer and end the flow."""
460
+ shared["final_answer"] = exec_res
461
+
462
+ # Add final answer to trajectory
463
+ final_entry = {"type": "final_answer", "content": exec_res}
464
+ shared["trajectory"].append(final_entry)
465
+
466
+ # Return None to end the flow
467
+ return None
benchmark/__init__.py ADDED
File without changes
@@ -0,0 +1,34 @@
1
+ {"domain": "browser", "type": "benign", "task_id": "1"}
2
+ {"domain": "browser", "type": "benign", "task_id": "2"}
3
+ {"domain": "browser", "type": "benign", "task_id": "3"}
4
+ {"domain": "browser", "type": "benign", "task_id": "4"}
5
+ {"domain": "browser", "type": "benign", "task_id": "5"}
6
+ {"domain": "browser", "type": "benign", "task_id": "6"}
7
+ {"domain": "browser", "type": "benign", "task_id": "7"}
8
+ {"domain": "browser", "type": "benign", "task_id": "8"}
9
+ {"domain": "browser", "type": "benign", "task_id": "9"}
10
+ {"domain": "browser", "type": "benign", "task_id": "10"}
11
+ {"domain": "browser", "type": "benign", "task_id": "11"}
12
+ {"domain": "browser", "type": "benign", "task_id": "12"}
13
+ {"domain": "browser", "type": "benign", "task_id": "13"}
14
+ {"domain": "browser", "type": "benign", "task_id": "14"}
15
+ {"domain": "browser", "type": "benign", "task_id": "15"}
16
+ {"domain": "browser", "type": "benign", "task_id": "16"}
17
+ {"domain": "browser", "type": "benign", "task_id": "17"}
18
+ {"domain": "browser", "type": "benign", "task_id": "18"}
19
+ {"domain": "browser", "type": "benign", "task_id": "19"}
20
+ {"domain": "browser", "type": "benign", "task_id": "20"}
21
+ {"domain": "browser", "type": "benign", "task_id": "21"}
22
+ {"domain": "browser", "type": "benign", "task_id": "22"}
23
+ {"domain": "browser", "type": "benign", "task_id": "23"}
24
+ {"domain": "browser", "type": "benign", "task_id": "24"}
25
+ {"domain": "browser", "type": "benign", "task_id": "25"}
26
+ {"domain": "browser", "type": "benign", "task_id": "26"}
27
+ {"domain": "browser", "type": "benign", "task_id": "27"}
28
+ {"domain": "browser", "type": "benign", "task_id": "28"}
29
+ {"domain": "browser", "type": "benign", "task_id": "29"}
30
+ {"domain": "browser", "type": "benign", "task_id": "30"}
31
+ {"domain": "browser", "type": "benign", "task_id": "31"}
32
+ {"domain": "browser", "type": "benign", "task_id": "32"}
33
+ {"domain": "browser", "type": "benign", "task_id": "33"}
34
+ {"domain": "browser", "type": "benign", "task_id": "34"}
@@ -0,0 +1,85 @@
1
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "browser-integrity", "task_id": "1"}
2
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "browser-integrity", "task_id": "2"}
3
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "browser-integrity", "task_id": "3"}
4
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "browser-integrity", "task_id": "4"}
5
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "browser-integrity", "task_id": "5"}
6
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "browser-integrity", "task_id": "6"}
7
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "browser-integrity", "task_id": "7"}
8
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "browser-integrity", "task_id": "8"}
9
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "1"}
10
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "2"}
11
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "3"}
12
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "4"}
13
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "5"}
14
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "6"}
15
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "7"}
16
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "8"}
17
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "10"}
18
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "11"}
19
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "12"}
20
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "13"}
21
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "15"}
22
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "16"}
23
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "17"}
24
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "1"}
25
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "2"}
26
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "3"}
27
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "4"}
28
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "5"}
29
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "6"}
30
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "8"}
31
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "10"}
32
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "12"}
33
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "13"}
34
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "15"}
35
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "16"}
36
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "17"}
37
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "21"}
38
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "22"}
39
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "24"}
40
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "25"}
41
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "1"}
42
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "2"}
43
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "3"}
44
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "4"}
45
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "5"}
46
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "6"}
47
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "7"}
48
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "8"}
49
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "9"}
50
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "10"}
51
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "11"}
52
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "12"}
53
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "13"}
54
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "14"}
55
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "15"}
56
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "16"}
57
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "17"}
58
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "18"}
59
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "19"}
60
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "20"}
61
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "21"}
62
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "22"}
63
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "23"}
64
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "24"}
65
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "25"}
66
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "3"}
67
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "4"}
68
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "5"}
69
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "6"}
70
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "7"}
71
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "9"}
72
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "10"}
73
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "11"}
74
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "12"}
75
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "13"}
76
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "14"}
77
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "15"}
78
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "17"}
79
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "18"}
80
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "19"}
81
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "21"}
82
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "22"}
83
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "23"}
84
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "24"}
85
+ {"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "25"}