decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374) hide show
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
@@ -0,0 +1,231 @@
1
+ import os
2
+ import json
3
+ import shutil
4
+ import tempfile
5
+ import subprocess
6
+ from typing import Dict, List, Any, Optional
7
+ from dataclasses import dataclass
8
+
9
+
10
@dataclass
class MCPTool:
    """A single tool definition discovered on an MCP server.

    Plain data holder. ``StaticPluginGenerator`` reads these fields when it
    renders the tool registration code for a plugin.
    """

    # Tool name; sent as ``params.name`` in the generated "tools/call" request.
    name: str
    # Free-text description registered alongside the tool.
    description: str
    # Schema dict; serialized with ``json.dumps`` as the tool's ``parameters``.
    input_schema: Dict[str, Any]
16
+
17
+
18
@dataclass
class MCPServerTools:
    """An MCP server together with the tools discovered on it.

    Plain data holder consumed by ``StaticPluginGenerator``.
    """

    # Server name; used as the prefix of every generated tool name.
    name: str
    # Endpoint the generated plugin POSTs its JSON-RPC requests to.
    url: str
    # Tools exposed by this server.
    tools: List[MCPTool]
24
+
25
+
26
class StaticPluginGenerator:
    """Generates static OpenClaw plugins for MCP servers.

    A plugin is a small directory (``package.json``, ``openclaw.plugin.json``
    and ``index.ts``) that is rendered into a temporary directory and then
    copied into the OpenClaw extensions directory.
    """

    def __init__(self, extensions_dir: Optional[str] = None, debug: bool = False):
        """
        Initialize the plugin generator.

        Args:
            extensions_dir: Directory for OpenClaw extensions.
                Defaults to ~/.openclaw/extensions
            debug: Enable debug output
        """
        self.extensions_dir = extensions_dir or os.path.expanduser(
            "~/.openclaw/extensions"
        )
        self.debug = debug
        self._generated_plugins: List[str] = []

    @staticmethod
    def _validate_plugin_id(plugin_id: str) -> None:
        """Reject plugin ids that could escape the extensions directory.

        The id is joined onto ``extensions_dir`` and the result is passed to
        ``shutil.rmtree``; an empty id, ``.``/``..`` or an id containing a
        path separator would therefore delete a directory other than the
        plugin's own.

        Raises:
            ValueError: If ``plugin_id`` is not a single safe path component.
        """
        separators = [sep for sep in (os.sep, os.altsep) if sep]
        if (
            not isinstance(plugin_id, str)
            or plugin_id in ("", ".", "..")
            or any(sep in plugin_id for sep in separators)
        ):
            raise ValueError(f"Invalid plugin id: {plugin_id!r}")

    @staticmethod
    def _comment_safe(text: Any) -> str:
        """Collapse line breaks so ``text`` cannot terminate a ``//`` comment."""
        return " ".join(str(text).splitlines())

    def generate_plugin(
        self,
        servers: List[MCPServerTools],
        plugin_id: str,
    ) -> str:
        """
        Generate a static OpenClaw plugin for the given MCP servers.

        Args:
            servers: List of MCP servers with their tools
            plugin_id: Unique identifier for this plugin (a single path
                component; it becomes the plugin's directory name)

        Returns:
            Path to the installed plugin directory

        Raises:
            ValueError: If ``plugin_id`` is not a safe directory name.
        """
        self._validate_plugin_id(plugin_id)

        # Render into a scratch directory first so a failed render never
        # leaves a half-written plugin in the extensions directory.
        plugin_dir = tempfile.mkdtemp(prefix=f"openclaw_plugin_{plugin_id}_")

        try:
            # Generate plugin files
            self._write_package_json(plugin_dir, plugin_id)
            self._write_manifest(plugin_dir, plugin_id, servers)
            self._write_index_ts(plugin_dir, servers)

            # Install the plugin
            installed_path = self._install_plugin(plugin_dir, plugin_id)
            # Regenerating an existing plugin must not record its id twice.
            if plugin_id not in self._generated_plugins:
                self._generated_plugins.append(plugin_id)

            if self.debug:
                print(f"[PluginGenerator] Generated plugin '{plugin_id}' at {installed_path}")

            return installed_path

        finally:
            # Clean up temp directory
            shutil.rmtree(plugin_dir, ignore_errors=True)

    def _write_package_json(self, plugin_dir: str, plugin_id: str) -> None:
        """Write package.json for the plugin."""
        package = {
            "name": plugin_id,
            "version": "1.0.0",
            "type": "module",
            "openclaw": {
                "extensions": ["./index.ts"]
            },
            "dependencies": {}
        }

        with open(os.path.join(plugin_dir, "package.json"), "w", encoding="utf-8") as f:
            json.dump(package, f, indent=2)

    def _write_manifest(
        self,
        plugin_dir: str,
        plugin_id: str,
        servers: List[MCPServerTools]
    ) -> None:
        """Write openclaw.plugin.json manifest."""
        joined_names = ", ".join(s.name for s in servers)

        manifest = {
            "id": plugin_id,
            "name": f"MCP Tools ({joined_names})",
            "description": f"Static MCP tools for servers: {joined_names}",
            "configSchema": {
                "type": "object",
                "properties": {}
            }
        }

        with open(os.path.join(plugin_dir, "openclaw.plugin.json"), "w", encoding="utf-8") as f:
            json.dump(manifest, f, indent=2)

    def _render_tool_registration(self, server: Any, tool: Any) -> str:
        """Return the TypeScript ``api.registerTool`` call for one tool.

        Every value that originates from the server/tool definition is
        emitted through ``json.dumps`` so it is a correctly escaped string
        (or object) literal and cannot alter the generated code.
        """
        tool_name = f"{server.name}_{tool.name}"

        name_literal = json.dumps(tool_name)
        remote_name_literal = json.dumps(tool.name)
        description_literal = json.dumps(tool.description or "")
        url_literal = json.dumps(server.url)
        schema_json = json.dumps(tool.input_schema)
        # Plain string concatenation instead of a template literal, so that
        # backticks or "${" inside the tool name are inert.
        error_prefix_literal = json.dumps(f"Error calling {tool_name}: ")

        return f'''
  // Tool: {self._comment_safe(tool_name)} from {self._comment_safe(server.name)}
  api.registerTool({{
    name: {name_literal},
    description: {description_literal},
    parameters: {schema_json},
    async execute(_id: string, params: unknown) {{
      try {{
        const response = await fetch({url_literal}, {{
          method: "POST",
          headers: {{ "Content-Type": "application/json" }},
          body: JSON.stringify({{
            jsonrpc: "2.0",
            id: Date.now(),
            method: "tools/call",
            params: {{
              name: {remote_name_literal},
              arguments: params
            }}
          }})
        }});

        const result = await response.json();

        if (result.error) {{
          return {{
            content: [{{ type: "text", text: `Error: ${{result.error.message}}` }}],
            isError: true
          }};
        }}

        const text = result.result?.content
          ?.map((c: any) => c.text ?? c.data ?? "")
          .join("\\n") ?? JSON.stringify(result.result);

        return {{
          content: [{{ type: "text", text }}],
          isError: false
        }};
      }} catch (err) {{
        return {{
          content: [{{ type: "text", text: {error_prefix_literal} + String(err) }}],
          isError: true
        }};
      }}
    }}
  }});
'''

    def _write_index_ts(self, plugin_dir: str, servers: List[MCPServerTools]) -> None:
        """Write the main plugin TypeScript file."""
        # One registration snippet per (server, tool) pair, in input order.
        registrations = "".join(
            self._render_tool_registration(server, tool)
            for server in servers
            for tool in server.tools
        )

        # Combine into full plugin code (minimal logging to verify loading)
        total_tools = sum(len(s.tools) for s in servers)
        server_list = self._comment_safe(", ".join(s.name for s in servers))
        plugin_code = (
            "// Auto-generated static MCP tools plugin\n"
            f"// Generated for servers: {server_list}\n"
            "\n"
            "export default function (api: any) {\n"
            f'  console.log("[OpenClaw-MCP] Loading {total_tools} tools...");\n'
            f"{registrations}\n"
            '  console.log("[OpenClaw-MCP] Done.");\n'
            "}\n"
        )

        with open(os.path.join(plugin_dir, "index.ts"), "w", encoding="utf-8") as f:
            f.write(plugin_code)

    def _install_plugin(self, plugin_dir: str, plugin_id: str) -> str:
        """
        Install the plugin to OpenClaw extensions directory.

        Any existing directory for ``plugin_id`` is replaced.

        Returns:
            Path of the installed plugin directory.
        """
        self._validate_plugin_id(plugin_id)

        # Remove existing plugin if present
        target_dir = os.path.join(self.extensions_dir, plugin_id)
        if os.path.exists(target_dir):
            shutil.rmtree(target_dir)

        # Copy plugin to extensions directory
        os.makedirs(self.extensions_dir, exist_ok=True)
        shutil.copytree(plugin_dir, target_dir)

        if self.debug:
            print(f"[PluginGenerator] Installed plugin to: {target_dir}")

        return target_dir

    def uninstall_plugin(self, plugin_id: str) -> None:
        """Remove a generated plugin.

        Raises:
            ValueError: If ``plugin_id`` is not a safe directory name.
        """
        self._validate_plugin_id(plugin_id)
        target_dir = os.path.join(self.extensions_dir, plugin_id)

        if os.path.exists(target_dir):
            shutil.rmtree(target_dir)
            if self.debug:
                print(f"[PluginGenerator] Removed plugin directory: {target_dir}")

        if plugin_id in self._generated_plugins:
            self._generated_plugins.remove(plugin_id)

    def cleanup_all(self) -> None:
        """Remove all plugins generated by this instance."""
        # Iterate over a copy: uninstall_plugin mutates the list.
        for plugin_id in list(self._generated_plugins):
            self.uninstall_plugin(plugin_id)
@@ -0,0 +1,341 @@
1
+ import os
2
+ import json
3
+ from datetime import datetime, timezone
4
+ from typing import List, Dict, Any, Optional, Set
5
+
6
+ from dt_arena.src.types.trajectory import Trajectory
7
+
8
+
9
+ class OpenClawTrajectoryConverter:
10
+ """
11
+ Convert OpenClaw session logs to standard trajectory format.
12
+
13
+ The trajectory format is compatible with the DecodingTrust Agent Arena
14
+ evaluation platform and includes:
15
+ - Task info (task_id, instruction, domain, category)
16
+ - Trajectory info (success, step_count, duration, timestamp)
17
+ - Trajectory steps (user, agent, tool messages)
18
+ """
19
+
20
+ def __init__(self, output_dir: str, timestamp: Optional[str] = None):
21
+ self.output_dir = output_dir
22
+ self.timestamp = timestamp
23
+ os.makedirs(self.output_dir, exist_ok=True)
24
+
25
+ def convert_session_log(
26
+ self,
27
+ session_log_path: str,
28
+ metadata: Optional[Dict[str, Any]] = None,
29
+ task_id: Optional[str] = None,
30
+ duration: float = 0.0,
31
+ mcp_tool_lists: Optional[Dict[str, List[Dict[str, Any]]]] = None,
32
+ ) -> Optional[Trajectory]:
33
+ """
34
+ Convert an OpenClaw session JSONL file directly to a Trajectory object.
35
+
36
+ Reads the session log written by OpenClaw CLI and produces the standard
37
+ trajectory format (matching openaisdk output): each tool call yields
38
+ exactly 2 steps — an agent action step and a tool return step.
39
+
40
+ Args:
41
+ session_log_path: Path to the session JSONL file
42
+ metadata: Optional trace metadata (task_id, instruction, domain, etc.)
43
+ task_id: Optional task ID override
44
+ duration: Execution duration in seconds
45
+ mcp_tool_lists: Optional dict mapping server name to list of tool dicts
46
+ (each with "name" and "description"). Stored in traj_info.metadata
47
+ as available tools per MCP server.
48
+
49
+ Returns:
50
+ Trajectory object, or None if file is missing / empty
51
+ """
52
+ if not os.path.exists(session_log_path):
53
+ return None
54
+
55
+ meta = metadata or {}
56
+
57
+ # Create Trajectory
58
+ traj = Trajectory(
59
+ task_id=task_id or meta.get("task_id", "unknown"),
60
+ original_instruction=meta.get("instruction", ""),
61
+ malicious_instruction=meta.get("malicious_goal"),
62
+ domain=meta.get("domain"),
63
+ risk_category=meta.get("category"),
64
+ )
65
+ traj.data["traj_info"]["duration"] = round(duration, 3)
66
+ traj.data["traj_info"]["timestamp"] = datetime.now(timezone.utc).isoformat()
67
+ traj.data["traj_info"]["metadata"] = {
68
+ "framework": "openclaw",
69
+ }
70
+
71
+ # Store MCP tool lists as metadata (not trajectory steps)
72
+ if mcp_tool_lists:
73
+ traj.data["traj_info"]["metadata"]["mcp_tools"] = mcp_tool_lists
74
+
75
+ # Parse session JSONL / runtime trajectory JSONL
76
+ entries: List[Dict[str, Any]] = []
77
+ with open(session_log_path, "r", encoding="utf-8") as f:
78
+ for line in f:
79
+ line = line.strip()
80
+ if not line:
81
+ continue
82
+ try:
83
+ entry = json.loads(line)
84
+ except json.JSONDecodeError:
85
+ continue
86
+ if isinstance(entry, dict):
87
+ entries.append(entry)
88
+
89
+ if not entries:
90
+ return None
91
+
92
+ self._convert_runtime_trajectory_entries(traj, entries)
93
+
94
+ # Save trajectory file
95
+ if self.timestamp:
96
+ filename = f"{self.timestamp}.json"
97
+ else:
98
+ filename = f"{task_id or 'unknown'}.json"
99
+ output_path = os.path.join(self.output_dir, filename)
100
+ with open(output_path, "w", encoding="utf-8") as f:
101
+ json.dump(traj.to_dict(), f, indent=2, ensure_ascii=False)
102
+
103
+ return traj
104
+
105
+ def _convert_runtime_trajectory_entries(
106
+ self,
107
+ traj: Trajectory,
108
+ entries: List[Dict[str, Any]],
109
+ ) -> None:
110
+ """Convert OpenClaw 2026.4+ runtime trajectory JSONL traces."""
111
+ traj_meta = traj.data["traj_info"].setdefault("metadata", {})
112
+ traj_meta["trace_schema"] = "openclaw-trajectory"
113
+
114
+ last_assistant_text: Optional[str] = None
115
+ seen_user_messages: Set[str] = set()
116
+ seen_tool_call_ids: Set[str] = set()
117
+
118
+ for entry in entries:
119
+ event_type = entry.get("type")
120
+ data = entry.get("data") or {}
121
+
122
+ for key in ("traceId", "sessionId", "provider", "modelId", "modelApi", "workspaceDir", "runId"):
123
+ value = entry.get(key)
124
+ if value is not None:
125
+ traj_meta.setdefault(key, value)
126
+
127
+ if event_type == "trace.metadata":
128
+ harness = data.get("harness")
129
+ if harness:
130
+ traj_meta["harness"] = harness
131
+ invocation = (harness or {}).get("invocation")
132
+ if invocation:
133
+ traj_meta["invocation"] = invocation
134
+
135
+ elif event_type == "context.compiled":
136
+ system_prompt = data.get("systemPrompt")
137
+ if system_prompt:
138
+ traj_meta.setdefault("system_prompt", system_prompt)
139
+ available_tools = data.get("availableTools")
140
+ if available_tools:
141
+ traj_meta["available_tools"] = available_tools
142
+
143
+ elif event_type == "prompt.submitted":
144
+ prompt = data.get("prompt")
145
+ if prompt:
146
+ seen_user_messages.add(prompt)
147
+ traj.append_user_step(
148
+ prompt,
149
+ metadata={
150
+ "source": "openclaw-trajectory",
151
+ "session_id": entry.get("sessionId"),
152
+ "ts": entry.get("ts"),
153
+ },
154
+ )
155
+
156
+ elif event_type == "model.completed":
157
+ usage = data.get("usage")
158
+ if usage:
159
+ traj_meta["usage"] = usage
160
+ messages_snapshot = data.get("messagesSnapshot")
161
+ if isinstance(messages_snapshot, list):
162
+ snapshot_assistant_text = self._append_message_steps(
163
+ traj,
164
+ messages_snapshot,
165
+ source="openclaw-trajectory",
166
+ session_id=entry.get("sessionId"),
167
+ ts=entry.get("ts"),
168
+ seen_user_messages=seen_user_messages,
169
+ seen_tool_call_ids=seen_tool_call_ids,
170
+ )
171
+ if snapshot_assistant_text:
172
+ last_assistant_text = snapshot_assistant_text
173
+ assistant_text = self._join_assistant_texts(data.get("assistantTexts"))
174
+ if assistant_text and not last_assistant_text:
175
+ last_assistant_text = assistant_text
176
+
177
+ elif event_type == "session.ended":
178
+ traj_meta["final_status"] = data.get("status")
179
+
180
+ if last_assistant_text:
181
+ traj.append_agent_step(
182
+ action="send_message_to_user",
183
+ metadata={"message": last_assistant_text, "source": "openclaw-trajectory"},
184
+ )
185
+ traj.set_final_response(last_assistant_text)
186
+
187
+ def _append_message_steps(
188
+ self,
189
+ traj: Trajectory,
190
+ messages: List[Dict[str, Any]],
191
+ source: Optional[str] = None,
192
+ session_id: Optional[str] = None,
193
+ ts: Optional[str] = None,
194
+ seen_user_messages: Optional[Set[str]] = None,
195
+ seen_tool_call_ids: Optional[Set[str]] = None,
196
+ ) -> Optional[str]:
197
+ """Append user/tool steps from an OpenClaw message array."""
198
+ tool_results: Dict[str, Dict[str, Any]] = {}
199
+ for msg in messages:
200
+ if msg.get("role") == "toolResult":
201
+ call_id = msg.get("toolCallId")
202
+ if call_id:
203
+ tool_results[call_id] = msg
204
+
205
+ last_assistant_text: Optional[str] = None
206
+
207
+ for msg in messages:
208
+ role = msg.get("role")
209
+
210
+ if role == "user":
211
+ texts = self._extract_message_texts(msg.get("content", []))
212
+ if not texts:
213
+ continue
214
+ user_text = "\n".join(texts)
215
+ if seen_user_messages is not None and user_text in seen_user_messages:
216
+ continue
217
+ if seen_user_messages is not None:
218
+ seen_user_messages.add(user_text)
219
+ traj.append_user_step(
220
+ user_text,
221
+ metadata=self._build_trace_metadata(source, session_id, ts),
222
+ )
223
+
224
+ elif role == "assistant":
225
+ for block in msg.get("content", []):
226
+ if not isinstance(block, dict):
227
+ continue
228
+
229
+ if block.get("type") == "text":
230
+ text = block.get("text", "").strip()
231
+ if text:
232
+ last_assistant_text = text
233
+
234
+ elif block.get("type") == "toolCall":
235
+ call_id = block.get("id", "")
236
+ if seen_tool_call_ids is not None and call_id and call_id in seen_tool_call_ids:
237
+ continue
238
+ if seen_tool_call_ids is not None and call_id:
239
+ seen_tool_call_ids.add(call_id)
240
+
241
+ tool_name = block.get("name", "unknown")
242
+ arguments = block.get("arguments", {})
243
+ action_str = self._format_action_string(tool_name, arguments)
244
+ server = self._extract_server_name(tool_name)
245
+
246
+ traj.append_agent_step(
247
+ action=action_str,
248
+ tool_name=tool_name,
249
+ tool_params=arguments,
250
+ server=server,
251
+ metadata=self._build_trace_metadata(source, session_id, ts),
252
+ )
253
+
254
+ result_msg = tool_results.get(call_id)
255
+ if result_msg:
256
+ result_parts = self._extract_message_texts(result_msg.get("content", []))
257
+ result_payload = "\n".join(result_parts)
258
+ traj.append_tool_return(
259
+ result=self._parse_tool_output(result_payload),
260
+ tool_name=tool_name,
261
+ server=server,
262
+ metadata=self._build_trace_metadata(source, session_id, ts),
263
+ )
264
+
265
+ return last_assistant_text
266
+
267
+ @staticmethod
268
+ def _extract_message_texts(content: Any) -> List[str]:
269
+ """Extract text fragments from OpenClaw content blocks."""
270
+ texts: List[str] = []
271
+ if not isinstance(content, list):
272
+ return texts
273
+ for block in content:
274
+ if isinstance(block, dict) and block.get("type") == "text":
275
+ text = str(block.get("text", "")).strip()
276
+ if text:
277
+ texts.append(text)
278
+ elif isinstance(block, str):
279
+ text = block.strip()
280
+ if text:
281
+ texts.append(text)
282
+ return texts
283
+
284
+ @staticmethod
285
+ def _build_trace_metadata(
286
+ source: Optional[str],
287
+ session_id: Optional[str],
288
+ ts: Optional[str],
289
+ ) -> Dict[str, Any]:
290
+ metadata: Dict[str, Any] = {}
291
+ if source:
292
+ metadata["source"] = source
293
+ if session_id:
294
+ metadata["session_id"] = session_id
295
+ if ts:
296
+ metadata["ts"] = ts
297
+ return metadata
298
+
299
+ @staticmethod
300
+ def _join_assistant_texts(texts: Any) -> Optional[str]:
301
+ """Normalize OpenClaw runtime assistantTexts arrays into one response."""
302
+ if not texts or not isinstance(texts, list):
303
+ return None
304
+ parts = [str(text).strip() for text in texts if str(text).strip()]
305
+ if not parts:
306
+ return None
307
+ return "\n\n".join(parts)
308
+
309
+ @staticmethod
310
+ def _format_action_string(tool_name: str, arguments: Dict[str, Any]) -> str:
311
+ """Format a tool call into a human-readable action string."""
312
+ params = []
313
+ for k, v in arguments.items():
314
+ if isinstance(v, str):
315
+ v_display = v[:100] + "..." if len(v) > 100 else v
316
+ params.append(f'{k}="{v_display}"')
317
+ else:
318
+ params.append(f"{k}={v}")
319
+ return f"{tool_name}({', '.join(params)})"
320
+
321
+ @staticmethod
322
+ def _extract_server_name(tool_name: str) -> str:
323
+ """
324
+ Extract MCP server name from a prefixed tool name.
325
+
326
+ OpenClaw plugin tools are named like: workspace_ServerName_toolName
327
+ """
328
+ parts = tool_name.split("_", 2)
329
+ if len(parts) >= 3 and parts[0] == "workspace":
330
+ return parts[1]
331
+ return "openclaw"
332
+
333
+ @staticmethod
334
+ def _parse_tool_output(output_data: Any) -> Any:
335
+ """Parse tool output, attempting JSON decode for string data."""
336
+ if isinstance(output_data, str):
337
+ try:
338
+ return json.loads(output_data)
339
+ except json.JSONDecodeError:
340
+ return output_data
341
+ return output_data
@@ -0,0 +1,18 @@
1
+ """
2
+ PocketFlow Agent Implementation
3
+
4
+ This module provides an agent implementation using PocketFlow for workflow orchestration
5
+ with trajectory tracking and MCP server support.
6
+
7
+ """
8
+
9
+ from .src.agent import MCPReactAgent, MCPServerInfo
10
+ from .src.native_agent import NativeMCPReactAgent
11
+ from .src.agent_wrapper import MCPReactAgentNativeWrapper
12
+
13
+ __all__ = [
14
+ "MCPReactAgent",
15
+ "MCPServerInfo",
16
+ "NativeMCPReactAgent",
17
+ "MCPReactAgentNativeWrapper",
18
+ ]