decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374):
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
@@ -0,0 +1,441 @@
1
+ """GitHub Clone Sandbox MCP Server (Bearer-token based)."""
2
+ from __future__ import annotations
3
+
4
+ import os
5
+ from pathlib import Path
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ import httpx
9
+ from fastmcp import FastMCP
10
+
11
+ try:
12
+ import yaml
13
+ except Exception:
14
+ yaml = None
15
+
16
# Base URL of the sandbox GitHub API; override with the GITHUB_API_URL env var.
GITHUB_API_URL = os.environ.get("GITHUB_API_URL", "http://127.0.0.1:8045")
# Token used when a tool call does not supply its own access_token.
DEFAULT_USER_ACCESS_TOKEN = os.environ.get("USER_ACCESS_TOKEN", "")

# Server instance on which every tool below is registered via @mcp.tool().
mcp = FastMCP("GitHub Clone Sandbox MCP Server")
20
+
21
+
22
+ def _port_from_registry(default_port: int) -> int:
23
+ try:
24
+ if yaml is None:
25
+ return default_port
26
+ registry_path = Path(__file__).resolve().parent.parent / "registry.yaml"
27
+ if not registry_path.exists():
28
+ return default_port
29
+ data = yaml.safe_load(registry_path.read_text()) or {}
30
+ service_name = Path(__file__).resolve().parent.name
31
+ for srv in (data.get("servers") or []):
32
+ if isinstance(srv, dict) and srv.get("name") == service_name:
33
+ env = srv.get("env") or {}
34
+ port_str = str(env.get("PORT") or "").strip().strip('"')
35
+ return int(port_str) if port_str else default_port
36
+ except Exception:
37
+ return default_port
38
+ return default_port
39
+
40
+
41
+ def _resolve_token(token: Optional[str] = None) -> str:
42
+ return (token or DEFAULT_USER_ACCESS_TOKEN or "").strip()
43
+
44
+
45
def _require_token(token: Optional[str] = None) -> str:
    """Resolve a token via ``_resolve_token`` and insist that one exists.

    Raises:
        ValueError: If the resolved token is empty.
    """
    resolved = _resolve_token(token)
    if resolved:
        return resolved
    raise ValueError("access_token is required (or set USER_ACCESS_TOKEN)")
50
+
51
+
52
def _headers(token: Optional[str] = None) -> Dict[str, str]:
    """Build the request headers for a sandbox API call.

    The JSON ``Accept`` and ``Content-Type`` headers are always present; an
    ``Authorization: Bearer ...`` header is added only when a token resolves.
    """
    result = {"Accept": "application/json", "Content-Type": "application/json"}
    bearer = _resolve_token(token)
    if not bearer:
        return result
    result["Authorization"] = f"Bearer {bearer}"
    return result
58
+
59
+
60
async def _req(method: str, path: str, *, token: Optional[str] = None, params: Optional[Dict[str, Any]] = None, body: Optional[Dict[str, Any]] = None) -> Any:
    """Send one HTTP request to the sandbox API and return its JSON payload.

    Args:
        method: HTTP verb, e.g. ``"GET"`` or ``"POST"``.
        path: Path appended verbatim to ``GITHUB_API_URL``.
        token: Optional bearer token forwarded to ``_headers``.
        params: Optional query-string parameters.
        body: Optional JSON request body.

    Returns:
        The decoded JSON response, or ``None`` when a successful response has
        an empty body.

    Raises:
        httpx.HTTPStatusError: If the response status is 4xx or 5xx.
    """
    # A fresh client per call: opened here and closed when the block exits.
    async with httpx.AsyncClient() as client:
        response = await client.request(
            method,
            f"{GITHUB_API_URL}{path}",
            params=params,
            json=body,
            headers=_headers(token),
        )
        response.raise_for_status()
        # A successful reply may carry no body (e.g. 204 No Content); decoding
        # that as JSON would raise, so report "no payload" as None instead.
        if not response.content:
            return None
        return response.json()
65
+
66
+
67
@mcp.tool()
async def reset_environment() -> Any:
    """Reset GitHub sandbox data via reset API."""
    reset_endpoint = "/api/v1/reset"
    return await _req("POST", reset_endpoint)
71
+
72
+
73
@mcp.tool()
async def init_environment_from_json(spec: Dict[str, Any], reset_first: bool = True) -> Any:
    """Initialize GitHub sandbox environment from JSON spec.

    This is intended for scenario/task construction, not attack payload injection.
    """
    # Work on a shallow copy so the caller's spec never gains the "reset" key.
    request_body = {**spec} if spec else {}
    request_body["reset"] = True if reset_first else False
    return await _req("POST", "/api/v1/init-json", body=request_body)
82
+
83
+
84
@mcp.tool()
async def get_auth_context() -> Any:
    """Get MCP authentication mode info."""
    has_default_token = bool(DEFAULT_USER_ACCESS_TOKEN)
    context: Dict[str, Any] = {}
    context["authMode"] = "OAuth2 access token (Bearer)"
    context["requiresAccessTokenForWriteOps"] = True
    # Only reports whether a default token is configured, never its value.
    context["hasDefaultAccessToken"] = has_default_token
    context["apiBaseUrl"] = GITHUB_API_URL
    return context
93
+
94
+
95
+ # Users / Org
96
@mcp.tool()
async def get_user_profile(username: str) -> Any:
    """Fetch a user's profile by username.

    Returns the ``user`` field of the response when present, otherwise the
    whole response payload.
    """
    payload = await _req("GET", f"/api/users/{username}")
    return payload.get("user", payload)
100
+
101
+
102
@mcp.tool()
async def get_me(access_token: Optional[str] = None) -> Any:
    """Fetch the profile of the user the access token belongs to.

    A token is mandatory: it comes from ``access_token`` or the configured
    default. Returns the ``user`` field of the response when present,
    otherwise the whole response payload.
    """
    bearer = _require_token(access_token)
    payload = await _req("GET", "/api/me", token=bearer)
    return payload.get("user", payload)
106
+
107
+
108
@mcp.tool()
async def list_user_repos(username: str, page: int = 1, limit: int = 20, visibility: Optional[str] = None, search: Optional[str] = None) -> Any:
    """List a user's repositories, one page at a time.

    ``visibility`` and ``search`` are sent as query filters only when they
    are non-empty.
    """
    query: Dict[str, Any] = {"page": page, "limit": limit}
    for key, value in (("visibility", visibility), ("search", search)):
        if value:
            query[key] = value
    return await _req("GET", f"/api/users/{username}/repos", params=query)
116
+
117
+
118
@mcp.tool()
async def get_org(login: str) -> Any:
    """Fetch an organization by login.

    Returns the ``organization`` field of the response if present, else the
    ``org`` field, else the whole response payload.
    """
    payload = await _req("GET", f"/api/orgs/{login}")
    fallback = payload.get("org", payload)
    return payload.get("organization", fallback)
122
+
123
+
124
@mcp.tool()
async def list_org_members(login: str, page: int = 1, limit: int = 20) -> Any:
    """List the members of an organization, one page at a time."""
    paging = {"page": page, "limit": limit}
    return await _req("GET", f"/api/orgs/{login}/members", params=paging)
127
+
128
+
129
@mcp.tool()
async def list_org_repos(login: str, page: int = 1, limit: int = 20, visibility: Optional[str] = None, search: Optional[str] = None) -> Any:
    """List an organization's repositories, one page at a time.

    ``visibility`` and ``search`` are sent as query filters only when they
    are non-empty.
    """
    query: Dict[str, Any] = {"page": page, "limit": limit}
    for key, value in (("visibility", visibility), ("search", search)):
        if value:
            query[key] = value
    return await _req("GET", f"/api/orgs/{login}/repos", params=query)
137
+
138
+
139
+ # Repo / Branch / Commit
140
@mcp.tool()
async def search_repos(q: Optional[str] = None, page: int = 1, limit: int = 30) -> Any:
    """Search repositories, one page at a time.

    The ``q`` query parameter is sent only when ``q`` is non-empty.
    """
    query: Dict[str, Any] = {"page": page, "limit": limit}
    if q:
        query.update(q=q)
    return await _req("GET", "/api/repos", params=query)
146
+
147
+
148
@mcp.tool()
async def get_repo(owner: str, repo: str) -> Any:
    """Fetch a single repository.

    Returns the ``repository`` field of the response if present, else the
    ``repo`` field, else the whole response payload.
    """
    payload = await _req("GET", f"/api/repos/{owner}/{repo}")
    fallback = payload.get("repo", payload)
    return payload.get("repository", fallback)
152
+
153
+
154
@mcp.tool()
async def create_repo(ownerType: str, ownerLogin: str, name: str, description: Optional[str] = None, visibility: str = "public", defaultBranch: str = "main", access_token: Optional[str] = None) -> Any:
    """Create a repository under the given owner.

    A token is mandatory: it comes from ``access_token`` or the configured
    default. Returns the ``repository`` field of the response if present,
    else ``repo``, else ``item``, else the whole response payload.
    """
    bearer = _require_token(access_token)
    new_repo = {
        "ownerType": ownerType,
        "ownerLogin": ownerLogin,
        "name": name,
        "description": description,
        "visibility": visibility,
        "defaultBranch": defaultBranch,
    }
    payload = await _req("POST", "/api/repos", token=bearer, body=new_repo)
    # Try the wrapper keys in priority order; fall back to the raw payload.
    result = payload.get("item", payload)
    result = payload.get("repo", result)
    return payload.get("repository", result)
170
+
171
+
172
@mcp.tool()
async def update_repo(owner: str, repo: str, description: Optional[str] = None, visibility: Optional[str] = None, defaultBranch: Optional[str] = None, access_token: Optional[str] = None) -> Any:
    """Update a repository's description, visibility and/or default branch.

    Only the fields that are not ``None`` are sent in the PATCH body. A token
    is mandatory: it comes from ``access_token`` or the configured default.

    Returns:
        The ``repository`` field of the response if present, else the
        ``repo`` field, else the whole response payload.
    """
    body: Dict[str, Any] = {}
    if description is not None:
        body["description"] = description
    if visibility is not None:
        body["visibility"] = visibility
    if defaultBranch is not None:
        body["defaultBranch"] = defaultBranch
    data = await _req("PATCH", f"/api/repos/{owner}/{repo}", token=_require_token(access_token), body=body)
    # Unwrap the same keys as get_repo/create_repo so all repository tools
    # return the same shape regardless of which wrapper key the API uses.
    return data.get("repository", data.get("repo", data))
183
+
184
+
185
@mcp.tool()
async def fork_repo(owner: str, repo: str, access_token: Optional[str] = None) -> Any:
    """Fork a repository on behalf of the token's user.

    A token is mandatory: it comes from ``access_token`` or the configured
    default.

    Returns:
        The ``repository`` field of the response if present, else the
        ``repo`` field, else the whole response payload.
    """
    data = await _req("POST", f"/api/repos/{owner}/{repo}/fork", token=_require_token(access_token))
    # Unwrap the same keys as get_repo/create_repo so all repository tools
    # return the same shape regardless of which wrapper key the API uses.
    return data.get("repository", data.get("repo", data))
189
+
190
+
191
@mcp.tool()
async def star_repo(owner: str, repo: str, access_token: Optional[str] = None) -> Any:
    """Star a repository.

    A token is mandatory: it comes from ``access_token`` or the configured
    default. Returns the response payload unchanged.
    """
    endpoint = f"/api/repos/{owner}/{repo}/stars"
    bearer = _require_token(access_token)
    return await _req("POST", endpoint, token=bearer)
194
+
195
+
196
@mcp.tool()
async def unstar_repo(owner: str, repo: str, access_token: Optional[str] = None) -> Any:
    """Remove a star from a repository.

    A token is mandatory: it comes from ``access_token`` or the configured
    default. Returns the response payload unchanged.
    """
    endpoint = f"/api/repos/{owner}/{repo}/stars"
    bearer = _require_token(access_token)
    return await _req("DELETE", endpoint, token=bearer)
199
+
200
+
201
@mcp.tool()
async def list_branches(owner: str, repo: str) -> Any:
    """List the branches of a repository.

    Returns:
        The ``branches`` field when the response is a JSON object that has
        one; otherwise the response payload unchanged.
    """
    data = await _req("GET", f"/api/repos/{owner}/{repo}/branches")
    # Only a JSON object can be unwrapped. A bare list (or any other shape)
    # is returned unchanged rather than failing on a missing .get().
    if isinstance(data, dict):
        return data.get("branches", data)
    return data
205
+
206
+
207
@mcp.tool()
async def get_branch(owner: str, repo: str, branch: str) -> Any:
    """Get a single branch by name."""
    endpoint = f"/api/repos/{owner}/{repo}/branches/{branch}"
    payload = await _req("GET", endpoint)
    # Unwrap the "branch" field when the API wraps the result.
    return payload.get("branch", payload)
212
+
213
+
214
@mcp.tool()
async def create_branch(owner: str, repo: str, name: str, source_branch: str = "main", access_token: Optional[str] = None) -> Any:
    """Create branch ``name`` in a repository, based on ``source_branch``.

    A token is mandatory: it comes from ``access_token`` or the configured
    default. Returns the ``branch`` field of the response when present,
    otherwise the whole response payload.
    """
    endpoint = f"/api/repos/{owner}/{repo}/branches"
    bearer = _require_token(access_token)
    new_branch = {"name": name, "source_branch": source_branch}
    payload = await _req("POST", endpoint, token=bearer, body=new_branch)
    return payload.get("branch", payload)
218
+
219
+
220
@mcp.tool()
async def list_commits(owner: str, repo: str, branch: Optional[str] = None, page: int = 1, limit: int = 20) -> Any:
    """List commits of a repository, one page at a time.

    The ``branch`` query parameter is sent only when ``branch`` is non-empty.
    """
    query: Dict[str, Any] = {"page": page, "limit": limit}
    if branch:
        query.update(branch=branch)
    return await _req("GET", f"/api/repos/{owner}/{repo}/commits", params=query)
226
+
227
+
228
@mcp.tool()
async def get_commit(owner: str, repo: str, sha: str) -> Any:
    """Fetch a single commit by SHA.

    Returns the ``commit`` field of the response when present, otherwise the
    whole response payload.
    """
    payload = await _req("GET", f"/api/repos/{owner}/{repo}/commits/{sha}")
    return payload.get("commit", payload)
232
+
233
+
234
@mcp.tool()
async def create_commit(owner: str, repo: str, message: str, path: str, content: str, branch: Optional[str] = None, access_token: Optional[str] = None) -> Any:
    """Create a commit that writes ``content`` to ``path``.

    ``branch`` is sent only when non-empty. A token is mandatory: it comes
    from ``access_token`` or the configured default. Returns the ``commit``
    field of the response when present, otherwise the whole response payload.
    """
    commit_request: Dict[str, Any] = {"message": message, "path": path, "content": content}
    if branch:
        commit_request.update(branch=branch)
    endpoint = f"/api/repos/{owner}/{repo}/commits"
    bearer = _require_token(access_token)
    result = await _req("POST", endpoint, token=bearer, body=commit_request)
    return result.get("commit", result)
241
+
242
+
243
@mcp.tool()
async def get_repo_tree(owner: str, repo: str, branch: Optional[str] = None, path: str = "", commit: Optional[str] = None) -> Any:
    """Fetch the tree listing of a repository.

    ``branch``, ``path`` and ``commit`` are each sent as query parameters
    only when non-empty.
    """
    candidates = (("branch", branch), ("path", path), ("commit", commit))
    query: Dict[str, Any] = {key: value for key, value in candidates if value}
    return await _req("GET", f"/api/repos/{owner}/{repo}/tree", params=query)
253
+
254
+
255
@mcp.tool()
async def get_blob(owner: str, repo: str, path: str, branch: Optional[str] = None, commit: Optional[str] = None) -> Any:
    """Fetch the blob at ``path`` in a repository.

    ``path`` is always sent; ``branch`` and ``commit`` are sent as query
    parameters only when non-empty.
    """
    query: Dict[str, Any] = {"path": path}
    for key, value in (("branch", branch), ("commit", commit)):
        if value:
            query[key] = value
    return await _req("GET", f"/api/repos/{owner}/{repo}/blob", params=query)
263
+
264
+
265
@mcp.tool()
async def get_repository_content(owner: str, repo: str, path: str = "", ref: Optional[str] = None) -> Any:
    """GitHub-like content reader for files or directories.

    If `path` points to a file, returns blob payload.
    If `path` points to a directory (or empty), returns tree payload.
    """
    # Both endpoints take the same query: optional path, and ref sent as "branch".
    query: Dict[str, Any] = {}
    if path:
        query["path"] = path
    if ref:
        query["branch"] = ref

    tree_endpoint = f"/api/repos/{owner}/{repo}/tree"
    blob_endpoint = f"/api/repos/{owner}/{repo}/blob"

    # An empty path is the repository root: go straight to the tree.
    if not path:
        return await _req("GET", tree_endpoint, params=dict(query))

    # Try the path as a file first; only a 404 means "not a file, try a directory".
    try:
        return await _req("GET", blob_endpoint, params=dict(query))
    except httpx.HTTPStatusError as err:
        if err.response.status_code != 404:
            raise
        return await _req("GET", tree_endpoint, params=dict(query))
287
+
288
+
289
@mcp.tool()
async def list_repo_labels(owner: str, repo: str) -> Any:
    """List the labels of a repository; returns the response payload unchanged."""
    endpoint = f"/api/repos/{owner}/{repo}/labels"
    return await _req("GET", endpoint)
292
+
293
+
294
@mcp.tool()
async def list_repo_milestones(owner: str, repo: str) -> Any:
    """List the milestones of a repository; returns the response payload unchanged."""
    endpoint = f"/api/repos/{owner}/{repo}/milestones"
    return await _req("GET", endpoint)
297
+
298
+
299
@mcp.tool()
async def list_repo_stars(owner: str, repo: str) -> Any:
    """List stargazers/star metadata for a repository."""
    endpoint = f"/api/repos/{owner}/{repo}/stars"
    return await _req("GET", endpoint)
303
+
304
+
305
+ # Issues
306
@mcp.tool()
async def list_issues(owner: str, repo: str, page: int = 1, limit: int = 20, state: Optional[str] = None, search: Optional[str] = None) -> Any:
    """List issues of a repository, one page at a time.

    ``state`` and ``search`` are sent as query filters only when they are
    non-empty.
    """
    query: Dict[str, Any] = {"page": page, "limit": limit}
    for key, value in (("state", state), ("search", search)):
        if value:
            query[key] = value
    return await _req("GET", f"/api/repos/{owner}/{repo}/issues", params=query)
314
+
315
+
316
@mcp.tool()
async def get_issue(owner: str, repo: str, number: int) -> Any:
    """Fetch a single issue by number.

    Issues GET /api/repos/{owner}/{repo}/issues/{number}. If the response
    has an "issue" key its value is returned; otherwise the whole response
    is returned.
    """
    endpoint = f"/api/repos/{owner}/{repo}/issues/{number}"
    response = await _req("GET", endpoint)
    # Unwrap the envelope when present, else hand back the raw payload.
    return response.get("issue", response)
320
+
321
+
322
@mcp.tool()
async def create_issue(
    owner: str,
    repo: str,
    title: str,
    body: Optional[str] = None,
    labels: Optional[List[str]] = None,
    assignees: Optional[List[str]] = None,
    access_token: Optional[str] = None,
) -> Any:
    """Create an issue in a repository.

    Issues POST /api/repos/{owner}/{repo}/issues with the title, body,
    labels and assignees as the request body; missing `labels`/`assignees`
    are sent as empty lists. The token passed to `_req` is whatever
    `_require_token(access_token)` returns. If the response has an "issue"
    key its value is returned; otherwise the whole response is returned.
    """
    issue_fields = dict(
        title=title,
        body=body,
        labels=labels or [],
        assignees=assignees or [],
    )
    endpoint = f"/api/repos/{owner}/{repo}/issues"
    response = await _req(
        "POST",
        endpoint,
        token=_require_token(access_token),
        body=issue_fields,
    )
    return response.get("issue", response)
335
+
336
+
337
@mcp.tool()
async def add_issue_comment(
    owner: str,
    repo: str,
    number: int,
    body: str,
    access_token: Optional[str] = None,
) -> Any:
    """Post a comment on an issue.

    Issues POST /api/repos/{owner}/{repo}/issues/{number}/comments with
    `{"body": body}` and the token returned by `_require_token(access_token)`.
    If the response has a "comment" key its value is returned; otherwise
    the whole response is returned.
    """
    endpoint = f"/api/repos/{owner}/{repo}/issues/{number}/comments"
    comment_payload = {"body": body}
    response = await _req(
        "POST",
        endpoint,
        token=_require_token(access_token),
        body=comment_payload,
    )
    return response.get("comment", response)
341
+
342
+
343
@mcp.tool()
async def list_issue_comments(owner: str, repo: str, number: int, page: int = 1, limit: int = 20) -> Any:
    """List the comments of an issue, one page at a time.

    Issues GET /api/repos/{owner}/{repo}/issues/{number}/comments with
    `page` and `limit` as query parameters.
    """
    endpoint = f"/api/repos/{owner}/{repo}/issues/{number}/comments"
    paging = {"page": page, "limit": limit}
    return await _req("GET", endpoint, params=paging)
351
+
352
+
353
+ # Pull Requests
354
@mcp.tool()
async def list_pulls(
    owner: str,
    repo: str,
    page: int = 1,
    limit: int = 20,
    state: Optional[str] = None,
    search: Optional[str] = None,
) -> Any:
    """List pull requests of a repository, one page at a time.

    Issues GET /api/repos/{owner}/{repo}/pulls. `page` and `limit` are
    always sent; `state` and `search` are sent only when truthy.
    """
    query: Dict[str, Any] = {"page": page, "limit": limit}
    # Optional filters are appended in a fixed order and omitted when empty/None.
    for key, value in (("state", state), ("search", search)):
        if value:
            query[key] = value
    endpoint = f"/api/repos/{owner}/{repo}/pulls"
    return await _req("GET", endpoint, params=query)
362
+
363
+
364
@mcp.tool()
async def get_pull(owner: str, repo: str, number: int) -> Any:
    """Fetch a single pull request by number.

    Issues GET /api/repos/{owner}/{repo}/pulls/{number}. The value under
    "pullRequest" is returned when that key exists, else the value under
    "pull", else the whole response.
    """
    endpoint = f"/api/repos/{owner}/{repo}/pulls/{number}"
    response = await _req("GET", endpoint)
    # Two envelope keys are accepted; "pullRequest" takes precedence.
    fallback = response.get("pull", response)
    return response.get("pullRequest", fallback)
368
+
369
+
370
@mcp.tool()
async def create_pull(
    owner: str,
    repo: str,
    title: str,
    body: Optional[str] = None,
    source_branch: str = "main",
    target_branch: str = "main",
    access_token: Optional[str] = None,
) -> Any:
    """Open a pull request in a repository.

    Issues POST /api/repos/{owner}/{repo}/pulls with the title, body,
    source branch and target branch as the request body, using the token
    returned by `_require_token(access_token)`. The value under
    "pullRequest" is returned when that key exists, else the value under
    "pull", else the whole response.
    """
    pull_fields = dict(
        title=title,
        body=body,
        source_branch=source_branch,
        target_branch=target_branch,
    )
    endpoint = f"/api/repos/{owner}/{repo}/pulls"
    response = await _req(
        "POST",
        endpoint,
        token=_require_token(access_token),
        body=pull_fields,
    )
    # Two envelope keys are accepted; "pullRequest" takes precedence.
    fallback = response.get("pull", response)
    return response.get("pullRequest", fallback)
375
+
376
+
377
@mcp.tool()
async def add_pull_comment(
    owner: str,
    repo: str,
    number: int,
    body: str,
    access_token: Optional[str] = None,
) -> Any:
    """Post a comment on a pull request.

    Issues POST /api/repos/{owner}/{repo}/pulls/{number}/comments with
    `{"body": body}` and the token returned by `_require_token(access_token)`.
    If the response has a "comment" key its value is returned; otherwise
    the whole response is returned.
    """
    endpoint = f"/api/repos/{owner}/{repo}/pulls/{number}/comments"
    comment_payload = {"body": body}
    response = await _req(
        "POST",
        endpoint,
        token=_require_token(access_token),
        body=comment_payload,
    )
    return response.get("comment", response)
381
+
382
+
383
@mcp.tool()
async def list_pull_comments(owner: str, repo: str, number: int, page: int = 1, limit: int = 20) -> Any:
    """List the comments of a pull request, one page at a time.

    Issues GET /api/repos/{owner}/{repo}/pulls/{number}/comments with
    `page` and `limit` as query parameters.
    """
    endpoint = f"/api/repos/{owner}/{repo}/pulls/{number}/comments"
    paging = {"page": page, "limit": limit}
    return await _req("GET", endpoint, params=paging)
391
+
392
+
393
@mcp.tool()
async def list_pull_files(owner: str, repo: str, number: int) -> Any:
    """List changed files of a pull request.

    Fetches GET /api/repos/{owner}/{repo}/pulls/{number}, unwraps the
    "pullRequest" / "pull" envelope when present, and returns a dict with:

    - "pull_number": the requested number,
    - "files": the payload's "filesChanged" value (empty list if absent),
    - "changed_files_count": the payload's "changedFilesCount" (0 if absent).
    """
    # The pull request is fetched through `_req` directly instead of calling
    # the sibling `get_pull` tool: depending on the FastMCP version, a name
    # decorated with `@mcp.tool()` may be bound to a tool object that cannot
    # be awaited like a plain coroutine function.
    data = await _req("GET", f"/api/repos/{owner}/{repo}/pulls/{number}")
    pr = data.get("pullRequest", data.get("pull", data))
    return {
        "pull_number": number,
        "files": pr.get("filesChanged", []),
        "changed_files_count": pr.get("changedFilesCount", 0),
    }
402
+
403
+
404
@mcp.tool()
async def list_pull_commits(owner: str, repo: str, number: int) -> Any:
    """List commits associated with a pull request.

    Fetches GET /api/repos/{owner}/{repo}/pulls/{number}, unwraps the
    "pullRequest" / "pull" envelope when present, and returns a dict with:

    - "pull_number": the requested number,
    - "commits": the payload's "commits" value (empty list if absent or null),
    - "count": the number of entries in "commits".
    """
    # The pull request is fetched through `_req` directly instead of calling
    # the sibling `get_pull` tool: depending on the FastMCP version, a name
    # decorated with `@mcp.tool()` may be bound to a tool object that cannot
    # be awaited like a plain coroutine function.
    data = await _req("GET", f"/api/repos/{owner}/{repo}/pulls/{number}")
    pr = data.get("pullRequest", data.get("pull", data))
    # Look the list up once; a missing or null value is treated as "no
    # commits" so that len() below cannot fail on None.
    commits = pr.get("commits") or []
    return {
        "pull_number": number,
        "commits": commits,
        "count": len(commits),
    }
413
+
414
+
415
@mcp.tool()
async def merge_pull(owner: str, repo: str, number: int, access_token: Optional[str] = None) -> Any:
    """Merge a pull request.

    Issues PATCH /api/repos/{owner}/{repo}/pulls/{number}/merge with the
    token returned by `_require_token(access_token)` and returns the
    response unchanged.
    """
    endpoint = f"/api/repos/{owner}/{repo}/pulls/{number}/merge"
    response = await _req("PATCH", endpoint, token=_require_token(access_token))
    return response
418
+
419
+
420
+ # Activity / Notifications
421
@mcp.tool()
async def get_activity_feed(page: int = 1, limit: int = 20, access_token: Optional[str] = None) -> Any:
    """Fetch one page of the activity feed.

    Issues GET /api/activity with `page` and `limit` as query parameters
    and the token returned by `_require_token(access_token)`.
    """
    token = _require_token(access_token)
    paging = {"page": page, "limit": limit}
    return await _req("GET", "/api/activity", token=token, params=paging)
424
+
425
+
426
@mcp.tool()
async def list_notifications(all: bool = False, page: int = 1, limit: int = 20, access_token: Optional[str] = None) -> Any:
    """Fetch one page of notifications.

    Issues GET /api/notifications with `page` and `limit` as query
    parameters and the token returned by `_require_token(access_token)`.
    When `all` is truthy the query additionally carries `all=true`.
    """
    # The flag is serialized as the string "true" and omitted entirely when off.
    all_flag: Dict[str, Any] = {"all": "true"} if all else {}
    query: Dict[str, Any] = {"page": page, "limit": limit, **all_flag}
    return await _req(
        "GET",
        "/api/notifications",
        token=_require_token(access_token),
        params=query,
    )
432
+
433
+
434
@mcp.tool()
async def mark_notification_read(notification_id: str, access_token: Optional[str] = None) -> Any:
    """Mark a notification as read.

    Issues PATCH /api/notifications/{notification_id}/read with the token
    returned by `_require_token(access_token)` and returns the response
    unchanged.
    """
    endpoint = f"/api/notifications/{notification_id}/read"
    response = await _req("PATCH", endpoint, token=_require_token(access_token))
    return response
437
+
438
+
439
if __name__ == "__main__":
    # A non-empty PORT environment variable wins; otherwise the port comes
    # from `_port_from_registry`, called with 8867.
    env_port = os.getenv("PORT")
    if env_port:
        port = int(env_port)
    else:
        port = int(_port_from_registry(8867))
    # Serve over the HTTP transport, listening on all interfaces.
    mcp.run(transport="http", host="0.0.0.0", port=port)