decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374)
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
@@ -0,0 +1,318 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import sys
5
+ import json
6
+ from typing import Optional, Dict, Any, List
7
+
8
+ import httpx
9
+ from fastmcp import FastMCP
10
+ from pathlib import Path
11
+ try:
12
+ import yaml # type: ignore
13
+ except Exception: # pragma: no cover
14
+ yaml = None # fallback handled below
15
+
16
+
17
# Root URL of the sandbox Slack REST backend that every tool below calls;
# the SLACK_API_URL environment variable overrides the local default.
SLACK_API = os.environ.get("SLACK_API_URL", "http://localhost:8034")
18
+
19
+
20
def _parse_list_param(val: Any) -> Optional[List[str]]:
    """Parse a parameter that should be a list but might be a JSON string.

    Args:
        val: ``None``, a list, or a string holding either JSON array text
            or a single bare value.

    Returns:
        The list itself when ``val`` is already a list; the decoded array
        when ``val`` is JSON array text; a one-element list containing the
        stripped string otherwise. ``None`` is returned for ``None``, for a
        blank string, and for any other input type.
    """
    if val is None:
        return None
    if isinstance(val, list):
        return val
    if not isinstance(val, str):
        return None

    text = val.strip()
    if text.startswith("["):
        try:
            parsed = json.loads(text)
        except (ValueError, RecursionError):
            # Looked like a JSON array but is not one (json.JSONDecodeError
            # is a ValueError); fall back to treating it as a single value.
            parsed = None
        if isinstance(parsed, list):
            return parsed
    # Single value string
    return [text] if text else None
39
# Token used for outgoing requests when a caller supplies none; empty string
# when the USER_ACCESS_TOKEN environment variable is not set.
DEFAULT_USER_ACCESS_TOKEN = os.environ.get("USER_ACCESS_TOKEN", "")

# FastMCP application object; the @mcp.tool() coroutines in this file
# register themselves on it.
mcp = FastMCP("Slack MCP Server (Sandbox)")
41
+
42
def _port_from_registry(default_port: int) -> int:
    """Look up this service's port in ``registry.yaml`` (static fallback).

    The registry file is read from the directory one level above the
    directory containing this file, and the entry is selected by comparing
    its ``name`` with this file's directory name.

    Args:
        default_port: Port returned whenever the registry cannot supply one.

    Returns:
        The integer parsed from the matching entry's ``env.PORT``, or
        ``default_port`` when the YAML module is unavailable, the file is
        missing, no entry matches, the PORT value is empty, or any error
        occurs along the way.
    """
    try:
        if yaml is None:
            return default_port
        service_dir = Path(__file__).resolve().parent  # e.g. 'slack'
        registry_file = service_dir.parent / "registry.yaml"
        if not registry_file.exists():
            return default_port
        registry = yaml.safe_load(registry_file.read_text()) or {}
        for entry in registry.get("servers") or []:
            if not isinstance(entry, dict):
                continue
            if entry.get("name") != service_dir.name:
                continue
            # The first matching entry decides the outcome, even when its
            # PORT is blank.
            raw_port = str((entry.get("env") or {}).get("PORT") or "")
            cleaned = raw_port.strip().strip('"')
            if not cleaned:
                return default_port
            return int(cleaned)
    except Exception:
        # Best-effort lookup: any failure falls back to the default port.
        return default_port
    return default_port
60
+
61
async def _login(email: str, password: str = "password") -> str:
    """Exchange credentials for an access token at the auth endpoint.

    Args:
        email: Sent as the ``username`` form field.
        password: Sent as the ``password`` form field.

    Returns:
        The ``access_token`` value from the JSON response.

    Raises:
        Whatever ``raise_for_status()`` raises when the response carries an
        error status; ``KeyError`` if the response has no ``access_token``.
    """
    credentials = {"username": email, "password": password}
    async with httpx.AsyncClient() as http:
        # Credentials go out as form data (``data=``), not as a JSON body.
        response = await http.post(f"{SLACK_API}/api/v1/auth/login", data=credentials)
        response.raise_for_status()
        return response.json()["access_token"]
67
+
68
+
69
def _resolve_token(token: Optional[str] = None) -> str:
    """Pick the token to use and strip surrounding whitespace.

    Args:
        token: Explicit token; when falsy, DEFAULT_USER_ACCESS_TOKEN is used.

    Returns:
        The stripped token, or an empty string when neither is available.
    """
    chosen = token if token else DEFAULT_USER_ACCESS_TOKEN
    return (chosen or "").strip()
71
+
72
+
73
def _headers(token: Optional[str] = None) -> Dict[str, str]:
    """Build the request headers carrying the Bearer token.

    Args:
        token: Optional explicit token, resolved through ``_resolve_token``.

    Returns:
        ``{"Authorization": "Bearer <token>"}``, or an empty dict when no
        token could be resolved.
    """
    bearer = _resolve_token(token)
    if not bearer:
        return {}
    return {"Authorization": f"Bearer {bearer}"}
77
+
78
+
79
@mcp.tool()
async def login(email: str, password: str = "password") -> str:
    """Login and return access token.

    Prefer the pre-provisioned USER_ACCESS_TOKEN so agents do not need to issue
    sandbox credentials. Fall back to the auth endpoint if no default token is
    available.
    """
    preset = _resolve_token(None)
    if not preset:
        # No default token configured: perform a real credential login.
        return await _login(email, password)
    return preset
91
+
92
+
93
@mcp.tool()
async def list_channels(workspace_id: str) -> Any:
    """List channels in a workspace.

    Args:
        workspace_id: Workspace identifier (e.g., "W01").

    Returns:
        JSON array with channel objects.
    """
    # GET /api/v1/channels filtered by workspace; an error status raises
    # via raise_for_status().
    query = {"workspace_id": workspace_id}
    async with httpx.AsyncClient() as http:
        response = await http.get(f"{SLACK_API}/api/v1/channels", params=query, headers=_headers())
        response.raise_for_status()
        return response.json()
107
+
108
+
109
@mcp.tool()
async def list_users(workspace_id: str) -> Any:
    """List users in a workspace.

    Args:
        workspace_id: Workspace identifier.
    """
    # GET /api/v1/users filtered by workspace; returns the decoded JSON body.
    query = {"workspace_id": workspace_id}
    async with httpx.AsyncClient() as http:
        response = await http.get(f"{SLACK_API}/api/v1/users", params=query, headers=_headers())
        response.raise_for_status()
        return response.json()
120
+
121
+
122
@mcp.tool()
async def post_message(channel_name: str, text: str, thread_ts: Optional[float] = None) -> Any:
    """Post a message to a channel.

    Args:
        channel_name: Channel name (e.g., "general", "sales", "#client-meetings").
                      The '#' prefix is optional and will be stripped.
        text: Message text.
        thread_ts: Optional thread timestamp to reply-in-thread.
    """
    # Normalize channel to accept names like "#marketing": exactly one
    # leading '#' is dropped; non-string values pass through untouched.
    if isinstance(channel_name, str) and channel_name.startswith("#"):
        target = channel_name[1:]
    else:
        target = channel_name
    # thread_ts is always included in the body, as None when not replying.
    body = {"channel": target, "text": text, "thread_ts": thread_ts}
    async with httpx.AsyncClient() as http:
        response = await http.post(f"{SLACK_API}/api/v1/chat.postMessage", json=body, headers=_headers())
        response.raise_for_status()
        return response.json()
139
+
140
+
141
@mcp.tool()
async def channels_history(channel_name: str, workspace_id: str) -> Any:
    """Get recent messages for a channel.

    Args:
        channel_name: Channel name (e.g., "general", "sales", "#client-meetings").
                      The '#' prefix is optional and will be stripped.
        workspace_id: Workspace identifier (e.g., "W01").

    Returns:
        The decoded JSON response from the channels.history endpoint.
    """
    # Accept "#general" as well as "general", the same way post_message
    # does, so a name that works for posting also works for reading.
    if isinstance(channel_name, str) and channel_name.startswith("#"):
        normalized = channel_name[1:]
    else:
        normalized = channel_name
    query = {"channel": normalized, "workspace_id": workspace_id}
    async with httpx.AsyncClient() as client:
        r = await client.get(f"{SLACK_API}/api/v1/channels.history", params=query, headers=_headers())
        r.raise_for_status()
        return r.json()
153
+
154
+
155
@mcp.tool()
async def list_workspaces() -> Any:
    """List available workspaces for the current user."""
    # GET /api/v1/workspaces with the default token's Authorization header.
    endpoint = f"{SLACK_API}/api/v1/workspaces"
    async with httpx.AsyncClient() as http:
        response = await http.get(endpoint, headers=_headers())
        response.raise_for_status()
        return response.json()
162
+
163
+
164
@mcp.tool()
async def get_me() -> Any:
    """Get current user profile."""
    # GET /api/v1/me; the user is identified solely by the Bearer header.
    endpoint = f"{SLACK_API}/api/v1/me"
    async with httpx.AsyncClient() as http:
        response = await http.get(endpoint, headers=_headers())
        response.raise_for_status()
        return response.json()
171
+
172
+
173
@mcp.tool()
async def create_workspace(name: str) -> Any:
    """Create a new workspace (sandbox)."""
    # POST /api/v1/workspaces with the workspace name as the JSON body.
    body = {"name": name}
    async with httpx.AsyncClient() as http:
        response = await http.post(f"{SLACK_API}/api/v1/workspaces", json=body, headers=_headers())
        response.raise_for_status()
        return response.json()
180
+
181
+
182
@mcp.tool()
async def create_channel(workspace_id: str, name: str, is_private: bool = False) -> Any:
    """Create a channel in a workspace."""
    # POST /api/v1/channels; is_private is forwarded as given (default False).
    body = {"workspace_id": workspace_id, "name": name, "is_private": is_private}
    async with httpx.AsyncClient() as http:
        response = await http.post(f"{SLACK_API}/api/v1/channels", json=body, headers=_headers())
        response.raise_for_status()
        return response.json()
190
+
191
+
192
@mcp.tool()
async def open_dm(
    workspace_id: str,
    user_ids: Optional[Any] = None,
    user_emails: Optional[Any] = None,
) -> Any:
    """
    Open (or get) a DM conversation with the given users in a workspace.
    Accepts either user_ids or user_emails (emails will be normalized to users server-side).
    """
    # Both arguments may arrive as a list, a JSON array string, or a single
    # string; _parse_list_param turns each into a list or None.
    ids = _parse_list_param(user_ids)
    emails = _parse_list_param(user_emails)
    if not ids and not emails:
        # Nothing to address: report the problem without calling the API.
        return {"error": "Provide at least one of user_ids or user_emails"}

    # Only non-empty recipient lists are included in the request body.
    body: Dict[str, Any] = {"workspace_id": workspace_id}
    if ids:
        body["user_ids"] = ids
    if emails:
        body["user_emails"] = emails
    async with httpx.AsyncClient() as http:
        response = await http.post(
            f"{SLACK_API}/api/v1/conversations.open",
            json=body,
            headers=_headers(),
        )
        response.raise_for_status()
        return response.json()
219
+
220
+
221
@mcp.tool()
async def list_dms(workspace_id: str) -> Any:
    """List DM conversations the current user participates in for a workspace."""
    # GET /api/v1/conversations filtered by workspace.
    query = {"workspace_id": workspace_id}
    async with httpx.AsyncClient() as http:
        response = await http.get(f"{SLACK_API}/api/v1/conversations", params=query, headers=_headers())
        response.raise_for_status()
        return response.json()
228
+
229
+
230
@mcp.tool()
async def post_message_dm(conversation_id: str, text: str) -> Any:
    """Post a message to a DM conversation."""
    # POST /api/v1/chat.postMessageDm with the conversation id and text.
    body = {"conversation_id": conversation_id, "text": text}
    async with httpx.AsyncClient() as http:
        response = await http.post(f"{SLACK_API}/api/v1/chat.postMessageDm", json=body, headers=_headers())
        response.raise_for_status()
        return response.json()
238
+
239
+
240
@mcp.tool()
async def conversations_history(conversation_id: str) -> Any:
    """Get message history for a DM conversation."""
    # GET /api/v1/conversations.history for the given conversation id.
    query = {"conversation_id": conversation_id}
    async with httpx.AsyncClient() as http:
        response = await http.get(f"{SLACK_API}/api/v1/conversations.history", params=query, headers=_headers())
        response.raise_for_status()
        return response.json()
247
+
248
+
249
@mcp.tool()
async def inbox(workspace_id: str, limit: int = 50) -> Any:
    """Aggregated inbox: mentions from channels and DMs (most recent first)."""
    # GET /api/v1/inbox; limit is forwarded to the backend as a query param.
    query = {"workspace_id": workspace_id, "limit": limit}
    async with httpx.AsyncClient() as http:
        response = await http.get(f"{SLACK_API}/api/v1/inbox", params=query, headers=_headers())
        response.raise_for_status()
        return response.json()
256
+
257
+
258
@mcp.tool()
async def dm_feed(workspace_id: str, limit: int = 50) -> Any:
    """DM feed list sorted by latest activity."""
    # GET /api/v1/dm_feed; limit is forwarded to the backend as a query param.
    query = {"workspace_id": workspace_id, "limit": limit}
    async with httpx.AsyncClient() as http:
        response = await http.get(f"{SLACK_API}/api/v1/dm_feed", params=query, headers=_headers())
        response.raise_for_status()
        return response.json()
265
+
266
+
267
@mcp.tool()
async def workspaces_invite(workspace_id: str, email: str) -> Any:
    """Invite a user to a workspace by email."""
    # POST /api/v1/workspaces.invite with the workspace id and invitee email.
    body = {"workspace_id": workspace_id, "email": email}
    async with httpx.AsyncClient() as http:
        response = await http.post(f"{SLACK_API}/api/v1/workspaces.invite", json=body, headers=_headers())
        response.raise_for_status()
        return response.json()
275
+
276
+
277
@mcp.tool()
async def channels_invite(workspace_id: str, channel_name: str, emails: Optional[Any] = None, names: Optional[Any] = None) -> Any:
    """Invite users to a channel by emails and/or names.

    Args:
        workspace_id: Workspace identifier (e.g., "W01").
        channel_name: Channel name (e.g., "general", "sales", "#sales").
                      The '#' prefix is optional and will be stripped.
        emails: Email addresses to invite.
        names: User names to invite.

    Returns:
        The decoded JSON response from the channels.invite endpoint.
    """
    # emails / names may arrive as a list, a JSON array string, or a single
    # string; _parse_list_param turns each into a list or None.
    emails_list = _parse_list_param(emails)
    names_list = _parse_list_param(names)
    # Accept "#general" as well as "general", the same way post_message
    # does, so the same channel spelling works across tools.
    if isinstance(channel_name, str) and channel_name.startswith("#"):
        normalized = channel_name[1:]
    else:
        normalized = channel_name
    async with httpx.AsyncClient() as client:
        payload: Dict[str, Any] = {"workspace_id": workspace_id, "channel": normalized}
        # Only non-empty invitee lists are included in the request body.
        if emails_list:
            payload["emails"] = emails_list
        if names_list:
            payload["names"] = names_list
        r = await client.post(f"{SLACK_API}/api/v1/channels.invite", json=payload, headers=_headers())
        r.raise_for_status()
        return r.json()
298
+
299
+
300
+
301
+
302
def main() -> None:
    """Start the MCP server over HTTP.

    Port selection: a ``PORT`` environment variable consisting only of
    digits wins; otherwise the registry lookup is used with 8844 as its
    default.
    """
    print("Starting Slack MCP Server (Sandbox)...", file=sys.stderr)
    sys.stderr.flush()
    # An explicit PORT supports dynamic / per-task allocation; the static
    # registry configuration is only the fallback.
    requested = os.environ.get("PORT", "").strip()
    port = int(requested) if requested.isdigit() else _port_from_registry(8844)
    mcp.run(transport="http", port=port)
313
+
314
+
315
# Start the server only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()
317
+
318
+