decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374) hide show
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
@@ -0,0 +1,520 @@
1
+ """Helper functions for interacting with the Atlassian sandbox API."""
2
+ from __future__ import annotations
3
+
4
+ import os
5
+ import json
6
+ import re
7
+ from http.client import HTTPConnection, HTTPSConnection
8
+ from typing import Dict, Any, Optional, List
9
+ from urllib.parse import urlparse, urlencode
10
+ from pathlib import Path
11
+
12
+ import yaml
13
+
14
+
15
+ def _get_connection(base_url: str) -> HTTPConnection:
16
+ """Create an HTTP(S) connection from a base URL."""
17
+ parsed = urlparse(base_url)
18
+ host = parsed.hostname or "localhost"
19
+ port = parsed.port or (443 if parsed.scheme == "https" else 80)
20
+
21
+ if parsed.scheme == "https":
22
+ return HTTPSConnection(host, port)
23
+ return HTTPConnection(host, port)
24
+
25
+
26
+ def _resolve_base_url(explicit: Optional[str] = None) -> str:
27
+ """Resolve Atlassian API base URL with dynamic-port awareness."""
28
+ if explicit:
29
+ return explicit.rstrip("/")
30
+
31
+ env_url = (os.getenv("ATLASSIAN_API_URL") or "").strip()
32
+ if env_url:
33
+ return env_url.rstrip("/")
34
+
35
+ env_port = (os.getenv("ATLASSIAN_API_PORT") or "").strip()
36
+ if env_port:
37
+ return f"http://127.0.0.1:{env_port}"
38
+
39
+ # Fallback to registry static default.
40
+ try:
41
+ root = Path(__file__).resolve().parents[3]
42
+ registry_file = root / "dt_arena" / "envs" / "registry.yaml"
43
+ if registry_file.exists():
44
+ reg = yaml.safe_load(registry_file.read_text()) or {}
45
+ base = ((reg.get("services") or {}).get("atlassian") or {}).get("api_base_url")
46
+ if base:
47
+ return str(base).rstrip("/")
48
+ except Exception:
49
+ pass
50
+
51
+ return "http://localhost:8040"
52
+
53
+
54
class AtlassianClient:
    """Simple HTTP client for the Atlassian sandbox API.

    Every call opens a fresh connection, sends one JSON request and closes
    the connection again. HTTP error statuses (>= 400) are not raised; they
    are returned as ``{"error": True, "status": <code>, "message": <body>}``.
    """

    # Loose UUID shape check (36 hex digits / hyphens), compiled once instead
    # of on every list_issues() call.
    _UUID_RE = re.compile(r"[0-9a-fA-F-]{36}")

    def __init__(self, base_url: Optional[str] = None, token: Optional[str] = None):
        """Initialize the client.

        Args:
            base_url: API base URL (default: resolved by ``_resolve_base_url``,
                which consults ATLASSIAN_API_URL among other sources)
            token: JWT access token (default: from USER_ACCESS_TOKEN env)
        """
        self.base_url = _resolve_base_url(base_url)
        self.token = token or os.getenv("USER_ACCESS_TOKEN", "")

    def _headers(self) -> Dict[str, str]:
        """Build request headers, adding a bearer token when one is set."""
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
        }
        if self.token:
            headers["Authorization"] = f"Bearer {self.token}"
        return headers

    def _request(
        self,
        method: str,
        path: str,
        body: Optional[Dict[str, Any]] = None,
        params: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Make an HTTP request.

        Args:
            method: HTTP method (GET, POST, PATCH, DELETE)
            path: API path (e.g., /api/issues)
            body: Request body (for POST/PATCH); serialized as JSON
            params: Query parameters

        Returns:
            The decoded JSON response (``{}`` for an empty body), or an
            ``{"error": True, "status": ..., "message": ...}`` dict when the
            status code is 400 or above.

        Raises:
            json.JSONDecodeError: If a successful response body is not JSON.
        """
        conn = _get_connection(self.base_url)
        url = f"{path}?{urlencode(params)}" if params else path
        # Compare against None so an explicit empty dict is still sent as
        # "{}" rather than as a JSON request with no body at all.
        payload = json.dumps(body) if body is not None else None

        try:
            conn.request(method, url, body=payload, headers=self._headers())
            resp = conn.getresponse()
            data = resp.read().decode("utf-8")

            if resp.status >= 400:
                return {"error": True, "status": resp.status, "message": data}

            return json.loads(data) if data else {}
        finally:
            conn.close()

    @staticmethod
    def _extract_items(result: Any) -> Any:
        """Return the ``items`` entry of a dict payload, else the payload itself."""
        if isinstance(result, dict):
            return result.get("items", [])
        return result

    def login(self, email: str, password: str = "password") -> str:
        """Login and get JWT token.

        On success the token is also stored on the client for later requests.

        Args:
            email: User email
            password: User password

        Returns:
            JWT token, or an empty string when the response carries none
        """
        result = self._request("POST", "/api/auth/login", {"email": email, "password": password})
        # The payload is only usable when it is a dict containing "token";
        # error payloads and unexpected shapes yield an empty token.
        if not isinstance(result, dict) or "token" not in result:
            return ""
        self.token = result["token"]
        return self.token

    def list_projects(self, search: Optional[str] = None, limit: int = 25, offset: int = 0) -> List[Dict[str, Any]]:
        """List projects.

        Args:
            search: Optional search query
            limit: Max results
            offset: Pagination offset

        Returns:
            List of projects (empty when the response has no usable list)
        """
        params: Dict[str, Any] = {"limit": limit, "offset": offset}
        if search:
            params["search"] = search
        # Accept both {"items": [...]} and a bare list, as list_issues does.
        items = self._extract_items(self._request("GET", "/api/projects", params=params))
        return items if isinstance(items, list) else []

    def get_project(self, project_id: str) -> Dict[str, Any]:
        """Get project by ID.

        Args:
            project_id: Project UUID or key

        Returns:
            Project details
        """
        return self._request("GET", f"/api/projects/{project_id}")

    def _resolve_project_id(self, project_id: str) -> str:
        """Map a project key (e.g. "OPS") to its id; pass UUID-like values through.

        Falls back to the given value when no project with that key is found.
        """
        if self._UUID_RE.fullmatch(project_id or ""):
            return project_id
        wanted = project_id.lower()
        for proj in self.list_projects(limit=200, offset=0):
            if isinstance(proj, dict) and str(proj.get("key") or "").lower() == wanted:
                return proj.get("id") or project_id
        return project_id

    def list_issues(
        self,
        project_id: Optional[str] = None,
        limit: int = 25,
        offset: int = 0,
    ) -> List[Dict[str, Any]]:
        """List issues.

        Args:
            project_id: Optional project filter (UUID or project key)
            limit: Max results
            offset: Pagination offset

        Returns:
            List of normalized issues
        """
        params: Dict[str, Any] = {"limit": limit, "offset": offset}

        if not project_id:
            # Use search endpoint without query to list all issues
            # (the /api/issues endpoint requires projectId)
            items = self._extract_items(self._request("GET", "/api/issues/search", params=params))
            return self._normalize_issues(items)

        # Judge code often passes project key (e.g., "OPS"), but the backend
        # issues/projects endpoint expects UUID. Resolve key -> id first.
        resolved_project_id = self._resolve_project_id(project_id)
        items = self._extract_items(
            self._request("GET", f"/api/issues/projects/{resolved_project_id}", params=params)
        )

        # Fallback to search endpoint when project route returns empty for key/id mismatch.
        if not items:
            search_params = {"projectId": resolved_project_id, "limit": limit, "offset": offset}
            items = self._extract_items(self._request("GET", "/api/issues/search", params=search_params))

        # Last-resort fallback: some tasks/judges pass a project key that does not
        # exist in the current seeded workspace. Return global issues instead of
        # hard-empty to reduce false negatives in environment validation.
        if not items:
            items = self._extract_items(
                self._request("GET", "/api/issues/search", params={"limit": limit, "offset": offset})
            )

        return self._normalize_issues(items)

    def get_issue(self, issue_id: str) -> Dict[str, Any]:
        """Get issue by ID or key.

        Args:
            issue_id: Issue UUID or key (e.g., PROJ-123)

        Returns:
            Issue details, normalized when the response is a dict
        """
        issue = self._request("GET", f"/api/issues/{issue_id}")
        if isinstance(issue, dict):
            return self._normalize_issue(issue)
        return issue

    @staticmethod
    def _normalize_issue(issue: Dict[str, Any]) -> Dict[str, Any]:
        """Normalize backend issue payload to include Jira-like summary fields.

        Returns a shallow copy with ``summary`` always set (from ``summary`` or
        ``title``), ``description`` set when any of ``description``/``body``/
        ``content``/``details`` is non-empty, and both mirrored into ``fields``
        unless ``fields`` already provides them.
        """
        out = dict(issue)
        summary = out.get("summary") or out.get("title") or ""
        out["summary"] = summary

        description = (
            out.get("description")
            or out.get("body")
            or out.get("content")
            or out.get("details")
            or ""
        )
        if isinstance(description, dict):
            try:
                description = json.dumps(description, ensure_ascii=False)
            except (TypeError, ValueError):
                # Not JSON-serializable: fall back to the repr-style string.
                description = str(description)
        description = str(description)
        if description:
            out["description"] = description

        fields = dict(out.get("fields") or {})
        if not fields.get("summary"):
            fields["summary"] = summary
        if out.get("description") and not fields.get("description"):
            fields["description"] = out.get("description")
        out["fields"] = fields
        return out

    def _normalize_issues(self, items: Any) -> List[Dict[str, Any]]:
        """Normalize every dict in ``items``; non-list input yields ``[]``."""
        if not isinstance(items, list):
            return []
        return [self._normalize_issue(issue) for issue in items if isinstance(issue, dict)]

    def create_issue(
        self,
        project_id: str,
        title: str,
        issue_type: str = "task",
        description: Optional[str] = None,
        priority: str = "medium",
        assignee_id: Optional[str] = None,
        labels: Optional[List[str]] = None,
    ) -> Dict[str, Any]:
        """Create a new issue.

        Args:
            project_id: Project UUID
            title: Issue title
            issue_type: Type (story, task, bug, epic)
            description: Issue description
            priority: Priority level
            assignee_id: Assignee UUID
            labels: List of labels

        Returns:
            Created issue
        """
        body: Dict[str, Any] = {
            "projectId": project_id,
            "title": title,
            "type": issue_type,
            "priority": priority,
        }
        # Optional attributes are only sent when they carry a value.
        if description:
            body["description"] = description
        if assignee_id:
            body["assigneeId"] = assignee_id
        if labels:
            body["labels"] = labels

        return self._request("POST", "/api/issues", body=body)

    def update_issue(
        self,
        issue_id: str,
        title: Optional[str] = None,
        description: Optional[str] = None,
        priority: Optional[str] = None,
        labels: Optional[List[str]] = None,
    ) -> Dict[str, Any]:
        """Update an issue.

        Only arguments that are not ``None`` are sent, so empty strings and
        empty lists can be used to clear a field.

        Args:
            issue_id: Issue UUID or key
            title: New title
            description: New description
            priority: New priority
            labels: New labels

        Returns:
            Updated issue
        """
        candidates = {
            "title": title,
            "description": description,
            "priority": priority,
            "labels": labels,
        }
        body = {key: value for key, value in candidates.items() if value is not None}

        return self._request("PATCH", f"/api/issues/{issue_id}", body=body)

    def add_comment(self, issue_id: str, body: str) -> Dict[str, Any]:
        """Add a comment to an issue.

        Args:
            issue_id: Issue UUID or key
            body: Comment text

        Returns:
            Created comment
        """
        return self._request("POST", f"/api/issues/{issue_id}/comments", body={"body": body})

    def search_issues(
        self,
        query: str,
        project_id: Optional[str] = None,
        limit: int = 25,
        offset: int = 0,
    ) -> List[Dict[str, Any]]:
        """Search issues.

        Args:
            query: Search query
            project_id: Optional project filter
            limit: Max results
            offset: Pagination offset

        Returns:
            List of matching issues as returned by the backend (not
            normalized); empty when the response has no usable list
        """
        params: Dict[str, Any] = {"q": query, "limit": limit, "offset": offset}
        if project_id:
            params["projectId"] = project_id
        items = self._extract_items(self._request("GET", "/api/issues/search", params=params))
        # Honour the declared return type even for null/odd payloads.
        return items if isinstance(items, list) else []
375
+
376
+
377
+ # Convenience functions that accept explicit credentials
378
def _get_client(base_url: Optional[str] = None, token: Optional[str] = None) -> AtlassianClient:
    """Build an AtlassianClient from optional explicit credentials.

    Args:
        base_url: API base URL, passed through to the client constructor
            (which resolves a default when it is not provided)
        token: Access token, passed through to the client constructor
            (which falls back to USER_ACCESS_TOKEN env when not provided)

    Returns:
        A newly constructed client.
    """
    client = AtlassianClient(base_url=base_url, token=token)
    return client
386
+
387
+
388
def login(email: str, password: str = "password", base_url: Optional[str] = None) -> str:
    """Log in through a freshly built client and return its result.

    Args:
        email: Account email passed to the client's ``login``.
        password: Account password; defaults to ``"password"``.
        base_url: Optional API base URL handed to ``_get_client``.

    Returns:
        Whatever the client's ``login`` returns (annotated as the token string).
    """
    client = _get_client(base_url=base_url)
    return client.login(email, password)
391
+
392
+
393
def list_projects(
    token: str,
    base_url: Optional[str] = None,
    search: Optional[str] = None,
    limit: int = 25,
    offset: int = 0,
) -> List[Dict[str, Any]]:
    """List projects using explicit credentials.

    Args:
        token: Access token handed to ``_get_client``.
        base_url: Optional API base URL handed to ``_get_client``.
        search: Forwarded as the first positional argument of the client's
            ``list_projects``.
        limit: Forwarded as the second positional argument.
        offset: Forwarded as the third positional argument.

    Returns:
        Whatever the client's ``list_projects`` returns.
    """
    client = _get_client(base_url=base_url, token=token)
    return client.list_projects(search, limit, offset)
402
+
403
+
404
def get_project(project_id: str, token: str, base_url: Optional[str] = None) -> Dict[str, Any]:
    """Fetch a single project by ID using explicit credentials.

    Args:
        project_id: Identifier forwarded to the client's ``get_project``.
        token: Access token handed to ``_get_client``.
        base_url: Optional API base URL handed to ``_get_client``.

    Returns:
        Whatever the client's ``get_project`` returns.
    """
    client = _get_client(base_url=base_url, token=token)
    return client.get_project(project_id)
407
+
408
+
409
def list_issues(
    token: str,
    base_url: Optional[str] = None,
    project_id: Optional[str] = None,
    limit: int = 25,
    offset: int = 0,
) -> List[Dict[str, Any]]:
    """List issues using explicit credentials.

    Args:
        token: Access token handed to ``_get_client``.
        base_url: Optional API base URL handed to ``_get_client``.
        project_id: Forwarded as the first positional argument of the
            client's ``list_issues``.
        limit: Forwarded as the second positional argument.
        offset: Forwarded as the third positional argument.

    Returns:
        Whatever the client's ``list_issues`` returns.
    """
    client = _get_client(base_url=base_url, token=token)
    return client.list_issues(project_id, limit, offset)
418
+
419
+
420
def get_issue(issue_id: str, token: str, base_url: Optional[str] = None) -> Dict[str, Any]:
    """Fetch a single issue by ID or key using explicit credentials.

    Args:
        issue_id: Identifier forwarded to the client's ``get_issue``.
        token: Access token handed to ``_get_client``.
        base_url: Optional API base URL handed to ``_get_client``.

    Returns:
        Whatever the client's ``get_issue`` returns.
    """
    client = _get_client(base_url=base_url, token=token)
    return client.get_issue(issue_id)
423
+
424
+
425
def create_issue(
    project_id: str,
    title: str,
    token: str,
    base_url: Optional[str] = None,
    issue_type: str = "task",
    description: Optional[str] = None,
    priority: str = "medium",
    assignee_id: Optional[str] = None,
    labels: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Create a new issue using explicit credentials.

    Args:
        project_id: Project the issue is created in.
        title: Issue title.
        token: Access token handed to ``_get_client``.
        base_url: Optional API base URL handed to ``_get_client``.
        issue_type: Issue type; defaults to ``"task"``.
        description: Optional description.
        priority: Priority; defaults to ``"medium"``.
        assignee_id: Optional assignee identifier.
        labels: Optional list of labels.

    Returns:
        Whatever the client's ``create_issue`` returns.
    """
    client = _get_client(base_url=base_url, token=token)
    # Arguments are passed positionally, in the order the client method expects.
    return client.create_issue(
        project_id,
        title,
        issue_type,
        description,
        priority,
        assignee_id,
        labels,
    )
440
+
441
+
442
def update_issue(
    issue_id: str,
    token: str,
    base_url: Optional[str] = None,
    title: Optional[str] = None,
    description: Optional[str] = None,
    priority: Optional[str] = None,
    labels: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Update an issue using explicit credentials.

    Args:
        issue_id: Identifier of the issue to update.
        token: Access token handed to ``_get_client``.
        base_url: Optional API base URL handed to ``_get_client``.
        title: Optional new title.
        description: Optional new description.
        priority: Optional new priority.
        labels: Optional new labels.

    Returns:
        Whatever the client's ``update_issue`` returns.
    """
    client = _get_client(base_url=base_url, token=token)
    return client.update_issue(
        issue_id,
        title,
        description,
        priority,
        labels,
    )
453
+
454
+
455
def add_comment(issue_id: str, body: str, token: str, base_url: Optional[str] = None) -> Dict[str, Any]:
    """Add a comment to an issue using explicit credentials.

    Args:
        issue_id: Identifier of the issue to comment on.
        body: Comment text.
        token: Access token handed to ``_get_client``.
        base_url: Optional API base URL handed to ``_get_client``.

    Returns:
        Whatever the client's ``add_comment`` returns.
    """
    client = _get_client(base_url=base_url, token=token)
    return client.add_comment(issue_id=issue_id, body=body)
458
+
459
+
460
def search_issues(
    query: str,
    token: str,
    base_url: Optional[str] = None,
    project_id: Optional[str] = None,
    limit: int = 25,
    offset: int = 0,
) -> List[Dict[str, Any]]:
    """Search issues using explicit credentials.

    Args:
        query: Search query forwarded to the client.
        token: Access token handed to ``_get_client``.
        base_url: Optional API base URL handed to ``_get_client``.
        project_id: Optional project filter.
        limit: Max results.
        offset: Pagination offset.

    Returns:
        Whatever the client's ``search_issues`` returns.
    """
    client = _get_client(base_url=base_url, token=token)
    return client.search_issues(
        query=query,
        project_id=project_id,
        limit=limit,
        offset=offset,
    )
470
+
471
+
472
def search_confluence(
    token: str,
    query: str,
    base_url: Optional[str] = None,
    limit: int = 25,
) -> List[Dict[str, Any]]:
    """Search Confluence-like pages in sandbox.

    In this sandbox, Confluence pages are persisted through the same backend as
    issue-like records. We expose a page-shaped helper so judges and workflows
    can query consistently.

    Args:
        token: Access token handed to ``_get_client``.
        query: Search text forwarded to the client's ``search_issues``.
        base_url: Optional API base URL handed to ``_get_client``.
        limit: Maximum number of issues requested from the search.

    Returns:
        One dict per usable issue with the keys ``id``, ``title``, ``body``
        and ``url``. Records that are not dicts, or that have neither a
        ``summary`` nor a ``title``, are skipped.
    """
    client = _get_client(base_url=base_url, token=token)
    issues = client.search_issues(query=query, project_id=None, limit=limit, offset=0)
    pages: List[Dict[str, Any]] = []
    # `or []` keeps the loop safe if the search hands back None instead of a list.
    for issue in issues or []:
        if not isinstance(issue, dict):
            continue
        title = str(issue.get("summary") or issue.get("title") or "")
        if not title:
            continue
        # `fields` can be present but null (or not a dict); dict.get's default
        # only covers a *missing* key, so check the type before reading from it.
        fields = issue.get("fields")
        nested_description = fields.get("description") if isinstance(fields, dict) else None
        pages.append(
            {
                "id": issue.get("id"),
                "title": title,
                "body": issue.get("description") or nested_description or "",
                "url": f"{client.base_url}/issues/{issue.get('id')}",
            }
        )
    return pages
502
+
503
+
504
def list_confluence_pages(
    token: str,
    base_url: Optional[str] = None,
    limit: int = 100,
) -> List[Dict[str, Any]]:
    """Backward-compatible alias used by older judges.

    Some workflow judges import `list_confluence_pages`; expose it here so they
    can evaluate page-like content without import errors.

    Args:
        token: Access token forwarded to ``search_confluence``.
        base_url: Optional API base URL forwarded to ``search_confluence``.
        limit: Maximum number of results requested; defaults to 100.

    Returns:
        The result of ``search_confluence`` called with an empty query.
    """
    # An empty query string is how this alias asks the search helper for "everything".
    empty_query = ""
    return search_confluence(token, empty_query, base_url=base_url, limit=limit)
520
+
@@ -0,0 +1 @@
1
+ """BigQuery helpers package."""