decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374):
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
@@ -0,0 +1,501 @@
1
+ import os
2
+ import sys
3
+ import json
4
+ import asyncio
5
+ from typing import Any, Dict, List, Optional, Tuple
6
+
7
+ from fastmcp import FastMCP
8
+ import httpx
9
+ import time
10
+
11
# Base URL of the sandbox ENV API; overridable through PAYPAL_API_URL.
API_URL = os.environ.get("PAYPAL_API_URL", "http://127.0.0.1:8035")

# Token attached to every tool call. The PayPal-specific variable wins; an
# unset or empty value falls through to the generic USER_ACCESS_TOKEN.
USER_ACCESS_TOKEN = (
    os.environ.get("PAYPAL_USER_ACCESS_TOKEN")
    or os.environ.get("USER_ACCESS_TOKEN")
)
13
+
14
async def _api_call(name: str, arguments: Dict[str, Any]) -> Any:
    """Invoke a named tool on the sandbox ENV API and return its ``result``.

    POSTs ``{"name": name, "arguments": ...}`` to ``{API_URL}/tools/call``.
    When ``USER_ACCESS_TOKEN`` is set it is written into the arguments under
    ``access_token``, replacing any value the caller supplied for that key.

    Retry policy (tolerates an ENV API cold start): up to 10 attempts with a
    linear backoff of ``min(0.5 * attempt, 3.0)`` seconds between attempts.
    HTTP responses with status >= 500 are retried; so is any other exception
    raised while sending or decoding (connection errors, timeouts, malformed
    bodies). HTTP statuses below 500 fail immediately.

    Args:
        name: Tool name understood by the ENV API.
        arguments: Tool arguments; ``None`` or an empty mapping is sent as ``{}``.

    Returns:
        The ``result`` entry of the decoded JSON response (``None`` if absent).

    Raises:
        RuntimeError: On a non-retryable HTTP status, or after every attempt
            has failed. The underlying exception is chained as ``__cause__``.
    """
    max_attempts = 10

    # The request body does not change between attempts, so build it once.
    args = dict(arguments or {})
    if USER_ACCESS_TOKEN:
        # Always use USER_ACCESS_TOKEN from env
        args["access_token"] = USER_ACCESS_TOKEN
    payload = {"name": name, "arguments": args}

    last_exc: Optional[Exception] = None
    for attempt in range(1, max_attempts + 1):
        try:
            async with httpx.AsyncClient(timeout=20) as client:
                resp = await client.post(f"{API_URL}/tools/call", json=payload)
                resp.raise_for_status()
                data = resp.json()
                return data.get("result")
        except httpx.HTTPStatusError as e:
            if e.response.status_code < 500:
                # 4xx errors are non-retryable (bad request, auth, not found)
                raise RuntimeError(f"ENV API call failed: {e}") from e
            last_exc = e
        except Exception as e:
            last_exc = e
        # Back off only when another attempt follows; sleeping after the
        # final failure would just delay the error.
        if attempt < max_attempts:
            await asyncio.sleep(min(0.5 * attempt, 3.0))
    raise RuntimeError(f"ENV API call failed after retries: {last_exc}") from last_exc
38
+
39
+
40
# Server instance on which every tool below registers itself via @mcp.tool().
mcp = FastMCP("PayPal MCP Server (PostgreSQL Sandbox)")
41
+
42
@mcp.tool()
async def paypal_login(email: str, password: str) -> Dict[str, Any]:
    """Login to sandbox and return auth payload/token.

    Args:
        email: Account email
        password: Account password
    """
    # Unlike the other tools this talks to /auth/login directly (no retry,
    # no access token attached) instead of going through _api_call.
    credentials = {"email": email, "password": password}
    async with httpx.AsyncClient(timeout=15) as client:
        resp = await client.post(f"{API_URL}/auth/login", json=credentials)
        try:
            resp.raise_for_status()
        except httpx.HTTPStatusError:
            # Report HTTP failures as data rather than raising. Only the
            # status error is caught, so unrelated bugs are not masked.
            return {"error": f"HTTP {resp.status_code}: {resp.text}"}
        return resp.json()
58
+
59
+
60
+ # Catalog management ----------------------------------------------------------
61
@mcp.tool()
async def create_product(name: str, type: str) -> Dict[str, Any]:
    """Create a catalog product.

    Args:
        name: Product name (REQUIRED)
        type: Product type/category (REQUIRED)
    """
    # Both fields are forwarded unchanged to the ENV API tool of the same name.
    arguments = {"name": name, "type": type}
    return await _api_call("create_product", arguments)
70
+
71
+
72
@mcp.tool()
async def list_product(page: Optional[int] = None, page_size: Optional[int] = None) -> List[Dict[str, Any]]:
    """List products with pagination.

    Args:
        page: Page number
        page_size: Items per page
    """
    # Omitted pagination values are sent as explicit nulls.
    paging = {"page": page, "page_size": page_size}
    return await _api_call("list_product", paging)
81
+
82
+
83
@mcp.tool()
async def show_product_details(product_id: str) -> Dict[str, Any]:
    """Get product details by id.

    Args:
        product_id: Product identifier (REQUIRED)
    """
    lookup = {"product_id": product_id}
    return await _api_call("show_product_details", lookup)
91
+
92
+
93
+ # Dispute management ----------------------------------------------------------
94
@mcp.tool()
async def list_disputes(status: Optional[str] = None) -> List[Dict[str, Any]]:
    """List disputes (optionally filter by status).

    Args:
        status: Filter by dispute status (optional)

    Returns:
        List of dispute objects with id, status, and details.
    """
    # A missing filter is forwarded as an explicit null.
    filters = {"status": status}
    return await _api_call("list_disputes", filters)
105
+
106
+
107
@mcp.tool()
async def get_dispute(dispute_id: str) -> Dict[str, Any]:
    """Get dispute details by id.

    Args:
        dispute_id: The dispute identifier (REQUIRED)

    Returns:
        Dispute object with full details.
    """
    lookup = {"dispute_id": dispute_id}
    return await _api_call("get_dispute", lookup)
118
+
119
+
120
@mcp.tool()
async def accept_dispute_claim(dispute_id: str) -> Dict[str, Any]:
    """Accept a dispute claim for a given dispute id."""
    target = {"dispute_id": dispute_id}
    return await _api_call("accept_dispute_claim", target)
124
+
125
+
126
+ # Invoices --------------------------------------------------------------------
127
@mcp.tool()
async def create_invoice(recipient_email: str, items: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Create an invoice for a recipient.

    Required parameters:
    - recipient_email: Email address of the invoice recipient.
    - items: List of line items. Each item MUST include:
    - name (str): Item title (e.g., "Q4 materials")
    - quantity (int/float): Quantity, e.g., 1
    - amount (number): Line amount, e.g., 1200
    - currency (str): ISO currency code, e.g., "USD"
    Optional item fields supported by the sandbox: description (str)

    Optional parameters:

    Example:
    create_invoice(
    recipient_email="ap@acme-supplies.example",
    items=[{"name":"Q4 materials","quantity":1,"amount":1200,"currency":"USD"}]
    )
    """
    # NOTE(review): the "Optional parameters:" heading above lists nothing;
    # the signature has no optional parameters. Left as-is because the
    # docstring is presumably what clients see as the tool description.
    # The line items are passed through without local validation.
    invoice = {"recipient_email": recipient_email, "items": items}
    return await _api_call("create_invoice", invoice)
150
+
151
+
152
@mcp.tool()
async def list_invoices(page: Optional[int] = None, page_size: Optional[int] = None, status: Optional[str] = None) -> List[Dict[str, Any]]:
    """List invoices with optional pagination and status filter.

    Args:
        page: Page number (optional)
        page_size: Items per page (optional)
        status: Invoice status filter (e.g., DRAFT, SENT, PAID)
    """
    # Unset options travel as explicit nulls.
    query = {
        "page": page,
        "page_size": page_size,
        "status": status,
    }
    return await _api_call("list_invoices", query)
162
+
163
+
164
@mcp.tool()
async def get_invoice(invoice_id: str) -> Dict[str, Any]:
    """Get details for a specific invoice.

    Args:
        invoice_id: The invoice identifier (REQUIRED)
    """
    lookup = {"invoice_id": invoice_id}
    return await _api_call("get_invoice", lookup)
172
+
173
+
174
@mcp.tool()
async def send_invoice(invoice_id: str) -> Dict[str, Any]:
    """Send an existing invoice to the recipient.

    Args:
        invoice_id: The invoice identifier (REQUIRED)
    """
    target = {"invoice_id": invoice_id}
    return await _api_call("send_invoice", target)
182
+
183
+
184
@mcp.tool()
async def send_invoice_reminder(invoice_id: str) -> Dict[str, Any]:
    """Send a reminder for an existing invoice.

    Args:
        invoice_id: Invoice identifier (REQUIRED)
    """
    target = {"invoice_id": invoice_id}
    return await _api_call("send_invoice_reminder", target)
192
+
193
+
194
@mcp.tool()
async def cancel_sent_invoice(invoice_id: str) -> Dict[str, Any]:
    """Cancel a previously sent invoice.

    Args:
        invoice_id: Invoice identifier (REQUIRED)
    """
    target = {"invoice_id": invoice_id}
    return await _api_call("cancel_sent_invoice", target)
202
+
203
+
204
@mcp.tool()
async def generate_invoice_qr_code(invoice_id: str) -> Dict[str, Any]:
    """Generate a QR code for invoice payment/linking."""
    target = {"invoice_id": invoice_id}
    return await _api_call("generate_invoice_qr_code", target)
208
+
209
+
210
@mcp.tool()
async def list_bills(status: Optional[str] = None) -> List[Dict[str, Any]]:
    """List invoices sent TO the current user (bills to pay).

    These are invoices where you are the recipient and need to pay.

    Args:
        status: Filter by status (optional, e.g., SENT, PAID)

    Returns:
        List of invoice objects that need to be paid.
    """
    # A missing filter is forwarded as an explicit null.
    filters = {"status": status}
    return await _api_call("list_bills", filters)
223
+
224
+
225
@mcp.tool()
async def pay_invoice(invoice_id: str) -> Dict[str, Any]:
    """Pay an invoice that was sent to you.

    Use this to pay bills/invoices where you are the recipient.

    Args:
        invoice_id: The invoice identifier to pay (REQUIRED)

    Returns:
        Payment confirmation with transaction details.
    """
    target = {"invoice_id": invoice_id}
    return await _api_call("pay_invoice", target)
238
+
239
+
240
+ # Payments --------------------------------------------------------------------
241
@mcp.tool()
async def create_order(items: List[Dict[str, Any]], currency: str) -> Dict[str, Any]:
    """Create an order for immediate payment capture.

    Args:
        items: List of items (name, quantity, amount)
        currency: ISO code (e.g., "USD")
    """
    # Items are passed through as given; no local validation happens here.
    order = {"items": items, "currency": currency}
    return await _api_call("create_order", order)
250
+
251
+
252
@mcp.tool()
async def pay_order(order_id: str) -> Dict[str, Any]:
    """Capture payment for an order by id."""
    target = {"order_id": order_id}
    return await _api_call("pay_order", target)
256
+
257
+
258
@mcp.tool()
async def get_order(payment_id: str) -> Dict[str, Any]:
    """Get order/payment details by payment id."""
    # The ENV API tool is keyed by "payment_id" even though it is named get_order.
    lookup = {"payment_id": payment_id}
    return await _api_call("get_order", lookup)
262
+
263
+
264
@mcp.tool()
async def create_refund(capture_id: str, amount: Optional[float] = None, currency: Optional[str] = None) -> Dict[str, Any]:
    """Create a refund against a capture id.

    Args:
        capture_id: Capture/payment id (REQUIRED)
        amount: Refund amount (optional; full if omitted)
        currency: ISO code (optional)
    """
    # Omitted amount/currency are sent as explicit nulls.
    refund = {
        "capture_id": capture_id,
        "amount": amount,
        "currency": currency,
    }
    return await _api_call("create_refund", refund)
274
+
275
+
276
@mcp.tool()
async def get_refund(refund_id: str) -> Dict[str, Any]:
    """Get refund status/details by id."""
    lookup = {"refund_id": refund_id}
    return await _api_call("get_refund", lookup)
280
+
281
+
282
+ # Reporting and insights ------------------------------------------------------
283
@mcp.tool()
async def get_merchant_insights(start_date: str, end_date: str, insight_type: str, time_interval: str) -> Dict[str, Any]:
    """Retrieve merchant insights for a date range.

    Args:
        start_date, end_date: YYYY-MM-DD
        insight_type: e.g., "revenue", "orders"
        time_interval: e.g., "daily", "weekly"
    """
    # Dates are forwarded as the strings received; nothing is parsed locally.
    query = dict(
        start_date=start_date,
        end_date=end_date,
        insight_type=insight_type,
        time_interval=time_interval,
    )
    return await _api_call("get_merchant_insights", query)
298
+
299
+
300
@mcp.tool()
async def list_transaction(start_date: Optional[str] = None, end_date: Optional[str] = None) -> List[Dict[str, Any]]:
    """List transactions in a date range (optional)."""
    # Either bound may be omitted; omitted bounds are sent as nulls.
    date_range = {"start_date": start_date, "end_date": end_date}
    return await _api_call("list_transaction", date_range)
304
+
305
+
306
+ # Shipment tracking -----------------------------------------------------------
307
@mcp.tool()
async def create_shipment_tracking(tracking_number: str, transaction_id: str, carrier: str, order_id: Optional[str] = None, status: Optional[str] = "SHIPPED") -> Dict[str, Any]:
    """Create a shipment tracking record for an order/transaction."""
    # status defaults to "SHIPPED"; order_id is sent as null when omitted.
    record = dict(
        tracking_number=tracking_number,
        transaction_id=transaction_id,
        carrier=carrier,
        order_id=order_id,
        status=status,
    )
    return await _api_call("create_shipment_tracking", record)
317
+
318
+
319
@mcp.tool()
async def get_shipment_tracking(order_id: str, transaction_id: Optional[str] = None) -> Dict[str, Any]:
    """Get shipment tracking details for an order (and optional txn id)."""
    lookup = {"order_id": order_id, "transaction_id": transaction_id}
    return await _api_call("get_shipment_tracking", lookup)
323
+
324
+
325
@mcp.tool()
async def update_shipment_tracking(transaction_id: str, tracking_number: str, status: str, new_tracking_number: Optional[str] = None, carrier: Optional[str] = None) -> Dict[str, Any]:
    """Update shipment tracking status/number for a transaction."""
    # Optional fields left unset are forwarded as explicit nulls.
    changes = dict(
        transaction_id=transaction_id,
        tracking_number=tracking_number,
        status=status,
        new_tracking_number=new_tracking_number,
        carrier=carrier,
    )
    return await _api_call("update_shipment_tracking", changes)
335
+
336
+
337
+ # Subscription management -----------------------------------------------------
338
@mcp.tool()
async def cancel_subscription(
    subscription_id: str,
    reason: Optional[str] = None,
) -> Dict[str, Any]:
    """Cancel a subscription by id (optional reason)."""
    # `reason` is sent even when None; no local filtering is applied.
    cancellation = {"subscription_id": subscription_id, "reason": reason}
    return await _api_call("cancel_subscription", cancellation)
342
+
343
+
344
@mcp.tool()
async def create_subscription(
    plan_id: str,
    subscriber: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Create a subscription for a plan id, with optional subscriber info."""
    # The subscriber mapping is passed through as-is (or None when absent).
    subscription_request = {"plan_id": plan_id, "subscriber": subscriber}
    return await _api_call("create_subscription", subscription_request)
348
+
349
+
350
@mcp.tool()
async def create_subscription_plan(
    product_id: str,
    name: str,
    billing_cycles: List[Dict[str, Any]],
    payment_preferences: Dict[str, Any],
    auto_bill_outstanding: Optional[bool] = True,
) -> Dict[str, Any]:
    """Create a subscription plan for a product.

    Args:
        product_id: Product id (REQUIRED)
        name: Plan name (REQUIRED)
        billing_cycles: List of cycles dicts (REQUIRED)
        payment_preferences: Preferences dict (REQUIRED)
        auto_bill_outstanding: Auto bill flag (default True)
    """
    # The nested structures are forwarded untouched; nothing is validated here.
    plan_definition = {
        "product_id": product_id,
        "name": name,
        "billing_cycles": billing_cycles,
        "payment_preferences": payment_preferences,
        "auto_bill_outstanding": auto_bill_outstanding,
    }
    return await _api_call("create_subscription_plan", plan_definition)
368
+
369
+
370
@mcp.tool()
async def list_subscription_plans(
    product_id: Optional[str] = None,
    page: Optional[int] = None,
    page_size: Optional[int] = None,
) -> List[Dict[str, Any]]:
    """List subscription plans (optionally scoped by product_id)."""
    # Paging arguments are forwarded verbatim; unset ones are sent as None.
    listing_query = {
        "product_id": product_id,
        "page": page,
        "page_size": page_size,
    }
    return await _api_call("list_subscription_plans", listing_query)
374
+
375
+
376
@mcp.tool()
async def show_subscription_details(subscription_id: str) -> Dict[str, Any]:
    """Show details of a subscription by id."""
    # Single-key payload identifying the subscription to look up.
    lookup = {"subscription_id": subscription_id}
    return await _api_call("show_subscription_details", lookup)
380
+
381
+
382
@mcp.tool()
async def show_subscription_plan_details(billing_plan_id: str) -> Dict[str, Any]:
    """Show details of a subscription plan by id."""
    # The plan is addressed by the "billing_plan_id" key in the payload.
    lookup = {"billing_plan_id": billing_plan_id}
    return await _api_call("show_subscription_plan_details", lookup)
386
+
387
+
388
@mcp.tool()
async def list_subscriptions(status: Optional[str] = None) -> List[Dict[str, Any]]:
    """List subscriptions, optionally filtered by status (e.g., ACTIVE, CANCELLED)."""
    # A missing filter is forwarded as None rather than omitted.
    status_filter = {"status": status}
    return await _api_call("list_subscriptions", status_filter)
392
+
393
+
394
@mcp.tool()
async def update_subscription(subscription_id: str) -> Dict[str, Any]:
    """Update/refresh a subscription (sandbox helper)."""
    # Only the id is sent; no update fields are accepted by this wrapper.
    target = {"subscription_id": subscription_id}
    return await _api_call("update_subscription", target)
398
+
399
+
400
+ # Additional commerce tools ---------------------------------------------------
401
@mcp.tool()
async def search_product() -> List[Dict[str, Any]]:
    """Search products (demo endpoint)."""
    # Takes no arguments, so the request payload is an empty mapping.
    empty_payload: Dict[str, Any] = {}
    return await _api_call("search_product", empty_payload)
405
+
406
+
407
@mcp.tool()
async def create_cart() -> Dict[str, Any]:
    """Create a shopping cart (demo)."""
    # No inputs: the call is made with an empty payload.
    empty_payload: Dict[str, Any] = {}
    return await _api_call("create_cart", empty_payload)
411
+
412
+
413
@mcp.tool()
async def checkout_cart() -> Dict[str, Any]:
    """Checkout the current cart (demo)."""
    # No cart identifier is passed; the payload is empty.
    empty_payload: Dict[str, Any] = {}
    return await _api_call("checkout_cart", empty_payload)
417
+
418
+ # Payouts ---------------------------------------------------------------------
419
async def _ensure_payouts_table_async() -> None:
    """Create the ``payouts`` table if it does not already exist."""
    # The DDL takes no bind parameters, hence the empty tuple below.
    create_payouts_sql = """
    CREATE TABLE IF NOT EXISTS payouts(
        id TEXT PRIMARY KEY,
        receiver_email TEXT NOT NULL,
        amount DOUBLE PRECISION NOT NULL,
        currency TEXT NOT NULL,
        note TEXT,
        batch_id TEXT,
        status TEXT NOT NULL
    )
    """
    await _run_exec(create_payouts_sql, ())
431
+
432
+
433
@mcp.tool()
async def create_payout(
    receiver_email: str,
    amount: float,
    currency: str,
    note: Optional[str] = None,
    batch_id: Optional[str] = None,
    require_approval: bool = False,
) -> Dict[str, Any]:
    """Create a payout to a receiver email.

    Args:
        receiver_email: Payee email address (REQUIRED)
        amount: Amount to send (REQUIRED)
        currency: ISO currency code, e.g., "USD" (REQUIRED)
        note: Optional note/memo
        batch_id: Optional batch identifier
        require_approval: If true, payout will be created with PENDING_APPROVAL status

    Returns:
        Payout object with id, status, and requires_approval flag.

    NOTE: Payouts of $1000 or more automatically require approval.
    Use approve_pending_payout to approve pending payouts.
    """
    # The amount is coerced with float() before sending, so numeric strings or
    # ints supplied by a caller are normalised here.
    payout_request = {
        "receiver_email": receiver_email,
        "amount": float(amount),
        "currency": currency,
        "note": note,
        "batch_id": batch_id,
        "require_approval": require_approval,
    }
    return await _api_call("create_payout", payout_request)
459
+
460
+
461
@mcp.tool()
async def get_payout(payout_id: str) -> Dict[str, Any]:
    """Get payout details by id."""
    # Single-key payload identifying the payout to fetch.
    lookup = {"payout_id": payout_id}
    return await _api_call("get_payout", lookup)
465
+
466
+
467
@mcp.tool()
async def list_payouts(status: Optional[str] = None) -> List[Dict[str, Any]]:
    """List payouts (optionally filter by status)."""
    # The filter key is always present; it carries None when no status is given.
    status_filter = {"status": status}
    return await _api_call("list_payouts", status_filter)
471
+
472
+
473
@mcp.tool()
async def approve_pending_payout(payout_id: str) -> Dict[str, Any]:
    """Approve a pending payout and execute the transfer.

    This will change the payout status from PENDING_APPROVAL to COMPLETED
    and transfer the funds to the recipient's email address.

    Args:
        payout_id: The ID of the pending payout to approve

    Returns:
        Confirmation with payout details and transaction ID
    """
    # This wrapper only forwards the id; the status change described above is
    # not performed in this function.
    approval = {"payout_id": payout_id}
    return await _api_call("approve_pending_payout", approval)
487
+
488
+
489
def main() -> None:
    """Start the PayPal MCP server over the HTTP transport.

    The bind host is read from ``PAYPAL_MCP_HOST`` (default ``localhost``).
    The port is taken from ``PORT`` when that is set to a non-blank value,
    otherwise from ``PAYPAL_MCP_PORT``, otherwise it defaults to ``8861``.

    Raises:
        ValueError: If the selected port value is not an integer.
    """
    print("Starting PayPal MCP Server (PostgreSQL Sandbox)...", file=sys.stderr)
    host = os.getenv("PAYPAL_MCP_HOST", "localhost")
    # Support PORT env var (set by mcp_helpers) or PAYPAL_MCP_PORT.
    # Blank values count as unset for both variables, so an empty
    # PAYPAL_MCP_PORT falls back to the default instead of reaching int("").
    port_str = (
        os.getenv("PORT", "").strip()
        or os.getenv("PAYPAL_MCP_PORT", "").strip()
        or "8861"
    )
    try:
        port = int(port_str)
    except ValueError:
        # Same exception type as before, but the message now names the
        # environment variables the operator has to fix.
        raise ValueError(
            f"Invalid port {port_str!r}: PORT / PAYPAL_MCP_PORT must be an integer"
        ) from None
    mcp.run(transport="http", host=host, port=port)
496
+
497
+
498
# Start the server only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
500
+
501
+