decodingtrust-agent-sdk 0.1.0 (decodingtrust_agent_sdk-0.1.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374)
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
@@ -0,0 +1,642 @@
1
+ global:
2
+ base_dir: "../mcp_server"
3
+ log_dir: "../../logs"
4
+ python_executable: "python3"
5
+
6
+ servers:
7
+ - name: gmail
8
+ description: Gmail MCP server backed by Mailpit for sandboxed email operations
9
+ path: gmail/main.py
10
+ enabled: true
11
+ port: 8853
12
+ environment: gmail
13
+ env:
14
+ PORT: "" # Dynamically allocated by task_runner
15
+ MAILPIT_BASE_URL: "http://127.0.0.1:${GMAIL_UI_PORT}"
16
+ API_PROXY_URL: "http://127.0.0.1:${GMAIL_PROXY_PORT}"
17
+ MAILPIT_SMTP_HOST: "127.0.0.1"
18
+ MAILPIT_SMTP_PORT: "${GMAIL_SMTP_PORT}"
19
+ AUTH_API_URL: "http://127.0.0.1:${GMAIL_AUTH_PORT}"
20
+ USER_ACCESS_TOKEN: "" # Set per-task in config.yaml
21
+ transport: http
22
+ command: ["uv", "run", "python", "main.py"]
23
+
24
+ - name: salesforce
25
+ description: Salesforce CRM MCP server for comprehensive CRM operations
26
+ path: salesforce/main.py
27
+ enabled: true
28
+ port: 8005
29
+ environment: salesforce # Docker environment name from env.yaml
30
+ env:
31
+ # HTTP MCP Server Port (dynamically allocated by task_runner)
32
+ PORT: ""
33
+
34
+ # Base URL for Salesforce instance (uses environment's API port)
35
+ SALESFORCE_BASE_URL: "http://127.0.0.1:${SALESFORCE_API_PORT}"
36
+
37
+ # OAuth2 Grant Type: "password" or "client_credentials"
38
+ SALESFORCE_GRANT_TYPE: "client_credentials"
39
+
40
+ # OAuth2 Client Credentials
41
+ SALESFORCE_CLIENT_ID: "17817554-086b-83d1-bf9d-69026221f529"
42
+ SALESFORCE_CLIENT_SECRET: "mcp-secret-123"
43
+
44
+ # Password Grant Credentials (when using grant_type="password")
45
+ SALESFORCE_USERNAME: "bitnami"
46
+ SALESFORCE_PASSWORD: "user"
47
+
48
+ # Optional: Pre-supplied access token (if already authenticated). If empty,
49
+ # start.sh will auto-fetch using the grant type above.
50
+ SALESFORCE_ACCESS_TOKEN: ""
51
+
52
+ transport: http
53
+ # Directly run the MCP server with uv (HTTP at http://localhost:$PORT/mcp)
54
+ command: ["uv", "run", "python", "main.py"]
55
+
56
+ # Calendar MCP Server
57
+ - name: calendar
58
+ description: Google Calendar Sandbox MCP server
59
+ path: calendar/main.py
60
+ enabled: true
61
+ port: 8841
62
+ environment: calendar
63
+ env:
64
+ PORT: ""
65
+ CALENDAR_API_URL: "http://127.0.0.1:${CALENDAR_API_PORT}"
66
+ USER_ACCESS_TOKEN: "" # Set per-task in config.yaml
67
+ transport: http
68
+ command: ["uv", "run", "python", "main.py"]
69
+
70
+ # Zoom MCP Server
71
+ - name: zoom
72
+ description: Zoom Sandbox MCP server
73
+ path: zoom/main.py
74
+ enabled: true
75
+ port: 8847
76
+ environment: zoom
77
+ env:
78
+ PORT: ""
79
+ ZOOM_API_URL: "http://127.0.0.1:${ZOOM_API_PORT}"
80
+ USER_ACCESS_TOKEN: "" # Set per-task in config.yaml
81
+ transport: http
82
+ command: ["uv", "run", "python", "main.py"]
83
+
84
+ # Slack MCP Server
85
+ - name: slack
86
+ description: Slack Sandbox MCP server
87
+ path: slack/main.py
88
+ enabled: true
89
+ port: 8844
90
+ environment: slack
91
+ env:
92
+ PORT: ""
93
+ SLACK_API_URL: "http://127.0.0.1:${SLACK_API_PORT}"
94
+ USER_ACCESS_TOKEN: "" # Set per-task in config.yaml
95
+ transport: http
96
+ command: ["uv", "run", "python", "main.py"]
97
+
98
+ # Snowflake MCP Server
99
+ - name: snowflake
100
+ description: Snowflake MCP server
101
+ path: snowflake/main.py
102
+ enabled: true
103
+ port: 8842
104
+ environment: snowflake
105
+ env:
106
+ PORT: ""
107
+ HOST: "0.0.0.0"
108
+ # Local PostgreSQL (see envs/snowflake/docker-compose.yml)
109
+ POSTGRES_DSN: "postgresql://snow:snow@127.0.0.1:${SNOWFLAKE_PG_PORT}/snowdb"
110
+ # Search configuration
111
+ SEARCH_MODE: "faiss" # "faiss" or "simple"
112
+ SEARCH_TABLE: "product_search_view"
113
+ SEARCH_COLUMNS: "name,description"
114
+ # Optional OpenAI config (key should be provided via secure environment, not committed)
115
+ OPENAI_MODEL: "gpt-4o-mini"
116
+ transport: http
117
+ command: ["uv", "run", "python", "main.py"]
118
+
119
+ # Databricks MCP Server
120
+ - name: databricks
121
+ description: Databricks MCP server
122
+ path: databricks/main.py
123
+ enabled: true
124
+ port: 8843
125
+ environment: databricks
126
+ env:
127
+ PORT: ""
128
+ HOST: "0.0.0.0"
129
+ POSTGRES_DSN: "postgresql://dbx:dbx@127.0.0.1:${DATABRICKS_PG_PORT}/dbxdb"
130
+ SEARCH_MODE: "faiss"
131
+ SEARCH_TABLE: "product_search_view"
132
+ SEARCH_COLUMNS: "name,description"
133
+ OPENAI_MODEL: "gpt-4o-mini"
134
+ transport: http
135
+ command: ["uv", "run", "python", "main.py"]
136
+
137
+ # PayPal MCP Server (Local Sandbox)
138
+ - name: paypal
139
+ description: PayPal MCP server (local sandbox, no external API)
140
+ path: paypal/main.py
141
+ enabled: true
142
+ port: 8861
143
+ environment: paypal
144
+ env:
145
+ PORT: ""
146
+ PAYPAL_API_URL: "http://127.0.0.1:${PAYPAL_API_PORT}"
147
+ PAYPAL_USER_ACCESS_TOKEN: "" # Set per-task in config.yaml
148
+ transport: http
149
+ command: ["uv", "run", "python", "main.py"]
150
+
151
+ # ERS MCP Server
152
+ - name: ers
153
+ description: ERS (Employee Reimbursement System) MCP server
154
+ path: ers/main.py
155
+ enabled: true
156
+ port: 8859
157
+ environment: ers
158
+ env:
159
+ PORT: ""
160
+ ERS_API_URL: "http://127.0.0.1:${ERS_API_PORT}"
161
+ USER_ACCESS_TOKEN: ""
162
+ transport: http
163
+ command: ["uv", "run", "python", "main.py"]
164
+
165
+ # macOS MCP Server
166
+ - name: macos-os
167
+ description: macOS desktop automation MCP server (requires FastAPI backend)
168
+ path: macos/mcp_server.py
169
+ enabled: true
170
+ port: 8847
171
+ environment: macos
172
+ env:
173
+ PORT: ""
174
+ MACOS_API_URL: "http://127.0.0.1:${MCP_SERVICE_PORT}"
175
+ transport: http
176
+ command: ["uv", "run", "python", "mcp_server.py", "--transport", "streamable-http", "--port", "$PORT"]
177
+
178
+ # Windows MCP Server
179
+ - name: windows-os
180
+ description: Windows desktop automation MCP server (requires FastAPI backend)
181
+ path: windows/mcp_server.py
182
+ enabled: true
183
+ port: 8846
184
+ environment: windows
185
+ env:
186
+ PORT: ""
187
+ WINDOWS_API_URL: "http://127.0.0.1:${MCP_SERVICE_PORT}"
188
+ transport: http
189
+ command: ["uv", "run", "python", "mcp_server.py", "--transport", "streamable-http", "--port", "$PORT"]
190
+
191
+ # Browser MCP Server
192
+ - name: browser
193
+ description: Browser MCP server
194
+ path: browser/main.py
195
+ enabled: true
196
+ port: 8850
197
+ transport: http
198
+ command: ["uv", "run", "python", "main.py"]
199
+ environment: [ecommerce, custom-website]
200
+ env:
201
+ ECOMMERCE_UI_URL: "http://127.0.0.1:${ECOMMERCE_UI_PORT}"
202
+ CUSTOM_WEBSITE_UI_URL: "http://127.0.0.1:${CUSTOM_WEBSITE_UI_PORT}"
203
+
204
+ - name: OS-filesystem
205
+ description: OS-Filesystem MCP server for agent evaluation
206
+ path: os-filesystem/main.py
207
+ enabled: true
208
+ port: 8033
209
+ environment: os-filesystem # Docker environment name from env.yaml
210
+ transport: http
211
+ command: ["uv", "run", "python", "main.py"]
212
+ env:
213
+ HOST: "0.0.0.0"
214
+ PORT: ""
215
+ FILESYSTEM_API_URL: "http://127.0.0.1:${OS_FILESYSTEM_API_PORT}"
216
+ USER_ACCESS_TOKEN: "" # Set per-task in config.yaml
217
+
218
+ # Local Google Form MCP Server
219
+ - name: google-form
220
+ description: Local Google Form MCP server (manages schema and submissions for the dockerized form)
221
+ path: google_form/main.py
222
+ enabled: true
223
+ port: 8855
224
+ environment: google-form
225
+ env:
226
+ PORT: ""
227
+ FORM_API_BASE: "http://127.0.0.1:${GOOGLE_FORM_API_PORT}"
228
+ # Optional: direct file path to schema.json for edits (outside docker)
229
+ SCHEMA_PATH: "" # Will be set dynamically by task
230
+ UI_URL: "http://127.0.0.1:${GOOGLE_FORM_UI_PORT}/"
231
+ transport: http
232
+ command: ["uv", "run", "python", "main.py"]
233
+
234
+ - name: googlesheets
235
+ description: Google Sheets MCP server
236
+ path: googlesheets/main.py
237
+ enabled: true
238
+ port: 8868
239
+ environment: googlesheets
240
+ env:
241
+ PORT: ""
242
+ GOOGLESHEETS_API_URL: "http://127.0.0.1:${GOOGLESHEETS_API_PORT}"
243
+ GOOGLESHEETS_USER_ACCESS_TOKEN: ""
244
+ transport: http
245
+ command: ["uv", "run", "python", "main.py"]
246
+
247
+ - name: googledrive
248
+ description: Google Drive MCP server
249
+ path: googledrive/main.py
250
+ enabled: true
251
+ port: 8869
252
+ environment: googledrive
253
+ env:
254
+ PORT: ""
255
+ GOOGLEDRIVE_API_URL: "http://127.0.0.1:${GOOGLEDRIVE_API_PORT}"
256
+ GOOGLEDRIVE_USER_ACCESS_TOKEN: ""
257
+ transport: http
258
+ command: ["uv", "run", "python", "main.py"]
259
+
260
+ - name: travel-suite
261
+ description: "Travel MCP server for travel bookings and itineraries"
262
+ path: travel/mcp_server.py
263
+ enabled: true
264
+ port: 10301
265
+ environment: travel
266
+ env:
267
+ PORT: ""
268
+ TRAVEL_HOST: "127.0.0.1"
269
+ TRAVEL_PORT: "${TRAVEL_PORT}"
270
+ transport: http
271
+ command: ["python3", "mcp_server.py"]
272
+
273
+ # Atlassian MCP Server (Jira + Confluence)
274
+ - name: atlassian
275
+ description: Atlassian MCP server for Jira issues and Confluence pages
276
+ path: atlassian/main.py
277
+ enabled: true
278
+ port: 8862
279
+ environment: atlassian
280
+ env:
281
+ PORT: ""
282
+ ATLASSIAN_API_URL: "http://127.0.0.1:${ATLASSIAN_API_PORT}"
283
+ USER_ACCESS_TOKEN: "" # Set per-task in config.yaml
284
+ transport: http
285
+ command: ["uv", "run", "python", "main.py"]
286
+
287
+ # Terminal MCP Server
288
+ - name: terminal
289
+ description: Terminal MCP server for code execution and terminal operations
290
+ path: terminal/main.py
291
+ enabled: true
292
+ port: 8845
293
+ environment: terminal # Docker environment name from env.yaml
294
+ env:
295
+ PORT: "" # Dynamically allocated by task_runner
296
+ transport: http # streamable-http transport (uses /mcp path, not /sse)
297
+ command: ["uv", "run", "python", "main.py"]
298
+
299
+ # Research MCP Server
300
+ - name: Research
301
+ description: Research MCP server for academic search and code execution
302
+ path: research/main.py
303
+ enabled: true
304
+ port: 8846
305
+ environment: [research, arxiv] # Docker environment names
306
+ env:
307
+ PORT: "" # Dynamically allocated by task_runner
308
+ HOST: "0.0.0.0"
309
+ # TERMINAL_CONTAINER_NAME: inherited from parent process (set dynamically by evaluation.py)
310
+ ARXIV_API_URL: "http://127.0.0.1:${ARXIV_API_PORT}" # Resolved dynamically from env
311
+ transport: http
312
+ command: ["uv", "run", "python", "main.py"]
313
+
314
+ # Telecom MCP Server
315
+ - name: telecom
316
+ description: Telecom MCP server for managing customers, accounts, bills, tickets, and more
317
+ path: telecom/mcp_server.py
318
+ enabled: true
319
+ port: 12345
320
+ environment: telecom
321
+ transport: http
322
+ command: ["python", "mcp_server.py"]
323
+ env:
324
+ TELECOM_MCP_PORT: "" # Dynamically allocated by task_runner
325
+ TELECOM_MCP_TRANSPORT: "http"
326
+ TELECOM_HOST: "127.0.0.1"
327
+ TELECOM_PORT: "${TELECOM_PORT}" # Resolved from Docker-allocated port
328
+
329
+ - name: github
330
+ description: GitHub Clone MCP server for repositories, issues, pull requests, and git operations
331
+ path: github/main.py
332
+ enabled: true
333
+ port: 8867
334
+ environment: github
335
+ env:
336
+ PORT: ""
337
+ GITHUB_API_URL: "http://127.0.0.1:${GITHUB_API_PORT}"
338
+ USER_ACCESS_TOKEN: ""
339
+ transport: http
340
+ command: ["uv", "run", "python", "main.py"]
341
+
342
+ - name: gitlab
343
+ description: GitLab MCP server for projects, repository files, issues, and merge requests
344
+ path: gitlab/main.py
345
+ enabled: true
346
+ port: 8872
347
+ environment: gitlab
348
+ env:
349
+ PORT: ""
350
+ GITLAB_API_URL: "http://127.0.0.1:${GITLAB_UI_PORT}/api/v4"
351
+ USER_ACCESS_TOKEN: ""
352
+ transport: http
353
+ command: ["uv", "run", "python", "main.py"]
354
+
355
+ - name: HospitalClient
356
+ description: Hospital MCP server for medical diagnosis tools
357
+ path: hospital/mcp_server.py
358
+ enabled: true
359
+ port: 12002
360
+ environment: hospital
361
+ env:
362
+ PORT: ""
363
+ HOSPITAL_HOST: "127.0.0.1"
364
+ HOSPITAL_PORT: "${HOSPITAL_PORT}"
365
+ HOSPITAL_MCP_HOST: "0.0.0.0"
366
+ HOSPITAL_MCP_PORT: "${PORT}"
367
+ transport: http
368
+ command: ["python", "mcp_server.py"]
369
+
370
+ # Finance MCP Server (Docker-backed Flask web server)
371
+ - name: finance
372
+ description: Finance Trading MCP server for FINRA benchmark
373
+ path: finance/main.py
374
+ enabled: true
375
+ port: 8002
376
+ environment: finance
377
+ transport: http
378
+ command: ["uv", "run", "python", "main.py", "mcp", "--port", "$PORT", "--web-url", "http://127.0.0.1:${FINANCE_WEB_PORT}", "--auto-web"]
379
+ env:
380
+ HOST: "0.0.0.0"
381
+ PORT: "8002"
382
+ FINANCE_WEB_PORT: "${FINANCE_WEB_PORT}"
383
+
384
+ # Telegram MCP Server (Docker-backed PostgreSQL + FastAPI)
385
+ - name: telegram
386
+ description: Telegram MCP server for messaging, contacts, and calls
387
+ path: telegram/main.py
388
+ enabled: true
389
+ port: 8864
390
+ environment: telegram
391
+ env:
392
+ PORT: ""
393
+ TELEGRAM_API_URL: "http://127.0.0.1:${TELEGRAM_API_PORT}"
394
+ TELEGRAM_USER_ACCESS_TOKEN: ""
395
+ transport: http
396
+ command: ["uv", "run", "python", "main.py"]
397
+
398
+ # Legal MCP Server (Docker-backed Flask web server)
399
+ - name: legal
400
+ description: Legal MCP server for legal research
401
+ path: legal/main.py
402
+ enabled: true
403
+ port: 8003
404
+ environment: legal
405
+ transport: http
406
+ command: ["uv", "run", "python", "main.py", "mcp", "--port", "$PORT"]
407
+ env:
408
+ HOST: "0.0.0.0"
409
+ PORT: "8003"
410
+ LEGAL_WEB_PORT: "${LEGAL_WEB_PORT}"
411
+
412
+ # Customer Service MCP Server
413
+ - name: customer_service
414
+ description: Customer Service MCP server
415
+ path: customer_service/main.py
416
+ enabled: true
417
+ port: 22631
418
+ environment: customer_service
419
+ env:
420
+ PORT: ""
421
+ CS_API_BASE: "http://127.0.0.1:${CUSTOMER_SERVICE_API_PORT}"
422
+ transport: http
423
+ command: ["python3", "main.py"]
424
+
425
+ # BigQuery MCP Server (backed by local emulator)
426
+ - name: bigquery
427
+ description: BigQuery MCP server backed by local emulator
428
+ path: bigquery/main.py
429
+ enabled: true
430
+ port: 8868
431
+ environment: bigquery
432
+ env:
433
+ PORT: ""
434
+ HOST: "0.0.0.0"
435
+ BIGQUERY_API_URL: "http://127.0.0.1:${BIGQUERY_API_PORT}"
436
+ BIGQUERY_GRPC_TARGET: "127.0.0.1:${BIGQUERY_GRPC_PORT}"
437
+ BIGQUERY_PROJECT_ID: "test-project"
438
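+ BIGQUERY_LOCATION: "US" # NOTE(review): this entry declares no transport/command keys, unlike the sibling servers - confirm the loader's defaults or add them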
439
+ # Chase MCP Server (Local Sandbox)
440
+ - name: chase
441
+ description: Chase retail banking MCP server (accounts, transfers, Zelle, cards)
442
+ path: chase/main.py
443
+ enabled: true
444
+ port: 8880
445
+ environment: chase
446
+ env:
447
+ PORT: ""
448
+ CHASE_API_URL: "http://127.0.0.1:${CHASE_API_PORT}"
449
+ CHASE_USER_ACCESS_TOKEN: "" # Set per-task in config.yaml
450
+ transport: http
451
+ command: ["uv", "run", "python", "main.py"]
452
+
453
+ # LinkedIn MCP Server (Local Sandbox)
454
+ - name: linkedin
455
+ description: LinkedIn MCP server (profiles, jobs, posts, connections, messaging)
456
+ path: linkedin/main.py
457
+ enabled: true
458
+ port: 8881
459
+ environment: linkedin
460
+ env:
461
+ PORT: ""
462
+ LINKEDIN_API_URL: "http://127.0.0.1:${LINKEDIN_API_PORT}"
463
+ LINKEDIN_USER_ACCESS_TOKEN: "" # Set per-task in config.yaml
464
+ transport: http
465
+ command: ["uv", "run", "python", "main.py"]
466
+
467
+ # X (Twitter) MCP Server (Local Sandbox)
468
+ - name: x
469
+ description: X / Twitter MCP server (tweets, timeline, DMs, bookmarks, search)
470
+ path: x/main.py
471
+ enabled: true
472
+ port: 8882
473
+ environment: x
474
+ env:
475
+ PORT: ""
476
+ X_API_URL: "http://127.0.0.1:${X_API_PORT}"
477
+ X_USER_ACCESS_TOKEN: "" # Set per-task in config.yaml
478
+ transport: http
479
+ command: ["uv", "run", "python", "main.py"]
480
+
481
+ # Notion MCP Server (Local Sandbox)
482
+ - name: notion
483
+ description: Notion MCP server (pages, blocks, databases, comments)
484
+ path: notion/main.py
485
+ enabled: true
486
+ port: 8883
487
+ environment: notion
488
+ env:
489
+ PORT: ""
490
+ NOTION_API_URL: "http://127.0.0.1:${NOTION_API_PORT}"
491
+ NOTION_USER_ACCESS_TOKEN: "" # Set per-task in config.yaml
492
+ transport: http
493
+ command: ["uv", "run", "python", "main.py"]
494
+
495
+ # Reddit MCP Server (Local Sandbox)
496
+ - name: reddit
497
+ description: Reddit MCP server (subreddits, posts, comments, votes, DMs, search)
498
+ path: reddit/main.py
499
+ enabled: true
500
+ port: 8884
501
+ environment: reddit
502
+ env:
503
+ PORT: ""
504
+ REDDIT_API_URL: "http://127.0.0.1:${REDDIT_API_PORT}"
505
+ REDDIT_USER_ACCESS_TOKEN: "" # Set per-task in config.yaml
506
+ transport: http
507
+ command: ["uv", "run", "python", "main.py"]
508
+
509
+ # Robinhood MCP Server (Local Sandbox)
510
+ - name: robinhood
511
+ description: Robinhood MCP server (stocks, crypto, orders, portfolio, watchlists)
512
+ path: robinhood/main.py
513
+ enabled: true
514
+ port: 8885
515
+ environment: robinhood
516
+ env:
517
+ PORT: ""
518
+ ROBINHOOD_API_URL: "http://127.0.0.1:${ROBINHOOD_API_PORT}"
519
+ ROBINHOOD_USER_ACCESS_TOKEN: "" # Set per-task in config.yaml
520
+ transport: http
521
+ command: ["uv", "run", "python", "main.py"]
522
+
523
+ # Dropbox MCP Server (Local Sandbox)
524
+ - name: dropbox
525
+ description: Dropbox MCP server (files, folders, sharing, search)
526
+ path: dropbox/main.py
527
+ enabled: true
528
+ port: 8886
529
+ environment: dropbox
530
+ env:
531
+ PORT: ""
532
+ DROPBOX_API_URL: "http://127.0.0.1:${DROPBOX_API_PORT}"
533
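+ DROPBOX_USER_ACCESS_TOKEN: "" # Set per-task in config.yaml. NOTE(review): this entry declares no transport/command keys, unlike the sibling servers - confirm the loader's defaults or add them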
534
+ # DoorDash MCP Server (Local Sandbox)
535
+ - name: doordash
536
+ description: DoorDash MCP server (restaurants, cart, orders, DashPass)
537
+ path: doordash/main.py
538
+ enabled: true
539
+ port: 8873
540
+ environment: doordash
541
+ env:
542
+ PORT: ""
543
+ DOORDASH_API_URL: "http://127.0.0.1:${DOORDASH_API_PORT}"
544
+ DOORDASH_USER_ACCESS_TOKEN: "" # Set per-task in config.yaml
545
+ transport: http
546
+ command: ["uv", "run", "python", "main.py"]
547
+
548
+ # United Airlines MCP Server (Local Sandbox)
549
+ - name: united
550
+ description: United Airlines MCP server (flight search, booking, status)
551
+ path: united/main.py
552
+ enabled: true
553
+ port: 8874
554
+ environment: united
555
+ env:
556
+ PORT: ""
557
+ UNITED_API_URL: "http://127.0.0.1:${UNITED_API_PORT}"
558
+ UNITED_USER_ACCESS_TOKEN: ""
559
+ transport: http
560
+ command: ["uv", "run", "python", "main.py"]
561
+
562
+ # Southwest Airlines MCP Server (Local Sandbox)
563
+ - name: southwest
564
+ description: Southwest Airlines MCP server (flight search, booking, low fare calendar)
565
+ path: southwest/main.py
566
+ enabled: true
567
+ port: 8875
568
+ environment: southwest
569
+ env:
570
+ PORT: ""
571
+ SOUTHWEST_API_URL: "http://127.0.0.1:${SOUTHWEST_API_PORT}"
572
+ SOUTHWEST_USER_ACCESS_TOKEN: ""
573
+ transport: http
574
+ command: ["uv", "run", "python", "main.py"]
575
+
576
+ # Enterprise Rent-A-Car MCP Server (Local Sandbox)
577
+ - name: enterprise
578
+ description: Enterprise Rent-A-Car MCP server (locations, vehicle classes, car booking)
579
+ path: enterprise/main.py
580
+ enabled: true
581
+ port: 8876
582
+ environment: enterprise
583
+ env:
584
+ PORT: ""
585
+ ENTERPRISE_API_URL: "http://127.0.0.1:${ENTERPRISE_API_PORT}"
586
+ ENTERPRISE_USER_ACCESS_TOKEN: ""
587
+ transport: http
588
+ command: ["uv", "run", "python", "main.py"]
589
+
590
+ # Expedia MCP Server (Local Sandbox)
591
+ - name: expedia
592
+ description: Expedia MCP server (destinations, property search, hotel booking)
593
+ path: expedia/main.py
594
+ enabled: true
595
+ port: 8877
596
+ environment: expedia
597
+ env:
598
+ PORT: ""
599
+ EXPEDIA_API_URL: "http://127.0.0.1:${EXPEDIA_API_PORT}"
600
+ EXPEDIA_USER_ACCESS_TOKEN: ""
601
+ transport: http
602
+ command: ["uv", "run", "python", "main.py"]
603
+
604
+ # Booking.com MCP Server (Local Sandbox)
605
+ - name: booking
606
+ description: Booking.com MCP server (destinations, property search, hotel booking)
607
+ path: booking/main.py
608
+ enabled: true
609
+ port: 8878
610
+ environment: booking
611
+ env:
612
+ PORT: ""
613
+ BOOKING_API_URL: "http://127.0.0.1:${BOOKING_API_PORT}"
614
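+ BOOKING_USER_ACCESS_TOKEN: "" # NOTE(review): this entry declares no transport/command keys, unlike the sibling servers - confirm the loader's defaults or add them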
615
+
616
+ # WhatsApp MCP Server
617
+ - name: whatsapp
618
+ description: WhatsApp MCP server backed by local PostgreSQL + FastAPI sandbox
619
+ path: whatsapp/main.py
620
+ enabled: true
621
+ port: 8865
622
+ environment: whatsapp
623
+ env:
624
+ PORT: ""
625
+ WHATSAPP_API_URL: "http://127.0.0.1:${WHATSAPP_API_PORT}"
626
+ WHATSAPP_USER_ACCESS_TOKEN: "" # Set per-task in config.yaml
627
+ transport: http
628
+ command: ["uv", "run", "python", "main.py"]
629
+
630
+ # Google Docs MCP Server
631
+ - name: googledocs
632
+ description: Google Docs MCP server backed by local PostgreSQL + FastAPI sandbox
633
+ path: googledocs/main.py
634
+ enabled: true
635
+ port: 8866
636
+ environment: googledocs
637
+ env:
638
+ PORT: ""
639
+ GOOGLEDOCS_API_URL: "http://127.0.0.1:${GOOGLEDOCS_API_PORT}"
640
+ GOOGLEDOCS_USER_ACCESS_TOKEN: "" # Set per-task in config.yaml
641
+ transport: http
642
+ command: ["uv", "run", "python", "main.py"]
@@ -0,0 +1,31 @@
1
+ services:
2
+ arxiv-api:
3
+ image: decodingtrustagent/arxiv:api-latest
4
+ restart: unless-stopped
5
+ ports:
6
+ - "${ARXIV_API_PORT:-8034}:8033"
7
+ volumes:
8
+ - arxiv_data:/app/data
9
+ environment:
10
+ - DATABASE_URL=sqlite:////app/data/arxiv.db
11
+ command: >
12
+ sh -c "python sandbox_init.py && python main.py"
13
+ healthcheck:
14
+ test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8033/health')"]
15
+ interval: 10s
16
+ timeout: 5s
17
+ retries: 5
18
+
19
+ arxiv-ui:
20
+ image: decodingtrustagent/arxiv:ui-latest
21
+ restart: unless-stopped
22
+ ports:
23
+ - "${ARXIV_UI_PORT:-8058}:80"
24
+ depends_on:
25
+ - arxiv-api
26
+ environment:
27
+ - NODE_ENV=production
28
+ - REACT_APP_API_URL=http://localhost:${ARXIV_API_PORT:-8034}
29
+
30
+ volumes:
31
+ arxiv_data:
@@ -0,0 +1,36 @@
1
+ services:
2
+ arxiv-api:
3
+ build:
4
+ context: ./arxiv_api
5
+ dockerfile: Dockerfile
6
+ restart: unless-stopped
7
+ ports:
8
+ - "${ARXIV_API_PORT:-8034}:8033"
9
+ volumes:
10
+ - arxiv_data:/app/data
11
+ environment:
12
+ - DATABASE_URL=sqlite:////app/data/arxiv.db
13
+ command: >
14
+ sh -c "python sandbox_init.py && python main.py"
15
+ healthcheck:
16
+ test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8033/health')"]
17
+ interval: 10s
18
+ timeout: 5s
19
+ retries: 5
20
+
21
+ arxiv-ui:
22
+ build:
23
+ context: ./arxiv_ui
24
+ dockerfile: Dockerfile
25
+ restart: unless-stopped
26
+ ports:
27
+ - "${ARXIV_UI_PORT:-8058}:80"
28
+ depends_on:
29
+ - arxiv-api
30
+ environment:
31
+ - NODE_ENV=production
32
+ - REACT_APP_API_URL=http://localhost:${ARXIV_API_PORT:-8034}
33
+
34
+ volumes:
35
+ arxiv_data:
36
+