decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374) hide show
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
@@ -0,0 +1,777 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Research MCP server for academic search and code execution.
4
+ Provides tools for:
5
+ - Academic search: ArXiv API + Semantic Scholar API
6
+ - ArXiv HTML: Fetch HTML content from ArXiv paper pages
7
+ - Code execution: Terminal commands in Docker environment
8
+ """
9
+ import os
10
+ import asyncio
11
+ import json
12
+ import sys
13
+ import urllib.parse
14
+ import urllib.request
15
+ from typing import Any, Dict, List, Optional
16
+
17
+ import httpx
18
+ from defusedxml.ElementTree import fromstring
19
+ from mcp.server.fastmcp import FastMCP
20
+
21
+ # Research Docker environment config
22
+ from dt_arena.utils.research.helpers import get_research_container_name
23
+
24
# Name of the research Docker container, resolved once at import time by the
# project helper.
RESEARCH_CONTAINER_NAME = get_research_container_name()
# Docker endpoint; falls back to the local Unix socket when DOCKER_HOST is unset.
DOCKER_HOST = os.getenv("DOCKER_HOST", "unix:///var/run/docker.sock")

# API keys
SEMANTIC_SCHOLAR_API_KEY = os.getenv("SEMANTIC_SCHOLAR_API_KEY", "")  # Optional

# Debug: print the effective configuration on startup (to stderr).
# Only whether the API key is set is reported, never its value.
# Banner lines are plain strings: they contain no placeholders, so an f-prefix
# would be misleading.
print("[Research MCP Server] ===== STARTING =====", file=sys.stderr)
print(f"[Research MCP Server] CONTAINER_NAME: {RESEARCH_CONTAINER_NAME}", file=sys.stderr)
print(f"[Research MCP Server] DOCKER_HOST: {DOCKER_HOST}", file=sys.stderr)
print(f"[Research MCP Server] SEMANTIC_SCHOLAR_API_KEY: {'SET' if SEMANTIC_SCHOLAR_API_KEY else 'NOT SET'}", file=sys.stderr)
print("[Research MCP Server] ==================", file=sys.stderr)
sys.stderr.flush()

# Create a FastMCP server (host/port from env, used by streamable-http transport).
# Defaults: listen on all interfaces ("0.0.0.0"), port 8846. A non-numeric PORT
# makes int() raise ValueError at import time.
mcp = FastMCP(
    "Research Agent",
    host=os.getenv("HOST", "0.0.0.0"),
    port=int(os.getenv("PORT", "8846")),
)
44
+
45
# Shared HTTP client, created lazily by get_http() on first use.
_http_client: Optional[httpx.AsyncClient] = None


async def get_http() -> httpx.AsyncClient:
    """Return the shared ``httpx.AsyncClient``, creating it when needed.

    The client is cached in the module-level ``_http_client`` so repeated
    calls reuse one instance (30 second timeout). If the cached client has
    been closed it is replaced with a fresh one, because a closed client
    cannot send further requests.

    Returns:
        The shared, open ``httpx.AsyncClient``.
    """
    global _http_client
    if _http_client is None or _http_client.is_closed:
        _http_client = httpx.AsyncClient(timeout=30.0)
    return _http_client
54
+
55
+
56
+ # ==================== Academic Search Tools ====================
57
+
58
+ # async def _search_arxiv(query: str, max_results: int = 10, search_type: str = "all") -> List[Dict[str, Any]]:
59
+ # """Search ArXiv for papers.
60
+
61
+ # Args:
62
+ # query: Search query
63
+ # max_results: Maximum number of results (default: 10, max: 100)
64
+ # search_type: Search field - "all", "title", "abstract", "author", "cat"
65
+
66
+ # Returns:
67
+ # List of paper dictionaries
68
+ # """
69
+ # try:
70
+ # # Build query URL (use https to avoid redirect)
71
+ # base_url = "https://export.arxiv.org/api/query"
72
+
73
+ # # Format query for ArXiv API
74
+ # if search_type != "all":
75
+ # # Specific field search
76
+ # search_query = f"{search_type}:{query}"
77
+ # else:
78
+ # # For "all" search, use smart query formatting
79
+ # query_stripped = query.strip()
80
+ # if " " in query_stripped:
81
+ # # Multi-word query: try title search first for better precision
82
+ # # For known paper titles, title search is more accurate
83
+ # words = query_stripped.split()
84
+ # # Filter out common stop words
85
+ # stop_words = {"is", "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with", "by"}
86
+ # meaningful_words = [w for w in words if w.lower() not in stop_words]
87
+
88
+ # if len(meaningful_words) >= 2:
89
+ # # For multi-word queries, search in title for better precision
90
+ # # Use first 2 meaningful words in title search (most important keywords)
91
+ # key_words = meaningful_words[:2] # Use first 2 meaningful words
92
+ # search_query = " AND ".join([f"ti:{word}" for word in key_words])
93
+ # elif meaningful_words:
94
+ # # Single meaningful word, use all: prefix
95
+ # search_query = f"all:{meaningful_words[0]}"
96
+ # else:
97
+ # # All stop words, search in title
98
+ # search_query = f'ti:"{query_stripped}"'
99
+ # else:
100
+ # # Single word query: use all: prefix
101
+ # search_query = f"all:{query_stripped}"
102
+
103
+ # params = {
104
+ # "search_query": search_query,
105
+ # "max_results": str(min(max(1, max_results), 100)),
106
+ # "start": "0"
107
+ # }
108
+
109
+ # query_string = "&".join([f"{k}={urllib.parse.quote(str(v))}" for k, v in params.items()])
110
+ # url = f"{base_url}?{query_string}"
111
+
112
+ # # Debug: print the actual query being sent
113
+ # print(f"[Research MCP Server] ArXiv API query: {search_query}", file=sys.stderr)
114
+ # print(f"[Research MCP Server] ArXiv API URL: {url}", file=sys.stderr)
115
+
116
+ # # Make request (follow redirects automatically)
117
+ # async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
118
+ # response = await client.get(url)
119
+ # response.raise_for_status()
120
+ # response_text = response.text
121
+
122
+ # # Debug: print response info
123
+ # print(f"[Research MCP Server] ArXiv API response length: {len(response_text)}", file=sys.stderr)
124
+
125
+ # # Parse Atom XML response
126
+ # root = fromstring(response_text)
127
+ # ns = {'atom': 'http://www.w3.org/2005/Atom',
128
+ # 'arxiv': 'http://arxiv.org/schemas/atom'}
129
+
130
+ # # Debug: print number of entries found
131
+ # entries = root.findall('atom:entry', ns)
132
+ # print(f"[Research MCP Server] ArXiv API found {len(entries)} entries", file=sys.stderr)
133
+
134
+ # papers = []
135
+ # for entry in entries:
136
+ # paper = {
137
+ # "id": entry.find('atom:id', ns).text if entry.find('atom:id', ns) is not None else "",
138
+ # "title": entry.find('atom:title', ns).text if entry.find('atom:title', ns) is not None else "",
139
+ # "summary": entry.find('atom:summary', ns).text if entry.find('atom:summary', ns) is not None else "",
140
+ # "published": entry.find('atom:published', ns).text if entry.find('atom:published', ns) is not None else "",
141
+ # "updated": entry.find('atom:updated', ns).text if entry.find('atom:updated', ns) is not None else "",
142
+ # "authors": [author.find('atom:name', ns).text for author in entry.findall('atom:author', ns) if author.find('atom:name', ns) is not None],
143
+ # "primary_category": entry.find('arxiv:primary_category', ns).get('term') if entry.find('arxiv:primary_category', ns) is not None else "",
144
+ # "categories": [cat.get('term') for cat in entry.findall('atom:category', ns)],
145
+ # "links": []
146
+ # }
147
+
148
+ # # Extract links
149
+ # for link in entry.findall('atom:link', ns):
150
+ # rel = link.get('rel', '')
151
+ # href = link.get('href', '')
152
+ # if rel == 'alternate' or rel == '':
153
+ # paper['pdf_url'] = href
154
+ # elif rel == 'related':
155
+ # paper['abs_url'] = href
156
+ # paper["links"].append({"rel": rel, "href": href})
157
+
158
+ # papers.append(paper)
159
+
160
+ # return papers
161
+ # except httpx.HTTPStatusError as e:
162
+ # print(f"[Research MCP Server] ArXiv API HTTP error: {e.response.status_code} - {e.response.text[:200]}", file=sys.stderr)
163
+ # print(f"[Research MCP Server] ArXiv API query used: {search_query}", file=sys.stderr)
164
+ # return []
165
+ # except Exception as e:
166
+ # print(f"[Research MCP Server] ArXiv search error: {e}", file=sys.stderr)
167
+ # print(f"[Research MCP Server] ArXiv API query used: {search_query}", file=sys.stderr)
168
+ # return []
169
+
170
def _local_paper_to_arxiv_entry(paper_data: Dict[str, Any], base_url: str) -> Dict[str, Any]:
    """Convert one local-API paper record into the arXiv-like dict this server returns.

    Args:
        paper_data: A single record from the local API's "papers" list.
            Must contain an "id" key.
        base_url: Base URL of the local ArXiv service, used to build fallback
            abstract/PDF URLs when the record does not provide them.

    Returns:
        Dict with arXiv-style keys ("id", "title", "summary", "published",
        "updated", "authors", "primary_category", "categories", "links",
        "abs_url", "pdf_url").
    """
    paper_id = paper_data["id"]
    # Compute each fallback URL once and reuse it for both "links" and the
    # flat "abs_url"/"pdf_url" fields.
    abs_url = paper_data.get("arxiv_url", f"{base_url}/abs/{paper_id}")
    pdf_url = paper_data.get("pdf_url", f"{base_url}/pdf/{paper_id}")
    categories = paper_data.get("categories", [])
    return {
        "id": f"http://arxiv.org/abs/{paper_id}",  # Format as arXiv URL
        "title": paper_data.get("title", ""),
        "summary": paper_data.get("abstract", ""),
        "published": paper_data.get("submitted_date", ""),
        "updated": paper_data.get("updated_date", ""),
        "authors": paper_data.get("authors", []),
        "primary_category": categories[0] if categories else "",
        "categories": categories,
        "links": [
            {"rel": "alternate", "href": abs_url},
            {"rel": "related", "href": pdf_url},
        ],
        "abs_url": abs_url,
        "pdf_url": pdf_url,
    }


async def _search_arxiv(query: str, max_results: int = 10, search_type: str = "all") -> List[Dict[str, Any]]:
    """Search ArXiv for papers using the local ArXiv API.

    The service base URL comes from the ``ARXIV_API_URL`` environment variable
    (default ``http://localhost:8034``); the request goes to
    ``{base_url}/api/v1/papers``.

    Args:
        query: Search term; surrounding whitespace is stripped.
        max_results: Maximum number of results, clamped to the range 1..100.
        search_type: Only ``"cat"`` changes the request: the query is sent as
            the ``category`` filter instead of ``q``. Any other value is not
            forwarded and results in a plain ``q`` search.

    Returns:
        List of arXiv-like paper dictionaries. An empty list is returned on
        any HTTP or processing error (the error is logged to stderr).
    """
    try:
        # Use local ArXiv API instead of real arXiv. A trailing slash on the
        # configured URL is dropped so joined paths do not contain "//".
        base_url = os.getenv("ARXIV_API_URL", "http://localhost:8034").rstrip("/")

        # Local API uses a simple query format: just pass the search term.
        term = query.strip()
        limit = min(max(1, max_results), 100)
        if search_type == "cat":
            # Category search replaces the free-text "q" parameter.
            params = {"limit": limit, "category": term}
        else:
            params = {"q": term, "limit": limit}

        url = f"{base_url}/api/v1/papers"

        print(f"[Research MCP Server] Local ArXiv API query: {query}", file=sys.stderr)
        print(f"[Research MCP Server] Local ArXiv API URL: {url}", file=sys.stderr)
        print(f"[Research MCP Server] Query params: {params}", file=sys.stderr)

        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            response = await client.get(url, params=params)
            response.raise_for_status()
            data = response.json()

        print(f"[Research MCP Server] Local ArXiv API found {data.get('total', 0)} papers", file=sys.stderr)

        # Convert local API response to arXiv-like format.
        papers = []
        for paper_data in data.get("papers", []):
            # A record without an id cannot be turned into URLs. Skip it rather
            # than letting a KeyError discard every other result.
            if not isinstance(paper_data, dict) or "id" not in paper_data:
                print(
                    f"[Research MCP Server] Skipping local ArXiv record without id: {str(paper_data)[:200]}",
                    file=sys.stderr,
                )
                continue
            papers.append(_local_paper_to_arxiv_entry(paper_data, base_url))

        return papers

    except httpx.HTTPStatusError as e:
        print(f"[Research MCP Server] Local ArXiv API HTTP error: {e.response.status_code} - {e.response.text[:200]}", file=sys.stderr)
        return []
    except Exception as e:
        # Best-effort boundary: log the full traceback and report "no results".
        print(f"[Research MCP Server] Local ArXiv API error: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)
        return []
233
+
234
+
235
async def _search_semantic_scholar(query: str, limit: int = 10) -> List[Dict[str, Any]]:
    """Search Semantic Scholar for papers.

    Sends a GET request to the Semantic Scholar Graph API paper-search
    endpoint. The ``x-api-key`` header is added only when
    ``SEMANTIC_SCHOLAR_API_KEY`` is set.

    Args:
        query: Search query
        limit: Maximum number of results (default: 10, max: 100); values
            outside 1..100 are clamped.

    Returns:
        List of paper dictionaries with keys "paperId", "title", "abstract",
        "authors", "year", "venue", "citationCount", "referenceCount", "url".
        Text fields default to "", counts to 0, and authors to [] when the
        response omits them or returns JSON null. An empty list is returned on
        any error (the error is logged to stderr).
    """
    try:
        url = "https://api.semanticscholar.org/graph/v1/paper/search"
        params = {
            "query": query,
            "limit": min(max(1, limit), 100),
            "fields": "paperId,title,abstract,authors,year,venue,citationCount,referenceCount,url",
        }

        headers = {}
        if SEMANTIC_SCHOLAR_API_KEY:
            headers["x-api-key"] = SEMANTIC_SCHOLAR_API_KEY

        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(url, params=params, headers=headers)
            response.raise_for_status()
            data = response.json()

        # `dict.get(key, default)` only applies the default when the key is
        # missing. A key present with JSON null yields None, so `or` is used
        # to apply the intended defaults in both cases.
        return [
            {
                "paperId": paper.get("paperId") or "",
                "title": paper.get("title") or "",
                "abstract": paper.get("abstract") or "",
                "authors": [
                    {"name": author.get("name") or "", "authorId": author.get("authorId")}
                    for author in paper.get("authors") or []
                ],
                "year": paper.get("year"),
                "venue": paper.get("venue") or "",
                "citationCount": paper.get("citationCount") or 0,
                "referenceCount": paper.get("referenceCount") or 0,
                "url": paper.get("url") or "",
            }
            for paper in data.get("data") or []
        ]
    except Exception as e:
        # Best-effort boundary: log and report "no results" to the caller.
        print(f"[Research MCP Server] Semantic Scholar search error: {e}", file=sys.stderr)
        return []
280
+
281
+
282
+ # search_arxiv tool removed - use search_arxiv_html instead for HTML-based search
283
+
284
+
285
@mcp.tool()
async def search_semantic_scholar(query: str, limit: int = 10) -> str:
    """Search Semantic Scholar for academic papers.

    Args:
        query: Search query (e.g., "quantum computing", "machine learning")
        limit: Maximum number of results (default: 10, max: 100)

    Returns:
        JSON string. On success it carries "query", "results", "source" and
        "success": true; on failure it carries "error" and "success": false.

    Example:
        search_semantic_scholar("quantum computing", limit=5) -> Returns Semantic Scholar papers
    """
    # Reject missing or whitespace-only queries before touching the network.
    if not (query and query.strip()):
        rejection = {
            "error": "Query cannot be empty",
            "success": False
        }
        return json.dumps(rejection)

    # Keep the requested page size inside the documented 1..100 window.
    limit = min(max(1, limit), 100)

    try:
        print(f"[Research MCP Server] Searching Semantic Scholar: {query}", file=sys.stderr)
        papers = await _search_semantic_scholar(query, limit=limit)
        payload = {
            "query": query,
            "results": papers,
            "source": "semanticscholar",
            "success": True
        }
        return json.dumps(payload, ensure_ascii=False, indent=2)
    except Exception as exc:
        failure = {
            "error": f"Failed to search Semantic Scholar: {str(exc)}",
            "success": False,
            "query": query
        }
        return json.dumps(failure)
322
+
323
+
324
@mcp.tool()
async def fetch_arxiv_paper_html(paper_id: str, base_url: Optional[str] = None) -> str:
    """Fetch paper abstract page as HTML from local ArXiv (like real arXiv website).

    This tool returns the full HTML content of a paper's abstract page,
    including title, authors, abstract, metadata, and styling.

    Args:
        paper_id: ArXiv paper ID. Accepts a bare ID ("2301.12345"), an
            "arxiv:"/"arXiv:" prefixed ID, or a URL containing
            "arxiv.org/abs/". A trailing version suffix ("v2") is removed.
        base_url: Base URL for ArXiv. When omitted, the ARXIV_API_URL
            environment variable is used, falling back to
            "http://localhost:8034".

    Returns:
        JSON string containing the HTML content, paper URL, and success status

    Example:
        fetch_arxiv_paper_html("2308.03298") -> Returns full HTML page for the paper
    """
    if not paper_id or not paper_id.strip():
        return json.dumps({
            "error": "Paper ID cannot be empty",
            "success": False
        })

    # Clean paper ID. The prefix test is case-insensitive because the
    # canonical spelling of the identifier scheme is "arXiv:".
    paper_id = paper_id.strip()
    if paper_id.lower().startswith("arxiv:"):
        paper_id = paper_id[len("arxiv:"):].strip()
    elif "arxiv.org/abs/" in paper_id:
        paper_id = paper_id.split("arxiv.org/abs/")[-1]

    # Remove version suffix
    if "v" in paper_id and paper_id.split("v")[-1].isdigit():
        paper_id = paper_id.rsplit("v", 1)[0]

    # Inputs such as "arxiv:" normalise to nothing; do not request "/abs/".
    if not paper_id:
        return json.dumps({
            "error": "Paper ID cannot be empty",
            "success": False
        })

    # Default to local ArXiv
    if base_url is None:
        base_url = os.getenv("ARXIV_API_URL", "http://localhost:8034")

    base_url = base_url.rstrip("/")

    # Built outside the try block so the HTTP error handler can always report it.
    paper_url = f"{base_url}/abs/{paper_id}"

    try:
        print(f"[Research MCP Server] Fetching paper HTML: {paper_url}", file=sys.stderr)

        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            response = await client.get(paper_url)
            response.raise_for_status()
            html_content = response.text

        print(f"[Research MCP Server] Successfully fetched HTML (length: {len(html_content)})", file=sys.stderr)

        return json.dumps({
            "paper_id": paper_id,
            "url": paper_url,
            "html_content": html_content,
            "content_length": len(html_content),
            "success": True
        }, ensure_ascii=False, indent=2)

    except httpx.HTTPStatusError as e:
        # A 404 gets a friendlier message; every other status is reported as-is.
        error_msg = f"HTTP {e.response.status_code}"
        if e.response.status_code == 404:
            error_msg = f"Paper {paper_id} not found"
        return json.dumps({
            "error": error_msg,
            "paper_id": paper_id,
            "url": paper_url,
            "status_code": e.response.status_code,
            "success": False
        })
    except Exception as e:
        return json.dumps({
            "error": f"Failed to fetch paper HTML: {str(e)}",
            "paper_id": paper_id,
            "success": False
        })
402
+
403
+
404
@mcp.tool()
async def search_arxiv_html(query: str, category: Optional[str] = None, limit: int = 20) -> str:
    """Search arXiv and get results as HTML page (like real arXiv search results).

    This tool returns search results formatted as an HTML page with paper listings,
    similar to the real arXiv website. The server is taken from the
    ARXIV_API_URL environment variable (default "http://localhost:8034").

    Args:
        query: Search query (searches in title, abstract, authors)
        category: Optional category filter (e.g., "cs", "math", "physics")
        limit: Maximum number of results (default: 20, max: 100)

    Returns:
        JSON string containing the HTML content and search metadata

    Example:
        search_arxiv_html("transformer", category="cs", limit=10)
    """
    if not query or not query.strip():
        return json.dumps({
            "error": "Query cannot be empty",
            "success": False
        })

    # Drop a trailing slash so a value like "http://host/" does not produce
    # "http://host//html/search" (same normalisation as fetch_arxiv_paper_html).
    base_url = os.getenv("ARXIV_API_URL", "http://localhost:8034").rstrip("/")
    limit = min(max(1, limit), 100)

    try:
        # Build URL; the category filter is only sent when one was given.
        params = {"q": query.strip(), "limit": limit}
        if category:
            params["category"] = category

        url = f"{base_url}/html/search"

        print(f"[Research MCP Server] Searching arXiv HTML: {query}", file=sys.stderr)

        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            response = await client.get(url, params=params)
            response.raise_for_status()
            html_content = response.text

        print(f"[Research MCP Server] Search HTML fetched (length: {len(html_content)})", file=sys.stderr)

        return json.dumps({
            "query": query,
            "category": category,
            "url": url,
            "html_content": html_content,
            "content_length": len(html_content),
            "success": True
        }, ensure_ascii=False, indent=2)

    except Exception as e:
        return json.dumps({
            "error": f"Failed to search arXiv HTML: {str(e)}",
            "query": query,
            "success": False
        })
463
+
464
+
465
@mcp.tool()
async def list_arxiv_papers_html(category: Optional[str] = None, limit: int = 20) -> str:
    """List recent arXiv papers as HTML page.

    This tool returns a list of recent papers formatted as an HTML page. The
    server is taken from the ARXIV_API_URL environment variable
    (default "http://localhost:8034").

    Args:
        category: Optional category filter (e.g., "cs", "math", "physics")
        limit: Maximum number of results (default: 20, max: 100)

    Returns:
        JSON string containing the HTML content

    Example:
        list_arxiv_papers_html(category="cs", limit=15)
    """
    # Drop a trailing slash so a value like "http://host/" does not produce
    # "http://host//html/list" (same normalisation as fetch_arxiv_paper_html).
    base_url = os.getenv("ARXIV_API_URL", "http://localhost:8034").rstrip("/")
    limit = min(max(1, limit), 100)

    try:
        # The category filter is only sent when one was given.
        params = {"limit": limit}
        if category:
            params["category"] = category

        url = f"{base_url}/html/list"

        print("[Research MCP Server] Listing arXiv papers HTML", file=sys.stderr)

        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            response = await client.get(url, params=params)
            response.raise_for_status()
            html_content = response.text

        print(f"[Research MCP Server] List HTML fetched (length: {len(html_content)})", file=sys.stderr)

        return json.dumps({
            "category": category,
            "url": url,
            "html_content": html_content,
            "content_length": len(html_content),
            "success": True
        }, ensure_ascii=False, indent=2)

    except Exception as e:
        return json.dumps({
            "error": f"Failed to list arXiv papers HTML: {str(e)}",
            "success": False
        })
513
+
514
+
515
@mcp.tool()
async def upload_arxiv_paper(
    paper_id: str,
    title: str,
    authors: List[str],
    abstract: str,
    categories: List[str],
    submitted_date: Optional[str] = None,
    pdf_url: Optional[str] = None
) -> str:
    """Upload a new paper to arXiv (local sandbox).

    This tool allows the agent to submit/upload a new paper to the local arXiv database.
    Useful for testing paper submission workflows or populating the database.
    The paper is POSTed as JSON to "<ARXIV_API_URL>/api/v1/papers"
    (ARXIV_API_URL defaults to "http://localhost:8034").

    Args:
        paper_id: Paper ID (e.g., "2401.12345", should be unique)
        title: Paper title
        authors: List of author names (e.g., ["John Doe", "Jane Smith"])
        abstract: Paper abstract/summary
        categories: List of arXiv categories (e.g., ["cs.AI", "cs.LG"])
        submitted_date: Submission date in ISO format (default: current UTC time)
        pdf_url: Optional PDF URL (default: "<base_url>/pdf/<paper_id>")

    Returns:
        JSON string containing the uploaded paper info and success status

    Example:
        upload_arxiv_paper(
            paper_id="2401.99999",
            title="A Novel Approach to Machine Learning",
            authors=["John Doe", "Jane Smith"],
            abstract="This paper presents...",
            categories=["cs.LG", "cs.AI"]
        )
    """
    if not paper_id or not paper_id.strip():
        return json.dumps({
            "error": "Paper ID cannot be empty",
            "success": False
        })

    if not title or not title.strip():
        return json.dumps({
            "error": "Title cannot be empty",
            "success": False
        })

    if not authors:
        return json.dumps({
            "error": "At least one author is required",
            "success": False
        })

    if not abstract or not abstract.strip():
        return json.dumps({
            "error": "Abstract cannot be empty",
            "success": False
        })

    if not categories:
        return json.dumps({
            "error": "At least one category is required",
            "success": False
        })

    # Normalise once so the stored id, the generated URLs and the values
    # reported back to the caller all agree (previously only "id" was stripped).
    paper_id = paper_id.strip()
    base_url = os.getenv("ARXIV_API_URL", "http://localhost:8034").rstrip("/")

    try:
        # Prepare paper data
        if not submitted_date:
            from datetime import datetime as dt, timezone
            # Same naive-UTC ISO string that utcnow() produced, without
            # calling the deprecated datetime.utcnow().
            submitted_date = dt.now(timezone.utc).replace(tzinfo=None).isoformat()

        paper_data = {
            "id": paper_id,
            "title": title.strip(),
            "authors": authors,
            "abstract": abstract.strip(),
            "categories": categories,
            "submitted_date": submitted_date,
            "updated_date": submitted_date,
        }

        paper_data["pdf_url"] = pdf_url if pdf_url else f"{base_url}/pdf/{paper_id}"
        paper_data["arxiv_url"] = f"{base_url}/abs/{paper_id}"

        url = f"{base_url}/api/v1/papers"

        print(f"[Research MCP Server] Uploading paper to arXiv: {paper_id}", file=sys.stderr)
        print(f"[Research MCP Server] Title: {title}", file=sys.stderr)

        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            response = await client.post(url, json=paper_data)
            response.raise_for_status()
            result = response.json()

        print(f"[Research MCP Server] Successfully uploaded paper: {paper_id}", file=sys.stderr)

        return json.dumps({
            "paper_id": paper_id,
            "title": title,
            "url": f"{base_url}/abs/{paper_id}",
            "paper_data": result,
            "success": True,
            "message": "Paper successfully uploaded to arXiv"
        }, ensure_ascii=False, indent=2)

    except httpx.HTTPStatusError as e:
        status_code = e.response.status_code
        error_msg = f"HTTP {status_code}"

        if status_code == 400:
            # Prefer the "detail" field of a JSON error body; fall back to the
            # first 200 characters of the raw body when it is not a JSON object.
            try:
                error_detail = e.response.json().get("detail", "")
            except (ValueError, AttributeError):
                error_detail = e.response.text[:200]
            error_msg = f"Paper upload failed: {error_detail}"

        print(f"[Research MCP Server] Upload failed: {error_msg}", file=sys.stderr)

        return json.dumps({
            "error": error_msg,
            "paper_id": paper_id,
            "status_code": status_code,
            "success": False
        })
    except Exception as e:
        print(f"[Research MCP Server] Upload error: {str(e)}", file=sys.stderr)
        return json.dumps({
            "error": f"Failed to upload paper: {str(e)}",
            "paper_id": paper_id,
            "success": False
        })
653
+
654
+
655
+ # ==================== Code Execution Tools ====================
656
+
657
async def _execute_command_in_container(command: str, timeout: int = 180) -> Dict[str, Any]:
    """Execute a command in the terminal Docker container.

    Runs ``docker exec -u root <RESEARCH_CONTAINER_NAME> bash -c <command>``
    as a subprocess and captures its output. Failures are reported in the
    returned dictionary rather than raised (task cancellation is re-raised).

    Args:
        command: The command to execute
        timeout: Timeout in seconds (default: 180)

    Returns:
        Dictionary with stdout, stderr, return_code, success and command.
        On timeout or on a failure to run the command it also carries an
        "error" key and return_code is -1.
    """
    try:
        docker_cmd = [
            "docker", "exec",
            "-u", "root",
            RESEARCH_CONTAINER_NAME,
            "bash", "-c", command
        ]

        print(f"[Research MCP Server] Executing command: {' '.join(docker_cmd)}", file=sys.stderr)

        process = await asyncio.create_subprocess_exec(
            *docker_cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE
        )

        try:
            # Execute the command with timeout
            stdout, stderr = await asyncio.wait_for(
                process.communicate(),
                timeout=timeout
            )
        except asyncio.TimeoutError:
            # Kill the process if it times out. It may have exited in the
            # meantime, in which case kill() raises ProcessLookupError; that
            # must not turn the timeout report into a generic failure.
            # NOTE(review): this kills the local `docker exec` client; whether
            # the command inside the container also stops is not established
            # here - confirm.
            try:
                process.kill()
            except ProcessLookupError:
                pass
            await process.wait()
            return {
                "stdout": "",
                "stderr": f"Command timed out after {timeout} seconds",
                "return_code": -1,
                "success": False,
                "command": command,
                "error": "timeout"
            }
        except asyncio.CancelledError:
            # The awaiting task was cancelled: do not leave the subprocess
            # running behind it, then let the cancellation propagate.
            try:
                process.kill()
            except ProcessLookupError:
                pass
            raise

        # Undecodable bytes are replaced rather than failing the whole call.
        stdout_str = stdout.decode('utf-8', errors='replace')
        stderr_str = stderr.decode('utf-8', errors='replace')
        return_code = process.returncode

        return {
            "stdout": stdout_str,
            "stderr": stderr_str,
            "return_code": return_code,
            "success": return_code == 0,
            "command": command
        }

    except Exception as e:
        return {
            "stdout": "",
            "stderr": f"Failed to execute command: {str(e)}",
            "return_code": -1,
            "success": False,
            "command": command,
            "error": str(e)
        }
724
+
725
+
726
@mcp.tool()
async def execute_command(command: str, timeout: int = 180) -> str:
    """Execute a command in the terminal Docker environment (for code execution and programming tasks).

    Args:
        command: The command to execute (e.g., "ls", "pwd", "python script.py", "git clone ...")
        timeout: Timeout in seconds (default: 180, max: 300)

    Returns:
        JSON string containing the command output, stderr, return code, and success status

    Example:
        execute_command("ls -la") -> Returns directory listing
        execute_command("python script.py") -> Executes Python script
        execute_command("git clone https://github.com/user/repo.git") -> Clones repository
    """
    # Surrounding whitespace is dropped; a blank command is rejected outright.
    trimmed = command.strip() if command else ""
    if not trimmed:
        rejection = {
            "error": "Command cannot be empty",
            "success": False
        }
        return json.dumps(rejection)

    # Limit timeout to prevent abuse: clamp into the 1..300 second window.
    timeout = min(max(timeout, 1), 300)

    try:
        outcome = await _execute_command_in_container(trimmed, timeout)
        return json.dumps(outcome, ensure_ascii=False, indent=2)
    except Exception as exc:
        failure = {
            "error": f"Failed to execute command: {str(exc)}",
            "success": False,
            "command": command
        }
        return json.dumps(failure)
760
+
761
+
762
def main():
    """Log the startup banner and run the MCP server over streamable HTTP."""
    print("Starting Research MCP server...", file=sys.stderr, flush=True)

    # HOST/PORT come from the environment, with the defaults shown here.
    host = os.environ.get("HOST", "0.0.0.0")
    port = int(os.environ.get("PORT", "8846"))

    print(f"[Research MCP Server] Starting on {host}:{port}", file=sys.stderr, flush=True)

    # NOTE(review): host and port are only logged in this function; they are
    # not passed to mcp.run(). Presumably the `mcp` object is configured with
    # them where it is created - confirm, otherwise the log line can be wrong.
    mcp.run(transport="streamable-http")


if __name__ == "__main__":
    main()
777
+