paperplain-mcp 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +88 -0
  2. package/package.json +46 -0
  3. package/server.js +385 -0
package/README.md ADDED
@@ -0,0 +1,88 @@
1
+ # PaperPlain MCP
2
+
3
+ Give any AI agent access to 200M+ peer-reviewed papers from PubMed, ArXiv, and Semantic Scholar.
4
+
5
+ **Free. No API key. No account. No backend.**
6
+
7
+ The MCP calls PubMed and ArXiv directly and returns papers with full abstracts. Your agent's own LLM synthesizes the findings — no black-box summaries, no extra cost, full context.
8
+
9
+ ## Install
10
+
11
+ ```bash
12
+ npx -y paperplain-mcp
13
+ ```
14
+
15
+ ## Setup
16
+
17
+ Add to your MCP config file (Claude Desktop, Cursor, Windsurf, or any MCP-compatible client):
18
+
19
+ ```json
20
+ {
21
+ "mcpServers": {
22
+ "paperplain": {
23
+ "command": "npx",
24
+ "args": ["-y", "paperplain-mcp"]
25
+ }
26
+ }
27
+ }
28
+ ```
29
+
30
+ Restart your client. That's it.
31
+
32
+ **Config file locations:**
33
+ - Claude Desktop (Mac): `~/Library/Application Support/Claude/claude_desktop_config.json`
34
+ - Cursor: `.cursor/mcp.json`
35
+ - Windsurf: `~/.codeium/windsurf/mcp_config.json`
36
+
37
+ ## Tools
38
+
39
+ ### `search_research`
40
+ Search PubMed, ArXiv, and Semantic Scholar for peer-reviewed papers. Auto-routes based on topic (health → PubMed + S2, CS/AI → ArXiv + S2, general → all three).
41
+
42
+ ```
43
+ query Natural language question or topic
44
+ max_results 1–10 papers (default: 5)
45
+ domain "auto" | "health" | "cs" | "general"
46
+ ```
47
+
48
+ Returns: array of papers with title, authors, abstract, published date, URL, DOI.
49
+
50
+ ### `fetch_paper`
51
+ Fetch full metadata and abstract for a specific paper by ID.
52
+
53
+ ```
54
+ paper_id ArXiv ID ("2301.07041") or PubMed ID ("pubmed:37183813")
55
+ ```
56
+
57
+ ## How it works
58
+
59
+ 1. Your agent calls `search_research("effects of sleep deprivation on memory")`
60
+ 2. PaperPlain routes to PubMed + Semantic Scholar (health topic), fetches abstracts
61
+ 3. Returns structured JSON with papers and full abstracts
62
+ 4. Your agent's LLM synthesizes findings using its full context
63
+
64
+ No LLM calls on our side. No cost. No rate limits beyond what PubMed/ArXiv impose.
65
+
66
+ ## Example
67
+
68
+ ```
69
+ User: What does the research say about cold exposure and metabolism?
70
+
71
+ Agent calls: search_research("cold exposure brown adipose tissue metabolism")
72
+ → Returns 5 PubMed papers with abstracts
73
+ → Agent synthesizes: "Three RCTs found that regular cold water immersion (14°C,
74
+ 1hr/week for 6 weeks) increased brown adipose tissue activity by 37-42%..."
75
+ ```
76
+
77
+ ## Self-host
78
+
79
+ ```bash
80
+ git clone https://github.com/sulmatajb/paperplain
81
+ cd paperplain/mcp
82
+ npm install
83
+ node server.js
84
+ ```
85
+
86
+ ## License
87
+
88
+ MIT — do whatever you want with it.
package/package.json ADDED
@@ -0,0 +1,46 @@
1
+ {
2
+ "name": "paperplain-mcp",
3
+ "version": "1.1.1",
4
+ "description": "MCP server — search 200M+ peer-reviewed papers from PubMed, ArXiv, and Semantic Scholar. Free. No API key.",
5
+ "type": "module",
6
+ "bin": {
7
+ "paperplain-mcp": "./server.js"
8
+ },
9
+ "files": [
10
+ "server.js",
11
+ "README.md"
12
+ ],
13
+ "scripts": {
14
+ "start": "node server.js",
15
+ "dev": "node --watch server.js"
16
+ },
17
+ "keywords": [
18
+ "mcp",
19
+ "model-context-protocol",
20
+ "pubmed",
21
+ "arxiv",
22
+ "semantic-scholar",
23
+ "research",
24
+ "papers",
25
+ "science",
26
+ "ai-agent",
27
+ "claude"
28
+ ],
29
+ "homepage": "https://github.com/sulmatajb/paperplain",
30
+ "repository": {
31
+ "type": "git",
32
+ "url": "https://github.com/sulmatajb/paperplain.git",
33
+ "directory": "mcp"
34
+ },
35
+ "bugs": {
36
+ "url": "https://github.com/sulmatajb/paperplain/issues"
37
+ },
38
+ "license": "MIT",
39
+ "dependencies": {
40
+ "@modelcontextprotocol/sdk": "^1.0.0",
41
+ "zod": "^3.22.0"
42
+ },
43
+ "engines": {
44
+ "node": ">=18"
45
+ }
46
+ }
package/server.js ADDED
@@ -0,0 +1,385 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * PaperPlain MCP Server
4
+ * Searches PubMed and ArXiv directly — no API key, no backend, completely free.
5
+ * Returns papers with full abstracts so the agent's LLM can synthesize.
6
+ */
7
+
8
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
9
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
10
+ import { z } from "zod";
11
+
12
+ const ARXIV_BASE = "https://export.arxiv.org/api/query";
13
+ const PUBMED_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils";
14
+ const PUBMED_PARAMS = "tool=paperplain&email=hello@paperplain.io";
15
+ const SEMANTIC_SCHOLAR_BASE = "https://api.semanticscholar.org/graph/v1";
16
+
17
// ── Domain classifier (keyword-based, no LLM needed) ───────────────────────
const HEALTH_KEYWORDS =
  /\b(sleep|insomnia|anxiety|anxious|stress|depress|pain|ache|headache|migraine|diet|nutrition|weight|obese|exercise|fatigue|tired|energy|focus|adhd|autism|cancer|diabetes|blood|pressure|heart|cholesterol|vitamin|supplement|immune|gut|digestion|mental health|therapy|meditation|mindfulness|mood|burnout|inflammation|allergy|asthma|skin|aging|memory|alzheimer|cognitive|brain|alcohol|smoking|addiction|symptoms|treatment|medicine|medication|dose|chronic|surgery|vaccine|antibiot|clinical|patient|disease|disorder|syndrome|injury|rehabilitation|psychiatric|neurol|cardio|oncol|gastro|pediatr|geriatric)\b/i;
const CS_KEYWORDS =
  /\b(algorithm|neural network|machine learning|deep learning|transformer|llm|language model|reinforcement|classification|clustering|regression|computer vision|nlp|natural language|robotics|autonomous|blockchain|cryptograph|database|distributed|cloud|microservice|compiler|operating system|cybersecurity|quantum comput|software engineer|retrieval|embedding|vector|attention|fine.tun|prompt|inference|benchmark)\b/i;

/**
 * Classify a search query into a routing domain.
 * Checked in priority order: health wins over cs if both match.
 *
 * @param {string} query - Natural-language search query.
 * @returns {"health"|"cs"|"general"} routing domain for source selection.
 */
function classifyDomain(query) {
  const rules = [
    ["health", HEALTH_KEYWORDS],
    ["cs", CS_KEYWORDS],
  ];
  for (const [domain, pattern] of rules) {
    if (pattern.test(query)) return domain;
  }
  return "general";
}
28
+
29
// ── ArXiv ──────────────────────────────────────────────────────────────────

// Decode the predefined XML entities plus numeric character references.
// ArXiv's Atom feed escapes these inside <title>/<summary>/<name>, so the
// raw captures would otherwise leak "&amp;" etc. into paper metadata.
// &amp; is decoded last so double-escaped text (e.g. "&amp;lt;") resolves
// to exactly one level, matching standard XML unescaping.
function decodeXmlEntities(text) {
  return text
    .replace(/&#(\d+);/g, (_, n) => String.fromCodePoint(Number(n)))
    .replace(/&#x([0-9a-fA-F]+);/g, (_, n) => String.fromCodePoint(parseInt(n, 16)))
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&quot;/g, '"')
    .replace(/&apos;/g, "'")
    .replace(/&amp;/g, "&");
}

/**
 * Parse an ArXiv Atom feed into an array of paper objects.
 * Regex-based on purpose: the feed shape is stable and this avoids an
 * XML-parser dependency. Entries missing an id or title are skipped.
 *
 * @param {string} xml - Raw Atom XML from the ArXiv export API.
 * @returns {Array<{id: string, source: string, title: string, authors: string[],
 *   abstract: string, published: string, doi: string, url: string, pdf_url: string}>}
 */
function parseArxivXml(xml) {
  const papers = [];
  const entries = xml.match(/<entry>([\s\S]*?)<\/entry>/g) || [];
  for (const entry of entries) {
    const id =
      (entry.match(/<id>https?:\/\/arxiv\.org\/abs\/([^<]+)<\/id>/) || [])[1] || "";
    const title = decodeXmlEntities(
      (entry.match(/<title>([\s\S]*?)<\/title>/) || [])[1]
        ?.replace(/\s+/g, " ")
        .trim() || ""
    );
    const abstract = decodeXmlEntities(
      (entry.match(/<summary>([\s\S]*?)<\/summary>/) || [])[1]
        ?.replace(/\s+/g, " ")
        .trim() || ""
    );
    // <published> is an ISO timestamp; keep only the YYYY-MM-DD date part.
    const published =
      (entry.match(/<published>([^<]+)<\/published>/) || [])[1]?.slice(0, 10) || "";
    const authors = [...entry.matchAll(/<name>([^<]+)<\/name>/g)].map((m) =>
      decodeXmlEntities(m[1].trim())
    );
    const doi =
      (entry.match(/<arxiv:doi[^>]*>([^<]+)<\/arxiv:doi>/) || [])[1] || "";
    if (!id || !title) continue;
    papers.push({
      id: `arxiv:${id}`,
      source: "arxiv",
      title,
      authors,
      abstract,
      published,
      doi,
      url: `https://arxiv.org/abs/${id}`,
      pdf_url: `https://arxiv.org/pdf/${id}`,
    });
  }
  return papers;
}
70
+
71
/**
 * Search ArXiv's public Atom API, relevance-sorted.
 * Returns [] on any non-2xx response rather than throwing.
 *
 * @param {string} query - Free-text query (matched against all fields).
 * @param {number} maxResults - Maximum entries to request.
 * @returns {Promise<object[]>} parsed paper objects (see parseArxivXml).
 */
async function searchArxiv(query, maxResults) {
  const params = [
    `search_query=all:${encodeURIComponent(query)}`,
    "start=0",
    `max_results=${maxResults}`,
    "sortBy=relevance",
    "sortOrder=descending",
  ].join("&");
  const response = await fetch(`${ARXIV_BASE}?${params}`);
  if (!response.ok) return [];
  const xml = await response.text();
  return parseArxivXml(xml);
}
77
+
78
/**
 * Fetch a single ArXiv paper by ID.
 * Accepts "arxiv:ID", a bare ID, or a full arxiv.org/abs/... URL.
 *
 * @param {string} arxivId - Paper identifier in any of the accepted forms.
 * @returns {Promise<object|null>} the paper, or null if not found / fetch failed.
 */
async function fetchArxivById(arxivId) {
  const normalized = arxivId
    .replace(/^arxiv:/i, "")
    .replace(/^.*abs\//, "")
    .trim();
  const response = await fetch(`${ARXIV_BASE}?id_list=${normalized}`);
  if (!response.ok) return null;
  const [first] = parseArxivXml(await response.text());
  return first ?? null;
}
86
+
87
// ── PubMed ─────────────────────────────────────────────────────────────────

/**
 * Search PubMed via the NCBI E-utilities pipeline:
 * esearch (relevance-ranked PMIDs) → esummary (title/authors/dates)
 * → efetch (abstracts). Returns [] if any required stage fails.
 *
 * @param {string} query - Free-text query for esearch.
 * @param {number} maxResults - Maximum PMIDs to retrieve.
 * @returns {Promise<object[]>} paper objects in PubMed's relevance order.
 */
async function searchPubMed(query, maxResults) {
  // Stage 1: find matching PMIDs.
  const searchRes = await fetch(
    `${PUBMED_BASE}/esearch.fcgi?db=pubmed&term=${encodeURIComponent(query)}&retmax=${maxResults}&retmode=json&sort=relevance&${PUBMED_PARAMS}`
  );
  if (!searchRes.ok) return [];
  const searchData = await searchRes.json();
  const pmids = searchData?.esearchresult?.idlist || [];
  if (pmids.length === 0) return [];

  // Stage 2: summaries (title, authors, date) for the whole batch.
  const summaryRes = await fetch(
    `${PUBMED_BASE}/esummary.fcgi?db=pubmed&id=${pmids.join(",")}&retmode=json&${PUBMED_PARAMS}`
  );
  if (!summaryRes.ok) return [];
  const summaryData = await summaryRes.json();
  const summaries = summaryData?.result || {};

  // Stage 3: abstracts — esummary does not include them.
  const abstracts = await fetchPubMedAbstracts(pmids);

  const papers = [];
  for (const pmid of pmids) {
    const item = summaries[pmid];
    if (!item?.title) continue;
    const articleIds = Array.isArray(item.articleids) ? item.articleids : [];
    const doi = articleIds.find((e) => e.idtype === "doi")?.value || "";
    papers.push({
      id: `pubmed:${pmid}`,
      source: "pubmed",
      title: item.title.trim(),
      authors: Array.isArray(item.authors)
        ? item.authors.map((a) => a.name).filter(Boolean)
        : [],
      abstract: abstracts[pmid] || "",
      published: item.epubdate || item.pubdate || "",
      doi,
      url: `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`,
    });
  }
  return papers;
}
129
+
130
/**
 * Fetch abstracts for a batch of PMIDs via efetch (XML).
 * Returns a pmid → abstract map; PMIDs with no abstract are simply absent.
 * Any HTTP failure yields an empty map so callers degrade gracefully.
 *
 * @param {string[]} pmids - PubMed IDs to fetch in one request.
 * @returns {Promise<Record<string, string>>}
 */
async function fetchPubMedAbstracts(pmids) {
  const response = await fetch(
    `${PUBMED_BASE}/efetch.fcgi?db=pubmed&id=${pmids.join(",")}&retmode=xml&rettype=abstract&${PUBMED_PARAMS}`
  );
  if (!response.ok) return {};
  const xml = await response.text();
  const articles = xml.match(/<PubmedArticle>[\s\S]*?<\/PubmedArticle>/g) || [];
  const byPmid = {};
  for (const article of articles) {
    const pmid = article.match(/<PMID[^>]*>(\d+)<\/PMID>/)?.[1];
    if (!pmid) continue;
    // Abstracts may be split into labelled <AbstractText> sections
    // (Background/Methods/Results/...); join them in document order.
    const sections = [
      ...article.matchAll(/<AbstractText[^>]*>([\s\S]*?)<\/AbstractText>/g),
    ].map((m) => m[1].replace(/\s+/g, " ").trim());
    if (sections.length > 0) {
      byPmid[pmid] = sections.join(" ");
    }
  }
  return byPmid;
}
146
+
147
// ── Semantic Scholar ───────────────────────────────────────────────────────

/**
 * Search the Semantic Scholar Graph API (keyless, shared rate limit).
 * Records without a paperId, title, or abstract are skipped — the abstract
 * is the whole value of a result here. Returns papers sorted by citation
 * count (descending), or [] on any failure or 10s timeout.
 *
 * @param {string} query - Free-text query.
 * @param {number} maxResults - Maximum results to request.
 * @returns {Promise<object[]>}
 */
async function searchSemanticScholar(query, maxResults) {
  try {
    const fields = "title,abstract,authors,year,citationCount,openAccessPdf,externalIds";
    const url = `${SEMANTIC_SCHOLAR_BASE}/paper/search?query=${encodeURIComponent(query)}&limit=${maxResults}&fields=${fields}`;
    // S2 can hang under load; abort rather than stalling the agent.
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), 10000);
    let response;
    try {
      response = await fetch(url, { signal: controller.signal });
    } finally {
      clearTimeout(timer);
    }
    if (!response.ok) return [];
    const payload = await response.json().catch(() => null);
    const items = payload?.data;
    if (!items) return [];

    const papers = [];
    for (const item of items) {
      if (!item.paperId || !item.title || !item.abstract) continue;
      const ext = item.externalIds || {};
      const doi = ext.DOI || "";
      const arxivId = ext.ArXiv || "";
      // Prefer the most canonical landing page we can construct.
      let paperUrl;
      if (arxivId) paperUrl = `https://arxiv.org/abs/${arxivId}`;
      else if (doi) paperUrl = `https://doi.org/${doi}`;
      else paperUrl = `https://www.semanticscholar.org/paper/${item.paperId}`;
      papers.push({
        id: `s2:${item.paperId}`,
        source: "semanticscholar",
        title: (item.title || "").replace(/\s+/g, " ").trim(),
        authors: Array.isArray(item.authors)
          ? item.authors.map((a) => a.name).filter(Boolean)
          : [],
        abstract: (item.abstract || "").replace(/\s+/g, " ").trim(),
        published: item.year ? `${item.year}` : "",
        doi,
        url: paperUrl,
        pdf_url: item.openAccessPdf?.url || "",
        citations: typeof item.citationCount === "number" ? item.citationCount : 0,
      });
    }
    return papers.sort((a, b) => b.citations - a.citations);
  } catch {
    // Network/timeout errors degrade to "no S2 results" by design.
    return [];
  }
}
192
+
193
// ── MCP Server ─────────────────────────────────────────────────────────────
const server = new McpServer({
  name: "paperplain",
  // Keep in sync with "version" in package.json (1.1.1); the two had drifted.
  version: "1.1.1",
  description:
    "Search 200M+ peer-reviewed papers from PubMed, ArXiv, and Semantic Scholar. Returns papers with full abstracts — use your own model to synthesize findings.",
});
200
+
201
// Tool 1: search_research

// True when `candidate` duplicates a paper already kept. IDs are namespaced
// per source ("arxiv:", "pubmed:", "s2:"), so an ID-set intersection can
// never catch cross-source duplicates; match on shared DOI or landing URL.
function isDuplicatePaper(candidate, keptDois, keptUrls) {
  return (
    (candidate.doi !== "" && keptDois.has(candidate.doi)) ||
    keptUrls.has(candidate.url)
  );
}

// Build the DOI and URL sets used by isDuplicatePaper from kept papers.
function dedupeKeys(papers) {
  return {
    dois: new Set(papers.map((p) => p.doi).filter(Boolean)),
    urls: new Set(papers.map((p) => p.url)),
  };
}

server.tool(
  "search_research",
  `Search PubMed, ArXiv, and Semantic Scholar for peer-reviewed papers on any topic.
Auto-routes health/medical queries to PubMed, CS/AI to ArXiv + Semantic Scholar, general to all three.
Returns papers with titles, authors, abstracts, and source URLs.
Use the returned abstracts to synthesize findings, answer the user's question, or cite specific claims.`,
  {
    query: z
      .string()
      .describe(
        "Natural language research question or topic, e.g. 'effects of sleep deprivation on memory consolidation'"
      ),
    max_results: z
      .number()
      .min(1)
      .max(10)
      .optional()
      .default(5)
      .describe("Number of papers to return (1-10, default 5)"),
    domain: z
      .enum(["health", "cs", "general", "auto"])
      .optional()
      .default("auto")
      .describe(
        "Force a specific database: health=PubMed+S2, cs=ArXiv+S2, general=all three, auto=detect from query"
      ),
  },
  async ({ query, max_results, domain }) => {
    const resolvedDomain = domain === "auto" ? classifyDomain(query) : domain;
    let papers = [];
    const sources = [];

    try {
      if (resolvedDomain === "health") {
        // PubMed primary, Semantic Scholar fills any remaining slots.
        const pubmedPapers = await searchPubMed(query, max_results);
        if (pubmedPapers.length) sources.push("pubmed");
        if (pubmedPapers.length < max_results) {
          const s2 = await searchSemanticScholar(query, max_results - pubmedPapers.length);
          if (s2.length) sources.push("semanticscholar");
          const { dois, urls } = dedupeKeys(pubmedPapers);
          for (const p of s2) {
            if (!isDuplicatePaper(p, dois, urls)) pubmedPapers.push(p);
          }
        }
        papers = pubmedPapers.slice(0, max_results);
      } else if (resolvedDomain === "cs") {
        // ArXiv + Semantic Scholar, deduplicating overlaps by DOI/URL
        // (many S2 CS results are the same ArXiv preprints).
        const [arxiv, s2] = await Promise.all([
          searchArxiv(query, max_results),
          searchSemanticScholar(query, Math.ceil(max_results / 2)),
        ]);
        if (arxiv.length) sources.push("arxiv");
        if (s2.length) sources.push("semanticscholar");
        const maxArxiv = Math.ceil(max_results * 0.6);
        const { dois, urls } = dedupeKeys(arxiv);
        const uniqueS2 = s2.filter((p) => !isDuplicatePaper(p, dois, urls));
        papers = [
          ...arxiv.slice(0, maxArxiv),
          ...uniqueS2.slice(0, max_results - Math.min(arxiv.length, maxArxiv)),
        ].slice(0, max_results);
      } else {
        // General: all three sources, S2 deduplicated against the others.
        const [arxiv, pubmed, s2] = await Promise.all([
          searchArxiv(query, max_results),
          searchPubMed(query, max_results),
          searchSemanticScholar(query, Math.ceil(max_results / 2)),
        ]);
        if (arxiv.length) sources.push("arxiv");
        if (pubmed.length) sources.push("pubmed");
        if (s2.length) sources.push("semanticscholar");
        const maxEach = Math.floor(max_results / 3);
        const remainder = max_results - maxEach * 3;
        const { dois, urls } = dedupeKeys([...arxiv, ...pubmed]);
        const uniqueS2 = s2.filter((p) => !isDuplicatePaper(p, dois, urls));
        papers = [
          ...arxiv.slice(0, maxEach + remainder),
          ...pubmed.slice(0, maxEach),
          ...uniqueS2.slice(0, maxEach),
        ].slice(0, max_results);
      }

      return {
        content: [
          {
            type: "text",
            text: JSON.stringify(
              {
                query,
                domain: resolvedDomain,
                sources_searched: sources,
                total: papers.length,
                papers: papers.map((p) => ({
                  id: p.id,
                  source: p.source,
                  title: p.title,
                  authors: p.authors.slice(0, 4),
                  published: p.published,
                  abstract: p.abstract || "(abstract not available)",
                  url: p.url,
                  doi: p.doi || undefined,
                  // Only S2 results carry citation counts.
                  ...(p.citations > 0 ? { citations: p.citations } : {}),
                })),
              },
              null,
              2
            ),
          },
        ],
      };
    } catch (err) {
      return {
        content: [{ type: "text", text: `Search failed: ${err.message}` }],
        isError: true,
      };
    }
  }
);
316
+
317
// Tool 2: fetch_paper
server.tool(
  "fetch_paper",
  `Fetch the full abstract and metadata for a specific paper by ID.
Supports ArXiv IDs (e.g. '2301.07041' or 'arxiv:2301.07041') and PubMed IDs (e.g. 'pubmed:37183813' or just '37183813').
Use this to get the full abstract of a paper you already know about.`,
  {
    paper_id: z
      .string()
      .describe(
        "ArXiv ID (e.g. '2301.07041') or PubMed ID (e.g. 'pubmed:37183813')"
      ),
  },
  async ({ paper_id }) => {
    try {
      const trimmed = paper_id.trim();
      // Recognize explicit prefixes, arxiv.org URLs, modern IDs with an
      // optional version suffix ("2301.07041v2"), and pre-2007 IDs like
      // "cs/0301001" — the last two previously fell through to "not found".
      const isArxiv =
        /arxiv:/i.test(trimmed) ||
        /^\d{4}\.\d{4,5}(v\d+)?$/.test(trimmed) ||
        /^[a-z-]+(\.[a-z]{2})?\/\d{7}(v\d+)?$/i.test(trimmed) ||
        /arxiv\.org/.test(trimmed);
      // Bare 6-9 digit numbers are assumed to be PMIDs.
      const isPubMed = /pubmed:/i.test(trimmed) || /^\d{6,9}$/.test(trimmed);

      let paper = null;

      if (isArxiv) {
        paper = await fetchArxivById(paper_id);
      } else if (isPubMed) {
        const pmid = trimmed.replace(/^pubmed:/i, "").trim();
        // Abstract comes from efetch; title/authors/dates from esummary.
        const abstracts = await fetchPubMedAbstracts([pmid]);
        const summaryUrl = `${PUBMED_BASE}/esummary.fcgi?db=pubmed&id=${pmid}&retmode=json&${PUBMED_PARAMS}`;
        const summaryRes = await fetch(summaryUrl);
        if (summaryRes.ok) {
          const data = await summaryRes.json();
          const item = data?.result?.[pmid];
          if (item) {
            paper = {
              id: `pubmed:${pmid}`,
              source: "pubmed",
              title: item.title?.trim() || "",
              authors: Array.isArray(item.authors) ? item.authors.map((a) => a.name) : [],
              abstract: abstracts[pmid] || "",
              published: item.epubdate || item.pubdate || "",
              doi: (Array.isArray(item.articleids) ? item.articleids : []).find((e) => e.idtype === "doi")?.value || "",
              url: `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`,
            };
          }
        }
      }

      if (!paper) {
        return {
          content: [{ type: "text", text: `Paper not found: ${paper_id}` }],
          isError: true,
        };
      }

      return {
        content: [{ type: "text", text: JSON.stringify(paper, null, 2) }],
      };
    } catch (err) {
      return {
        content: [{ type: "text", text: `Fetch failed: ${err.message}` }],
        isError: true,
      };
    }
  }
);
383
+
384
// Serve over stdio — MCP clients launch this file as a subprocess and
// communicate via stdin/stdout (hence the StdioServerTransport).
const transport = new StdioServerTransport();
await server.connect(transport);