paperplain-mcp 1.1.2 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/server.js +243 -37
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "paperplain-mcp",
3
- "version": "1.1.2",
3
+ "version": "1.2.2",
4
4
  "description": "MCP server — search 200M+ peer-reviewed papers from PubMed, ArXiv, and Semantic Scholar. Free. No API key.",
5
5
  "type": "module",
6
6
  "bin": {
package/server.js CHANGED
@@ -15,14 +15,17 @@ const PUBMED_PARAMS = "tool=paperplain&email=hello@paperplain.io";
15
15
  const SEMANTIC_SCHOLAR_BASE = "https://api.semanticscholar.org/graph/v1";
16
16
 
17
17
  // ── Domain classifier (keyword-based, no LLM needed) ───────────────────────
18
// Note: "energy" intentionally excluded from health — it's more common in
// CS/engineering contexts (energy management, HEMS, smart grid) than health.

/**
 * Compile keyword lists into a single case-insensitive matcher.
 *
 * Two kinds of entries are supported because a plain `\b(a|b|c)\b` cannot
 * express both:
 *  - `words` must appear as whole words; an optional plural suffix ("s"/"es")
 *    is tolerated so "algorithms" or "language models" still match.
 *  - `stems` are prefixes ("depress", "cryptograph") that match any word
 *    starting with them ("depression", "cryptography").
 *
 * Entries are regex fragments, so "." can stand for a hyphen or a space
 * (e.g. "fine.tun" matches "fine-tuning" and "fine tuning").
 *
 * @param {string[]} words - Whole-word keywords / phrases.
 * @param {string[]} stems - Word prefixes.
 * @returns {RegExp} Non-global regex, safe to reuse with `.test()`.
 */
function buildKeywordPattern(words, stems) {
  const wordAlternatives = words.join("|");
  const stemAlternatives = stems.join("|");
  return new RegExp(
    `\\b(?:(?:${wordAlternatives})(?:e?s)?\\b|(?:${stemAlternatives})\\w*)`,
    "i"
  );
}

/** Matches queries that look like health / medicine questions. */
const HEALTH_KEYWORDS = buildKeywordPattern(
  [
    "sleep", "insomnia", "anxiety", "anxious", "stress", "pain", "ache",
    "headache", "migraine", "diet", "nutrition", "weight", "exercise",
    "fatigue", "tired", "focus", "adhd", "autism", "cancer", "blood",
    "pressure", "heart", "cholesterol", "vitamin", "supplement", "immune",
    "gut", "digestion", "mental health", "therapy", "meditation",
    "mindfulness", "mood", "burnout", "inflammation", "asthma", "skin",
    "aging", "memory", "alzheimer", "cognitive", "brain", "alcohol",
    "smoking", "addiction", "symptom", "treatment", "medicine", "medication",
    "dose", "chronic", "surgery", "vaccine", "clinical", "patient", "disease",
    "disorder", "syndrome", "injury", "rehabilitation", "psychiatric",
    "geriatric",
  ],
  // Prefixes: a trailing \b would stop these from ever matching real words.
  [
    "depress", "antibiot", "neurol", "cardio", "oncol", "gastro", "pediatr",
    "obes", "diabet", "allerg",
  ]
);

/** Matches queries that look like computer-science / engineering questions. */
const CS_KEYWORDS = buildKeywordPattern(
  [
    "algorithm", "neural network", "machine learning", "deep learning",
    "transformer", "llm", "large language model", "language model",
    "reinforcement", "classification", "clustering", "regression",
    "computer vision", "nlp", "natural language", "robotics", "autonomous",
    "blockchain", "database", "distributed", "cloud", "microservice",
    "compiler", "operating system", "cybersecurity", "retrieval", "embedding",
    "vector", "attention", "prompt", "inference", "benchmark", "agentic",
    "multi.agent", "smart grid", "demand response", "energy management",
    "HEMS", "home energy", "building energy", "V2G", "vehicle.to.grid",
    "EV charging", "electric vehicle", "battery storage", "renewable energy",
    "solar", "wind power", "forecasting", "optimization", "scheduling",
    "control system", "model predictive", "reinforcement learning",
  ],
  // Prefixes: "cryptography", "quantum computing", "software engineering", "fine-tuning".
  ["cryptograph", "quantum comput", "software engineer", "fine.tun"]
);

/**
 * Guess which literature domain a free-text query belongs to.
 *
 * @param {string} query - The user's search query.
 * @returns {"cs"|"health"|"general"} "cs" wins when both keyword sets match.
 */
function classifyDomain(query) {
  // CS check runs first — engineering/AI topics should not fall into health
  if (CS_KEYWORDS.test(query)) return "cs";
  if (HEALTH_KEYWORDS.test(query)) return "health";
  return "general";
}
28
31
 
@@ -83,16 +86,31 @@ async function searchArxiv(query, maxResults) {
83
86
  try {
84
87
  const res = await fetchWithTimeout(url);
85
88
  if (!res.ok) return [];
86
- return parseArxivXml(await res.text());
89
+ const papers = parseArxivXml(await res.text());
90
+ // If broad search returns nothing, retry with title-field search
91
+ if (papers.length === 0) {
92
+ const titleUrl = `${ARXIV_BASE}?search_query=ti:${encodeURIComponent(query)}&start=0&max_results=${maxResults}&sortBy=relevance&sortOrder=descending`;
93
+ const titleRes = await fetchWithTimeout(titleUrl);
94
+ if (titleRes.ok) return parseArxivXml(await titleRes.text());
95
+ }
96
+ return papers;
87
97
  } catch {
88
98
  return [];
89
99
  }
90
100
  }
91
101
 
102
/**
 * Reduce any common way of writing an arXiv reference to the bare identifier.
 *
 * Accepted inputs include a bare ID ("2301.07041", "cs.LG/0504010"), an
 * "arxiv:" prefixed ID, and abs/pdf URLs on arxiv.org or one of its
 * sub-domains (www., export.), with or without a scheme, in any letter case.
 * A trailing ".pdf", query string, or fragment is removed. A version suffix
 * ("v2") is kept — callers that need a version-less ID strip it themselves.
 *
 * @param {string} arxivId - Raw identifier or URL supplied by the caller.
 * @returns {string} The bare arXiv identifier ("" when nothing usable is given).
 */
function normalizeArxivId(arxivId) {
  return String(arxivId ?? "")
    // Trim first so the ^-anchored patterns below see the real first character.
    .trim()
    .replace(/^arxiv:\s*/i, "")
    .replace(/^(?:https?:\/\/)?(?:[\w-]+\.)*arxiv\.org\/(?:abs|pdf)\//i, "")
    // arXiv IDs never contain "?" or "#", so anything after them is URL noise.
    .replace(/[?#].*$/, "")
    .replace(/\.pdf$/i, "")
    .trim();
}
109
+
92
110
  async function fetchArxivById(arxivId) {
93
- const clean = arxivId.replace(/^arxiv:/i, "").replace(/^.*abs\//, "").trim();
111
+ const clean = normalizeArxivId(arxivId);
94
112
  try {
95
- const res = await fetchWithTimeout(`${ARXIV_BASE}?id_list=${clean}`);
113
+ const res = await fetchWithTimeout(`${ARXIV_BASE}?id_list=${encodeURIComponent(clean)}`);
96
114
  if (!res.ok) return null;
97
115
  const papers = parseArxivXml(await res.text());
98
116
  return papers[0] || null;
@@ -101,6 +119,36 @@ async function fetchArxivById(arxivId) {
101
119
  }
102
120
  }
103
121
 
122
/**
 * Look up an arXiv paper through Semantic Scholar's `ARXIV:` identifier route.
 * Used as a fallback when the ArXiv API itself yields nothing.
 *
 * @param {string} arxivId - arXiv ID in any form `normalizeArxivId` accepts.
 * @returns {Promise<object|null>} A paper record labelled as an arXiv source,
 *   or null when the request fails, is not OK, or lacks a paperId/title.
 */
async function fetchS2ByArxivId(arxivId) {
  // The version suffix ("v2") is dropped before the ID is sent to S2.
  const bareId = normalizeArxivId(arxivId).replace(/v\d+$/i, "");
  const squash = (text) => (text || "").replace(/\s+/g, " ").trim();

  try {
    const fields = "title,abstract,authors,year,citationCount,openAccessPdf,externalIds";
    const endpoint = `${SEMANTIC_SCHOLAR_BASE}/paper/ARXIV:${encodeURIComponent(bareId)}?fields=${fields}`;
    const response = await fetchWithTimeout(endpoint);
    if (!response.ok) {
      return null;
    }

    // A body that is not valid JSON is treated the same as "not found".
    const record = await response.json().catch(() => null);
    const isUsable = record && record.paperId && record.title;
    if (!isUsable) {
      return null;
    }

    const externalIds = record.externalIds || {};
    const authorNames = Array.isArray(record.authors)
      ? record.authors.map((author) => author.name).filter(Boolean)
      : [];
    const citationCount =
      typeof record.citationCount === "number" ? record.citationCount : 0;

    return {
      id: `arxiv:${bareId}`,
      source: "arxiv",
      title: squash(record.title),
      authors: authorNames,
      abstract: squash(record.abstract),
      published: record.year ? `${record.year}` : "",
      doi: externalIds.DOI || "",
      url: `https://arxiv.org/abs/${bareId}`,
      // Fall back to the arXiv PDF URL when S2 lists no open-access PDF.
      pdf_url: record.openAccessPdf?.url || `https://arxiv.org/pdf/${bareId}`,
      citations: citationCount,
    };
  } catch {
    return null;
  }
}
151
+
104
152
  // ── PubMed ─────────────────────────────────────────────────────────────────
105
153
  async function searchPubMed(query, maxResults) {
106
154
  try {
@@ -214,7 +262,7 @@ async function searchSemanticScholar(query, maxResults) {
214
262
  // ── MCP Server ─────────────────────────────────────────────────────────────
215
263
  const server = new McpServer({
216
264
  name: "paperplain",
217
- version: "1.1.0",
265
+ version: "1.2.2",
218
266
  description:
219
267
  "Search 200M+ peer-reviewed papers from PubMed, ArXiv, and Semantic Scholar. Returns papers with full abstracts — use your own model to synthesize findings.",
220
268
  });
@@ -250,28 +298,45 @@ Use the returned abstracts to synthesize findings, answer the user's question, o
250
298
  async ({ query, max_results, domain }) => {
251
299
  const resolvedDomain = domain === "auto" ? classifyDomain(query) : domain;
252
300
  let papers = [];
253
- let sources = [];
301
+ // Track each source: "ok" | "empty" | "error"
302
+ const sourceStatus = {};
303
+
304
+ async function safeArxiv(q, n) {
305
+ try {
306
+ const r = await searchArxiv(q, n);
307
+ sourceStatus.arxiv = r.length ? "ok" : "empty";
308
+ return r;
309
+ } catch { sourceStatus.arxiv = "error"; return []; }
310
+ }
311
+ async function safePubMed(q, n) {
312
+ try {
313
+ const r = await searchPubMed(q, n);
314
+ sourceStatus.pubmed = r.length ? "ok" : "empty";
315
+ return r;
316
+ } catch { sourceStatus.pubmed = "error"; return []; }
317
+ }
318
+ async function safeS2(q, n) {
319
+ try {
320
+ const r = await searchSemanticScholar(q, n);
321
+ sourceStatus.semanticscholar = r.length ? "ok" : "empty";
322
+ return r;
323
+ } catch { sourceStatus.semanticscholar = "error"; return []; }
324
+ }
254
325
 
255
326
  try {
256
327
  if (resolvedDomain === "health") {
257
- // PubMed primary, Semantic Scholar as fill
258
- let pubmedPapers = await searchPubMed(query, max_results);
259
- if (pubmedPapers.length) sources.push("pubmed");
328
+ let pubmedPapers = await safePubMed(query, max_results);
260
329
  if (pubmedPapers.length < max_results) {
261
- const s2 = await searchSemanticScholar(query, max_results - pubmedPapers.length);
262
- if (s2.length) sources.push("semanticscholar");
330
+ const s2 = await safeS2(query, max_results - pubmedPapers.length);
263
331
  const seen = new Set(pubmedPapers.map((p) => p.id));
264
332
  for (const p of s2) if (!seen.has(p.id)) pubmedPapers.push(p);
265
333
  }
266
334
  papers = pubmedPapers.slice(0, max_results);
267
335
  } else if (resolvedDomain === "cs") {
268
- // ArXiv + Semantic Scholar, deduplicate overlaps
269
336
  const [arxiv, s2] = await Promise.all([
270
- searchArxiv(query, max_results),
271
- searchSemanticScholar(query, Math.ceil(max_results / 2)),
337
+ safeArxiv(query, max_results),
338
+ safeS2(query, Math.ceil(max_results / 2)),
272
339
  ]);
273
- if (arxiv.length) sources.push("arxiv");
274
- if (s2.length) sources.push("semanticscholar");
275
340
  const maxArxiv = Math.ceil(max_results * 0.6);
276
341
  const arxivIds = new Set(arxiv.map((p) => p.id));
277
342
  const uniqueS2 = s2.filter((p) => !arxivIds.has(p.id));
@@ -280,15 +345,11 @@ Use the returned abstracts to synthesize findings, answer the user's question, o
280
345
  ...uniqueS2.slice(0, max_results - Math.min(arxiv.length, maxArxiv)),
281
346
  ].slice(0, max_results);
282
347
  } else {
283
- // General: all three sources interleaved
284
348
  const [arxiv, pubmed, s2] = await Promise.all([
285
- searchArxiv(query, max_results),
286
- searchPubMed(query, max_results),
287
- searchSemanticScholar(query, Math.ceil(max_results / 2)),
349
+ safeArxiv(query, max_results),
350
+ safePubMed(query, max_results),
351
+ safeS2(query, Math.ceil(max_results / 2)),
288
352
  ]);
289
- if (arxiv.length) sources.push("arxiv");
290
- if (pubmed.length) sources.push("pubmed");
291
- if (s2.length) sources.push("semanticscholar");
292
353
  const maxEach = Math.floor(max_results / 3);
293
354
  const remainder = max_results - maxEach * 3;
294
355
  papers = [
@@ -298,6 +359,18 @@ Use the returned abstracts to synthesize findings, answer the user's question, o
298
359
  ].slice(0, max_results);
299
360
  }
300
361
 
362
+ // Warn if expected sources came back empty or errored
363
+ const warnings = [];
364
+ const expectedSources = resolvedDomain === "health"
365
+ ? ["pubmed", "semanticscholar"]
366
+ : resolvedDomain === "cs"
367
+ ? ["arxiv", "semanticscholar"]
368
+ : ["arxiv", "pubmed", "semanticscholar"];
369
+ for (const src of expectedSources) {
370
+ if (sourceStatus[src] === "empty") warnings.push(`${src}: returned 0 results (API may be rate-limited or query too specific)`);
371
+ if (sourceStatus[src] === "error") warnings.push(`${src}: request failed (API may be temporarily unavailable)`);
372
+ }
373
+
301
374
  return {
302
375
  content: [
303
376
  {
@@ -306,7 +379,8 @@ Use the returned abstracts to synthesize findings, answer the user's question, o
306
379
  {
307
380
  query,
308
381
  domain: resolvedDomain,
309
- sources_searched: sources,
382
+ source_status: sourceStatus,
383
+ ...(warnings.length ? { warnings } : {}),
310
384
  total: papers.length,
311
385
  papers: papers.map((p) => ({
312
386
  id: p.id,
@@ -335,33 +409,79 @@ Use the returned abstracts to synthesize findings, answer the user's question, o
335
409
  }
336
410
  );
337
411
 
412
+ // ── Semantic Scholar single-paper lookup (by DOI or S2 paper ID) ───────────
413
/**
 * Fetch a single paper from Semantic Scholar by DOI.
 *
 * Accepts a bare DOI ("10.1145/3290605.3300857"), a "doi:"-prefixed DOI, or a
 * resolver URL ("https://doi.org/10.1145/..."). Never throws: any network,
 * HTTP, or parsing problem results in null.
 *
 * @param {string} doi - DOI in one of the accepted forms.
 * @returns {Promise<object|null>} Normalized paper record, or null when the
 *   DOI is empty, the lookup fails, or the response lacks a paperId/title.
 */
async function fetchS2ByDoi(doi) {
  try {
    const clean = String(doi ?? "")
      .trim()
      .replace(/^doi:\s*/i, "")
      .replace(/^(?:https?:\/\/)?(?:dx\.)?doi\.org\//i, "")
      .trim();
    // Nothing to look up — avoid a pointless request to ".../paper/DOI:".
    if (!clean) return null;

    const fields = "title,abstract,authors,year,citationCount,openAccessPdf,externalIds";
    const res = await fetchWithTimeout(
      `${SEMANTIC_SCHOLAR_BASE}/paper/DOI:${encodeURIComponent(clean)}?fields=${fields}`
    );
    if (!res.ok) return null;
    const item = await res.json().catch(() => null);
    if (!item || !item.paperId || !item.title) return null;

    const ext = item.externalIds || {};
    const arxivId = ext.ArXiv || "";
    // Prefer the DOI as recorded by S2; fall back to what the caller supplied.
    const canonicalDoi = ext.DOI || clean;
    // An arXiv landing page is preferred over the DOI resolver when available.
    const url = arxivId
      ? `https://arxiv.org/abs/${arxivId}`
      : `https://doi.org/${canonicalDoi}`;

    return {
      id: `s2:${item.paperId}`,
      source: "semanticscholar",
      title: (item.title || "").replace(/\s+/g, " ").trim(),
      // `a?.name` so a single null author entry does not discard the whole paper.
      authors: Array.isArray(item.authors) ? item.authors.map((a) => a?.name).filter(Boolean) : [],
      abstract: (item.abstract || "").replace(/\s+/g, " ").trim(),
      published: item.year ? `${item.year}` : "",
      doi: canonicalDoi,
      url,
      pdf_url: item.openAccessPdf?.url || "",
      citations: typeof item.citationCount === "number" ? item.citationCount : 0,
    };
  } catch {
    return null;
  }
}
445
+
338
446
  // Tool 2: fetch_paper
339
447
  server.tool(
340
448
  "fetch_paper",
341
- `Fetch the full abstract and metadata for a specific paper by ID.
342
- Supports ArXiv IDs (e.g. '2301.07041' or 'arxiv:2301.07041') and PubMed IDs (e.g. 'pubmed:37183813' or just '37183813').
343
- Use this to get the full abstract of a paper you already know about.`,
449
+ `Fetch the full abstract and metadata for a specific paper by ID or DOI.
450
+ Supports:
451
+ - ArXiv IDs: '2301.07041', 'arxiv:2301.07041v2', 'https://arxiv.org/abs/2301.07041'
452
+ - PubMed IDs: 'pubmed:37183813' or just '37183813'
453
+ - DOIs: '10.1145/3290605.3300857' or 'doi:10.1145/3290605.3300857' (looks up via Semantic Scholar)
454
+ Use this to verify a specific paper you already know about or to retrieve its abstract.`,
344
455
  {
345
456
  paper_id: z
346
457
  .string()
347
458
  .describe(
348
- "ArXiv ID (e.g. '2301.07041') or PubMed ID (e.g. 'pubmed:37183813')"
459
+ "ArXiv ID, PubMed ID, or DOI — e.g. '2301.07041', 'pubmed:37183813', or '10.1145/3290605.3300857'"
349
460
  ),
350
461
  },
351
462
  async ({ paper_id }) => {
352
463
  try {
464
+ const trimmed = paper_id.trim();
353
465
  const isArxiv =
354
- /arxiv:/i.test(paper_id) ||
355
- /^\d{4}\.\d{4,5}$/.test(paper_id.trim()) ||
356
- /arxiv\.org/.test(paper_id);
357
- const isPubMed = /pubmed:/i.test(paper_id) || /^\d{6,9}$/.test(paper_id.trim());
466
+ /arxiv:/i.test(trimmed) ||
467
+ /^\d{4}\.\d{4,5}(v\d+)?$/.test(trimmed) || // 2301.07041 or 2301.07041v2
468
+ /^[a-z-]+(\.[A-Z]+)?\/\d{7}(v\d+)?$/.test(trimmed) || // old format: cs.LG/0504010
469
+ /arxiv\.org/.test(trimmed);
470
+ const isPubMed =
471
+ /pubmed:/i.test(trimmed) || /^\d{6,9}$/.test(trimmed);
472
+ const isDOI =
473
+ /^doi:/i.test(trimmed) || /^10\.\d{4,}\/\S+$/.test(trimmed);
358
474
 
359
475
  let paper = null;
360
476
 
361
477
  if (isArxiv) {
362
- paper = await fetchArxivById(paper_id);
478
+ paper = await fetchArxivById(trimmed);
479
+ // Fallback: ArXiv API rate-limits under parallel load — try S2 ARXIV: endpoint
480
+ if (!paper) paper = await fetchS2ByArxivId(trimmed);
481
+ } else if (isDOI) {
482
+ paper = await fetchS2ByDoi(trimmed);
363
483
  } else if (isPubMed) {
364
- const pmid = paper_id.replace(/^pubmed:/i, "").trim();
484
+ const pmid = trimmed.replace(/^pubmed:/i, "").trim();
365
485
  const abstracts = await fetchPubMedAbstracts([pmid]);
366
486
  const summaryUrl = `${PUBMED_BASE}/esummary.fcgi?db=pubmed&id=${pmid}&retmode=json&${PUBMED_PARAMS}`;
367
487
  const summaryRes = await fetch(summaryUrl);
@@ -385,7 +505,10 @@ Use this to get the full abstract of a paper you already know about.`,
385
505
 
386
506
  if (!paper) {
387
507
  return {
388
- content: [{ type: "text", text: `Paper not found: ${paper_id}` }],
508
+ content: [{
509
+ type: "text",
510
+ text: `Paper not found: ${paper_id}\n\nTip: For arXiv papers, try the bare ID (e.g. '2301.07041'). For journal papers, try the DOI (e.g. '10.1145/3290605.3300857'). For PubMed papers, use the PMID number.`,
511
+ }],
389
512
  isError: true,
390
513
  };
391
514
  }
@@ -402,5 +525,88 @@ Use this to get the full abstract of a paper you already know about.`,
402
525
  }
403
526
  );
404
527
 
528
// Tool 3: find_paper_by_title

/**
 * Lower-case a title and collapse every run of punctuation/whitespace to a
 * single space, so titles that differ only in casing or punctuation compare equal.
 *
 * @param {string} text - Raw title.
 * @returns {string} Comparison key ("" for empty input).
 */
function normalizeTitleForMatch(text) {
  return String(text ?? "")
    .toLowerCase()
    .replace(/[^\p{L}\p{N}]+/gu, " ")
    .trim();
}

/**
 * Pick the search hit that best fits the requested title.
 *
 * Signals, strongest first: exact title match, title containing the requested
 * phrase, matching publication year, having an abstract. Ties keep the search
 * API's own ordering. Having an abstract is only a tie-breaker — it must never
 * displace an exact-title hit, because many records have no abstract.
 *
 * @param {object[]} results - Raw `data` array from the search response.
 * @param {string} title - Title or key phrase the caller asked for.
 * @param {number} [year] - Optional publication year.
 * @returns {object|null} Best hit, or null when no hit has a title.
 */
function pickBestTitleMatch(results, title, year) {
  const wanted = normalizeTitleForMatch(title);
  let best = null;
  let bestScore = -1;
  for (const candidate of results) {
    if (!candidate?.title) continue;
    const candidateTitle = normalizeTitleForMatch(candidate.title);
    let score = 0;
    if (candidateTitle === wanted) score += 8;
    else if (wanted && candidateTitle.includes(wanted)) score += 4;
    if (year != null && candidate.year === year) score += 2;
    if (candidate.abstract) score += 1;
    // Strict ">" keeps the earlier (more relevant) hit on equal scores.
    if (score > bestScore) {
      best = candidate;
      bestScore = score;
    }
  }
  return best;
}

server.tool(
  "find_paper_by_title",
  `Find a specific paper when you only know its title (or partial title).
Uses Semantic Scholar's title-match search. Returns the closest match with full abstract, authors, DOI, and source URL.
Useful for verifying a citation or retrieving abstract details for a paper you already know exists.`,
  {
    title: z
      .string()
      .describe("The paper title or a key phrase from it, e.g. 'Attention Is All You Need'"),
    year: z
      .number()
      .optional()
      .describe("Publication year to narrow down the match (optional)"),
  },
  async ({ title, year }) => {
    const notFound = () => ({
      content: [{ type: "text", text: `No papers found matching: "${title}"` }],
      isError: true,
    });

    try {
      const fields = "title,abstract,authors,year,citationCount,openAccessPdf,externalIds";
      const url = `${SEMANTIC_SCHOLAR_BASE}/paper/search?query=${encodeURIComponent(title)}&limit=5&fields=${fields}`;
      const res = await fetchWithTimeout(url);
      if (!res.ok) {
        return {
          content: [{ type: "text", text: `Search failed: Semantic Scholar returned ${res.status}` }],
          isError: true,
        };
      }

      const data = await res.json().catch(() => null);
      const results = Array.isArray(data?.data) ? data.data : [];
      const best = pickBestTitleMatch(results, title, year);
      if (!best) return notFound();

      const ext = best.externalIds || {};
      const doi = ext.DOI || "";
      const arxivId = ext.ArXiv || "";
      // Link preference: arXiv landing page, then DOI resolver, then the S2 page.
      let paperUrl;
      if (arxivId) paperUrl = `https://arxiv.org/abs/${arxivId}`;
      else if (doi) paperUrl = `https://doi.org/${doi}`;
      else paperUrl = `https://www.semanticscholar.org/paper/${best.paperId}`;

      const paper = {
        id: arxivId ? `arxiv:${arxivId}` : `s2:${best.paperId}`,
        source: arxivId ? "arxiv" : "semanticscholar",
        title: (best.title || "").replace(/\s+/g, " ").trim(),
        authors: Array.isArray(best.authors) ? best.authors.map((a) => a?.name).filter(Boolean) : [],
        abstract: (best.abstract || "").replace(/\s+/g, " ").trim(),
        published: best.year ? `${best.year}` : "",
        doi,
        url: paperUrl,
        pdf_url: best.openAccessPdf?.url || "",
        citations: typeof best.citationCount === "number" ? best.citationCount : 0,
      };

      return {
        content: [{ type: "text", text: JSON.stringify(paper, null, 2) }],
      };
    } catch (err) {
      return {
        content: [{ type: "text", text: `find_paper_by_title failed: ${err.message}` }],
        isError: true,
      };
    }
  }
);
610
+
405
611
  const transport = new StdioServerTransport();
406
612
  await server.connect(transport);