paperplain-mcp 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +88 -0
  2. package/package.json +46 -0
  3. package/server.js +385 -0
package/README.md ADDED
@@ -0,0 +1,88 @@
1
+ # PaperPlain MCP
2
+
3
+ Give any AI agent access to 200M+ peer-reviewed papers from PubMed, ArXiv, and Semantic Scholar.
4
+
5
+ **Free. No API key. No account. No backend.**
6
+
7
+ The MCP calls PubMed and ArXiv directly and returns papers with full abstracts. Your agent's own LLM synthesizes the findings — no black-box summaries, no extra cost, full context.
8
+
9
+ ## Install
10
+
11
+ ```bash
12
+ npx -y paperplain-mcp
13
+ ```
14
+
15
+ ## Setup
16
+
17
+ Add to your MCP config file (Claude Desktop, Cursor, Windsurf, or any MCP-compatible client):
18
+
19
+ ```json
20
+ {
21
+ "mcpServers": {
22
+ "paperplain": {
23
+ "command": "npx",
24
+ "args": ["-y", "paperplain-mcp"]
25
+ }
26
+ }
27
+ }
28
+ ```
29
+
30
+ Restart your client. That's it.
31
+
32
+ **Config file locations:**
33
+ - Claude Desktop (Mac): `~/Library/Application Support/Claude/claude_desktop_config.json`
34
+ - Cursor: `.cursor/mcp.json`
35
+ - Windsurf: `~/.codeium/windsurf/mcp_config.json`
36
+
37
+ ## Tools
38
+
39
+ ### `search_research`
40
+ Search PubMed, ArXiv, and Semantic Scholar for peer-reviewed papers. Auto-routes based on topic (health → PubMed + S2, CS/AI → ArXiv + S2, general → all three).
41
+
42
+ ```
43
+ query Natural language question or topic
44
+ max_results 1–10 papers (default: 5)
45
+ domain "auto" | "health" | "cs" | "general"
46
+ ```
47
+
48
+ Returns: array of papers with title, authors, abstract, published date, URL, DOI.
49
+
50
+ ### `fetch_paper`
51
+ Fetch full metadata and abstract for a specific paper by ID.
52
+
53
+ ```
54
+ paper_id ArXiv ID ("2301.07041") or PubMed ID ("pubmed:37183813")
55
+ ```
56
+
57
+ ## How it works
58
+
59
+ 1. Your agent calls `search_research("effects of sleep deprivation on memory")`
60
+ 2. PaperPlain routes to PubMed + Semantic Scholar (health topic), fetches abstracts
61
+ 3. Returns structured JSON with papers and full abstracts
62
+ 4. Your agent's LLM synthesizes findings using its full context
63
+
64
+ No LLM calls on our side. No cost. No rate limits beyond what PubMed/ArXiv impose.
65
+
66
+ ## Example
67
+
68
+ ```
69
+ User: What does the research say about cold exposure and metabolism?
70
+
71
+ Agent calls: search_research("cold exposure brown adipose tissue metabolism")
72
+ → Returns 5 PubMed papers with abstracts
73
+ → Agent synthesizes: "Three RCTs found that regular cold water immersion (14°C,
74
+ 1hr/week for 6 weeks) increased brown adipose tissue activity by 37-42%..."
75
+ ```
76
+
77
+ ## Self-host
78
+
79
+ ```bash
80
+ git clone https://github.com/sulmatajb/paperplain
81
+ cd paperplain/mcp
82
+ npm install
83
+ node server.js
84
+ ```
85
+
86
+ ## License
87
+
88
+ MIT — do whatever you want with it.
package/package.json ADDED
@@ -0,0 +1,46 @@
1
+ {
2
+ "name": "paperplain-mcp",
3
+ "version": "1.1.1",
4
+ "description": "MCP server — search 200M+ peer-reviewed papers from PubMed, ArXiv, and Semantic Scholar. Free. No API key.",
5
+ "type": "module",
6
+ "bin": {
7
+ "paperplain-mcp": "./server.js"
8
+ },
9
+ "files": [
10
+ "server.js",
11
+ "README.md"
12
+ ],
13
+ "scripts": {
14
+ "start": "node server.js",
15
+ "dev": "node --watch server.js"
16
+ },
17
+ "keywords": [
18
+ "mcp",
19
+ "model-context-protocol",
20
+ "pubmed",
21
+ "arxiv",
22
+ "semantic-scholar",
23
+ "research",
24
+ "papers",
25
+ "science",
26
+ "ai-agent",
27
+ "claude"
28
+ ],
29
+ "homepage": "https://github.com/sulmatajb/paperplain",
30
+ "repository": {
31
+ "type": "git",
32
+ "url": "https://github.com/sulmatajb/paperplain.git",
33
+ "directory": "mcp"
34
+ },
35
+ "bugs": {
36
+ "url": "https://github.com/sulmatajb/paperplain/issues"
37
+ },
38
+ "license": "MIT",
39
+ "dependencies": {
40
+ "@modelcontextprotocol/sdk": "^1.0.0",
41
+ "zod": "^3.22.0"
42
+ },
43
+ "engines": {
44
+ "node": ">=18"
45
+ }
46
+ }
package/server.js ADDED
@@ -0,0 +1,385 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * PaperPlain MCP Server
4
+ * Searches PubMed and ArXiv directly — no API key, no backend, completely free.
5
+ * Returns papers with full abstracts so the agent's LLM can synthesize.
6
+ */
7
+
8
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
9
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
10
+ import { z } from "zod";
11
+
12
+ const ARXIV_BASE = "https://export.arxiv.org/api/query";
13
+ const PUBMED_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils";
14
+ const PUBMED_PARAMS = "tool=paperplain&email=hello@paperplain.io";
15
+ const SEMANTIC_SCHOLAR_BASE = "https://api.semanticscholar.org/graph/v1";
16
+
17
// ── Domain classifier (keyword-based, no LLM needed) ───────────────────────
const HEALTH_KEYWORDS =
  /\b(sleep|insomnia|anxiety|anxious|stress|depress|pain|ache|headache|migraine|diet|nutrition|weight|obese|exercise|fatigue|tired|energy|focus|adhd|autism|cancer|diabetes|blood|pressure|heart|cholesterol|vitamin|supplement|immune|gut|digestion|mental health|therapy|meditation|mindfulness|mood|burnout|inflammation|allergy|asthma|skin|aging|memory|alzheimer|cognitive|brain|alcohol|smoking|addiction|symptoms|treatment|medicine|medication|dose|chronic|surgery|vaccine|antibiot|clinical|patient|disease|disorder|syndrome|injury|rehabilitation|psychiatric|neurol|cardio|oncol|gastro|pediatr|geriatric)\b/i;
const CS_KEYWORDS =
  /\b(algorithm|neural network|machine learning|deep learning|transformer|llm|language model|reinforcement|classification|clustering|regression|computer vision|nlp|natural language|robotics|autonomous|blockchain|cryptograph|database|distributed|cloud|microservice|compiler|operating system|cybersecurity|quantum comput|software engineer|retrieval|embedding|vector|attention|fine.tun|prompt|inference|benchmark)\b/i;

/**
 * Classify a search query into a routing domain.
 * Checked in priority order: health wins over cs if both match.
 *
 * @param {string} query - Natural-language search query.
 * @returns {"health"|"cs"|"general"} routing domain for source selection.
 */
function classifyDomain(query) {
  const rules = [
    ["health", HEALTH_KEYWORDS],
    ["cs", CS_KEYWORDS],
  ];
  for (const [domain, pattern] of rules) {
    if (pattern.test(query)) return domain;
  }
  return "general";
}
28
+
29
// ── ArXiv ──────────────────────────────────────────────────────────────────

// Decode the predefined XML entities plus numeric character references.
// ArXiv's Atom feed escapes these inside <title>/<summary>/<name>, so the
// raw captures would otherwise leak "&amp;" etc. into paper metadata.
// &amp; is decoded last so double-escaped text (e.g. "&amp;lt;") resolves
// to exactly one level, matching standard XML unescaping.
function decodeXmlEntities(text) {
  return text
    .replace(/&#(\d+);/g, (_, n) => String.fromCodePoint(Number(n)))
    .replace(/&#x([0-9a-fA-F]+);/g, (_, n) => String.fromCodePoint(parseInt(n, 16)))
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&quot;/g, '"')
    .replace(/&apos;/g, "'")
    .replace(/&amp;/g, "&");
}

/**
 * Parse an ArXiv Atom feed into an array of paper objects.
 * Regex-based on purpose: the feed shape is stable and this avoids an
 * XML-parser dependency. Entries missing an id or title are skipped.
 *
 * @param {string} xml - Raw Atom XML from the ArXiv export API.
 * @returns {Array<{id: string, source: string, title: string, authors: string[],
 *   abstract: string, published: string, doi: string, url: string, pdf_url: string}>}
 */
function parseArxivXml(xml) {
  const papers = [];
  const entries = xml.match(/<entry>([\s\S]*?)<\/entry>/g) || [];
  for (const entry of entries) {
    const id =
      (entry.match(/<id>https?:\/\/arxiv\.org\/abs\/([^<]+)<\/id>/) || [])[1] || "";
    const title = decodeXmlEntities(
      (entry.match(/<title>([\s\S]*?)<\/title>/) || [])[1]
        ?.replace(/\s+/g, " ")
        .trim() || ""
    );
    const abstract = decodeXmlEntities(
      (entry.match(/<summary>([\s\S]*?)<\/summary>/) || [])[1]
        ?.replace(/\s+/g, " ")
        .trim() || ""
    );
    // <published> is an ISO timestamp; keep only the YYYY-MM-DD date part.
    const published =
      (entry.match(/<published>([^<]+)<\/published>/) || [])[1]?.slice(0, 10) || "";
    const authors = [...entry.matchAll(/<name>([^<]+)<\/name>/g)].map((m) =>
      decodeXmlEntities(m[1].trim())
    );
    const doi =
      (entry.match(/<arxiv:doi[^>]*>([^<]+)<\/arxiv:doi>/) || [])[1] || "";
    if (!id || !title) continue;
    papers.push({
      id: `arxiv:${id}`,
      source: "arxiv",
      title,
      authors,
      abstract,
      published,
      doi,
      url: `https://arxiv.org/abs/${id}`,
      pdf_url: `https://arxiv.org/pdf/${id}`,
    });
  }
  return papers;
}
70
+
71
/**
 * Search ArXiv's public Atom API, relevance-sorted.
 * Returns [] on any non-2xx response rather than throwing.
 *
 * @param {string} query - Free-text query (matched against all fields).
 * @param {number} maxResults - Maximum entries to request.
 * @returns {Promise<object[]>} parsed paper objects (see parseArxivXml).
 */
async function searchArxiv(query, maxResults) {
  const params = [
    `search_query=all:${encodeURIComponent(query)}`,
    "start=0",
    `max_results=${maxResults}`,
    "sortBy=relevance",
    "sortOrder=descending",
  ].join("&");
  const response = await fetch(`${ARXIV_BASE}?${params}`);
  if (!response.ok) return [];
  const xml = await response.text();
  return parseArxivXml(xml);
}
77
+
78
/**
 * Fetch a single ArXiv paper by ID.
 * Accepts "arxiv:ID", a bare ID, or a full arxiv.org/abs/... URL.
 *
 * @param {string} arxivId - Paper identifier in any of the accepted forms.
 * @returns {Promise<object|null>} the paper, or null if not found / fetch failed.
 */
async function fetchArxivById(arxivId) {
  const normalized = arxivId
    .replace(/^arxiv:/i, "")
    .replace(/^.*abs\//, "")
    .trim();
  const response = await fetch(`${ARXIV_BASE}?id_list=${normalized}`);
  if (!response.ok) return null;
  const [first] = parseArxivXml(await response.text());
  return first ?? null;
}
86
+
87
// ── PubMed ─────────────────────────────────────────────────────────────────

/**
 * Search PubMed via the NCBI E-utilities pipeline:
 * esearch (relevance-ranked PMIDs) → esummary (title/authors/dates)
 * → efetch (abstracts). Returns [] if any required stage fails.
 *
 * @param {string} query - Free-text query for esearch.
 * @param {number} maxResults - Maximum PMIDs to retrieve.
 * @returns {Promise<object[]>} paper objects in PubMed's relevance order.
 */
async function searchPubMed(query, maxResults) {
  // Stage 1: find matching PMIDs.
  const searchRes = await fetch(
    `${PUBMED_BASE}/esearch.fcgi?db=pubmed&term=${encodeURIComponent(query)}&retmax=${maxResults}&retmode=json&sort=relevance&${PUBMED_PARAMS}`
  );
  if (!searchRes.ok) return [];
  const searchData = await searchRes.json();
  const pmids = searchData?.esearchresult?.idlist || [];
  if (pmids.length === 0) return [];

  // Stage 2: summaries (title, authors, date) for the whole batch.
  const summaryRes = await fetch(
    `${PUBMED_BASE}/esummary.fcgi?db=pubmed&id=${pmids.join(",")}&retmode=json&${PUBMED_PARAMS}`
  );
  if (!summaryRes.ok) return [];
  const summaryData = await summaryRes.json();
  const summaries = summaryData?.result || {};

  // Stage 3: abstracts — esummary does not include them.
  const abstracts = await fetchPubMedAbstracts(pmids);

  const papers = [];
  for (const pmid of pmids) {
    const item = summaries[pmid];
    if (!item?.title) continue;
    const articleIds = Array.isArray(item.articleids) ? item.articleids : [];
    const doi = articleIds.find((e) => e.idtype === "doi")?.value || "";
    papers.push({
      id: `pubmed:${pmid}`,
      source: "pubmed",
      title: item.title.trim(),
      authors: Array.isArray(item.authors)
        ? item.authors.map((a) => a.name).filter(Boolean)
        : [],
      abstract: abstracts[pmid] || "",
      published: item.epubdate || item.pubdate || "",
      doi,
      url: `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`,
    });
  }
  return papers;
}
129
+
130
/**
 * Fetch abstracts for a batch of PMIDs via efetch (XML).
 * Returns a pmid → abstract map; PMIDs with no abstract are simply absent.
 * Any HTTP failure yields an empty map so callers degrade gracefully.
 *
 * @param {string[]} pmids - PubMed IDs to fetch in one request.
 * @returns {Promise<Record<string, string>>}
 */
async function fetchPubMedAbstracts(pmids) {
  const response = await fetch(
    `${PUBMED_BASE}/efetch.fcgi?db=pubmed&id=${pmids.join(",")}&retmode=xml&rettype=abstract&${PUBMED_PARAMS}`
  );
  if (!response.ok) return {};
  const xml = await response.text();
  const articles = xml.match(/<PubmedArticle>[\s\S]*?<\/PubmedArticle>/g) || [];
  const byPmid = {};
  for (const article of articles) {
    const pmid = article.match(/<PMID[^>]*>(\d+)<\/PMID>/)?.[1];
    if (!pmid) continue;
    // Abstracts may be split into labelled <AbstractText> sections
    // (Background/Methods/Results/...); join them in document order.
    const sections = [
      ...article.matchAll(/<AbstractText[^>]*>([\s\S]*?)<\/AbstractText>/g),
    ].map((m) => m[1].replace(/\s+/g, " ").trim());
    if (sections.length > 0) {
      byPmid[pmid] = sections.join(" ");
    }
  }
  return byPmid;
}
146
+
147
// ── Semantic Scholar ───────────────────────────────────────────────────────

/**
 * Search the Semantic Scholar Graph API (keyless, shared rate limit).
 * Records without a paperId, title, or abstract are skipped — the abstract
 * is the whole value of a result here. Returns papers sorted by citation
 * count (descending), or [] on any failure or 10s timeout.
 *
 * @param {string} query - Free-text query.
 * @param {number} maxResults - Maximum results to request.
 * @returns {Promise<object[]>}
 */
async function searchSemanticScholar(query, maxResults) {
  try {
    const fields = "title,abstract,authors,year,citationCount,openAccessPdf,externalIds";
    const url = `${SEMANTIC_SCHOLAR_BASE}/paper/search?query=${encodeURIComponent(query)}&limit=${maxResults}&fields=${fields}`;
    // S2 can hang under load; abort rather than stalling the agent.
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), 10000);
    let response;
    try {
      response = await fetch(url, { signal: controller.signal });
    } finally {
      clearTimeout(timer);
    }
    if (!response.ok) return [];
    const payload = await response.json().catch(() => null);
    const items = payload?.data;
    if (!items) return [];

    const papers = [];
    for (const item of items) {
      if (!item.paperId || !item.title || !item.abstract) continue;
      const ext = item.externalIds || {};
      const doi = ext.DOI || "";
      const arxivId = ext.ArXiv || "";
      // Prefer the most canonical landing page we can construct.
      let paperUrl;
      if (arxivId) paperUrl = `https://arxiv.org/abs/${arxivId}`;
      else if (doi) paperUrl = `https://doi.org/${doi}`;
      else paperUrl = `https://www.semanticscholar.org/paper/${item.paperId}`;
      papers.push({
        id: `s2:${item.paperId}`,
        source: "semanticscholar",
        title: (item.title || "").replace(/\s+/g, " ").trim(),
        authors: Array.isArray(item.authors)
          ? item.authors.map((a) => a.name).filter(Boolean)
          : [],
        abstract: (item.abstract || "").replace(/\s+/g, " ").trim(),
        published: item.year ? `${item.year}` : "",
        doi,
        url: paperUrl,
        pdf_url: item.openAccessPdf?.url || "",
        citations: typeof item.citationCount === "number" ? item.citationCount : 0,
      });
    }
    return papers.sort((a, b) => b.citations - a.citations);
  } catch {
    // Network/timeout errors degrade to "no S2 results" by design.
    return [];
  }
}
192
+
193
// ── MCP Server ─────────────────────────────────────────────────────────────
const server = new McpServer({
  name: "paperplain",
  // Keep in sync with "version" in package.json (1.1.1); the two had drifted.
  version: "1.1.1",
  description:
    "Search 200M+ peer-reviewed papers from PubMed, ArXiv, and Semantic Scholar. Returns papers with full abstracts — use your own model to synthesize findings.",
});
200
+
201
// Tool 1: search_research

// True when `candidate` duplicates a paper already kept. IDs are namespaced
// per source ("arxiv:", "pubmed:", "s2:"), so an ID-set intersection can
// never catch cross-source duplicates; match on shared DOI or landing URL.
function isDuplicatePaper(candidate, keptDois, keptUrls) {
  return (
    (candidate.doi !== "" && keptDois.has(candidate.doi)) ||
    keptUrls.has(candidate.url)
  );
}

// Build the DOI and URL sets used by isDuplicatePaper from kept papers.
function dedupeKeys(papers) {
  return {
    dois: new Set(papers.map((p) => p.doi).filter(Boolean)),
    urls: new Set(papers.map((p) => p.url)),
  };
}

server.tool(
  "search_research",
  `Search PubMed, ArXiv, and Semantic Scholar for peer-reviewed papers on any topic.
Auto-routes health/medical queries to PubMed, CS/AI to ArXiv + Semantic Scholar, general to all three.
Returns papers with titles, authors, abstracts, and source URLs.
Use the returned abstracts to synthesize findings, answer the user's question, or cite specific claims.`,
  {
    query: z
      .string()
      .describe(
        "Natural language research question or topic, e.g. 'effects of sleep deprivation on memory consolidation'"
      ),
    max_results: z
      .number()
      .min(1)
      .max(10)
      .optional()
      .default(5)
      .describe("Number of papers to return (1-10, default 5)"),
    domain: z
      .enum(["health", "cs", "general", "auto"])
      .optional()
      .default("auto")
      .describe(
        "Force a specific database: health=PubMed+S2, cs=ArXiv+S2, general=all three, auto=detect from query"
      ),
  },
  async ({ query, max_results, domain }) => {
    const resolvedDomain = domain === "auto" ? classifyDomain(query) : domain;
    let papers = [];
    const sources = [];

    try {
      if (resolvedDomain === "health") {
        // PubMed primary, Semantic Scholar fills any remaining slots.
        const pubmedPapers = await searchPubMed(query, max_results);
        if (pubmedPapers.length) sources.push("pubmed");
        if (pubmedPapers.length < max_results) {
          const s2 = await searchSemanticScholar(query, max_results - pubmedPapers.length);
          if (s2.length) sources.push("semanticscholar");
          const { dois, urls } = dedupeKeys(pubmedPapers);
          for (const p of s2) {
            if (!isDuplicatePaper(p, dois, urls)) pubmedPapers.push(p);
          }
        }
        papers = pubmedPapers.slice(0, max_results);
      } else if (resolvedDomain === "cs") {
        // ArXiv + Semantic Scholar, deduplicating overlaps by DOI/URL
        // (many S2 CS results are the same ArXiv preprints).
        const [arxiv, s2] = await Promise.all([
          searchArxiv(query, max_results),
          searchSemanticScholar(query, Math.ceil(max_results / 2)),
        ]);
        if (arxiv.length) sources.push("arxiv");
        if (s2.length) sources.push("semanticscholar");
        const maxArxiv = Math.ceil(max_results * 0.6);
        const { dois, urls } = dedupeKeys(arxiv);
        const uniqueS2 = s2.filter((p) => !isDuplicatePaper(p, dois, urls));
        papers = [
          ...arxiv.slice(0, maxArxiv),
          ...uniqueS2.slice(0, max_results - Math.min(arxiv.length, maxArxiv)),
        ].slice(0, max_results);
      } else {
        // General: all three sources, S2 deduplicated against the others.
        const [arxiv, pubmed, s2] = await Promise.all([
          searchArxiv(query, max_results),
          searchPubMed(query, max_results),
          searchSemanticScholar(query, Math.ceil(max_results / 2)),
        ]);
        if (arxiv.length) sources.push("arxiv");
        if (pubmed.length) sources.push("pubmed");
        if (s2.length) sources.push("semanticscholar");
        const maxEach = Math.floor(max_results / 3);
        const remainder = max_results - maxEach * 3;
        const { dois, urls } = dedupeKeys([...arxiv, ...pubmed]);
        const uniqueS2 = s2.filter((p) => !isDuplicatePaper(p, dois, urls));
        papers = [
          ...arxiv.slice(0, maxEach + remainder),
          ...pubmed.slice(0, maxEach),
          ...uniqueS2.slice(0, maxEach),
        ].slice(0, max_results);
      }

      return {
        content: [
          {
            type: "text",
            text: JSON.stringify(
              {
                query,
                domain: resolvedDomain,
                sources_searched: sources,
                total: papers.length,
                papers: papers.map((p) => ({
                  id: p.id,
                  source: p.source,
                  title: p.title,
                  authors: p.authors.slice(0, 4),
                  published: p.published,
                  abstract: p.abstract || "(abstract not available)",
                  url: p.url,
                  doi: p.doi || undefined,
                  // Only S2 results carry citation counts.
                  ...(p.citations > 0 ? { citations: p.citations } : {}),
                })),
              },
              null,
              2
            ),
          },
        ],
      };
    } catch (err) {
      return {
        content: [{ type: "text", text: `Search failed: ${err.message}` }],
        isError: true,
      };
    }
  }
);
316
+
317
// Tool 2: fetch_paper
server.tool(
  "fetch_paper",
  `Fetch the full abstract and metadata for a specific paper by ID.
Supports ArXiv IDs (e.g. '2301.07041' or 'arxiv:2301.07041') and PubMed IDs (e.g. 'pubmed:37183813' or just '37183813').
Use this to get the full abstract of a paper you already know about.`,
  {
    paper_id: z
      .string()
      .describe(
        "ArXiv ID (e.g. '2301.07041') or PubMed ID (e.g. 'pubmed:37183813')"
      ),
  },
  async ({ paper_id }) => {
    try {
      const trimmed = paper_id.trim();
      // Recognize explicit prefixes, arxiv.org URLs, modern IDs with an
      // optional version suffix ("2301.07041v2"), and pre-2007 IDs like
      // "cs/0301001" — the last two previously fell through to "not found".
      const isArxiv =
        /arxiv:/i.test(trimmed) ||
        /^\d{4}\.\d{4,5}(v\d+)?$/.test(trimmed) ||
        /^[a-z-]+(\.[a-z]{2})?\/\d{7}(v\d+)?$/i.test(trimmed) ||
        /arxiv\.org/.test(trimmed);
      // Bare 6-9 digit numbers are assumed to be PMIDs.
      const isPubMed = /pubmed:/i.test(trimmed) || /^\d{6,9}$/.test(trimmed);

      let paper = null;

      if (isArxiv) {
        paper = await fetchArxivById(paper_id);
      } else if (isPubMed) {
        const pmid = trimmed.replace(/^pubmed:/i, "").trim();
        // Abstract comes from efetch; title/authors/dates from esummary.
        const abstracts = await fetchPubMedAbstracts([pmid]);
        const summaryUrl = `${PUBMED_BASE}/esummary.fcgi?db=pubmed&id=${pmid}&retmode=json&${PUBMED_PARAMS}`;
        const summaryRes = await fetch(summaryUrl);
        if (summaryRes.ok) {
          const data = await summaryRes.json();
          const item = data?.result?.[pmid];
          if (item) {
            paper = {
              id: `pubmed:${pmid}`,
              source: "pubmed",
              title: item.title?.trim() || "",
              authors: Array.isArray(item.authors) ? item.authors.map((a) => a.name) : [],
              abstract: abstracts[pmid] || "",
              published: item.epubdate || item.pubdate || "",
              doi: (Array.isArray(item.articleids) ? item.articleids : []).find((e) => e.idtype === "doi")?.value || "",
              url: `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`,
            };
          }
        }
      }

      if (!paper) {
        return {
          content: [{ type: "text", text: `Paper not found: ${paper_id}` }],
          isError: true,
        };
      }

      return {
        content: [{ type: "text", text: JSON.stringify(paper, null, 2) }],
      };
    } catch (err) {
      return {
        content: [{ type: "text", text: `Fetch failed: ${err.message}` }],
        isError: true,
      };
    }
  }
);
383
+
384
// Serve over stdio — MCP clients launch this file as a subprocess and
// communicate via stdin/stdout (hence the StdioServerTransport).
const transport = new StdioServerTransport();
await server.connect(transport);