paperplain-mcp 1.1.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +1 -1
  2. package/server.js +309 -82
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "paperplain-mcp",
3
- "version": "1.1.1",
3
+ "version": "1.2.2",
4
4
  "description": "MCP server — search 200M+ peer-reviewed papers from PubMed, ArXiv, and Semantic Scholar. Free. No API key.",
5
5
  "type": "module",
6
6
  "bin": {
package/server.js CHANGED
@@ -15,14 +15,17 @@ const PUBMED_PARAMS = "tool=paperplain&email=hello@paperplain.io";
15
15
  const SEMANTIC_SCHOLAR_BASE = "https://api.semanticscholar.org/graph/v1";
16
16
 
17
17
  // ── Domain classifier (keyword-based, no LLM needed) ───────────────────────
18
+ // Note: "energy" intentionally excluded from health — it's more common in
19
+ // CS/engineering contexts (energy management, HEMS, smart grid) than health.
18
20
  const HEALTH_KEYWORDS =
19
- /\b(sleep|insomnia|anxiety|anxious|stress|depress|pain|ache|headache|migraine|diet|nutrition|weight|obese|exercise|fatigue|tired|energy|focus|adhd|autism|cancer|diabetes|blood|pressure|heart|cholesterol|vitamin|supplement|immune|gut|digestion|mental health|therapy|meditation|mindfulness|mood|burnout|inflammation|allergy|asthma|skin|aging|memory|alzheimer|cognitive|brain|alcohol|smoking|addiction|symptoms|treatment|medicine|medication|dose|chronic|surgery|vaccine|antibiot|clinical|patient|disease|disorder|syndrome|injury|rehabilitation|psychiatric|neurol|cardio|oncol|gastro|pediatr|geriatric)\b/i;
21
+ /\b(sleep|insomnia|anxiety|anxious|stress|depress|pain|ache|headache|migraine|diet|nutrition|weight|obese|exercise|fatigue|tired|focus|adhd|autism|cancer|diabetes|blood|pressure|heart|cholesterol|vitamin|supplement|immune|gut|digestion|mental health|therapy|meditation|mindfulness|mood|burnout|inflammation|allergy|asthma|skin|aging|memory|alzheimer|cognitive|brain|alcohol|smoking|addiction|symptoms|treatment|medicine|medication|dose|chronic|surgery|vaccine|antibiot|clinical|patient|disease|disorder|syndrome|injury|rehabilitation|psychiatric|neurol|cardio|oncol|gastro|pediatr|geriatric)\b/i;
20
22
  const CS_KEYWORDS =
21
- /\b(algorithm|neural network|machine learning|deep learning|transformer|llm|language model|reinforcement|classification|clustering|regression|computer vision|nlp|natural language|robotics|autonomous|blockchain|cryptograph|database|distributed|cloud|microservice|compiler|operating system|cybersecurity|quantum comput|software engineer|retrieval|embedding|vector|attention|fine.tun|prompt|inference|benchmark)\b/i;
23
+ /\b(algorithm|neural network|machine learning|deep learning|transformer|llm|large language model|language model|reinforcement|classification|clustering|regression|computer vision|nlp|natural language|robotics|autonomous|blockchain|cryptograph|database|distributed|cloud|microservice|compiler|operating system|cybersecurity|quantum comput|software engineer|retrieval|embedding|vector|attention|fine.tun|prompt|inference|benchmark|agentic|multi.agent|smart grid|demand response|energy management|HEMS|home energy|building energy|V2G|vehicle.to.grid|EV charging|electric vehicle|battery storage|renewable energy|solar|wind power|forecasting|optimization|scheduling|control system|model predictive|reinforcement learning)\b/i;
22
24
 
23
25
  function classifyDomain(query) {
24
- if (HEALTH_KEYWORDS.test(query)) return "health";
26
+ // CS check runs first — engineering/AI topics should not fall into health
25
27
  if (CS_KEYWORDS.test(query)) return "cs";
28
+ if (HEALTH_KEYWORDS.test(query)) return "health";
26
29
  return "general";
27
30
  }
28
31
 
@@ -68,63 +71,129 @@ function parseArxivXml(xml) {
68
71
  return papers;
69
72
  }
70
73
 
74
/**
 * fetch() with an overall deadline.
 *
 * The deadline is enforced with AbortSignal.timeout(), so it stays armed for
 * the whole request — including reading the response body via res.text() /
 * res.json(). The previous implementation cleared its timer as soon as the
 * response headers arrived, which left a stalled body read without any
 * timeout.
 *
 * @param {string} url - URL to request.
 * @param {number} [ms=10000] - Deadline in milliseconds for the whole request.
 * @returns {Promise<Response>} Resolves with the fetch Response; rejects if
 *   the request fails or the deadline passes first.
 */
async function fetchWithTimeout(url, ms = 10000) {
  return fetch(url, { signal: AbortSignal.timeout(ms) });
}
83
+
71
84
  async function searchArxiv(query, maxResults) {
72
85
  const url = `${ARXIV_BASE}?search_query=all:${encodeURIComponent(query)}&start=0&max_results=${maxResults}&sortBy=relevance&sortOrder=descending`;
73
- const res = await fetch(url);
74
- if (!res.ok) return [];
75
- return parseArxivXml(await res.text());
86
+ try {
87
+ const res = await fetchWithTimeout(url);
88
+ if (!res.ok) return [];
89
+ const papers = parseArxivXml(await res.text());
90
+ // If broad search returns nothing, retry with title-field search
91
+ if (papers.length === 0) {
92
+ const titleUrl = `${ARXIV_BASE}?search_query=ti:${encodeURIComponent(query)}&start=0&max_results=${maxResults}&sortBy=relevance&sortOrder=descending`;
93
+ const titleRes = await fetchWithTimeout(titleUrl);
94
+ if (titleRes.ok) return parseArxivXml(await titleRes.text());
95
+ }
96
+ return papers;
97
+ } catch {
98
+ return [];
99
+ }
100
+ }
101
+
102
/**
 * Reduce any common way of writing an arXiv reference to the bare identifier.
 *
 * Accepts bare IDs ("2301.07041", "2301.07041v2", "cs/0504010"), the
 * "arxiv:" prefix (any case), and abs/pdf URLs on arxiv.org, www.arxiv.org
 * or export.arxiv.org, with or without a trailing ".pdf", query string or
 * fragment. Whitespace is trimmed first so that the anchored patterns below
 * still apply to padded input.
 *
 * @param {string} arxivId - Raw identifier or URL supplied by the caller.
 * @returns {string} The bare arXiv identifier (version suffix preserved), or
 *   "" when the input is empty.
 */
function normalizeArxivId(arxivId) {
  return String(arxivId ?? "")
    .trim()
    .replace(/^arxiv:\s*/i, "")
    .replace(/^https?:\/\/(?:www\.|export\.)?arxiv\.org\/(?:abs|pdf)\//i, "")
    .replace(/[?#].*$/, "") // drop "?context=…" / "#…" — never part of an ID
    .replace(/\.pdf$/i, "")
    .trim();
}
77
109
 
78
110
  async function fetchArxivById(arxivId) {
79
- const clean = arxivId.replace(/^arxiv:/i, "").replace(/^.*abs\//, "").trim();
80
- const url = `${ARXIV_BASE}?id_list=${clean}`;
81
- const res = await fetch(url);
82
- if (!res.ok) return null;
83
- const papers = parseArxivXml(await res.text());
84
- return papers[0] || null;
111
+ const clean = normalizeArxivId(arxivId);
112
+ try {
113
+ const res = await fetchWithTimeout(`${ARXIV_BASE}?id_list=${encodeURIComponent(clean)}`);
114
+ if (!res.ok) return null;
115
+ const papers = parseArxivXml(await res.text());
116
+ return papers[0] || null;
117
+ } catch {
118
+ return null;
119
+ }
120
+ }
121
+
122
/**
 * Look up a paper on Semantic Scholar by its arXiv identifier.
 *
 * Intended as a fallback for when the arXiv API itself returns nothing
 * (S2 accepts an "ARXIV:" prefixed paper id). The version suffix ("v2") is
 * stripped before the lookup.
 *
 * @param {string} arxivId - arXiv id, "arxiv:"-prefixed id, or arxiv.org URL.
 * @returns {Promise<object|null>} A paper record shaped like the arXiv
 *   results (id, source, title, authors, abstract, published, doi, url,
 *   pdf_url, citations), or null when the id is empty, the request fails,
 *   or S2 has no usable record.
 */
async function fetchS2ByArxivId(arxivId) {
  const clean = normalizeArxivId(arxivId).replace(/v\d+$/i, ""); // strip version for S2
  // Nothing to look up — avoid a pointless request to ".../paper/ARXIV:".
  if (!clean) return null;
  try {
    const fields = "title,abstract,authors,year,citationCount,openAccessPdf,externalIds";
    const res = await fetchWithTimeout(
      `${SEMANTIC_SCHOLAR_BASE}/paper/ARXIV:${encodeURIComponent(clean)}?fields=${fields}`
    );
    if (!res.ok) return null;
    const item = await res.json().catch(() => null);
    if (!item?.paperId || !item.title) return null;
    return {
      id: `arxiv:${clean}`,
      source: "arxiv",
      title: (item.title || "").replace(/\s+/g, " ").trim(),
      // `a?.name` so a null author entry is skipped instead of throwing and
      // discarding the whole paper.
      authors: Array.isArray(item.authors)
        ? item.authors.map((a) => a?.name).filter(Boolean)
        : [],
      abstract: (item.abstract || "").replace(/\s+/g, " ").trim(),
      published: item.year ? `${item.year}` : "",
      doi: item.externalIds?.DOI || "",
      url: `https://arxiv.org/abs/${clean}`,
      pdf_url: item.openAccessPdf?.url || `https://arxiv.org/pdf/${clean}`,
      citations: typeof item.citationCount === "number" ? item.citationCount : 0,
    };
  } catch {
    // Best-effort lookup: network errors and timeouts are reported as "not found".
    return null;
  }
}
86
151
 
87
152
  // ── PubMed ─────────────────────────────────────────────────────────────────
88
153
  async function searchPubMed(query, maxResults) {
89
- const searchUrl = `${PUBMED_BASE}/esearch.fcgi?db=pubmed&term=${encodeURIComponent(query)}&retmax=${maxResults}&retmode=json&sort=relevance&${PUBMED_PARAMS}`;
90
- const searchRes = await fetch(searchUrl);
91
- if (!searchRes.ok) return [];
92
- const searchData = await searchRes.json();
93
- const pmids = searchData?.esearchresult?.idlist || [];
94
- if (!pmids.length) return [];
95
-
96
- // Fetch summaries (title, authors, date)
97
- const summaryUrl = `${PUBMED_BASE}/esummary.fcgi?db=pubmed&id=${pmids.join(",")}&retmode=json&${PUBMED_PARAMS}`;
98
- const summaryRes = await fetch(summaryUrl);
99
- if (!summaryRes.ok) return [];
100
- const summaryData = await summaryRes.json();
101
- const result = summaryData?.result || {};
102
-
103
- // Fetch abstracts via efetch
104
- const abstracts = await fetchPubMedAbstracts(pmids);
105
-
106
- return pmids
107
- .map((pmid) => {
108
- const item = result[pmid];
109
- if (!item || !item.title) return null;
110
- const doi =
111
- (Array.isArray(item.articleids) ? item.articleids : []).find(
112
- (e) => e.idtype === "doi"
113
- )?.value || "";
114
- return {
115
- id: `pubmed:${pmid}`,
116
- source: "pubmed",
117
- title: item.title.trim(),
118
- authors: Array.isArray(item.authors)
119
- ? item.authors.map((a) => a.name).filter(Boolean)
120
- : [],
121
- abstract: abstracts[pmid] || "",
122
- published: item.epubdate || item.pubdate || "",
123
- doi,
124
- url: `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`,
125
- };
126
- })
127
- .filter(Boolean);
154
+ try {
155
+ const searchUrl = `${PUBMED_BASE}/esearch.fcgi?db=pubmed&term=${encodeURIComponent(query)}&retmax=${maxResults}&retmode=json&sort=relevance&${PUBMED_PARAMS}`;
156
+ const searchRes = await fetchWithTimeout(searchUrl);
157
+ if (!searchRes.ok) return [];
158
+ const searchData = await searchRes.json();
159
+ const pmids = searchData?.esearchresult?.idlist || [];
160
+ if (!pmids.length) return [];
161
+
162
+ const summaryUrl = `${PUBMED_BASE}/esummary.fcgi?db=pubmed&id=${pmids.join(",")}&retmode=json&${PUBMED_PARAMS}`;
163
+ const summaryRes = await fetchWithTimeout(summaryUrl);
164
+ if (!summaryRes.ok) return [];
165
+ const summaryData = await summaryRes.json();
166
+ const result = summaryData?.result || {};
167
+
168
+ const abstracts = await fetchPubMedAbstracts(pmids);
169
+
170
+ return pmids
171
+ .map((pmid) => {
172
+ const item = result[pmid];
173
+ if (!item || !item.title) return null;
174
+ const abstract = abstracts[pmid] || "";
175
+ if (!abstract) return null; // skip papers with no abstract — useless for synthesis
176
+ const doi =
177
+ (Array.isArray(item.articleids) ? item.articleids : []).find(
178
+ (e) => e.idtype === "doi"
179
+ )?.value || "";
180
+ return {
181
+ id: `pubmed:${pmid}`,
182
+ source: "pubmed",
183
+ title: item.title.trim(),
184
+ authors: Array.isArray(item.authors)
185
+ ? item.authors.map((a) => a.name).filter(Boolean)
186
+ : [],
187
+ abstract,
188
+ published: item.epubdate || item.pubdate || "",
189
+ doi,
190
+ url: `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`,
191
+ };
192
+ })
193
+ .filter(Boolean);
194
+ } catch {
195
+ return [];
196
+ }
128
197
  }
129
198
 
130
199
  async function fetchPubMedAbstracts(pmids) {
@@ -193,7 +262,7 @@ async function searchSemanticScholar(query, maxResults) {
193
262
  // ── MCP Server ─────────────────────────────────────────────────────────────
194
263
  const server = new McpServer({
195
264
  name: "paperplain",
196
- version: "1.1.0",
265
+ version: "1.2.2",
197
266
  description:
198
267
  "Search 200M+ peer-reviewed papers from PubMed, ArXiv, and Semantic Scholar. Returns papers with full abstracts — use your own model to synthesize findings.",
199
268
  });
@@ -229,28 +298,45 @@ Use the returned abstracts to synthesize findings, answer the user's question, o
229
298
  async ({ query, max_results, domain }) => {
230
299
  const resolvedDomain = domain === "auto" ? classifyDomain(query) : domain;
231
300
  let papers = [];
232
- let sources = [];
301
+ // Track each source: "ok" | "empty" | "error"
302
+ const sourceStatus = {};
303
+
304
+ async function safeArxiv(q, n) {
305
+ try {
306
+ const r = await searchArxiv(q, n);
307
+ sourceStatus.arxiv = r.length ? "ok" : "empty";
308
+ return r;
309
+ } catch { sourceStatus.arxiv = "error"; return []; }
310
+ }
311
+ async function safePubMed(q, n) {
312
+ try {
313
+ const r = await searchPubMed(q, n);
314
+ sourceStatus.pubmed = r.length ? "ok" : "empty";
315
+ return r;
316
+ } catch { sourceStatus.pubmed = "error"; return []; }
317
+ }
318
+ async function safeS2(q, n) {
319
+ try {
320
+ const r = await searchSemanticScholar(q, n);
321
+ sourceStatus.semanticscholar = r.length ? "ok" : "empty";
322
+ return r;
323
+ } catch { sourceStatus.semanticscholar = "error"; return []; }
324
+ }
233
325
 
234
326
  try {
235
327
  if (resolvedDomain === "health") {
236
- // PubMed primary, Semantic Scholar as fill
237
- let pubmedPapers = await searchPubMed(query, max_results);
238
- if (pubmedPapers.length) sources.push("pubmed");
328
+ let pubmedPapers = await safePubMed(query, max_results);
239
329
  if (pubmedPapers.length < max_results) {
240
- const s2 = await searchSemanticScholar(query, max_results - pubmedPapers.length);
241
- if (s2.length) sources.push("semanticscholar");
330
+ const s2 = await safeS2(query, max_results - pubmedPapers.length);
242
331
  const seen = new Set(pubmedPapers.map((p) => p.id));
243
332
  for (const p of s2) if (!seen.has(p.id)) pubmedPapers.push(p);
244
333
  }
245
334
  papers = pubmedPapers.slice(0, max_results);
246
335
  } else if (resolvedDomain === "cs") {
247
- // ArXiv + Semantic Scholar, deduplicate overlaps
248
336
  const [arxiv, s2] = await Promise.all([
249
- searchArxiv(query, max_results),
250
- searchSemanticScholar(query, Math.ceil(max_results / 2)),
337
+ safeArxiv(query, max_results),
338
+ safeS2(query, Math.ceil(max_results / 2)),
251
339
  ]);
252
- if (arxiv.length) sources.push("arxiv");
253
- if (s2.length) sources.push("semanticscholar");
254
340
  const maxArxiv = Math.ceil(max_results * 0.6);
255
341
  const arxivIds = new Set(arxiv.map((p) => p.id));
256
342
  const uniqueS2 = s2.filter((p) => !arxivIds.has(p.id));
@@ -259,15 +345,11 @@ Use the returned abstracts to synthesize findings, answer the user's question, o
259
345
  ...uniqueS2.slice(0, max_results - Math.min(arxiv.length, maxArxiv)),
260
346
  ].slice(0, max_results);
261
347
  } else {
262
- // General: all three sources interleaved
263
348
  const [arxiv, pubmed, s2] = await Promise.all([
264
- searchArxiv(query, max_results),
265
- searchPubMed(query, max_results),
266
- searchSemanticScholar(query, Math.ceil(max_results / 2)),
349
+ safeArxiv(query, max_results),
350
+ safePubMed(query, max_results),
351
+ safeS2(query, Math.ceil(max_results / 2)),
267
352
  ]);
268
- if (arxiv.length) sources.push("arxiv");
269
- if (pubmed.length) sources.push("pubmed");
270
- if (s2.length) sources.push("semanticscholar");
271
353
  const maxEach = Math.floor(max_results / 3);
272
354
  const remainder = max_results - maxEach * 3;
273
355
  papers = [
@@ -277,6 +359,18 @@ Use the returned abstracts to synthesize findings, answer the user's question, o
277
359
  ].slice(0, max_results);
278
360
  }
279
361
 
362
+ // Warn if expected sources came back empty or errored
363
+ const warnings = [];
364
+ const expectedSources = resolvedDomain === "health"
365
+ ? ["pubmed", "semanticscholar"]
366
+ : resolvedDomain === "cs"
367
+ ? ["arxiv", "semanticscholar"]
368
+ : ["arxiv", "pubmed", "semanticscholar"];
369
+ for (const src of expectedSources) {
370
+ if (sourceStatus[src] === "empty") warnings.push(`${src}: returned 0 results (API may be rate-limited or query too specific)`);
371
+ if (sourceStatus[src] === "error") warnings.push(`${src}: request failed (API may be temporarily unavailable)`);
372
+ }
373
+
280
374
  return {
281
375
  content: [
282
376
  {
@@ -285,7 +379,8 @@ Use the returned abstracts to synthesize findings, answer the user's question, o
285
379
  {
286
380
  query,
287
381
  domain: resolvedDomain,
288
- sources_searched: sources,
382
+ source_status: sourceStatus,
383
+ ...(warnings.length ? { warnings } : {}),
289
384
  total: papers.length,
290
385
  papers: papers.map((p) => ({
291
386
  id: p.id,
@@ -314,33 +409,79 @@ Use the returned abstracts to synthesize findings, answer the user's question, o
314
409
  }
315
410
  );
316
411
 
412
+ // ── Semantic Scholar single-paper lookup (by DOI) ──────────────────────────
413
/**
 * Look up a single paper on Semantic Scholar by DOI.
 *
 * Accepts a bare DOI ("10.1145/3290605.3300857"), a "doi:" prefixed DOI, or
 * a doi.org / dx.doi.org URL.
 *
 * @param {string} doi - DOI in any of the forms above.
 * @returns {Promise<object|null>} A paper record (id, source, title, authors,
 *   abstract, published, doi, url, pdf_url, citations), or null when the DOI
 *   is empty, the request fails, or S2 has no usable record.
 */
async function fetchS2ByDoi(doi) {
  try {
    const clean = String(doi ?? "")
      .trim()
      .replace(/^doi:\s*/i, "")
      .replace(/^https?:\/\/(?:dx\.)?doi\.org\//i, "")
      .trim();
    // Nothing to look up — avoid a pointless request to ".../paper/DOI:".
    if (!clean) return null;

    const fields = "title,abstract,authors,year,citationCount,openAccessPdf,externalIds";
    const res = await fetchWithTimeout(
      `${SEMANTIC_SCHOLAR_BASE}/paper/DOI:${encodeURIComponent(clean)}?fields=${fields}`
    );
    if (!res.ok) return null;
    const item = await res.json().catch(() => null);
    if (!item?.paperId || !item.title) return null;

    // Prefer the arXiv landing page when S2 knows one; otherwise resolve via
    // doi.org (the DOI is guaranteed non-empty at this point).
    const arxivId = item.externalIds?.ArXiv || "";
    const url = arxivId ? `https://arxiv.org/abs/${arxivId}` : `https://doi.org/${clean}`;

    return {
      id: `s2:${item.paperId}`,
      source: "semanticscholar",
      title: (item.title || "").replace(/\s+/g, " ").trim(),
      // `a?.name` so a null author entry is skipped instead of throwing and
      // discarding the whole paper.
      authors: Array.isArray(item.authors)
        ? item.authors.map((a) => a?.name).filter(Boolean)
        : [],
      abstract: (item.abstract || "").replace(/\s+/g, " ").trim(),
      published: item.year ? `${item.year}` : "",
      doi: clean,
      url,
      pdf_url: item.openAccessPdf?.url || "",
      citations: typeof item.citationCount === "number" ? item.citationCount : 0,
    };
  } catch {
    // Best-effort lookup: network errors and timeouts are reported as "not found".
    return null;
  }
}
445
+
317
446
  // Tool 2: fetch_paper
318
447
  server.tool(
319
448
  "fetch_paper",
320
- `Fetch the full abstract and metadata for a specific paper by ID.
321
- Supports ArXiv IDs (e.g. '2301.07041' or 'arxiv:2301.07041') and PubMed IDs (e.g. 'pubmed:37183813' or just '37183813').
322
- Use this to get the full abstract of a paper you already know about.`,
449
+ `Fetch the full abstract and metadata for a specific paper by ID or DOI.
450
+ Supports:
451
+ - ArXiv IDs: '2301.07041', 'arxiv:2301.07041v2', 'https://arxiv.org/abs/2301.07041'
452
+ - PubMed IDs: 'pubmed:37183813' or just '37183813'
453
+ - DOIs: '10.1145/3290605.3300857' or 'doi:10.1145/3290605.3300857' (looks up via Semantic Scholar)
454
+ Use this to verify a specific paper you already know about or to retrieve its abstract.`,
323
455
  {
324
456
  paper_id: z
325
457
  .string()
326
458
  .describe(
327
- "ArXiv ID (e.g. '2301.07041') or PubMed ID (e.g. 'pubmed:37183813')"
459
+ "ArXiv ID, PubMed ID, or DOI — e.g. '2301.07041', 'pubmed:37183813', or '10.1145/3290605.3300857'"
328
460
  ),
329
461
  },
330
462
  async ({ paper_id }) => {
331
463
  try {
464
+ const trimmed = paper_id.trim();
332
465
  const isArxiv =
333
- /arxiv:/i.test(paper_id) ||
334
- /^\d{4}\.\d{4,5}$/.test(paper_id.trim()) ||
335
- /arxiv\.org/.test(paper_id);
336
- const isPubMed = /pubmed:/i.test(paper_id) || /^\d{6,9}$/.test(paper_id.trim());
466
+ /arxiv:/i.test(trimmed) ||
467
+ /^\d{4}\.\d{4,5}(v\d+)?$/.test(trimmed) || // 2301.07041 or 2301.07041v2
468
+ /^[a-z-]+(\.[A-Z]+)?\/\d{7}(v\d+)?$/.test(trimmed) || // old format: cs.LG/0504010
469
+ /arxiv\.org/.test(trimmed);
470
+ const isPubMed =
471
+ /pubmed:/i.test(trimmed) || /^\d{6,9}$/.test(trimmed);
472
+ const isDOI =
473
+ /^doi:/i.test(trimmed) || /^10\.\d{4,}\/\S+$/.test(trimmed);
337
474
 
338
475
  let paper = null;
339
476
 
340
477
  if (isArxiv) {
341
- paper = await fetchArxivById(paper_id);
478
+ paper = await fetchArxivById(trimmed);
479
+ // Fallback: ArXiv API rate-limits under parallel load — try S2 ARXIV: endpoint
480
+ if (!paper) paper = await fetchS2ByArxivId(trimmed);
481
+ } else if (isDOI) {
482
+ paper = await fetchS2ByDoi(trimmed);
342
483
  } else if (isPubMed) {
343
- const pmid = paper_id.replace(/^pubmed:/i, "").trim();
484
+ const pmid = trimmed.replace(/^pubmed:/i, "").trim();
344
485
  const abstracts = await fetchPubMedAbstracts([pmid]);
345
486
  const summaryUrl = `${PUBMED_BASE}/esummary.fcgi?db=pubmed&id=${pmid}&retmode=json&${PUBMED_PARAMS}`;
346
487
  const summaryRes = await fetch(summaryUrl);
@@ -364,7 +505,10 @@ Use this to get the full abstract of a paper you already know about.`,
364
505
 
365
506
  if (!paper) {
366
507
  return {
367
- content: [{ type: "text", text: `Paper not found: ${paper_id}` }],
508
+ content: [{
509
+ type: "text",
510
+ text: `Paper not found: ${paper_id}\n\nTip: For arXiv papers, try the bare ID (e.g. '2301.07041'). For journal papers, try the DOI (e.g. '10.1145/3290605.3300857'). For PubMed papers, use the PMID number.`,
511
+ }],
368
512
  isError: true,
369
513
  };
370
514
  }
@@ -381,5 +525,88 @@ Use this to get the full abstract of a paper you already know about.`,
381
525
  }
382
526
  );
383
527
 
528
+ // Tool 3: find_paper_by_title
529
+ server.tool(
530
+ "find_paper_by_title",
531
+ `Find a specific paper when you only know its title (or partial title).
532
+ Uses Semantic Scholar's title-match search. Returns the closest match with full abstract, authors, DOI, and source URL.
533
+ Useful for verifying a citation or retrieving abstract details for a paper you already know exists.`,
534
+ {
535
+ title: z
536
+ .string()
537
+ .describe("The paper title or a key phrase from it, e.g. 'Attention Is All You Need'"),
538
+ year: z
539
+ .number()
540
+ .optional()
541
+ .describe("Publication year to narrow down the match (optional)"),
542
+ },
543
+ async ({ title, year }) => {
544
+ try {
545
+ const fields = "title,abstract,authors,year,citationCount,openAccessPdf,externalIds";
546
+ const url = `${SEMANTIC_SCHOLAR_BASE}/paper/search?query=${encodeURIComponent(title)}&limit=5&fields=${fields}`;
547
+ const res = await fetchWithTimeout(url);
548
+ if (!res.ok) {
549
+ return {
550
+ content: [{ type: "text", text: `Search failed: Semantic Scholar returned ${res.status}` }],
551
+ isError: true,
552
+ };
553
+ }
554
+ const data = await res.json().catch(() => null);
555
+ if (!data?.data?.length) {
556
+ return {
557
+ content: [{ type: "text", text: `No papers found matching: "${title}"` }],
558
+ isError: true,
559
+ };
560
+ }
561
+
562
+ // Pick best match: prefer year match if provided, otherwise take top result
563
+ let candidates = data.data.filter((p) => p.title && p.abstract);
564
+ if (!candidates.length) candidates = data.data.filter((p) => p.title);
565
+ if (!candidates.length) {
566
+ return {
567
+ content: [{ type: "text", text: `No papers found matching: "${title}"` }],
568
+ isError: true,
569
+ };
570
+ }
571
+
572
+ let best = candidates[0];
573
+ if (year) {
574
+ const yearMatch = candidates.find((p) => p.year === year);
575
+ if (yearMatch) best = yearMatch;
576
+ }
577
+
578
+ const ext = best.externalIds || {};
579
+ const doi = ext.DOI || "";
580
+ const arxivId = ext.ArXiv || "";
581
+ let paperUrl;
582
+ if (arxivId) paperUrl = `https://arxiv.org/abs/${arxivId}`;
583
+ else if (doi) paperUrl = `https://doi.org/${doi}`;
584
+ else paperUrl = `https://www.semanticscholar.org/paper/${best.paperId}`;
585
+
586
+ const paper = {
587
+ id: arxivId ? `arxiv:${arxivId}` : `s2:${best.paperId}`,
588
+ source: arxivId ? "arxiv" : "semanticscholar",
589
+ title: (best.title || "").replace(/\s+/g, " ").trim(),
590
+ authors: Array.isArray(best.authors) ? best.authors.map((a) => a.name).filter(Boolean) : [],
591
+ abstract: (best.abstract || "").replace(/\s+/g, " ").trim(),
592
+ published: best.year ? `${best.year}` : "",
593
+ doi,
594
+ url: paperUrl,
595
+ pdf_url: best.openAccessPdf?.url || "",
596
+ citations: typeof best.citationCount === "number" ? best.citationCount : 0,
597
+ };
598
+
599
+ return {
600
+ content: [{ type: "text", text: JSON.stringify(paper, null, 2) }],
601
+ };
602
+ } catch (err) {
603
+ return {
604
+ content: [{ type: "text", text: `find_paper_by_title failed: ${err.message}` }],
605
+ isError: true,
606
+ };
607
+ }
608
+ }
609
+ );
610
+
384
611
  const transport = new StdioServerTransport();
385
612
  await server.connect(transport);