paperplain-mcp 1.1.1 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/server.js +309 -82
package/package.json
CHANGED
package/server.js
CHANGED
|
@@ -15,14 +15,17 @@ const PUBMED_PARAMS = "tool=paperplain&email=hello@paperplain.io";
|
|
|
15
15
|
const SEMANTIC_SCHOLAR_BASE = "https://api.semanticscholar.org/graph/v1";
|
|
16
16
|
|
|
17
17
|
// ── Domain classifier (keyword-based, no LLM needed) ───────────────────────
|
|
18
|
+
// Note: "energy" intentionally excluded from health — it's more common in
|
|
19
|
+
// CS/engineering contexts (energy management, HEMS, smart grid) than health.
|
|
18
20
|
// Each pattern has two alternatives:
//   1. whole words/phrases, bounded by \b on both sides;
//   2. truncated stems (e.g. "depress", "cryptograph"), bounded on the left
//      only and followed by \w* so they match "depression", "cryptography".
// A stem wrapped in \b...\b can never match its inflected forms, because
// there is no word boundary between the stem and the rest of the word.
const HEALTH_KEYWORDS =
  /\b(?:(?:sleep|insomnia|anxiety|anxious|stress|pain|ache|headache|migraine|diet|nutrition|weight|obese|exercise|fatigue|tired|focus|adhd|autism|cancer|diabetes|blood|pressure|heart|cholesterol|vitamin|supplement|immune|gut|digestion|mental health|therapy|meditation|mindfulness|mood|burnout|inflammation|allergy|asthma|skin|aging|memory|alzheimer|cognitive|brain|alcohol|smoking|addiction|symptoms|treatment|medicine|medication|dose|chronic|surgery|vaccine|clinical|patient|disease|disorder|syndrome|injury|rehabilitation|psychiatric|geriatric)\b|(?:depress|antibiot|neurol|cardio|oncol|gastro|pediatr)\w*)/i;
const CS_KEYWORDS =
  /\b(?:(?:algorithm|neural network|machine learning|deep learning|transformer|llm|large language model|language model|reinforcement|classification|clustering|regression|computer vision|nlp|natural language|robotics|autonomous|blockchain|database|distributed|cloud|microservice|compiler|operating system|cybersecurity|retrieval|embedding|vector|attention|prompt|inference|benchmark|agentic|multi.agent|smart grid|demand response|energy management|HEMS|home energy|building energy|V2G|vehicle.to.grid|EV charging|electric vehicle|battery storage|renewable energy|solar|wind power|forecasting|optimization|scheduling|control system|model predictive|reinforcement learning)\b|(?:cryptograph|quantum comput|software engineer|fine.tun)\w*)/i;

/**
 * Classify a free-text query into a search domain using keyword matching.
 *
 * @param {string} query - The user's search query.
 * @returns {"cs" | "health" | "general"} "cs" if any CS/engineering keyword
 *   matches, otherwise "health" if any health keyword matches, otherwise
 *   "general".
 */
function classifyDomain(query) {
  // CS check runs first — engineering/AI topics should not fall into health
  if (CS_KEYWORDS.test(query)) return "cs";
  if (HEALTH_KEYWORDS.test(query)) return "health";
  return "general";
}
|
|
28
31
|
|
|
@@ -68,63 +71,129 @@ function parseArxivXml(xml) {
|
|
|
68
71
|
return papers;
|
|
69
72
|
}
|
|
70
73
|
|
|
74
|
+
/**
 * Issue a GET request that is aborted if it takes longer than `ms`.
 *
 * The deadline comes from `AbortSignal.timeout(ms)`, whose signal stays
 * attached to the returned Response. The time limit therefore also covers
 * reading the body (`res.text()` / `res.json()`), not just waiting for the
 * response headers. A manual timer cleared right after `fetch()` resolves
 * would leave a stalled body stream unbounded.
 *
 * @param {string} url - Absolute URL to fetch.
 * @param {number} [ms=10000] - Overall time budget in milliseconds.
 * @returns {Promise<Response>} The fetch Response.
 * @throws Rejects with a "TimeoutError" DOMException when the deadline
 *   passes, or with fetch's own error on network / URL failures.
 */
async function fetchWithTimeout(url, ms = 10000) {
  return fetch(url, { signal: AbortSignal.timeout(ms) });
}
|
|
83
|
+
|
|
71
84
|
/**
 * Search arXiv for `query`, first across all fields and then, if that
 * yields no entries, across titles only.
 *
 * @param {string} query - Free-text search query.
 * @param {number} maxResults - Maximum number of entries to request.
 * @returns {Promise<object[]>} Parsed papers; an empty array when the first
 *   request is not OK, nothing is found, or any request/parse step throws.
 */
async function searchArxiv(query, maxResults) {
  const broadUrl = `${ARXIV_BASE}?search_query=all:${encodeURIComponent(query)}&start=0&max_results=${maxResults}&sortBy=relevance&sortOrder=descending`;
  try {
    const broadRes = await fetchWithTimeout(broadUrl);
    if (!broadRes.ok) {
      return [];
    }

    const broadHits = parseArxivXml(await broadRes.text());
    if (broadHits.length !== 0) {
      return broadHits;
    }

    // The all-fields search came back empty: retry against the title field.
    const titleUrl = `${ARXIV_BASE}?search_query=ti:${encodeURIComponent(query)}&start=0&max_results=${maxResults}&sortBy=relevance&sortOrder=descending`;
    const titleRes = await fetchWithTimeout(titleUrl);
    return titleRes.ok ? parseArxivXml(await titleRes.text()) : broadHits;
  } catch {
    return [];
  }
}
|
|
101
|
+
|
|
102
|
+
/**
 * Reduce the accepted arXiv reference forms to a bare identifier.
 *
 * Accepted inputs include "2301.07041", "arXiv:2301.07041v2",
 * "https://arxiv.org/abs/2301.07041" and
 * "https://arxiv.org/pdf/2301.07041.pdf". Any version suffix ("v2") is kept.
 *
 * @param {string} arxivId - Raw identifier or URL as supplied by the caller.
 * @returns {string} The bare arXiv identifier.
 */
function normalizeArxivId(arxivId) {
  return arxivId
    // Trim first: the prefix patterns below are anchored at the string start,
    // so leading whitespace would otherwise stop them from matching.
    .trim()
    .replace(/^arxiv:\s*/i, "")
    // Scheme and host are case-insensitive; also accept www./export. hosts.
    .replace(/^https?:\/\/(?:www\.|export\.)?arxiv\.org\/(?:abs|pdf)\//i, "")
    // Drop a query string or fragment copied along with the URL.
    .replace(/[?#].*$/, "")
    .replace(/\.pdf$/i, "")
    .trim();
}
|
|
77
109
|
|
|
78
110
|
/**
 * Look up a single paper through the arXiv API's `id_list` parameter.
 *
 * @param {string} arxivId - Identifier in any form normalizeArxivId accepts.
 * @returns {Promise<object|null>} The first parsed entry, or null when the
 *   response is not OK, no entry was parsed, or the request throws.
 */
async function fetchArxivById(arxivId) {
  const bareId = normalizeArxivId(arxivId);
  try {
    const lookupUrl = `${ARXIV_BASE}?id_list=${encodeURIComponent(bareId)}`;
    const response = await fetchWithTimeout(lookupUrl);
    if (!response.ok) {
      return null;
    }
    const xml = await response.text();
    const entries = parseArxivXml(xml);
    return entries[0] || null;
  } catch {
    return null;
  }
}
|
|
121
|
+
|
|
122
|
+
/**
 * Fetch an arXiv paper's metadata from Semantic Scholar via its `ARXIV:`
 * paper-id prefix. Used as a fallback when the arXiv API gives no result.
 *
 * @param {string} arxivId - Identifier in any form normalizeArxivId accepts;
 *   a trailing version suffix ("v2") is removed before the lookup.
 * @returns {Promise<object|null>} A paper record with `source: "arxiv"`, or
 *   null when the response is not OK, lacks a paperId/title, or a step throws.
 */
async function fetchS2ByArxivId(arxivId) {
  // S2 is queried with the unversioned identifier.
  const bareId = normalizeArxivId(arxivId).replace(/v\d+$/i, "");
  try {
    const fields = "title,abstract,authors,year,citationCount,openAccessPdf,externalIds";
    const endpoint = `${SEMANTIC_SCHOLAR_BASE}/paper/ARXIV:${encodeURIComponent(bareId)}?fields=${fields}`;
    const response = await fetchWithTimeout(endpoint);
    if (!response.ok) {
      return null;
    }

    const item = await response.json().catch(() => null);
    if (!item || !item.paperId || !item.title) {
      return null;
    }

    // Collapse runs of whitespace into single spaces.
    const squash = (text) => (text || "").replace(/\s+/g, " ").trim();
    const externalIds = item.externalIds || {};
    const authorNames = Array.isArray(item.authors)
      ? item.authors.map((a) => a.name).filter(Boolean)
      : [];
    const citationCount = typeof item.citationCount === "number" ? item.citationCount : 0;

    return {
      id: `arxiv:${bareId}`,
      source: "arxiv",
      title: squash(item.title),
      authors: authorNames,
      abstract: squash(item.abstract),
      published: item.year ? `${item.year}` : "",
      doi: externalIds.DOI || "",
      url: `https://arxiv.org/abs/${bareId}`,
      pdf_url: item.openAccessPdf?.url || `https://arxiv.org/pdf/${bareId}`,
      citations: citationCount,
    };
  } catch {
    return null;
  }
}
|
|
86
151
|
|
|
87
152
|
// ── PubMed ─────────────────────────────────────────────────────────────────
|
|
88
153
|
/**
 * Search PubMed and return papers that have both a summary and an abstract.
 *
 * Runs three steps in order: esearch (query → PMIDs), esummary (PMIDs →
 * metadata), then fetchPubMedAbstracts (PMIDs → abstract text).
 *
 * @param {string} query - Free-text search query.
 * @param {number} maxResults - Value sent as esearch `retmax`.
 * @returns {Promise<object[]>} Paper records in esearch order; an empty array
 *   when a response is not OK, nothing matches, or any step throws.
 */
async function searchPubMed(query, maxResults) {
  try {
    const searchUrl = `${PUBMED_BASE}/esearch.fcgi?db=pubmed&term=${encodeURIComponent(query)}&retmax=${maxResults}&retmode=json&sort=relevance&${PUBMED_PARAMS}`;
    const searchRes = await fetchWithTimeout(searchUrl);
    if (!searchRes.ok) {
      return [];
    }
    const searchData = await searchRes.json();
    const pmids = searchData?.esearchresult?.idlist || [];
    if (!pmids.length) {
      return [];
    }

    const summaryUrl = `${PUBMED_BASE}/esummary.fcgi?db=pubmed&id=${pmids.join(",")}&retmode=json&${PUBMED_PARAMS}`;
    const summaryRes = await fetchWithTimeout(summaryUrl);
    if (!summaryRes.ok) {
      return [];
    }
    const summaryData = await summaryRes.json();
    const summaries = summaryData?.result || {};

    const abstractsByPmid = await fetchPubMedAbstracts(pmids);

    const papers = [];
    for (const pmid of pmids) {
      const summary = summaries[pmid];
      if (!summary || !summary.title) {
        continue;
      }

      // Papers without an abstract are skipped — useless for synthesis.
      const abstract = abstractsByPmid[pmid] || "";
      if (!abstract) {
        continue;
      }

      const articleIds = Array.isArray(summary.articleids) ? summary.articleids : [];
      const doi = articleIds.find((e) => e.idtype === "doi")?.value || "";
      const authors = Array.isArray(summary.authors)
        ? summary.authors.map((a) => a.name).filter(Boolean)
        : [];

      papers.push({
        id: `pubmed:${pmid}`,
        source: "pubmed",
        title: summary.title.trim(),
        authors,
        abstract,
        published: summary.epubdate || summary.pubdate || "",
        doi,
        url: `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`,
      });
    }
    return papers;
  } catch {
    return [];
  }
}
|
|
129
198
|
|
|
130
199
|
async function fetchPubMedAbstracts(pmids) {
|
|
@@ -193,7 +262,7 @@ async function searchSemanticScholar(query, maxResults) {
|
|
|
193
262
|
// ── MCP Server ─────────────────────────────────────────────────────────────
|
|
194
263
|
const server = new McpServer({
|
|
195
264
|
name: "paperplain",
|
|
196
|
-
version: "1.
|
|
265
|
+
version: "1.2.2",
|
|
197
266
|
description:
|
|
198
267
|
"Search 200M+ peer-reviewed papers from PubMed, ArXiv, and Semantic Scholar. Returns papers with full abstracts — use your own model to synthesize findings.",
|
|
199
268
|
});
|
|
@@ -229,28 +298,45 @@ Use the returned abstracts to synthesize findings, answer the user's question, o
|
|
|
229
298
|
async ({ query, max_results, domain }) => {
|
|
230
299
|
const resolvedDomain = domain === "auto" ? classifyDomain(query) : domain;
|
|
231
300
|
let papers = [];
|
|
232
|
-
|
|
301
|
+
// Track each source: "ok" | "empty" | "error"
|
|
302
|
+
const sourceStatus = {};
|
|
303
|
+
|
|
304
|
+
// Shared body of the three per-source wrappers below: run one search, record
// its outcome in sourceStatus under `key` ("ok" | "empty" | "error"), and
// always resolve to an array so one failing source cannot fail the others.
async function runTrackedSearch(key, search, q, n) {
  try {
    const hits = await search(q, n);
    sourceStatus[key] = hits.length ? "ok" : "empty";
    return hits;
  } catch {
    sourceStatus[key] = "error";
    return [];
  }
}
// arXiv search with status tracking.
function safeArxiv(q, n) {
  return runTrackedSearch("arxiv", searchArxiv, q, n);
}
// PubMed search with status tracking.
function safePubMed(q, n) {
  return runTrackedSearch("pubmed", searchPubMed, q, n);
}
// Semantic Scholar search with status tracking.
function safeS2(q, n) {
  return runTrackedSearch("semanticscholar", searchSemanticScholar, q, n);
}
|
|
233
325
|
|
|
234
326
|
try {
|
|
235
327
|
if (resolvedDomain === "health") {
|
|
236
|
-
|
|
237
|
-
let pubmedPapers = await searchPubMed(query, max_results);
|
|
238
|
-
if (pubmedPapers.length) sources.push("pubmed");
|
|
328
|
+
let pubmedPapers = await safePubMed(query, max_results);
|
|
239
329
|
if (pubmedPapers.length < max_results) {
|
|
240
|
-
const s2 = await
|
|
241
|
-
if (s2.length) sources.push("semanticscholar");
|
|
330
|
+
const s2 = await safeS2(query, max_results - pubmedPapers.length);
|
|
242
331
|
const seen = new Set(pubmedPapers.map((p) => p.id));
|
|
243
332
|
for (const p of s2) if (!seen.has(p.id)) pubmedPapers.push(p);
|
|
244
333
|
}
|
|
245
334
|
papers = pubmedPapers.slice(0, max_results);
|
|
246
335
|
} else if (resolvedDomain === "cs") {
|
|
247
|
-
// ArXiv + Semantic Scholar, deduplicate overlaps
|
|
248
336
|
const [arxiv, s2] = await Promise.all([
|
|
249
|
-
|
|
250
|
-
|
|
337
|
+
safeArxiv(query, max_results),
|
|
338
|
+
safeS2(query, Math.ceil(max_results / 2)),
|
|
251
339
|
]);
|
|
252
|
-
if (arxiv.length) sources.push("arxiv");
|
|
253
|
-
if (s2.length) sources.push("semanticscholar");
|
|
254
340
|
const maxArxiv = Math.ceil(max_results * 0.6);
|
|
255
341
|
const arxivIds = new Set(arxiv.map((p) => p.id));
|
|
256
342
|
const uniqueS2 = s2.filter((p) => !arxivIds.has(p.id));
|
|
@@ -259,15 +345,11 @@ Use the returned abstracts to synthesize findings, answer the user's question, o
|
|
|
259
345
|
...uniqueS2.slice(0, max_results - Math.min(arxiv.length, maxArxiv)),
|
|
260
346
|
].slice(0, max_results);
|
|
261
347
|
} else {
|
|
262
|
-
// General: all three sources interleaved
|
|
263
348
|
const [arxiv, pubmed, s2] = await Promise.all([
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
349
|
+
safeArxiv(query, max_results),
|
|
350
|
+
safePubMed(query, max_results),
|
|
351
|
+
safeS2(query, Math.ceil(max_results / 2)),
|
|
267
352
|
]);
|
|
268
|
-
if (arxiv.length) sources.push("arxiv");
|
|
269
|
-
if (pubmed.length) sources.push("pubmed");
|
|
270
|
-
if (s2.length) sources.push("semanticscholar");
|
|
271
353
|
const maxEach = Math.floor(max_results / 3);
|
|
272
354
|
const remainder = max_results - maxEach * 3;
|
|
273
355
|
papers = [
|
|
@@ -277,6 +359,18 @@ Use the returned abstracts to synthesize findings, answer the user's question, o
|
|
|
277
359
|
].slice(0, max_results);
|
|
278
360
|
}
|
|
279
361
|
|
|
362
|
+
// Warn if expected sources came back empty or errored
|
|
363
|
+
const warnings = [];
|
|
364
|
+
const expectedSources = resolvedDomain === "health"
|
|
365
|
+
? ["pubmed", "semanticscholar"]
|
|
366
|
+
: resolvedDomain === "cs"
|
|
367
|
+
? ["arxiv", "semanticscholar"]
|
|
368
|
+
: ["arxiv", "pubmed", "semanticscholar"];
|
|
369
|
+
for (const src of expectedSources) {
|
|
370
|
+
if (sourceStatus[src] === "empty") warnings.push(`${src}: returned 0 results (API may be rate-limited or query too specific)`);
|
|
371
|
+
if (sourceStatus[src] === "error") warnings.push(`${src}: request failed (API may be temporarily unavailable)`);
|
|
372
|
+
}
|
|
373
|
+
|
|
280
374
|
return {
|
|
281
375
|
content: [
|
|
282
376
|
{
|
|
@@ -285,7 +379,8 @@ Use the returned abstracts to synthesize findings, answer the user's question, o
|
|
|
285
379
|
{
|
|
286
380
|
query,
|
|
287
381
|
domain: resolvedDomain,
|
|
288
|
-
|
|
382
|
+
source_status: sourceStatus,
|
|
383
|
+
...(warnings.length ? { warnings } : {}),
|
|
289
384
|
total: papers.length,
|
|
290
385
|
papers: papers.map((p) => ({
|
|
291
386
|
id: p.id,
|
|
@@ -314,33 +409,79 @@ Use the returned abstracts to synthesize findings, answer the user's question, o
|
|
|
314
409
|
}
|
|
315
410
|
);
|
|
316
411
|
|
|
412
|
+
// ── Semantic Scholar single-paper lookup (by DOI) ──────────────────────────
/**
 * Fetch one paper's metadata from Semantic Scholar via its `DOI:` prefix.
 *
 * Accepts a bare DOI ("10.1145/3290605.3300857"), a "doi:"-prefixed DOI, or
 * a doi.org URL, with surrounding whitespace.
 *
 * @param {string} doi - DOI in any of the accepted forms.
 * @returns {Promise<object|null>} A paper record with
 *   `source: "semanticscholar"`, or null when the DOI is empty, the response
 *   is not OK, the payload lacks a paperId/title, or a step throws.
 */
async function fetchS2ByDoi(doi) {
  try {
    // Trim before stripping prefixes: both patterns are anchored at the start.
    const clean = doi
      .trim()
      .replace(/^https?:\/\/(?:dx\.)?doi\.org\//i, "")
      .replace(/^doi:\s*/i, "")
      .trim();
    // Nothing to look up — skip the network round-trip.
    if (!clean) return null;

    const fields = "title,abstract,authors,year,citationCount,openAccessPdf,externalIds";
    const res = await fetchWithTimeout(
      `${SEMANTIC_SCHOLAR_BASE}/paper/DOI:${encodeURIComponent(clean)}?fields=${fields}`
    );
    if (!res.ok) return null;
    const item = await res.json().catch(() => null);
    if (!item || !item.paperId || !item.title) return null;

    const ext = item.externalIds || {};
    const arxivId = ext.ArXiv || "";
    // Prefer the arXiv landing page when S2 knows one; otherwise use the DOI
    // resolver (`clean` is guaranteed non-empty here).
    const url = arxivId ? `https://arxiv.org/abs/${arxivId}` : `https://doi.org/${clean}`;

    return {
      id: `s2:${item.paperId}`,
      source: "semanticscholar",
      title: (item.title || "").replace(/\s+/g, " ").trim(),
      authors: Array.isArray(item.authors) ? item.authors.map((a) => a.name).filter(Boolean) : [],
      abstract: (item.abstract || "").replace(/\s+/g, " ").trim(),
      published: item.year ? `${item.year}` : "",
      doi: clean,
      url,
      pdf_url: item.openAccessPdf?.url || "",
      citations: typeof item.citationCount === "number" ? item.citationCount : 0,
    };
  } catch {
    return null;
  }
}
|
|
445
|
+
|
|
317
446
|
// Tool 2: fetch_paper
|
|
318
447
|
server.tool(
|
|
319
448
|
"fetch_paper",
|
|
320
|
-
`Fetch the full abstract and metadata for a specific paper by ID.
|
|
321
|
-
Supports
|
|
322
|
-
|
|
449
|
+
`Fetch the full abstract and metadata for a specific paper by ID or DOI.
|
|
450
|
+
Supports:
|
|
451
|
+
- ArXiv IDs: '2301.07041', 'arxiv:2301.07041v2', 'https://arxiv.org/abs/2301.07041'
|
|
452
|
+
- PubMed IDs: 'pubmed:37183813' or just '37183813'
|
|
453
|
+
- DOIs: '10.1145/3290605.3300857' or 'doi:10.1145/3290605.3300857' (looks up via Semantic Scholar)
|
|
454
|
+
Use this to verify a specific paper you already know about or to retrieve its abstract.`,
|
|
323
455
|
{
|
|
324
456
|
paper_id: z
|
|
325
457
|
.string()
|
|
326
458
|
.describe(
|
|
327
|
-
"ArXiv ID
|
|
459
|
+
"ArXiv ID, PubMed ID, or DOI — e.g. '2301.07041', 'pubmed:37183813', or '10.1145/3290605.3300857'"
|
|
328
460
|
),
|
|
329
461
|
},
|
|
330
462
|
async ({ paper_id }) => {
|
|
331
463
|
try {
|
|
464
|
+
const trimmed = paper_id.trim();
|
|
332
465
|
const isArxiv =
|
|
333
|
-
/arxiv:/i.test(
|
|
334
|
-
/^\d{4}\.\d{4,5}
|
|
335
|
-
|
|
336
|
-
|
|
466
|
+
/arxiv:/i.test(trimmed) ||
|
|
467
|
+
/^\d{4}\.\d{4,5}(v\d+)?$/.test(trimmed) || // 2301.07041 or 2301.07041v2
|
|
468
|
+
/^[a-z-]+(\.[A-Z]+)?\/\d{7}(v\d+)?$/.test(trimmed) || // old format: cs.LG/0504010
|
|
469
|
+
/arxiv\.org/.test(trimmed);
|
|
470
|
+
const isPubMed =
|
|
471
|
+
/pubmed:/i.test(trimmed) || /^\d{6,9}$/.test(trimmed);
|
|
472
|
+
const isDOI =
|
|
473
|
+
/^doi:/i.test(trimmed) || /^10\.\d{4,}\/\S+$/.test(trimmed);
|
|
337
474
|
|
|
338
475
|
let paper = null;
|
|
339
476
|
|
|
340
477
|
if (isArxiv) {
|
|
341
|
-
paper = await fetchArxivById(
|
|
478
|
+
paper = await fetchArxivById(trimmed);
|
|
479
|
+
// Fallback: ArXiv API rate-limits under parallel load — try S2 ARXIV: endpoint
|
|
480
|
+
if (!paper) paper = await fetchS2ByArxivId(trimmed);
|
|
481
|
+
} else if (isDOI) {
|
|
482
|
+
paper = await fetchS2ByDoi(trimmed);
|
|
342
483
|
} else if (isPubMed) {
|
|
343
|
-
const pmid =
|
|
484
|
+
const pmid = trimmed.replace(/^pubmed:/i, "").trim();
|
|
344
485
|
const abstracts = await fetchPubMedAbstracts([pmid]);
|
|
345
486
|
const summaryUrl = `${PUBMED_BASE}/esummary.fcgi?db=pubmed&id=${pmid}&retmode=json&${PUBMED_PARAMS}`;
|
|
346
487
|
const summaryRes = await fetch(summaryUrl);
|
|
@@ -364,7 +505,10 @@ Use this to get the full abstract of a paper you already know about.`,
|
|
|
364
505
|
|
|
365
506
|
if (!paper) {
|
|
366
507
|
return {
|
|
367
|
-
content: [{
|
|
508
|
+
content: [{
|
|
509
|
+
type: "text",
|
|
510
|
+
text: `Paper not found: ${paper_id}\n\nTip: For arXiv papers, try the bare ID (e.g. '2301.07041'). For journal papers, try the DOI (e.g. '10.1145/3290605.3300857'). For PubMed papers, use the PMID number.`,
|
|
511
|
+
}],
|
|
368
512
|
isError: true,
|
|
369
513
|
};
|
|
370
514
|
}
|
|
@@ -381,5 +525,88 @@ Use this to get the full abstract of a paper you already know about.`,
|
|
|
381
525
|
}
|
|
382
526
|
);
|
|
383
527
|
|
|
528
|
+
// Tool 3: find_paper_by_title
|
|
529
|
+
server.tool(
  "find_paper_by_title",
  `Find a specific paper when you only know its title (or partial title).
Uses Semantic Scholar's title-match search. Returns the closest match with full abstract, authors, DOI, and source URL.
Useful for verifying a citation or retrieving abstract details for a paper you already know exists.`,
  {
    title: z
      .string()
      .describe("The paper title or a key phrase from it, e.g. 'Attention Is All You Need'"),
    year: z
      .number()
      .optional()
      .describe("Publication year to narrow down the match (optional)"),
  },
  // Handler: query Semantic Scholar's paper search (top 5 hits) and return the
  // best candidate as pretty-printed JSON, or an error payload.
  async ({ title, year }) => {
    // Wrap a message in the tool's error-response shape.
    const fail = (text) => ({ content: [{ type: "text", text }], isError: true });

    try {
      const fields = "title,abstract,authors,year,citationCount,openAccessPdf,externalIds";
      const url = `${SEMANTIC_SCHOLAR_BASE}/paper/search?query=${encodeURIComponent(title)}&limit=5&fields=${fields}`;
      const res = await fetchWithTimeout(url);
      if (!res.ok) {
        return fail(`Search failed: Semantic Scholar returned ${res.status}`);
      }

      const data = await res.json().catch(() => null);
      if (!data?.data?.length) {
        return fail(`No papers found matching: "${title}"`);
      }

      // Prefer hits that carry an abstract; fall back to any hit with a title.
      const withAbstract = data.data.filter((p) => p.title && p.abstract);
      const candidates = withAbstract.length
        ? withAbstract
        : data.data.filter((p) => p.title);
      if (!candidates.length) {
        return fail(`No papers found matching: "${title}"`);
      }

      // A candidate from the requested year wins; otherwise take the top hit.
      const best = (year && candidates.find((p) => p.year === year)) || candidates[0];

      const externalIds = best.externalIds || {};
      const doi = externalIds.DOI || "";
      const arxivId = externalIds.ArXiv || "";

      // Link preference: arXiv page, then DOI resolver, then the S2 page.
      const paperUrl = arxivId
        ? `https://arxiv.org/abs/${arxivId}`
        : doi
          ? `https://doi.org/${doi}`
          : `https://www.semanticscholar.org/paper/${best.paperId}`;

      const paper = {
        id: arxivId ? `arxiv:${arxivId}` : `s2:${best.paperId}`,
        source: arxivId ? "arxiv" : "semanticscholar",
        title: (best.title || "").replace(/\s+/g, " ").trim(),
        authors: Array.isArray(best.authors) ? best.authors.map((a) => a.name).filter(Boolean) : [],
        abstract: (best.abstract || "").replace(/\s+/g, " ").trim(),
        published: best.year ? `${best.year}` : "",
        doi,
        url: paperUrl,
        pdf_url: best.openAccessPdf?.url || "",
        citations: typeof best.citationCount === "number" ? best.citationCount : 0,
      };

      return {
        content: [{ type: "text", text: JSON.stringify(paper, null, 2) }],
      };
    } catch (err) {
      return fail(`find_paper_by_title failed: ${err.message}`);
    }
  }
);
|
|
610
|
+
|
|
384
611
|
const transport = new StdioServerTransport();
|
|
385
612
|
await server.connect(transport);
|