@pipeworx/mcp-semanticscholar 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Pipeworx
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,55 @@
1
+ # mcp-semanticscholar
2
+
3
+ Semantic Scholar Academic Graph MCP.
4
+
5
+ Part of [Pipeworx](https://pipeworx.io) — an MCP gateway connecting AI agents to 842+ live data sources.
6
+
7
+ ## Tools
8
+
9
+ | Tool | Description |
10
+ |------|-------------|
11
+
12
+ ## Quick Start
13
+
14
+ Add to your MCP client (Claude Desktop, Cursor, Windsurf, etc.):
15
+
16
+ ```json
17
+ {
18
+ "mcpServers": {
19
+ "semanticscholar": {
20
+ "url": "https://gateway.pipeworx.io/semanticscholar/mcp"
21
+ }
22
+ }
23
+ }
24
+ ```
25
+
26
+ Or connect to the full Pipeworx gateway for access to all 842+ data sources:
27
+
28
+ ```json
29
+ {
30
+ "mcpServers": {
31
+ "pipeworx": {
32
+ "url": "https://gateway.pipeworx.io/mcp"
33
+ }
34
+ }
35
+ }
36
+ ```
37
+
38
+ ## Using with ask_pipeworx
39
+
40
+ Instead of calling tools directly, you can ask questions in plain English:
41
+
42
+ ```
43
+ ask_pipeworx({ question: "your question about Semantic Scholar data" })
44
+ ```
45
+
46
+ The gateway picks the right tool and fills the arguments automatically.
47
+
48
+ ## More
49
+
50
+ - [All tools and guides](https://github.com/pipeworx-io/examples)
51
+ - [pipeworx.io](https://pipeworx.io)
52
+
53
+ ## License
54
+
55
+ MIT
package/package.json ADDED
@@ -0,0 +1,20 @@
1
+ {
2
+ "name": "@pipeworx/mcp-semanticscholar",
3
+ "version": "0.1.0",
4
+ "description": "Semantic Scholar Academic Graph MCP.",
5
+ "type": "module",
6
+ "main": "src/index.ts",
7
+ "types": "src/index.ts",
8
+ "keywords": ["mcp", "mcp-server", "model-context-protocol", "pipeworx", "semanticscholar"],
9
+ "license": "MIT",
10
+ "repository": {
11
+ "type": "git",
12
+ "url": "https://github.com/pipeworx-io/mcp-semanticscholar"
13
+ },
14
+ "scripts": {
15
+ "typecheck": "tsc --noEmit"
16
+ },
17
+ "devDependencies": {
18
+ "typescript": "^5.7.0"
19
+ }
20
+ }
package/server.json ADDED
@@ -0,0 +1,18 @@
1
+ {
2
+ "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json",
3
+ "name": "io.github.pipeworx-io/semanticscholar",
4
+ "title": "Semanticscholar",
5
+ "description": "Semantic Scholar Academic Graph MCP.",
6
+ "version": "0.1.0",
7
+ "websiteUrl": "https://pipeworx.io/packs/semanticscholar",
8
+ "repository": {
9
+ "url": "https://github.com/pipeworx-io/mcp-semanticscholar",
10
+ "source": "github"
11
+ },
12
+ "remotes": [
13
+ {
14
+ "type": "streamable-http",
15
+ "url": "https://gateway.pipeworx.io/semanticscholar/mcp"
16
+ }
17
+ ]
18
+ }
package/src/index.ts ADDED
@@ -0,0 +1,284 @@
1
/** JSON-Schema-style description of the arguments object a tool accepts. */
interface McpToolInputSchema {
  /** Tool arguments are always passed as a single object. */
  type: 'object';
  /** Per-argument schema fragments, keyed by argument name. */
  properties: Record<string, unknown>;
  /** Names of the arguments that must be supplied; omitted when none are listed. */
  required?: string[];
}

/** Declarative description of one tool: its name, a human-readable description, and its input schema. */
interface McpToolDefinition {
  /** Identifier used to select the tool when dispatching a call. */
  name: string;
  /** Free-text explanation of what the tool does. */
  description: string;
  /** Shape of the arguments object the tool expects. */
  inputSchema: McpToolInputSchema;
}
10
+
11
/**
 * Shape of the object a tool module exposes: the tool catalogue, a dispatcher,
 * and a few optional descriptive fields.
 */
interface McpToolExport {
  /** Every tool this module offers. */
  tools: Array<McpToolDefinition>;
  /** Runs the tool called `name` with `args` and resolves with its result. */
  callTool: (name: string, args: Record<string, unknown>) => Promise<unknown>;
  /** Optional metering information (a credit count). NOTE(review): what consumes it is not visible here. */
  meter?: { credits: number };
  /** Optional free-form cost metadata. */
  cost?: Record<string, unknown>;
  /** Optional provider label. */
  provider?: string;
}
18
+
19
+ /**
20
+ * Semantic Scholar Academic Graph MCP.
21
+ *
22
+ * Search 200M+ academic papers, resolve papers by ID/DOI/arXiv, trace
23
+ * citations, and look up authors with citation metrics and h-index — via the
24
+ * Semantic Scholar Academic Graph API. Keyless (public graph endpoints; be
25
+ * polite with a User-Agent). Rate-limited without a key, so 429s are expected
26
+ * under load.
27
+ */
28
+
29
+
30
/** Root of the Semantic Scholar Academic Graph API (v1); endpoint paths are appended to it. */
const BASE = 'https://api.semanticscholar.org/graph/v1';

/** Value sent as the `User-Agent` header on outgoing requests. */
const UA = 'pipeworx/1.0 (+https://pipeworx.io)';
32
+
33
/** Builds a string-typed input property with the given description. */
const stringProp = (description: string): { type: string; description: string } => ({
  type: 'string',
  description,
});

/** Builds a number-typed input property with the given description. */
const numberProp = (description: string): { type: string; description: string } => ({
  type: 'number',
  description,
});

/** Description shared by every tool that takes a `paper_id` argument. */
const PAPER_ID_DESCRIPTION =
  'Paper identifier. A Semantic Scholar ID, or prefixed: "DOI:10...", "arXiv:2106.15928", "CorpusId:...".';

/** Catalogue of the tools this module exposes, in the order they are advertised. */
const tools: McpToolExport['tools'] = [
  {
    name: 'search_papers',
    description:
      'Search 200M+ academic papers on Semantic Scholar by keyword. Returns titles, authors, year, venue, citation counts, DOI, and open-access PDF links. Optionally filter by year range and field of study. Keyless.',
    inputSchema: {
      type: 'object',
      properties: {
        query: stringProp(
          'Search query, e.g. "transformer attention mechanism" or "CRISPR gene editing".',
        ),
        limit: numberProp('Max results to return (default 10, max 25).'),
        year: stringProp('Filter by publication year or range, e.g. "2023" or "2020-2024".'),
        fields_of_study: stringProp(
          'Filter by field of study, e.g. "Computer Science", "Medicine", "Biology", "Physics".',
        ),
      },
      required: ['query'],
    },
  },
  {
    name: 'get_paper',
    description:
      'Get full metadata for a single paper by ID. Accepts a Semantic Scholar paper ID, or a prefixed ID like "DOI:10.1145/3292500", "arXiv:2106.15928", or "CorpusId:215416146". Returns abstract, TLDR summary, authors, venue, citation/reference counts, fields of study, and open-access PDF. Keyless.',
    inputSchema: {
      type: 'object',
      properties: {
        paper_id: stringProp(PAPER_ID_DESCRIPTION),
      },
      required: ['paper_id'],
    },
  },
  {
    name: 'get_paper_citations',
    description:
      'List papers that CITE a given paper (the works citing it), with their titles, authors, year, and citation counts. Useful for forward citation tracing and finding follow-up work. Keyless.',
    inputSchema: {
      type: 'object',
      properties: {
        paper_id: stringProp(PAPER_ID_DESCRIPTION),
        limit: numberProp('Max citing papers to return (default 10, max 25).'),
      },
      required: ['paper_id'],
    },
  },
  {
    name: 'get_author',
    description:
      'Search for academic authors by name on Semantic Scholar. Returns up to 5 matches with affiliations, paper count, total citation count, h-index, and profile URL. Keyless.',
    inputSchema: {
      type: 'object',
      properties: {
        name: stringProp('Author name to search for, e.g. "Yoshua Bengio".'),
      },
      required: ['name'],
    },
  },
];
114
+
115
/**
 * Dispatches a tool call to the matching handler.
 *
 * Failures are reported in-band: an unknown tool name, or any error thrown or
 * rejected by a handler, resolves to `{ error: message }` rather than rejecting.
 *
 * @param name - Tool name, one of the entries in `tools`.
 * @param args - Raw, unvalidated arguments for the tool.
 * @returns The handler's result, or an `{ error }` object.
 */
async function callTool(name: string, args: Record<string, unknown>): Promise<unknown> {
  try {
    // `return await` is deliberate: the handlers are async, so returning their
    // promise un-awaited would let a rejection (network failure, invalid JSON
    // body, ...) escape this try/catch and reject the caller instead.
    switch (name) {
      case 'search_papers':
        return await searchPapers(args);
      case 'get_paper':
        return await getPaper(args);
      case 'get_paper_citations':
        return await getPaperCitations(args);
      case 'get_author':
        return await getAuthor(args);
      default:
        return { error: `Unknown tool: ${name}` };
    }
  } catch (e) {
    return { error: e instanceof Error ? e.message : String(e) };
  }
}
133
+
134
/**
 * Shortens a string to at most `n` UTF-16 code units, appending "…" when cut.
 *
 * @param s - Candidate value; anything that is not a non-empty string yields `undefined`.
 * @param n - Maximum number of code units to keep before the ellipsis.
 * @returns `s` unchanged when it fits, the shortened text plus "…" otherwise,
 *          or `undefined` for non-string / empty input.
 */
function truncate(s: unknown, n: number): string | undefined {
  if (typeof s !== 'string' || !s) return undefined;
  if (s.length <= n) return s;
  let end = n;
  // Avoid cutting an astral character (emoji, some CJK, math symbols) in half:
  // if the last kept unit is a high surrogate, its partner is being dropped,
  // so drop it too rather than emit a lone surrogate.
  const lastKept = s.charCodeAt(end - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;
  return `${s.slice(0, end)}…`;
}
138
+
139
/** Builds the error payload returned when the upstream API answers with HTTP 429. */
function rateLimited(): { error: string } {
  const error =
    'Semantic Scholar rate limit (429). The keyless public API throttles aggressively — wait a few seconds and retry.';
  return { error };
}
145
+
146
/**
 * Reduces a raw Semantic Scholar paper record to a compact summary.
 *
 * @param p - Paper object as returned by the API.
 * @param abstractChars - Maximum abstract length before it is cut with "…".
 * @param authorLimit - Maximum number of author names to keep.
 * @returns An object with paperId, title, year, authors, citationCount, venue,
 *          doi, url, openAccessPdf and abstract; fields missing upstream stay
 *          `undefined` (or whatever value the API supplied).
 */
function mapPaper(p: Record<string, unknown>, abstractChars = 400, authorLimit = 8): unknown {
  // Keep author names only, and only when the field really is a list.
  let authors: unknown[] | undefined;
  if (Array.isArray(p.authors)) {
    const listed = p.authors as Array<Record<string, unknown>>;
    authors = listed.slice(0, authorLimit).map((author) => author.name);
  }

  // Both nested objects may be absent or null; optional chaining covers either.
  const ids = p.externalIds as Record<string, unknown> | undefined;
  const pdf = p.openAccessPdf as Record<string, unknown> | undefined;

  return {
    paperId: p.paperId,
    title: p.title,
    year: p.year,
    authors,
    citationCount: p.citationCount,
    venue: p.venue,
    doi: ids?.DOI,
    url: p.url,
    openAccessPdf: pdf?.url ?? undefined,
    abstract: truncate(p.abstract, abstractChars),
  };
}
166
+
167
/**
 * Keyword search over Semantic Scholar papers (`/paper/search`).
 *
 * @param args - Tool arguments: `query` (required string), optional `limit`
 *               (number, default 10, clamped to 1–25), optional `year` and
 *               `fields_of_study` filter strings.
 * @returns `{ total, papers }` on success, or an `{ error }` object for a
 *          missing query, a 429 rate limit, or any other non-2xx response.
 */
async function searchPapers(args: Record<string, unknown>): Promise<unknown> {
  const query = typeof args.query === 'string' ? args.query.trim() : '';
  if (!query) return { error: 'provide a query', query: args.query ?? null };

  // Clamp to 1–25, then drop any fractional part: the schema only says
  // "number", but the API's `limit` parameter is an integer, so a value such
  // as 2.5 must not be forwarded verbatim.
  const limit = Math.trunc(Math.min(Math.max(Number(args.limit) || 10, 1), 25));
  const fields = 'title,abstract,year,authors,citationCount,venue,externalIds,url,openAccessPdf';

  let url = `${BASE}/paper/search?query=${encodeURIComponent(query)}&limit=${limit}&fields=${encodeURIComponent(fields)}`;
  // Optional filters are appended only when given as non-blank strings.
  if (typeof args.year === 'string' && args.year.trim())
    url += `&year=${encodeURIComponent(args.year.trim())}`;
  if (typeof args.fields_of_study === 'string' && args.fields_of_study.trim())
    url += `&fieldsOfStudy=${encodeURIComponent(args.fields_of_study.trim())}`;

  const res = await fetch(url, { headers: { Accept: 'application/json', 'User-Agent': UA } });
  if (res.status === 429) return rateLimited();
  // Surface at most the first 200 characters of an upstream error body.
  if (!res.ok) return { error: `Semantic Scholar: ${res.status} ${(await res.text()).slice(0, 200)}` };

  const body = (await res.json()) as Record<string, unknown>;
  const data = Array.isArray(body.data) ? (body.data as Array<Record<string, unknown>>) : [];
  return {
    // Fall back to the page size when the API omits a total.
    total: body.total ?? data.length,
    // Arrow wrapper keeps map's index argument out of mapPaper's optional parameters.
    papers: data.map((p) => mapPaper(p)),
  };
}
190
+
191
/**
 * Fetches the metadata of one paper (`/paper/{id}`).
 *
 * @param args - Tool arguments; `paper_id` is a Semantic Scholar ID or a
 *               prefixed ID such as "DOI:…", "arXiv:…" or "CorpusId:…".
 * @returns A compact paper record (up to 15 author names, abstract cut at 800
 *          characters), or an `{ error }` object for a missing ID, a 429, a
 *          404, or any other non-2xx response.
 */
async function getPaper(args: Record<string, unknown>): Promise<unknown> {
  const rawId = args.paper_id;
  const paperId = typeof rawId === 'string' ? rawId.trim() : '';
  if (!paperId) {
    return { error: 'provide a paper_id', paper_id: rawId ?? null };
  }

  const fields = [
    'title',
    'abstract',
    'year',
    'authors',
    'citationCount',
    'referenceCount',
    'venue',
    'externalIds',
    'url',
    'openAccessPdf',
    'tldr',
    'fieldsOfStudy',
    'publicationTypes',
  ].join(',');
  const endpoint = `${BASE}/paper/${encodeURIComponent(paperId)}?fields=${encodeURIComponent(fields)}`;

  const res = await fetch(endpoint, { headers: { Accept: 'application/json', 'User-Agent': UA } });
  if (res.status === 429) {
    return rateLimited();
  }
  if (res.status === 404) {
    return { error: 'paper not found', paper_id: paperId };
  }
  if (!res.ok) {
    // Surface at most the first 200 characters of the upstream error body.
    const detail = (await res.text()).slice(0, 200);
    return { error: `Semantic Scholar: ${res.status} ${detail}` };
  }

  const paper = (await res.json()) as Record<string, unknown>;

  // Nested objects may be absent or null; optional chaining covers either.
  const ids = paper.externalIds as Record<string, unknown> | undefined;
  const pdf = paper.openAccessPdf as Record<string, unknown> | undefined;
  const summary = paper.tldr as Record<string, unknown> | undefined;

  let authors: unknown[] | undefined;
  if (Array.isArray(paper.authors)) {
    const listed = paper.authors as Array<Record<string, unknown>>;
    authors = listed.slice(0, 15).map((author) => author.name);
  }

  return {
    paperId: paper.paperId,
    title: paper.title,
    year: paper.year,
    authors,
    venue: paper.venue,
    citationCount: paper.citationCount,
    referenceCount: paper.referenceCount,
    doi: ids?.DOI,
    url: paper.url,
    openAccessPdf: pdf?.url ?? undefined,
    tldr: summary?.text ?? undefined,
    fieldsOfStudy: paper.fieldsOfStudy,
    publicationTypes: paper.publicationTypes,
    abstract: truncate(paper.abstract, 800),
  };
}
227
+
228
/**
 * Lists papers that cite the given paper (`/paper/{id}/citations`).
 *
 * @param args - Tool arguments: `paper_id` (required string) and optional
 *               `limit` (number, default 10, clamped to 1–25).
 * @returns `{ count, citations }`, where each citation carries paperId, title,
 *          year, authors (up to 6 names) and citationCount; or an `{ error }`
 *          object for a missing ID, a 429, a 404, or any other non-2xx response.
 */
async function getPaperCitations(args: Record<string, unknown>): Promise<unknown> {
  const paperId = typeof args.paper_id === 'string' ? args.paper_id.trim() : '';
  if (!paperId) return { error: 'provide a paper_id', paper_id: args.paper_id ?? null };

  // Clamp to 1–25, then drop any fractional part: the schema only says
  // "number", but the API's `limit` parameter is an integer, so a value such
  // as 2.5 must not be forwarded verbatim.
  const limit = Math.trunc(Math.min(Math.max(Number(args.limit) || 10, 1), 25));
  const fields = 'title,year,authors,citationCount';

  const url = `${BASE}/paper/${encodeURIComponent(paperId)}/citations?fields=${encodeURIComponent(fields)}&limit=${limit}`;
  const res = await fetch(url, { headers: { Accept: 'application/json', 'User-Agent': UA } });
  if (res.status === 429) return rateLimited();
  if (res.status === 404) return { error: 'paper not found', paper_id: paperId };
  // Surface at most the first 200 characters of an upstream error body.
  if (!res.ok) return { error: `Semantic Scholar: ${res.status} ${(await res.text()).slice(0, 200)}` };

  const body = (await res.json()) as Record<string, unknown>;
  const data = Array.isArray(body.data) ? (body.data as Array<Record<string, unknown>>) : [];
  const citations = data
    // Each row wraps the citing paper under `citingPaper`; rows without one are skipped.
    .map((row) => row.citingPaper as Record<string, unknown> | undefined)
    .filter((p): p is Record<string, unknown> => !!p)
    .map((p) => {
      const compact = mapPaper(p, 400, 6) as Record<string, unknown>;
      // Only the fields that were requested upstream are passed on.
      return {
        paperId: compact.paperId,
        title: compact.title,
        year: compact.year,
        authors: compact.authors,
        citationCount: compact.citationCount,
      };
    });
  return { count: citations.length, citations };
}
257
+
258
/**
 * Searches authors by name (`/author/search`), asking for at most 5 matches.
 *
 * @param args - Tool arguments; `name` is the author name to look up.
 * @returns `{ count, authors }`, each author carrying authorId, name,
 *          affiliations, paperCount, citationCount, hIndex and url; or an
 *          `{ error }` object for a missing name, a 429, or any other non-2xx
 *          response.
 */
async function getAuthor(args: Record<string, unknown>): Promise<unknown> {
  const rawName = args.name;
  const name = typeof rawName === 'string' ? rawName.trim() : '';
  if (!name) {
    return { error: 'provide an author name', name: rawName ?? null };
  }

  const fields = ['name', 'affiliations', 'paperCount', 'citationCount', 'hIndex', 'url'].join(',');
  const endpoint = `${BASE}/author/search?query=${encodeURIComponent(name)}&fields=${encodeURIComponent(fields)}&limit=5`;

  const res = await fetch(endpoint, { headers: { Accept: 'application/json', 'User-Agent': UA } });
  if (res.status === 429) {
    return rateLimited();
  }
  if (!res.ok) {
    // Surface at most the first 200 characters of the upstream error body.
    const detail = (await res.text()).slice(0, 200);
    return { error: `Semantic Scholar: ${res.status} ${detail}` };
  }

  const body = (await res.json()) as Record<string, unknown>;
  const matches = Array.isArray(body.data) ? (body.data as Array<Record<string, unknown>>) : [];

  // Copy only the advertised fields so unexpected upstream keys are not passed on.
  const authors = matches.map((match) => {
    const { authorId, name: authorName, affiliations, paperCount, citationCount, hIndex, url } = match;
    return { authorId, name: authorName, affiliations, paperCount, citationCount, hIndex, url };
  });
  return { count: matches.length, authors };
}
283
+
284
/** Module entry point: the tool catalogue, its dispatcher, and a one-credit meter. */
const semanticScholarExport = {
  tools,
  callTool,
  meter: { credits: 1 },
} satisfies McpToolExport;

export default semanticScholarExport;
package/tsconfig.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2022",
4
+ "module": "ESNext",
5
+ "moduleResolution": "bundler",
6
+ "strict": true,
7
+ "esModuleInterop": true,
8
+ "skipLibCheck": true,
9
+ "outDir": "dist",
10
+ "rootDir": "src",
11
+ "declaration": true
12
+ },
13
+ "include": ["src"]
14
+ }