scholar-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +232 -0
- package/dist/cli/args.js +57 -0
- package/dist/config.js +131 -0
- package/dist/core/logger.js +36 -0
- package/dist/http/start-http-server.js +329 -0
- package/dist/index.js +66 -0
- package/dist/mcp/create-scholar-mcp-server.js +583 -0
- package/dist/mcp/start-stdio-server.js +8 -0
- package/dist/research/citation-service.js +407 -0
- package/dist/research/errors.js +36 -0
- package/dist/research/extraction-service.js +109 -0
- package/dist/research/http-client.js +62 -0
- package/dist/research/index.js +7 -0
- package/dist/research/ingestion-service.js +430 -0
- package/dist/research/literature-service.js +387 -0
- package/dist/research/providers/crossref-client.js +73 -0
- package/dist/research/providers/openalex-client.js +80 -0
- package/dist/research/providers/semantic-scholar-client.js +60 -0
- package/dist/research/research-service.js +53 -0
- package/dist/research/types.js +1 -0
- package/dist/research/utils.js +54 -0
- package/dist/scholar/errors.js +30 -0
- package/dist/scholar/scholar-client.js +99 -0
- package/dist/scholar/scholar-parser.js +251 -0
- package/dist/scholar/scholar-service.js +202 -0
- package/dist/scholar/types.js +1 -0
- package/dist/version.js +14 -0
- package/package.json +49 -0
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
import { randomUUID, timingSafeEqual } from 'node:crypto';
|
|
2
|
+
import { serve } from '@hono/node-server';
|
|
3
|
+
import { WebStandardStreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js';
|
|
4
|
+
import { isInitializeRequest } from '@modelcontextprotocol/sdk/types.js';
|
|
5
|
+
import { Hono } from 'hono';
|
|
6
|
+
import { createScholarMcpServer } from '../mcp/create-scholar-mcp-server.js';
|
|
7
|
+
// Hostnames this module treats as loopback when checking the bind host, Host header and Origin header.
const LOCAL_HOSTS = new Set(['127.0.0.1', 'localhost', '::1']);
|
|
8
|
+
// Request header read to look up an already-initialized MCP HTTP session.
const MCP_SESSION_HEADER = 'mcp-session-id';
|
|
9
|
+
/**
 * Normalizes a Host header value for comparison.
 *
 * @param {string} hostHeader - Raw Host header value.
 * @returns {{ full: string, hostname: string }} `full` is the trimmed,
 *   lower-cased header; `hostname` is the same value with a trailing
 *   `:port` removed and, for a bracketed literal, the brackets removed.
 */
const normalizeHostHeader = (hostHeader) => {
    const full = hostHeader.trim().toLowerCase();
    let hostname;
    if (full.startsWith('[')) {
        // Bracketed form, e.g. "[::1]:8080" -> "::1".
        hostname = full.replace(/^\[([^\]]+)\](?::\d+)?$/, '$1');
    }
    else {
        // Plain form, e.g. "localhost:3000" -> "localhost".
        hostname = full.replace(/:\d+$/, '');
    }
    return { full, hostname };
};
|
|
19
|
+
/**
 * Reports whether an Origin header value points at a loopback host.
 *
 * @param {string} origin - Origin header value, e.g. "http://localhost:5173".
 * @returns {boolean} true when the origin parses as a URL whose host is in
 *   LOCAL_HOSTS; false otherwise, including when the value is not a valid URL.
 */
const isLoopbackOrigin = (origin) => {
    try {
        const { hostname } = new URL(origin);
        // URL#hostname keeps the square brackets around IPv6 literals
        // ("[::1]"), while LOCAL_HOSTS stores the bare address ("::1").
        // Strip the brackets so IPv6 loopback origins are recognized.
        const bareHostname = hostname.startsWith('[') && hostname.endsWith(']')
            ? hostname.slice(1, -1)
            : hostname;
        return LOCAL_HOSTS.has(bareHostname.toLowerCase());
    }
    catch {
        // An unparsable origin is never treated as loopback.
        return false;
    }
};
|
|
28
|
+
/**
 * Decides whether a request's Host header is acceptable.
 *
 * An empty header is rejected. When `config.allowedHosts` is non-empty, the
 * header must match one of its entries, either with or without the port.
 * Otherwise, when the server is bound to a loopback host, only loopback Host
 * headers pass; for any other bind host every Host header passes.
 *
 * @param {string} hostHeader - Raw Host header value (may be empty).
 * @param {object} config - Reads `allowedHosts` and `host`.
 * @returns {boolean} true when the header is allowed.
 */
const isHostAllowed = (hostHeader, config) => {
    if (!hostHeader) {
        return false;
    }
    const { full, hostname } = normalizeHostHeader(hostHeader);
    const { allowedHosts } = config;
    if (allowedHosts.length > 0) {
        return [full, hostname].some((candidate) => allowedHosts.includes(candidate));
    }
    const boundToLoopback = LOCAL_HOSTS.has(config.host.toLowerCase());
    return boundToLoopback ? LOCAL_HOSTS.has(hostname) : true;
};
|
|
41
|
+
/**
 * Decides whether a request's Origin header is acceptable.
 *
 * Requests without an Origin header always pass. When
 * `config.allowedOrigins` is non-empty, the origin must appear in it exactly.
 * Otherwise, a server bound to a loopback host accepts only loopback origins,
 * and a server bound elsewhere accepts any origin.
 *
 * @param {string|undefined} origin - Origin header value, if present.
 * @param {object} config - Reads `allowedOrigins` and `host`.
 * @returns {boolean} true when the origin is allowed.
 */
const isOriginAllowed = (origin, config) => {
    if (!origin) {
        return true;
    }
    const { allowedOrigins } = config;
    if (allowedOrigins.length > 0) {
        return allowedOrigins.includes(origin);
    }
    const boundToLoopback = LOCAL_HOSTS.has(config.host.toLowerCase());
    return boundToLoopback ? isLoopbackOrigin(origin) : true;
};
|
|
53
|
+
/**
 * Checks the Authorization header against the configured API key.
 *
 * @param {string|undefined} authorization - Raw Authorization header value.
 * @param {object} config - Reads `apiKey`; when it is falsy, authentication
 *   is disabled and every request is authorized.
 * @returns {boolean} true when no API key is configured, or when the header
 *   is `Bearer <token>` and the trimmed token equals the API key.
 */
const isAuthorized = (authorization, config) => {
    if (!config.apiKey) {
        return true;
    }
    if (!authorization || !authorization.startsWith('Bearer ')) {
        return false;
    }
    const token = authorization.slice('Bearer '.length).trim();
    if (token.length === 0 || typeof config.apiKey !== 'string') {
        return false;
    }
    // Compare the secret in constant time so response timing does not reveal
    // how many leading characters of a guessed token were correct.
    // timingSafeEqual requires equal-length inputs, so lengths are checked first.
    const provided = Buffer.from(token, 'utf8');
    const expected = Buffer.from(config.apiKey, 'utf8');
    return provided.length === expected.length && timingSafeEqual(provided, expected);
};
|
|
63
|
+
/**
 * Adds CORS response headers for the given origin.
 *
 * @param {Response} response - Response whose headers are mutated in place.
 * @param {string|undefined} origin - Request Origin header; when falsy the
 *   response is returned untouched.
 * @returns {Response} The same response object that was passed in.
 */
const attachCorsHeaders = (response, origin) => {
    if (origin) {
        const corsHeaders = [
            ['Access-Control-Allow-Origin', origin],
            ['Access-Control-Allow-Methods', 'GET,POST,DELETE,OPTIONS'],
            ['Access-Control-Allow-Headers', 'Content-Type, Authorization, mcp-session-id, MCP-Protocol-Version, Last-Event-ID'],
            ['Access-Control-Expose-Headers', 'mcp-session-id, MCP-Protocol-Version'],
            ['Vary', 'Origin']
        ];
        for (const [headerName, headerValue] of corsHeaders) {
            response.headers.set(headerName, headerValue);
        }
    }
    return response;
};
|
|
74
|
+
/**
 * Reports whether a session has been idle for longer than the configured TTL.
 *
 * @param {object} runtime - Session record; reads `lastSeenAt` (ms timestamp).
 * @param {number} now - Current time in milliseconds.
 * @param {object} config - Reads `httpSessionTtlMs`.
 * @returns {boolean} true when the idle time strictly exceeds the TTL.
 */
const isStale = (runtime, now, config) => {
    const idleMs = now - runtime.lastSeenAt;
    return idleMs > config.httpSessionTtlMs;
};
|
|
75
|
+
/**
 * Builds the Hono application that exposes the MCP endpoint over HTTP.
 *
 * The app serves an info document at `/`, a health document at
 * `config.healthPath`, and the MCP endpoint at `config.endpointPath`. In
 * `stateless` session mode each endpoint request gets a fresh transport and
 * server that are closed once the request has been handled. Otherwise
 * sessions are kept in an in-memory map keyed by the `mcp-session-id` header,
 * pruned by TTL and capped at `config.httpMaxSessions`.
 *
 * @param {object} config - Reads `httpSessionMode`, `httpSessionTtlMs`,
 *   `httpMaxSessions`, `endpointPath`, `healthPath`, `serverName`,
 *   `serverVersion`, plus the fields read by the host/origin/auth checks.
 * @param {object} service - Passed through to `createScholarMcpServer`.
 * @param {object} researchService - Passed through to `createScholarMcpServer`.
 * @param {object} logger - Used via `debug`, `warn` and `error`.
 * @returns {{ app: Hono, shutdown: () => Promise<void> }} The app and a
 *   function that closes every open session.
 */
export const createHttpApp = (config, service, researchService, logger) => {
    const app = new Hono();
    // sessionId -> { sessionId, createdAt, lastSeenAt, transport, closeServer }
    const sessions = new Map();
    // Removes a session from the map and closes its server (and, when
    // `closeTransport` is set, its transport). Resolves to false when the
    // session id is unknown. Close failures are deliberately ignored.
    const closeSession = async (sessionId, reason, closeTransport) => {
        const runtime = sessions.get(sessionId);
        if (!runtime) {
            return false;
        }
        sessions.delete(sessionId);
        if (closeTransport) {
            await runtime.transport.close().catch(() => undefined);
        }
        await runtime.closeServer().catch(() => undefined);
        logger.debug('Closed MCP HTTP session', {
            sessionId,
            reason,
            openSessions: sessions.size
        });
        return true;
    };
    // Closes every session whose idle time exceeds the configured TTL.
    const pruneExpiredSessions = async (reason) => {
        if (config.httpSessionMode !== 'stateful' || sessions.size === 0) {
            return;
        }
        const now = Date.now();
        const expired = [...sessions.entries()]
            .filter(([, runtime]) => isStale(runtime, now, config))
            .map(([sessionId]) => sessionId);
        await Promise.all(expired.map((sessionId) => closeSession(sessionId, reason, true)));
    };
    // When the session map is at capacity, closes the session with the
    // smallest `lastSeenAt` to make room for a new one.
    const evictOldestSession = async () => {
        if (sessions.size < config.httpMaxSessions) {
            return;
        }
        let oldestSessionId = null;
        let oldestSeen = Number.POSITIVE_INFINITY;
        for (const [sessionId, runtime] of sessions.entries()) {
            if (runtime.lastSeenAt < oldestSeen) {
                oldestSeen = runtime.lastSeenAt;
                oldestSessionId = sessionId;
            }
        }
        if (oldestSessionId) {
            await closeSession(oldestSessionId, 'evicted_capacity', true);
            logger.warn('Evicted oldest HTTP MCP session to respect session limit', {
                maxSessions: config.httpMaxSessions,
                evictedSessionId: oldestSessionId
            });
        }
    };
    // Creates a server + transport pair for a new stateful session. The
    // session is only added to the map from the `onsessioninitialized`
    // callback, so the caller is told to close the pair if that never happens
    // (`closeIfUninitialized`).
    const createSessionRuntime = async () => {
        await evictOldestSession();
        const server = createScholarMcpServer(config, service, researchService, logger);
        const transport = new WebStandardStreamableHTTPServerTransport({
            sessionIdGenerator: () => randomUUID(),
            enableJsonResponse: true,
            onsessioninitialized: (sessionId) => {
                const now = Date.now();
                sessions.set(sessionId, {
                    sessionId,
                    createdAt: now,
                    lastSeenAt: now,
                    transport,
                    closeServer: async () => server.close().catch(() => undefined)
                });
                logger.debug('Initialized MCP HTTP session', {
                    sessionId,
                    openSessions: sessions.size
                });
            },
            onsessionclosed: (sessionId) => {
                // The transport is not closed again here (closeTransport = false).
                void closeSession(sessionId, 'client_delete', false);
            }
        });
        await server.connect(transport);
        return {
            transport,
            closeServer: async () => server.close().catch(() => undefined),
            closeAfterRequest: false,
            closeIfUninitialized: true
        };
    };
    // Creates a single-use server + transport pair with no session id
    // generator; the caller closes both after the request.
    const createStatelessRuntime = async () => {
        const transport = new WebStandardStreamableHTTPServerTransport({
            sessionIdGenerator: undefined,
            enableJsonResponse: true
        });
        const server = createScholarMcpServer(config, service, researchService, logger);
        await server.connect(transport);
        return {
            transport,
            closeServer: async () => server.close().catch(() => undefined),
            closeAfterRequest: true,
            closeIfUninitialized: false
        };
    };
    // Picks the transport for a request. Resolves either to a runtime
    // descriptor or to an error Response (404 unknown session, 400 for a
    // missing session header, invalid JSON, or a non-initialize first request).
    const resolveTransport = async (request) => {
        if (config.httpSessionMode === 'stateless') {
            return createStatelessRuntime();
        }
        await pruneExpiredSessions('ttl_expired');
        const method = request.method.toUpperCase();
        const sessionId = request.headers.get(MCP_SESSION_HEADER)?.trim();
        if (sessionId) {
            const runtime = sessions.get(sessionId);
            if (!runtime) {
                return Response.json({ error: 'Session not found' }, { status: 404 });
            }
            runtime.lastSeenAt = Date.now();
            return {
                transport: runtime.transport,
                closeServer: runtime.closeServer,
                closeAfterRequest: false,
                closeIfUninitialized: false
            };
        }
        if (method !== 'POST') {
            return Response.json({ error: 'Missing mcp-session-id header' }, { status: 400 });
        }
        let parsedBody;
        try {
            // Parse a clone so the original request body stays unread.
            parsedBody = await request.clone().json();
        }
        catch {
            return Response.json({ error: 'Invalid JSON request body' }, { status: 400 });
        }
        if (!isInitializeRequest(parsedBody)) {
            return Response.json({ error: 'Expected an initialize request when mcp-session-id is absent' }, { status: 400 });
        }
        const runtime = await createSessionRuntime();
        return {
            ...runtime,
            parsedBody
        };
    };
    app.onError((error, c) => {
        logger.error('Unhandled HTTP runtime error', {
            error: error instanceof Error ? error.message : String(error)
        });
        return c.json({
            jsonrpc: '2.0',
            error: {
                code: -32603,
                message: 'Internal server error'
            },
            id: null
        }, 500);
    });
    app.notFound((c) => c.json({ error: 'Not found' }, 404));
    app.get('/', (c) => c.json({
        name: config.serverName,
        version: config.serverVersion,
        transport: 'streamable-http',
        endpoint: config.endpointPath,
        health: config.healthPath,
        sessionMode: config.httpSessionMode
    }));
    app.get(config.healthPath, (c) => c.json({
        status: 'ok',
        uptimeSeconds: Math.round(process.uptime()),
        serverName: config.serverName,
        serverVersion: config.serverVersion,
        transport: 'http',
        sessionMode: config.httpSessionMode,
        openSessions: sessions.size,
        timestamp: new Date().toISOString()
    }));
    // Gatekeeper for the MCP endpoint: Host check, Origin check, then bearer
    // auth. OPTIONS requests skip the auth check.
    app.use(config.endpointPath, async (c, next) => {
        const hostHeader = c.req.header('host') ?? '';
        const origin = c.req.header('origin');
        const authorization = c.req.header('authorization');
        if (!isHostAllowed(hostHeader, config)) {
            return attachCorsHeaders(c.json({ error: 'Forbidden host header' }, 403), origin);
        }
        if (!isOriginAllowed(origin, config)) {
            return attachCorsHeaders(c.json({ error: 'Forbidden origin' }, 403), origin);
        }
        if (c.req.method !== 'OPTIONS' && !isAuthorized(authorization, config)) {
            return attachCorsHeaders(c.json({ error: 'Unauthorized' }, 401), origin);
        }
        await next();
    });
    app.options(config.endpointPath, (c) => {
        const origin = c.req.header('origin');
        const response = new Response(null, { status: 204 });
        return attachCorsHeaders(response, origin);
    });
    app.all(config.endpointPath, async (c) => {
        const origin = c.req.header('origin');
        const resolved = await resolveTransport(c.req.raw);
        if (resolved instanceof Response) {
            return attachCorsHeaders(resolved, origin);
        }
        const { transport, parsedBody, closeAfterRequest, closeIfUninitialized, closeServer } = resolved;
        try {
            const response = await transport.handleRequest(c.req.raw, parsedBody === undefined ? undefined : { parsedBody });
            return attachCorsHeaders(response, origin);
        }
        catch (error) {
            logger.error('MCP HTTP request handling failed', {
                error: error instanceof Error ? error.message : String(error)
            });
            const response = Response.json({
                jsonrpc: '2.0',
                error: {
                    code: -32603,
                    message: 'Internal server error'
                },
                id: null
            }, { status: 500 });
            return attachCorsHeaders(response, origin);
        }
        finally {
            // Do not `return` from this block: a return inside `finally`
            // replaces the Response returned by try/catch with `undefined`,
            // which would drop the reply for every single-use runtime.
            const discardRuntime = closeAfterRequest || (closeIfUninitialized && !transport.sessionId);
            if (discardRuntime) {
                await transport.close().catch(() => undefined);
                await closeServer();
            }
        }
    });
    return {
        app,
        shutdown: async () => {
            await Promise.all([...sessions.keys()].map((sessionId) => closeSession(sessionId, 'server_shutdown', true)));
        }
    };
};
|
|
306
|
+
/**
 * Starts the HTTP transport and wires up signal handling.
 *
 * Builds the app via `createHttpApp`, serves it on `config.port` /
 * `config.host`, logs once listening, and registers SIGINT and SIGTERM
 * handlers that close the listener and all open sessions.
 *
 * @param {object} config - Reads `port`, `host`, `endpointPath`, `healthPath`
 *   and `httpSessionMode`; also passed to `createHttpApp`.
 * @param {object} service - Passed through to `createHttpApp`.
 * @param {object} researchService - Passed through to `createHttpApp`.
 * @param {object} logger - Used via `info`; also passed to `createHttpApp`.
 * @returns {object} The value returned by `serve`.
 */
export const startHttpServer = (config, service, researchService, logger) => {
    const httpRuntime = createHttpApp(config, service, researchService, logger);
    const serveOptions = {
        fetch: httpRuntime.app.fetch,
        port: config.port,
        hostname: config.host
    };
    const onListening = (info) => {
        logger.info('ScholarMCP HTTP transport listening', {
            host: config.host,
            port: info.port,
            endpoint: config.endpointPath,
            health: config.healthPath,
            sessionMode: config.httpSessionMode
        });
    };
    const server = serve(serveOptions, onListening);
    const shutdown = (signal) => {
        logger.info('Shutting down HTTP transport', { signal });
        server.close();
        // Session cleanup is started but not awaited by the signal handler.
        void httpRuntime.shutdown();
    };
    for (const signal of ['SIGINT', 'SIGTERM']) {
        process.on(signal, () => shutdown(signal));
    }
    return server;
};
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { config as loadDotEnv } from 'dotenv';
|
|
3
|
+
import { parseCliArgs, CLI_USAGE } from './cli/args.js';
|
|
4
|
+
import { parseConfig } from './config.js';
|
|
5
|
+
import { Logger } from './core/logger.js';
|
|
6
|
+
import { startHttpServer } from './http/start-http-server.js';
|
|
7
|
+
import { startStdioServer } from './mcp/start-stdio-server.js';
|
|
8
|
+
import { ResearchService } from './research/research-service.js';
|
|
9
|
+
import { ScholarService } from './scholar/scholar-service.js';
|
|
10
|
+
import { getPackageVersion } from './version.js';
|
|
11
|
+
// Runs dotenv's config() at module load, ahead of run(); `quiet: true` is forwarded to dotenv as-is.
loadDotEnv({ quiet: true });
|
|
12
|
+
/**
 * Writes a message followed by a newline to standard output.
 *
 * @param {*} message - Value interpolated into the output line.
 * @returns {void}
 */
const printStdout = (message) => {
    const line = `${message}\n`;
    process.stdout.write(line);
};
|
|
15
|
+
/**
 * Writes a message followed by a newline to standard error.
 *
 * @param {*} message - Value interpolated into the output line.
 * @returns {void}
 */
const printStderr = (message) => {
    const line = `${message}\n`;
    process.stderr.write(line);
};
|
|
18
|
+
/**
 * CLI entry point: handles --help / --version, builds the configuration and
 * services, then starts the transport(s) selected by `config.transport`.
 *
 * For `both`, the HTTP server is started first and the stdio server is then
 * awaited.
 *
 * @returns {Promise<void>}
 * @throws {Error} When `config.transport` is not `stdio`, `http` or `both`.
 */
const run = async () => {
    const cli = parseCliArgs(process.argv.slice(2));
    if (cli.showHelp) {
        printStdout(CLI_USAGE);
        return;
    }
    if (cli.showVersion) {
        printStdout(getPackageVersion());
        return;
    }
    // A transport given on the command line is handed to parseConfig as an override.
    const overrides = cli.transport
        ? { SCHOLAR_MCP_TRANSPORT: cli.transport }
        : undefined;
    const config = parseConfig(overrides);
    const logger = new Logger(config.logLevel);
    const scholarService = ScholarService.fromConfig(config, logger);
    const researchService = ResearchService.fromConfig(config, logger, scholarService);
    const mode = config.transport;
    const wantsHttp = mode === 'http' || mode === 'both';
    const wantsStdio = mode === 'stdio' || mode === 'both';
    if (!wantsHttp && !wantsStdio) {
        throw new Error(`Unsupported transport mode: ${String(config.transport)}`);
    }
    if (wantsHttp) {
        startHttpServer(config, scholarService, researchService, logger);
    }
    if (wantsStdio) {
        await startStdioServer(config, scholarService, researchService, logger);
    }
};
|
|
55
|
+
/**
 * Reports a startup failure on stderr and marks the process as failed.
 *
 * Prints the error message, the stack when one is available, and the CLI
 * usage text when the message indicates a bad argument or transport value.
 * Sets `process.exitCode` to 1 rather than exiting immediately.
 *
 * @param {unknown} error - Rejection reason from `run()`.
 * @returns {void}
 */
const reportStartupFailure = (error) => {
    const isErrorObject = error instanceof Error;
    const message = isErrorObject ? error.message : String(error);
    printStderr(`ScholarMCP failed to start: ${message}`);
    if (isErrorObject && error.stack) {
        printStderr(error.stack);
    }
    const usageHints = ['Unknown argument', 'Invalid transport'];
    if (usageHints.some((hint) => message.includes(hint))) {
        printStderr('');
        printStderr(CLI_USAGE);
    }
    process.exitCode = 1;
};
run().catch(reportStartupFailure);
|