openalex-mcp-server 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.js ADDED
@@ -0,0 +1,603 @@
1
+ /**
2
+ * OpenAlex MCP 服务器入口
3
+ * 提供 MCP 协议接口,用于与 AI Agent 交互
4
+ */
5
+
6
+ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
7
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
8
+ import {
9
+ CallToolRequestSchema,
10
+ ListToolsRequestSchema
11
+ } from '@modelcontextprotocol/sdk/types.js';
12
+ import dotenv from 'dotenv';
13
+ import { OpenAlexClient } from './openalex-client.js';
14
+ import { CacheManager } from './cache-manager.js';
15
+ import { FulltextDownloader } from './fulltext-downloader.js';
16
+ import { optimizeWork, optimizeSearchResults, optimizeBatchResults } from './json-optimizer.js';
17
+ import path from 'path';
18
+ import { fileURLToPath } from 'url';
19
+
20
// Load variables from a local .env file into process.env.
// `quiet: true` suppresses the informational banner that newer dotenv releases
// print on load. This server talks MCP over stdio, so any stray text on stdout
// would corrupt the JSON-RPC stream. Releases without the option ignore it.
dotenv.config({ quiet: true });

// ES modules do not provide __filename/__dirname; derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Configuration taken from the environment.
// An unset or empty OPENALEX_API_KEY becomes null.
const OPENALEX_API_KEY = process.env.OPENALEX_API_KEY || null;
// Caching is enabled unless CACHE_ENABLED is exactly the string 'false'.
const CACHE_ENABLED = process.env.CACHE_ENABLED !== 'false';
// Only echoed back by the system-check tool in this file; defaults to 'quick'.
const ABSTRACT_MODE = process.env.ABSTRACT_MODE || 'quick';

// Shared singletons used by every tool handler.
// NOTE(review): the cache locations are passed as relative paths; how they are
// resolved (cwd vs. package directory) is decided inside CacheManager /
// FulltextDownloader — confirm it matches the directory reported by cache_stats.
const openAlexClient = new OpenAlexClient(OPENALEX_API_KEY || undefined);
const cacheManager = new CacheManager('cache/papers');
const fulltextDownloader = new FulltextDownloader('cache/fulltext');
35
+
36
/**
 * Create the MCP server instance.
 *
 * The first argument is the identity announced to clients; the second declares
 * the server's capabilities. Only `tools` is advertised.
 */
const server = new Server(
  {
    name: 'openalex-mcp-server',
    // Must match the published package version (this release is 1.0.1).
    version: '1.0.1'
  },
  {
    capabilities: {
      tools: {}
    }
  }
);
50
+
51
/**
 * Tool catalogue.
 *
 * Answers MCP `tools/list` requests with the name, description and JSON-Schema
 * input definition of every tool this server exposes.
 */

/**
 * Assemble one tool descriptor.
 *
 * @param {string} name - Tool name as invoked by clients.
 * @param {string} description - Human-readable description.
 * @param {object} properties - JSON-Schema `properties` of the input object.
 * @param {string[]} [required] - Required property names; key omitted when absent.
 * @returns {object} Descriptor with `name`, `description` and `inputSchema`.
 */
const defineTool = (name, description, properties, required) => ({
  name,
  description,
  inputSchema: {
    type: 'object',
    properties,
    ...(required ? { required } : {})
  }
});

/** Schema fragment for a paper identifier accepted in any supported form. */
const genericWorkIdProperty = () => ({
  type: 'string',
  description: 'Paper identifier (OpenAlex ID, DOI, or PMID)'
});

/**
 * Build a fresh list of tool descriptors.
 *
 * @returns {object[]} The eight tool descriptors, in catalogue order.
 */
function buildToolDefinitions() {
  const searchTool = defineTool(
    'openalex_search',
    'Search for academic papers in OpenAlex database by keywords. Returns simplified paper metadata with title, authors, publication venue, year, citation count, and open access status.',
    {
      query: {
        type: 'string',
        description: 'Search keywords to find papers by title'
      },
      max_results: {
        type: 'number',
        description: 'Number of results per page (default: 20, max: 200)',
        default: 20
      },
      page: {
        type: 'number',
        description: 'Page number for pagination (default: 1)',
        default: 1
      },
      sort_by: {
        type: 'string',
        description: 'Sort field and order, e.g., "cited_by_count:desc", "publication_year:desc"'
      },
      filters: {
        type: 'object',
        description: 'Filters to apply to search',
        properties: {
          publication_year: {
            type: 'number',
            description: 'Filter by publication year'
          },
          is_oa: {
            type: 'boolean',
            description: 'Filter by open access status'
          },
          type: {
            type: 'string',
            description: 'Filter by document type (e.g., "article", "conference-paper")'
          }
        }
      }
    },
    ['query']
  );

  const getWorkTool = defineTool(
    'openalex_get_work',
    'Get detailed information about a specific paper by its OpenAlex ID, DOI, or PMID. Includes full abstract, author information, topics, and references count.',
    {
      work_id: {
        type: 'string',
        description: 'Paper identifier (OpenAlex ID like W1234567890, DOI like 10.xxx, or PMID)'
      },
      include_abstract: {
        type: 'boolean',
        description: 'Include abstract in response (default: true)',
        default: true
      },
      abstract_mode: {
        type: 'string',
        description: 'Abstract processing mode: "quick" (cached) or "deep" (fetch new)',
        enum: ['quick', 'deep']
      }
    },
    ['work_id']
  );

  const batchGetWorksTool = defineTool(
    'openalex_batch_get_works',
    'Get information for multiple papers in one request. More efficient than calling get_work multiple times.',
    {
      work_ids: {
        type: 'array',
        items: {
          type: 'string'
        },
        description: 'Array of paper IDs (OpenAlex IDs only, max 50)'
      },
      include_abstract: {
        type: 'boolean',
        description: 'Include abstracts in response (default: false for performance)',
        default: false
      }
    },
    ['work_ids']
  );

  const detectFulltextTool = defineTool(
    'openalex_detect_fulltext',
    'Check if full text is available for a paper and get the open access URL if available.',
    {
      work_id: genericWorkIdProperty()
    },
    ['work_id']
  );

  const downloadFulltextTool = defineTool(
    'openalex_download_fulltext',
    'Download full text PDF for an open access paper. Returns the cached file path on success.',
    {
      work_id: genericWorkIdProperty(),
      force_download: {
        type: 'boolean',
        description: 'Force re-download even if cached (default: false)',
        default: false
      }
    },
    ['work_id']
  );

  const fulltextSectionsTool = defineTool(
    'openalex_get_fulltext_sections',
    'Get extracted sections (abstract, introduction, methods, results, discussion, etc.) from a downloaded paper. Will trigger download if not cached.',
    {
      work_id: genericWorkIdProperty(),
      sections: {
        type: 'array',
        items: {
          type: 'string',
          enum: ['abstract', 'introduction', 'methods', 'results', 'discussion', 'conclusion', 'references']
        },
        description: 'Specific sections to retrieve (default: all available sections)'
      }
    },
    ['work_id']
  );

  const cacheStatsTool = defineTool(
    'openalex_cache_stats',
    'Get cache statistics or clear the cache. Shows number of cached papers, total size, and cache directory.',
    {
      action: {
        type: 'string',
        description: 'Action: "stats" for statistics or "clear" to clear cache',
        enum: ['stats', 'clear'],
        default: 'stats'
      }
    }
  );

  const systemCheckTool = defineTool(
    'openalex_system_check',
    'Check system status and API connectivity. Returns health status, API reachability, cache status, and version info.',
    {}
  );

  return [
    searchTool,
    getWorkTool,
    batchGetWorksTool,
    detectFulltextTool,
    downloadFulltextTool,
    fulltextSectionsTool,
    cacheStatsTool,
    systemCheckTool
  ];
}

// Register the `tools/list` handler; a new descriptor list is built per request.
server.setRequestHandler(ListToolsRequestSchema, async () => ({
  tools: buildToolDefinitions()
}));
232
+
233
/**
 * Tool invocation handling.
 *
 * Answers MCP `tools/call` requests. Each tool has its own handler function;
 * the registered request handler looks the tool up by name, runs it, and turns
 * any thrown error into an `isError: true` result instead of letting it escape.
 */

/**
 * Wrap a payload in the MCP tool-result envelope as pretty-printed JSON text.
 *
 * @param {unknown} payload - Value to serialise.
 * @param {boolean} [isError] - When provided, included verbatim as `isError`.
 * @returns {{content: Array<{type: string, text: string}>, isError?: boolean}}
 */
function toTextResult(payload, isError) {
  const result = {
    content: [
      {
        type: 'text',
        text: JSON.stringify(payload, null, 2)
      }
    ]
  };
  if (isError !== undefined) {
    result.isError = isError;
  }
  return result;
}

/**
 * Read the required `work_id` argument.
 *
 * @param {object} [args] - Raw tool arguments.
 * @returns {string} The identifier, stringified but otherwise untouched.
 * @throws {Error} When `work_id` is missing or blank, so an empty identifier
 *   is never sent to the API or used as a cache key.
 */
function requireWorkId(args) {
  const workId = String(args?.work_id || '');
  if (workId.trim() === '') {
    throw new Error('Missing required parameter: work_id');
  }
  return workId;
}

/**
 * Coerce a numeric argument to an integer within [min, max].
 *
 * @param {unknown} value - Raw argument.
 * @param {number} fallback - Used when the value is absent, non-numeric or below `min`.
 * @param {number} min - Smallest accepted value.
 * @param {number} max - Largest value; anything above is clamped down to it.
 * @returns {number}
 */
function toBoundedInt(value, fallback, min, max) {
  const parsed = Math.trunc(Number(value));
  if (!Number.isFinite(parsed) || parsed < min) {
    return fallback;
  }
  return Math.min(parsed, max);
}

/**
 * Fetch minimal metadata for a paper and run full-text detection on it.
 *
 * @param {string} workId - Paper identifier.
 * @returns {Promise<object>} Whatever `fulltextDownloader.detectFulltext` resolves
 *   to; callers read `fulltext_available` and `oa_url` from it.
 */
async function detectFulltextFor(workId) {
  const work = await openAlexClient.getWork(workId, {
    include_abstract: false,
    include_authors: false,
    include_topics: false
  });
  return fulltextDownloader.detectFulltext(optimizeWork(work));
}

/** Result returned when no open-access full text could be located. */
function fulltextUnavailableResult(workId) {
  return toTextResult(
    {
      work_id: workId,
      status: 'unavailable',
      message: 'Full text is not available for this paper'
    },
    false
  );
}

/** openalex_search: keyword search with paging, sorting and filters. */
async function handleSearch(args) {
  const query = String(args?.query || '');
  // The tool schema documents default 20 / max 200 per page and default page 1.
  const max_results = toBoundedInt(args?.max_results, 20, 1, 200);
  const page = toBoundedInt(args?.page, 1, 1, Number.MAX_SAFE_INTEGER);
  const sort_by = args?.sort_by ? String(args.sort_by) : undefined;
  const filters = args?.filters || {};

  const results = await openAlexClient.search(query, {
    max_results,
    page,
    sort_by,
    filters
  });
  return toTextResult(optimizeSearchResults(results));
}

/**
 * openalex_get_work: single paper lookup, served from cache when possible.
 *
 * NOTE(review): the tool schema advertises an `abstract_mode` argument, but it
 * is not read here — confirm whether it should be forwarded to the client.
 */
async function handleGetWork(args) {
  const work_id = requireWorkId(args);
  // Abstract is included unless the caller explicitly passes `false`.
  const include_abstract = args?.include_abstract !== false;
  const cacheKey = `work_${work_id}_${include_abstract}`;

  if (CACHE_ENABLED) {
    const cached = await cacheManager.get(cacheKey);
    if (cached) {
      // Flag cache hits so callers can tell them apart from fresh responses.
      return toTextResult({ ...cached, _cached: true });
    }
  }

  const work = await openAlexClient.getWork(work_id, {
    include_abstract,
    include_authors: true,
    include_topics: true
  });
  const optimized = optimizeWork(work);

  if (CACHE_ENABLED) {
    // Third argument is presumably a TTL in days — verify against CacheManager.
    await cacheManager.set(cacheKey, optimized, 30);
  }

  return toTextResult(optimized);
}

/** openalex_batch_get_works: fetch several papers at once and cache each one. */
async function handleBatchGetWorks(args) {
  const work_ids = Array.isArray(args?.work_ids) ? args.work_ids : [];
  const include_abstract = Boolean(args?.include_abstract);

  const results = await openAlexClient.batchGetWorks(work_ids, {
    include_abstract,
    include_authors: true
  });
  const optimized = optimizeBatchResults(results);

  if (CACHE_ENABLED && optimized.papers) {
    // Stored under the same key scheme that openalex_get_work reads.
    for (const paper of optimized.papers) {
      await cacheManager.set(`work_${paper.id}_${include_abstract}`, paper, 30);
    }
  }

  return toTextResult(optimized);
}

/** openalex_detect_fulltext: report whether open-access full text exists. */
async function handleDetectFulltext(args) {
  const work_id = requireWorkId(args);
  const detection = await detectFulltextFor(work_id);
  return toTextResult(detection);
}

/** openalex_download_fulltext: download the open-access PDF when one exists. */
async function handleDownloadFulltext(args) {
  const work_id = requireWorkId(args);
  const force_download = Boolean(args?.force_download);

  const detection = await detectFulltextFor(work_id);
  if (!detection.fulltext_available || !detection.oa_url) {
    return fulltextUnavailableResult(work_id);
  }

  const downloadResult = await fulltextDownloader.downloadFulltext(
    work_id,
    String(detection.oa_url || ''),
    force_download
  );
  return toTextResult(downloadResult);
}

/**
 * openalex_get_fulltext_sections: return extracted sections, downloading and
 * extracting the paper first when nothing has been extracted yet.
 */
async function handleGetFulltextSections(args) {
  const work_id = requireWorkId(args);
  const sections = Array.isArray(args?.sections) ? args.sections : undefined;

  try {
    const sectionsData = await fulltextDownloader.getSections(work_id, sections);
    return toTextResult({ work_id, sections: sectionsData });
  } catch {
    // Deliberate best-effort: a failure here is treated as "sections not
    // extracted yet", and the download + extract path below is tried instead.
  }

  const detection = await detectFulltextFor(work_id);
  if (!detection.fulltext_available || !detection.oa_url) {
    return fulltextUnavailableResult(work_id);
  }

  await fulltextDownloader.downloadFulltext(work_id, String(detection.oa_url || ''), false);
  const extractResult = await fulltextDownloader.extractAndSaveSections(work_id);
  if (extractResult.status === 'failed') {
    return toTextResult(extractResult, true);
  }

  const sectionsData = await fulltextDownloader.getSections(work_id, sections);
  return toTextResult({ work_id, sections: sectionsData });
}

/** openalex_cache_stats: report cache statistics ("clear" is not implemented). */
async function handleCacheStats(args) {
  const action = args?.action || 'stats';
  const stats = await cacheManager.getStats();

  if (action === 'clear') {
    // CacheManager offers no clear-all operation yet, so nothing is deleted;
    // the current statistics are returned alongside an explanatory message.
    return toTextResult({
      message: 'Cache clear requested (not implemented yet)',
      current_stats: stats
    });
  }

  return toTextResult({
    papers_cached: stats.totalFiles,
    total_size_bytes: stats.totalSize,
    total_size_mb: (stats.totalSize / (1024 * 1024)).toFixed(2),
    oldest_cache: stats.oldestCache,
    newest_cache: stats.newestCache,
    cache_dir: path.resolve(__dirname, '..', 'cache', 'papers'),
    cache_enabled: CACHE_ENABLED
  });
}

/** openalex_system_check: probe the API with a one-result search and report status. */
async function handleSystemCheck() {
  let api_reachable = false;
  let api_error = null;

  try {
    await openAlexClient.search('test', { max_results: 1 });
    api_reachable = true;
  } catch (error) {
    api_error = error instanceof Error ? error.message : String(error);
  }

  return toTextResult({
    status: api_reachable ? 'healthy' : 'degraded',
    api_reachable,
    api_error: api_error,
    cache_enabled: CACHE_ENABLED,
    api_key_configured: !!OPENALEX_API_KEY,
    // Must match the published package version (this release is 1.0.1).
    version: '1.0.1',
    abstract_mode: ABSTRACT_MODE
  });
}

// Tool name -> handler. A Map avoids accidental hits on Object.prototype keys.
const TOOL_HANDLERS = new Map([
  ['openalex_search', handleSearch],
  ['openalex_get_work', handleGetWork],
  ['openalex_batch_get_works', handleBatchGetWorks],
  ['openalex_detect_fulltext', handleDetectFulltext],
  ['openalex_download_fulltext', handleDownloadFulltext],
  ['openalex_get_fulltext_sections', handleGetFulltextSections],
  ['openalex_cache_stats', handleCacheStats],
  ['openalex_system_check', handleSystemCheck]
]);

server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name, arguments: args } = request.params;

  try {
    const handler = TOOL_HANDLERS.get(name);
    if (!handler) {
      throw new Error(`Unknown tool: ${name}`);
    }
    // `return await` keeps rejections inside this try block.
    return await handler(args);
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    return toTextResult({ error: errorMessage, tool: name }, true);
  }
});
579
+
580
/**
 * Start the MCP server on a stdio transport.
 *
 * Installs the error hook and the shutdown handlers before connecting, so
 * failures during startup are reported and a termination signal received at
 * any point closes the server cleanly.
 *
 * @returns {Promise<void>} Resolves once the transport is connected.
 */
async function main() {
  // Diagnostics go to stderr; stdout is reserved for the MCP message stream.
  server.onerror = (error) => {
    console.error('[MCP Server Error]:', error);
  };

  let shuttingDown = false;
  // Graceful shutdown: close the server once, then exit. A failure while
  // closing is logged and reflected in the exit code instead of surfacing as
  // an unhandled rejection.
  const shutdown = async () => {
    if (shuttingDown) {
      return;
    }
    shuttingDown = true;

    let exitCode = 0;
    try {
      await server.close();
    } catch (error) {
      console.error('[MCP Server Error]:', error);
      exitCode = 1;
    }
    process.exit(exitCode);
  };

  process.on('SIGINT', shutdown);
  process.on('SIGTERM', shutdown);

  const transport = new StdioServerTransport();
  await server.connect(transport);
}
598
+
599
// Entry point: launch the server; report a startup failure on stderr and exit
// with a non-zero status.
main().catch((startupError) => {
  console.error('Failed to start server:', startupError);
  process.exit(1);
});